From f2d62801067f1954033b13b3dd0a3cc5a2c79cbd Mon Sep 17 00:00:00 2001
From: zingballyhoo
Date: Fri, 22 Nov 2024 13:41:13 +0000
Subject: [PATCH] friendship ended with ToFrozenDictionary

---
 TACTLib/Core/Key/FullKey.cs         |   6 +-
 TACTLib/Core/Key/TruncatedKey.cs    |   4 +-
 TACTLib/Protocol/CDNIndexHandler.cs | 139 +++++++++++++++++-----------
 3 files changed, 93 insertions(+), 56 deletions(-)

diff --git a/TACTLib/Core/Key/FullKey.cs b/TACTLib/Core/Key/FullKey.cs
index baa4a9f..87fa919 100644
--- a/TACTLib/Core/Key/FullKey.cs
+++ b/TACTLib/Core/Key/FullKey.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Buffers.Binary;
+using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using TACTLib.Helpers;
@@ -11,6 +12,7 @@ namespace TACTLib.Core.Key {
     /// </summary>
     [StructLayout(LayoutKind.Sequential, Pack = 1)]
     [InlineArray(CASC_FULL_KEY_SIZE)]
+    [SuppressMessage("ReSharper", "UseSymbolAlias")]
     public struct FullKey : IComparable<FullKey> {
         // ReSharper disable once InconsistentNaming
         /// <summary>Content Key size, in bytes</summary>
@@ -48,11 +50,11 @@ public static FullKey FromByteArray(ReadOnlySpan<byte> array) {
             return MemoryMarshal.Read<FullKey>(array);
         }
 
-        public TruncatedKey AsTruncated() {
+        public readonly TruncatedKey AsTruncated() {
             return MemoryMarshal.Read<TruncatedKey>(this);
         }
 
-        public int CompareTo(FullKey other) {
+        public readonly int CompareTo(FullKey other) {
             return FullKeyCompare(this, other);
         }
 
diff --git a/TACTLib/Core/Key/TruncatedKey.cs b/TACTLib/Core/Key/TruncatedKey.cs
index 49156dd..9027567 100644
--- a/TACTLib/Core/Key/TruncatedKey.cs
+++ b/TACTLib/Core/Key/TruncatedKey.cs
@@ -1,5 +1,6 @@
 using System;
 using System.Buffers.Binary;
+using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using TACTLib.Helpers;
@@ -11,6 +12,7 @@ namespace TACTLib.Core.Key {
     /// </summary>
     [StructLayout(LayoutKind.Sequential, Pack = 1)]
     [InlineArray(CASC_TRUNCATED_KEY_SIZE)]
+    [SuppressMessage("ReSharper", "UseSymbolAlias")]
     public struct TruncatedKey : IComparable<TruncatedKey> {
         // ReSharper disable once InconsistentNaming
         /// <summary>Encoding Key size, in bytes</summary>
@@ -48,7 +50,7 @@ public static TruncatedKey FromByteArray(ReadOnlySpan<byte> array) {
             return MemoryMarshal.Read<TruncatedKey>(array);
         }
 
-        public int CompareTo(TruncatedKey other) {
+        public readonly int CompareTo(TruncatedKey other) {
             return TruncatedKeyCompare(this, other);
         }
 
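The key-struct changes above are mostly mechanical: `readonly` on the struct members avoids hidden defensive copies of `this`, and both types remain C# 12 [InlineArray] structs, i.e. fixed-size inline byte buffers that implicitly convert to spans. A minimal standalone sketch of the same pattern (Key16/Key9 are illustrative names, not the TACTLib types):

    using System;
    using System.Runtime.CompilerServices;
    using System.Runtime.InteropServices;

    [InlineArray(16)]
    public struct Key16 {
        private byte _element0; // the compiler expands this to 16 consecutive inline bytes

        public static Key16 FromBytes(ReadOnlySpan<byte> bytes) {
            // reinterpret the first 16 bytes of the span as a Key16
            return MemoryMarshal.Read<Key16>(bytes);
        }

        // readonly members let the compiler pass `this` by reference without a defensive copy
        public readonly Key9 Truncate() {
            // an inline array converts implicitly to ReadOnlySpan<byte>; Read takes the first 9 bytes
            return MemoryMarshal.Read<Key9>(this);
        }
    }

    [InlineArray(9)]
    public struct Key9 {
        private byte _element0;
    }
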
diff --git a/TACTLib/Protocol/CDNIndexHandler.cs b/TACTLib/Protocol/CDNIndexHandler.cs
index f81356a..e0f782c 100644
--- a/TACTLib/Protocol/CDNIndexHandler.cs
+++ b/TACTLib/Protocol/CDNIndexHandler.cs
@@ -1,6 +1,4 @@
 using System;
-using System.Collections.Concurrent;
-using System.Collections.Frozen;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
@@ -28,7 +26,10 @@ public struct IndexEntry
 public class CDNIndexHandler
 {
     private readonly ClientHandler Client;
-    private readonly IDictionary<FullEKey, IndexEntry> CDNIndexData;
+    private readonly List<string> Archives;
+    private readonly IntermediateIndexEntry[][][]? IntermediateArchiveIndices;
+
+    private readonly Dictionary<FullEKey, IndexEntry> CDNIndexMap;
 
     private FixedFooter ArchiveGroupFooter;
     private SafeFileHandle ArchiveGroupFileHandle = new SafeFileHandle();
@@ -49,65 +50,87 @@ public static CDNIndexHandler Initialize(ClientHandler clientHandler)
     private CDNIndexHandler(ClientHandler client)
     {
         Client = client;
-        if (client.CreateArgs.ParallelCDNIndexLoading)
-        {
-            CDNIndexData = new ConcurrentDictionary<FullEKey, IndexEntry>(CASCKeyComparer.Instance);
-        } else
-        {
-            CDNIndexData = new Dictionary<FullEKey, IndexEntry>(CASCKeyComparer.Instance);
-        }
+        Archives = Client.ConfigHandler.CDNConfig.Archives;
+        CDNIndexMap = new Dictionary<FullEKey, IndexEntry>(CASCKeyComparer.Instance);
 
         // load loose files index so we dont have to hit the cdn just to get 404'd
         OpenOrDownloadIndexFile(client.ConfigHandler.CDNConfig.Values["file-index"][0], ARCHIVE_ID_LOOSE);
 
         var archiveGroupHash = client.ConfigHandler.CDNConfig.Values["archive-group"][0];
         if (LoadGroupIndexFile(archiveGroupHash))
         {
-            // new paging impl
+            // if agent already created the group index locally, use it
+            // it contains the data from all indices merged into one linear data stream
             return;
         }
 
-        // using shared impl...
-        //if (m_client.ContainerHandler != null && OpenIndexFile(archiveGroupHash, ARCHIVE_ID_GROUP)) {
-        //    // no need to load individual indices
-        //    return;
-        //}
-
         if (!client.CreateArgs.LoadCDNIndices)
         {
             // we could download a million indices and that's quite expensive...
             // only loose files will be available
             return;
         }
+
+        IntermediateArchiveIndices = new IntermediateIndexEntry[Archives.Count][][];
         if (client.CreateArgs.ParallelCDNIndexLoading)
         {
-            Parallel.ForEach(client.ConfigHandler.CDNConfig.Archives, new ParallelOptions {
+            Parallel.ForEach(Archives, new ParallelOptions {
                 MaxDegreeOfParallelism = client.CreateArgs.MaxCDNIndexLoadingParallelism
             }, (archive, _, index) => {
                 OpenOrDownloadIndexFile(archive, (int)index);
             });
         } else
         {
-            for (var index = 0; index < client.ConfigHandler.CDNConfig.Archives.Count; index++)
+            for (var index = 0; index < Archives.Count; index++)
             {
-                var archive = client.ConfigHandler.CDNConfig.Archives[index];
-                OpenOrDownloadIndexFile(archive, index);
+                OpenOrDownloadIndexFile(Archives[index], index);
             }
         }
 
-        // todo: still not very happy about this system
-        // we create a giant dictionary (with no initial size) which can contain a lot of empty space (90+MB)
-        // converting to a frozen dictionary afterwards helps a bit but the arrays are still wasteful
-        // also means the peak memory is higher during conversion
-        // using IDictionary is also worse for lookup perf, but this keeps the code simpler for now
-        // we could load each index into an array of entries and then merge sort into one giant array...
-        // (also means higher building memory cost but maybe that's inevitable)
+        var totalIndexEntryCount = 0;
+        foreach (var archivePages in IntermediateArchiveIndices!)
+        {
+            foreach (var page in archivePages)
+            {
+                totalIndexEntryCount += page.Length;
+            }
+        }
 
-        if (!client.CreateArgs.ParallelCDNIndexLoading)
+        if (true)
+        {
+            ConstructHashMap(totalIndexEntryCount);
+
+            // todo: ToFrozenDictionary is still quite.. slow
+            // initializing the hash map with an initial capacity is already helping memory a lot
+            //CDNIndexMap = CDNIndexMap.ToFrozenDictionary(CASCKeyComparer.Instance);
+        } else
+        {
+            // implementing group index construction isn't a no-brainer yet...
+            // it works pretty well but perf varies a lot
+            // is really slow in Debug + current FullKey.CompareTo impl
+        }
+        IntermediateArchiveIndices = null;
+    }
+
+    private void ConstructHashMap(int totalIndexEntryCount)
+    {
+        using var _ = new PerfCounter("CDNIndexHandler::ConstructHashMap");
+
+        CDNIndexMap.EnsureCapacity(totalIndexEntryCount);
+        for (var archiveIdx = 0; archiveIdx < IntermediateArchiveIndices!.Length; archiveIdx++)
         {
-            // ToFrozenDictionary doesn't like ConcurrentDictionary
-            // before processing it internally converts it to a normal Dictionary
-            // for a dictionary with 9 million entries, this is a perf disaster
-            CDNIndexData = CDNIndexData.ToFrozenDictionary(CASCKeyComparer.Instance);
+            var pages = IntermediateArchiveIndices[archiveIdx];
+            foreach (var page in pages)
+            {
+                foreach (var entry in page)
+                {
+                    CDNIndexMap[entry.m_fullEKey] = new IndexEntry
+                    {
+                        Index = (ushort)archiveIdx,
+                        Offset = entry.m_offset,
+                        Size = entry.m_size
+                    };
+                }
+            }
         }
     }
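This constructor rewrite is the point of the commit: each index is parsed into intermediate per-page arrays first, the total entry count is summed, and the final Dictionary is filled once in ConstructHashMap after a single EnsureCapacity call, instead of growing a (Concurrent)Dictionary entry by entry and then paying for a ToFrozenDictionary conversion. A toy standalone benchmark of the pre-sizing effect (the entry count echoes the "9 million entries" figure from the removed comment; keys here are synthetic, not real EKeys):

    using System;
    using System.Collections.Generic;
    using System.Diagnostics;

    const int count = 9_000_000;

    var sw = Stopwatch.StartNew();
    var grown = new Dictionary<long, long>(); // no initial capacity: repeated rehashes and transient memory peaks
    for (long i = 0; i < count; i++) grown[i * 2654435761L] = i;
    Console.WriteLine($"grow-as-you-go: {sw.ElapsedMilliseconds} ms");

    sw.Restart();
    var presized = new Dictionary<long, long>();
    presized.EnsureCapacity(count); // one allocation sized for the final entry count
    for (long i = 0; i < count; i++) presized[i * 2654435761L] = i;
    Console.WriteLine($"pre-sized:      {sw.ElapsedMilliseconds} ms");
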
@@ -183,16 +206,20 @@ private void ParseIndex(Stream stream, int archiveIndex)
     {
         using var br = new BinaryReader(stream);
         var footer = ReadFooter(br);
+        if (footer.m_keyBytes != 16) throw new InvalidDataException($"footer.m_keyBytes != 16. got {footer.m_keyBytes}");
         GetTableParameters(footer, (int)br.BaseStream.Length, out var pageSize, out var pageCount);
-        if (archiveIndex == ARCHIVE_ID_GROUP) footer.m_offsetBytes -= 2; // archive index is part of offset
 
         if (archiveIndex == ARCHIVE_ID_LOOSE) {
             LooseFilesPages = new LooseFileEntry[pageCount][];
+        } else if (archiveIndex >= 0) {
+            IntermediateArchiveIndices![archiveIndex] = new IntermediateIndexEntry[pageCount][];
+        } else {
+            throw new InvalidDataException("group archive not supported in ParseIndex");
         }
 
         br.BaseStream.Position = 0;
         var page = new byte[pageSize];
-        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
+        for (var pageIndex = 0; pageIndex < pageCount; pageIndex++) {
             br.DefinitelyRead(page);
 
             if (archiveIndex == ARCHIVE_ID_LOOSE) {
@@ -213,12 +240,21 @@ private void ParseIndex(Stream stream, int archiveIndex)
                 LooseFilesPages[pageIndex] = pageEntries.ToArray(); // dont store same array multiple times
                 continue;
             }
+
+            // group index no longer supported here
+            Debug.Assert(archiveIndex >= 0);
 
             var pageSpan = page.AsSpan();
-            while (pageSpan.Length >= 16) {
+            var bytesPerEntry = footer.m_sizeBytes + footer.m_offsetBytes + footer.m_keyBytes;
+            var maxEntryCount = pageSpan.Length / bytesPerEntry;
+            IntermediateArchiveIndices![archiveIndex][pageIndex] = new IntermediateIndexEntry[maxEntryCount];
+            ref var intermediateEntries = ref IntermediateArchiveIndices[archiveIndex][pageIndex];
+
+            for (var entryIdx = 0; entryIdx < maxEntryCount; entryIdx++) {
                 var key = SpanHelper.ReadStruct<FullEKey>(ref pageSpan);
                 if (key.CompareTo(default) == 0) {
                     // has no value, end of the list
+                    intermediateEntries = intermediateEntries.AsSpan(0, entryIdx).ToArray();
                     break;
                 }
@@ -226,22 +262,16 @@ private void ParseIndex(Stream stream, int archiveIndex)
                 uint size;
                 if (footer.m_sizeBytes == 4) size = SpanHelper.ReadStruct(ref pageSpan).ToInt();
                 else throw new Exception($"unhandled `size` size: {footer.m_sizeBytes}");
 
-                ushort entryArchiveIndex = (ushort)archiveIndex;
-                if (archiveIndex == ARCHIVE_ID_GROUP) {
-                    entryArchiveIndex = SpanHelper.ReadStruct(ref pageSpan).ToInt();
-                }
-
                 uint offset;
                 if (footer.m_offsetBytes == 4) offset = SpanHelper.ReadStruct(ref pageSpan).ToInt();
                 else throw new Exception($"unhandled `offset` size: {footer.m_offsetBytes}");
 
-                var entry = new IndexEntry
+                intermediateEntries[entryIdx] = new IntermediateIndexEntry
                 {
-                    Index = entryArchiveIndex,
-                    Size = size,
-                    Offset = offset
+                    m_fullEKey = key,
+                    m_size = size,
+                    m_offset = offset
                 };
-                CDNIndexData[key] = entry;
             }
         }
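The new parse loop above replaces the old open-ended `while (pageSpan.Length >= 16)` scan with an exact upper bound: every entry in an index page has a fixed stride (key + size + offset widths from the footer), so the intermediate array can be allocated up front and trimmed once the all-zero terminator key is hit. A self-contained sketch of the same page layout logic (field widths match the footer values checked above; big-endian integers are assumed, and the helper names are illustrative, not TACTLib's):

    using System;
    using System.Buffers.Binary;

    // entry stride: 16-byte EKey + 4-byte size + 4-byte offset
    const int KeyBytes = 16, SizeBytes = 4, OffsetBytes = 4;
    const int Stride = KeyBytes + SizeBytes + OffsetBytes;

    // an all-zero page parses as empty: the first key is the terminator
    Console.WriteLine(ParsePage(new byte[48]).Length); // prints 0

    static (byte[] key, uint size, uint offset)[] ParsePage(ReadOnlySpan<byte> page) {
        var maxEntryCount = page.Length / Stride; // upper bound; the page may terminate early
        var entries = new (byte[], uint, uint)[maxEntryCount];
        var count = 0;
        for (; count < maxEntryCount; count++) {
            var entry = page.Slice(count * Stride, Stride);
            // an all-zero key marks the end of the valid entries in this page
            if (entry.Slice(0, KeyBytes).IndexOfAnyExcept((byte)0) < 0) break;
            entries[count] = (
                entry.Slice(0, KeyBytes).ToArray(),
                BinaryPrimitives.ReadUInt32BigEndian(entry.Slice(KeyBytes, SizeBytes)),
                BinaryPrimitives.ReadUInt32BigEndian(entry.Slice(KeyBytes + SizeBytes, OffsetBytes)));
        }
        return entries.AsSpan(0, count).ToArray(); // trim to the populated prefix, as the patch does
    }
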
@@ -253,12 +283,8 @@ private void ParseIndex(Stream stream, int archiveIndex)
 
         br.BaseStream.Position += pageCount * footer.m_checksumSize;
         br.BaseStream.Position += footer.DynamicSize;
         if (br.BaseStream.Position != br.BaseStream.Length) {
-            throw new Exception($"didnt wrong length data read from index. pos: {br.BaseStream.Position}. len: {br.BaseStream.Length}");
+            throw new Exception($"wrong length data read from index. pos: {br.BaseStream.Position}. len: {br.BaseStream.Length}");
         }
-
-        //var lastEKeys = br.ReadArray(pageCount);
-        //var test = lastEKeys.Select(x => x.ToHexString()).ToArray();
-        //Console.Out.WriteLine(test);
     }
 
     private void DownloadIndexFile(string archive, int i)
@@ -362,7 +388,7 @@ public bool TryGetIndexEntry(FullEKey eKey, out IndexEntry indexEntry) {
             return true;
         }
 
-        return CDNIndexData.TryGetValue(eKey, out indexEntry);
+        return CDNIndexMap.TryGetValue(eKey, out indexEntry);
     }
 
     public bool IsLooseFile(FullKey key) {
@@ -379,10 +405,17 @@ public bool IsLooseFile(FullKey key) {
 
     public byte[]? OpenIndexEntry(IndexEntry entry)
     {
-        var archiveKey = Client.ConfigHandler.CDNConfig.Archives[entry.Index];
+        var archiveKey = Archives[entry.Index];
         return Client.CDNClient!.FetchIndexEntry(archiveKey, entry);
     }
 
+    private struct IntermediateIndexEntry
+    {
+        public FullEKey m_fullEKey;
+        public uint m_size;
+        public uint m_offset;
+    }
+
     [StructLayout(LayoutKind.Sequential, Pack = 1)]
     private struct LooseFileEntry : IComparable<LooseFileEntry> {
         public FullEKey m_eKey;