diff --git a/intercept b/intercept index ee07219..845457b 160000 --- a/intercept +++ b/intercept @@ -1 +1 @@ -Subproject commit ee07219f2e5ebadffbd6828d2ceb6be59b344960 +Subproject commit 845457b89f10680f2c3456934adcb6dc6f307ccb diff --git a/src/AdapterTracy.cpp b/src/AdapterTracy.cpp index 75ece6f..ab83dcd 100644 --- a/src/AdapterTracy.cpp +++ b/src/AdapterTracy.cpp @@ -2,6 +2,7 @@ #define TRACY_ENABLE #define TRACY_ON_DEMAND +#define TRACY_FIBERS #include #include #include @@ -28,6 +29,7 @@ extern bool EngineProfilingEnabled; extern bool instructionLevelProfiling; extern bool InstructionCallstack; extern bool logPacketContent; +extern bool checkMainThread; void TracyParameterUpdated(uint32_t idx, int32_t val) { switch (idx) { @@ -49,6 +51,9 @@ void TracyParameterUpdated(uint32_t idx, int32_t val) { case TP_InstructionGetVarCallstackEnabled: InstructionCallstack = val != 0; break; + case TP_EngineProfilingMainThreadOnly: + checkMainThread = val != 0; + break; } } @@ -95,6 +100,24 @@ std::shared_ptr AdapterTracy::enterScope(std::shared_ptr scope) +{ + auto info = std::dynamic_pointer_cast(scope); + if (!info || !isConnected()) return; //#TODO debugbreak? log error? + ensureReady(); + + // Tracy code inlined, in a way such that we don't even need tempStorage + using namespace tracy; + + const SourceLocationData* srcloc = &info->info; + + TracyQueuePrepare(QueueType::ZoneBegin); + MemWrite(&item->zoneBegin.time, Profiler::GetTime()); + MemWrite(&item->zoneBegin.srcloc, (uint64_t)srcloc); + TracyQueueCommit(zoneBeginThread); + +} + std::shared_ptr AdapterTracy::enterScope(std::shared_ptr scope, uint64_t threadID) { return enterScope(scope); //auto info = std::dynamic_pointer_cast(scope); @@ -117,10 +140,19 @@ std::shared_ptr AdapterTracy::enterScope(std::shared_ptr tempStorage) { auto tmpStorage = std::dynamic_pointer_cast(tempStorage); if (!tmpStorage) return; //#TODO debugbreak? log error? 
- + tmpStorage->zone.end(); //zone destructor ends zone } +void AdapterTracy::leaveScopeNoStorage(uint64_t time) +{ + // Tracy code inlined, in a way such that we don't even need tempStorage. Emits a ZoneEnd item for the calling thread; time == -1 (i.e. the maximum uint64_t value) means "use the current Tracy time". + using namespace tracy; + TracyQueuePrepare(QueueType::ZoneEnd); + MemWrite(&item->zoneEnd.time, time == -1 ? Profiler::GetTime() : time); + TracyQueueCommit(zoneEndThread); +} + void AdapterTracy::setName(std::shared_ptr tempStorage, const intercept::types::r_string& name) { auto tmpStorage = std::dynamic_pointer_cast(tempStorage); if (!tmpStorage) return; //#TODO debugbreak? log error? @@ -130,9 +162,25 @@ void AdapterTracy::setName(std::shared_ptr tempStorage, const void AdapterTracy::setDescription(std::shared_ptr tempStorage, const intercept::types::r_string& descr) { auto tmpStorage = std::dynamic_pointer_cast(tempStorage); if (!tmpStorage) return; //#TODO debugbreak? log error? + tmpStorage->zone.Text(descr.c_str(), descr.length()); } +void AdapterTracy::setDescriptionNoStorage(const intercept::types::r_string& descr) +{ + // Tracy code inlined, in a way such that we don't even need tempStorage. Attaches descr as text to the zone most recently opened on this thread; the text is copied into a tracy_malloc'd buffer whose pointer is handed to the queue item (presumably freed by Tracy after sending - verify against the Tracy version in use). + using namespace tracy; + + const char* txt = descr.c_str(); + size_t size = descr.length() > 0xFFFE ? 0xFFFE : descr.length(); // zoneTextFat.size is written as uint16_t below: clamp so an oversized description is truncated instead of wrapping modulo 65536. NOTE(review): 0xFFFE keeps it strictly below the uint16_t maximum, which Tracy's own ScopedZone::Text appears to require - confirm. + auto ptr = (char*)tracy_malloc(size); + memcpy(ptr, txt, size); + TracyQueuePrepare(QueueType::ZoneText); + MemWrite(&item->zoneTextFat.text, (uint64_t)ptr); + MemWrite(&item->zoneTextFat.size, (uint16_t)size); + TracyQueueCommit(zoneTextFatThread); +} + void AdapterTracy::addLog(intercept::types::r_string message) { if (message.empty()) return; tracy::Profiler::Message(message.c_str(), message.length()); @@ -212,6 +260,8 @@ void AdapterTracy::sendCallstack(intercept::types::auto_array createScope(intercept::types::r_string name, intercept::types::r_string filename, uint32_t fileline) override; std::shared_ptr enterScope(std::shared_ptr scope) override; + // The NoStorage functions can only be used if they are always paired up correctly and in correct order. 
Cannot leave a scope that was never entered + void enterScopeNoStorage(std::shared_ptr scope); std::shared_ptr enterScope(std::shared_ptr scope, uint64_t threadID) override; std::shared_ptr enterScope(std::shared_ptr scope, ScopeWithCallstack cs) override; void leaveScope(std::shared_ptr tempStorage) override; + void leaveScopeNoStorage(uint64_t time = -1); void setName(std::shared_ptr tempStorage, const intercept::types::r_string& name) override; void setDescription(std::shared_ptr tempStorage, const intercept::types::r_string& descr) override; + void setDescriptionNoStorage(const intercept::types::r_string& descr); void addLog(intercept::types::r_string message) override; void setCounter(intercept::types::r_string name, float val) override; void setCounter(const char* name, float val) const; @@ -39,6 +44,9 @@ class AdapterTracy final : public ProfilerAdapter static void addParameter(uint32_t idx, const char* name, bool isBool, int32_t val); + static void SwitchToFiber(const char* name); + static void LeaveFiber(); + private: static void ensureReady(); using scopeCacheKey = std::tuple; diff --git a/src/EngineProfiling.cpp b/src/EngineProfiling.cpp index 7a1383e..e901a6d 100644 --- a/src/EngineProfiling.cpp +++ b/src/EngineProfiling.cpp @@ -5,6 +5,9 @@ #include #include "scriptProfiler.hpp" #include "SignalSlot.hpp" +#if _WIN32 +#include +#endif struct CounterHasher { public: @@ -16,16 +19,26 @@ struct CounterHasher { } }; +// We need to check to not close a scope that was never opened. 
But if scopes always open and close in order AND we don't need ScopeTempStorage, a simple stack works too +#define OPEN_SCOPE_MAP 1 + +#if OPEN_SCOPE_MAP thread_local std::unique_ptr, std::shared_ptr, CounterHasher>> openScopes; -thread_local bool openScopesInit; -std::unordered_map> scopeCache; -std::shared_mutex scopeCacheMtx; +#else +thread_local std::vector> openScopes; +std::atomic globalZoneFlushC = 0; +thread_local uint64_t threadZoneFlushC = 0; +#endif + +thread_local std::unordered_map> scopeCache; +//std::shared_mutex scopeCacheMtx; bool noFile = false; bool noMem = false; bool tracyConnected = false; bool checkMainThread = false; thread_local bool isMainThread = false; bool EngineProfilingEnabled = true; +thread_local bool ignoreScopes = false; std::string getScriptName(const r_string& str, const r_string& filePath, uint32_t returnFirstLineIfNoName = 0); void addScopeInstruction(auto_array>& bodyCode, const r_string& scriptName); @@ -61,11 +74,12 @@ bool PCounter::shouldTime() { if (checkMainThread && !isMainThread) return false; if (!tracyConnected || !EngineProfilingEnabled) return false; + if (ignoreScopes) return false; //exclude security cat, evwfGet evGet and so on as they spam too much and aren't useful if (cat && cat[0] == 's' && cat[1] == 'e' && cat[2] == 'c' && cat[3] == 'u') return false; - if (noFile && cat && cat[0] == 'f' && cat[1] == 'i' && cat[2] == 'l' && cat[3] == 'e') return false; - if (noMem&& cat&& cat[0] == 'm' && cat[1] == 'e' && cat[2] == 'm') return false; + if (noFile && ((cat && cat[0] == 'f' && cat[1] == 'i' && cat[2] == 'l' && cat[3] == 'e') || (name && name[0] == 'f' && name[1] == 's'))) return false; //noFile filters both the "file" category and counters named "fs..."; the whole alternative must sit under the noFile guard (&& binds tighter than ||), and name gets the same null check as cat + if (noMem && cat&& cat[0] == 'm' && cat[1] == 'e' && cat[2] == 'm') return false; if (cat && cat[0] == 'd' && cat[1] == 'r' && cat[2] == 'w') return false; //drw if (cat && cat[0] == 'd' && cat[1] == 'd' && cat[2] == '1') return false; //dd11 if (cat && cat[0] == 't' && cat[1] == 'e' && cat[2] == 'x' && cat[3] == 0) return false; //tex 
@@ -74,27 +88,13 @@ bool PCounter::shouldTime() { auto tracyProf = std::reinterpret_pointer_cast(GProfilerAdapter); - std::unordered_map>::iterator found; - - if (checkMainThread) {//No locks needed - found = scopeCache.find(this); - if (found == scopeCache.end()) { - auto res = scopeCache.insert({ this, tracyProf->createScopeStatic(name, cat, 0) }); - found = res.first; - } - } else { - std::shared_lock lock(scopeCacheMtx); - found = scopeCache.find(this); - if (found == scopeCache.end()) { - lock.unlock(); - std::unique_lock lockInternal(scopeCacheMtx); - auto res = scopeCache.insert({ this, tracyProf->createScopeStatic(name, cat, 0) }); - lockInternal.unlock();//#TODO this is unsafe - lock.lock(); - found = res.first; - } + std::unordered_map>::iterator found = scopeCache.find(this); + if (found == scopeCache.end()) { + auto res = scopeCache.insert({ this, tracyProf->createScopeStatic(name, cat, 0) }); + found = res.first; } +#if OPEN_SCOPE_MAP if (!openScopes) openScopes = std::make_unique, std::shared_ptr, CounterHasher>>(); @@ -102,21 +102,28 @@ bool PCounter::shouldTime() { auto p = std::make_pair(this, slot); auto ins = openScopes->insert_or_assign(p,nullptr); auto tmp = tracyProf->enterScope(found->second); - //if (tmp) + //if (tmp) // Entering scope might fail? 
Only really if the source location data is invalid ins.first->second = tmp; //else // openScopes.erase(p); - +#else + tracyProf->enterScopeNoStorage(found->second); + + while (globalZoneFlushC > threadZoneFlushC) + { + // We were supposed to flush all our scopes (because the tracy client connection ID changed) + openScopes.clear(); + ++threadZoneFlushC; + } + + openScopes.emplace_back(this, (int64_t)__rdtsc()); +#endif return true; } void ScopeProf::doEnd() { - if (!openScopes || openScopes->empty() || !counter) return; - auto found = openScopes->find({ counter, counter->slot }); - if (found == openScopes->end()) return; - GProfilerAdapter->leaveScope(found->second); - openScopes->erase(found); + // deprecated old stuff } void ArmaProf::frameEnd(float fps, float time, int smth) { @@ -126,12 +133,63 @@ void ArmaProf::frameEnd(float fps, float time, int smth) { } } -void ArmaProf::scopeCompleted(int64_t start, int64_t end, intercept::types::r_string* stuff, PCounter* counter) { +void ArmaProf::scopeCompleted(int64_t start, int64_t end, intercept::types::r_string* extraInfo, PCounter* counter) { +#if OPEN_SCOPE_MAP if (!openScopes || openScopes->empty() || !counter) return; auto found = openScopes->find({ counter, counter->slot }); if (found == openScopes->end()) return; + if (extraInfo && !extraInfo->empty()) + GProfilerAdapter->setDescription(found->second, *extraInfo); GProfilerAdapter->leaveScope(found->second); openScopes->erase(found); +#else + if (openScopes.empty()) + return; + bool scopeMatchesLastCounter = openScopes.back().first == counter; + bool scopeStartedBeforeLastCounter = start < openScopes.back().second; + + if (!scopeMatchesLastCounter && !scopeStartedBeforeLastCounter) + return; + + // We never opened this counter? 
+ + while (globalZoneFlushC > threadZoneFlushC) + { + // We were supposed to flush all our scopes (because the tracy client connection ID changed) + openScopes.clear(); + ++threadZoneFlushC; + return; + } + + if (ignoreScopes) return; + + auto tracyProf = std::reinterpret_pointer_cast(GProfilerAdapter); + + if (scopeStartedBeforeLastCounter && !scopeMatchesLastCounter) + { + // We got desynchronized. + // We are currently exiting a scope, that started before the last active scope. We missed the exit of the last active scope + // This is probably because the last active scope hasn't ended, because it's in a yielded coroutine somewhere else. We'll just have to drop it + + // End all scopes, which started after the one we are now exiting + while (!openScopes.empty() && openScopes.back().second > start) + { + openScopes.pop_back(); + tracyProf->leaveScopeNoStorage(); + } + + if (openScopes.empty()) + return; // This is very unlikely, I don't think this ever happens + } + + openScopes.pop_back(); + + if (extraInfo && !extraInfo->empty()) + tracyProf->setDescriptionNoStorage(*extraInfo); + tracyProf->leaveScopeNoStorage(end); +#endif + + } @@ -169,23 +227,24 @@ HookManager::Pattern pat_compileCacheIns{ //1.88.145.302 profv1 013D40B3 HookManager::Pattern pat_frameEnd{ - "xxxxxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx?xxxxx?????x????xxxxxxxxx????xx????xxxx?xxxxxxxx????xx????xx?????x????xxx????xx????xx????xxxxxxxx????xxx????xxxxx????xxxxxxxxxxxxxxxxxxxxxxx"sv, - 
"\x48\x8B\xC4\x57\x41\x56\x48\x83\xEC\x78\x48\x89\x58\x10\x0F\xB6\x99\x00\x00\x00\x00\x48\x89\x68\xE8\x48\x89\x70\xE0\x4C\x89\x60\xD8\x44\x8B\x61\x6C\x44\x3B\x61\x68\x4C\x89\x68\xD0\x48\x8B\xF9\x0F\x29\x78\xA8\x0F\x28\xFA\x44\x0F\x4F\x61\x00\x45\x8B\xE9\xC6\x81\x00\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x45\x33\xF6\x4C\x39\x77\x60\x0F\x84\x00\x00\x00\x00\x8B\x87\x00\x00\x00\x00\x4C\x89\x7C\x24\x00\x85\xC0\x7E\x1A\xFF\xC8\x89\x87\x00\x00\x00\x00\x0F\x85\x00\x00\x00\x00\xC6\x87\x00\x00\x00\x00\x00\xE9\x00\x00\x00\x00\x44\x38\xB7\x00\x00\x00\x00\x0F\x84\x00\x00\x00\x00\x8B\x87\x00\x00\x00\x00\x85\xC0\x7E\x08\xFF\xC8\x89\x87\x00\x00\x00\x00\x44\x39\xB7\x00\x00\x00\x00\x7F\x75\x48\x8B\x87\x00\x00\x00\x00\x48\x85\xC0\x74\x69\x44\x38\x70\x10\x74\x63\x48\x8D\x50\x10\x48\x85\xC0\x75\x07\x48\x8D\x15"sv + "xxxxxxxxxxxxxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxx?????xxxxxx????x????xxxxxxxxx????xx????xxxx?xxxxxxxxx????xx????xx?????x????"sv, + "\x48\x8B\xC4\x44\x89\x48\x20\x56\x57\x41\x57\x48\x83\xEC\x70\x48\x89\x58\x10\x48\x8B\xF1\x0F\xB6\x99\x00\x00\x00\x00\x48\x89\x68\xE0\x4C\x89\x70\xC8\x45\x8B\xF1\x0F\x29\x78\xA8\x0F\x28\xFA\x8B\x41\x6C\x8B\x69\x68\x3B\xC5\xC6\x81\x00\x00\x00\x00\x00\x0F\x4E\xE8\x89\xAC\x24\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x45\x33\xFF\x4C\x39\x7E\x60\x0F\x84\x00\x00\x00\x00\x8B\x86\x00\x00\x00\x00\x4C\x89\x64\x24\x00\x85\xC0\x7E\x1B\x83\xE8\x01\x89\x86\x00\x00\x00\x00\x0F\x85\x00\x00\x00\x00\xC6\x86\x00\x00\x00\x00\x00\xE9\x00\x00\x00\x00"sv }; -HookManager::Pattern pat_doEnd{ - "xxxxxxxxxxxxxxxxx?????xxxx?????xxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxx????xxxx?x????xxxxxxxxxxxxxxxxx????xxxxxxxxx?????xxxxxx"sv, - 
"\x40\x53\x48\x83\xEC\x30\x80\x79\x11\x00\x48\x8B\xD9\x75\x09\x80\x3D\x00\x00\x00\x00\x00\x74\x38\x80\x3D\x00\x00\x00\x00\x00\x74\x0B\x0F\x31\x48\xC1\xE2\x20\x48\x0B\xC2\xEB\x05\xE8\x00\x00\x00\x00\x48\x8B\x13\x4C\x8B\xC0\x48\x8B\x43\x08\x4C\x8D\x4B\x18\x48\x8D\x0D\x00\x00\x00\x00\x48\x89\x44\x24\x00\xE8\x00\x00\x00\x00\x48\x8B\x53\x18\x48\x85\xD2\x74\x1A\xF0\xFF\x0A\x75\x0D\x48\x8B\x0D\x00\x00\x00\x00\x48\x8B\x01\xFF\x50\x18\x48\xC7\x43\x00\x00\x00\x00\x00\x48\x83\xC4\x30\x5B\xC3"sv +// Just some scope start, so we can find the boss man +HookManager::Pattern pat_aScopeStart{ + "xxxxxxxxx?????xxxxxxxxxxxxxxxxxxxxxx????xxx????xxx????xxx????xxx????xxx????xxxxxx????xxxxx????x????xx????xx?????xxx????xxxx????xxxxxx?xx?????xxxxxxxx?xxxxx????xxxxxxxx????xx????xx"sv, + "\x48\x8B\xC4\x48\x83\xEC\x68\x80\x3D\x00\x00\x00\x00\x00\x48\x89\x58\x10\x48\x89\x68\x18\x48\x8B\xE9\x48\x89\x78\xF0\x4C\x89\x70\xE8\x4C\x8D\x35\x00\x00\x00\x00\x75\x46\xE8\x00\x00\x00\x00\x48\x8D\x15\x00\x00\x00\x00\x4C\x89\x35\x00\x00\x00\x00\x4C\x8D\x05\x00\x00\x00\x00\x48\x89\x15\x00\x00\x00\x00\x44\x8B\xC8\x4C\x89\x05\x00\x00\x00\x00\x49\x8B\xCE\x89\x05\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x89\x05\x00\x00\x00\x00\xC6\x05\x00\x00\x00\x00\x00\x48\x8D\x1D\x00\x00\x00\x00\x48\x8B\xCB\xE8\x00\x00\x00\x00\x33\xFF\x48\x89\x5C\x24\x00\x83\x3D\x00\x00\x00\x00\x00\x88\x44\x24\x40\x48\x89\x7C\x24\x00\x74\x45\x40\x38\x3D\x00\x00\x00\x00\x75\x12\x84\xC0\x74\x38\xFF\x15\x00\x00\x00\x00\x39\x05\x00\x00\x00\x00\x75\x2A, "sv }; HookManager::Pattern pat_scopeCompleted{ - "xxxx?xxxx?xxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx?xxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx????xxx????xxxxxx????xxxx?xx????xxx"sv, - 
"\x48\x89\x5C\x24\x00\x48\x89\x6C\x24\x00\x57\x41\x54\x41\x57\x48\x83\xEC\x20\x48\x8B\x81\x00\x00\x00\x00\x49\x8B\xF8\x4D\x8B\xE1\x48\x3B\xD0\x48\x8B\xD9\x48\x0F\x4C\xD0\x48\xC1\xF8\x04\x48\xC1\xFF\x04\x48\xC1\xFA\x04\x44\x8B\xFA\x2B\xFA\x44\x2B\xF8\x48\x8B\x44\x24\x00\x48\x63\x68\x18\x85\xED\x0F\x88\x00\x00\x00\x00\x8B\x41\x68\x66\x0F\x6E\xC7\x8B\xC8\xD1\xF9\x66\x0F\x6E\xD0\x8B\x43\x6C\x0F\x5B\xC0\x2B\xC1\x66\x0F\x6E\xC8\x0F\x5B\xD2\x42\x8D\x04\x3F\xF3\x0F\x59\xD0\x66\x0F\x6E\xC0\x0F\x5B\xC9\x0F\x5B\xC0\xF3\x0F\x59\xC8\x0F\x2F\xD1\x73\x48\x80\x3B\x00\x0F\x84\x00\x00\x00\x00\x4C\x69\xC5\x00\x00\x00\x00\x4C\x03\x43\x20\x0F\x84\x00\x00\x00\x00\x41\x0F\x0D\x48\x00\x41\xB9\x00\x00\x00\x00\x0F\x1F\x00"sv + "xxxxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx?xxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx????xxx????xxxxxx????"sv, + "\x40\x53\x56\x41\x54\x41\x55\x41\x57\x48\x83\xEC\x20\x48\x8B\x81\x00\x00\x00\x00\x49\x8B\xF0\x48\x3B\xD0\x4D\x8B\xE9\x48\x8B\xD9\x48\x0F\x4C\xD0\x48\xC1\xF8\x04\x48\xC1\xFA\x04\x48\xC1\xFE\x04\x44\x8B\xE2\x44\x2B\xE0\x2B\xF2\x48\x8B\x44\x24\x00\x4C\x63\x78\x18\x45\x85\xFF\x0F\x88\x00\x00\x00\x00\x8B\x41\x68\x8B\xC8\xD1\xF9\x66\x0F\x6E\xC6\x0F\x5B\xC0\x66\x0F\x6E\xD0\x8B\x43\x6C\x2B\xC1\x0F\x5B\xD2\x66\x0F\x6E\xC8\x42\x8D\x04\x26\xF3\x0F\x59\xD0\x66\x0F\x6E\xC0\x0F\x5B\xC0\x0F\x5B\xC9\xF3\x0F\x59\xC8\x0F\x2F\xD1\x73\x3C\x80\x3B\x00\x0F\x84\x00\x00\x00\x00\x49\x69\xD7\x00\x00\x00\x00\x48\x03\x53\x20\x0F\x84\x00\x00\x00\x00"sv }; HookManager::Pattern pat_shouldTime{ - "xxxxxxxxxxxxxxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxx????xxxxxxx"sv, - "\x48\x63\x41\x18\x85\xC0\x78\x32\x4C\x8B\x01\x33\xD2\x4D\x85\xC0\x74\x12\x41\x38\x10\x74\x0D\x48\x69\xC8\x00\x00\x00\x00\x49\x03\x48\x20\xEB\x03\x48\x8B\xCA\x48\x85\xC9\x74\x0A\x38\x51\x4A\x74\x05\xBA\x00\x00\x00\x00\x0F\xB6\xC2\xC3\x32\xC0\xC3"sv + "xxxxxxxxxxxxxxxxxxxxxxxx????xxxxxxxxxxxxxxxxxxxxxx"sv, + 
"\x48\x63\x41\x18\x85\xC0\x78\x2A\x48\x8B\x11\x48\x85\xD2\x74\x22\x80\x3A\x00\x74\x0D\x48\x69\xC8\x00\x00\x00\x00\x48\x03\x4A\x20\xEB\x02\x33\xC9\x48\x85\xC9\x74\x09\x80\x79\x4A\x00\x74\x03\xB0\x01\xC3"sv }; #endif @@ -197,21 +256,116 @@ EngineProfiling::EngineProfiling() { extern Signal tracyConnectionChanged; +#pragma region PETools +// Small extract of PETools from APE +#if _WIN32 + +PIMAGE_IMPORT_DESCRIPTOR GetImportDirectory(HANDLE module) { + auto dosHeader = (PIMAGE_DOS_HEADER)module; + auto pNTHeader = (PIMAGE_NT_HEADERS)((BYTE*)dosHeader + dosHeader->e_lfanew); + auto base = (DWORD64)dosHeader; + + auto importDirStartRVA = pNTHeader->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].VirtualAddress; + auto size = pNTHeader->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT].Size; + + auto importDir = (PIMAGE_IMPORT_DESCRIPTOR)((PBYTE)base + importDirStartRVA); + return importDir; +} + +PIMAGE_THUNK_DATA GetModuleImportDescriptor(HMODULE module, std::string_view libName, PIMAGE_THUNK_DATA* lookupThunk = nullptr) { + + PIMAGE_IMPORT_DESCRIPTOR importDescriptor = GetImportDirectory(module); + + bool found = false; + while (/*importDescriptor->Characteristics &&*/ importDescriptor->Name) { + PSTR importName = (PSTR)((PBYTE)module + importDescriptor->Name); + if (_stricmp(importName, libName.data()) == 0) { + found = true; + break; + } + importDescriptor++; + } +#if REL_DEBUG + if (!found) __debugbreak(); +#endif + PIMAGE_THUNK_DATA thunk = (PIMAGE_THUNK_DATA)((PBYTE)module + importDescriptor->FirstThunk); + if (lookupThunk) + *lookupThunk = (PIMAGE_THUNK_DATA)((PBYTE)module + importDescriptor->Characteristics); + + return thunk; +} + +// Get a method address by using a known modules import address table (if that dll imports the method we want) +uintptr_t PatchIAT(HMODULE module, std::string_view libname, std::string_view procname, uintptr_t newTarget) { + PIMAGE_THUNK_DATA lookupThunk; + auto thunk = GetModuleImportDescriptor(module, libname, 
&lookupThunk); + while (thunk->u1.Function) { + PROC* funcStorage = (PROC*)&thunk->u1.Function; + auto funcString = (char*)((uintptr_t)module + lookupThunk->u1.ForwarderString + 2); // +2 skips the hint word in front of the import name; only a valid pointer for imports by name, so it must not be dereferenced for ordinal imports + // Found it, now let's patch it (entries imported by ordinal carry no name and are skipped) + if (!IMAGE_SNAP_BY_ORDINAL(lookupThunk->u1.Ordinal) && std::string_view(funcString) == procname) { + + auto old = (uintptr_t)*funcStorage; + + // Get the memory page where the info is stored + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery(funcStorage, &mbi, sizeof(MEMORY_BASIC_INFORMATION))) break; // on failure mbi is not filled in, bail out instead of passing garbage to VirtualProtect + + // Try to change the page to be writable if it's not already + if (!VirtualProtect(mbi.BaseAddress, mbi.RegionSize, PAGE_READWRITE, &mbi.Protect)) + break; + + *(uintptr_t*)funcStorage = newTarget; + + // Restore the old flag on the page + DWORD dwOldProtect; + VirtualProtect(mbi.BaseAddress, mbi.RegionSize, mbi.Protect, &dwOldProtect); + + return old; + } + + thunk++; + lookupThunk++; + } + return 0; +} + + +#endif // _WIN32 +#pragma endregion PETools + + +decltype(SwitchToFiber)* origSwitchToFiber = &SwitchToFiber; // Starts out pointing at the real API: PatchIAT redirects the game's import before init() can store the previous target, so a fiber switch on another thread in that window must not call through a null pointer + +VOID +WINAPI +SwitchToFiberReplacement( + _In_ LPVOID lpFiber +) { + AdapterTracy::SwitchToFiber("Fib"); + origSwitchToFiber(lpFiber); + AdapterTracy::LeaveFiber(); +} + void EngineProfiling::init() { + tracyConnected = AdapterTracy::isConnected(); tracyConnectionChanged.connect([](bool state) { tracyConnected = state; +#if !OPEN_SCOPE_MAP + ++globalZoneFlushC; +#endif }); if (auto tracyAdapter = std::dynamic_pointer_cast(GProfilerAdapter)) { + + // Handler for these is in TracyParameterUpdated tracyAdapter->addParameter(TP_EngineProfilingEnabled, "EngineProfilingEnabled", true, 1); + tracyAdapter->addParameter(TP_EngineProfilingMainThreadOnly, "EngineProfilingMainThreadOnly", true, 0); } - - //order is important - //hooks.placeHook(hookTypes::doEnd, pat_doEnd, reinterpret_cast(doEnd), profEndJmpback, 1, true); hooks.placeHook(hookTypes::scopeCompleted, pat_scopeCompleted, reinterpret_cast(scopeCompleted), profEndJmpback, 0); hooks.placeHook(hookTypes::shouldTime, pat_shouldTime, reinterpret_cast(shouldTime), 
shouldTimeJmpback, 0); hooks.placeHook(hookTypes::frameEnd, pat_frameEnd, reinterpret_cast(frameEnd), frameEndJmpback, 0); @@ -224,22 +378,38 @@ void EngineProfiling::init() { auto stuffByte = found + 0x2A; uint32_t base = *reinterpret_cast(stuffByte); #else - auto found = hooks.findPattern(pat_doEnd, 0xD); + auto found = hooks.findPattern(pat_aScopeStart, 0x21); + + // lea r14, bossman - auto stuffByte = found + 0x2 + 2; - uint32_t offs = *reinterpret_cast(stuffByte); - uint64_t addr = stuffByte + 4 + 1 + offs; - uint64_t base = addr - 0x121; + uint64_t afterInstruction = found + 7; + uint32_t offs = *reinterpret_cast(found + 0x3); + uint64_t addr = afterInstruction + offs; + uint64_t base = addr; #endif armaP = reinterpret_cast(base); - armaP->blip.clear(); + armaP->slowFrameScopeFilter.clear(); armaP->forceCapture = true; armaP->capture = true; //disable captureSlowFrame because it can set forceCapture to false +#ifndef _DEBUG static auto stuff = intercept::client::host::register_sqf_command("diag_captureSlowFrame"sv, ""sv, [](game_state&, game_value_parameter) -> game_value { return {}; }, game_data_type::NOTHING, game_data_type::ARRAY); +#endif + + +#if !OPEN_SCOPE_MAP + // Game uses Fiber's, which is fine if we have the exact per-scope mapping. But its not fine if we have to rely on ordering of scopes. 
+ // We are using a IAT hook for this, to catch all Fiber switches + + HMODULE armaHandle; + GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, reinterpret_cast(found), &armaHandle); + origSwitchToFiber = reinterpret_cast(PatchIAT(armaHandle, "kernel32.dll", "SwitchToFiber", (uintptr_t)&SwitchToFiberReplacement)); +#endif + + } void EngineProfiling::setMainThreadOnly() { diff --git a/src/EngineProfiling.h b/src/EngineProfiling.h index 3b18996..53ff25a 100644 --- a/src/EngineProfiling.h +++ b/src/EngineProfiling.h @@ -10,7 +10,7 @@ class PCounter { ArmaProf* boss; const char* name; const char* cat; - int slot, stuff2; + int slot, scale; bool shouldTime(); @@ -78,14 +78,13 @@ class ArmaProf { int64_t ouf5; int64_t ouf6; int blios; - intercept::types::r_string blip; - float blop; - - float dummy #ifndef __linux__ - , dummy2, dummy3 + uint64_t dummy; //no idea what dis is.. Stuff above is probably wrong somewhere #endif - ;//no idea what dis is.. Stuff above is probably wrong somewhere + + intercept::types::r_string slowFrameScopeFilter; // 0x110 slow frame scope filter name + float slowFrameThreshold; // 0x118 slow frame capture threshold + uint32_t slowFrameOffset; // 0x11C slow frame offset bool forceCapture; bool capture; diff --git a/src/scriptProfiler.cpp b/src/scriptProfiler.cpp index 9627ef6..39a23c0 100644 --- a/src/scriptProfiler.cpp +++ b/src/scriptProfiler.cpp @@ -1455,10 +1455,10 @@ void scriptProfiler::preStart() { } else { engineProf = std::make_shared(); - if (!getCommandLineParam("-profilerEngineThreads"sv)) { - engineProf->setMainThreadOnly(); - diag_log("ASP: Engine profiler main thread only mode"sv); - } + //if (!getCommandLineParam("-profilerEngineThreads"sv)) { + // engineProf->setMainThreadOnly(); + // diag_log("ASP: Engine profiler main thread only mode"sv); + //} if (!getCommandLineParam("-profilerEngineDoFile"sv)) { engineProf->setNoFile();