Add implementation of IPipelineCreationAPIDispatcher

Because slang-gfx doesn't provide fine-grained APIs that would let us time the SPIR-V generation phase and the Vulkan pipeline creation phase separately, we have to use the gfx::IPipelineCreationAPIDispatcher callback interface, which gfx calls whenever it creates a pipeline. We simply call the Vulkan pipeline creation function inside the callback and measure the elapsed time, which gives us the time the Vulkan driver spends compiling the SPIR-V code.
shader-slang · Feb 6, 2024 · a7f71b6 · a7f71b6
1 parent c3493b4
commit a7f71b6
Show file tree

Hide file tree

Showing 5 changed files with 212 additions and 65 deletions.
diff --git a/README.md b/README.md
@@ -20,5 +20,5 @@ An existing build tree can be compiled using `cmake --build build/<preset name>`
 ## Run perftest
 ```
 cd build/<preset name>/bin/Debug
-./falcor_perftest
+__GL_SHADER_DISK_CACHE=0 ./falcor_perftest
 ```
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
@@ -8,6 +8,7 @@ target_sources(falcor_perftest PRIVATE
     ProgramManager.cpp
     ProgramReflection.cpp
     ProgramVersion.cpp
+    DeviceWrapper.cpp
 )
 
 target_copy_shaders(falcor_perftest ./shaders .)

diff --git a/source/DeviceWrapper.cpp b/source/DeviceWrapper.cpp
@@ -0,0 +1,77 @@
+#include "DeviceWrapper.h"
+
+
+Device::Device() // Creates the Slang session, program manager, Vulkan gfx device (with pipeline-creation dispatcher) and transient heap.
+{
+    slang::createGlobalSession(m_slangGlobalSession.writeRef()); // NOTE(review): the result is not checked.
+    m_pProgramManager = std::make_unique<ProgramManager>(this);
+
+    gfx::IDevice::Desc gfxDesc = {};
+    gfxDesc.deviceType = gfx::DeviceType::Vulkan; // The backend is hard-coded to Vulkan.
+    gfxDesc.slang.slangGlobalSession = m_slangGlobalSession;
+    gfxDesc.shaderCache.maxEntryCount = 1000;
+    gfxDesc.shaderCache.shaderCachePath = nullptr;
+
+    std::vector<void*> extendedDescs;
+    // Add extended desc for root parameter attribute. NOTE(review): this is a D3D12 desc while deviceType is Vulkan — confirm it is needed.
+    gfx::D3D12DeviceExtendedDesc extDesc = {};
+    extDesc.rootParameterShaderAttributeName = "root";
+    extendedDescs.push_back(&extDesc);
+
+    gfxDesc.extendedDescCount = extendedDescs.size();
+    gfxDesc.extendedDescs = extendedDescs.data(); // Points into the local vector declared above.
+
+    gfx::AdapterList adapters = gfx::gfxGetAdapters(gfxDesc.deviceType);
+    if (adapters.getCount() == 0)
+    {
+        assert(!"No GPU found"); // NOTE(review): no-op when NDEBUG is defined; adapter 0 is still read below.
+    }
+
+    // Try to create the device on the first enumerated adapter.
+    gfxDesc.adapterLUID = &adapters.getAdapters()[0].luid;
+
+    mpAPIDispatcher.reset(new PipelineCreationAPIDispatcher()); // Owned by this Device; gfx is handed a raw pointer.
+    gfxDesc.apiCommandDispatcher = static_cast<ISlangUnknown*>(mpAPIDispatcher.get()); // Lets gfx call back into the dispatcher when creating pipelines.
+
+    if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
+    {
+        printf("Failed to create device on GPU 0 (%s).", adapters.getAdapters()[0].name); // Not fatal: the fallback below retries. NOTE(review): no trailing newline.
+    }
+
+    if (SLANG_FAILED(gfx::gfxSetDebugCallback(&gGFXDebugCallBack))) // NOTE(review): runs after the first gfxCreateDevice attempt — confirm the ordering is intended.
+    {
+        printf("Failed to setup debug callback\n");
+    }
+    else
+    {
+        gfx::gfxEnableDebugLayer();
+    }
+
+    // Otherwise try to create the device on any available GPU.
+    if (!m_gfxDevice)
+    {
+        gfxDesc.adapterLUID = nullptr;
+        if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
+            assert(!"Failed to create device"); // NOTE(review): assert-only; m_gfxDevice is dereferenced below regardless.
+    }
+
+    gfx::ITransientResourceHeap::Desc transientHeapDesc = {};
+    transientHeapDesc.flags = gfx::ITransientResourceHeap::Flags::AllowResizing;
+    transientHeapDesc.constantBufferSize = 16 * 1024 * 1024;
+    transientHeapDesc.samplerDescriptorCount = 2048;
+    transientHeapDesc.uavDescriptorCount = 1000000;
+    transientHeapDesc.srvDescriptorCount = 1000000;
+    transientHeapDesc.constantBufferDescriptorCount = 1000000;
+    transientHeapDesc.accelerationStructureDescriptorCount = 1000000;
+    if (SLANG_FAILED(m_gfxDevice->createTransientResourceHeap(transientHeapDesc, m_transientResourceHeaps.writeRef()))) {
+        assert(!"Fail to create transient source heaps"); // Failure is only reported in builds with asserts enabled.
+    }
+}
+
+Device::~Device() // Releases the members in an explicit order instead of relying on member destruction order.
+{
+    m_pProgramManager.reset();
+    m_gfxDevice.setNull();
+    m_transientResourceHeaps.setNull();
+    mpAPIDispatcher.reset(); // Destroyed last, after the device reference is dropped; presumably because gfx holds a raw pointer to it — TODO confirm.
+}
diff --git a/source/DeviceWrapper.h b/source/DeviceWrapper.h
@@ -6,7 +6,12 @@
 #include "Types.h"
 #include "Object.h"
 #include "ProgramManager.h"
+#include <vulkan/vulkan.h>
+#include <dlfcn.h>
+#include "CpuTimer.h"
+
 class ProgramManager;
+class PipelineCreationAPIDispatcher;
 
 class GFXDebugCallBack : public gfx::IDebugCallback
 {
@@ -29,78 +34,134 @@ class GFXDebugCallBack : public gfx::IDebugCallback
 
 static GFXDebugCallBack gGFXDebugCallBack; // TODO: REMOVEGLOBAL
 
-class Device  : public Object{
+class PipelineCreationAPIDispatcher : public gfx::IPipelineCreationAPIDispatcher
+{
 public:
-    enum Type
-    {
-        Default, ///< Default device type, favors D3D12 over Vulkan.
-        D3D12,
-        Vulkan,
-    };
+    PipelineCreationAPIDispatcher() { } // No setup required; m_timer is default-constructed.
+    ~PipelineCreationAPIDispatcher() { } // Nothing to release explicitly.
 
-    Device()
+    double getPipelineCreationTime() {return m_timer.delta();} // Returns m_timer.delta(); presumably the span between the two update() calls in createComputePipelineState — TODO confirm CpuTimer semantics.
+
+    virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) override
     {
-        slang::createGlobalSession(m_slangGlobalSession.writeRef());
-        m_pProgramManager = std::make_unique<ProgramManager>(this);
-
-        gfx::IDevice::Desc gfxDesc = {};
-        gfxDesc.deviceType = gfx::DeviceType::Vulkan;
-        gfxDesc.slang.slangGlobalSession = m_slangGlobalSession;
-        gfxDesc.shaderCache.maxEntryCount = 1000;
-        gfxDesc.shaderCache.shaderCachePath = nullptr;
-
-        std::vector<void*> extendedDescs;
-        // Add extended desc for root parameter attribute.
-        gfx::D3D12DeviceExtendedDesc extDesc = {};
-        extDesc.rootParameterShaderAttributeName = "root";
-        extendedDescs.push_back(&extDesc);
-
-        gfxDesc.extendedDescCount = extendedDescs.size();
-        gfxDesc.extendedDescs = extendedDescs.data();
-
-        gfx::AdapterList adapters = gfx::gfxGetAdapters(gfxDesc.deviceType);
-        if (adapters.getCount() == 0)
+        if (uuid == SlangUUID SLANG_UUID_IPipelineCreationAPIDispatcher) // Only the dispatcher interface ID is recognized.
         {
-            assert(!"No GPU found");
+            *outObject = static_cast<gfx::IPipelineCreationAPIDispatcher*>(this); // No addRef: reference counting is a no-op for this class.
+            return SLANG_OK;
         }
+        return SLANG_E_NO_INTERFACE; // NOTE(review): ISlangUnknown's own UUID is not handled — confirm gfx never queries for it.
+    }
 
-        // Try to create device on specific GPU.
-        gfxDesc.adapterLUID = &adapters.getAdapters()[0].luid;
-        if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
-        {
-            printf("Failed to create device on GPU 0 (%s).", adapters.getAdapters()[0].name);
-        }
+    // The lifetime of this dispatcher object is managed by `Device` (through a std::unique_ptr), so
+    // reference counting is intentionally a no-op: a constant is returned and the object is never deleted here.
+    virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 2; }
+
+    virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 2; } // Never deletes the object; always reports a constant count.
+
+    // Called by the gfx layer to create the API object for a compute pipeline state; creates the VkPipeline directly and times that call with m_timer.
+    virtual gfx::Result createComputePipelineState(
+        gfx::IDevice* device,
+        slang::IComponentType* program,
+        void* pipelineDesc,
+        void** outPipelineState
+    )
+    {
+        void* vulkanLibraryHandle = nullptr;
+        const char* dynamicLibraryName = "Unknown";
+        dynamicLibraryName = "libvulkan.so.1"; // Linux loader name; the "Unknown" initializer above is overwritten immediately.
+        vulkanLibraryHandle = dlopen(dynamicLibraryName, RTLD_NOW); // NOTE(review): opened on every call, never dlclose'd here, and not checked for nullptr.
+
+        gfx::IDevice::InteropHandles outHandles;
+        device->getNativeDeviceHandles(&outHandles); // NOTE(review): the result is not checked.
+
+        VkInstance instance;
+        instance = (VkInstance)outHandles.handles[0].handleValue; // Assumes slot 0 holds the VkInstance — TODO confirm against gfx.
 
-        if (SLANG_FAILED(gfx::gfxSetDebugCallback(&gGFXDebugCallBack)))
+        VkDevice vkDevice;
+        vkDevice = (VkDevice)outHandles.handles[2].handleValue; // Assumes slot 2 holds the VkDevice — TODO confirm against gfx.
+
+        PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr;
+        vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)dlsym(vulkanLibraryHandle, "vkGetInstanceProcAddr"); // Entry points are resolved manually instead of linking the loader.
+        if (!vkGetInstanceProcAddr)
         {
-            printf("Failed to setup debug callback\n");
+            assert(!"Fail to get instance proc address"); // NOTE(review): assert-only; with NDEBUG the null pointer is called below.
         }
-        else
+
+        PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr = nullptr;
+        vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(instance, "vkGetDeviceProcAddr");
+        if (!vkGetDeviceProcAddr)
         {
-            gfx::gfxEnableDebugLayer();
+            assert(!"Fail to get device proc address"); // Same assert-only handling as above.
         }
 
-        // Otherwise try create device on any available GPU.
-        if (!m_gfxDevice)
+        PFN_vkCreateComputePipelines vkCreateComputePipelines = nullptr;
+        vkCreateComputePipelines = (PFN_vkCreateComputePipelines)vkGetDeviceProcAddr(vkDevice, "vkCreateComputePipelines");
+        if (!vkCreateComputePipelines)
         {
-            gfxDesc.adapterLUID = nullptr;
-            if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
-                assert(!"Failed to create device");
+            assert(!"Fail to vkCreateComputePipelines"); // Same assert-only handling as above.
         }
 
-        gfx::ITransientResourceHeap::Desc transientHeapDesc = {};
-        transientHeapDesc.flags = gfx::ITransientResourceHeap::Flags::AllowResizing;
-        transientHeapDesc.constantBufferSize = 16 * 1024 * 1024;
-        transientHeapDesc.samplerDescriptorCount = 2048;
-        transientHeapDesc.uavDescriptorCount = 1000000;
-        transientHeapDesc.srvDescriptorCount = 1000000;
-        transientHeapDesc.constantBufferDescriptorCount = 1000000;
-        transientHeapDesc.accelerationStructureDescriptorCount = 1000000;
-        if (SLANG_FAILED(m_gfxDevice->createTransientResourceHeap(transientHeapDesc, m_transientResourceHeaps.writeRef()))) {
-            assert(!"Fail to create transient source heaps");
-        }
+        m_timer.update(); // Start of the timed region: only the driver call and the result store are measured.
+
+        VkPipelineCache pipelineCache = VK_NULL_HANDLE; // No pipeline cache object is passed to the driver.
+        VkComputePipelineCreateInfo* pComputePipelineInfo = static_cast<VkComputePipelineCreateInfo*>(pipelineDesc); // pipelineDesc is treated as a VkComputePipelineCreateInfo.
+        VkPipeline pipeline;
+        vkCreateComputePipelines( // NOTE(review): the VkResult is discarded; `pipeline` is uninitialized if creation fails.
+            vkDevice, pipelineCache, 1, pComputePipelineInfo, nullptr, &pipeline);
+
+        *((VkPipeline*)outPipelineState) = pipeline; // Hands the raw VkPipeline back through the out parameter.
+        m_timer.update(); // End of the timed region.
+        return SLANG_OK; // Success is reported unconditionally.
     }
 
+    // Called by the gfx layer to create an API object for a graphics pipeline state. Not implemented here: it only reports success.
+    virtual gfx::Result createGraphicsPipelineState(
+        gfx::IDevice* device,
+        slang::IComponentType* program,
+        void* pipelineDesc,
+        void** outPipelineState
+    )
+    {
+        return SLANG_OK; // NOTE(review): *outPipelineState is left unset — confirm graphics pipelines are never created through this dispatcher.
+    }
+
+    virtual gfx::Result createMeshPipelineState( // Mesh pipelines are unsupported: asserts when asserts are enabled.
+        gfx::IDevice* device,
+        slang::IComponentType* program,
+        void* pipelineDesc,
+        void** outPipelineState
+    )
+    {
+        assert(!"Mesh pipelines are not supported.");
+        return SLANG_OK; // NOTE(review): still reports success when the assert is compiled out; *outPipelineState is left unset.
+    }
+
+    // Called by the gfx layer right before creating a ray tracing state object. No work is done here; it only reports success.
+    virtual gfx::Result beforeCreateRayTracingState(gfx::IDevice* device, slang::IComponentType* program)
+    {
+        return SLANG_OK;
+    }
+
+    // Called by the gfx layer right after creating a ray tracing state object. No work is done here; it only reports success.
+    virtual gfx::Result afterCreateRayTracingState(gfx::IDevice* device, slang::IComponentType* program)
+    {
+        return SLANG_OK;
+    }
+private:
+    CpuTimer m_timer;
+};
+
+class Device  : public Object{
+public:
+    enum Type
+    {
+        Default, ///< Default device type, favors D3D12 over Vulkan.
+        D3D12,
+        Vulkan,
+    };
+    Device();
+    ~Device();
+
     gfx::ITransientResourceHeap* getCurrentTransientResourceHeap()
     {
         return m_transientResourceHeaps.get();
@@ -116,10 +177,12 @@ class Device  : public Object{
     gfx::IDevice* getGfxDevice() const { return m_gfxDevice; }
     Type getType() const { return m_type; }
 
+    double getPipelineCreationTime() {return mpAPIDispatcher->getPipelineCreationTime();} // Forwards to the dispatcher's timer reading; mpAPIDispatcher must be non-null.
 private:
     Slang::ComPtr<slang::IGlobalSession> m_slangGlobalSession;
     Slang::ComPtr<gfx::IDevice> m_gfxDevice;
     Slang::ComPtr<gfx::ITransientResourceHeap> m_transientResourceHeaps;
     Type m_type {Vulkan};
     std::unique_ptr<ProgramManager> m_pProgramManager;
+    std::unique_ptr<PipelineCreationAPIDispatcher> mpAPIDispatcher;
 };
diff --git a/source/main.cpp b/source/main.cpp
@@ -67,16 +67,26 @@ void TestCase(ref<Device>& device)
 
     ref<Program> pProg = Program::create(device, desc, defines);
 
+    std::vector<std::string> backendName = {"glslang", "slang"};
     for (uint32_t i = 0; i < 2; i++)
     {
+        CpuTimer timer;
+        timer.update();
         // Each set of pair of `Macro defines` and `Type conformance object` can define
         // one version of program.
         printf("Start creating program versions\n");
         const ref<const ProgramVersion>& progVersion = pProg->getActiveVersion();
+        timer.update();
+        double programVersionTime = timer.delta();
+        printf("Time for program version creation (%s): %.3fs\n", backendName[i].c_str(), programVersionTime);
 
         std::string log;
         ref<const ProgramKernels> programKernel = device->getProgramManager()->createProgramKernels(*pProg, *progVersion, log);
         const EntryPointKernel* entryPointKernel = programKernel->getKernel(ShaderType::Compute);
+        timer.update();
+        double programKernelTime = timer.delta();
+        printf("Time for program kernel creation (%s): %.3fs\n", backendName[i].c_str(), programKernelTime);
+        printf("Time for frontend execution:%.3fs\n",  programKernelTime + programVersionTime);
 
         Slang::ComPtr<gfx::IShaderObject> shaderObject;
         SlangResult res = device->getGfxDevice()->createMutableRootShaderObject(programKernel->getGfxProgram(), shaderObject.writeRef());
@@ -99,22 +109,18 @@ void TestCase(ref<Device>& device)
         res = computeCommandEncoder->bindPipelineWithRootObject(gfxPipelineState, shaderObject);
         ASSERT_EQ(res, SLANG_OK, "bindPipelineWithRootObject");
 
-        CpuTimer timer;
         timer.update();
 
         res = computeCommandEncoder->dispatchCompute(0, 0, 0);
         ASSERT_EQ(res, SLANG_OK, "create pipeline");
 
         timer.update();
         double time = timer.delta();
-        if (i == 0)
-        {
-            printf("Time for compiling spirv generated by glslang: %.3fs\n\n\n", time);
-        }
-        else
-        {
-            printf("Time for compiling spirv generated by slang: %.3fs\n\n\n", time);
-        }
+
+        double pipelineCreationTime = device->getPipelineCreationTime();
+        double spirvGenerationTime = time - pipelineCreationTime;
+        printf("Time for spirv generation by %s: %.3fs\n", backendName[i].c_str(), spirvGenerationTime);
+        printf("Time for compiling spirv generated by %s: %.3fs\n\n\n", backendName[i].c_str(), pipelineCreationTime);
 
         device->getProgramManager()->reloadAllPrograms();
         device->getProgramManager()->setSpirvDirectMode(true);