Skip to content

Commit

Permalink
Add implementation of IPipelineCreationAPIDispatcher
Browse files Browse the repository at this point in the history
Because slang-gfx doesn't provide fine-grained APIs that would let us
time the SPIR-V generation phase and the Vulkan pipeline creation phase
separately, we have to use the gfx::IPipelineCreationAPIDispatcher
callback interface, which gfx invokes whenever it creates a pipeline.

We simply call the Vulkan pipeline creation function inside the callback
and measure the elapsed time, which gives us the time the Vulkan driver
spends compiling the SPIR-V code.
  • Loading branch information
kaizhangNV committed Feb 6, 2024
1 parent c3493b4 commit a7f71b6
Show file tree
Hide file tree
Showing 5 changed files with 212 additions and 65 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ An existing build tree can be compiled using `cmake --build build/<preset name>`
## Run perftest
```
cd build/<preset name>/bin/Debug
./falcor_perftest
__GL_SHADER_DISK_CACHE=0 ./falcor_perftest
```
1 change: 1 addition & 0 deletions source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ target_sources(falcor_perftest PRIVATE
ProgramManager.cpp
ProgramReflection.cpp
ProgramVersion.cpp
DeviceWrapper.cpp
)

target_copy_shaders(falcor_perftest ./shaders .)
Expand Down
77 changes: 77 additions & 0 deletions source/DeviceWrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#include "DeviceWrapper.h"


/// Creates the Slang global session, the program manager, a Vulkan gfx device
/// and the transient resource heap used by this wrapper.
///
/// Device selection: the first adapter reported by gfx is tried first; if that
/// fails (or no adapter is reported) creation is retried with no adapter
/// constraint so gfx may pick any available GPU.
///
/// A PipelineCreationAPIDispatcher is registered with the device description
/// before any creation attempt, so gfx can route pipeline creation through it.
///
/// On unrecoverable failure the constructor asserts (debug builds), prints a
/// diagnostic and returns early, leaving getGfxDevice() null instead of
/// dereferencing a null device.
Device::Device()
{
    slang::createGlobalSession(m_slangGlobalSession.writeRef());
    m_pProgramManager = std::make_unique<ProgramManager>(this);

    gfx::IDevice::Desc gfxDesc = {};
    gfxDesc.deviceType = gfx::DeviceType::Vulkan;
    gfxDesc.slang.slangGlobalSession = m_slangGlobalSession;
    gfxDesc.shaderCache.maxEntryCount = 1000;
    gfxDesc.shaderCache.shaderCachePath = nullptr;

    // Add extended desc for root parameter attribute.
    // `extDesc` and `extendedDescs` must stay alive until the last
    // gfxCreateDevice() call below, because gfxDesc only stores pointers to them.
    gfx::D3D12DeviceExtendedDesc extDesc = {};
    extDesc.rootParameterShaderAttributeName = "root";
    std::vector<void*> extendedDescs;
    extendedDescs.push_back(&extDesc);

    gfxDesc.extendedDescCount = static_cast<decltype(gfxDesc.extendedDescCount)>(extendedDescs.size());
    gfxDesc.extendedDescs = extendedDescs.data();

    // Register the dispatcher before any device creation attempt so that both
    // the adapter-specific path and the fallback path use it.
    mpAPIDispatcher = std::make_unique<PipelineCreationAPIDispatcher>();
    gfxDesc.apiCommandDispatcher = static_cast<ISlangUnknown*>(mpAPIDispatcher.get());

    gfx::AdapterList adapters = gfx::gfxGetAdapters(gfxDesc.deviceType);
    if (adapters.getCount() > 0)
    {
        // Try to create device on specific GPU.
        gfxDesc.adapterLUID = &adapters.getAdapters()[0].luid;
        if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
        {
            printf("Failed to create device on GPU 0 (%s).\n", adapters.getAdapters()[0].name);
        }
    }
    else
    {
        // Never index into an empty adapter list; fall through to the
        // unconstrained creation attempt below instead.
        printf("No GPU found\n");
    }

    if (SLANG_FAILED(gfx::gfxSetDebugCallback(&gGFXDebugCallBack)))
    {
        printf("Failed to setup debug callback\n");
    }
    else
    {
        gfx::gfxEnableDebugLayer();
    }

    // Otherwise try create device on any available GPU.
    if (!m_gfxDevice)
    {
        gfxDesc.adapterLUID = nullptr;
        if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
        {
            assert(!"Failed to create device");
            // With asserts compiled out, stop here rather than dereferencing
            // the null device when creating the transient heap.
            printf("Failed to create device\n");
            return;
        }
    }

    gfx::ITransientResourceHeap::Desc transientHeapDesc = {};
    transientHeapDesc.flags = gfx::ITransientResourceHeap::Flags::AllowResizing;
    transientHeapDesc.constantBufferSize = 16 * 1024 * 1024;
    transientHeapDesc.samplerDescriptorCount = 2048;
    transientHeapDesc.uavDescriptorCount = 1000000;
    transientHeapDesc.srvDescriptorCount = 1000000;
    transientHeapDesc.constantBufferDescriptorCount = 1000000;
    transientHeapDesc.accelerationStructureDescriptorCount = 1000000;
    if (SLANG_FAILED(m_gfxDevice->createTransientResourceHeap(transientHeapDesc, m_transientResourceHeaps.writeRef())))
    {
        assert(!"Fail to create transient source heaps");
    }
}

// Tears the wrapper down explicitly instead of relying on member destruction
// order. The statements below are order-sensitive; do not reorder casually.
Device::~Device()
{
// Drop the program manager first; it was constructed with a pointer to this Device.
m_pProgramManager.reset();
// Release this wrapper's reference to the gfx device.
m_gfxDevice.setNull();
// Release this wrapper's reference to the transient resource heap.
m_transientResourceHeaps.setNull();
// Destroy the dispatcher last: the gfx device was handed a raw pointer to it
// at creation time, and the dispatcher does no reference counting of its own.
mpAPIDispatcher.reset();
}
173 changes: 118 additions & 55 deletions source/DeviceWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
#include "Types.h"
#include "Object.h"
#include "ProgramManager.h"
#include <vulkan/vulkan.h>
#include <dlfcn.h>
#include "CpuTimer.h"

class ProgramManager;
class PipelineCreationAPIDispatcher;

class GFXDebugCallBack : public gfx::IDebugCallback
{
Expand All @@ -29,78 +34,134 @@ class GFXDebugCallBack : public gfx::IDebugCallback

static GFXDebugCallBack gGFXDebugCallBack; // TODO: REMOVEGLOBAL

class Device : public Object{
class PipelineCreationAPIDispatcher : public gfx::IPipelineCreationAPIDispatcher
{
public:
enum Type
{
Default, ///< Default device type, favors D3D12 over Vulkan.
D3D12,
Vulkan,
};
PipelineCreationAPIDispatcher() { }
~PipelineCreationAPIDispatcher() { }

Device()
double getPipelineCreationTime() {return m_timer.delta();}

virtual SLANG_NO_THROW SlangResult SLANG_MCALL queryInterface(SlangUUID const& uuid, void** outObject) override
{
slang::createGlobalSession(m_slangGlobalSession.writeRef());
m_pProgramManager = std::make_unique<ProgramManager>(this);

gfx::IDevice::Desc gfxDesc = {};
gfxDesc.deviceType = gfx::DeviceType::Vulkan;
gfxDesc.slang.slangGlobalSession = m_slangGlobalSession;
gfxDesc.shaderCache.maxEntryCount = 1000;
gfxDesc.shaderCache.shaderCachePath = nullptr;

std::vector<void*> extendedDescs;
// Add extended desc for root parameter attribute.
gfx::D3D12DeviceExtendedDesc extDesc = {};
extDesc.rootParameterShaderAttributeName = "root";
extendedDescs.push_back(&extDesc);

gfxDesc.extendedDescCount = extendedDescs.size();
gfxDesc.extendedDescs = extendedDescs.data();

gfx::AdapterList adapters = gfx::gfxGetAdapters(gfxDesc.deviceType);
if (adapters.getCount() == 0)
if (uuid == SlangUUID SLANG_UUID_IPipelineCreationAPIDispatcher)
{
assert(!"No GPU found");
*outObject = static_cast<gfx::IPipelineCreationAPIDispatcher*>(this);
return SLANG_OK;
}
return SLANG_E_NO_INTERFACE;
}

// Try to create device on specific GPU.
gfxDesc.adapterLUID = &adapters.getAdapters()[0].luid;
if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
{
printf("Failed to create device on GPU 0 (%s).", adapters.getAdapters()[0].name);
}
// The lifetime of this dispatcher object will be managed by `Falcor::Device` so we don't need
// to actually implement reference counting here.
virtual SLANG_NO_THROW uint32_t SLANG_MCALL addRef() override { return 2; }

virtual SLANG_NO_THROW uint32_t SLANG_MCALL release() override { return 2; }

// This method will be called by the gfx layer to create an API object for a compute pipeline state.
virtual gfx::Result createComputePipelineState(
gfx::IDevice* device,
slang::IComponentType* program,
void* pipelineDesc,
void** outPipelineState
)
{
void* vulkanLibraryHandle = nullptr;
const char* dynamicLibraryName = "Unknown";
dynamicLibraryName = "libvulkan.so.1";
vulkanLibraryHandle = dlopen(dynamicLibraryName, RTLD_NOW);

gfx::IDevice::InteropHandles outHandles;
device->getNativeDeviceHandles(&outHandles);

VkInstance instance;
instance = (VkInstance)outHandles.handles[0].handleValue;

if (SLANG_FAILED(gfx::gfxSetDebugCallback(&gGFXDebugCallBack)))
VkDevice vkDevice;
vkDevice = (VkDevice)outHandles.handles[2].handleValue;

PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr;
vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)dlsym(vulkanLibraryHandle, "vkGetInstanceProcAddr");
if (!vkGetInstanceProcAddr)
{
printf("Failed to setup debug callback\n");
assert(!"Fail to get instance proc address");
}
else

PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr = nullptr;
vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(instance, "vkGetDeviceProcAddr");
if (!vkGetDeviceProcAddr)
{
gfx::gfxEnableDebugLayer();
assert(!"Fail to get device proc address");
}

// Otherwise try create device on any available GPU.
if (!m_gfxDevice)
PFN_vkCreateComputePipelines vkCreateComputePipelines = nullptr;
vkCreateComputePipelines = (PFN_vkCreateComputePipelines)vkGetDeviceProcAddr(vkDevice, "vkCreateComputePipelines");
if (!vkCreateComputePipelines)
{
gfxDesc.adapterLUID = nullptr;
if (SLANG_FAILED(gfx::gfxCreateDevice(&gfxDesc, m_gfxDevice.writeRef())))
assert(!"Failed to create device");
assert(!"Fail to vkCreateComputePipelines");
}

gfx::ITransientResourceHeap::Desc transientHeapDesc = {};
transientHeapDesc.flags = gfx::ITransientResourceHeap::Flags::AllowResizing;
transientHeapDesc.constantBufferSize = 16 * 1024 * 1024;
transientHeapDesc.samplerDescriptorCount = 2048;
transientHeapDesc.uavDescriptorCount = 1000000;
transientHeapDesc.srvDescriptorCount = 1000000;
transientHeapDesc.constantBufferDescriptorCount = 1000000;
transientHeapDesc.accelerationStructureDescriptorCount = 1000000;
if (SLANG_FAILED(m_gfxDevice->createTransientResourceHeap(transientHeapDesc, m_transientResourceHeaps.writeRef()))) {
assert(!"Fail to create transient source heaps");
}
m_timer.update();

VkPipelineCache pipelineCache = VK_NULL_HANDLE;
VkComputePipelineCreateInfo* pComputePipelineInfo = static_cast<VkComputePipelineCreateInfo*>(pipelineDesc);
VkPipeline pipeline;
vkCreateComputePipelines(
vkDevice, pipelineCache, 1, pComputePipelineInfo, nullptr, &pipeline);

*((VkPipeline*)outPipelineState) = pipeline;
m_timer.update();
return SLANG_OK;
}

// This method will be called by the gfx layer to create an API object for a graphics pipeline state.
virtual gfx::Result createGraphicsPipelineState(
gfx::IDevice* device,
slang::IComponentType* program,
void* pipelineDesc,
void** outPipelineState
)
{
return SLANG_OK;
}

virtual gfx::Result createMeshPipelineState(
gfx::IDevice* device,
slang::IComponentType* program,
void* pipelineDesc,
void** outPipelineState
)
{
assert(!"Mesh pipelines are not supported.");
return SLANG_OK;
}

// This method will be called by the gfx layer right before creating a ray tracing state object.
virtual gfx::Result beforeCreateRayTracingState(gfx::IDevice* device, slang::IComponentType* program)
{
return SLANG_OK;
}

// This method will be called by the gfx layer right after creating a ray tracing state object.
virtual gfx::Result afterCreateRayTracingState(gfx::IDevice* device, slang::IComponentType* program)
{
return SLANG_OK;
}
private:
CpuTimer m_timer;
};

class Device : public Object{
public:
enum Type
{
Default, ///< Default device type, favors D3D12 over Vulkan.
D3D12,
Vulkan,
};
Device();
~Device();

gfx::ITransientResourceHeap* getCurrentTransientResourceHeap()
{
return m_transientResourceHeaps.get();
Expand All @@ -116,10 +177,12 @@ class Device : public Object{
gfx::IDevice* getGfxDevice() const { return m_gfxDevice; }
Type getType() const { return m_type; }

double getPipelineCreationTime() {return mpAPIDispatcher->getPipelineCreationTime();}
private:
Slang::ComPtr<slang::IGlobalSession> m_slangGlobalSession;
Slang::ComPtr<gfx::IDevice> m_gfxDevice;
Slang::ComPtr<gfx::ITransientResourceHeap> m_transientResourceHeaps;
Type m_type {Vulkan};
std::unique_ptr<ProgramManager> m_pProgramManager;
std::unique_ptr<PipelineCreationAPIDispatcher> mpAPIDispatcher;
};
24 changes: 15 additions & 9 deletions source/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,26 @@ void TestCase(ref<Device>& device)

ref<Program> pProg = Program::create(device, desc, defines);

std::vector<std::string> backendName = {"glslang", "slang"};
for (uint32_t i = 0; i < 2; i++)
{
CpuTimer timer;
timer.update();
// Each set of pair of `Macro defines` and `Type conformance object` can define
// one version of program.
printf("Start creating program versions\n");
const ref<const ProgramVersion>& progVersion = pProg->getActiveVersion();
timer.update();
double programVersionTime = timer.delta();
printf("Time for program version creation (%s): %.3fs\n", backendName[i].c_str(), programVersionTime);

std::string log;
ref<const ProgramKernels> programKernel = device->getProgramManager()->createProgramKernels(*pProg, *progVersion, log);
const EntryPointKernel* entryPointKernel = programKernel->getKernel(ShaderType::Compute);
timer.update();
double programKernelTime = timer.delta();
printf("Time for program kernel creation (%s): %.3fs\n", backendName[i].c_str(), programKernelTime);
printf("Time for frontend execution:%.3fs\n", programKernelTime + programVersionTime);

Slang::ComPtr<gfx::IShaderObject> shaderObject;
SlangResult res = device->getGfxDevice()->createMutableRootShaderObject(programKernel->getGfxProgram(), shaderObject.writeRef());
Expand All @@ -99,22 +109,18 @@ void TestCase(ref<Device>& device)
res = computeCommandEncoder->bindPipelineWithRootObject(gfxPipelineState, shaderObject);
ASSERT_EQ(res, SLANG_OK, "bindPipelineWithRootObject");

CpuTimer timer;
timer.update();

res = computeCommandEncoder->dispatchCompute(0, 0, 0);
ASSERT_EQ(res, SLANG_OK, "create pipeline");

timer.update();
double time = timer.delta();
if (i == 0)
{
printf("Time for compiling spirv generated by glslang: %.3fs\n\n\n", time);
}
else
{
printf("Time for compiling spirv generated by slang: %.3fs\n\n\n", time);
}

double pipelineCreationTime = device->getPipelineCreationTime();
double spirvGenerationTime = time - pipelineCreationTime;
printf("Time for spirv generation by %s: %.3fs\n", backendName[i].c_str(), spirvGenerationTime);
printf("Time for compiling spirv generated by %s: %.3fs\n\n\n", backendName[i].c_str(), pipelineCreationTime);

device->getProgramManager()->reloadAllPrograms();
device->getProgramManager()->setSpirvDirectMode(true);
Expand Down

0 comments on commit a7f71b6

Please sign in to comment.