Skip to content

Commit

Permalink
Added support for a frames-in-flight limit in the PyTorch module.
Browse files Browse the repository at this point in the history
  • Loading branch information
chrismile committed Nov 17, 2023
1 parent 29047e4 commit 0faf7f7
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 17 deletions.
52 changes: 35 additions & 17 deletions src/PyTorch/VolumetricPathTracingModuleRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,10 @@ void VolumetricPathTracingModuleRenderer::createCommandStructures(uint32_t numFr
interFrameSemaphores = {};
timelineValue = 0;

// TODO: Use swapchain-like structure where we have N images in flight.
size_t numImages = numFrames;
// Use swapchain-like structure where we have N images in flight.
uint32_t numImages = std::min(numFrames, maxNumFramesInFlight);
commandBuffers.clear();
frameFences.clear();
sgl::vk::CommandPoolType commandPoolType;
commandPoolType.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
#ifdef USE_TIMELINE_SEMAPHORES
Expand All @@ -294,17 +295,18 @@ void VolumetricPathTracingModuleRenderer::createCommandStructures(uint32_t numFr
renderReadySemaphore = std::make_shared<sgl::vk::SemaphoreVkCudaDriverApiInterop>(device);
denoiseFinishedSemaphore = std::make_shared<sgl::vk::SemaphoreVkCudaDriverApiInterop>(device);
#endif
for (size_t frameIdx = 0; frameIdx < numImages; frameIdx++) {
for (uint32_t frameIdx = 0; frameIdx < numImages; frameIdx++) {
commandBuffers.push_back(std::make_shared<sgl::vk::CommandBuffer>(device, commandPoolType));
}
for (size_t frameIdx = 0; frameIdx < numImages - 1; frameIdx++) {
for (uint32_t frameIdx = 0; frameIdx < numImages; frameIdx++) {
#ifdef USE_TIMELINE_SEMAPHORES
interFrameSemaphores.push_back(std::make_shared<sgl::vk::Semaphore>(
device, 0, VK_SEMAPHORE_TYPE_TIMELINE,
timelineValue));
#else
interFrameSemaphores.push_back(std::make_shared<sgl::vk::Semaphore>(device));
#endif
frameFences.push_back(std::make_shared<sgl::vk::Fence>(device, VK_FENCE_CREATE_SIGNALED_BIT));
}

//std::cout << "Done creating new command structures "<<numFrames<<std::endl;
Expand Down Expand Up @@ -404,8 +406,9 @@ float* VolumetricPathTracingModuleRenderer::renderFrameCuda(uint32_t numFrames)
"sgl::vk::getIsCudaDeviceApiFunctionTableInitialized() returned false.", false);
}

if (size_t(numFrames) > commandBuffers.size() || size_t(numFrames) > interFrameSemaphores.size() + 1) {
this->createCommandStructures(numFrames);
uint32_t numImages = std::min(numFrames, maxNumFramesInFlight);
if (size_t(numImages) > commandBuffers.size()) {
this->createCommandStructures(numImages);

//sgl::Logfile::get()->throwError(
// "Error in VolumetricPathTracingModuleRenderer::renderFrameCuda: Frame data was not allocated.",
Expand All @@ -415,30 +418,50 @@ float* VolumetricPathTracingModuleRenderer::renderFrameCuda(uint32_t numFrames)
cudaStream_t stream = at::cuda::getCurrentCUDAStream();

timelineValue++;
uint64_t waitValue = timelineValue;

#ifdef USE_TIMELINE_SEMAPHORES
renderReadySemaphore->signalSemaphoreCuda(stream, timelineValue);
#else
renderReadySemaphores->signalSemaphoreCuda(stream);
#endif

for (uint32_t frameIndex = 0; frameIndex < numFrames; frameIndex++) {
sgl::vk::CommandBufferPtr commandBuffer = commandBuffers.at(frameIndex);
const uint32_t imageIndex = frameIndex % maxNumFramesInFlight;
auto& fence = frameFences.at(imageIndex);
if (frameIndex == maxNumFramesInFlight) {
sgl::vk::g_cudaDeviceApiFunctionTable.cuStreamSynchronize(stream);
}
if (frameIndex != 0 && imageIndex == 0) {
waitValue = timelineValue;
timelineValue++;
}
fence->wait();
fence->reset();

sgl::vk::CommandBufferPtr commandBuffer = commandBuffers.at(imageIndex);
sgl::vk::SemaphorePtr waitSemaphore;
sgl::vk::SemaphorePtr signalSemaphore;
VkPipelineStageFlags waitStage;
if (frameIndex == 0) {
waitSemaphore = renderReadySemaphore;
waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
} else {
waitSemaphore = interFrameSemaphores.at(frameIndex - 1);
waitSemaphore = interFrameSemaphores.at((imageIndex + maxNumFramesInFlight - 1) % maxNumFramesInFlight);
waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
}
if (frameIndex == numFrames - 1) {
signalSemaphore = renderFinishedSemaphore;
} else {
signalSemaphore = interFrameSemaphores.at(frameIndex);
signalSemaphore = interFrameSemaphores.at(imageIndex);
}
#ifdef USE_TIMELINE_SEMAPHORES
waitSemaphore->setWaitSemaphoreValue(timelineValue);
waitSemaphore->setWaitSemaphoreValue(waitValue);
signalSemaphore->setSignalSemaphoreValue(timelineValue);
#endif
commandBuffer->pushWaitSemaphore(waitSemaphore, waitStage);
commandBuffer->pushSignalSemaphore(signalSemaphore);
commandBuffer->setFence(fence);

renderer->pushCommandBuffer(commandBuffer);
renderer->beginCommandBuffer();
Expand All @@ -455,14 +478,9 @@ float* VolumetricPathTracingModuleRenderer::renderFrameCuda(uint32_t numFrames)
}

renderer->endCommandBuffer();
}
renderer->submitToQueue();

#ifdef USE_TIMELINE_SEMAPHORES
renderReadySemaphore->signalSemaphoreCuda(stream, timelineValue);
#else
renderReadySemaphores->signalSemaphoreCuda(stream);
#endif
renderer->submitToQueue();
}

#ifdef USE_TIMELINE_SEMAPHORES
renderFinishedSemaphore->waitSemaphoreCuda(stream, timelineValue);
Expand Down
2 changes: 2 additions & 0 deletions src/PyTorch/VolumetricPathTracingModuleRenderer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ class VolumetricPathTracingModuleRenderer {
sgl::vk::BufferPtr outputImageBufferVk;
sgl::vk::BufferCudaDriverApiExternalMemoryVkPtr outputImageBufferCu;
// Synchronization primitives.
const uint32_t maxNumFramesInFlight = 32;
std::vector<sgl::vk::CommandBufferPtr> commandBuffers;
std::vector<sgl::vk::FencePtr> frameFences;
sgl::vk::SemaphoreVkCudaDriverApiInteropPtr renderReadySemaphore;
sgl::vk::SemaphoreVkCudaDriverApiInteropPtr renderFinishedSemaphore;
std::vector<sgl::vk::SemaphorePtr> interFrameSemaphores;
Expand Down

0 comments on commit 0faf7f7

Please sign in to comment.