diff --git a/layer_gpu_support/source/device.cpp b/layer_gpu_support/source/device.cpp
index 3f0bd21..83f5208 100644
--- a/layer_gpu_support/source/device.cpp
+++ b/layer_gpu_support/source/device.cpp
@@ -93,4 +93,30 @@ Device::Device(
     device(_device)
 {
     initDriverDeviceDispatchTable(device, nlayerGetProcAddress, driver);
+
+    // Create the timeline semaphore used to serialize queue submissions. Its
+    // initial value matches the host-side counter, so the first wait on the
+    // counter's starting value is already satisfied.
+    VkSemaphoreTypeCreateInfo timelineCreateInfo {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+        .pNext = nullptr,
+        .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+        .initialValue = queueSerializationTimelineSemCount
+    };
+
+    VkSemaphoreCreateInfo createInfo {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+        .pNext = &timelineCreateInfo,
+        .flags = 0
+    };
+
+    auto result = driver.vkCreateSemaphore(
+        device, &createInfo, nullptr, &queueSerializationTimelineSem);
+    if (result != VK_SUCCESS)
+    {
+        LAYER_ERR("Failed vkCreateSemaphore() for queue serialization");
+        // Use VK_NULL_HANDLE rather than nullptr: non-dispatchable handles
+        // are uint64_t on 32-bit targets, where nullptr does not convert.
+        queueSerializationTimelineSem = VK_NULL_HANDLE;
+    }
 }
diff --git a/layer_gpu_support/source/device.hpp b/layer_gpu_support/source/device.hpp
index cc4b16b..bb5c959 100644
--- a/layer_gpu_support/source/device.hpp
+++ b/layer_gpu_support/source/device.hpp
@@ -147,6 +147,16 @@ class Device
      */
     static const std::vector extraExtensions;
 
+    /**
+     * @brief The timeline semaphore used for queue serialization.
+     */
+    VkSemaphore queueSerializationTimelineSem { VK_NULL_HANDLE };
+
+    /**
+     * @brief The timeline value that the next serialized submit waits for.
+     */
+    uint64_t queueSerializationTimelineSemCount { 0 };
+
 private:
     /**
      * @brief The physical device this device is created with.
diff --git a/layer_gpu_support/source/layer_device_functions_queue.cpp b/layer_gpu_support/source/layer_device_functions_queue.cpp
index 01f4c25..2a99868 100644
--- a/layer_gpu_support/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_support/source/layer_device_functions_queue.cpp
@@ -44,9 +44,83 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit(
     std::unique_lock lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
+    // Serialize in the order submits are called
+    // TODO: This assumes a forward progress guarantee which is no longer
+    // guaranteed if the user is using timeline semaphores for syncs
+
+    // Skip serialization if the timeline semaphore could not be created
+    const bool serialize =
+        layer->instance->config.serialize_queue() &&
+        layer->queueSerializationTimelineSem != VK_NULL_HANDLE;
+
+    // Reserve this submit's timeline slot while the lock is still held
+    const uint64_t waitValue = layer->queueSerializationTimelineSemCount;
+    const uint64_t signalValue = serialize ? waitValue + 1 : waitValue;
+    layer->queueSerializationTimelineSemCount = signalValue;
+
+    VkPipelineStageFlags waitMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+    VkTimelineSemaphoreSubmitInfo timelineInfoPre {
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .waitSemaphoreValueCount = 1,
+        .pWaitSemaphoreValues = &waitValue,
+        .signalSemaphoreValueCount = 0,
+        .pSignalSemaphoreValues = nullptr
+    };
+
+    VkTimelineSemaphoreSubmitInfo timelineInfoPost {
+        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .waitSemaphoreValueCount = 0,
+        .pWaitSemaphoreValues = nullptr,
+        .signalSemaphoreValueCount = 1,
+        .pSignalSemaphoreValues = &signalValue
+    };
+
+    VkSubmitInfo submitInfoPre {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext = &timelineInfoPre,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &(layer->queueSerializationTimelineSem),
+        .pWaitDstStageMask = &waitMask,
+        .commandBufferCount = 0,
+        .pCommandBuffers = nullptr,
+        .signalSemaphoreCount = 0,
+        .pSignalSemaphores = nullptr
+    };
+
+    VkSubmitInfo submitInfoPost {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .pNext = &timelineInfoPost,
+        .waitSemaphoreCount = 0,
+        .pWaitSemaphores = nullptr,
+        .pWaitDstStageMask = nullptr,
+        .commandBufferCount = 0,
+        .pCommandBuffers = nullptr,
+        .signalSemaphoreCount = 1,
+        .pSignalSemaphores = &(layer->queueSerializationTimelineSem)
+    };
+
     // Release the lock to call into the driver
     lock.unlock();
-    return layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence);
+
+    // Block this queue until the previous serialized submit has signaled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit(queue, 1, &submitInfoPre, VK_NULL_HANDLE);
+    }
+
+    auto result = layer->driver.vkQueueSubmit(queue, submitCount, pSubmits, fence);
+
+    // Signal even if the application submit failed, so that later submits
+    // waiting on signalValue are not stalled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
+    }
+
+    return result;
 }
 
 /* See Vulkan API for documentation. */
@@ -63,9 +137,81 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2(
     std::unique_lock lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
+    // Serialize in the order submits are called
+    // TODO: This assumes a forward progress guarantee which is no longer
+    // guaranteed if the user is using timeline semaphores for syncs
+
+    // Skip serialization if the timeline semaphore could not be created
+    const bool serialize =
+        layer->instance->config.serialize_queue() &&
+        layer->queueSerializationTimelineSem != VK_NULL_HANDLE;
+
+    // Reserve this submit's timeline slot while the lock is still held
+    const uint64_t waitValue = layer->queueSerializationTimelineSemCount;
+    const uint64_t signalValue = serialize ? waitValue + 1 : waitValue;
+    layer->queueSerializationTimelineSemCount = signalValue;
+
+    VkSemaphoreSubmitInfo timelineInfoPre {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .semaphore = layer->queueSerializationTimelineSem,
+        .value = waitValue,
+        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .deviceIndex = 0
+    };
+
+    VkSemaphoreSubmitInfo timelineInfoPost {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .semaphore = layer->queueSerializationTimelineSem,
+        .value = signalValue,
+        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .deviceIndex = 0
+    };
+
+    VkSubmitInfo2 submitInfoPre {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+        .pNext = nullptr,
+        .flags = 0,
+        .waitSemaphoreInfoCount = 1,
+        .pWaitSemaphoreInfos = &timelineInfoPre,
+        .commandBufferInfoCount = 0,
+        .pCommandBufferInfos = nullptr,
+        .signalSemaphoreInfoCount = 0,
+        .pSignalSemaphoreInfos = nullptr
+    };
+
+    VkSubmitInfo2 submitInfoPost {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+        .pNext = nullptr,
+        .flags = 0,
+        .waitSemaphoreInfoCount = 0,
+        .pWaitSemaphoreInfos = nullptr,
+        .commandBufferInfoCount = 0,
+        .pCommandBufferInfos = nullptr,
+        .signalSemaphoreInfoCount = 1,
+        .pSignalSemaphoreInfos = &timelineInfoPost
+    };
+
     // Release the lock to call into the driver
     lock.unlock();
-    return layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence);
+
+    // Block this queue until the previous serialized submit has signaled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit2(queue, 1, &submitInfoPre, VK_NULL_HANDLE);
+    }
+
+    auto result = layer->driver.vkQueueSubmit2(queue, submitCount, pSubmits, fence);
+
+    // Signal even if the application submit failed, so that later submits
+    // waiting on signalValue are not stalled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit2(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
+    }
+
+    return result;
 }
 
 /* See Vulkan API for documentation. */
@@ -82,7 +228,79 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueueSubmit2KHR(
     std::unique_lock lock { g_vulkanLock };
     auto* layer = Device::retrieve(queue);
 
+    // Serialize in the order submits are called
+    // TODO: This assumes a forward progress guarantee which is no longer
+    // guaranteed if the user is using timeline semaphores for syncs
+
+    // Skip serialization if the timeline semaphore could not be created
+    const bool serialize =
+        layer->instance->config.serialize_queue() &&
+        layer->queueSerializationTimelineSem != VK_NULL_HANDLE;
+
+    // Reserve this submit's timeline slot while the lock is still held
+    const uint64_t waitValue = layer->queueSerializationTimelineSemCount;
+    const uint64_t signalValue = serialize ? waitValue + 1 : waitValue;
+    layer->queueSerializationTimelineSemCount = signalValue;
+
+    VkSemaphoreSubmitInfo timelineInfoPre {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .semaphore = layer->queueSerializationTimelineSem,
+        .value = waitValue,
+        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .deviceIndex = 0
+    };
+
+    VkSemaphoreSubmitInfo timelineInfoPost {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .pNext = nullptr,
+        .semaphore = layer->queueSerializationTimelineSem,
+        .value = signalValue,
+        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .deviceIndex = 0
+    };
+
+    VkSubmitInfo2 submitInfoPre {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+        .pNext = nullptr,
+        .flags = 0,
+        .waitSemaphoreInfoCount = 1,
+        .pWaitSemaphoreInfos = &timelineInfoPre,
+        .commandBufferInfoCount = 0,
+        .pCommandBufferInfos = nullptr,
+        .signalSemaphoreInfoCount = 0,
+        .pSignalSemaphoreInfos = nullptr
+    };
+
+    VkSubmitInfo2 submitInfoPost {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+        .pNext = nullptr,
+        .flags = 0,
+        .waitSemaphoreInfoCount = 0,
+        .pWaitSemaphoreInfos = nullptr,
+        .commandBufferInfoCount = 0,
+        .pCommandBufferInfos = nullptr,
+        .signalSemaphoreInfoCount = 1,
+        .pSignalSemaphoreInfos = &timelineInfoPost
+    };
+
     // Release the lock to call into the driver
     lock.unlock();
-    return layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
+
+    // Block this queue until the previous serialized submit has signaled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit2KHR(queue, 1, &submitInfoPre, VK_NULL_HANDLE);
+    }
+
+    auto result = layer->driver.vkQueueSubmit2KHR(queue, submitCount, pSubmits, fence);
+
+    // Signal even if the application submit failed, so that later submits
+    // waiting on signalValue are not stalled
+    if (serialize)
+    {
+        layer->driver.vkQueueSubmit2KHR(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
+    }
+
+    return result;
 }
diff --git a/source_common/framework/manual_functions.cpp b/source_common/framework/manual_functions.cpp
index 6cd1acb..e56dda0 100644
--- a/source_common/framework/manual_functions.cpp
+++ b/source_common/framework/manual_functions.cpp
@@ -909,9 +909,12 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkCreateDevice_default(
         return res;
     }
 
+    // Build the layer device before retaking the lock, because a layer's
+    // Device constructor may call into the driver
+    auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr);
+
     // Retake the lock to access layer-wide global store
     lock.lock();
-    auto device = std::make_unique<Device>(layer, physicalDevice, *pDevice, fpGetDeviceProcAddr);
     Device::store(*pDevice, std::move(device));
 
     return VK_SUCCESS;