diff --git a/libs/vkd3d/swapchain.c b/libs/vkd3d/swapchain.c index 4c72df1788..18b558f1d1 100644 --- a/libs/vkd3d/swapchain.c +++ b/libs/vkd3d/swapchain.c @@ -140,8 +140,10 @@ struct dxgi_vk_swap_chain /* For blits. Use simple VkFences since we have to use binary semaphores with WSI release anyways. * We don't need to wait on these fences on main thread. */ - VkCommandPool vk_blit_command_pool; - VkCommandBuffer vk_blit_command_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + VkCommandPool vk_graphics_blit_command_pool; + VkCommandBuffer vk_graphics_blit_command_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; + VkCommandPool vk_compute_blit_command_pool; + VkCommandBuffer vk_compute_blit_command_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS]; uint64_t backbuffer_blit_timelines[DXGI_MAX_SWAP_CHAIN_BUFFERS]; VkSwapchainKHR vk_swapchain; @@ -164,7 +166,8 @@ struct dxgi_vk_swap_chain uint32_t backbuffer_count; VkFormat backbuffer_format; - struct vkd3d_swapchain_info pipeline; + struct vkd3d_swapchain_info graphics_pipeline; + struct vkd3d_swapchain_info compute_pipeline; uint32_t is_occlusion_state; /* Updated atomically. */ @@ -388,6 +391,7 @@ static void dxgi_vk_swap_chain_drain_queue(struct dxgi_vk_swap_chain *chain) /* Wait for pending blits to complete on the GPU */ dxgi_vk_swap_chain_drain_complete_semaphore(chain, chain->user.present_count); + dxgi_vk_swap_chain_drain_internal_blit_semaphore(chain, chain->present.internal_blit_count); if (chain->swapchain_maintenance1) dxgi_vk_swap_chain_drain_swapchain_fences(chain); @@ -465,7 +469,8 @@ static void dxgi_vk_swap_chain_cleanup_common(struct dxgi_vk_swap_chain *chain) VK_CALL(vkDestroySemaphore(chain->queue->device->vk_device, chain->present.vk_internal_blit_semaphore, NULL)); VK_CALL(vkDestroySemaphore(chain->queue->device->vk_device, chain->present.vk_complete_semaphore, NULL)); - VK_CALL(vkDestroyCommandPool(chain->queue->device->vk_device, chain->present.vk_blit_command_pool, NULL)); + VK_CALL(vkDestroyCommandPool(chain->queue->device->vk_device, chain->present.vk_graphics_blit_command_pool, NULL)); + VK_CALL(vkDestroyCommandPool(chain->queue->device->vk_device, chain->present.vk_compute_blit_command_pool, NULL)); for (i = 0; i < ARRAY_SIZE(chain->present.vk_release_semaphores); i++) VK_CALL(vkDestroySemaphore(chain->queue->device->vk_device, chain->present.vk_release_semaphores[i], NULL)); for (i = 0; i < ARRAY_SIZE(chain->present.vk_backbuffer_image_views); i++) @@ -1340,22 +1345,38 @@ static HRESULT dxgi_vk_swap_chain_create_surface(struct dxgi_vk_swap_chain *chai * This way we can hide the blit overhead and also avoid false work spilling into main queue. * This is vitally important to make FSR3 FG work well. * NVIDIA has more than one graphics queue, so the assumption is that NVIDIA can deal with it as-is - * since FSR3 creates a different queue for frame-gen presentation. */ + * since FSR3 creates a different queue for frame-gen presentation. + * Only attempt this if the application is asking for an "unusual" priority, since this + * implies some out of order shenanigans. */ family_info = chain->queue->device->queue_families[VKD3D_QUEUE_FAMILY_COMPUTE]; - if (chain->queue->device->queue_families[VKD3D_QUEUE_FAMILY_GRAPHICS]->queue_count == 1 && + if (chain->queue->desc.Priority >= D3D12_COMMAND_QUEUE_PRIORITY_HIGH && + chain->queue->device->queue_families[VKD3D_QUEUE_FAMILY_GRAPHICS]->queue_count == 1 && family_info->vk_family_index != chain->queue->vkd3d_queue->vk_family_index && VK_CALL(vkGetPhysicalDeviceSurfaceSupportKHR(vk_physical_device, family_info->vk_family_index, chain->vk_surface, &supported)) == VK_SUCCESS && supported) { /* Async compute presentation. */ const VkImageUsageFlags required_usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + VkFormatProperties2 props2; + VkFormatProperties3 props3; VkSurfaceCapabilitiesKHR caps; + props2.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2; + props2.pNext = &props3; + props3.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3; + props3.pNext = NULL; + + /* BGRA8 storage image needs special query. Don't bother with async present if this is not supported. */ + VK_CALL(vkGetPhysicalDeviceFormatProperties2(vk_physical_device, VK_FORMAT_B8G8R8A8_UNORM, &props2)); + VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device, chain->vk_surface, &caps)); + /* Assume that image usage flags won't randomly change under us. * That would just be extremely weird, and all relevant drivers support this anyway. */ - VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device, chain->vk_surface, &caps)); - if ((caps.supportedUsageFlags & required_usage) == required_usage) + if ((props3.optimalTilingFeatures & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT) && + (caps.supportedUsageFlags & required_usage) == required_usage) + { chain->async_queue = d3d12_device_allocate_vkd3d_queue(family_info, NULL); + } if (chain->async_queue) INFO("Using async compute presentation.\n"); @@ -1650,7 +1671,12 @@ static void dxgi_vk_swap_chain_init_blit_pipeline(struct dxgi_vk_swap_chain *cha key.filter = chain->desc.Scaling == DXGI_SCALING_NONE ? VK_FILTER_NEAREST : VK_FILTER_LINEAR; key.format = chain->present.backbuffer_format; - if (FAILED(hr = vkd3d_meta_get_swapchain_pipeline(&device->meta_ops, &key, &chain->present.pipeline))) + if (FAILED(hr = vkd3d_meta_get_swapchain_pipeline(&device->meta_ops, &key, &chain->present.graphics_pipeline))) + ERR("Failed to initialize swapchain pipeline.\n"); + + key.bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + key.format = VK_FORMAT_UNDEFINED; + if (FAILED(hr = vkd3d_meta_get_swapchain_pipeline(&device->meta_ops, &key, &chain->present.compute_pipeline))) ERR("Failed to initialize swapchain pipeline.\n"); } @@ -1961,13 +1987,22 @@ static void dxgi_vk_swap_chain_recreate_swapchain_in_present_task(struct dxgi_vk chain->present.backbuffer_format = swapchain_create_info.imageFormat; chain->present.current_backbuffer_index = UINT32_MAX; - if (!chain->present.vk_blit_command_pool) + if (!chain->present.vk_graphics_blit_command_pool) { command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; command_pool_create_info.pNext = NULL; command_pool_create_info.queueFamilyIndex = chain->queue->vkd3d_queue->vk_family_index; command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - VK_CALL(vkCreateCommandPool(vk_device, &command_pool_create_info, NULL, &chain->present.vk_blit_command_pool)); + VK_CALL(vkCreateCommandPool(vk_device, &command_pool_create_info, NULL, &chain->present.vk_graphics_blit_command_pool)); + } + + if (!chain->present.vk_compute_blit_command_pool && chain->async_queue) + { + command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_create_info.pNext = NULL; + command_pool_create_info.queueFamilyIndex = chain->async_queue->vk_family_index; + command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CALL(vkCreateCommandPool(vk_device, &command_pool_create_info, NULL, &chain->present.vk_compute_blit_command_pool)); } dxgi_vk_swap_chain_init_blit_pipeline(chain); @@ -2031,6 +2066,138 @@ static void dxgi_vk_swap_chain_present_signal_blit_semaphore(struct dxgi_vk_swap } } +static void dxgi_vk_swap_chain_record_compute_blit(struct dxgi_vk_swap_chain *chain, VkCommandBuffer vk_cmd, uint32_t swapchain_index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; + VkImageMemoryBarrier2 image_barrier; + struct d3d12_resource *resource; + VkDependencyInfo dep_info; + bool blank_present; + + /* TODO: compute blit in fallback scenarios. */ + resource = chain->user.backbuffers[chain->request.user_index]; + blank_present = vkd3d_atomic_uint32_load_explicit(&resource->initial_layout_transition, vkd3d_memory_order_relaxed) != 0; + + if (blank_present) + WARN("Application is presenting user index %u, but it has never been rendered to.\n", chain->request.user_index); + + memset(&dep_info, 0, sizeof(dep_info)); + dep_info.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO; + dep_info.imageMemoryBarrierCount = 1; + dep_info.pImageMemoryBarriers = &image_barrier; + + memset(&image_barrier, 0, sizeof(image_barrier)); + image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2; + + if (blank_present) + { + image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT; + image_barrier.dstStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT; + image_barrier.dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT; + image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + else + { + image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + image_barrier.dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + image_barrier.dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT; + image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + image_barrier.image = chain->present.vk_backbuffer_images[swapchain_index]; + image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_barrier.subresourceRange.levelCount = 1; + image_barrier.subresourceRange.layerCount = 1; + + VK_CALL(vkCmdPipelineBarrier2(vk_cmd, &dep_info)); + + if (blank_present) + { + VkClearColorValue clear_value; + VkImageSubresourceRange range; + + memset(&clear_value, 0, sizeof(clear_value)); + range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + range.baseArrayLayer = 0; + range.baseMipLevel = 0; + range.layerCount = 1; + range.levelCount = 1; + + VK_CALL(vkCmdClearColorImage(vk_cmd, + image_barrier.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + &clear_value, 1, &range)); + + image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_CLEAR_BIT; + image_barrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT; + } + else + { + VkDescriptorImageInfo image_info[2]; + VkWriteDescriptorSet write_info[2]; + float inv_resolution[2]; + + inv_resolution[0] = 1.0f / (float)chain->desc.Width; + inv_resolution[1] = 1.0f / (float)chain->desc.Height; + + write_info[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_info[0].pNext = NULL; + write_info[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + write_info[0].pBufferInfo = NULL; + write_info[0].dstSet = VK_NULL_HANDLE; + write_info[0].pTexelBufferView = NULL; + write_info[0].pImageInfo = &image_info[0]; + write_info[0].dstBinding = 0; + write_info[0].dstArrayElement = 0; + write_info[0].descriptorCount = 1; + + write_info[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_info[1].pNext = NULL; + write_info[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_info[1].pBufferInfo = NULL; + write_info[1].dstSet = VK_NULL_HANDLE; + write_info[1].pTexelBufferView = NULL; + write_info[1].pImageInfo = &image_info[1]; + write_info[1].dstBinding = 1; + write_info[1].dstArrayElement = 0; + write_info[1].descriptorCount = 1; + + image_info[0].imageView = chain->user.vk_image_views[chain->request.user_index]; + image_info[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info[0].sampler = VK_NULL_HANDLE; + + image_info[1].imageView = chain->present.vk_backbuffer_image_views[swapchain_index]; + image_info[1].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + image_info[1].sampler = VK_NULL_HANDLE; + + VK_CALL(vkCmdBindPipeline(vk_cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + chain->present.compute_pipeline.vk_pipeline)); + + VK_CALL(vkCmdPushConstants(vk_cmd, + chain->present.compute_pipeline.vk_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(inv_resolution), inv_resolution)); + + VK_CALL(vkCmdPushDescriptorSetKHR(vk_cmd, VK_PIPELINE_BIND_POINT_COMPUTE, + chain->present.compute_pipeline.vk_pipeline_layout, 0, ARRAY_SIZE(write_info), write_info)); + + VK_CALL(vkCmdDispatch(vk_cmd, + (chain->present.backbuffer_width + 7) / 8, + (chain->present.backbuffer_height + 7) / 8, + 1)); + + image_barrier.srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + image_barrier.srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT; + } + + image_barrier.dstStageMask = VK_PIPELINE_STAGE_2_NONE; + image_barrier.dstAccessMask = VK_ACCESS_2_NONE; + image_barrier.oldLayout = image_barrier.newLayout; + image_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VK_CALL(vkCmdPipelineBarrier2(vk_cmd, &dep_info)); +} + static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *chain, VkCommandBuffer vk_cmd, uint32_t swapchain_index) { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; @@ -2127,7 +2294,7 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha VK_CALL(vkCmdSetViewport(vk_cmd, 0, 1, &viewport)); VK_CALL(vkCmdSetScissor(vk_cmd, 0, 1, &rendering_info.renderArea)); VK_CALL(vkCmdBindPipeline(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - chain->present.pipeline.vk_pipeline)); + chain->present.graphics_pipeline.vk_pipeline)); write_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; write_info.pNext = NULL; @@ -2144,7 +2311,7 @@ static void dxgi_vk_swap_chain_record_render_pass(struct dxgi_vk_swap_chain *cha image_info.sampler = VK_NULL_HANDLE; VK_CALL(vkCmdPushDescriptorSetKHR(vk_cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, - chain->present.pipeline.vk_pipeline_layout, 0, 1, &write_info)); + chain->present.graphics_pipeline.vk_pipeline_layout, 0, 1, &write_info)); VK_CALL(vkCmdDraw(vk_cmd, 3, 1, 0, 0)); } @@ -2171,14 +2338,16 @@ static bool dxgi_vk_swap_chain_submit_blit(struct dxgi_vk_swap_chain *chain, uin { const struct vkd3d_vk_device_procs *vk_procs = &chain->queue->device->vk_procs; VkDevice vk_device = chain->queue->device->vk_device; - VkSemaphoreSubmitInfo signal_semaphore_info[2]; + VkSemaphoreSubmitInfo signal_semaphore_info[3]; + VkSemaphoreSubmitInfo wait_semaphore_info[2]; VkSemaphoreCreateInfo semaphore_create_info; - VkSemaphoreSubmitInfo wait_semaphore_info; VkCommandBufferAllocateInfo allocate_info; VkCommandBufferSubmitInfo cmd_buffer_info; VkCommandBufferBeginInfo cmd_begin_info; + VkCommandBuffer *blit_command_buffers; VkSubmitInfo2 submit_infos[2]; VkCommandBuffer vk_cmd; + bool async_compute; VkQueue vk_queue; VkResult vr; @@ -2197,15 +2366,29 @@ static bool dxgi_vk_swap_chain_submit_blit(struct dxgi_vk_swap_chain *chain, uin } } - if (!chain->present.vk_blit_command_buffers[swapchain_index]) + /* Only opt-in to async compute if we don't have to handle borders in any way. + * We could be smarter about ASPECT_RATIO_STRETCH, but eeeeeeeh, noone uses that (famous last words). */ + async_compute = chain->async_queue != NULL && (chain->desc.Scaling == DXGI_SCALING_STRETCH || + (chain->desc.Width == chain->present.backbuffer_width && + chain->desc.Height == chain->present.backbuffer_height)); + + blit_command_buffers = async_compute ? + chain->present.vk_compute_blit_command_buffers : + chain->present.vk_graphics_blit_command_buffers; + + if (!blit_command_buffers[swapchain_index]) { memset(&allocate_info, 0, sizeof(allocate_info)); allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocate_info.commandBufferCount = 1; - allocate_info.commandPool = chain->present.vk_blit_command_pool; + + allocate_info.commandPool = async_compute ? + chain->present.vk_compute_blit_command_pool : + chain->present.vk_graphics_blit_command_pool; + allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; vr = VK_CALL(vkAllocateCommandBuffers(vk_device, &allocate_info, - &chain->present.vk_blit_command_buffers[swapchain_index])); + &blit_command_buffers[swapchain_index])); if (vr < 0) { ERR("Failed to allocate command buffers, vr %d\n", vr); @@ -2215,33 +2398,31 @@ static bool dxgi_vk_swap_chain_submit_blit(struct dxgi_vk_swap_chain *chain, uin dxgi_vk_swap_chain_drain_internal_blit_semaphore(chain, chain->present.backbuffer_blit_timelines[swapchain_index]); - vk_cmd = chain->present.vk_blit_command_buffers[swapchain_index]; + vk_cmd = blit_command_buffers[swapchain_index]; VK_CALL(vkResetCommandBuffer(vk_cmd, 0)); memset(&cmd_begin_info, 0, sizeof(cmd_begin_info)); cmd_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; cmd_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; VK_CALL(vkBeginCommandBuffer(vk_cmd, &cmd_begin_info)); - dxgi_vk_swap_chain_record_render_pass(chain, vk_cmd, swapchain_index); + if (async_compute) + dxgi_vk_swap_chain_record_compute_blit(chain, vk_cmd, swapchain_index); + else + dxgi_vk_swap_chain_record_render_pass(chain, vk_cmd, swapchain_index); VK_CALL(vkEndCommandBuffer(vk_cmd)); assert(chain->present.acquire_semaphore_signalled[chain->present.acquire_semaphore_index]); - memset(&wait_semaphore_info, 0, sizeof(wait_semaphore_info)); - wait_semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; - wait_semaphore_info.semaphore = chain->present.vk_acquire_semaphore[chain->present.acquire_semaphore_index]; - wait_semaphore_info.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + memset(&wait_semaphore_info[0], 0, sizeof(wait_semaphore_info[0])); + wait_semaphore_info[0].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; + wait_semaphore_info[0].semaphore = chain->present.vk_acquire_semaphore[chain->present.acquire_semaphore_index]; + wait_semaphore_info[0].stageMask = + async_compute ? VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT : VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; memset(signal_semaphore_info, 0, sizeof(signal_semaphore_info)); signal_semaphore_info[0].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; signal_semaphore_info[0].semaphore = chain->present.vk_release_semaphores[swapchain_index]; signal_semaphore_info[0].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - chain->present.internal_blit_count += 1; - signal_semaphore_info[1].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; - signal_semaphore_info[1].semaphore = chain->present.vk_internal_blit_semaphore; - signal_semaphore_info[1].value = chain->present.internal_blit_count; - signal_semaphore_info[1].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - memset(&cmd_buffer_info, 0, sizeof(cmd_buffer_info)); cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO; cmd_buffer_info.commandBuffer = vk_cmd; @@ -2250,20 +2431,34 @@ static bool dxgi_vk_swap_chain_submit_blit(struct dxgi_vk_swap_chain *chain, uin submit_infos[0].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2; submit_infos[0].commandBufferInfoCount = 1; submit_infos[0].pCommandBufferInfos = &cmd_buffer_info; - submit_infos[0].pWaitSemaphoreInfos = &wait_semaphore_info; + submit_infos[0].pWaitSemaphoreInfos = &wait_semaphore_info[0]; submit_infos[0].waitSemaphoreInfoCount = 1; submit_infos[0].signalSemaphoreInfoCount = 1; submit_infos[0].pSignalSemaphoreInfos = &signal_semaphore_info[0]; + if (async_compute) + { + vkd3d_device_swapchain_patch_implicit_sync_semaphores(&chain->queue->device->swapchain_info, + &submit_infos[0], &submit_infos[0], + wait_semaphore_info, signal_semaphore_info, + 1u << chain->user.backbuffers[chain->request.user_index]->swap_chain_implicit_sync_index); + } + /* Internal blit semaphore must be signaled after we signal vk_release_semaphores. * To guarantee this, the signals must happen in different batches. */ submit_infos[1].sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2; submit_infos[1].signalSemaphoreInfoCount = 1; - submit_infos[1].pSignalSemaphoreInfos = &signal_semaphore_info[1]; + submit_infos[1].pSignalSemaphoreInfos = &signal_semaphore_info[submit_infos[0].signalSemaphoreInfoCount]; - vk_queue = vkd3d_queue_acquire(chain->queue->vkd3d_queue); + chain->present.internal_blit_count += 1; + signal_semaphore_info[submit_infos[0].signalSemaphoreInfoCount].sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO; + signal_semaphore_info[submit_infos[0].signalSemaphoreInfoCount].semaphore = chain->present.vk_internal_blit_semaphore; + signal_semaphore_info[submit_infos[0].signalSemaphoreInfoCount].value = chain->present.internal_blit_count; + signal_semaphore_info[submit_infos[0].signalSemaphoreInfoCount].stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + + vk_queue = vkd3d_queue_acquire(async_compute ? chain->async_queue : chain->queue->vkd3d_queue); vr = VK_CALL(vkQueueSubmit2(vk_queue, ARRAY_SIZE(submit_infos), submit_infos, VK_NULL_HANDLE)); - vkd3d_queue_release(chain->queue->vkd3d_queue); + vkd3d_queue_release(async_compute ? chain->async_queue : chain->queue->vkd3d_queue); VKD3D_DEVICE_REPORT_FAULT_AND_BREADCRUMB_IF(chain->queue->device, vr == VK_ERROR_DEVICE_LOST); if (vr < 0) @@ -3030,17 +3225,17 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk INFO("Creating swapchain (%u x %u), BufferCount = %u.\n", pDesc->Width, pDesc->Height, pDesc->BufferCount); + if (FAILED(hr = dxgi_vk_swap_chain_create_surface(chain, pFactory))) + return hr; + if (FAILED(hr = dxgi_vk_swap_chain_reallocate_user_buffers(chain))) goto cleanup_common; if (FAILED(hr = dxgi_vk_swap_chain_init_sync_objects(chain))) goto cleanup_common; - if (FAILED(hr = dxgi_vk_swap_chain_create_surface(chain, pFactory))) - goto cleanup_sync_objects; - if (FAILED(hr = dxgi_vk_swap_chain_init_waiter_thread(chain))) - goto cleanup_surface; + goto cleanup_sync_objects; if (FAILED(hr = dxgi_vk_swap_chain_init_low_latency(chain))) goto cleanup_waiter_thread; @@ -3055,12 +3250,11 @@ static HRESULT dxgi_vk_swap_chain_init(struct dxgi_vk_swap_chain *chain, IDXGIVk dxgi_vk_swap_chain_cleanup_low_latency(chain); cleanup_waiter_thread: dxgi_vk_swap_chain_cleanup_waiter_thread(chain); -cleanup_surface: - dxgi_vk_swap_chain_cleanup_surface(chain); cleanup_sync_objects: dxgi_vk_swap_chain_cleanup_sync_objects(chain); cleanup_common: dxgi_vk_swap_chain_cleanup_common(chain); + dxgi_vk_swap_chain_cleanup_surface(chain); return hr; }