diff --git a/src/interfaces/vkd3d-proton_interfaces.h b/src/interfaces/vkd3d-proton_interfaces.h index 1bb445cf..a6e9c46c 100644 --- a/src/interfaces/vkd3d-proton_interfaces.h +++ b/src/interfaces/vkd3d-proton_interfaces.h @@ -50,6 +50,39 @@ typedef struct D3D12_UAV_INFO { UINT64 gpuVASize; } D3D12_UAV_INFO; +typedef struct D3D12_CREATE_CUBIN_SHADER_PARAMS { + void* pNext; + const void* pCubin; + UINT32 size; + UINT32 blockX; + UINT32 blockY; + UINT32 blockZ; + UINT32 dynSharedMemBytes; + const char* pShaderName; + UINT32 flags; + D3D12_CUBIN_DATA_HANDLE* hShader; +} D3D12_CREATE_CUBIN_SHADER_PARAMS; + +typedef struct D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS { + void* pNext; + SIZE_T texDesc; + SIZE_T smpDesc; + UINT64 textureHandle; +} D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS; + +typedef enum D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE { + D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE = 0, + D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE = 1, + D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER = 2, +} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE; + +typedef struct D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS { + void* pNext; + D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE type; + SIZE_T desc; + UINT64 handle; +} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS; + MIDL_INTERFACE("11ea7a1a-0f6a-49bf-b612-3e30f8e201dd") ID3D12DeviceExt : public IUnknown { virtual HRESULT STDMETHODCALLTYPE GetVulkanHandles( @@ -85,6 +118,20 @@ ID3D12DeviceExt : public IUnknown { D3D12_UAV_INFO * uav_info) = 0; }; +MIDL_INTERFACE("099a73fd-2199-4f45-bf48-0eb86f6fdb65") +ID3D12DeviceExt1 : public ID3D12DeviceExt { + virtual HRESULT CreateResourceFromBorrowedHandle(const D3D12_RESOURCE_DESC1* desc, UINT64 vk_handle, ID3D12Resource** resource) = 0; + virtual HRESULT GetVulkanQueueInfoEx(ID3D12CommandQueue * queue, VkQueue * vk_queue, UINT32 * vk_queue_index, UINT32 * vk_queue_flags, UINT32 * vk_queue_family) = 0; +}; + 
+MIDL_INTERFACE("e859c4ac-ba8f-41c4-8eac-1137fde6158d") +ID3D12DeviceExt2 : public ID3D12DeviceExt1 { + virtual BOOL SupportsCubin64bit() = 0; + virtual HRESULT CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS * params) = 0; + virtual HRESULT GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS * params) = 0; + virtual HRESULT GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS * params) = 0; +}; + MIDL_INTERFACE("39da4e09-bd1c-4198-9fae-86bbe3be41fd") ID3D12DXVKInteropDevice : public IUnknown { virtual HRESULT STDMETHODCALLTYPE GetDXGIAdapter( @@ -191,6 +238,8 @@ ID3D12CommandQueueExt : public IUnknown { #ifndef _MSC_VER __CRT_UUID_DECL(ID3D12DeviceExt, 0x11ea7a1a, 0x0f6a, 0x49bf, 0xb6, 0x12, 0x3e, 0x30, 0xf8, 0xe2, 0x01, 0xdd); +__CRT_UUID_DECL(ID3D12DeviceExt1, 0x099a73fd, 0x2199, 0x4f45, 0xbf, 0x48, 0x0e, 0xb8, 0x6f, 0x6f, 0xdb, 0x65); +__CRT_UUID_DECL(ID3D12DeviceExt2, 0xe859c4ac, 0xba8f, 0x41c4, 0x8e, 0xac, 0x11, 0x37, 0xfd, 0xe6, 0x15, 0x8d); __CRT_UUID_DECL(ID3D12DXVKInteropDevice, 0x39da4e09, 0xbd1c, 0x4198, 0x9f, 0xae, 0x86, 0xbb, 0xe3, 0xbe, 0x41, 0xfd); __CRT_UUID_DECL(ID3D12DXVKInteropDevice1, 0x902d8115, 0x59eb, 0x4406, 0x95, 0x18, 0xfe, 0x00, 0xf9, 0x91, 0xee, 0x65); __CRT_UUID_DECL(ID3D12GraphicsCommandListExt, 0x77a86b09, 0x2bea, 0x4801, 0xb8, 0x9a, 0x37, 0x64, 0x8e, 0x10, 0x4a, 0xf1); diff --git a/src/nvapi/nvapi_d3d12_device.cpp b/src/nvapi/nvapi_d3d12_device.cpp index 17858492..e7481f76 100644 --- a/src/nvapi/nvapi_d3d12_device.cpp +++ b/src/nvapi/nvapi_d3d12_device.cpp @@ -48,9 +48,15 @@ namespace dxvk { } NvapiD3d12Device::NvapiD3d12Device(ID3D12DeviceExt* vkd3dDevice) - : m_vkd3dDevice(vkd3dDevice) { + : m_vkd3dDevice(static_cast<ID3D12DeviceExt2*>(vkd3dDevice)) { m_supportsNvxBinaryImport = vkd3dDevice->GetExtensionSupport(D3D12_VK_NVX_BINARY_IMPORT); m_supportsNvxImageViewHandle = vkd3dDevice->GetExtensionSupport(D3D12_VK_NVX_IMAGE_VIEW_HANDLE); + + if (m_supportsNvxBinaryImport 
&& m_supportsNvxImageViewHandle) { + if (Com<ID3D12DeviceExt2> deviceExt2; SUCCEEDED(m_vkd3dDevice->QueryInterface(IID_PPV_ARGS(&deviceExt2)))) { + m_supportsCubin64bit = deviceExt2->SupportsCubin64bit(); + } + } } HRESULT NvapiD3d12Device::CreateCubinComputeShaderWithName(const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader) { @@ -109,4 +115,32 @@ namespace dxvk { bool NvapiD3d12Device::IsFatbinPTXSupported() const { return m_vkd3dDevice && m_supportsNvxBinaryImport && m_supportsNvxImageViewHandle; } + + HRESULT NvapiD3d12Device::CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS* params) { + if (!m_supportsCubin64bit) + return E_NOTIMPL; + + auto result = m_vkd3dDevice->CreateCubinComputeShaderExV2(params); + + if (result == S_OK) { + std::scoped_lock lock(m_cubinSmemMutex); + m_cubinSmemMap.emplace(reinterpret_cast<NVDX_ObjectHandle>(params->hShader), params->dynSharedMemBytes); + } + + return result; + } + + HRESULT NvapiD3d12Device::GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* params) const { + if (!m_supportsCubin64bit) + return E_NOTIMPL; + + return m_vkd3dDevice->GetCudaMergedTextureSamplerObject(params); + } + + HRESULT NvapiD3d12Device::GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* params) const { + if (!m_supportsCubin64bit) + return E_NOTIMPL; + + return m_vkd3dDevice->GetCudaIndependentDescriptorObject(params); + } } diff --git a/src/nvapi/nvapi_d3d12_device.h b/src/nvapi/nvapi_d3d12_device.h index 8a3d2884..184a3122 100644 --- a/src/nvapi/nvapi_d3d12_device.h +++ b/src/nvapi/nvapi_d3d12_device.h @@ -22,6 +22,10 @@ namespace dxvk { [[nodiscard]] HRESULT CaptureUAVInfo(NVAPI_UAV_INFO* uavInfo) const; [[nodiscard]] bool IsFatbinPTXSupported() const; + [[nodiscard]] HRESULT CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS* params); + [[nodiscard]] HRESULT 
GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* params) const; + [[nodiscard]] HRESULT GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* params) const; + private: static std::unordered_map m_nvapiDeviceMap; static std::mutex m_mutex; @@ -29,7 +33,8 @@ namespace dxvk { static std::unordered_map m_cubinSmemMap; static std::mutex m_cubinSmemMutex; - ID3D12DeviceExt* m_vkd3dDevice{}; + ID3D12DeviceExt2* m_vkd3dDevice{}; + bool m_supportsCubin64bit = false; bool m_supportsNvxBinaryImport = false; bool m_supportsNvxImageViewHandle = false; }; diff --git a/src/nvapi_d3d12.cpp b/src/nvapi_d3d12.cpp index 47ea5310..193eae21 100644 --- a/src/nvapi_d3d12.cpp +++ b/src/nvapi_d3d12.cpp @@ -198,6 +198,144 @@ extern "C" { } } + NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderExV2(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS* pParams) { + static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS, hShader) + sizeof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS::hShader); + // static_assert(V1StructSize == sizeof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS)); + constexpr auto n = __func__; + thread_local bool alreadyLoggedNoImplementation = false; + thread_local bool alreadyLoggedError = false; + thread_local bool alreadyLoggedOk = false; + + if (log::tracing()) + log::trace(n, log::fmt::nvapi_d3d12_create_cubin_shader_params(pParams)); + + if (!pParams) + return InvalidPointer(n); + + pParams->structSizeOut = V1StructSize; + + if (pParams->structSizeIn < V1StructSize) + return IncompatibleStructVersion(n, pParams->structSizeIn); + + if (!pParams->pDevice || !pParams->pShaderName) + return InvalidArgument(n); + + auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice); + if (!device) + return NoImplementation(n, alreadyLoggedNoImplementation); + + D3D12_CREATE_CUBIN_SHADER_PARAMS params; + params.pNext = nullptr; + params.pCubin = pParams->pCubin; + params.size = pParams->size; + 
params.blockX = pParams->blockX; + params.blockY = pParams->blockY; + params.blockZ = pParams->blockZ; + params.dynSharedMemBytes = pParams->dynSharedMemBytes; + params.pShaderName = pParams->pShaderName; + params.flags = pParams->flags; + + switch (device->CreateCubinComputeShaderExV2(&params)) { + case S_OK: + pParams->hShader = reinterpret_cast<NVDX_ObjectHandle>(params.hShader); + return Ok(n, alreadyLoggedOk); + case E_INVALIDARG: + return InvalidArgument(n); + case E_NOTIMPL: + return NoImplementation(n, alreadyLoggedNoImplementation); + default: + return Error(n, alreadyLoggedError); + } + } + + NvAPI_Status __cdecl NvAPI_D3D12_GetCudaMergedTextureSamplerObject(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* pParams) { + static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS, textureHandle) + sizeof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS::textureHandle); + // static_assert(V1StructSize == sizeof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS)); + constexpr auto n = __func__; + thread_local bool alreadyLoggedNoImplementation = false; + thread_local bool alreadyLoggedError = false; + thread_local bool alreadyLoggedOk = false; + + if (log::tracing()) + log::trace(n, log::fmt::nvapi_d3d12_get_cuda_merged_texture_sampler_object_params(pParams)); + + if (!pParams) + return InvalidPointer(n); + + pParams->structSizeOut = V1StructSize; + + if (pParams->structSizeIn < V1StructSize) + return IncompatibleStructVersion(n, pParams->structSizeIn); + + if (!pParams->pDevice || !pParams->texDesc.ptr) + return InvalidArgument(n); + + auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice); + if (!device) + return NoImplementation(n, alreadyLoggedNoImplementation); + + D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS params; + params.pNext = nullptr; + params.texDesc = pParams->texDesc.ptr; + params.smpDesc = pParams->smpDesc.ptr; + + switch (device->GetCudaMergedTextureSamplerObject(&params)) { + case 
S_OK: + pParams->textureHandle = params.textureHandle; + return Ok(n, alreadyLoggedOk); + case E_INVALIDARG: + return InvalidArgument(n); + case E_NOTIMPL: + return NoImplementation(n, alreadyLoggedNoImplementation); + default: + return Error(n, alreadyLoggedError); + } + } + + NvAPI_Status __cdecl NvAPI_D3D12_GetCudaIndependentDescriptorObject(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* pParams) { + static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS, handle) + sizeof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS::handle); + // static_assert(V1StructSize == sizeof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS)); + constexpr auto n = __func__; + thread_local bool alreadyLoggedNoImplementation = false; + thread_local bool alreadyLoggedError = false; + thread_local bool alreadyLoggedOk = false; + + if (log::tracing()) + log::trace(n, log::fmt::nvapi_d3d12_get_cuda_independent_descriptor_object_params(pParams)); + + if (!pParams) + return InvalidPointer(n); + + pParams->structSizeOut = V1StructSize; + + if (pParams->structSizeIn < V1StructSize) + return IncompatibleStructVersion(n, pParams->structSizeIn); + + if (!pParams->pDevice || !pParams->desc.ptr) + return InvalidArgument(n); + + auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice); + if (!device) + return NoImplementation(n, alreadyLoggedNoImplementation); + + D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS params; + params.pNext = nullptr; + params.type = static_cast<D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE>(pParams->type); + params.desc = pParams->desc.ptr; + + switch (device->GetCudaIndependentDescriptorObject(&params)) { + case S_OK: + pParams->handle = params.handle; + return Ok(n, alreadyLoggedOk); + case E_INVALIDARG: + return InvalidArgument(n); + case E_NOTIMPL: + return NoImplementation(n, alreadyLoggedNoImplementation); + default: + return Error(n, alreadyLoggedError); + } + } + + NvAPI_Status __cdecl 
NvAPI_D3D12_LaunchCubinShader(ID3D12GraphicsCommandList* pCmdList, NVDX_ObjectHandle pShader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize) { constexpr auto n = __func__; thread_local bool alreadyLoggedNoImplementation = false; diff --git a/src/nvapi_interface.cpp b/src/nvapi_interface.cpp index d3768a8f..e7b1ba20 100644 --- a/src/nvapi_interface.cpp +++ b/src/nvapi_interface.cpp @@ -107,10 +107,13 @@ extern "C" { INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetDepthBoundsTestValues) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderWithName) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderEx) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderExV2) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShader) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_DestroyCubinComputeShader) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaTextureObject) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaSurfaceObject) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaMergedTextureSamplerObject) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaIndependentDescriptorObject) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_LaunchCubinShader) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CaptureUAVInfo) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetGraphicsCapabilities) diff --git a/src/util/util_log.h b/src/util/util_log.h index 849aaad8..998ab1b3 100644 --- a/src/util/util_log.h +++ b/src/util/util_log.h @@ -50,6 +50,10 @@ namespace dxvk::log { return str::format("flags=0x", std::setfill('0'), std::setw(4), std::hex, h); } + inline std::string d3d12_cpu_descriptor_handle(D3D12_CPU_DESCRIPTOR_HANDLE h) { + return str::format("{ptr=", hex_prefix, std::hex, h.ptr, "}"); + } + inline std::string nv_latency_marker_params(NV_LATENCY_MARKER_PARAMS* p) { if (!p) return "nullptr"; @@ -64,6 +68,27 @@ namespace dxvk::log { return str::format("{version=", p->version, ",frameID=", p->frameID, ",markerType=", 
fromLatencyMarkerType(p->markerType), ",presentFrameID=", p->presentFrameID, ",rsvd}"); } + inline std::string nvapi_d3d12_create_cubin_shader_params(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS* p) { + if (!p) + return "nullptr"; + + return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",pCubin=", ptr(p->pCubin), ",size=", p->size, ",blockX=", p->blockX, ",blockY=", p->blockY, ",blockZ=", p->blockZ, ",dynSharedMemBytes=", p->dynSharedMemBytes, ",pShaderName=", p->pShaderName, ",flags=", p->flags, "}"); + } + + inline std::string nvapi_d3d12_get_cuda_merged_texture_sampler_object_params(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* p) { + if (!p) + return "nullptr"; + + return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",texDesc=", d3d12_cpu_descriptor_handle(p->texDesc), ",smpDesc=", d3d12_cpu_descriptor_handle(p->smpDesc), "}"); + } + + inline std::string nvapi_d3d12_get_cuda_independent_descriptor_object_params(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* p) { + if (!p) + return "nullptr"; + + return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",type=", p->type, ",desc=", d3d12_cpu_descriptor_handle(p->desc), "}"); + } + inline std::string nv_vk_get_sleep_status_params(NV_VULKAN_GET_SLEEP_STATUS_PARAMS* p) { if (!p) return "nullptr"; @@ -92,10 +117,6 @@ namespace dxvk::log { return str::format("{version=", p->version, ",frameID=", p->frameID, ",markerType=", p->markerType, ",rsvd}"); } - inline std::string d3d12_cpu_descriptor_handle(D3D12_CPU_DESCRIPTOR_HANDLE h) { - return str::format("{ptr=", hex_prefix, std::hex, h.ptr, "}"); - } - inline std::string ngx_dlss_override_get_state_params(NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS* p) { return str::format("{version=", p->version, ",processIdentifier=", p->processIdentifier, "}"); } diff --git a/tests/nvapi_d3d12.cpp b/tests/nvapi_d3d12.cpp index 5d48b9c4..d3ffba6c 100644 --- a/tests/nvapi_d3d12.cpp 
+++ b/tests/nvapi_d3d12.cpp @@ -37,6 +37,8 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") { .LR_SIDE_EFFECT(*_2 = static_cast(&device)) .LR_SIDE_EFFECT(deviceRefCount++) .RETURN(S_OK); + ALLOW_CALL(device, QueryInterface(__uuidof(ID3D12DeviceExt2), _)) + .RETURN(E_NOINTERFACE); ALLOW_CALL(device, AddRef()) .LR_SIDE_EFFECT(deviceRefCount++) .RETURN(deviceRefCount);