Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ID3D12DeviceExt2 for cubin 64-bit support #276

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions src/interfaces/vkd3d-proton_interfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,39 @@ typedef struct D3D12_UAV_INFO {
UINT64 gpuVASize;
} D3D12_UAV_INFO;

// Parameter block handed to ID3D12DeviceExt2::CreateCubinComputeShaderExV2.
// NOTE(review): member order and widths presumably have to match what the implementing
// device layer expects — confirm before reordering or resizing any field.
typedef struct D3D12_CREATE_CUBIN_SHADER_PARAMS {
void* pNext; // extension chain pointer
const void* pCubin; // cubin binary data
UINT32 size; // presumably the byte size of pCubin — TODO confirm
UINT32 blockX;
UINT32 blockY;
UINT32 blockZ;
UINT32 dynSharedMemBytes;
const char* pShaderName;
UINT32 flags;
D3D12_CUBIN_DATA_HANDLE* hShader; // out: shader handle produced by the call
} D3D12_CREATE_CUBIN_SHADER_PARAMS;

// Parameter block handed to ID3D12DeviceExt2::GetCudaMergedTextureSamplerObject.
typedef struct D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS {
void* pNext; // extension chain pointer
SIZE_T texDesc; // texture descriptor value (the NVAPI entry point passes a CPU descriptor handle's ptr)
SIZE_T smpDesc; // sampler descriptor value (the NVAPI entry point passes a CPU descriptor handle's ptr)
UINT64 textureHandle; // out: resulting handle
} D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS;

// Selects which kind of descriptor object GetCudaIndependentDescriptorObject should return.
// NOTE(review): the NVAPI entry point static_casts its own type value into this enum, so the
// numeric values presumably mirror the NVAPI enumeration — confirm before changing them.
typedef enum D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE {
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SURFACE = 0,
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TEXTURE = 1,
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_SAMPLER = 2,
} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE;

// Parameter block handed to ID3D12DeviceExt2::GetCudaIndependentDescriptorObject.
typedef struct D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS {
void* pNext; // extension chain pointer
D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE type; // requested object kind
SIZE_T desc; // descriptor value (the NVAPI entry point passes a CPU descriptor handle's ptr)
UINT64 handle; // out: resulting handle
} D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS;

MIDL_INTERFACE("11ea7a1a-0f6a-49bf-b612-3e30f8e201dd")
ID3D12DeviceExt : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetVulkanHandles(
Expand Down Expand Up @@ -85,6 +118,20 @@ ID3D12DeviceExt : public IUnknown {
D3D12_UAV_INFO * uav_info) = 0;
};

// First extension of ID3D12DeviceExt: resource creation from a borrowed Vulkan handle and an
// extended Vulkan queue query.
// The methods are declared with STDMETHODCALLTYPE, matching the ID3D12DeviceExt methods, so the
// COM calling convention is explicit on targets where the default member convention differs.
MIDL_INTERFACE("099a73fd-2199-4f45-bf48-0eb86f6fdb65")
ID3D12DeviceExt1 : public ID3D12DeviceExt {
    virtual HRESULT STDMETHODCALLTYPE CreateResourceFromBorrowedHandle(const D3D12_RESOURCE_DESC1* desc, UINT64 vk_handle, ID3D12Resource** resource) = 0;
    virtual HRESULT STDMETHODCALLTYPE GetVulkanQueueInfoEx(ID3D12CommandQueue* queue, VkQueue* vk_queue, UINT32* vk_queue_index, UINT32* vk_queue_flags, UINT32* vk_queue_family) = 0;
};

// Second extension of the device interface: 64-bit cubin support query, cubin shader creation
// through a parameter struct, and CUDA texture/sampler/descriptor object lookups.
// The methods are declared with STDMETHODCALLTYPE, matching the ID3D12DeviceExt methods, so the
// COM calling convention is explicit on targets where the default member convention differs.
MIDL_INTERFACE("e859c4ac-ba8f-41c4-8eac-1137fde6158d")
ID3D12DeviceExt2 : public ID3D12DeviceExt1 {
    virtual BOOL STDMETHODCALLTYPE SupportsCubin64bit() = 0;
    virtual HRESULT STDMETHODCALLTYPE CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS* params) = 0;
    virtual HRESULT STDMETHODCALLTYPE GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* params) = 0;
    virtual HRESULT STDMETHODCALLTYPE GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* params) = 0;
};

MIDL_INTERFACE("39da4e09-bd1c-4198-9fae-86bbe3be41fd")
ID3D12DXVKInteropDevice : public IUnknown {
virtual HRESULT STDMETHODCALLTYPE GetDXGIAdapter(
Expand Down Expand Up @@ -191,6 +238,8 @@ ID3D12CommandQueueExt : public IUnknown {

#ifndef _MSC_VER
__CRT_UUID_DECL(ID3D12DeviceExt, 0x11ea7a1a, 0x0f6a, 0x49bf, 0xb6, 0x12, 0x3e, 0x30, 0xf8, 0xe2, 0x01, 0xdd);
__CRT_UUID_DECL(ID3D12DeviceExt1, 0x099a73fd, 0x2199, 0x4f45, 0xbf, 0x48, 0x0e, 0xb8, 0x6f, 0x6f, 0xdb, 0x65);
__CRT_UUID_DECL(ID3D12DeviceExt2, 0xe859c4ac, 0xba8f, 0x41c4, 0x8e, 0xac, 0x11, 0x37, 0xfd, 0xe6, 0x15, 0x8d);
__CRT_UUID_DECL(ID3D12DXVKInteropDevice, 0x39da4e09, 0xbd1c, 0x4198, 0x9f, 0xae, 0x86, 0xbb, 0xe3, 0xbe, 0x41, 0xfd);
__CRT_UUID_DECL(ID3D12DXVKInteropDevice1, 0x902d8115, 0x59eb, 0x4406, 0x95, 0x18, 0xfe, 0x00, 0xf9, 0x91, 0xee, 0x65);
__CRT_UUID_DECL(ID3D12GraphicsCommandListExt, 0x77a86b09, 0x2bea, 0x4801, 0xb8, 0x9a, 0x37, 0x64, 0x8e, 0x10, 0x4a, 0xf1);
Expand Down
36 changes: 35 additions & 1 deletion src/nvapi/nvapi_d3d12_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,15 @@ namespace dxvk {
}

// Stores the vkd3d-proton device extension pointer and caches the feature flags it reports.
// NOTE(review): two initializer lines appear below (plain pointer and static_cast form);
// presumably only one of them is meant to remain — confirm.
// NOTE(review): the static_cast to ID3D12DeviceExt2 is not checked; the Ext2-only wrappers in
// this class return E_NOTIMPL unless m_supportsCubin64bit was set via QueryInterface below.
NvapiD3d12Device::NvapiD3d12Device(ID3D12DeviceExt* vkd3dDevice)
: m_vkd3dDevice(vkd3dDevice) {
: m_vkd3dDevice(static_cast<ID3D12DeviceExt2*>(vkd3dDevice)) {
m_supportsNvxBinaryImport = vkd3dDevice->GetExtensionSupport(D3D12_VK_NVX_BINARY_IMPORT);
m_supportsNvxImageViewHandle = vkd3dDevice->GetExtensionSupport(D3D12_VK_NVX_IMAGE_VIEW_HANDLE);

// Only probe for 64-bit cubin support when both NVX extensions are available.
if (m_supportsNvxBinaryImport && m_supportsNvxImageViewHandle) {
// The Ext2 interface is optional; the flag keeps its default of false when the query fails.
if (Com<ID3D12DeviceExt2> deviceExt2; SUCCEEDED(m_vkd3dDevice->QueryInterface(IID_PPV_ARGS(&deviceExt2)))) {
m_supportsCubin64bit = deviceExt2->SupportsCubin64bit();
}
}
}

HRESULT NvapiD3d12Device::CreateCubinComputeShaderWithName(const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader) {
Expand Down Expand Up @@ -109,4 +115,32 @@ namespace dxvk {
// True only when a device pointer is held and both NVX extensions were reported as supported.
bool NvapiD3d12Device::IsFatbinPTXSupported() const {
    if (!m_vkd3dDevice)
        return false;

    return m_supportsNvxBinaryImport && m_supportsNvxImageViewHandle;
}

// Forwards cubin shader creation to the Ext2 device interface.
// Returns E_NOTIMPL when 64-bit cubin support was not detected; otherwise returns the device's
// result. On S_OK the requested dynamic shared memory size is recorded under the new handle.
HRESULT NvapiD3d12Device::CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS* params) {
    if (!m_supportsCubin64bit)
        return E_NOTIMPL;

    const auto hr = m_vkd3dDevice->CreateCubinComputeShaderExV2(params);
    if (hr != S_OK)
        return hr;

    // The map is shared static state, so it is only touched while holding its mutex.
    std::scoped_lock guard(m_cubinSmemMutex);
    m_cubinSmemMap.emplace(reinterpret_cast<NVDX_ObjectHandle>(params->hShader), params->dynSharedMemBytes);
    return hr;
}

// Forwards to the Ext2 device interface, or returns E_NOTIMPL when 64-bit cubin support
// was not detected.
HRESULT NvapiD3d12Device::GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* params) const {
    if (m_supportsCubin64bit)
        return m_vkd3dDevice->GetCudaMergedTextureSamplerObject(params);

    return E_NOTIMPL;
}

// Forwards to the Ext2 device interface, or returns E_NOTIMPL when 64-bit cubin support
// was not detected.
HRESULT NvapiD3d12Device::GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* params) const {
    if (m_supportsCubin64bit)
        return m_vkd3dDevice->GetCudaIndependentDescriptorObject(params);

    return E_NOTIMPL;
}
}
7 changes: 6 additions & 1 deletion src/nvapi/nvapi_d3d12_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,19 @@ namespace dxvk {
[[nodiscard]] HRESULT CaptureUAVInfo(NVAPI_UAV_INFO* uavInfo) const;
[[nodiscard]] bool IsFatbinPTXSupported() const;

// Wrappers around the ID3D12DeviceExt2 methods of the same names; each returns E_NOTIMPL
// when 64-bit cubin support was not detected for the device.
[[nodiscard]] HRESULT CreateCubinComputeShaderExV2(D3D12_CREATE_CUBIN_SHADER_PARAMS* params);
[[nodiscard]] HRESULT GetCudaMergedTextureSamplerObject(D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* params) const;
[[nodiscard]] HRESULT GetCudaIndependentDescriptorObject(D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* params) const;

private:
static std::unordered_map<ID3D12Device*, NvapiD3d12Device> m_nvapiDeviceMap;
static std::mutex m_mutex;

static std::unordered_map<NVDX_ObjectHandle, NvU32> m_cubinSmemMap;
static std::mutex m_cubinSmemMutex;

ID3D12DeviceExt* m_vkd3dDevice{};
ID3D12DeviceExt2* m_vkd3dDevice{};
bool m_supportsCubin64bit = false;
bool m_supportsNvxBinaryImport = false;
bool m_supportsNvxImageViewHandle = false;
};
Expand Down
138 changes: 138 additions & 0 deletions src/nvapi_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,144 @@ extern "C" {
}
}

// NVAPI entry point: creates a cubin compute shader through the device's Ext2 interface.
// Validates the caller's struct (pointer, size, device, shader name), copies the inputs into the
// vkd3d-proton parameter block, and maps the resulting HRESULT to an NvAPI status. On S_OK the
// shader handle is written back to pParams->hShader.
NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderExV2(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS* pParams) {
    static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS, hShader) + sizeof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS::hShader);
    // static_assert(V1StructSize == sizeof(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS));
    constexpr auto n = __func__;
    thread_local bool alreadyLoggedNoImplementation = false;
    thread_local bool alreadyLoggedError = false;
    thread_local bool alreadyLoggedOk = false;

    // The formatter handles a null pParams itself, so tracing may run before the null check.
    if (log::tracing())
        log::trace(n, log::fmt::nvapi_d3d12_create_cubin_shader_params(pParams));

    if (!pParams)
        return InvalidPointer(n);

    // Report the struct size this implementation understands before rejecting smaller inputs.
    pParams->structSizeOut = V1StructSize;

    if (pParams->structSizeIn < V1StructSize)
        return IncompatibleStructVersion(n, pParams->structSizeIn);

    if (!pParams->pDevice || !pParams->pShaderName)
        return InvalidArgument(n);

    auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice);
    if (!device)
        return NoImplementation(n, alreadyLoggedNoImplementation);

    // Value-initialize so the output handle and any padding are zeroed instead of indeterminate
    // when the struct is handed to the device.
    D3D12_CREATE_CUBIN_SHADER_PARAMS params{};
    params.pNext = nullptr;
    params.pCubin = pParams->pCubin;
    params.size = pParams->size;
    params.blockX = pParams->blockX;
    params.blockY = pParams->blockY;
    params.blockZ = pParams->blockZ;
    params.dynSharedMemBytes = pParams->dynSharedMemBytes;
    params.pShaderName = pParams->pShaderName;
    params.flags = pParams->flags;

    switch (device->CreateCubinComputeShaderExV2(&params)) {
        case S_OK:
            pParams->hShader = reinterpret_cast<NVDX_ObjectHandle>(params.hShader);
            return Ok(n, alreadyLoggedOk);
        case E_INVALIDARG:
            return InvalidArgument(n);
        case E_NOTIMPL:
            return NoImplementation(n, alreadyLoggedNoImplementation);
        default:
            return Error(n, alreadyLoggedError);
    }
}

// NVAPI entry point: obtains a merged texture/sampler handle through the device's Ext2 interface.
// Validates the caller's struct (pointer, size, device, texture descriptor), forwards the
// descriptor values, and maps the resulting HRESULT to an NvAPI status. On S_OK the handle is
// written back to pParams->textureHandle.
NvAPI_Status __cdecl NvAPI_D3D12_GetCudaMergedTextureSamplerObject(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* pParams) {
    static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS, textureHandle) + sizeof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS::textureHandle);
    // static_assert(V1StructSize == sizeof(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS));
    constexpr auto n = __func__;
    thread_local bool alreadyLoggedNoImplementation = false;
    thread_local bool alreadyLoggedError = false;
    thread_local bool alreadyLoggedOk = false;

    // The formatter handles a null pParams itself, so tracing may run before the null check.
    if (log::tracing())
        log::trace(n, log::fmt::nvapi_d3d12_get_cuda_merged_texture_sampler_object_params(pParams));

    if (!pParams)
        return InvalidPointer(n);

    // Report the struct size this implementation understands before rejecting smaller inputs.
    pParams->structSizeOut = V1StructSize;

    if (pParams->structSizeIn < V1StructSize)
        return IncompatibleStructVersion(n, pParams->structSizeIn);

    if (!pParams->pDevice || !pParams->texDesc.ptr)
        return InvalidArgument(n);

    auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice);
    if (!device)
        return NoImplementation(n, alreadyLoggedNoImplementation);

    // Value-initialize so the output handle and any padding are zeroed instead of indeterminate
    // when the struct is handed to the device.
    D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS params{};
    params.pNext = nullptr;
    params.texDesc = pParams->texDesc.ptr;
    params.smpDesc = pParams->smpDesc.ptr;

    switch (device->GetCudaMergedTextureSamplerObject(&params)) {
        case S_OK:
            pParams->textureHandle = params.textureHandle;
            return Ok(n, alreadyLoggedOk);
        case E_INVALIDARG:
            return InvalidArgument(n);
        case E_NOTIMPL:
            return NoImplementation(n, alreadyLoggedNoImplementation);
        default:
            return Error(n, alreadyLoggedError);
    }
}

// NVAPI entry point: obtains an independent descriptor object handle through the device's Ext2
// interface. Validates the caller's struct (pointer, size, device, descriptor), forwards the
// type and descriptor value, and maps the resulting HRESULT to an NvAPI status. On S_OK the
// handle is written back to pParams->handle.
NvAPI_Status __cdecl NvAPI_D3D12_GetCudaIndependentDescriptorObject(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* pParams) {
    static constexpr auto V1StructSize = offsetof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS, handle) + sizeof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS::handle);
    // static_assert(V1StructSize == sizeof(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS));
    constexpr auto n = __func__;
    thread_local bool alreadyLoggedNoImplementation = false;
    thread_local bool alreadyLoggedError = false;
    thread_local bool alreadyLoggedOk = false;

    // The formatter handles a null pParams itself, so tracing may run before the null check.
    if (log::tracing())
        log::trace(n, log::fmt::nvapi_d3d12_get_cuda_independent_descriptor_object_params(pParams));

    if (!pParams)
        return InvalidPointer(n);

    // Report the struct size this implementation understands before rejecting smaller inputs.
    pParams->structSizeOut = V1StructSize;

    if (pParams->structSizeIn < V1StructSize)
        return IncompatibleStructVersion(n, pParams->structSizeIn);

    if (!pParams->pDevice || !pParams->desc.ptr)
        return InvalidArgument(n);

    auto device = NvapiD3d12Device::GetOrCreate(pParams->pDevice);
    if (!device)
        return NoImplementation(n, alreadyLoggedNoImplementation);

    // Value-initialize so the output handle and any padding are zeroed instead of indeterminate
    // when the struct is handed to the device.
    D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS params{};
    params.pNext = nullptr;
    params.type = static_cast<D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_TYPE>(pParams->type);
    params.desc = pParams->desc.ptr;

    switch (device->GetCudaIndependentDescriptorObject(&params)) {
        case S_OK:
            pParams->handle = params.handle;
            return Ok(n, alreadyLoggedOk);
        case E_INVALIDARG:
            return InvalidArgument(n);
        case E_NOTIMPL:
            return NoImplementation(n, alreadyLoggedNoImplementation);
        default:
            return Error(n, alreadyLoggedError);
    }
}

NvAPI_Status __cdecl NvAPI_D3D12_LaunchCubinShader(ID3D12GraphicsCommandList* pCmdList, NVDX_ObjectHandle pShader, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const void* params, NvU32 paramSize) {
constexpr auto n = __func__;
thread_local bool alreadyLoggedNoImplementation = false;
Expand Down
3 changes: 3 additions & 0 deletions src/nvapi_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,13 @@ extern "C" {
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetDepthBoundsTestValues)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderWithName)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderEx)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderExV2)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShader)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_DestroyCubinComputeShader)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaTextureObject)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaSurfaceObject)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaMergedTextureSamplerObject)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaIndependentDescriptorObject)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_LaunchCubinShader)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CaptureUAVInfo)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetGraphicsCapabilities)
Expand Down
29 changes: 25 additions & 4 deletions src/util/util_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ namespace dxvk::log {
return str::format("flags=0x", std::setfill('0'), std::setw(4), std::hex, h);
}

// Formats a CPU descriptor handle as "{ptr=<hex>}" for trace output.
inline std::string d3d12_cpu_descriptor_handle(D3D12_CPU_DESCRIPTOR_HANDLE h) {
    const auto address = h.ptr;
    return str::format("{ptr=", hex_prefix, std::hex, address, "}");
}

inline std::string nv_latency_marker_params(NV_LATENCY_MARKER_PARAMS* p) {
if (!p)
return "nullptr";
Expand All @@ -64,6 +68,27 @@ namespace dxvk::log {
return str::format("{version=", p->version, ",frameID=", p->frameID, ",markerType=", fromLatencyMarkerType(p->markerType), ",presentFrameID=", p->presentFrameID, ",rsvd}");
}

// Formats the input fields of a cubin shader creation request for trace output.
// Returns "nullptr" when p is null.
inline std::string nvapi_d3d12_create_cubin_shader_params(NVAPI_D3D12_CREATE_CUBIN_SHADER_PARAMS* p) {
    if (!p)
        return "nullptr";

    // The entry point traces before it validates pShaderName, so the name may still be null
    // here; a null const char* must not be inserted into a stream, so substitute a marker.
    const char* shaderName = p->pShaderName ? p->pShaderName : "nullptr";

    return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",pCubin=", ptr(p->pCubin), ",size=", p->size, ",blockX=", p->blockX, ",blockY=", p->blockY, ",blockZ=", p->blockZ, ",dynSharedMemBytes=", p->dynSharedMemBytes, ",pShaderName=", shaderName, ",flags=", p->flags, "}");
}

// Formats the input fields of a merged texture/sampler object request for trace output.
// Returns "nullptr" when p is null.
inline std::string nvapi_d3d12_get_cuda_merged_texture_sampler_object_params(NVAPI_D3D12_GET_CUDA_MERGED_TEXTURE_SAMPLER_OBJECT_PARAMS* p) {
    if (p == nullptr)
        return "nullptr";

    const auto textureText = d3d12_cpu_descriptor_handle(p->texDesc);
    const auto samplerText = d3d12_cpu_descriptor_handle(p->smpDesc);
    return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",texDesc=", textureText, ",smpDesc=", samplerText, "}");
}

// Formats the input fields of an independent descriptor object request for trace output.
// Returns "nullptr" when p is null.
inline std::string nvapi_d3d12_get_cuda_independent_descriptor_object_params(NVAPI_D3D12_GET_CUDA_INDEPENDENT_DESCRIPTOR_OBJECT_PARAMS* p) {
    if (p == nullptr)
        return "nullptr";

    const auto descriptorText = d3d12_cpu_descriptor_handle(p->desc);
    return str::format("{structSizeIn=", p->structSizeIn, ",pDevice=", ptr(p->pDevice), ",type=", p->type, ",desc=", descriptorText, "}");
}

inline std::string nv_vk_get_sleep_status_params(NV_VULKAN_GET_SLEEP_STATUS_PARAMS* p) {
if (!p)
return "nullptr";
Expand Down Expand Up @@ -92,10 +117,6 @@ namespace dxvk::log {
return str::format("{version=", p->version, ",frameID=", p->frameID, ",markerType=", p->markerType, ",rsvd}");
}

// Formats a CPU descriptor handle as "{ptr=<hex>}" for trace output.
// NOTE(review): an identical definition also appears earlier in this view; presumably this is
// the pre-move location and only one copy remains in the file — confirm.
inline std::string d3d12_cpu_descriptor_handle(D3D12_CPU_DESCRIPTOR_HANDLE h) {
return str::format("{ptr=", hex_prefix, std::hex, h.ptr, "}");
}

// Formats a DLSS override get-state request for trace output.
// Returns "nullptr" when p is null, matching the other parameter formatters in this header,
// so tracing a call made with a null argument does not dereference it.
inline std::string ngx_dlss_override_get_state_params(NV_NGX_DLSS_OVERRIDE_GET_STATE_PARAMS* p) {
    if (!p)
        return "nullptr";

    return str::format("{version=", p->version, ",processIdentifier=", p->processIdentifier, "}");
}
Expand Down
2 changes: 2 additions & 0 deletions tests/nvapi_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ TEST_CASE("D3D12 methods succeed", "[.d3d12]") {
.LR_SIDE_EFFECT(*_2 = static_cast<ID3D12DeviceExt*>(&device))
.LR_SIDE_EFFECT(deviceRefCount++)
.RETURN(S_OK);
ALLOW_CALL(device, QueryInterface(__uuidof(ID3D12DeviceExt2), _))
.RETURN(E_NOINTERFACE);
ALLOW_CALL(device, AddRef())
.LR_SIDE_EFFECT(deviceRefCount++)
.RETURN(deviceRefCount);
Expand Down