From c387d961eb751f523a1908c6076b3401c6f6d4a3 Mon Sep 17 00:00:00 2001
From: Ali Cheraghi
Date: Tue, 10 Sep 2024 14:19:46 +0330
Subject: [PATCH] hlsl_generator: handwritten BDA instructions

Signed-off-by: Ali Cheraghi
---
 tools/hlsl_generator/gen.py   |  46 ++++++++-------
 tools/hlsl_generator/out.hlsl | 104 ++++++++++++----------------------
 2 files changed, 63 insertions(+), 87 deletions(-)

diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py
index bb9d23867..4dd2e5815 100644
--- a/tools/hlsl_generator/gen.py
+++ b/tools/hlsl_generator/gen.py
@@ -29,9 +29,6 @@
 {
 
 //! General Decls
-template<typename T>
-NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type<T>::value;
-
 template<uint32_t StorageClass, typename T>
 struct pointer
 {
@@ -47,6 +44,9 @@
 template<uint32_t StorageClass, typename T>
 using pointer_t = typename pointer<StorageClass, T>::type;
 
+template<typename T>
+NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_same_v<T, typename pointer<spv::StorageClassPhysicalStorageBuffer, T>::type >;
+
 // The holy operation that makes addrof possible
 template<typename T>
 [[vk::ext_instruction(spv::OpCopyObject)]]
@@ -58,11 +58,31 @@
 [[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]]
 SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat);
 
+//! Memory instructions
+template<typename T, uint32_t alignment>
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpLoad)]]
+T load(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
+
+template<typename T, typename P>
+[[vk::ext_instruction(spv::OpLoad)]]
+enable_if_t<is_spirv_type_v<P>, T> load(P pointer);
+
+template<typename T, uint32_t alignment>
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpStore)]]
+void store(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, T obj, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
+
+template<typename T, typename P>
+[[vk::ext_instruction(spv::OpStore)]]
+enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T obj);
+
+//! Bitcast Instructions
 // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on)
 template<typename T, typename U>
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-enable_if_t<is_spirv_type_v<T>, T> bitcast(U);
+enable_if_t<is_pointer_v<T>, T> bitcast(U);
 
 template<typename T, typename U>
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
@@ -181,9 +201,6 @@ def gen(grammer_path, output_path):
             case "Atomic":
                 processInst(writer, instruction)
                 processInst(writer, instruction, Shape.PTR_TEMPLATE)
-            case "Memory":
-                processInst(writer, instruction, Shape.PTR_TEMPLATE)
-                processInst(writer, instruction, Shape.BDA)
             case "Barrier" | "Bit":
                 processInst(writer, instruction)
             case "Reserved":
@@ -208,7 +225,6 @@
 class Shape(Enum):
     DEFAULT = 0,
     PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround
-    BDA = 2, # PhysicalStorageBuffer Result Type
 
 def processInst(writer: io.TextIOWrapper,
                 instruction,
@@ -231,8 +247,6 @@ def processInst(writer: io.TextIOWrapper,
     if shape == Shape.PTR_TEMPLATE:
         templates.append("typename P")
         conds.append("is_spirv_type_v<P>")
-    elif shape == Shape.BDA:
-        caps.append("PhysicalStorageBufferAddresses")
 
     # split upper case words
     matches = [(m.group(1), m.span(1)) for m in re.finditer(r'([A-Z])[A-Z][a-z]', fn_name)]
@@ -249,7 +263,7 @@ def processInst(writer: io.TextIOWrapper,
                 conds.append("is_signed_v<T>")
                 break
             case "F":
-                conds.append("is_floating_point<T>")
+                conds.append("(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>)")
                 break
     else:
         if instruction["class"] == "Bit":
@@ -303,10 +317,6 @@ def processInst(writer: io.TextIOWrapper,
             case "'Pointer'":
                 if shape == Shape.PTR_TEMPLATE:
                     args.append("P " + operand_name)
-                elif shape == Shape.BDA:
-                    if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"):
-                        final_templates = ["typename T"] + final_templates
-                    args.append("pointer_t<spv::StorageClassPhysicalStorageBuffer, T> " + operand_name)
                 else:
                     if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"):
                         final_templates = ["typename T"] + final_templates
@@ -327,10 +337,8 @@ def processInst(writer: io.TextIOWrapper,
             case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name)
             case "MemoryAccess":
                 assert len(caps) <= 1
-                if shape != Shape.BDA:
-                    writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"])
-                    writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"])
-                writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"])
+                writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"])
+                writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"])
             case _: return ignore(op_name) # TODO
 
     writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args)
diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl
index 25cc4f17b..62797583d 100644
--- a/tools/hlsl_generator/out.hlsl
+++ b/tools/hlsl_generator/out.hlsl
@@ -20,9 +20,6 @@ namespace spirv
 {
 
 //! General Decls
-template<typename T>
-NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type<T>::value;
-
 template<uint32_t StorageClass, typename T>
 struct pointer
 {
@@ -38,6 +35,9 @@ struct pointer
 template<uint32_t StorageClass, typename T>
 using pointer_t = typename pointer<StorageClass, T>::type;
 
+template<typename T>
+NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_same_v<T, typename pointer<spv::StorageClassPhysicalStorageBuffer, T>::type >;
+
 // The holy operation that makes addrof possible
 template<typename T>
 [[vk::ext_instruction(spv::OpCopyObject)]]
@@ -49,11 +49,31 @@ template<typename SquareMatrix>
 [[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]]
 SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat);
 
+//! Memory instructions
+template<typename T, uint32_t alignment>
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpLoad)]]
+T load(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
+
+template<typename T, typename P>
+[[vk::ext_instruction(spv::OpLoad)]]
+enable_if_t<is_spirv_type_v<P>, T> load(P pointer);
+
+template<typename T, uint32_t alignment>
+[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
+[[vk::ext_instruction(spv::OpStore)]]
+void store(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, T obj, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
+
+template<typename T, typename P>
+[[vk::ext_instruction(spv::OpStore)]]
+enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T obj);
+
+//! Bitcast Instructions
 // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on)
 template<typename T, typename U>
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-enable_if_t<is_spirv_type_v<T>, T> bitcast(U);
+enable_if_t<is_pointer_v<T>, T> bitcast(U);
 
 template<typename T, typename U>
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
@@ -548,58 +568,6 @@ namespace group_operation
 }
 
 //! Instructions
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpLoad)]]
-enable_if_t<is_spirv_type_v<P>, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess);
-
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpLoad)]]
-enable_if_t<is_spirv_type_v<P>, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam);
-
-template<typename T, typename P, uint32_t alignment>
-[[vk::ext_instruction(spv::OpLoad)]]
-enable_if_t<is_spirv_type_v<P>, T> load(P pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
-
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpLoad)]]
-enable_if_t<is_spirv_type_v<P>, T> load(P pointer);
-
-template<typename T, uint32_t alignment>
-[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
-[[vk::ext_instruction(spv::OpLoad)]]
-T load(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
-
-template<typename T>
-[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
-[[vk::ext_instruction(spv::OpLoad)]]
-T load(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer);
-
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpStore)]]
-enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess);
-
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpStore)]]
-enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam);
-
-template<typename T, typename P, uint32_t alignment>
-[[vk::ext_instruction(spv::OpStore)]]
-enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
-
-template<typename T, typename P>
-[[vk::ext_instruction(spv::OpStore)]]
-enable_if_t<is_spirv_type_v<P>, void> store(P pointer, T object);
-
-template<typename T, uint32_t alignment>
-[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
-[[vk::ext_instruction(spv::OpStore)]]
-void store(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment);
-
-template<typename T>
-[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
-[[vk::ext_instruction(spv::OpStore)]]
-void store(pointer_t<spv::StorageClassPhysicalStorageBuffer, T> pointer, T object);
-
 template<typename T>
 [[vk::ext_capability(spv::CapabilityBitInstructions)]]
 [[vk::ext_instruction(spv::OpBitFieldInsert)]]
@@ -838,17 +806,17 @@ enable_if_t<(is_signed_v<T> || is_unsigned_v<T>), T> groupNonUniformIAdd_GroupNo
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
@@ -868,17 +836,17 @@ enable_if_t<(is_signed_v<T> || is_unsigned_v<T>), T> groupNonUniformIMul_GroupNo
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
@@ -913,17 +881,17 @@ enable_if_t<is_unsigned_v<T>, T> groupNonUniformUMin_GroupNonUniformPartitionedN
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
@@ -958,17 +926,17 @@ enable_if_t<is_unsigned_v<T>, T> groupNonUniformUMax_GroupNonUniformPartitionedN
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]]
 [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]]
-enable_if_t<is_floating_point<T>, T> groupNonUniformFMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
+enable_if_t<(is_same_v<T, float16_t> || is_same_v<T, float32_t> || is_same_v<T, float64_t>), T> groupNonUniformFMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value);
 
 template<typename T>
 [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]]
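
Usage note (illustration only, not part of the patch): a minimal sketch of how the handwritten BDA intrinsics compose, assuming the generated header is included and exposes the `nbl::hlsl::spirv` namespace shown above. The push-constant block, entry point, and the 16-byte alignment are assumptions for the example; the alignment template literal is what ends up as the mandatory Aligned (0x00000002) memory operand on OpLoad/OpStore for PhysicalStorageBuffer access.

    // hypothetical push constant carrying a buffer device address
    // (obtained on the host via vkGetBufferDeviceAddress)
    struct PushConstants
    {
        uint64_t deviceAddress;
    };
    [[vk::push_constant]] PushConstants pc;

    [numthreads(1, 1, 1)]
    void main()
    {
        // reinterpret the raw 64-bit address as a PhysicalStorageBuffer pointer;
        // this bitcast overload also emits the PhysicalStorageBufferAddresses capability
        nbl::hlsl::spirv::pointer_t<spv::StorageClassPhysicalStorageBuffer, float4> ptr =
            nbl::hlsl::spirv::bitcast<nbl::hlsl::spirv::pointer_t<spv::StorageClassPhysicalStorageBuffer, float4>, uint64_t>(pc.deviceAddress);

        // aligned BDA load/store through the new handwritten intrinsics;
        // the 16 becomes the value of the Aligned memory operand
        float4 value = nbl::hlsl::spirv::load<float4, 16>(ptr);
        nbl::hlsl::spirv::store<float4, 16>(ptr, value * 2.0f);
    }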