From 20034bbd9193846322632406d86ededa3dd04eab Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Fri, 30 Aug 2024 16:05:51 +0330 Subject: [PATCH 01/18] Add HLSL generator --- tools/hlsl_generator/gen.py | 311 ++++++++ tools/hlsl_generator/out.hlsl | 1340 +++++++++++++++++++++++++++++++++ 2 files changed, 1651 insertions(+) create mode 100644 tools/hlsl_generator/gen.py create mode 100644 tools/hlsl_generator/out.hlsl diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py new file mode 100644 index 000000000..5308c0ede --- /dev/null +++ b/tools/hlsl_generator/gen.py @@ -0,0 +1,311 @@ +import json +import io +import os +import re +from enum import Enum +from argparse import ArgumentParser +from typing import NamedTuple +from typing import Optional + +head = """// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ + +#ifdef __HLSL_VERSION +#include "spirv/unified1/spirv.hpp" +#include "spirv/unified1/GLSL.std.450.h" +#endif + +#include "nbl/builtin/hlsl/type_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +namespace spirv +{ + +//! General Decls +template +using pointer_t = vk::SpirvOpaqueType >, T>; + +// The holy operation that makes addrof possible +template +[[vk::ext_instruction(spv::OpCopyObject)]] +pointer_t copyObject([[vk::ext_reference]] T value); + +//! Std 450 Extended set operations +template +[[vk::ext_instruction(GLSLstd450MatrixInverse)]] +SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); + +// Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +enable_if_t && is_spirv_type_v, T> bitcast(U); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +uint64_t bitcast(pointer_t); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +pointer_t bitcast(uint64_t); + +template +[[vk::ext_instruction(spv::OpBitcast)]] +T bitcast(U); +""" + +foot = """} + +#endif +} +} + +#endif +""" + +def gen(grammer_path, output_path): + grammer_raw = open(grammer_path, "r").read() + grammer = json.loads(grammer_raw) + del grammer_raw + + output = open(output_path, "w", buffering=1024**2) + + builtins = [x for x in grammer["operand_kinds"] if x["kind"] == "BuiltIn"][0]["enumerants"] + execution_modes = [x for x in grammer["operand_kinds"] if x["kind"] == "ExecutionMode"][0]["enumerants"] + group_operations = [x for x in grammer["operand_kinds"] if x["kind"] == "GroupOperation"][0]["enumerants"] + + with output as writer: + writer.write(head) + + writer.write("\n//! 
Builtins\nnamespace builtin\n{") + for b in builtins: + builtin_type = None + is_output = False + builtin_name = b["enumerant"] + match builtin_name: + case "HelperInvocation": builtin_type = "bool" + case "VertexIndex": builtin_type = "uint32_t" + case "InstanceIndex": builtin_type = "uint32_t" + case "NumWorkgroups": builtin_type = "uint32_t3" + case "WorkgroupId": builtin_type = "uint32_t3" + case "LocalInvocationId": builtin_type = "uint32_t3" + case "GlobalInvocationId": builtin_type = "uint32_t3" + case "LocalInvocationIndex": builtin_type = "uint32_t" + case "SubgroupEqMask": builtin_type = "uint32_t4" + case "SubgroupGeMask": builtin_type = "uint32_t4" + case "SubgroupGtMask": builtin_type = "uint32_t4" + case "SubgroupLeMask": builtin_type = "uint32_t4" + case "SubgroupLtMask": builtin_type = "uint32_t4" + case "SubgroupSize": builtin_type = "uint32_t" + case "NumSubgroups": builtin_type = "uint32_t" + case "SubgroupId": builtin_type = "uint32_t" + case "SubgroupLocalInvocationId": builtin_type = "uint32_t" + case "Position": + builtin_type = "float32_t4" + is_output = True + case _: continue + if is_output: + writer.write("[[vk::ext_builtin_output(spv::BuiltIn" + builtin_name + ")]]\n") + writer.write("static " + builtin_type + " " + builtin_name + ";\n") + else: + writer.write("[[vk::ext_builtin_input(spv::BuiltIn" + builtin_name + ")]]\n") + writer.write("static const " + builtin_type + " " + builtin_name + ";\n") + writer.write("}\n") + + writer.write("\n//! Execution Modes\nnamespace execution_mode\n{") + for em in execution_modes: + name = em["enumerant"] + name_l = name[0].lower() + name[1:] + writer.write("\n\tvoid " + name_l + "()\n\t{\n\t\tvk::ext_execution_mode(spv::ExecutionMode" + name + ");\n\t}\n") + writer.write("}\n") + + writer.write("\n//! Group Operations\nnamespace group_operation\n{\n") + for go in group_operations: + name = go["enumerant"] + value = go["value"] + writer.write("\tstatic const uint32_t " + name + " = " + str(value) + ";\n") + writer.write("}\n") + + writer.write("\n//! 
Instructions\n") + for instruction in grammer["instructions"]: + match instruction["class"]: + case "Atomic": + processInst(writer, instruction, InstOptions()) + processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) + case "Memory": + processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) + processInst(writer, instruction, InstOptions(shape=Shape.PSB_RT)) + case "Barrier" | "Bit": + processInst(writer, instruction, InstOptions()) + case "Reserved": + match instruction["opname"]: + case "OpBeginInvocationInterlockEXT" | "OpEndInvocationInterlockEXT": + processInst(writer, instruction, InstOptions()) + case "Non-Uniform": + match instruction["opname"]: + case "OpGroupNonUniformElect" | "OpGroupNonUniformAll" | "OpGroupNonUniformAny" | "OpGroupNonUniformAllEqual": + processInst(writer, instruction, InstOptions(result_ty="bool")) + case "OpGroupNonUniformBallot": + processInst(writer, instruction, InstOptions(result_ty="uint32_t4",op_ty="bool")) + case "OpGroupNonUniformInverseBallot" | "OpGroupNonUniformBallotBitExtract": + processInst(writer, instruction, InstOptions(result_ty="bool",op_ty="uint32_t4")) + case "OpGroupNonUniformBallotBitCount" | "OpGroupNonUniformBallotFindLSB" | "OpGroupNonUniformBallotFindMSB": + processInst(writer, instruction, InstOptions(result_ty="uint32_t",op_ty="uint32_t4")) + case _: processInst(writer, instruction, InstOptions()) + case _: continue # TODO + + writer.write(foot) + +class Shape(Enum): + DEFAULT = 0, + PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround + PSB_RT = 2, # PhysicalStorageBuffer Result Type + +class InstOptions(NamedTuple): + shape: Shape = Shape.DEFAULT + result_ty: Optional[str] = None + op_ty: Optional[str] = None + +def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): + templates = [] + caps = [] + conds = [] + op_name = instruction["opname"] + fn_name = op_name[2].lower() + op_name[3:] + result_types = [] + + if "capabilities" in instruction and len(instruction["capabilities"]) > 0: + for cap in instruction["capabilities"]: + if cap == "Shader" or cap == "Kernel": continue + caps.append(cap) + + if options.shape == Shape.PTR_TEMPLATE: + templates.append("typename P") + conds.append("is_spirv_type_v
<P>
") + + # split upper case words + matches = [(m.group(1), m.span(1)) for m in re.finditer(r'([A-Z])[A-Z][a-z]', fn_name)] + + for m in matches: + match m[0]: + case "I": + conds.append("(is_signed_v || is_unsigned_v)") + break + case "U": + fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] + result_types = ["uint32_t", "uint64_t"] + break + case "S": + fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] + result_types = ["int32_t", "int64_t"] + break + case "F": + fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] + result_types = ["float"] + break + + if "operands" in instruction: + operands = instruction["operands"] + if operands[0]["kind"] == "IdResultType": + operands = operands[2:] + if len(result_types) == 0: + if options.result_ty == None: + result_types = ["T"] + else: + result_types = [options.result_ty] + else: + assert len(result_types) == 0 + result_types = ["void"] + + for rt in result_types: + op_ty = "T" + if options.op_ty != None: + op_ty = options.op_ty + elif rt != "void": + op_ty = rt + + if (not "typename T" in templates) and (rt == "T"): + templates = ["typename T"] + templates + + args = [] + for operand in operands: + operand_name = operand["name"].strip("'") if "name" in operand else None + operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" + match operand["kind"]: + case "IdRef": + match operand["name"]: + case "'Pointer'": + if options.shape == Shape.PTR_TEMPLATE: + args.append("P " + operand_name) + elif options.shape == Shape.PSB_RT: + if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): + templates = ["typename T"] + templates + args.append("pointer_t " + operand_name) + else: + if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): + templates = ["typename T"] + templates + args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) + case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": + if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): + templates = ["typename T"] + templates + args.append(op_ty + " " + operand_name) + case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": + args.append("uint32_t " + operand_name) + case "'Predicate'": args.append("bool " + operand_name) + case "'ClusterSize'": + if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload + else: return # TODO + case _: return # TODO + case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") + case "IdMemorySemantics": args.append(" uint32_t " + operand_name) + case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) + case "MemoryAccess": + writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, templates + ["uint32_t alignment"], caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + case _: return # TODO + + writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args) + + +def writeInst(writer: io.TextIOWrapper, templates, caps, op_name, fn_name, conds, result_type, args): + if len(caps) > 0: + for cap in caps: + final_fn_name = fn_name + if (len(caps) > 1): final_fn_name = fn_name + "_" + cap + writeInstInner(writer, templates, cap, 
op_name, final_fn_name, conds, result_type, args) + else: + writeInstInner(writer, templates, None, op_name, fn_name, conds, result_type, args) + +def writeInstInner(writer: io.TextIOWrapper, templates, cap, op_name, fn_name, conds, result_type, args): + if len(templates) > 0: + writer.write("template<" + ", ".join(templates) + ">\n") + if (cap != None): + writer.write("[[vk::ext_capability(spv::Capability" + cap + ")]]\n") + writer.write("[[vk::ext_instruction(spv::" + op_name + ")]]\n") + if len(conds) > 0: + writer.write("enable_if_t<" + " && ".join(conds) + ", " + result_type + ">") + else: + writer.write(result_type) + writer.write(" " + fn_name + "(" + ", ".join(args) + ");\n\n") + + +if __name__ == "__main__": + script_dir_path = os.path.abspath(os.path.dirname(__file__)) + + parser = ArgumentParser(description="Generate HLSL from SPIR-V instructions") + parser.add_argument("output", type=str, help="HLSL output file") + parser.add_argument("--grammer", required=False, type=str, help="Input SPIR-V grammer JSON file", default=os.path.join(script_dir_path, "../../include/spirv/unified1/spirv.core.grammar.json")) + args = parser.parse_args() + + gen(args.grammer, args.output) + diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl new file mode 100644 index 000000000..bd3c2d16d --- /dev/null +++ b/tools/hlsl_generator/out.hlsl @@ -0,0 +1,1340 @@ +// Copyright (C) 2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ + +#ifdef __HLSL_VERSION +#include "spirv/unified1/spirv.hpp" +#include "spirv/unified1/GLSL.std.450.h" +#endif + +#include "nbl/builtin/hlsl/type_traits.hlsl" + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +namespace spirv +{ + +//! General Decls +template +using pointer_t = vk::SpirvOpaqueType >, T>; + +// The holy operation that makes addrof possible +template +[[vk::ext_instruction(spv::OpCopyObject)]] +pointer_t copyObject([[vk::ext_reference]] T value); + +//! Std 450 Extended set operations +template +[[vk::ext_instruction(GLSLstd450MatrixInverse)]] +SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); + +// Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +enable_if_t && is_spirv_type_v, T> bitcast(U); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +uint64_t bitcast(pointer_t); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpBitcast)]] +pointer_t bitcast(uint64_t); + +template +[[vk::ext_instruction(spv::OpBitcast)]] +T bitcast(U); + +//! 
Builtins +namespace builtin +{[[vk::ext_builtin_output(spv::BuiltInPosition)]] +static float32_t4 Position; +[[vk::ext_builtin_input(spv::BuiltInHelperInvocation)]] +static const bool HelperInvocation; +[[vk::ext_builtin_input(spv::BuiltInNumWorkgroups)]] +static const uint32_t3 NumWorkgroups; +[[vk::ext_builtin_input(spv::BuiltInWorkgroupId)]] +static const uint32_t3 WorkgroupId; +[[vk::ext_builtin_input(spv::BuiltInLocalInvocationId)]] +static const uint32_t3 LocalInvocationId; +[[vk::ext_builtin_input(spv::BuiltInGlobalInvocationId)]] +static const uint32_t3 GlobalInvocationId; +[[vk::ext_builtin_input(spv::BuiltInLocalInvocationIndex)]] +static const uint32_t LocalInvocationIndex; +[[vk::ext_builtin_input(spv::BuiltInSubgroupSize)]] +static const uint32_t SubgroupSize; +[[vk::ext_builtin_input(spv::BuiltInNumSubgroups)]] +static const uint32_t NumSubgroups; +[[vk::ext_builtin_input(spv::BuiltInSubgroupId)]] +static const uint32_t SubgroupId; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLocalInvocationId)]] +static const uint32_t SubgroupLocalInvocationId; +[[vk::ext_builtin_input(spv::BuiltInVertexIndex)]] +static const uint32_t VertexIndex; +[[vk::ext_builtin_input(spv::BuiltInInstanceIndex)]] +static const uint32_t InstanceIndex; +[[vk::ext_builtin_input(spv::BuiltInSubgroupEqMask)]] +static const uint32_t4 SubgroupEqMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupGeMask)]] +static const uint32_t4 SubgroupGeMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupGtMask)]] +static const uint32_t4 SubgroupGtMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLeMask)]] +static const uint32_t4 SubgroupLeMask; +[[vk::ext_builtin_input(spv::BuiltInSubgroupLtMask)]] +static const uint32_t4 SubgroupLtMask; +} + +//! Execution Modes +namespace execution_mode +{ + void invocations() + { + vk::ext_execution_mode(spv::ExecutionModeInvocations); + } + + void spacingEqual() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingEqual); + } + + void spacingFractionalEven() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingFractionalEven); + } + + void spacingFractionalOdd() + { + vk::ext_execution_mode(spv::ExecutionModeSpacingFractionalOdd); + } + + void vertexOrderCw() + { + vk::ext_execution_mode(spv::ExecutionModeVertexOrderCw); + } + + void vertexOrderCcw() + { + vk::ext_execution_mode(spv::ExecutionModeVertexOrderCcw); + } + + void pixelCenterInteger() + { + vk::ext_execution_mode(spv::ExecutionModePixelCenterInteger); + } + + void originUpperLeft() + { + vk::ext_execution_mode(spv::ExecutionModeOriginUpperLeft); + } + + void originLowerLeft() + { + vk::ext_execution_mode(spv::ExecutionModeOriginLowerLeft); + } + + void earlyFragmentTests() + { + vk::ext_execution_mode(spv::ExecutionModeEarlyFragmentTests); + } + + void pointMode() + { + vk::ext_execution_mode(spv::ExecutionModePointMode); + } + + void xfb() + { + vk::ext_execution_mode(spv::ExecutionModeXfb); + } + + void depthReplacing() + { + vk::ext_execution_mode(spv::ExecutionModeDepthReplacing); + } + + void depthGreater() + { + vk::ext_execution_mode(spv::ExecutionModeDepthGreater); + } + + void depthLess() + { + vk::ext_execution_mode(spv::ExecutionModeDepthLess); + } + + void depthUnchanged() + { + vk::ext_execution_mode(spv::ExecutionModeDepthUnchanged); + } + + void localSize() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSize); + } + + void localSizeHint() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeHint); + } + + void inputPoints() + { + vk::ext_execution_mode(spv::ExecutionModeInputPoints); + } + + void 
inputLines() + { + vk::ext_execution_mode(spv::ExecutionModeInputLines); + } + + void inputLinesAdjacency() + { + vk::ext_execution_mode(spv::ExecutionModeInputLinesAdjacency); + } + + void triangles() + { + vk::ext_execution_mode(spv::ExecutionModeTriangles); + } + + void inputTrianglesAdjacency() + { + vk::ext_execution_mode(spv::ExecutionModeInputTrianglesAdjacency); + } + + void quads() + { + vk::ext_execution_mode(spv::ExecutionModeQuads); + } + + void isolines() + { + vk::ext_execution_mode(spv::ExecutionModeIsolines); + } + + void outputVertices() + { + vk::ext_execution_mode(spv::ExecutionModeOutputVertices); + } + + void outputPoints() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPoints); + } + + void outputLineStrip() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLineStrip); + } + + void outputTriangleStrip() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTriangleStrip); + } + + void vecTypeHint() + { + vk::ext_execution_mode(spv::ExecutionModeVecTypeHint); + } + + void contractionOff() + { + vk::ext_execution_mode(spv::ExecutionModeContractionOff); + } + + void initializer() + { + vk::ext_execution_mode(spv::ExecutionModeInitializer); + } + + void finalizer() + { + vk::ext_execution_mode(spv::ExecutionModeFinalizer); + } + + void subgroupSize() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupSize); + } + + void subgroupsPerWorkgroup() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupsPerWorkgroup); + } + + void subgroupsPerWorkgroupId() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupsPerWorkgroupId); + } + + void localSizeId() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeId); + } + + void localSizeHintId() + { + vk::ext_execution_mode(spv::ExecutionModeLocalSizeHintId); + } + + void nonCoherentColorAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentColorAttachmentReadEXT); + } + + void nonCoherentDepthAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentDepthAttachmentReadEXT); + } + + void nonCoherentStencilAttachmentReadEXT() + { + vk::ext_execution_mode(spv::ExecutionModeNonCoherentStencilAttachmentReadEXT); + } + + void subgroupUniformControlFlowKHR() + { + vk::ext_execution_mode(spv::ExecutionModeSubgroupUniformControlFlowKHR); + } + + void postDepthCoverage() + { + vk::ext_execution_mode(spv::ExecutionModePostDepthCoverage); + } + + void denormPreserve() + { + vk::ext_execution_mode(spv::ExecutionModeDenormPreserve); + } + + void denormFlushToZero() + { + vk::ext_execution_mode(spv::ExecutionModeDenormFlushToZero); + } + + void signedZeroInfNanPreserve() + { + vk::ext_execution_mode(spv::ExecutionModeSignedZeroInfNanPreserve); + } + + void roundingModeRTE() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTE); + } + + void roundingModeRTZ() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTZ); + } + + void earlyAndLateFragmentTestsAMD() + { + vk::ext_execution_mode(spv::ExecutionModeEarlyAndLateFragmentTestsAMD); + } + + void stencilRefReplacingEXT() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefReplacingEXT); + } + + void coalescingAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeCoalescingAMDX); + } + + void maxNodeRecursionAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeMaxNodeRecursionAMDX); + } + + void staticNumWorkgroupsAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeStaticNumWorkgroupsAMDX); + } + + void shaderIndexAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeShaderIndexAMDX); + } + + void 
maxNumWorkgroupsAMDX() + { + vk::ext_execution_mode(spv::ExecutionModeMaxNumWorkgroupsAMDX); + } + + void stencilRefUnchangedFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefUnchangedFrontAMD); + } + + void stencilRefGreaterFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefGreaterFrontAMD); + } + + void stencilRefLessFrontAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefLessFrontAMD); + } + + void stencilRefUnchangedBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefUnchangedBackAMD); + } + + void stencilRefGreaterBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefGreaterBackAMD); + } + + void stencilRefLessBackAMD() + { + vk::ext_execution_mode(spv::ExecutionModeStencilRefLessBackAMD); + } + + void quadDerivativesKHR() + { + vk::ext_execution_mode(spv::ExecutionModeQuadDerivativesKHR); + } + + void requireFullQuadsKHR() + { + vk::ext_execution_mode(spv::ExecutionModeRequireFullQuadsKHR); + } + + void outputLinesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLinesEXT); + } + + void outputLinesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputLinesNV); + } + + void outputPrimitivesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPrimitivesEXT); + } + + void outputPrimitivesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputPrimitivesNV); + } + + void derivativeGroupQuadsNV() + { + vk::ext_execution_mode(spv::ExecutionModeDerivativeGroupQuadsNV); + } + + void derivativeGroupLinearNV() + { + vk::ext_execution_mode(spv::ExecutionModeDerivativeGroupLinearNV); + } + + void outputTrianglesEXT() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTrianglesEXT); + } + + void outputTrianglesNV() + { + vk::ext_execution_mode(spv::ExecutionModeOutputTrianglesNV); + } + + void pixelInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModePixelInterlockOrderedEXT); + } + + void pixelInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModePixelInterlockUnorderedEXT); + } + + void sampleInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeSampleInterlockOrderedEXT); + } + + void sampleInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeSampleInterlockUnorderedEXT); + } + + void shadingRateInterlockOrderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeShadingRateInterlockOrderedEXT); + } + + void shadingRateInterlockUnorderedEXT() + { + vk::ext_execution_mode(spv::ExecutionModeShadingRateInterlockUnorderedEXT); + } + + void sharedLocalMemorySizeINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeSharedLocalMemorySizeINTEL); + } + + void roundingModeRTPINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTPINTEL); + } + + void roundingModeRTNINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTNINTEL); + } + + void floatingPointModeALTINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeALTINTEL); + } + + void floatingPointModeIEEEINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeIEEEINTEL); + } + + void maxWorkgroupSizeINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaxWorkgroupSizeINTEL); + } + + void maxWorkDimINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaxWorkDimINTEL); + } + + void noGlobalOffsetINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNoGlobalOffsetINTEL); + } + + void numSIMDWorkitemsINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNumSIMDWorkitemsINTEL); + } + + void schedulerTargetFmaxMhzINTEL() + { + 
vk::ext_execution_mode(spv::ExecutionModeSchedulerTargetFmaxMhzINTEL); + } + + void maximallyReconvergesKHR() + { + vk::ext_execution_mode(spv::ExecutionModeMaximallyReconvergesKHR); + } + + void fPFastMathDefault() + { + vk::ext_execution_mode(spv::ExecutionModeFPFastMathDefault); + } + + void streamingInterfaceINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeStreamingInterfaceINTEL); + } + + void registerMapInterfaceINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeRegisterMapInterfaceINTEL); + } + + void namedBarrierCountINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNamedBarrierCountINTEL); + } + + void maximumRegistersINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersINTEL); + } + + void maximumRegistersIdINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersIdINTEL); + } + + void namedMaximumRegistersINTEL() + { + vk::ext_execution_mode(spv::ExecutionModeNamedMaximumRegistersINTEL); + } +} + +//! Group Operations +namespace group_operation +{ + static const uint32_t Reduce = 0; + static const uint32_t InclusiveScan = 1; + static const uint32_t ExclusiveScan = 2; + static const uint32_t ClusteredReduce = 3; + static const uint32_t PartitionedReduceNV = 6; + static const uint32_t PartitionedInclusiveScanNV = 7; + static const uint32_t PartitionedExclusiveScanNV = 8; +} + +//! Instructions +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T object); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); + +template +[[vk::ext_instruction(spv::OpStore)]] +void 
store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T object); + +template +[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] +enable_if_t, T> genericPtrMemSemantics(P pointer); + +template +[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] +T genericPtrMemSemantics(pointer_t pointer); + +template +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldInsert)]] +T bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +int32_t bitFieldExtract(int32_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +int64_t bitFieldExtract(int64_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +uint32_t bitFieldExtract(uint32_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +uint64_t bitFieldExtract(uint64_t base, uint32_t offset, uint32_t count); + +template +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitReverse)]] +T bitReverse(T base); + +template +[[vk::ext_instruction(spv::OpBitCount)]] +T bitCount(T base); + +[[vk::ext_instruction(spv::OpControlBarrier)]] +void controlBarrier(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); + +[[vk::ext_instruction(spv::OpMemoryBarrier)]] +void memoryBarrier(uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicLoad)]] +T atomicLoad([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicLoad)]] +enable_if_t, T> atomicLoad(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicStore)]] +void atomicStore([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicStore)]] +enable_if_t, void> atomicStore(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicExchange)]] +T atomicExchange([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicExchange)]] +enable_if_t, T> atomicExchange(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchange)]] +T atomicCompareExchange([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchange)]] +enable_if_t, T> atomicCompareExchange(P pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] +T atomicCompareExchangeWeak([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] +enable_if_t, T> atomicCompareExchangeWeak(P pointer, uint32_t memoryScope, uint32_t equal, 
uint32_t unequal, T value, T comparator); + +template +[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> atomicIIncrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIIncrement)]] +enable_if_t && (is_signed_v || is_unsigned_v), T> atomicIIncrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> atomicIDecrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIDecrement)]] +enable_if_t && (is_signed_v || is_unsigned_v), T> atomicIDecrement(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> atomicIAdd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicIAdd)]] +enable_if_t && (is_signed_v || is_unsigned_v), T> atomicIAdd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> atomicISub([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicISub)]] +enable_if_t && (is_signed_v || is_unsigned_v), T> atomicISub(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +[[vk::ext_instruction(spv::OpAtomicSMin)]] +int32_t atomicMin([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); + +[[vk::ext_instruction(spv::OpAtomicSMin)]] +int64_t atomicMin([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); + +template +[[vk::ext_instruction(spv::OpAtomicSMin)]] +enable_if_t, int32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); + +template +[[vk::ext_instruction(spv::OpAtomicSMin)]] +enable_if_t, int64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); + +[[vk::ext_instruction(spv::OpAtomicUMin)]] +uint32_t atomicMin([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); + +[[vk::ext_instruction(spv::OpAtomicUMin)]] +uint64_t atomicMin([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); + +template +[[vk::ext_instruction(spv::OpAtomicUMin)]] +enable_if_t, uint32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); + +template +[[vk::ext_instruction(spv::OpAtomicUMin)]] +enable_if_t, uint64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); + +[[vk::ext_instruction(spv::OpAtomicSMax)]] +int32_t atomicMax([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); + +[[vk::ext_instruction(spv::OpAtomicSMax)]] +int64_t atomicMax([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); + +template +[[vk::ext_instruction(spv::OpAtomicSMax)]] +enable_if_t, int32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); + +template +[[vk::ext_instruction(spv::OpAtomicSMax)]] +enable_if_t, int64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); + +[[vk::ext_instruction(spv::OpAtomicUMax)]] 
+uint32_t atomicMax([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); + +[[vk::ext_instruction(spv::OpAtomicUMax)]] +uint64_t atomicMax([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); + +template +[[vk::ext_instruction(spv::OpAtomicUMax)]] +enable_if_t, uint32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); + +template +[[vk::ext_instruction(spv::OpAtomicUMax)]] +enable_if_t, uint64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +T atomicAnd([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicAnd)]] +enable_if_t, T> atomicAnd(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +T atomicOr([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicOr)]] +enable_if_t, T> atomicOr(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +T atomicXor([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicXor)]] +enable_if_t, T> atomicXor(P pointer, uint32_t memoryScope, uint32_t semantics, T value); + +template +[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] +T atomicFlagTestAndSet([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] +enable_if_t, T> atomicFlagTestAndSet(P pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagClear)]] +void atomicFlagClear([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); + +template +[[vk::ext_instruction(spv::OpAtomicFlagClear)]] +enable_if_t, void> atomicFlagClear(P pointer, uint32_t memoryScope, uint32_t semantics); + +[[vk::ext_capability(spv::CapabilityGroupNonUniform)]] +[[vk::ext_instruction(spv::OpGroupNonUniformElect)]] +bool groupNonUniformElect(uint32_t executionScope); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformVote)]] +[[vk::ext_instruction(spv::OpGroupNonUniformAll)]] +bool groupNonUniformAll(uint32_t executionScope, bool predicate); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformVote)]] +[[vk::ext_instruction(spv::OpGroupNonUniformAny)]] +bool groupNonUniformAny(uint32_t executionScope, bool predicate); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformVote)]] +[[vk::ext_instruction(spv::OpGroupNonUniformAllEqual)]] +bool groupNonUniformAllEqual(uint32_t executionScope, bool value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBroadcast)]] +T groupNonUniformBroadcast(uint32_t executionScope, T value, uint32_t id); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBroadcastFirst)]] +T groupNonUniformBroadcastFirst(uint32_t executionScope, T value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBallot)]] +uint32_t4 groupNonUniformBallot(uint32_t executionScope, bool predicate); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] 
+[[vk::ext_instruction(spv::OpGroupNonUniformInverseBallot)]] +bool groupNonUniformInverseBallot(uint32_t executionScope, uint32_t4 value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBallotBitExtract)]] +bool groupNonUniformBallotBitExtract(uint32_t executionScope, uint32_t4 value, uint32_t index); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBallotBitCount)]] +uint32_t groupNonUniformBallotBitCount(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t4 value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBallotFindLSB)]] +uint32_t groupNonUniformBallotFindLSB(uint32_t executionScope, uint32_t4 value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBallotFindMSB)]] +uint32_t groupNonUniformBallotFindMSB(uint32_t executionScope, uint32_t4 value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformShuffle)]] +[[vk::ext_instruction(spv::OpGroupNonUniformShuffle)]] +T groupNonUniformShuffle(uint32_t executionScope, T value, uint32_t id); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformShuffle)]] +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleXor)]] +T groupNonUniformShuffleXor(uint32_t executionScope, T value, uint32_t mask); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformShuffleRelative)]] +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleUp)]] +T groupNonUniformShuffleUp(uint32_t executionScope, T value, uint32_t delta); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformShuffleRelative)]] +[[vk::ext_instruction(spv::OpGroupNonUniformShuffleDown)]] +T groupNonUniformShuffleDown(uint32_t executionScope, T value, uint32_t delta); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIAdd)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIAdd)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIAdd)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float 
value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIMul)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIMul)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformIMul)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] 
+[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int32_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + 
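+// Usage sketch, assuming a caller-provided `int32_t myValue`: a subgroup-wide
+// signed-max reduction. `group_operation::Reduce` is the literal defined above
+// and `spv::ScopeSubgroup` comes from spirv.hpp.
+//   int32_t subgroupMax = spirv::groupNonUniformMax_GroupNonUniformArithmetic(
+//       spv::ScopeSubgroup, group_operation::Reduce, myValue);
+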
+[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint32_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseAnd)]] +T groupNonUniformBitwiseAnd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseAnd)]] +T groupNonUniformBitwiseAnd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseAnd)]] +T groupNonUniformBitwiseAnd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseOr)]] +T groupNonUniformBitwiseOr_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseOr)]] +T 
groupNonUniformBitwiseOr_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseOr)]] +T groupNonUniformBitwiseOr_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseXor)]] +T groupNonUniformBitwiseXor_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseXor)]] +T groupNonUniformBitwiseXor_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformBitwiseXor)]] +T groupNonUniformBitwiseXor_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalAnd)]] +T groupNonUniformLogicalAnd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalAnd)]] +T groupNonUniformLogicalAnd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalAnd)]] +T groupNonUniformLogicalAnd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalOr)]] +T groupNonUniformLogicalOr_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalOr)]] +T groupNonUniformLogicalOr_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalOr)]] +T groupNonUniformLogicalOr_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalXor)]] +T groupNonUniformLogicalXor_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalXor)]] +T groupNonUniformLogicalXor_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformLogicalXor)]] +T groupNonUniformLogicalXor_GroupNonUniformPartitionedNV(uint32_t 
executionScope, [[vk::ext_literal]] uint32_t operation, T value); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformQuad)]] +[[vk::ext_instruction(spv::OpGroupNonUniformQuadBroadcast)]] +T groupNonUniformQuadBroadcast(uint32_t executionScope, T value, uint32_t index); + +template +[[vk::ext_capability(spv::CapabilityQuadControlKHR)]] +[[vk::ext_instruction(spv::OpGroupNonUniformQuadAllKHR)]] +T groupNonUniformQuadAllKHR(bool predicate); + +template +[[vk::ext_capability(spv::CapabilityQuadControlKHR)]] +[[vk::ext_instruction(spv::OpGroupNonUniformQuadAnyKHR)]] +T groupNonUniformQuadAnyKHR(bool predicate); + +template +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformPartitionNV)]] +T groupNonUniformPartitionNV(T value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + 
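+// Usage sketch, assuming a caller-declared `groupshared float gMax` and a
+// local `float candidate`: a relaxed atomic float max. The scope and memory
+// semantics constants come from spirv.hpp.
+//   spirv::atomicMaxEXT_AtomicFloat32MinMaxEXT(gMax, spv::ScopeWorkgroup,
+//       spv::MemorySemanticsMaskNone, candidate);
+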
+[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); + +[[vk::ext_capability(spv::CapabilitySplitBarrierINTEL)]] +[[vk::ext_instruction(spv::OpControlBarrierArriveINTEL)]] +void controlBarrierArriveINTEL(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); + +[[vk::ext_capability(spv::CapabilitySplitBarrierINTEL)]] +[[vk::ext_instruction(spv::OpControlBarrierWaitINTEL)]] +void controlBarrierWaitINTEL(uint32_t executionScope, 
uint32_t memoryScope, uint32_t semantics);
+
+}
+
+#endif
+}
+}
+
+#endif

From 214602923b8ab5c3f9b675b43ba88f3db572de2e Mon Sep 17 00:00:00 2001
From: Ali Cheraghi
Date: Sun, 8 Sep 2024 13:46:01 +0330
Subject: [PATCH 02/18] hlsl_generator: fix formatting and use is_pointer_v for
 bitcast

Signed-off-by: Ali Cheraghi
---
 tools/hlsl_generator/gen.py   | 10 +++++-----
 tools/hlsl_generator/out.hlsl | 11 ++++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py
index 5308c0ede..43f4a498e 100644
--- a/tools/hlsl_generator/gen.py
+++ b/tools/hlsl_generator/gen.py
@@ -39,24 +39,24 @@
 //! Std 450 Extended set operations
 template
-[[vk::ext_instruction(GLSLstd450MatrixInverse)]]
+[[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]]
 SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat);
 
 // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on)
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-enable_if_t && is_spirv_type_v, T> bitcast(U);
+enable_if_t, T> bitcast(U);
 
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-uint64_t bitcast(pointer_t);
+uint64_t bitcast(pointer_t);
 
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-pointer_t bitcast(uint64_t);
+pointer_t bitcast(uint64_t);
 
 template
 [[vk::ext_instruction(spv::OpBitcast)]]
@@ -86,7 +86,7 @@ def gen(grammer_path, output_path):
     with output as writer:
         writer.write(head)
 
-        writer.write("\n//! Builtins\nnamespace builtin\n{")
+        writer.write("\n//! Builtins\nnamespace builtin\n{\n")
         for b in builtins:
             builtin_type = None
             is_output = False
diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl
index bd3c2d16d..0591b9dff 100644
--- a/tools/hlsl_generator/out.hlsl
+++ b/tools/hlsl_generator/out.hlsl
@@ -30,24 +30,24 @@ pointer_t copyObject([[vk::ext_reference]] T value);
 
 //! Std 450 Extended set operations
 template
-[[vk::ext_instruction(GLSLstd450MatrixInverse)]]
+[[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]]
 SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat);
 
 // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on)
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-enable_if_t && is_spirv_type_v, T> bitcast(U);
+enable_if_t, T> bitcast(U);
 
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-uint64_t bitcast(pointer_t);
+uint64_t bitcast(pointer_t);
 
 template
 [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]]
 [[vk::ext_instruction(spv::OpBitcast)]]
-pointer_t bitcast(uint64_t);
+pointer_t bitcast(uint64_t);
 
 template
 [[vk::ext_instruction(spv::OpBitcast)]]
 T bitcast(U);
 
 //! 
Builtins namespace builtin -{[[vk::ext_builtin_output(spv::BuiltInPosition)]] +{ +[[vk::ext_builtin_output(spv::BuiltInPosition)]] static float32_t4 Position; [[vk::ext_builtin_input(spv::BuiltInHelperInvocation)]] static const bool HelperInvocation; From 9b33a290b96e25b607fd345d6b764cafc565fa0e Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 13:53:43 +0330 Subject: [PATCH 03/18] hlsl_generator: ignore instructions with kernel capability Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 3 ++- tools/hlsl_generator/out.hlsl | 32 -------------------------------- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 43f4a498e..231403595 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -185,7 +185,8 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): if "capabilities" in instruction and len(instruction["capabilities"]) > 0: for cap in instruction["capabilities"]: - if cap == "Shader" or cap == "Kernel": continue + if cap == "Kernel" and len(instruction["capabilities"]) == 1: return + if cap == "Shader": continue caps.append(cap) if options.shape == Shape.PTR_TEMPLATE: diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 0591b9dff..c9e044b4d 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -650,14 +650,6 @@ template [[vk::ext_instruction(spv::OpStore)]] void store(pointer_t pointer, T object); -template -[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] -enable_if_t, T> genericPtrMemSemantics(P pointer); - -template -[[vk::ext_instruction(spv::OpGenericPtrMemSemantics)]] -T genericPtrMemSemantics(pointer_t pointer); - template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldInsert)]] @@ -726,14 +718,6 @@ template [[vk::ext_instruction(spv::OpAtomicCompareExchange)]] enable_if_t, T> atomicCompareExchange(P pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); -template -[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] -T atomicCompareExchangeWeak([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); - -template -[[vk::ext_instruction(spv::OpAtomicCompareExchangeWeak)]] -enable_if_t, T> atomicCompareExchangeWeak(P pointer, uint32_t memoryScope, uint32_t equal, uint32_t unequal, T value, T comparator); - template [[vk::ext_instruction(spv::OpAtomicIIncrement)]] enable_if_t<(is_signed_v || is_unsigned_v), T> atomicIIncrement([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); @@ -846,22 +830,6 @@ template [[vk::ext_instruction(spv::OpAtomicXor)]] enable_if_t, T> atomicXor(P pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] -T atomicFlagTestAndSet([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); - -template -[[vk::ext_instruction(spv::OpAtomicFlagTestAndSet)]] -enable_if_t, T> atomicFlagTestAndSet(P pointer, uint32_t memoryScope, uint32_t semantics); - -template -[[vk::ext_instruction(spv::OpAtomicFlagClear)]] -void atomicFlagClear([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics); - -template -[[vk::ext_instruction(spv::OpAtomicFlagClear)]] -enable_if_t, void> atomicFlagClear(P pointer, uint32_t memoryScope, uint32_t semantics); - [[vk::ext_capability(spv::CapabilityGroupNonUniform)]] 
[[vk::ext_instruction(spv::OpGroupNonUniformElect)]] bool groupNonUniformElect(uint32_t executionScope); From 3f41681ed12bbfb68f54ca925849c84ca5cd3068 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 14:58:39 +0330 Subject: [PATCH 04/18] hlsl_generator: emit needed capabilities for overloaded instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 21 +- tools/hlsl_generator/out.hlsl | 622 +++++++++++++++++++++++++++++++--- 2 files changed, 598 insertions(+), 45 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 231403595..e395aa05e 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -203,15 +203,15 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): break case "U": fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["uint32_t", "uint64_t"] + result_types = ["uint16_t", "uint32_t", "uint64_t"] break case "S": fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["int32_t", "int64_t"] + result_types = ["int16_t", "int32_t", "int64_t"] break case "F": fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["float"] + result_types = ["float16_t", "float32_t", "float64_t"] break if "operands" in instruction: @@ -228,6 +228,13 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): result_types = ["void"] for rt in result_types: + overload_caps = caps.copy() + match rt: + case "uint16_t" | "int16_t": overload_caps.append("Int16") + case "uint64_t" | "int64_t": overload_caps.append("Int64") + case "float16_t": overload_caps.append("Float16") + case "float64_t": overload_caps.append("Float64") + op_ty = "T" if options.op_ty != None: op_ty = options.op_ty @@ -270,12 +277,12 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "IdMemorySemantics": args.append(" uint32_t " + operand_name) case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) case "MemoryAccess": - writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) - writeInst(writer, templates + ["uint32_t alignment"], caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, templates + ["uint32_t alignment"], overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) case _: return # TODO - writeInst(writer, templates, caps, op_name, fn_name, conds, rt, args) + writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args) def writeInst(writer: io.TextIOWrapper, templates, caps, op_name, fn_name, conds, result_type, args): diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index c9e044b4d..128c0bbb1 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -655,13 
+655,33 @@ template [[vk::ext_instruction(spv::OpBitFieldInsert)]] T bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +int16_t bitFieldExtract_BitInstructions(int16_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +int16_t bitFieldExtract_Int16(int16_t base, uint32_t offset, uint32_t count); + [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldSExtract)]] int32_t bitFieldExtract(int32_t base, uint32_t offset, uint32_t count); [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int64_t bitFieldExtract(int64_t base, uint32_t offset, uint32_t count); +int64_t bitFieldExtract_BitInstructions(int64_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpBitFieldSExtract)]] +int64_t bitFieldExtract_Int64(int64_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityBitInstructions)]] +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +uint16_t bitFieldExtract_BitInstructions(uint16_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +uint16_t bitFieldExtract_Int16(uint16_t base, uint32_t offset, uint32_t count); [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldUExtract)]] @@ -669,7 +689,11 @@ uint32_t bitFieldExtract(uint32_t base, uint32_t offset, uint32_t count); [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint64_t bitFieldExtract(uint64_t base, uint32_t offset, uint32_t count); +uint64_t bitFieldExtract_BitInstructions(uint64_t base, uint32_t offset, uint32_t count); + +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpBitFieldUExtract)]] +uint64_t bitFieldExtract_Int64(uint64_t base, uint32_t offset, uint32_t count); template [[vk::ext_capability(spv::CapabilityBitInstructions)]] @@ -750,59 +774,103 @@ template [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_signed_v || is_unsigned_v), T> atomicISub(P pointer, uint32_t memoryScope, uint32_t semantics, T value); +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicSMin)]] +int16_t atomicMin([[vk::ext_reference]] int16_t pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); + [[vk::ext_instruction(spv::OpAtomicSMin)]] int32_t atomicMin([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicSMin)]] int64_t atomicMin([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); +template +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicSMin)]] +enable_if_t, int16_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); + template [[vk::ext_instruction(spv::OpAtomicSMin)]] enable_if_t, int32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); template +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicSMin)]] enable_if_t, int64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); 
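+
+// Editor's sketch, not emitted by gen.py: the integer atomics rely on plain
+// overloading, so the operand width picks the declaration, and only the 16- and
+// 64-bit overloads pull in the extra Int16/Int64 capability. Assuming a
+// groupshared int64_t counter:
+//   int64_t prev = atomicMin(counter, spv::ScopeWorkgroup,
+//       spv::MemorySemanticsMaskNone, (int64_t)42);
+// so a shader only requests CapabilityInt64 when a call site like this one
+// actually instantiates the wide overload.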
+[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicUMin)]] +uint16_t atomicMin([[vk::ext_reference]] uint16_t pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); + [[vk::ext_instruction(spv::OpAtomicUMin)]] uint32_t atomicMin([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicUMin)]] uint64_t atomicMin([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +template +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicUMin)]] +enable_if_t, uint16_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); + template [[vk::ext_instruction(spv::OpAtomicUMin)]] enable_if_t, uint32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); template +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicUMin)]] enable_if_t, uint64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicSMax)]] +int16_t atomicMax([[vk::ext_reference]] int16_t pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); + [[vk::ext_instruction(spv::OpAtomicSMax)]] int32_t atomicMax([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicSMax)]] int64_t atomicMax([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); +template +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicSMax)]] +enable_if_t, int16_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); + template [[vk::ext_instruction(spv::OpAtomicSMax)]] enable_if_t, int32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); template +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicSMax)]] enable_if_t, int64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicUMax)]] +uint16_t atomicMax([[vk::ext_reference]] uint16_t pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); + [[vk::ext_instruction(spv::OpAtomicUMax)]] uint32_t atomicMax([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicUMax)]] uint64_t atomicMax([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +template +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpAtomicUMax)]] +enable_if_t, uint16_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); + template [[vk::ext_instruction(spv::OpAtomicUMax)]] enable_if_t, uint32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); template +[[vk::ext_capability(spv::CapabilityInt64)]] [[vk::ext_instruction(spv::OpAtomicUMax)]] enable_if_t, uint64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); @@ -917,15 +985,47 @@ enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNo 
[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float16_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float16_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float16_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float16_t groupNonUniformAdd_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float32_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float32_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float32_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float64_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float64_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float64_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] +float64_t groupNonUniformAdd_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -944,15 +1044,63 @@ enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNo [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, 
[[vk::ext_literal]] uint32_t operation, float value); +float16_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float16_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float16_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float16_t groupNonUniformMul_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float32_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float32_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float32_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float64_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float64_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float64_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] +float64_t groupNonUniformMul_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + 
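+
+// Editor's sketch, illustration only: the [[vk::ext_literal]] operation picks the
+// reduction shape, and the constants generated into the group_operation namespace
+// mirror spv::GroupOperation, so a plain subgroup reduce-multiply over a local
+// float32_t x reads:
+//   float32_t p = groupNonUniformMul_GroupNonUniformArithmetic(
+//       spv::ScopeSubgroup, group_operation::Reduce, x);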
+[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int16_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int16_t groupNonUniformMin_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] @@ -978,6 +1126,26 @@ int64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[v [[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] int64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] +int64_t groupNonUniformMin_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint16_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint16_t groupNonUniformMin_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] uint32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); @@ -1002,17 +1170,69 @@ uint64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[ [[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] uint64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] +uint64_t groupNonUniformMin_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float16_t 
groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float16_t groupNonUniformMin_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] +float64_t groupNonUniformMin_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int16_t groupNonUniformMax_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); 
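+
+// Editor's note on the naming scheme (an assumption mirroring gen.py): when an
+// instruction admits several capabilities, one suffixed overload is emitted per
+// capability and the caller opts into the cheapest one its targets support; an
+// inclusive scan over a local int16_t v that only needs arithmetic support is:
+//   int16_t m = groupNonUniformMax_GroupNonUniformArithmetic(
+//       spv::ScopeSubgroup, group_operation::InclusiveScan, v);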
[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] @@ -1038,6 +1258,26 @@ int64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[v [[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] int64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] +int64_t groupNonUniformMax_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + +[[vk::ext_capability(spv::CapabilityInt16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint16_t groupNonUniformMax_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); + [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] uint32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); @@ -1062,17 +1302,53 @@ uint64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[ [[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] uint64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +[[vk::ext_capability(spv::CapabilityInt64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] +uint64_t groupNonUniformMax_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float16_t groupNonUniformMax_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); + [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float 
groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float value); +float32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] +float64_t groupNonUniformMax_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -1186,111 +1462,381 @@ T groupNonUniformPartitionNV(T value); [[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float16_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float16_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float16_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float16_t atomicMinEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, 
uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float32_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float32_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float32_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float32_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float64_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float64_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float64_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float64_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +float64_t atomicMinEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float16_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float16_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template 
[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float16_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float16_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float16_t> atomicMinEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float32_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float32_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float32_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float32_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float64_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float64_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float64_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float64_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] +enable_if_t, float64_t> atomicMinEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float16_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] 
+[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float16_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float16_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float16_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float16_t atomicMaxEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float32_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float32_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float32_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float32_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float64_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float64_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float64_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float64_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +float64_t atomicMaxEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template 
+[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float16_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float16_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float16_t> atomicMaxEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float64_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float64_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float64_t> 
atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float64_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] +enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float16_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float16_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float pointer, uint32_t memoryScope, uint32_t semantics, float value); +float16_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float16_t atomicAddEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float32_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float32_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float32_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float32_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] 
+[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float64_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float64_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float64_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float64_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +float64_t atomicAddEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float16_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float16_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float16_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat16)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float16_t> atomicAddEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); template 
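Side note: the long run of +/- hunks above is one grammar entry (OpAtomicFAddEXT) fanned out into a typed overload per floating-point width, each gated on its own capability. A minimal Python sketch of that fan-out, with a hypothetical emit callback standing in for gen.py's writer calls:

    # Sketch only: FLOAT_WIDTH_CAPS and emit_atomic_fadd_overloads are
    # illustrative names; the pairing of widths to capabilities mirrors
    # the declarations in the diff above.
    FLOAT_WIDTH_CAPS = {
        "float16_t": "AtomicFloat16AddEXT",
        "float32_t": "AtomicFloat32AddEXT",
        "float64_t": "AtomicFloat64AddEXT",
    }

    def emit_atomic_fadd_overloads(emit):
        for rt, cap in FLOAT_WIDTH_CAPS.items():
            emit(f"[[vk::ext_capability(spv::Capability{cap})]]")
            emit("[[vk::ext_instruction(spv::OpAtomicFAddEXT)]]")
            emit(f"{rt} atomicAddEXT_{cap}([[vk::ext_reference]] {rt} pointer, "
                 f"uint32_t memoryScope, uint32_t semantics, {rt} value);")
            emit("")

    if __name__ == "__main__":
        emit_atomic_fadd_overloads(print)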
[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float value); +enable_if_t, float32_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float64_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float64_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float64_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float64_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); + +template +[[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] +enable_if_t, float64_t> atomicAddEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilitySplitBarrierINTEL)]] [[vk::ext_instruction(spv::OpControlBarrierArriveINTEL)]] From d27eff53b1f6ff1108e84967df7fc0e21913defc Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 15:46:16 +0330 Subject: [PATCH 05/18] hlsl_generator: don't emit instructions with clashing capabilities Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 6 + tools/hlsl_generator/out.hlsl | 216 ---------------------------------- 2 files changed, 6 insertions(+), 216 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index e395aa05e..a1c1f052b 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -288,6 +288,12 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): def writeInst(writer: io.TextIOWrapper, templates, caps, op_name, fn_name, conds, result_type, args): if len(caps) > 0: for cap in caps: + if (("Float16" in cap and result_type != "float16_t") or + ("Float32" in cap and result_type != "float32_t") or + ("Float64" in cap and result_type != "float64_t") or + ("Int16" in cap and result_type != "int16_t" and result_type != "uint16_t") or + ("Int64" in cap and result_type != "int64_t" and result_type != "uint64_t")): continue + final_fn_name = fn_name if (len(caps) > 1): final_fn_name = fn_name + "_" + cap writeInstInner(writer, templates, cap, op_name, final_fn_name, conds, result_type, args) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 128c0bbb1..64b44d943 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -1464,14 +1464,6 @@ T groupNonUniformPartitionNV(T value); [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] 
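The writeInst guard added in this patch drops any overload whose width-specific capability contradicts the overload's result type (for example a CapabilityAtomicFloat32MinMaxEXT overload returning float16_t), which is what deletes the clashing declarations below. A sketch of the same predicate in isolation; capability_matches_type is an illustrative name, the real check is inlined in gen.py:

    # Returns False exactly when the new gen.py guard executes "continue".
    def capability_matches_type(cap: str, result_type: str) -> bool:
        if "Float16" in cap: return result_type == "float16_t"
        if "Float32" in cap: return result_type == "float32_t"
        if "Float64" in cap: return result_type == "float64_t"
        if "Int16" in cap: return result_type in ("int16_t", "uint16_t")
        if "Int64" in cap: return result_type in ("int64_t", "uint64_t")
        return True  # width-agnostic capabilities always pass

    assert capability_matches_type("AtomicFloat32MinMaxEXT", "float32_t")
    assert not capability_matches_type("AtomicFloat32MinMaxEXT", "float16_t")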
-[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float16_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float16_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); @@ -1480,38 +1472,14 @@ float16_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t poi [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float32_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float32_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float32_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float32_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float64_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float64_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float64_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float64_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float64_t atomicMinEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); @@ -1521,16 +1489,6 @@ template [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float16_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template 
-[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float16_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float16_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] @@ -1541,46 +1499,16 @@ template [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float16_t> atomicMinEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float32_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float32_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float32_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float32_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float64_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float64_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float64_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float64_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] @@ -1590,14 +1518,6 @@ enable_if_t, float64_t> atomicMinEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float16_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float16_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] 
-[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float16_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float16_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); @@ -1606,38 +1526,14 @@ float16_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t poi [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float16_t atomicMaxEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float32_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float32_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float32_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float32_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float64_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float64_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float64_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float64_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float64_t atomicMaxEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); @@ -1647,16 +1543,6 @@ template [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float16_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template 
-[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float16_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] @@ -1667,46 +1553,16 @@ template [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float16_t> atomicMaxEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float32_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float32_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float32_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float32_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float64_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float64_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float64_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float64_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] @@ -1716,14 +1572,6 @@ enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float16_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float16_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] 
[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); @@ -1732,38 +1580,14 @@ float16_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t poi [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float32_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float32_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float32_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float32_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float64_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float64_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float64_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float64_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float64_t atomicAddEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); @@ -1773,16 +1597,6 @@ template [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float16_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float16_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] @@ 
-1793,46 +1607,16 @@ template [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float16_t> atomicAddEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float32_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float32_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float32_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float32_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float64_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float64_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float64_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float64_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - template [[vk::ext_capability(spv::CapabilityFloat64)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] From 62f397620db8e9f3e832fcb74332f6d87744192a Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 15:58:14 +0330 Subject: [PATCH 06/18] hlsl_generator: skip OpenCL+INTEL specific instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 3 ++ tools/hlsl_generator/out.hlsl | 88 ----------------------------------- 2 files changed, 3 insertions(+), 88 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index a1c1f052b..80a9006ba 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -124,6 +124,7 @@ def gen(grammer_path, output_path): writer.write("\n//! Execution Modes\nnamespace execution_mode\n{") for em in execution_modes: name = em["enumerant"] + if name.endswith("INTEL"): continue name_l = name[0].lower() + name[1:] writer.write("\n\tvoid " + name_l + "()\n\t{\n\t\tvk::ext_execution_mode(spv::ExecutionMode" + name + ");\n\t}\n") writer.write("}\n") @@ -137,6 +138,8 @@ def gen(grammer_path, output_path): writer.write("\n//! 
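The two one-line continue guards above filter Intel/OpenCL-only enumerants and opcodes by name suffix before anything is emitted. The same idea as a standalone sketch (vulkan_relevant and the sample entries are illustrative, not part of gen.py):

    def vulkan_relevant(entries):
        """Yield grammar entries, skipping Intel/OpenCL-only ones by name."""
        for entry in entries:
            name = entry.get("opname") or entry.get("enumerant") or ""
            if name.endswith("INTEL"):
                continue
            yield entry

    instructions = [
        {"opname": "OpControlBarrier"},
        {"opname": "OpControlBarrierArriveINTEL"},
    ]
    assert [e["opname"] for e in vulkan_relevant(instructions)] == ["OpControlBarrier"]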
Instructions\n") for instruction in grammer["instructions"]: + if instruction["opname"].endswith("INTEL"): continue + match instruction["class"]: case "Atomic": processInst(writer, instruction, InstOptions()) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 64b44d943..d9d5bc092 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -482,56 +482,6 @@ namespace execution_mode vk::ext_execution_mode(spv::ExecutionModeShadingRateInterlockUnorderedEXT); } - void sharedLocalMemorySizeINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeSharedLocalMemorySizeINTEL); - } - - void roundingModeRTPINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTPINTEL); - } - - void roundingModeRTNINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeRoundingModeRTNINTEL); - } - - void floatingPointModeALTINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeALTINTEL); - } - - void floatingPointModeIEEEINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeFloatingPointModeIEEEINTEL); - } - - void maxWorkgroupSizeINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeMaxWorkgroupSizeINTEL); - } - - void maxWorkDimINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeMaxWorkDimINTEL); - } - - void noGlobalOffsetINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeNoGlobalOffsetINTEL); - } - - void numSIMDWorkitemsINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeNumSIMDWorkitemsINTEL); - } - - void schedulerTargetFmaxMhzINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeSchedulerTargetFmaxMhzINTEL); - } - void maximallyReconvergesKHR() { vk::ext_execution_mode(spv::ExecutionModeMaximallyReconvergesKHR); @@ -541,36 +491,6 @@ namespace execution_mode { vk::ext_execution_mode(spv::ExecutionModeFPFastMathDefault); } - - void streamingInterfaceINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeStreamingInterfaceINTEL); - } - - void registerMapInterfaceINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeRegisterMapInterfaceINTEL); - } - - void namedBarrierCountINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeNamedBarrierCountINTEL); - } - - void maximumRegistersINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersINTEL); - } - - void maximumRegistersIdINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeMaximumRegistersIdINTEL); - } - - void namedMaximumRegistersINTEL() - { - vk::ext_execution_mode(spv::ExecutionModeNamedMaximumRegistersINTEL); - } } //! 
Group Operations @@ -1622,14 +1542,6 @@ template [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float64_t> atomicAddEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); -[[vk::ext_capability(spv::CapabilitySplitBarrierINTEL)]] -[[vk::ext_instruction(spv::OpControlBarrierArriveINTEL)]] -void controlBarrierArriveINTEL(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); - -[[vk::ext_capability(spv::CapabilitySplitBarrierINTEL)]] -[[vk::ext_instruction(spv::OpControlBarrierWaitINTEL)]] -void controlBarrierWaitINTEL(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); - } #endif From 78d5eab17e645efa1bebae1ecb6d67e18febb32c Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 17:27:59 +0330 Subject: [PATCH 07/18] hlsl_generator: add checks for final bitcast overload Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 2 +- tools/hlsl_generator/out.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 80a9006ba..059845b4c 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -60,7 +60,7 @@ template [[vk::ext_instruction(spv::OpBitcast)]] -T bitcast(U); +enable_if_t || is_vector_v), T> bitcast(U); """ foot = """} diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index d9d5bc092..96b1f4a6c 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -51,7 +51,7 @@ pointer_t bitcast(uint64_t); template [[vk::ext_instruction(spv::OpBitcast)]] -T bitcast(U); +enable_if_t || is_vector_v), T> bitcast(U); //! Builtins namespace builtin From 5b34f6d815e6fd954dd7f8ebfae2b7c93dd309e6 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 20:31:39 +0330 Subject: [PATCH 08/18] hlsl_generator: add missing capability for BDA load/store Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 12 +++++++----- tools/hlsl_generator/out.hlsl | 20 ++++---------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 059845b4c..960e92f71 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -146,7 +146,7 @@ def gen(grammer_path, output_path): processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) case "Memory": processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) - processInst(writer, instruction, InstOptions(shape=Shape.PSB_RT)) + processInst(writer, instruction, InstOptions(shape=Shape.BDA)) case "Barrier" | "Bit": processInst(writer, instruction, InstOptions()) case "Reserved": @@ -171,7 +171,7 @@ def gen(grammer_path, output_path): class Shape(Enum): DEFAULT = 0, PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround - PSB_RT = 2, # PhysicalStorageBuffer Result Type + BDA = 2, # PhysicalStorageBuffer Result Type class InstOptions(NamedTuple): shape: Shape = Shape.DEFAULT @@ -257,9 +257,10 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "'Pointer'": if options.shape == Shape.PTR_TEMPLATE: args.append("P " + operand_name) - elif options.shape == Shape.PSB_RT: + elif options.shape == Shape.BDA: if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): templates = ["typename T"] + templates + overload_caps.append("PhysicalStorageBufferAddresses") args.append("pointer_t " + operand_name) else: if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): @@ -280,8 +281,9 
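This patch also renames the PhysicalStorageBuffer result shape from PSB_RT to BDA (buffer device address) and routes the PhysicalStorageBufferAddresses capability through it, so every load/store emitted for that shape carries the capability. A simplified sketch of the shape dispatch, with required_caps as an illustrative helper:

    # Sketch: enum values simplified relative to gen.py.
    from enum import Enum

    class Shape(Enum):
        DEFAULT = 0
        PTR_TEMPLATE = 1  # DXC workaround: pointer passed as template param P
        BDA = 2           # PhysicalStorageBuffer (buffer device address) result

    def required_caps(shape: Shape) -> list:
        # BDA access always needs the PhysicalStorageBufferAddresses capability.
        return ["PhysicalStorageBufferAddresses"] if shape is Shape.BDA else []

    assert required_caps(Shape.BDA) == ["PhysicalStorageBufferAddresses"]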
@@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "IdMemorySemantics": args.append(" uint32_t " + operand_name) case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) case "MemoryAccess": - writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + if options.shape != Shape.BDA: + writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) writeInst(writer, templates + ["uint32_t alignment"], overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) case _: return # TODO diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 96b1f4a6c..043a2dcef 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -522,19 +522,13 @@ template [[vk::ext_instruction(spv::OpLoad)]] enable_if_t, T> load(P pointer); -template -[[vk::ext_instruction(spv::OpLoad)]] -T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess); - -template -[[vk::ext_instruction(spv::OpLoad)]] -T load(pointer_t pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); - template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpLoad)]] T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpLoad)]] T load(pointer_t pointer); @@ -554,19 +548,13 @@ template [[vk::ext_instruction(spv::OpStore)]] enable_if_t, void> store(P pointer, T object); -template -[[vk::ext_instruction(spv::OpStore)]] -void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); - -template -[[vk::ext_instruction(spv::OpStore)]] -void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); - template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpStore)]] void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpStore)]] void store(pointer_t pointer, T object); From 481e3bd14158c1b8bbc2a027e421b7c9a6b69e98 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Sun, 8 Sep 2024 20:33:36 +0330 Subject: [PATCH 09/18] hlsl_generator: add type constraint for bit instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 4 ++++ tools/hlsl_generator/out.hlsl | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 960e92f71..33ebcfc27 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -217,6 +217,10 
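For BDA pointers the generic memoryAccess-literal overloads are no longer emitted; only the explicitly Aligned form survives (the bare form without memory operands is emitted separately after the operand loop), since PhysicalStorageBuffer access must state its alignment. A sketch of that overload policy, with memory_access_overloads as an illustrative helper:

    ALIGNED = 0x00000002  # spv::MemoryAccessAlignedMask

    def memory_access_overloads(is_bda: bool):
        # Argument lists appended by the MemoryAccess operand case;
        # the bare no-operand overload is written elsewhere.
        overloads = []
        if not is_bda:
            overloads.append(["[[vk::ext_literal]] uint32_t memoryAccess"])
            overloads.append(["[[vk::ext_literal]] uint32_t memoryAccess",
                              "[[vk::ext_literal]] uint32_t memoryAccessParam"])
        overloads.append([f"[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/{ALIGNED:#010x}",
                          "[[vk::ext_literal]] uint32_t __alignment = alignment"])
        return overloads

    assert len(memory_access_overloads(is_bda=True)) == 1
    assert len(memory_access_overloads(is_bda=False)) == 3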
@@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): result_types = ["float16_t", "float32_t", "float64_t"] break + match instruction["class"]: + case "Bit": + if len(result_types) == 0: conds.append("(is_signed_v || is_unsigned_v)") + if "operands" in instruction: operands = instruction["operands"] if operands[0]["kind"] == "IdResultType": diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 043a2dcef..9fe23cc15 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -561,7 +561,7 @@ void store(pointer_t pointer, T objec template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldInsert)]] -T bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); +enable_if_t<(is_signed_v || is_unsigned_v), T> bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldSExtract)]] @@ -606,11 +606,11 @@ uint64_t bitFieldExtract_Int64(uint64_t base, uint32_t offset, uint32_t count); template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitReverse)]] -T bitReverse(T base); +enable_if_t<(is_signed_v || is_unsigned_v), T> bitReverse(T base); template [[vk::ext_instruction(spv::OpBitCount)]] -T bitCount(T base); +enable_if_t<(is_signed_v || is_unsigned_v), T> bitCount(T base); [[vk::ext_instruction(spv::OpControlBarrier)]] void controlBarrier(uint32_t executionScope, uint32_t memoryScope, uint32_t semantics); From 5a13b58ff1329108b2e285386e446d1e8da6640a Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 00:19:39 +0330 Subject: [PATCH 10/18] hlsl_generator: fix vector instructions type Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 138 +++++++++++++++++----------------- tools/hlsl_generator/out.hlsl | 48 +++--------- 2 files changed, 82 insertions(+), 104 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 33ebcfc27..14001b283 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -195,6 +195,8 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): if options.shape == Shape.PTR_TEMPLATE: templates.append("typename P") conds.append("is_spirv_type_v
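The new Bit-class branch above constrains the generic T overloads of OpBitFieldInsert, OpBitReverse and OpBitCount to integer types whenever no width-specific result types were derived from capabilities. A sketch of the condition building (bit_class_conds is an illustrative name; the angle brackets are restored here even though the surrounding diff rendering dropped them):

    def bit_class_conds(result_types: list) -> list:
        # Only the generic-T overload (empty result_types) gets the guard.
        conds = []
        if not result_types:
            conds.append("(is_signed_v<T> || is_unsigned_v<T>)")
        return conds

    # OpBitReverse carries no width-specific caps, so T must be integral:
    assert bit_class_conds([]) == ["(is_signed_v<T> || is_unsigned_v<T>)"]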
") + elif options.shape == Shape.BDA: + caps.append("PhysicalStorageBufferAddresses") # split upper case words matches = [(m.group(1), m.span(1)) for m in re.finditer(r'([A-Z])[A-Z][a-z]', fn_name)] @@ -242,74 +244,74 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "float16_t": overload_caps.append("Float16") case "float64_t": overload_caps.append("Float64") - op_ty = "T" - if options.op_ty != None: - op_ty = options.op_ty - elif rt != "void": - op_ty = rt - - if (not "typename T" in templates) and (rt == "T"): - templates = ["typename T"] + templates - - args = [] - for operand in operands: - operand_name = operand["name"].strip("'") if "name" in operand else None - operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" - match operand["kind"]: - case "IdRef": - match operand["name"]: - case "'Pointer'": - if options.shape == Shape.PTR_TEMPLATE: - args.append("P " + operand_name) - elif options.shape == Shape.BDA: - if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): - templates = ["typename T"] + templates - overload_caps.append("PhysicalStorageBufferAddresses") - args.append("pointer_t " + operand_name) - else: - if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): - templates = ["typename T"] + templates - args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) - case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": - if (not "typename T" in templates) and (rt == "T" or op_ty == "T"): - templates = ["typename T"] + templates - args.append(op_ty + " " + operand_name) - case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": - args.append("uint32_t " + operand_name) - case "'Predicate'": args.append("bool " + operand_name) - case "'ClusterSize'": - if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload - else: return # TODO - case _: return # TODO - case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") - case "IdMemorySemantics": args.append(" uint32_t " + operand_name) - case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) - case "MemoryAccess": - if options.shape != Shape.BDA: - writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) - writeInst(writer, templates + ["uint32_t alignment"], overload_caps, op_name, fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) - case _: return # TODO - - writeInst(writer, templates, overload_caps, op_name, fn_name, conds, rt, args) - - -def writeInst(writer: io.TextIOWrapper, templates, caps, op_name, fn_name, conds, result_type, args): - if len(caps) > 0: - for cap in caps: - if (("Float16" in cap and result_type != "float16_t") or - ("Float32" in cap and result_type != "float32_t") or - ("Float64" in cap and result_type != "float64_t") or - ("Int16" in cap and result_type != "int16_t" and result_type != "uint16_t") or - ("Int64" in cap and result_type != "int64_t" and result_type != "uint64_t")): continue - - final_fn_name = fn_name - if (len(caps) > 1): final_fn_name = fn_name + "_" + cap - writeInstInner(writer, templates, cap, op_name, final_fn_name, conds, result_type, args) 
- else: - writeInstInner(writer, templates, None, op_name, fn_name, conds, result_type, args) - -def writeInstInner(writer: io.TextIOWrapper, templates, cap, op_name, fn_name, conds, result_type, args): + for cap in overload_caps or [None]: + final_fn_name = fn_name + "_" + cap if (len(overload_caps) > 1) else fn_name + final_templates = templates.copy() + + if (not "typename T" in final_templates) and (rt == "T"): + final_templates = ["typename T"] + final_templates + + if len(overload_caps) > 0: + if (("Float16" in cap and rt != "float16_t") or + ("Float32" in cap and rt != "float32_t") or + ("Float64" in cap and rt != "float64_t") or + ("Int16" in cap and rt != "int16_t" and rt != "uint16_t") or + ("Int64" in cap and rt != "int64_t" and rt != "uint64_t")): continue + + if "Vector" in cap: + rt = "vector<" + rt + ", N> " + final_templates.append("typename N") + + op_ty = "T" + if options.op_ty != None: + op_ty = options.op_ty + elif rt != "void": + op_ty = rt + + args = [] + for operand in operands: + operand_name = operand["name"].strip("'") if "name" in operand else None + operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" + match operand["kind"]: + case "IdRef": + match operand["name"]: + case "'Pointer'": + if options.shape == Shape.PTR_TEMPLATE: + args.append("P " + operand_name) + elif options.shape == Shape.BDA: + if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): + final_templates = ["typename T"] + final_templates + args.append("pointer_t " + operand_name) + else: + if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): + final_templates = ["typename T"] + final_templates + args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) + case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": + if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): + final_templates = ["typename T"] + final_templates + args.append(op_ty + " " + operand_name) + case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": + args.append("uint32_t " + operand_name) + case "'Predicate'": args.append("bool " + operand_name) + case "'ClusterSize'": + if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload + else: return # TODO + case _: return # TODO + case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") + case "IdMemorySemantics": args.append(" uint32_t " + operand_name) + case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) + case "MemoryAccess": + assert len(overload_caps) <= 1 + if options.shape != Shape.BDA: + writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, final_templates + ["uint32_t alignment"], cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + case _: return # TODO + + writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args) + + +def writeInst(writer: io.TextIOWrapper, templates, cap, op_name, fn_name, conds, result_type, args): if len(templates) > 0: writer.write("template<" + ", ".join(templates) + ">\n") if (cap != None): diff --git 
a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 9fe23cc15..66300cc44 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -1372,13 +1372,10 @@ T groupNonUniformPartitionNV(T value); [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float16_t atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float16_t atomicMinEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +vector atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] @@ -1397,15 +1394,10 @@ template [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float16_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float16_t> atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float16_t> atomicMinEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +enable_if_t, vector > atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] @@ -1426,13 +1418,10 @@ enable_if_t, float64_t> atomicMinEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float16_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float16_t atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float16_t atomicMaxEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +vector atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] @@ -1451,15 +1440,10 @@ template [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template 
-[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float16_t> atomicMaxEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +enable_if_t, vector > atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] @@ -1480,13 +1464,10 @@ enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float16_t atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float16_t atomicAddEXT_Float16([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +vector atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] @@ -1505,15 +1486,10 @@ template [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float16_t> atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float16_t> atomicAddEXT_Float16(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); +enable_if_t, vector > atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] From 08de8a52094331ef63688c1bc119bef134cdf1a9 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 00:45:29 +0330 Subject: [PATCH 11/18] hlsl_generator: add missing invocation instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 114 ++++++++++++++++++---------------- tools/hlsl_generator/out.hlsl | 41 ++++++++++++ 2 files changed, 100 insertions(+), 55 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 14001b283..44043830a 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -185,6 +185,7 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): op_name = instruction["opname"] fn_name = op_name[2].lower() + op_name[3:] result_types = [] + exts = instruction["extensions"] if "extensions" in instruction else [] if "capabilities" in instruction and len(instruction["capabilities"]) > 0: for cap in instruction["capabilities"]: @@ -223,56 +224,55 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "Bit": if len(result_types) == 0: conds.append("(is_signed_v || is_unsigned_v)") - if "operands" in instruction: - operands = instruction["operands"] - if operands[0]["kind"] == 
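Also part of this patch: a capability whose name contains "Vector" (AtomicFloat16VectorNV) widens the scalar result to vector<T, N> and appends an extra template parameter for N. A sketch of that step; N is shown here as an integral parameter (uint32_t), whereas the hunk above literally appends "typename N":

    def apply_vector_cap(cap, rt: str, templates: list):
        # Widen the result type and grow the template list for Vector caps.
        if cap is not None and "Vector" in cap:
            templates = templates + ["uint32_t N"]
            rt = "vector<" + rt + ", N> "
        return rt, templates

    rt, tpl = apply_vector_cap("AtomicFloat16VectorNV", "float16_t", [])
    assert rt.strip() == "vector<float16_t, N>" and tpl == ["uint32_t N"]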
"IdResultType": - operands = operands[2:] - if len(result_types) == 0: - if options.result_ty == None: - result_types = ["T"] - else: - result_types = [options.result_ty] - else: - assert len(result_types) == 0 - result_types = ["void"] - - for rt in result_types: - overload_caps = caps.copy() - match rt: - case "uint16_t" | "int16_t": overload_caps.append("Int16") - case "uint64_t" | "int64_t": overload_caps.append("Int64") - case "float16_t": overload_caps.append("Float16") - case "float64_t": overload_caps.append("Float64") - - for cap in overload_caps or [None]: - final_fn_name = fn_name + "_" + cap if (len(overload_caps) > 1) else fn_name - final_templates = templates.copy() - - if (not "typename T" in final_templates) and (rt == "T"): - final_templates = ["typename T"] + final_templates - - if len(overload_caps) > 0: - if (("Float16" in cap and rt != "float16_t") or - ("Float32" in cap and rt != "float32_t") or - ("Float64" in cap and rt != "float64_t") or - ("Int16" in cap and rt != "int16_t" and rt != "uint16_t") or - ("Int64" in cap and rt != "int64_t" and rt != "uint64_t")): continue - - if "Vector" in cap: - rt = "vector<" + rt + ", N> " - final_templates.append("typename N") + if "operands" in instruction and instruction["operands"][0]["kind"] == "IdResultType": + if len(result_types) == 0: + if options.result_ty == None: + result_types = ["T"] + else: + result_types = [options.result_ty] + else: + assert len(result_types) == 0 + result_types = ["void"] + + for rt in result_types: + overload_caps = caps.copy() + match rt: + case "uint16_t" | "int16_t": overload_caps.append("Int16") + case "uint64_t" | "int64_t": overload_caps.append("Int64") + case "float16_t": overload_caps.append("Float16") + case "float64_t": overload_caps.append("Float64") + + for cap in overload_caps or [None]: + final_fn_name = fn_name + "_" + cap if (len(overload_caps) > 1) else fn_name + final_templates = templates.copy() + + if (not "typename T" in final_templates) and (rt == "T"): + final_templates = ["typename T"] + final_templates + + if len(overload_caps) > 0: + if (("Float16" in cap and rt != "float16_t") or + ("Float32" in cap and rt != "float32_t") or + ("Float64" in cap and rt != "float64_t") or + ("Int16" in cap and rt != "int16_t" and rt != "uint16_t") or + ("Int64" in cap and rt != "int64_t" and rt != "uint64_t")): continue - op_ty = "T" - if options.op_ty != None: - op_ty = options.op_ty - elif rt != "void": - op_ty = rt - - args = [] - for operand in operands: + if "Vector" in cap: + rt = "vector<" + rt + ", N> " + final_templates.append("typename N") + + op_ty = "T" + if options.op_ty != None: + op_ty = options.op_ty + elif rt != "void": + op_ty = rt + + args = [] + if "operands" in instruction: + for operand in instruction["operands"]: operand_name = operand["name"].strip("'") if "name" in operand else None operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" match operand["kind"]: + case "IdResult" | "IdResultType": continue case "IdRef": match operand["name"]: case "'Pointer'": @@ -295,27 +295,29 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "'Predicate'": args.append("bool " + operand_name) case "'ClusterSize'": if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload - else: return # TODO - case _: return # TODO + else: return ignore(op_name) # TODO + case _: return ignore(op_name) # TODO case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") case 
"IdMemorySemantics": args.append(" uint32_t " + operand_name) case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) case "MemoryAccess": assert len(overload_caps) <= 1 if options.shape != Shape.BDA: - writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) - writeInst(writer, final_templates + ["uint32_t alignment"], cap, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) - case _: return # TODO + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + case _: return ignore(op_name) # TODO - writeInst(writer, final_templates, cap, op_name, final_fn_name, conds, rt, args) + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args) -def writeInst(writer: io.TextIOWrapper, templates, cap, op_name, fn_name, conds, result_type, args): +def writeInst(writer: io.TextIOWrapper, templates, cap, exts, op_name, fn_name, conds, result_type, args): if len(templates) > 0: writer.write("template<" + ", ".join(templates) + ">\n") - if (cap != None): + if cap != None: writer.write("[[vk::ext_capability(spv::Capability" + cap + ")]]\n") + for ext in exts: + writer.write("[[vk::ext_extension(\"" + ext + "\")]]\n") writer.write("[[vk::ext_instruction(spv::" + op_name + ")]]\n") if len(conds) > 0: writer.write("enable_if_t<" + " && ".join(conds) + ", " + result_type + ">") @@ -323,6 +325,8 @@ def writeInst(writer: io.TextIOWrapper, templates, cap, op_name, fn_name, conds, writer.write(result_type) writer.write(" " + fn_name + "(" + ", ".join(args) + ");\n\n") +def ignore(op_name): + print("\033[93mWARNING\033[0m: instruction " + op_name + " ignored") if __name__ == "__main__": script_dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 66300cc44..1ec9cd684 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -1365,9 +1365,40 @@ T groupNonUniformQuadAnyKHR(bool predicate); template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] +[[vk::ext_extension("SPV_NV_shader_subgroup_partitioned")]] [[vk::ext_instruction(spv::OpGroupNonUniformPartitionNV)]] T groupNonUniformPartitionNV(T value); +[[vk::ext_capability(spv::CapabilityFragmentShaderSampleInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpBeginInvocationInterlockEXT)]] +void beginInvocationInterlockEXT_FragmentShaderSampleInterlockEXT(); + +[[vk::ext_capability(spv::CapabilityFragmentShaderPixelInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpBeginInvocationInterlockEXT)]] +void 
beginInvocationInterlockEXT_FragmentShaderPixelInterlockEXT(); + +[[vk::ext_capability(spv::CapabilityFragmentShaderShadingRateInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpBeginInvocationInterlockEXT)]] +void beginInvocationInterlockEXT_FragmentShaderShadingRateInterlockEXT(); + +[[vk::ext_capability(spv::CapabilityFragmentShaderSampleInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] +void endInvocationInterlockEXT_FragmentShaderSampleInterlockEXT(); + +[[vk::ext_capability(spv::CapabilityFragmentShaderPixelInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] +void endInvocationInterlockEXT_FragmentShaderPixelInterlockEXT(); + +[[vk::ext_capability(spv::CapabilityFragmentShaderShadingRateInterlockEXT)]] +[[vk::ext_extension("SPV_EXT_fragment_shader_interlock")]] +[[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] +void endInvocationInterlockEXT_FragmentShaderShadingRateInterlockEXT(); + [[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); @@ -1461,48 +1492,58 @@ template enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] vector atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float32_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float64_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); [[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float64_t atomicAddEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, vector > 
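// Hedged usage sketch, not generated output: pairing the interlock intrinsics
// declared above around a fragment shader's critical section. The entry point
// name and the choice of ordered pixel interlock are assumptions for
// illustration; the execution_mode helper comes from the generated namespace.
void psMain()
{
    nbl::hlsl::spirv::execution_mode::pixelInterlockOrderedEXT();
    nbl::hlsl::spirv::beginInvocationInterlockEXT_FragmentShaderPixelInterlockEXT();
    // exclusive per-pixel read-modify-write goes here
    nbl::hlsl::spirv::endInvocationInterlockEXT_FragmentShaderPixelInterlockEXT();
}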
atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); template [[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float32_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); template [[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float64_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); template [[vk::ext_capability(spv::CapabilityFloat64)]] +[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float64_t> atomicAddEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); From c9da81ab1a0b7226de06c71aac7314057beab4e1 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 16:13:36 +0330 Subject: [PATCH 12/18] hlsl_generator: add missing capability of some builtins Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 131 +++++++++++++++++++--------------- tools/hlsl_generator/out.hlsl | 33 +++++++-- 2 files changed, 102 insertions(+), 62 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 44043830a..64e7c398f 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -28,16 +28,13 @@ namespace spirv { -//! General Decls -template -using pointer_t = vk::SpirvOpaqueType >, T>; - // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] pointer_t copyObject([[vk::ext_reference]] T value); -//! Std 450 Extended set operations +// TODO: Generate extended instructions +//! Std 450 Extended set instructions template [[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); @@ -88,37 +85,58 @@ def gen(grammer_path, output_path): writer.write("\n//! 
Builtins\nnamespace builtin\n{\n") for b in builtins: - builtin_type = None + b_name = b["enumerant"] + b_type = None + b_cap = None is_output = False - builtin_name = b["enumerant"] - match builtin_name: - case "HelperInvocation": builtin_type = "bool" - case "VertexIndex": builtin_type = "uint32_t" - case "InstanceIndex": builtin_type = "uint32_t" - case "NumWorkgroups": builtin_type = "uint32_t3" - case "WorkgroupId": builtin_type = "uint32_t3" - case "LocalInvocationId": builtin_type = "uint32_t3" - case "GlobalInvocationId": builtin_type = "uint32_t3" - case "LocalInvocationIndex": builtin_type = "uint32_t" - case "SubgroupEqMask": builtin_type = "uint32_t4" - case "SubgroupGeMask": builtin_type = "uint32_t4" - case "SubgroupGtMask": builtin_type = "uint32_t4" - case "SubgroupLeMask": builtin_type = "uint32_t4" - case "SubgroupLtMask": builtin_type = "uint32_t4" - case "SubgroupSize": builtin_type = "uint32_t" - case "NumSubgroups": builtin_type = "uint32_t" - case "SubgroupId": builtin_type = "uint32_t" - case "SubgroupLocalInvocationId": builtin_type = "uint32_t" + match b_name: + case "HelperInvocation": b_type = "bool" + case "VertexIndex": b_type = "uint32_t" + case "InstanceIndex": b_type = "uint32_t" + case "NumWorkgroups": b_type = "uint32_t3" + case "WorkgroupId": b_type = "uint32_t3" + case "LocalInvocationId": b_type = "uint32_t3" + case "GlobalInvocationId": b_type = "uint32_t3" + case "LocalInvocationIndex": b_type = "uint32_t" + case "SubgroupEqMask": + b_type = "uint32_t4" + b_cap = "GroupNonUniformBallot" + case "SubgroupGeMask": + b_type = "uint32_t4" + b_cap = "GroupNonUniformBallot" + case "SubgroupGtMask": + b_type = "uint32_t4" + b_cap = "GroupNonUniformBallot" + case "SubgroupLeMask": + b_type = "uint32_t4" + b_cap = "GroupNonUniformBallot" + case "SubgroupLtMask": + b_type = "uint32_t4" + b_cap = "GroupNonUniformBallot" + case "SubgroupSize": + b_type = "uint32_t" + b_cap = "GroupNonUniform" + case "NumSubgroups": + b_type = "uint32_t" + b_cap = "GroupNonUniform" + case "SubgroupId": + b_type = "uint32_t" + b_cap = "GroupNonUniform" + case "SubgroupLocalInvocationId": + b_type = "uint32_t" + b_cap = "GroupNonUniform" case "Position": - builtin_type = "float32_t4" + b_type = "float32_t4" is_output = True case _: continue + if b_cap != None: + writer.write("[[vk::ext_capability(spv::Capability" + b_cap + ")]]\n") if is_output: - writer.write("[[vk::ext_builtin_output(spv::BuiltIn" + builtin_name + ")]]\n") - writer.write("static " + builtin_type + " " + builtin_name + ";\n") + writer.write("[[vk::ext_builtin_output(spv::BuiltIn" + b_name + ")]]\n") + writer.write("static " + b_type + " " + b_name + ";\n") else: - writer.write("[[vk::ext_builtin_input(spv::BuiltIn" + builtin_name + ")]]\n") - writer.write("static const " + builtin_type + " " + builtin_name + ";\n") + writer.write("[[vk::ext_builtin_input(spv::BuiltIn" + b_name + ")]]\n") + writer.write("static const " + b_type + " " + b_name + ";\n\n") writer.write("}\n") writer.write("\n//! 
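# Minimal sketch mirroring the writer calls above (assumption: same formatting):
# the exact text emitted for one capability-gated input builtin.
b_name, b_type, b_cap = "SubgroupEqMask", "uint32_t4", "GroupNonUniformBallot"
decl = ""
if b_cap != None:
    decl += "[[vk::ext_capability(spv::Capability" + b_cap + ")]]\n"
decl += "[[vk::ext_builtin_input(spv::BuiltIn" + b_name + ")]]\n"
decl += "static const " + b_type + " " + b_name + ";\n"
print(decl)  # matches the SubgroupEqMask block in out.hlsl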
Execution Modes\nnamespace execution_mode\n{") @@ -142,28 +160,28 @@ def gen(grammer_path, output_path): match instruction["class"]: case "Atomic": - processInst(writer, instruction, InstOptions()) - processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) + processInst(writer, instruction) + processInst(writer, instruction, Shape.PTR_TEMPLATE) case "Memory": - processInst(writer, instruction, InstOptions(shape=Shape.PTR_TEMPLATE)) - processInst(writer, instruction, InstOptions(shape=Shape.BDA)) + processInst(writer, instruction, Shape.PTR_TEMPLATE) + processInst(writer, instruction, Shape.BDA) case "Barrier" | "Bit": - processInst(writer, instruction, InstOptions()) + processInst(writer, instruction) case "Reserved": match instruction["opname"]: case "OpBeginInvocationInterlockEXT" | "OpEndInvocationInterlockEXT": - processInst(writer, instruction, InstOptions()) + processInst(writer, instruction) case "Non-Uniform": match instruction["opname"]: case "OpGroupNonUniformElect" | "OpGroupNonUniformAll" | "OpGroupNonUniformAny" | "OpGroupNonUniformAllEqual": - processInst(writer, instruction, InstOptions(result_ty="bool")) + processInst(writer, instruction, result_ty="bool") case "OpGroupNonUniformBallot": - processInst(writer, instruction, InstOptions(result_ty="uint32_t4",op_ty="bool")) + processInst(writer, instruction, result_ty="uint32_t4",prefered_op_ty="bool") case "OpGroupNonUniformInverseBallot" | "OpGroupNonUniformBallotBitExtract": - processInst(writer, instruction, InstOptions(result_ty="bool",op_ty="uint32_t4")) + processInst(writer, instruction, result_ty="bool",prefered_op_ty="uint32_t4") case "OpGroupNonUniformBallotBitCount" | "OpGroupNonUniformBallotFindLSB" | "OpGroupNonUniformBallotFindMSB": - processInst(writer, instruction, InstOptions(result_ty="uint32_t",op_ty="uint32_t4")) - case _: processInst(writer, instruction, InstOptions()) + processInst(writer, instruction, result_ty="uint32_t",prefered_op_ty="uint32_t4") + case _: processInst(writer, instruction) case _: continue # TODO writer.write(foot) @@ -173,12 +191,11 @@ class Shape(Enum): PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround BDA = 2, # PhysicalStorageBuffer Result Type -class InstOptions(NamedTuple): - shape: Shape = Shape.DEFAULT - result_ty: Optional[str] = None - op_ty: Optional[str] = None - -def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): +def processInst(writer: io.TextIOWrapper, + instruction, + shape: Shape = Shape.DEFAULT, + result_ty: Optional[str] = None, + prefered_op_ty: Optional[str] = None): templates = [] caps = [] conds = [] @@ -193,10 +210,10 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): if cap == "Shader": continue caps.append(cap) - if options.shape == Shape.PTR_TEMPLATE: + if shape == Shape.PTR_TEMPLATE: templates.append("typename P") conds.append("is_spirv_type_v
<P>
") - elif options.shape == Shape.BDA: + elif shape == Shape.BDA: caps.append("PhysicalStorageBufferAddresses") # split upper case words @@ -226,10 +243,10 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): if "operands" in instruction and instruction["operands"][0]["kind"] == "IdResultType": if len(result_types) == 0: - if options.result_ty == None: + if result_ty == None: result_types = ["T"] else: - result_types = [options.result_ty] + result_types = [result_ty] else: assert len(result_types) == 0 result_types = ["void"] @@ -261,8 +278,8 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): final_templates.append("typename N") op_ty = "T" - if options.op_ty != None: - op_ty = options.op_ty + if prefered_op_ty != None: + op_ty = prefered_op_ty elif rt != "void": op_ty = rt @@ -276,9 +293,9 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "IdRef": match operand["name"]: case "'Pointer'": - if options.shape == Shape.PTR_TEMPLATE: + if shape == Shape.PTR_TEMPLATE: args.append("P " + operand_name) - elif options.shape == Shape.BDA: + elif shape == Shape.BDA: if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): final_templates = ["typename T"] + final_templates args.append("pointer_t " + operand_name) @@ -302,7 +319,7 @@ def processInst(writer: io.TextIOWrapper, instruction, options: InstOptions): case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) case "MemoryAccess": assert len(overload_caps) <= 1 - if options.shape != Shape.BDA: + if shape != Shape.BDA: writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) @@ -326,7 +343,7 @@ def writeInst(writer: io.TextIOWrapper, templates, cap, exts, op_name, fn_name, writer.write(" " + fn_name + "(" + ", ".join(args) + ");\n\n") def ignore(op_name): - print("\033[93mWARNING\033[0m: instruction " + op_name + " ignored") + print("\033[94mIGNORED\033[0m: " + op_name) if __name__ == "__main__": script_dir_path = os.path.abspath(os.path.dirname(__file__)) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 1ec9cd684..85db3125f 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -19,16 +19,13 @@ namespace hlsl namespace spirv { -//! General Decls -template -using pointer_t = vk::SpirvOpaqueType >, T>; - // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] pointer_t copyObject([[vk::ext_reference]] T value); -//! Std 450 Extended set operations +// TODO: Generate extended instructions +//! 
Std 450 Extended set instructions template [[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); @@ -60,38 +57,64 @@ namespace builtin static float32_t4 Position; [[vk::ext_builtin_input(spv::BuiltInHelperInvocation)]] static const bool HelperInvocation; + [[vk::ext_builtin_input(spv::BuiltInNumWorkgroups)]] static const uint32_t3 NumWorkgroups; + [[vk::ext_builtin_input(spv::BuiltInWorkgroupId)]] static const uint32_t3 WorkgroupId; + [[vk::ext_builtin_input(spv::BuiltInLocalInvocationId)]] static const uint32_t3 LocalInvocationId; + [[vk::ext_builtin_input(spv::BuiltInGlobalInvocationId)]] static const uint32_t3 GlobalInvocationId; + [[vk::ext_builtin_input(spv::BuiltInLocalInvocationIndex)]] static const uint32_t LocalInvocationIndex; + +[[vk::ext_capability(spv::CapabilityGroupNonUniform)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupSize)]] static const uint32_t SubgroupSize; + +[[vk::ext_capability(spv::CapabilityGroupNonUniform)]] [[vk::ext_builtin_input(spv::BuiltInNumSubgroups)]] static const uint32_t NumSubgroups; + +[[vk::ext_capability(spv::CapabilityGroupNonUniform)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupId)]] static const uint32_t SubgroupId; + +[[vk::ext_capability(spv::CapabilityGroupNonUniform)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupLocalInvocationId)]] static const uint32_t SubgroupLocalInvocationId; + [[vk::ext_builtin_input(spv::BuiltInVertexIndex)]] static const uint32_t VertexIndex; + [[vk::ext_builtin_input(spv::BuiltInInstanceIndex)]] static const uint32_t InstanceIndex; + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupEqMask)]] static const uint32_t4 SubgroupEqMask; + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupGeMask)]] static const uint32_t4 SubgroupGeMask; + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupGtMask)]] static const uint32_t4 SubgroupGtMask; + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupLeMask)]] static const uint32_t4 SubgroupLeMask; + +[[vk::ext_capability(spv::CapabilityGroupNonUniformBallot)]] [[vk::ext_builtin_input(spv::BuiltInSubgroupLtMask)]] static const uint32_t4 SubgroupLtMask; + } //! Execution Modes From 725bdb4d5c07e8540e24fb202f6e7d566c6c9a7d Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 18:14:41 +0330 Subject: [PATCH 13/18] hlsl_generator: fix typo Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 2 +- tools/hlsl_generator/out.hlsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 64e7c398f..c241a969e 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -36,7 +36,7 @@ // TODO: Generate extended instructions //! 
Std 450 Extended set instructions template -[[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] +[[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 85db3125f..cd8ab90e6 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -27,7 +27,7 @@ pointer_t copyObject([[vk::ext_reference]] T value); // TODO: Generate extended instructions //! Std 450 Extended set instructions template -[[vk::ext_instruction(34, /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] +[[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) From 9778007635a1558e43d73830712665e75aa3c823 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 18:22:27 +0330 Subject: [PATCH 14/18] hlsl_generator: fix another typo Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 2 +- tools/hlsl_generator/out.hlsl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index c241a969e..72abba705 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -275,7 +275,7 @@ def processInst(writer: io.TextIOWrapper, if "Vector" in cap: rt = "vector<" + rt + ", N> " - final_templates.append("typename N") + final_templates.append("uint32_t N") op_ty = "T" if prefered_op_ty != None: diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index cd8ab90e6..edd6d8856 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -1426,7 +1426,7 @@ void endInvocationInterlockEXT_FragmentShaderShadingRateInterlockEXT(); [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] vector atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); @@ -1448,7 +1448,7 @@ template [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, float16_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMinEXT)]] enable_if_t, vector > atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); @@ -1472,7 +1472,7 @@ enable_if_t, float64_t> atomicMinEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] float16_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] vector atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t 
memoryScope, uint32_t semantics, vector value); @@ -1494,7 +1494,7 @@ template [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] enable_if_t, vector > atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); @@ -1519,7 +1519,7 @@ enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint3 [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] @@ -1546,7 +1546,7 @@ template [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); -template +template [[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] [[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] [[vk::ext_instruction(spv::OpAtomicFAddEXT)]] From e0919e881670cb6134ade10c6ac35211cd22cc30 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 18:29:12 +0330 Subject: [PATCH 15/18] hlsl_generator: update pointer_t impl Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 19 +++++++++++++++++++ tools/hlsl_generator/out.hlsl | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 72abba705..53d783f73 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -28,6 +28,25 @@ namespace spirv { +//! General Decls +template +NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type::value; + +template +struct pointer +{ + using type = vk::SpirvOpaqueType >, T>; +}; +// partial spec for BDA +template +struct pointer +{ + using type = vk::SpirvType >, T>; +}; + +template +using pointer_t = typename pointer::type; + // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index edd6d8856..a7e12a6f3 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -19,6 +19,25 @@ namespace hlsl namespace spirv { +//! 
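// Hedged sketch, kept entirely as a comment (the template argument lists here
// are reconstructions; the authoritative declarations of pointer/bitcast are in
// this header): with the new pointer struct and its PhysicalStorageBuffer
// partial specialization, a BDA pointer can round-trip through uint64_t via the
// bitcast declarations at the top of this header:
//   uint64_t addr = ...;
//   pointer_t<spv::StorageClassPhysicalStorageBuffer, float32_t> p =
//       bitcast<spv::StorageClassPhysicalStorageBuffer, float32_t>(addr);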
General Decls +template +NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type::value; + +template +struct pointer +{ + using type = vk::SpirvOpaqueType >, T>; +}; +// partial spec for BDA +template +struct pointer +{ + using type = vk::SpirvType >, T>; +}; + +template +using pointer_t = typename pointer::type; + // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] From a2e0b6acfc94443054e9ff6ae14939a7d5f02838 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Mon, 9 Sep 2024 23:07:14 +0330 Subject: [PATCH 16/18] hlsl_generator: don't emit unneccesary overloads Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 173 +++++----- tools/hlsl_generator/out.hlsl | 614 ++++------------------------------ 2 files changed, 149 insertions(+), 638 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 53d783f73..bb9d23867 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -220,7 +220,6 @@ def processInst(writer: io.TextIOWrapper, conds = [] op_name = instruction["opname"] fn_name = op_name[2].lower() + op_name[3:] - result_types = [] exts = instruction["extensions"] if "extensions" in instruction else [] if "capabilities" in instruction and len(instruction["capabilities"]) > 0: @@ -244,107 +243,97 @@ def processInst(writer: io.TextIOWrapper, conds.append("(is_signed_v || is_unsigned_v)") break case "U": - fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["uint16_t", "uint32_t", "uint64_t"] + conds.append("is_unsigned_v") break case "S": - fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["int16_t", "int32_t", "int64_t"] + conds.append("is_signed_v") break case "F": - fn_name = fn_name[0:m[1][0]] + fn_name[m[1][1]:] - result_types = ["float16_t", "float32_t", "float64_t"] + conds.append("is_floating_point") break - - match instruction["class"]: - case "Bit": - if len(result_types) == 0: conds.append("(is_signed_v || is_unsigned_v)") + else: + if instruction["class"] == "Bit": + conds.append("(is_signed_v || is_unsigned_v)") if "operands" in instruction and instruction["operands"][0]["kind"] == "IdResultType": - if len(result_types) == 0: - if result_ty == None: - result_types = ["T"] - else: - result_types = [result_ty] + if result_ty == None: + result_ty = "T" else: - assert len(result_types) == 0 - result_types = ["void"] - - for rt in result_types: - overload_caps = caps.copy() - match rt: - case "uint16_t" | "int16_t": overload_caps.append("Int16") - case "uint64_t" | "int64_t": overload_caps.append("Int64") - case "float16_t": overload_caps.append("Float16") - case "float64_t": overload_caps.append("Float64") - - for cap in overload_caps or [None]: - final_fn_name = fn_name + "_" + cap if (len(overload_caps) > 1) else fn_name - final_templates = templates.copy() + result_ty = "void" + + match result_ty: + case "uint16_t" | "int16_t": caps.append("Int16") + case "uint64_t" | "int64_t": caps.append("Int64") + case "float16_t": caps.append("Float16") + case "float64_t": caps.append("Float64") + + for cap in caps or [None]: + final_fn_name = fn_name + "_" + cap if (len(caps) > 1) else fn_name + final_templates = templates.copy() + + if (not "typename T" in final_templates) and (result_ty == "T"): + final_templates = ["typename T"] + final_templates + + if len(caps) > 0: + if (("Float16" in cap and result_ty != "float16_t") or + ("Float32" in cap and result_ty != "float32_t") or + ("Float64" in cap and result_ty != "float64_t") or + ("Int16" in 
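# Sketch of this patch's core change (mirrors the match arms above; names are
# assumptions): a "S"/"U"/"F" type-prefix in the opcode no longer fans out into
# one overload per concrete type; it now contributes a single type_traits
# condition that writeInst folds into the emitted enable_if_t, so one templated
# declaration replaces the old int16_t/int32_t/int64_t (etc.) copies.
PREFIX_TO_COND = {
    "U": "is_unsigned_v",      # e.g. OpAtomicUMin -> one templated atomicUMin
    "S": "is_signed_v",        # e.g. OpAtomicSMin -> one templated atomicSMin
    "F": "is_floating_point",  # e.g. OpGroupNonUniformFAdd
}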
cap and result_ty != "int16_t" and result_ty != "uint16_t") or + ("Int64" in cap and result_ty != "int64_t" and result_ty != "uint64_t")): continue - if (not "typename T" in final_templates) and (rt == "T"): - final_templates = ["typename T"] + final_templates - - if len(overload_caps) > 0: - if (("Float16" in cap and rt != "float16_t") or - ("Float32" in cap and rt != "float32_t") or - ("Float64" in cap and rt != "float64_t") or - ("Int16" in cap and rt != "int16_t" and rt != "uint16_t") or - ("Int64" in cap and rt != "int64_t" and rt != "uint64_t")): continue - - if "Vector" in cap: - rt = "vector<" + rt + ", N> " - final_templates.append("uint32_t N") - - op_ty = "T" - if prefered_op_ty != None: - op_ty = prefered_op_ty - elif rt != "void": - op_ty = rt - - args = [] - if "operands" in instruction: - for operand in instruction["operands"]: - operand_name = operand["name"].strip("'") if "name" in operand else None - operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" - match operand["kind"]: - case "IdResult" | "IdResultType": continue - case "IdRef": - match operand["name"]: - case "'Pointer'": - if shape == Shape.PTR_TEMPLATE: - args.append("P " + operand_name) - elif shape == Shape.BDA: - if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): - final_templates = ["typename T"] + final_templates - args.append("pointer_t " + operand_name) - else: - if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): - final_templates = ["typename T"] + final_templates - args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) - case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": - if (not "typename T" in final_templates) and (rt == "T" or op_ty == "T"): + if "Vector" in cap: + result_ty = "vector<" + result_ty + ", N> " + final_templates.append("uint32_t N") + + op_ty = "T" + if prefered_op_ty != None: + op_ty = prefered_op_ty + elif result_ty != "void": + op_ty = result_ty + + args = [] + if "operands" in instruction: + for operand in instruction["operands"]: + operand_name = operand["name"].strip("'") if "name" in operand else None + operand_name = operand_name[0].lower() + operand_name[1:] if (operand_name != None) else "" + match operand["kind"]: + case "IdResult" | "IdResultType": continue + case "IdRef": + match operand["name"]: + case "'Pointer'": + if shape == Shape.PTR_TEMPLATE: + args.append("P " + operand_name) + elif shape == Shape.BDA: + if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): + final_templates = ["typename T"] + final_templates + args.append("pointer_t " + operand_name) + else: + if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): final_templates = ["typename T"] + final_templates - args.append(op_ty + " " + operand_name) - case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": - args.append("uint32_t " + operand_name) - case "'Predicate'": args.append("bool " + operand_name) - case "'ClusterSize'": - if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload - else: return ignore(op_name) # TODO - case _: return ignore(op_name) # TODO - case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") - case "IdMemorySemantics": args.append(" uint32_t " + operand_name) - case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) - case "MemoryAccess": - assert len(overload_caps) <= 1 - if shape != Shape.BDA: - writeInst(writer, 
final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) - writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, rt, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) - case _: return ignore(op_name) # TODO - - writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, rt, args) + args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) + case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": + if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): + final_templates = ["typename T"] + final_templates + args.append(op_ty + " " + operand_name) + case "'Offset'" | "'Count'" | "'Id'" | "'Index'" | "'Mask'" | "'Delta'": + args.append("uint32_t " + operand_name) + case "'Predicate'": args.append("bool " + operand_name) + case "'ClusterSize'": + if "quantifier" in operand and operand["quantifier"] == "?": continue # TODO: overload + else: return ignore(op_name) # TODO + case _: return ignore(op_name) # TODO + case "IdScope": args.append("uint32_t " + operand_name.lower() + "Scope") + case "IdMemorySemantics": args.append(" uint32_t " + operand_name) + case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) + case "MemoryAccess": + assert len(caps) <= 1 + if shape != Shape.BDA: + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) + writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + case _: return ignore(op_name) # TODO + + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args) def writeInst(writer: io.TextIOWrapper, templates, cap, exts, op_name, fn_name, conds, result_type, args): diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index a7e12a6f3..25cc4f17b 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -605,45 +605,15 @@ template [[vk::ext_instruction(spv::OpBitFieldInsert)]] enable_if_t<(is_signed_v || is_unsigned_v), T> bitFieldInsert(T base, T insert, uint32_t offset, uint32_t count); +template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int16_t bitFieldExtract_BitInstructions(int16_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int16_t bitFieldExtract_Int16(int16_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityBitInstructions)]] -[[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int32_t bitFieldExtract(int32_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityBitInstructions)]] -[[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int64_t 
bitFieldExtract_BitInstructions(int64_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpBitFieldSExtract)]] -int64_t bitFieldExtract_Int64(int64_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityBitInstructions)]] -[[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint16_t bitFieldExtract_BitInstructions(uint16_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint16_t bitFieldExtract_Int16(uint16_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityBitInstructions)]] -[[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint32_t bitFieldExtract(uint32_t base, uint32_t offset, uint32_t count); +enable_if_t, T> bitFieldSExtract(T base, uint32_t offset, uint32_t count); +template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint64_t bitFieldExtract_BitInstructions(uint64_t base, uint32_t offset, uint32_t count); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpBitFieldUExtract)]] -uint64_t bitFieldExtract_Int64(uint64_t base, uint32_t offset, uint32_t count); +enable_if_t, T> bitFieldUExtract(T base, uint32_t offset, uint32_t count); template [[vk::ext_capability(spv::CapabilityBitInstructions)]] @@ -724,105 +694,37 @@ template [[vk::ext_instruction(spv::OpAtomicISub)]] enable_if_t && (is_signed_v || is_unsigned_v), T> atomicISub(P pointer, uint32_t memoryScope, uint32_t semantics, T value); -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicSMin)]] -int16_t atomicMin([[vk::ext_reference]] int16_t pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); - -[[vk::ext_instruction(spv::OpAtomicSMin)]] -int32_t atomicMin([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpAtomicSMin)]] -int64_t atomicMin([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); - -template -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicSMin)]] -enable_if_t, int16_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); - -template +template [[vk::ext_instruction(spv::OpAtomicSMin)]] -enable_if_t, int32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); +enable_if_t, T> atomicSMin([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt64)]] +template [[vk::ext_instruction(spv::OpAtomicSMin)]] -enable_if_t, int64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicUMin)]] -uint16_t atomicMin([[vk::ext_reference]] uint16_t pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); - -[[vk::ext_instruction(spv::OpAtomicUMin)]] -uint32_t atomicMin([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpAtomicUMin)]] -uint64_t atomicMin([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +enable_if_t && is_signed_v, T> atomicSMin(P pointer, uint32_t memoryScope, 
uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicUMin)]] -enable_if_t, uint16_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); - -template +template [[vk::ext_instruction(spv::OpAtomicUMin)]] -enable_if_t, uint32_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); +enable_if_t, T> atomicUMin([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt64)]] +template [[vk::ext_instruction(spv::OpAtomicUMin)]] -enable_if_t, uint64_t> atomicMin(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicSMax)]] -int16_t atomicMax([[vk::ext_reference]] int16_t pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); - -[[vk::ext_instruction(spv::OpAtomicSMax)]] -int32_t atomicMax([[vk::ext_reference]] int32_t pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpAtomicSMax)]] -int64_t atomicMax([[vk::ext_reference]] int64_t pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); - -template -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicSMax)]] -enable_if_t, int16_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int16_t value); +enable_if_t && is_unsigned_v, T> atomicUMin(P pointer, uint32_t memoryScope, uint32_t semantics, T value); -template +template [[vk::ext_instruction(spv::OpAtomicSMax)]] -enable_if_t, int32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int32_t value); +enable_if_t, T> atomicSMax([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt64)]] +template [[vk::ext_instruction(spv::OpAtomicSMax)]] -enable_if_t, int64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, int64_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicUMax)]] -uint16_t atomicMax([[vk::ext_reference]] uint16_t pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); - -[[vk::ext_instruction(spv::OpAtomicUMax)]] -uint32_t atomicMax([[vk::ext_reference]] uint32_t pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpAtomicUMax)]] -uint64_t atomicMax([[vk::ext_reference]] uint64_t pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +enable_if_t && is_signed_v, T> atomicSMax(P pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpAtomicUMax)]] -enable_if_t, uint16_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint16_t value); - -template +template [[vk::ext_instruction(spv::OpAtomicUMax)]] -enable_if_t, uint32_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint32_t value); +enable_if_t, T> atomicUMax([[vk::ext_reference]] T pointer, uint32_t memoryScope, uint32_t semantics, T value); -template -[[vk::ext_capability(spv::CapabilityInt64)]] +template [[vk::ext_instruction(spv::OpAtomicUMax)]] -enable_if_t, uint64_t> atomicMax(P pointer, uint32_t memoryScope, uint32_t semantics, uint64_t value); +enable_if_t && is_unsigned_v, T> atomicUMax(P 
pointer, uint32_t memoryScope, uint32_t semantics, T value); template [[vk::ext_instruction(spv::OpAtomicAnd)]] @@ -933,49 +835,20 @@ template [[vk::ext_instruction(spv::OpGroupNonUniformIAdd)]] enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float16_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float16_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float16_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float16_t groupNonUniformAdd_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float32_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float32_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float32_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float64_t groupNonUniformAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float64_t groupNonUniformAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float64_t groupNonUniformAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -float64_t groupNonUniformAdd_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, 
[[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -992,313 +865,110 @@ template [[vk::ext_instruction(spv::OpGroupNonUniformIMul)]] enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float16_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float16_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float16_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float16_t groupNonUniformMul_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float32_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float32_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float32_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float64_t groupNonUniformMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float64_t groupNonUniformMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float64_t groupNonUniformMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -float64_t groupNonUniformMul_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] 
-[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int16_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int16_t groupNonUniformMin_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); +enable_if_t, T> groupNonUniformFMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +enable_if_t, T> groupNonUniformSMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +enable_if_t, T> groupNonUniformSMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMin)]] -int64_t groupNonUniformMin_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t 
executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint16_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint16_t groupNonUniformMin_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); +enable_if_t, T> groupNonUniformSMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +enable_if_t, T> groupNonUniformUMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +enable_if_t, T> groupNonUniformUMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMin)]] -uint64_t groupNonUniformMin_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float16_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float16_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float16_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - 
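// Hedged call sketch, not generated output: invoking one of the templated
// reductions that replace the per-type overloads removed here. The scope
// literal 3 (spv::ScopeSubgroup) and the group_operation::Reduce constant from
// the generated namespace are assumptions based on earlier sections.
float32_t subgroupFMin(float32_t x)
{
    return nbl::hlsl::spirv::groupNonUniformFMin_GroupNonUniformArithmetic<float32_t>(
        3 /*spv::ScopeSubgroup*/, nbl::hlsl::spirv::group_operation::Reduce, x);
}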
-[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float16_t groupNonUniformMin_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float32_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float32_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float32_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); +enable_if_t, T> groupNonUniformUMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float64_t groupNonUniformMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float64_t groupNonUniformMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float64_t groupNonUniformMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -float64_t groupNonUniformMin_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int16_t groupNonUniformMax_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] 
-[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int32_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int32_t value); +enable_if_t, T> groupNonUniformFMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +enable_if_t, T> groupNonUniformSMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); +enable_if_t, T> groupNonUniformSMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformSMax)]] -int64_t groupNonUniformMax_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, int64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityInt16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint16_t groupNonUniformMax_Int16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint32_t 
groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint32_t value); +enable_if_t, T> groupNonUniformSMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +enable_if_t, T> groupNonUniformUMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); +enable_if_t, T> groupNonUniformUMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); - -[[vk::ext_capability(spv::CapabilityInt64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformUMax)]] -uint64_t groupNonUniformMax_Int64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, uint64_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float16_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float16_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float16_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityFloat16)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float16_t groupNonUniformMax_Float16(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float16_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float32_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float32_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float32_t value); - -[[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float32_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, 
[[vk::ext_literal]] uint32_t operation, float32_t value); +enable_if_t, T> groupNonUniformUMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float64_t groupNonUniformMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float64_t groupNonUniformMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float64_t groupNonUniformMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -float64_t groupNonUniformMax_Float64(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, float64_t value); +enable_if_t, T> groupNonUniformFMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -1441,154 +1111,6 @@ void endInvocationInterlockEXT_FragmentShaderPixelInterlockEXT(); [[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] void endInvocationInterlockEXT_FragmentShaderShadingRateInterlockEXT(); -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float16_t atomicMinEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -vector atomicMinEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float32_t atomicMinEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float64_t atomicMinEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -float64_t atomicMinEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float16_t> atomicMinEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] 
-[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, vector > atomicMinEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float32_t> atomicMinEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float64_t> atomicMinEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpAtomicFMinEXT)]] -enable_if_t, float64_t> atomicMinEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float16_t atomicMaxEXT_AtomicFloat16MinMaxEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -vector atomicMaxEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float32_t atomicMaxEXT_AtomicFloat32MinMaxEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float64_t atomicMaxEXT_AtomicFloat64MinMaxEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -float64_t atomicMaxEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float16_t> atomicMaxEXT_AtomicFloat16MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, vector > atomicMaxEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float32_t> atomicMaxEXT_AtomicFloat32MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64MinMaxEXT)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float64_t> atomicMaxEXT_AtomicFloat64MinMaxEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_instruction(spv::OpAtomicFMaxEXT)]] -enable_if_t, float64_t> atomicMaxEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] 
-[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float16_t atomicAddEXT_AtomicFloat16AddEXT([[vk::ext_reference]] float16_t pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -vector atomicAddEXT_AtomicFloat16VectorNV([[vk::ext_reference]] vector pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float32_t atomicAddEXT_AtomicFloat32AddEXT([[vk::ext_reference]] float32_t pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float64_t atomicAddEXT_AtomicFloat64AddEXT([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -float64_t atomicAddEXT_Float64([[vk::ext_reference]] float64_t pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float16_t> atomicAddEXT_AtomicFloat16AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float16_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat16VectorNV)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, vector > atomicAddEXT_AtomicFloat16VectorNV(P pointer, uint32_t memoryScope, uint32_t semantics, vector value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat32AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float32_t> atomicAddEXT_AtomicFloat32AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float32_t value); - -template -[[vk::ext_capability(spv::CapabilityAtomicFloat64AddEXT)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float64_t> atomicAddEXT_AtomicFloat64AddEXT(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - -template -[[vk::ext_capability(spv::CapabilityFloat64)]] -[[vk::ext_extension("SPV_EXT_shader_atomic_float_add")]] -[[vk::ext_instruction(spv::OpAtomicFAddEXT)]] -enable_if_t, float64_t> atomicAddEXT_Float64(P pointer, uint32_t memoryScope, uint32_t semantics, float64_t value); - } #endif From c387d961eb751f523a1908c6076b3401c6f6d4a3 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Tue, 10 Sep 2024 14:19:46 +0330 Subject: [PATCH 17/18] hlsl_generator: handwritten BDA instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 46 ++++++++------- tools/hlsl_generator/out.hlsl | 104 ++++++++++++---------------------- 2 files changed, 63 insertions(+), 87 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index bb9d23867..4dd2e5815 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -29,9 +29,6 @@ { //! 
General Decls -template -NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type::value; - template struct pointer { @@ -47,6 +44,9 @@ template using pointer_t = typename pointer::type; +template +NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_same_v::type >; + // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] @@ -58,11 +58,31 @@ [[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); +//! Memory instructions +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T obj, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T obj); + +//! Bitcast Instructions // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) template [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpBitcast)]] -enable_if_t, T> bitcast(U); +enable_if_t, T> bitcast(U); template [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] @@ -181,9 +201,6 @@ def gen(grammer_path, output_path): case "Atomic": processInst(writer, instruction) processInst(writer, instruction, Shape.PTR_TEMPLATE) - case "Memory": - processInst(writer, instruction, Shape.PTR_TEMPLATE) - processInst(writer, instruction, Shape.BDA) case "Barrier" | "Bit": processInst(writer, instruction) case "Reserved": @@ -208,7 +225,6 @@ def gen(grammer_path, output_path): class Shape(Enum): DEFAULT = 0, PTR_TEMPLATE = 1, # TODO: this is a DXC Workaround - BDA = 2, # PhysicalStorageBuffer Result Type def processInst(writer: io.TextIOWrapper, instruction, @@ -231,8 +247,6 @@ def processInst(writer: io.TextIOWrapper, if shape == Shape.PTR_TEMPLATE: templates.append("typename P") conds.append("is_spirv_type_v

") - elif shape == Shape.BDA: - caps.append("PhysicalStorageBufferAddresses") # split upper case words matches = [(m.group(1), m.span(1)) for m in re.finditer(r'([A-Z])[A-Z][a-z]', fn_name)] @@ -249,7 +263,7 @@ def processInst(writer: io.TextIOWrapper, conds.append("is_signed_v") break case "F": - conds.append("is_floating_point") + conds.append("(is_same_v || is_same_v || is_same_v)") break else: if instruction["class"] == "Bit": @@ -303,10 +317,6 @@ def processInst(writer: io.TextIOWrapper, case "'Pointer'": if shape == Shape.PTR_TEMPLATE: args.append("P " + operand_name) - elif shape == Shape.BDA: - if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): - final_templates = ["typename T"] + final_templates - args.append("pointer_t " + operand_name) else: if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): final_templates = ["typename T"] + final_templates @@ -327,10 +337,8 @@ def processInst(writer: io.TextIOWrapper, case "GroupOperation": args.append("[[vk::ext_literal]] uint32_t " + operand_name) case "MemoryAccess": assert len(caps) <= 1 - if shape != Shape.BDA: - writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) - writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) - writeInst(writer, final_templates + ["uint32_t alignment"], cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002", "[[vk::ext_literal]] uint32_t __alignment = alignment"]) + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess"]) + writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args + ["[[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam"]) case _: return ignore(op_name) # TODO writeInst(writer, final_templates, cap, exts, op_name, final_fn_name, conds, result_ty, args) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 25cc4f17b..62797583d 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -20,9 +20,6 @@ namespace spirv { //! General Decls -template -NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_spirv_type::value; - template struct pointer { @@ -38,6 +35,9 @@ struct pointer template using pointer_t = typename pointer::type; +template +NBL_CONSTEXPR_STATIC_INLINE bool is_pointer_v = is_same_v::type >; + // The holy operation that makes addrof possible template [[vk::ext_instruction(spv::OpCopyObject)]] @@ -49,11 +49,31 @@ template [[vk::ext_instruction(34 /* GLSLstd450MatrixInverse */, "GLSL.std.450")]] SquareMatrix matrixInverse(NBL_CONST_REF_ARG(SquareMatrix) mat); +//! 
Memory instructions +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpLoad)]] +T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpLoad)]] +enable_if_t, T> load(P pointer); + +template +[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] +[[vk::ext_instruction(spv::OpStore)]] +void store(pointer_t pointer, T obj, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); + +template +[[vk::ext_instruction(spv::OpStore)]] +enable_if_t, void> store(P pointer, T obj); + +//! Bitcast Instructions // Add specializations if you need to emit a `ext_capability` (this means that the instruction needs to forward through an `impl::` struct and so on) template [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] [[vk::ext_instruction(spv::OpBitcast)]] -enable_if_t, T> bitcast(U); +enable_if_t, T> bitcast(U); template [[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] @@ -548,58 +568,6 @@ namespace group_operation } //! Instructions -template -[[vk::ext_instruction(spv::OpLoad)]] -enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess); - -template -[[vk::ext_instruction(spv::OpLoad)]] -enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); - -template -[[vk::ext_instruction(spv::OpLoad)]] -enable_if_t, T> load(P pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); - -template -[[vk::ext_instruction(spv::OpLoad)]] -enable_if_t, T> load(P pointer); - -template -[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] -[[vk::ext_instruction(spv::OpLoad)]] -T load(pointer_t pointer, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); - -template -[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] -[[vk::ext_instruction(spv::OpLoad)]] -T load(pointer_t pointer); - -template -[[vk::ext_instruction(spv::OpStore)]] -enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess); - -template -[[vk::ext_instruction(spv::OpStore)]] -enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t memoryAccess, [[vk::ext_literal]] uint32_t memoryAccessParam); - -template -[[vk::ext_instruction(spv::OpStore)]] -enable_if_t, void> store(P pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); - -template -[[vk::ext_instruction(spv::OpStore)]] -enable_if_t, void> store(P pointer, T object); - -template -[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] -[[vk::ext_instruction(spv::OpStore)]] -void store(pointer_t pointer, T object, [[vk::ext_literal]] uint32_t __aligned = /*Aligned*/0x00000002, [[vk::ext_literal]] uint32_t __alignment = alignment); - -template -[[vk::ext_capability(spv::CapabilityPhysicalStorageBufferAddresses)]] -[[vk::ext_instruction(spv::OpStore)]] -void store(pointer_t pointer, T object); - template [[vk::ext_capability(spv::CapabilityBitInstructions)]] [[vk::ext_instruction(spv::OpBitFieldInsert)]] @@ -838,17 +806,17 @@ enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIAdd_GroupNo 
template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -enable_if_t, T> groupNonUniformFAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFAdd_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -enable_if_t, T> groupNonUniformFAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFAdd_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFAdd)]] -enable_if_t, T> groupNonUniformFAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFAdd_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -868,17 +836,17 @@ enable_if_t<(is_signed_v || is_unsigned_v), T> groupNonUniformIMul_GroupNo template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -enable_if_t, T> groupNonUniformFMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMul_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -enable_if_t, T> groupNonUniformFMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMul_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMul)]] -enable_if_t, T> groupNonUniformFMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMul_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -913,17 +881,17 @@ enable_if_t, T> groupNonUniformUMin_GroupNonUniformPartitionedN template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -enable_if_t, T> groupNonUniformFMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMin_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] 
-enable_if_t, T> groupNonUniformFMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMin_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMin)]] -enable_if_t, T> groupNonUniformFMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMin_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] @@ -958,17 +926,17 @@ enable_if_t, T> groupNonUniformUMax_GroupNonUniformPartitionedN template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -enable_if_t, T> groupNonUniformFMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMax_GroupNonUniformArithmetic(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformClustered)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -enable_if_t, T> groupNonUniformFMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMax_GroupNonUniformClustered(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformPartitionedNV)]] [[vk::ext_instruction(spv::OpGroupNonUniformFMax)]] -enable_if_t, T> groupNonUniformFMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); +enable_if_t<(is_same_v || is_same_v || is_same_v), T> groupNonUniformFMax_GroupNonUniformPartitionedNV(uint32_t executionScope, [[vk::ext_literal]] uint32_t operation, T value); template [[vk::ext_capability(spv::CapabilityGroupNonUniformArithmetic)]] From 5b9371a009abfdbbfcf3a8c2359af217de4e3e88 Mon Sep 17 00:00:00 2001 From: Ali Cheraghi Date: Thu, 12 Sep 2024 19:42:28 +0330 Subject: [PATCH 18/18] hlsl_generator: generate glsl.std extended instructions Signed-off-by: Ali Cheraghi --- tools/hlsl_generator/gen.py | 42 +++-- tools/hlsl_generator/out.hlsl | 319 ++++++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+), 13 deletions(-) diff --git a/tools/hlsl_generator/gen.py b/tools/hlsl_generator/gen.py index 4dd2e5815..2584d023f 100644 --- a/tools/hlsl_generator/gen.py +++ b/tools/hlsl_generator/gen.py @@ -108,16 +108,12 @@ #endif """ -def gen(grammer_path, output_path): - grammer_raw = open(grammer_path, "r").read() - grammer = json.loads(grammer_raw) - del grammer_raw - +def gen(core_grammer, glsl_grammer, output_path): output = open(output_path, "w", buffering=1024**2) - builtins = [x for x in grammer["operand_kinds"] if x["kind"] == "BuiltIn"][0]["enumerants"] - execution_modes = [x for x in grammer["operand_kinds"] if x["kind"] == "ExecutionMode"][0]["enumerants"] - group_operations = [x for x in grammer["operand_kinds"] if x["kind"] == "GroupOperation"][0]["enumerants"] + builtins = [x for x in core_grammer["operand_kinds"] if x["kind"] == 
"BuiltIn"][0]["enumerants"] + execution_modes = [x for x in core_grammer["operand_kinds"] if x["kind"] == "ExecutionMode"][0]["enumerants"] + group_operations = [x for x in core_grammer["operand_kinds"] if x["kind"] == "GroupOperation"][0]["enumerants"] with output as writer: writer.write(head) @@ -194,7 +190,7 @@ def gen(grammer_path, output_path): writer.write("}\n") writer.write("\n//! Instructions\n") - for instruction in grammer["instructions"]: + for instruction in core_grammer["instructions"]: if instruction["opname"].endswith("INTEL"): continue match instruction["class"]: @@ -219,6 +215,9 @@ def gen(grammer_path, output_path): processInst(writer, instruction, result_ty="uint32_t",prefered_op_ty="uint32_t4") case _: processInst(writer, instruction) case _: continue # TODO + for instruction in glsl_grammer["instructions"]: + instruction["operands"] = [{"kind": "IdResultType"}] + instruction["operands"] + processInst(writer, instruction) writer.write(foot) @@ -266,7 +265,7 @@ def processInst(writer: io.TextIOWrapper, conds.append("(is_same_v || is_same_v || is_same_v)") break else: - if instruction["class"] == "Bit": + if "class" in instruction and instruction["class"] == "Bit": conds.append("(is_signed_v || is_unsigned_v)") if "operands" in instruction and instruction["operands"][0]["kind"] == "IdResultType": @@ -321,7 +320,11 @@ def processInst(writer: io.TextIOWrapper, if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): final_templates = ["typename T"] + final_templates args.append("[[vk::ext_reference]] " + op_ty + " " + operand_name) - case "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'": + case ("'a'" | "'b'" | "'c'" | "'x'" | "'y'" | "'z'" | "'i'" | "'v'" | + "'p'" | "'p0'" | "'p1'" | "'exp'" | "'minVal'" | "'maxVal'" | "'y_over_x'" | + "'edge'" | "'edge0'" | "'edge1'" | "'I'" | "'N'" | "'eta'" | "'sample'" | + "'degrees'" | "'radians'" | "'Nref'" | "'interpolant'" | "'offset'" | + "'Value'" | "'Object'" | "'Comparator'" | "'Base'" | "'Insert'"): if (not "typename T" in final_templates) and (result_ty == "T" or op_ty == "T"): final_templates = ["typename T"] + final_templates args.append(op_ty + " " + operand_name) @@ -366,8 +369,21 @@ def ignore(op_name): parser = ArgumentParser(description="Generate HLSL from SPIR-V instructions") parser.add_argument("output", type=str, help="HLSL output file") - parser.add_argument("--grammer", required=False, type=str, help="Input SPIR-V grammer JSON file", default=os.path.join(script_dir_path, "../../include/spirv/unified1/spirv.core.grammar.json")) + parser.add_argument("--core-grammer", required=False, type=str, + help="SPIR-V Core grammer JSON file", + default=os.path.join(script_dir_path, "../../include/spirv/unified1/spirv.core.grammar.json")) + parser.add_argument("--glsl-grammer", required=False, type=str, + help="SPIR-V Extended GLSL.std.450 grammer JSON file", + default=os.path.join(script_dir_path, "../../include/spirv/unified1/extinst.glsl.std.450.grammar.json")) args = parser.parse_args() - gen(args.grammer, args.output) + grammer_raw = open(args.core_grammer, "r").read() + core_grammer = json.loads(grammer_raw) + del grammer_raw + + grammer_raw = open(args.glsl_grammer, "r").read() + glsl_grammer = json.loads(grammer_raw) + del grammer_raw + + gen(core_grammer, glsl_grammer, args.output) diff --git a/tools/hlsl_generator/out.hlsl b/tools/hlsl_generator/out.hlsl index 62797583d..729f04b20 100644 --- a/tools/hlsl_generator/out.hlsl +++ b/tools/hlsl_generator/out.hlsl @@ -1079,6 
+1079,325 @@ void endInvocationInterlockEXT_FragmentShaderPixelInterlockEXT(); [[vk::ext_instruction(spv::OpEndInvocationInterlockEXT)]] void endInvocationInterlockEXT_FragmentShaderShadingRateInterlockEXT(); +template +[[vk::ext_instruction(spv::Round)]] +T und(T x); + +template +[[vk::ext_instruction(spv::RoundEven)]] +T undEven(T x); + +template +[[vk::ext_instruction(spv::Trunc)]] +T unc(T x); + +template +[[vk::ext_instruction(spv::FAbs)]] +T bs(T x); + +template +[[vk::ext_instruction(spv::SAbs)]] +T bs(T x); + +template +[[vk::ext_instruction(spv::FSign)]] +T ign(T x); + +template +[[vk::ext_instruction(spv::SSign)]] +T ign(T x); + +template +[[vk::ext_instruction(spv::Floor)]] +T oor(T x); + +template +[[vk::ext_instruction(spv::Ceil)]] +T il(T x); + +template +[[vk::ext_instruction(spv::Fract)]] +T act(T x); + +template +[[vk::ext_instruction(spv::Radians)]] +T dians(T degrees); + +template +[[vk::ext_instruction(spv::Degrees)]] +T grees(T radians); + +template +[[vk::ext_instruction(spv::Sin)]] +T n(T x); + +template +[[vk::ext_instruction(spv::Cos)]] +T s(T x); + +template +[[vk::ext_instruction(spv::Tan)]] +T n(T x); + +template +[[vk::ext_instruction(spv::Asin)]] +T in(T x); + +template +[[vk::ext_instruction(spv::Acos)]] +T os(T x); + +template +[[vk::ext_instruction(spv::Atan)]] +T an(T y_over_x); + +template +[[vk::ext_instruction(spv::Sinh)]] +T nh(T x); + +template +[[vk::ext_instruction(spv::Cosh)]] +T sh(T x); + +template +[[vk::ext_instruction(spv::Tanh)]] +T nh(T x); + +template +[[vk::ext_instruction(spv::Asinh)]] +T inh(T x); + +template +[[vk::ext_instruction(spv::Acosh)]] +T osh(T x); + +template +[[vk::ext_instruction(spv::Atanh)]] +T anh(T x); + +template +[[vk::ext_instruction(spv::Atan2)]] +T an2(T y, T x); + +template +[[vk::ext_instruction(spv::Pow)]] +T w(T x, T y); + +template +[[vk::ext_instruction(spv::Exp)]] +T p(T x); + +template +[[vk::ext_instruction(spv::Log)]] +T g(T x); + +template +[[vk::ext_instruction(spv::Exp2)]] +T p2(T x); + +template +[[vk::ext_instruction(spv::Log2)]] +T g2(T x); + +template +[[vk::ext_instruction(spv::Sqrt)]] +T rt(T x); + +template +[[vk::ext_instruction(spv::InverseSqrt)]] +T verseSqrt(T x); + +template +[[vk::ext_instruction(spv::Determinant)]] +T terminant(T x); + +template +[[vk::ext_instruction(spv::MatrixInverse)]] +T trixInverse(T x); + +template +[[vk::ext_instruction(spv::Modf)]] +T df(T x, T i); + +template +[[vk::ext_instruction(spv::ModfStruct)]] +T dfStruct(T x); + +template +[[vk::ext_instruction(spv::FMin)]] +T in(T x, T y); + +template +[[vk::ext_instruction(spv::UMin)]] +T in(T x, T y); + +template +[[vk::ext_instruction(spv::SMin)]] +T in(T x, T y); + +template +[[vk::ext_instruction(spv::FMax)]] +T ax(T x, T y); + +template +[[vk::ext_instruction(spv::UMax)]] +T ax(T x, T y); + +template +[[vk::ext_instruction(spv::SMax)]] +T ax(T x, T y); + +template +[[vk::ext_instruction(spv::FClamp)]] +T lamp(T x, T minVal, T maxVal); + +template +[[vk::ext_instruction(spv::UClamp)]] +T lamp(T x, T minVal, T maxVal); + +template +[[vk::ext_instruction(spv::SClamp)]] +T lamp(T x, T minVal, T maxVal); + +template +[[vk::ext_instruction(spv::FMix)]] +T ix(T x, T y, T a); + +template +[[vk::ext_instruction(spv::IMix)]] +T ix(T x, T y, T a); + +template +[[vk::ext_instruction(spv::Step)]] +T ep(T edge, T x); + +template +[[vk::ext_instruction(spv::SmoothStep)]] +T oothStep(T edge0, T edge1, T x); + +template +[[vk::ext_instruction(spv::Fma)]] +T a(T a, T b, T c); + +template +[[vk::ext_instruction(spv::Frexp)]] +T exp(T 
x, T exp); + +template +[[vk::ext_instruction(spv::FrexpStruct)]] +T expStruct(T x); + +template +[[vk::ext_instruction(spv::Ldexp)]] +T exp(T x, T exp); + +template +[[vk::ext_instruction(spv::PackSnorm4x8)]] +T ckSnorm4x8(T v); + +template +[[vk::ext_instruction(spv::PackUnorm4x8)]] +T ckUnorm4x8(T v); + +template +[[vk::ext_instruction(spv::PackSnorm2x16)]] +T ckSnorm2x16(T v); + +template +[[vk::ext_instruction(spv::PackUnorm2x16)]] +T ckUnorm2x16(T v); + +template +[[vk::ext_instruction(spv::PackHalf2x16)]] +T ckHalf2x16(T v); + +template +[[vk::ext_instruction(spv::UnpackSnorm2x16)]] +T packSnorm2x16(T p); + +template +[[vk::ext_instruction(spv::UnpackUnorm2x16)]] +T packUnorm2x16(T p); + +template +[[vk::ext_instruction(spv::UnpackHalf2x16)]] +T packHalf2x16(T v); + +template +[[vk::ext_instruction(spv::UnpackSnorm4x8)]] +T packSnorm4x8(T p); + +template +[[vk::ext_instruction(spv::UnpackUnorm4x8)]] +T packUnorm4x8(T p); + +template +[[vk::ext_instruction(spv::Length)]] +T ngth(T x); + +template +[[vk::ext_instruction(spv::Distance)]] +T stance(T p0, T p1); + +template +[[vk::ext_instruction(spv::Cross)]] +T oss(T x, T y); + +template +[[vk::ext_instruction(spv::Normalize)]] +T rmalize(T x); + +template +[[vk::ext_instruction(spv::FaceForward)]] +T ceForward(T n, T i, T nref); + +template +[[vk::ext_instruction(spv::Reflect)]] +T flect(T i, T n); + +template +[[vk::ext_instruction(spv::Refract)]] +T fract(T i, T n, T eta); + +template +[[vk::ext_instruction(spv::FindILsb)]] +enable_if_t<(is_signed_v || is_unsigned_v), T> ndILsb(T value); + +template +[[vk::ext_instruction(spv::FindSMsb)]] +enable_if_t, T> ndSMsb(T value); + +template +[[vk::ext_instruction(spv::FindUMsb)]] +enable_if_t, T> ndUMsb(T value); + +template +[[vk::ext_capability(spv::CapabilityInterpolationFunction)]] +[[vk::ext_instruction(spv::InterpolateAtCentroid)]] +T terpolateAtCentroid(T interpolant); + +template +[[vk::ext_capability(spv::CapabilityInterpolationFunction)]] +[[vk::ext_instruction(spv::InterpolateAtSample)]] +T terpolateAtSample(T interpolant, T sample); + +template +[[vk::ext_capability(spv::CapabilityInterpolationFunction)]] +[[vk::ext_instruction(spv::InterpolateAtOffset)]] +T terpolateAtOffset(T interpolant, T offset); + +template +[[vk::ext_instruction(spv::NMin)]] +T in(T x, T y); + +template +[[vk::ext_instruction(spv::NMax)]] +T ax(T x, T y); + +template +[[vk::ext_instruction(spv::NClamp)]] +T lamp(T x, T minVal, T maxVal); + } #endif