From 203ab2df2a6b749d5b9e7f2d86d100fc88c6f54d Mon Sep 17 00:00:00 2001 From: Try Date: Thu, 18 Aug 2022 21:55:57 +0200 Subject: [PATCH] mesh-shader: ShaderAnalyzer initial #38 --- Engine/gapi/spirv/meshconverter.cpp | 139 ++--- Engine/gapi/spirv/meshconverter.h | 17 +- Engine/gapi/spirv/shaderanalyzer.cpp | 867 +++++++++++++++++++++++++++ Engine/gapi/spirv/shaderanalyzer.h | 169 ++++++ Engine/libspirv/libspirv.cpp | 15 + Engine/libspirv/libspirv.h | 9 +- 6 files changed, 1116 insertions(+), 100 deletions(-) create mode 100644 Engine/gapi/spirv/shaderanalyzer.cpp create mode 100644 Engine/gapi/spirv/shaderanalyzer.h diff --git a/Engine/gapi/spirv/meshconverter.cpp b/Engine/gapi/spirv/meshconverter.cpp index cc2fc0e4..d3531690 100644 --- a/Engine/gapi/spirv/meshconverter.cpp +++ b/Engine/gapi/spirv/meshconverter.cpp @@ -5,41 +5,36 @@ #include MeshConverter::MeshConverter(libspirv::MutableBytecode& code) - : code(code) { + : code(code), an(code) { } void MeshConverter::exec() { + an.analyze(); + + // gl_MeshPerVertexNV block + if(an.idMeshPerVertexNV!=0) + gl_MeshPerVertexNV.gl_Position = true; + if(an.idPointSize!=0) + gl_MeshPerVertexNV.gl_PointSize = true; + for(auto it = code.begin(), end = code.end(); it!=end; ++it) { auto& i = *it; if(i.op()==spv::OpDecorate && i[2]==spv::DecorationBuiltIn) { - if(i[3]==spv::BuiltInPrimitiveCountNV) { - idPrimitiveCountNV = i[1]; + if(i[3]==spv::BuiltInPrimitiveCountNV || + i[3]==spv::BuiltInPrimitiveIndicesNV) { it.setToNop(); } - if(i[3]==spv::BuiltInPrimitiveIndicesNV) { - idPrimitiveIndicesNV = i[1]; - it.setToNop(); - } - - if(i[3]==spv::BuiltInLocalInvocationId) { - idLocalInvocationId = i[1]; - } - if(i[3]==spv::BuiltInGlobalInvocationId) { - idGlobalInvocationId = i[1]; - } - if(i[3]==spv::BuiltInWorkgroupId) { - idWorkGroupID = i[1]; - } } if(i.op()==spv::OpExtInstImport) { std::string_view name = reinterpret_cast(&i[2]); if(name=="GLSL.std.450") std450Import = i[1]; } - if(i.op()==spv::OpExecutionMode && - (i[2]==spv::ExecutionModeOutputVertices || i[2]==spv::ExecutionModeOutputLinesNV || i[2]==spv::ExecutionModeOutputPrimitivesNV || - i[2]==spv::ExecutionModeOutputTrianglesNV)) { - it.setToNop(); + if(i.op()==spv::OpExecutionMode) { + if(i[2]==spv::ExecutionModeOutputVertices || i[2]==spv::ExecutionModeOutputLinesNV || + i[2]==spv::ExecutionModeOutputPrimitivesNV || i[2]==spv::ExecutionModeOutputTrianglesNV) { + it.setToNop(); + } } if(i.op()==spv::OpExtension) { std::string_view name = reinterpret_cast(&i[1]); @@ -57,12 +52,6 @@ void MeshConverter::exec() { for(auto it = code.begin(), end = code.end(); it!=end; ++it) { auto& i = *it; if((i.op()==spv::OpVariable && i[3]==spv::StorageClassOutput)) { - if(i[2]!=idPrimitiveCountNV && - i[2]!=idPrimitiveIndicesNV) { - VarItm it; - it.type = i[1]; - outVar.insert({i[2],it}); - } it.set(3, spv::StorageClassWorkgroup); } if((i.op()==spv::OpTypePointer && i[2]==spv::StorageClassOutput)) { @@ -70,39 +59,23 @@ void MeshConverter::exec() { } } - // gl_MeshPerVertexNV block - for(auto it = code.begin(), end = code.end(); it!=end; ++it) { - auto& i = *it; - if(i.op()==spv::OpMemberDecorate && i[3]==spv::DecorationBuiltIn && i[4]==spv::BuiltInPosition) { - idMeshPerVertexNV = i[1]; - gl_MeshPerVertexNV.gl_Position = true; - break; - } - } - removeMultiview(code); removeCullClip(code); for(auto it = code.begin(), end = code.end(); it!=end; ++it) { auto& i = *it; - if(i.op()==spv::OpDecorate && i[1]==idMeshPerVertexNV && i[2]==spv::DecorationBlock) { + if(i.op()==spv::OpDecorate && i[2]==spv::DecorationBlock && i[1]==an.idMeshPerVertexNV) { it.setToNop(); continue; } if(i.op()==spv::OpDecorate && i[2]==spv::DecorationLocation) { - outVar[i[1]].location = i[3]; it.setToNop(); continue; } - if(i.op()==spv::OpMemberDecorate && i[3]==spv::DecorationBuiltIn) { - if(i[4]==spv::BuiltInPointSize) { - gl_MeshPerVertexNV.gl_PointSize = true; - } - it.setToNop(); - continue; - } - if(i.op()==spv::OpMemberDecorate && i[3]==spv::DecorationPerViewNV) { - it.setToNop(); + if(i.op()==spv::OpMemberDecorate) { + if(i[3]==spv::DecorationBuiltIn || + i[3]==spv::DecorationPerViewNV) + it.setToNop(); continue; } if(i.op()==spv::OpName) { @@ -143,9 +116,9 @@ void MeshConverter::exec() { for(uint16_t r=ix.length(); r varRemaps; - for(auto& i:outVar) { + for(auto& i:an.varying) { varRemaps[i.first] = vert.fetchAddBound(); } @@ -270,7 +243,7 @@ void MeshConverter::generateVs() { std::unordered_map constants; uint32_t varCount = 0; { - for(auto& i:outVar) { + for(auto& i:an.varying) { // preallocate indexes code.traverseType(i.second.type,[&](const libspirv::MutableBytecode::AccessChain* ids, uint32_t len) { if(!code.isBasicTypeDecl(ids[len-1].type->op())) @@ -290,7 +263,7 @@ void MeshConverter::generateVs() { } std::unordered_map typeRemaps; - for(auto& i:outVar) { + for(auto& i:an.varying) { code.traverseType(i.second.type,[&](const libspirv::MutableBytecode::AccessChain* ids, uint32_t len) { vsTypeRemaps(fn,typeRemaps,ids,len); },libspirv::Bytecode::T_PostOrder); @@ -312,7 +285,7 @@ void MeshConverter::generateVs() { fn.insert(spv::OpVariable, {_ptr_Input_int, gl_VertexIndex, spv::StorageClassInput}); // varyings variables - for(auto& i:outVar) { + for(auto& i:an.varying) { uint32_t tId = typeRemaps[i.second.type]; uint32_t id = varRemaps [i.first]; fn.insert(spv::OpVariable, {tId, id, spv::StorageClassOutput}); @@ -324,8 +297,8 @@ void MeshConverter::generateVs() { const uint32_t rAt = vert.fetchAddBound(); fn.insert(spv::OpLoad, {int_t, rAt, gl_VertexIndex}); - uint32_t seq = 0; - for(auto& i:outVar) { + uint32_t seq = 0; + for(auto& i:an.varying) { uint32_t varId = varRemaps[i.first]; uint32_t type = i.second.type; code.traverseType(type,[&](const libspirv::MutableBytecode::AccessChain* ids, uint32_t len) { @@ -381,10 +354,10 @@ void MeshConverter::generateVs() { //fn.insert(spv::OpMemberName, typeRemaps[idGlPerVertex], 3, "gl_CullDistance"); fn = vert.findSectionEnd(libspirv::Bytecode::S_Annotations); - for(auto& i:outVar) { + for(auto& i:an.varying) { uint32_t varId = varRemaps[i.first]; uint32_t type = i.second.type; - for(auto& v:outVar) { + for(auto& v:an.varying) { if(v.second.type!=type) continue; uint32_t loc = v.second.location; @@ -622,7 +595,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { std::unordered_map constants; uint32_t varCount = 0; { - for(auto& i:outVar) { + for(auto& i:an.varying) { // preallocate indexes code.traverseType(i.second.type,[&](const libspirv::MutableBytecode::AccessChain* ids, uint32_t len) { if(!code.isBasicTypeDecl(ids[len-1].type->op())) @@ -707,7 +680,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { fn.insert(spv::OpControlBarrier, {const2, const2, const264}); // barrier() const uint32_t primCount = code.fetchAddBound(); - fn.insert(spv::OpLoad, {uint_t, primCount, idPrimitiveCountNV}); + fn.insert(spv::OpLoad, {uint_t, primCount, an.idPrimitiveCountNV}); // gl_PrimitiveCountNV <= 0 { @@ -726,7 +699,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { // gl_LocalInvocationID.x != 0 { const uint32_t ptrInvocationIdX = code.fetchAddBound(); - fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrInvocationIdX, idLocalInvocationId, const0}); + fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrInvocationIdX, an.idLocalInvocationID, const0}); const uint32_t invocationId = code.fetchAddBound(); fn.insert(spv::OpLoad, {uint_t, invocationId, ptrInvocationIdX}); const uint32_t cond1 = code.fetchAddBound(); @@ -752,7 +725,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { fn.insert(spv::OpLoad, {uint_t, rI, varI}); const uint32_t ptrIndicesNV = code.fetchAddBound(); - fn.insert(spv::OpAccessChain, {_ptr_Workgroup_uint, ptrIndicesNV, idPrimitiveIndicesNV, rI}); + fn.insert(spv::OpAccessChain, {_ptr_Workgroup_uint, ptrIndicesNV, an.idPrimitiveIndicesNV, rI}); const uint32_t rInd = code.fetchAddBound(); fn.insert(spv::OpLoad, {uint_t, rInd, ptrIndicesNV}); @@ -776,7 +749,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { fn.insert(spv::OpIMul, {uint_t, maxVar, maxVertex, varSize}); { const uint32_t ptrWorkGroupID = code.fetchAddBound(); - fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrWorkGroupID, idWorkGroupID, const1}); + fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrWorkGroupID, an.idWorkGroupID, const1}); const uint32_t workIdX = code.fetchAddBound(); fn.insert(spv::OpLoad, {uint_t, workIdX, ptrWorkGroupID}); @@ -824,7 +797,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { fn.insert(spv::OpAccessChain, {_ptr_Uniform_uint, ptrHeap, vEngine2, const0, rDst}); const uint32_t ptrIndicesNV = code.fetchAddBound(); - fn.insert(spv::OpAccessChain, {_ptr_Workgroup_uint, ptrIndicesNV, idPrimitiveIndicesNV, rI}); + fn.insert(spv::OpAccessChain, {_ptr_Workgroup_uint, ptrIndicesNV, an.idPrimitiveIndicesNV, rI}); const uint32_t rInd = code.fetchAddBound(); fn.insert(spv::OpLoad, {uint_t, rInd, ptrIndicesNV}); @@ -846,7 +819,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { libspirv::MutableBytecode b; auto block = b.end(); uint32_t seq = 0; - for(auto& i:outVar) { + for(auto& i:an.varying) { uint32_t type = i.second.type; code.traverseType(type,[&](const libspirv::MutableBytecode::AccessChain* ids, uint32_t len) { if(!code.isBasicTypeDecl(ids[len-1].type->op())) @@ -891,7 +864,7 @@ void MeshConverter::injectCountingPass(const uint32_t idMainFunc) { // Writeout meshlet descriptor { const uint32_t ptrWorkGroupID = code.fetchAddBound(); - fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrWorkGroupID, idWorkGroupID, const1}); + fn.insert(spv::OpAccessChain, {_ptr_Input_uint, ptrWorkGroupID, an.idWorkGroupID, const1}); const uint32_t workIdX = code.fetchAddBound(); fn.insert(spv::OpLoad, {uint_t, workIdX, ptrWorkGroupID}); @@ -958,7 +931,7 @@ void MeshConverter::replaceEntryPoint(const uint32_t idMainFunc, const uint32_t } void MeshConverter::removeMultiview(libspirv::MutableBytecode& code) { - if(idMeshPerVertexNV==0) + if(an.idMeshPerVertexNV==0) return; std::unordered_set perView; @@ -974,7 +947,7 @@ void MeshConverter::removeMultiview(libspirv::MutableBytecode& code) { } void MeshConverter::removeCullClip(libspirv::MutableBytecode& code) { - if(idMeshPerVertexNV==0) + if(an.idMeshPerVertexNV==0) return; std::unordered_set perView; @@ -1000,7 +973,7 @@ void MeshConverter::removeFromPerVertex(libspirv::MutableBytecode& code, const std::unordered_set& fields) { for(auto it = code.begin(), end = code.end(); it!=end; ++it) { auto& i = *it; - if((i.op()!=spv::OpMemberDecorate && i.op()!=spv::OpMemberName) || i[1]!=idMeshPerVertexNV) + if((i.op()!=spv::OpMemberDecorate && i.op()!=spv::OpMemberName) || i[1]!=an.idMeshPerVertexNV) continue; if(fields.find(i[2])==fields.end()) continue; @@ -1011,7 +984,7 @@ void MeshConverter::removeFromPerVertex(libspirv::MutableBytecode& code, auto& i = *it; if(i.op()!=spv::OpTypeStruct) continue; - if(idMeshPerVertexNV!=i[1]) + if(an.idMeshPerVertexNV!=i[1]) continue; uint16_t argc = 1; diff --git a/Engine/gapi/spirv/meshconverter.h b/Engine/gapi/spirv/meshconverter.h index ffe93411..0c63c266 100644 --- a/Engine/gapi/spirv/meshconverter.h +++ b/Engine/gapi/spirv/meshconverter.h @@ -6,6 +6,8 @@ #include "libspirv/libspirv.h" +#include "shaderanalyzer.h" + class MeshConverter { public: explicit MeshConverter(libspirv::MutableBytecode& code); @@ -15,11 +17,6 @@ class MeshConverter { void generateVs(); private: - struct VarItm { - uint32_t type = 0; - uint32_t location = -1; - }; - void avoidReservedFixup(); void removeMultiview(libspirv::MutableBytecode& code); void removeCullClip(libspirv::MutableBytecode& code); @@ -35,6 +32,7 @@ class MeshConverter { const libspirv::Bytecode::AccessChain* ids, uint32_t len); libspirv::MutableBytecode& code; + ShaderAnalyzer an; libspirv::MutableBytecode vert; struct gl_MeshPerVertexNV { @@ -43,16 +41,7 @@ class MeshConverter { } gl_MeshPerVertexNV; // meslet builtins - uint32_t idMeshPerVertexNV = 0; uint32_t idGlPerVertex = 0; - uint32_t idPrimitiveCountNV = 0; - uint32_t idPrimind = 0; - uint32_t idPrimitiveIndicesNV = 0; - uint32_t idLocalInvocationId = 0; - uint32_t idGlobalInvocationId = 0; - uint32_t idWorkGroupID = 0; uint32_t std450Import = 0; - - std::unordered_map outVar; }; diff --git a/Engine/gapi/spirv/shaderanalyzer.cpp b/Engine/gapi/spirv/shaderanalyzer.cpp new file mode 100644 index 00000000..370b5186 --- /dev/null +++ b/Engine/gapi/spirv/shaderanalyzer.cpp @@ -0,0 +1,867 @@ +#include "shaderanalyzer.h" + +#include +#include +#include +#include + +std::string ShaderAnalyzer::toStr(ShaderAnalyzer::AccessBits b) { + if(b==AC_All) + return "AC_All"; + std::stringstream s; + if(b & AC_Const) + s << "AC_Const | "; + if(b & AC_Uniform) + s << "AC_Uniform | "; + if(b & AC_Input) + s << "AC_Input | "; + if(b & AC_Local) + s << "AC_Local | "; + if(b & AC_Arg) + s << "AC_Arg | "; + if(b & AC_Global) + s << "AC_Global | "; + if(b & AC_Shared) + s << "AC_Shared | "; + if(b & AC_Output) + s << "AC_Output | "; + if(b & AC_UAV) + s << "AC_UAV | "; + if(b & AC_Cond) + s << "AC_Cond | "; + auto ret = s.str(); + if(ret.size()>3 && ret[ret.size()-2]=='|') { + ret.resize(ret.size()-3); + } + if(ret.empty()) + return "AC_None"; + return ret; + } + +std::string ShaderAnalyzer::toStr(const ShaderAnalyzer::Value& b) { + if(!b.hasVal) + return toStr(b.access); + std::stringstream s; + s << toStr(b.access) << " "; + s << b.value[0]; + return s.str(); + } + +ShaderAnalyzer::AccessBits ShaderAnalyzer::toAccessBits(spv::StorageClass c) { + switch(c) { + case spv::StorageClassUniformConstant: + return AC_Uniform; + case spv::StorageClassInput: + return AC_Input; + case spv::StorageClassUniform: + return AC_Uniform; + case spv::StorageClassOutput: + return AC_Output; + case spv::StorageClassWorkgroup: + return AC_Shared; + case spv::StorageClassCrossWorkgroup: + return AC_UAV; + case spv::StorageClassPrivate: + return AC_Global; + case spv::StorageClassFunction: + return AC_Local; + case spv::StorageClassGeneric: + return AC_All; + case spv::StorageClassPushConstant: + return AC_Uniform; + case spv::StorageClassAtomicCounter: + return AC_All; + case spv::StorageClassImage: + return AC_All; // TODO: distinct RW and read-only + case spv::StorageClassStorageBuffer: + return AC_Uniform; // TODO: distinct RW and read-only + default: + // extensions + return AC_All; + } + } + +bool ShaderAnalyzer::Func::isPureUniform() const { + auto wmsk = AC_Const | AC_Arg | AC_Local | AC_Uniform; + auto rmsk = AC_Const | AC_Arg | AC_Local | AC_Uniform | AC_Input; + return (write & (~wmsk))==0 && (read & (~rmsk))==0; + } + + +ShaderAnalyzer::ShaderAnalyzer(libspirv::MutableBytecode& code) + :code(code) { + } + +void ShaderAnalyzer::analyze() { + for(auto& i:code) { + switch(i.op()) { + case spv::OpTypeVoid: { + idVoid = i[1]; + break; + } + case spv::OpTypeBool: { + break; + } + + case spv::OpTypePointer: { + uint32_t id = i[1]; + uint32_t cls = i[2]; + pointerTypes[id].what = i.op(); + pointerTypes[id].cls = spv::StorageClass(cls); + pointerTypes[id].access = toAccessBits(spv::StorageClass(cls)); + break; + } + + case spv::OpConstantTrue: + case spv::OpConstantFalse:{ + uint32_t id = i[2]; + Reg var = {}; + var.cls = spv::StorageClassUniformConstant; + var.v.access = AC_Const; + var.v.hasVal = true; + var.v.value[0] = (i.op()==spv::OpConstantTrue) ? 1 : 0; + registers[id] = var; + break; + } + case spv::OpConstant: { + uint32_t id = i[2]; + Reg var = {}; + var.cls = spv::StorageClassUniformConstant; + var.v.access = AC_Const; + var.v.hasVal = true; + var.v.value[0] = i[3]; + registers[id] = var; + break; + } + case spv::OpConstantComposite: { + uint32_t id = i[2]; + Reg var = {}; + var.cls = spv::StorageClassUniformConstant; + var.v.access = AC_Const; + var.v.hasVal = true; + for(uint32_t r=3; rsecond; + fn.dbgName = reinterpret_cast(&i[2]); + } + auto pv = variables.find(i[1]); + if(pv!=variables.end()) { + auto& v = pv->second; + v.dbgName = reinterpret_cast(&i[2]); + } + break; + } + case spv::OpEntryPoint: { + std::string_view name = reinterpret_cast(&i[3]); + if(name=="main") { + idMain = i[2]; + } + } + case spv::OpDecorate: { + if(i[2]==spv::DecorationBuiltIn && i[3]==spv::BuiltInLocalInvocationId) { + idLocalInvocationID = i[1]; + } + if(i[2]==spv::DecorationBuiltIn && i[3]==spv::BuiltInGlobalInvocationId) { + idGlobalInvocationID = i[1]; + } + if(i[2]==spv::DecorationBuiltIn && i[3]==spv::BuiltInWorkgroupId) { + idWorkGroupID = i[1]; + } + if(i[2]==spv::DecorationBuiltIn && i[3]==spv::BuiltInPrimitiveIndicesNV) { + idPrimitiveIndicesNV = i[1]; + } + if(i[2]==spv::DecorationBuiltIn && i[3]==spv::BuiltInPrimitiveCountNV) { + idPrimitiveCountNV = i[1]; + } + break; + } + case spv::OpMemberDecorate: { + if(i[3]==spv::DecorationBuiltIn && i[4]==spv::BuiltInPosition) { + idMeshPerVertexNV = i[1]; + } + if(i[3]==spv::DecorationBuiltIn && i[4]==spv::BuiltInPointSize) { + idPointSize = i[1]; + } + break; + } + case spv::OpExecutionMode:{ + if(i[2]==spv::ExecutionModeLocalSize) { + localSizeX = i[3]; + } + break; + } + default: + break; + } + } + + for(auto& i:code) { + switch(i.op()) { + case spv::OpVariable: { + if(i[3]==spv::StorageClassOutput) { + uint32_t id = i[2]; + if(id!=idPrimitiveCountNV && id!=idPrimitiveIndicesNV) { + Varying it; + it.type = i[1]; + varying.insert({id,it}); + } + } + break; + } + default: + break; + } + } + + for(auto& i:code) { + switch(i.op()) { + case spv::OpDecorate: { + if(i[2]==spv::DecorationLocation) { + varying[i[1]].location = i[3]; + } + break; + } + default: + break; + } + } + + for(auto& i:threadMapping) + i = NoThread; + for(auto& i:threadMappingIbo) + i = NoThread; + + auto ep = code.findOpEntryPoint(spv::ExecutionModelMeshNV,"main"); + // reset var + for(uint32_t inv = 0; inv> " << v.dbgName << std::endl; + } + + l.v = v.v; + l.v.access |= p.v.access; + + if(l.isConstOrInput() && v.v.hasVal) { + //std::cout << toStr(p.v); + //std::cout << " >> " << v.dbgName << std::endl; + } + + makeReadAccess(functionCurrent, blockId, l.v.access); + break; + } + case spv::OpStore: { + // FIXME: control flow ? + auto l = registers[i[1]]; // ptr + auto r = registers[i[2]]; // reg + auto id = dereferenceAccessChain(l.v.pointer); + + auto& v = variables[id]; + v.v = r.v; + v.v.access |= l.v.access; + v.v.access |= acExt; + + if(v.cls==spv::StorageClassOutput) + { + /* + std::cout << " " << v.dbgName << "["; + for(size_t t=1; t var; + for(auto it = code.begin(); it!=code.end(); ++it) { + auto& i = *it; + if(i.op()==spv::OpCapability) { + if(i[1]==spv::CapabilityMeshShadingNV) { + gen.insert(spv::OpCapability,{spv::CapabilityShader}); + continue; + } + } + if(i.op()==spv::OpExecutionMode) { + continue; + } + if(i.op()==spv::OpEntryPoint) { + gen.insert(spv::OpEntryPoint, {spv::ExecutionModelVertex, i[2], 0x6E69616D, 0x0, idVertexIndex}); + continue; + } + if(i.op()==spv::OpDecorate && i[2]==spv::DecorationBuiltIn) { + if(i[3]==spv::BuiltInPrimitiveCountNV || + i[3]==spv::BuiltInPrimitiveIndicesNV || + i[3]==spv::BuiltInLocalInvocationId || + i[3]==spv::BuiltInGlobalInvocationId || + i[3]==spv::BuiltInWorkgroupId) { + continue; + } + } + + if(i.op()==spv::OpTypePointer) { + uint32_t cls = i[2]; + if(isVertexFriendly(spv::StorageClass(cls))) { + cls = spv::StorageClassPrivate; + } + gen.insert(i.op(),{i[1],cls,i[3]}); + continue; + } + if(i.op()==spv::OpVariable) { + uint32_t cls = i[3]; + if(isVertexFriendly(spv::StorageClass(cls))) { + cls = spv::StorageClassPrivate; + } + var.insert(i[2]); + gen.insert(i.op(),{i[1],i[2],cls}); + continue; + } + + if(i.op()==spv::OpFunction) { + auto& fn = functions[i[2]]; + var.insert(i[2]); + if(!(fn.isPureUniform() || i[2]==idMain)) { + gen.insert(i.op(),&i[1],i.length()-1); + while((*it).op()!=spv::OpFunctionEnd) { + auto& i = *it; + if(i.op()==spv::OpFunctionParameter) + gen.insert(i.op(),&i[1],i.length()-1); + ++it; + } + uint32_t id = vert.fetchAddBound(); + gen.insert(spv::OpLabel,{id}); + + if(fn.returnType==idVoid) { + gen.insert(spv::OpReturn,{}); + } else { + uint32_t id = vert.fetchAddBound(); + gen.insert(spv::OpUndef,{fn.returnType, id}); + gen.insert(spv::OpReturnValue,{id}); + } + gen.insert(spv::OpFunctionEnd,{}); + continue; + } + } + + if(i.op()==spv::OpFunctionCall) { + // + } + + if(i.op()==spv::OpSelectionMerge) { + auto& c = control[code.toOffset(i)]; + if(c.skipFromVs) { + uint32_t merge = i[1]; + for(;;++it) { + auto& i = *it; + if(i.op()==spv::OpLabel & i[1]==merge) + break; + } + continue; + } + } + + gen.insert(i.op(),&i[1],i.length()-1); + } + + for(auto it = vert.begin(); it!=vert.end(); ++it) { + auto& i = *it; + if(i.op()==spv::OpName) { + if(var.find(i[1])==var.end() || i[1]==idMain) + it.setToNop(); + std::string_view name = reinterpret_cast(&i[2]); + if(name=="gl_LocalInvocationID") + ;//it.setToNop(); + } + } + + // engine-level main + { + auto fn = vert.findSectionEnd(libspirv::Bytecode::S_Types); + const uint32_t void_t = vert.OpTypeVoid(fn); + const uint32_t func_void = vert.OpTypeFunction(fn, void_t); + const uint32_t uint_t = vert.OpTypeInt(fn, 32, false); + const uint32_t int_t = vert.OpTypeInt(fn, 32, true); + const uint32_t _ptr_Input_int = vert.OpTypePointer(fn,spv::StorageClassInput, int_t); + const uint32_t _ptr_Private_uint = vert.OpTypePointer(fn,spv::StorageClassPrivate, uint_t); + + const uint32_t const0 = vert.OpConstant(fn,int_t,0); + const uint32_t const1 = vert.OpConstant(fn,int_t,1); + const uint32_t const2 = vert.OpConstant(fn,int_t,2); + const uint32_t const1u = vert.OpConstant(fn,uint_t,1); + const uint32_t const8u = vert.OpConstant(fn,uint_t,8); + const uint32_t const255u = vert.OpConstant(fn,uint_t,255); + const uint32_t mappings = mappingTable(fn,uint_t); + + // input + fn.insert(spv::OpVariable, {_ptr_Input_int, idVertexIndex, spv::StorageClassInput}); + + // annotations + fn = vert.findSectionEnd(libspirv::Bytecode::S_Annotations); + fn.insert(spv::OpDecorate, {idVertexIndex, spv::DecorationBuiltIn, spv::BuiltInVertexIndex}); + + fn = vert.end(); + const uint32_t engineMain = vert.fetchAddBound(); + const uint32_t lblMain = vert.fetchAddBound(); + fn.insert(spv::OpFunction, {void_t, engineMain, spv::FunctionControlMaskNone, func_void}); + fn.insert(spv::OpLabel, {lblMain}); + + const uint32_t rAt = vert.fetchAddBound(); + fn.insert(spv::OpLoad, {int_t, rAt, idVertexIndex}); + const uint32_t rIndex = vert.fetchAddBound(); + fn.insert(spv::OpBitcast, {uint_t, rIndex, rAt}); + + if(idLocalInvocationID!=0) { + // gl_PrimitiveIndex = gl_VertexIndex & 0xFF; + // gl_LocalInvocationID.x = tbl[gl_PrimitiveIndex]; + const uint32_t ptrIdX = vert.fetchAddBound(); + const uint32_t mod = vert.fetchAddBound(); + const uint32_t ptrTbl = vert.fetchAddBound(); + const uint32_t tbl = vert.fetchAddBound(); + + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdX, idLocalInvocationID, const0}); + fn.insert(spv::OpBitwiseAnd, {uint_t, mod, rIndex, const255u}); + + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrTbl, mappings, mod}); + fn.insert(spv::OpLoad, {int_t, tbl, ptrTbl}); + fn.insert(spv::OpStore, {ptrIdX, tbl}); + + const uint32_t ptrIdY = vert.fetchAddBound(); + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdY, idLocalInvocationID, const1}); + fn.insert(spv::OpStore, {ptrIdY, const1u}); + + const uint32_t ptrIdZ = vert.fetchAddBound(); + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdZ, idLocalInvocationID, const2}); + fn.insert(spv::OpStore, {ptrIdZ, const1u}); + } + + if(idWorkGroupID!=0) { + // gl_WorkGroupID.x = gl_VertexIndex >> 8; + const uint32_t ptrIdX = vert.fetchAddBound(); + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdX, idWorkGroupID, const0}); + const uint32_t mod = vert.fetchAddBound(); + fn.insert(spv::OpShiftRightLogical, {uint_t, mod, rIndex, const8u}); + fn.insert(spv::OpStore, {ptrIdX, mod}); + + const uint32_t ptrIdY = vert.fetchAddBound(); + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdY, idWorkGroupID, const1}); + fn.insert(spv::OpStore, {ptrIdY, const1u}); + + const uint32_t ptrIdZ = vert.fetchAddBound(); + fn.insert(spv::OpAccessChain, {_ptr_Private_uint, ptrIdZ, idWorkGroupID, const2}); + fn.insert(spv::OpStore, {ptrIdZ, const1u}); + } + + const uint32_t tmp0 = vert.fetchAddBound(); + fn.insert(spv::OpFunctionCall, {void_t, tmp0, idMain}); + + fn.insert(spv::OpReturn, {}); + fn.insert(spv::OpFunctionEnd, {}); + + auto ep = vert.findOpEntryPoint(spv::ExecutionModelVertex,"main"); + assert(ep!=vert.end()); + ep.set(2,engineMain); + } + + vert.removeNops(); + } diff --git a/Engine/gapi/spirv/shaderanalyzer.h b/Engine/gapi/spirv/shaderanalyzer.h new file mode 100644 index 00000000..7aada803 --- /dev/null +++ b/Engine/gapi/spirv/shaderanalyzer.h @@ -0,0 +1,169 @@ +#pragma once + +#include "libspirv/libspirv.h" + +class ShaderAnalyzer { + public: + explicit ShaderAnalyzer(libspirv::MutableBytecode& code); + libspirv::MutableBytecode& vertexPassthrough() { return vert; } + + void analyze(); + void generateVs(); + + struct Varying { + uint32_t type = 0; + uint32_t location = -1; + }; + + uint32_t idVoid = 0; + + uint32_t idLocalInvocationID = 0; + uint32_t idGlobalInvocationID = 0; + uint32_t idWorkGroupID = 0; + + uint32_t idPrimitiveIndicesNV = 0; + uint32_t idPrimitiveCountNV = 0; + uint32_t idMeshPerVertexNV = 0; + uint32_t idPerVertex = 0; + uint32_t idPointSize = 0; + uint32_t idMain = 0; + + std::unordered_map varying; + + private: + enum AccessBits : uint16_t { + AC_None = 0x0, + AC_Const = 0x1, + AC_Uniform = 0x2, + AC_Input = 0x4, + AC_Local = 0x8, + AC_Arg = 0x10, + AC_Global = 0x20, + AC_Shared = 0x40, + AC_Output = 0x80, + AC_UAV = 0x100, + AC_Cond = 0x200, + AC_All = 0x1FF, + }; + + enum { + MaxThreads = 256, + NoThread = MaxThreads+1, + }; + + friend AccessBits operator | (const AccessBits a, const AccessBits b) { + return AccessBits(uint16_t(a) | uint16_t(b)); + } + + friend void operator |= (AccessBits& a, const AccessBits b) { + a = AccessBits(uint16_t(a) | uint16_t(b)); + } + + struct AccessChain { + AccessChain() = default; + explicit AccessChain(uint32_t ptr):chain({ptr}){} + + std::vector chain; + }; + + struct Value { + AccessBits access = AccessBits::AC_All; + AccessChain pointer; + + uint32_t value[4] = {}; + bool hasVal = false; + + friend Value operator | (const Value& a, const Value& b) { + Value ret; + ret.access = (a.access | b.access); + return ret; + } + }; + + struct Reg { + spv::StorageClass cls = spv::StorageClassGeneric; + Value v; + + bool isConstOrInput() const { + auto msk = (AC_Const | AC_Input); + return (v.access & (~msk))==AC_None; + } + bool isUniform() const { + auto msk = (AC_Const | AC_Uniform | AC_Local | AC_Global); + return (v.access & (~msk))==AC_None; + } + bool isUniformOrInput() const { + auto msk = (AC_Const | AC_Uniform | AC_Local | AC_Global | AC_Input); + return (v.access & (~msk))==AC_None; + } + bool isGlobalShared() const { + auto msk = (AC_Const | AC_Uniform | AC_Local | AC_Global); + return (v.access & (~msk))==AC_None; + } + }; + + struct Var { + spv::StorageClass cls = spv::StorageClassGeneric; + Value v; + const char* dbgName = nullptr; + }; + + struct Type { + spv::Op what = spv::OpNop; + spv::StorageClass cls = spv::StorageClassGeneric; + AccessBits access = AccessBits::AC_None; + }; + + struct Func { + uint32_t codeOffset = 0; + bool analyzed = false; + AccessBits read = AC_None; + AccessBits write = AC_None; + bool hasIll = false; + bool barrier = false; + const char* dbgName = nullptr; + uint32_t returnType = 0; + + bool isPureUniform() const; + }; + + struct CFG { + bool skipFromVs = false; + }; + + static AccessBits toAccessBits(spv::StorageClass c); + static std::string toStr(AccessBits b); + static std::string toStr(const Value& b); + + void analyzeFunc (const uint32_t functionCurrent, const libspirv::Bytecode::OpCode& calee); + void analyzeBlock(const uint32_t functionCurrent, const libspirv::Bytecode::OpCode& calee, + libspirv::Bytecode::Iterator& it, + AccessBits acExt, const uint32_t blockId, uint32_t mergeLbl); + void analyzeInstr(const uint32_t functionCurrent, const libspirv::Bytecode::OpCode& op, + AccessBits acExt, const uint32_t blockId); + uint32_t mappingTable(libspirv::MutableBytecode::Iterator& typesEnd, uint32_t eltType); + + uint32_t dereferenceAccessChain(const AccessChain& id); + void markOutputsAsThreadRelated(uint32_t elt, uint32_t thread); + void markIndexAsThreadRelated(uint32_t elt, uint32_t thread); + + void makeReadAccess (const uint32_t functionCurrent, const uint32_t blockId, AccessBits ac); + void makeWriteAccess(const uint32_t functionCurrent, const uint32_t blockId, AccessBits ac); + + static bool isVertexFriendly(spv::StorageClass cls); + + libspirv::MutableBytecode& code; + libspirv::MutableBytecode vert; + + std::unordered_map registers; + std::unordered_map pointerTypes; + std::unordered_map variables; + std::unordered_map functions; + std::unordered_map control; + + uint32_t localSizeX = 0; + uint32_t currentThread = 0; + uint32_t threadMapping[MaxThreads] = {}; + uint32_t threadMappingIbo[MaxThreads] = {}; +}; + diff --git a/Engine/libspirv/libspirv.cpp b/Engine/libspirv/libspirv.cpp index 8b0036d3..dd7fc12f 100644 --- a/Engine/libspirv/libspirv.cpp +++ b/Engine/libspirv/libspirv.cpp @@ -27,6 +27,10 @@ spv::ExecutionModel Bytecode::findExecutionModel() const { return spv::ExecutionModelMax; } +size_t Bytecode::toOffset(const OpCode& op) const { + return std::distance(spirv, &op); + } + bool Bytecode::isTypeDecl(spv::Op op) { switch(op) { case spv::OpTypeVoid: @@ -353,6 +357,10 @@ void MutableBytecode::removeNops() { invalidateSpvPointers(); } +uint32_t MutableBytecode::bound() const { + return reinterpret_cast(code[3]); + } + uint32_t MutableBytecode::fetchAddBound() { uint32_t& v = reinterpret_cast(code[3]); auto ret = v; @@ -360,6 +368,13 @@ uint32_t MutableBytecode::fetchAddBound() { return ret; } +uint32_t MutableBytecode::fetchAddBound(uint32_t cnt) { + uint32_t& v = reinterpret_cast(code[3]); + auto ret = v; + v+=cnt; + return ret; + } + void MutableBytecode::traverseType(uint32_t typeId, std::function fn, TraverseMode mode) { TraverseContext ctx = { findSection(libspirv::Bytecode::S_Types), diff --git a/Engine/libspirv/libspirv.h b/Engine/libspirv/libspirv.h index c59fabc1..2815c61e 100644 --- a/Engine/libspirv/libspirv.h +++ b/Engine/libspirv/libspirv.h @@ -87,6 +87,7 @@ class Bytecode { uint32_t spirvVersion() const; spv::ExecutionModel findExecutionModel() const; + size_t toOffset(const OpCode& op) const; static bool isTypeDecl(spv::Op op); static bool isBasicTypeDecl(spv::Op op); @@ -158,13 +159,15 @@ class MutableBytecode : public Bytecode { uint32_t OpTypeStruct (Iterator& typesEnd, const uint32_t* member, const size_t size); uint32_t OpTypeFunction (Iterator& typesEnd, uint32_t idRet); - uint32_t OpConstant (Iterator& typesEnd, uint32_t idType, uint32_t u32); - uint32_t OpConstant (Iterator& typesEnd, uint32_t idType, int32_t i32); + uint32_t OpConstant (Iterator& typesEnd, uint32_t idType, uint32_t u32); + uint32_t OpConstant (Iterator& typesEnd, uint32_t idType, int32_t i32); - uint32_t OpVariable (Iterator& fn, uint32_t idType, spv::StorageClass cls); + uint32_t OpVariable (Iterator& fn, uint32_t idType, spv::StorageClass cls); void removeNops(); + uint32_t bound() const; uint32_t fetchAddBound(); + uint32_t fetchAddBound(uint32_t cnt); void traverseType(uint32_t typeId, std::function fn, TraverseMode mode = TraverseMode::T_PreOrder);