From 1ee72c8144af5e9df1cc6cebf9008164228f8fee Mon Sep 17 00:00:00 2001 From: Nate Morrical Date: Tue, 31 Dec 2024 01:04:04 -0800 Subject: [PATCH] fixing bugs with solid types regarding stride and offsets --- gprt/gprt.cpp | 32 ++++--- gprt/gprt_builtins.slang | 4 +- gprt/gprt_fallbacks.h | 19 ++-- gprt/gprt_fallbacks.slang | 96 +++++++++++++------ .../s2-hitPrograms/s2-4-solids/hostCode.cpp | 2 +- 5 files changed, 97 insertions(+), 56 deletions(-) diff --git a/gprt/gprt.cpp b/gprt/gprt.cpp index 06940a1..09fde92 100644 --- a/gprt/gprt.cpp +++ b/gprt/gprt.cpp @@ -105,7 +105,7 @@ static struct RequestedFeatures { // Setting to 4, to allow for RSTW attributes uint32_t maxRayHitAttributeSize = 4; - uint32_t internalAdditionalSize = 32; + uint32_t internalAdditionalSize = 128; uint32_t recordSize = 256; uint32_t maxDescriptorCount = 256; @@ -3977,18 +3977,22 @@ struct SolidAccel : public Accel { } // Now populate the AABB buffer using the fallback bounds kernel - auto SolidBounds = (GPRTComputeOf) context->internalComputePrograms["SolidBounds"]; + auto SolidBounds = (GPRTComputeOf) context->internalComputePrograms["SolidBounds"]; for (uint32_t gid = 0; gid < geometries.size(); ++gid) { auto &geom = accelerationStructureGeometries[gid]; SolidGeom *solidGeom = (SolidGeom *) geometries[gid]; - SolidBoundsParameters params; + SolidParameters params; params.aabbs = gprtBufferGetDevicePointer(AABBs); params.vertices = (float4 *) solidGeom->vertex.buffers[0]->getDeviceAddress(); params.indices = (uint4 *) solidGeom->index.buffer->getDeviceAddress(); + params.types = (uint8_t *) solidGeom->types.buffer->getDeviceAddress(); + params.verticesOffset = solidGeom->vertex.offset; + params.verticesStride = solidGeom->vertex.stride; params.indicesOffset = solidGeom->index.offset; params.indicesStride = solidGeom->index.stride; - params.types = (uint8_t *) solidGeom->types.buffer->getDeviceAddress(); + params.typesOffset = solidGeom->types.offset; + params.typesStride = 
solidGeom->types.stride; params.offset = AABBOffsets[gid]; params.count = AABBOffsets[gid + 1] - params.offset; gprtComputeLaunch(SolidBounds, uint3(((params.count + 255) / 256), 1, 1), uint3(256, 1, 1), params); @@ -4670,14 +4674,18 @@ Context::buildSBT(GPRTBuildSBTFlags flags) { } if (geom->geomType->getKind() == GPRT_SOLIDS) { - SolidGeom *s = (SolidGeom *) geom; - SolidParameters isectParams; - isectParams.vertices = (float4 *) s->vertex.buffers[0]->getDeviceAddress(); - isectParams.indices = (uint4*) s->index.buffer->getDeviceAddress(); - isectParams.types = (uint8_t*) s->types.buffer->getDeviceAddress(); - isectParams.typeOffet = 0; - isectParams.typeStride = 1; - memcpy(internalParams, &isectParams, sizeof(SolidParameters)); + SolidGeom *solidGeom = (SolidGeom *) geom; + SolidParameters params; + params.vertices = (float4 *) solidGeom->vertex.buffers[0]->getDeviceAddress(); + params.indices = (uint4 *) solidGeom->index.buffer->getDeviceAddress(); + params.types = (uint8_t *) solidGeom->types.buffer->getDeviceAddress(); + params.verticesOffset = solidGeom->vertex.offset; + params.verticesStride = solidGeom->vertex.stride; + params.indicesOffset = solidGeom->index.offset; + params.indicesStride = solidGeom->index.stride; + params.typesOffset = solidGeom->types.offset; + params.typesStride = solidGeom->types.stride; + memcpy(internalParams, &params, sizeof(SolidParameters)); } } } diff --git a/gprt/gprt_builtins.slang b/gprt/gprt_builtins.slang index 589e1f7..7760ab2 100644 --- a/gprt/gprt_builtins.slang +++ b/gprt/gprt_builtins.slang @@ -7,7 +7,8 @@ struct SolidAccelerationStructure { const void* address; // Conversion constructor - SolidAccelerationStructure(const void* addr) : address(addr) {} + SolidAccelerationStructure(const void *addr) : address(addr) {} + SolidAccelerationStructure() : address(nullptr) {} }; /// Opaque type representing an acceleration structure containing surfaces which can be intersected by rays. 
@@ -16,6 +17,7 @@ struct SurfaceAccelerationStructure { // Conversion constructor SurfaceAccelerationStructure(const void *addr) : address(addr) {} + SurfaceAccelerationStructure() : address(nullptr) {} }; #endif diff --git a/gprt/gprt_fallbacks.h b/gprt/gprt_fallbacks.h index 33d0df9..887a520 100644 --- a/gprt/gprt_fallbacks.h +++ b/gprt/gprt_fallbacks.h @@ -32,22 +32,17 @@ struct SphereParameters { uint32_t exitTest; // false: return entry hits, true: return exit hits }; -struct SolidBoundsParameters { +struct SolidParameters { float4 *vertices; uint4 *indices; - uint32_t indicesOffset; - uint32_t indicesStride; uint8_t *types; float4 *aabbs; uint32_t offset; uint32_t count; -}; - -// 20 bytes... -struct SolidParameters { - float4 *vertices; - uint4 *indices; - uint8_t *types; - uint32_t typeOffet; - uint32_t typeStride; + uint32_t typesOffset; + uint32_t typesStride; + uint32_t indicesOffset; + uint32_t indicesStride; + uint32_t verticesOffset; + uint32_t verticesStride; }; \ No newline at end of file diff --git a/gprt/gprt_fallbacks.slang b/gprt/gprt_fallbacks.slang index 06d47f9..b547c75 100644 --- a/gprt/gprt_fallbacks.slang +++ b/gprt/gprt_fallbacks.slang @@ -312,7 +312,7 @@ static bool IsoIsContained(int numNodes, float3 rst) { // return true; // } -#define NEWTON_ITERATIONS 2 +#define NEWTON_ITERATIONS 5 #define CONVERGED_ERROR 1e-3 #define DIVERGED_ERROR 1e6 @@ -367,6 +367,7 @@ bool intersectPointSolid(float3 P, // The query point float w[8]; // Iteration for Newton's method + bool converged = false; [unroll] for (int iteration = 0; iteration < NEWTON_ITERATIONS; iteration++) { // Compute support function values for the current position in "rst" space @@ -398,27 +399,27 @@ bool intersectPointSolid(float3 P, // The query point rstw.x -= determinant(float3x3(fcol, scol, tcol)) * dinv; rstw.y -= determinant(float3x3(rcol, fcol, tcol)) * dinv; rstw.z -= determinant(float3x3(rcol, scol, fcol)) * dinv; + if (all(abs(fcol) < CONVERGED_ERROR)) { converged = true; 
break;} } + if (!converged) return false; // Check for containment of the final cannonical point location - bool contained = IsoIsContained(nodeCount, rstw.xyz); + if (!IsoIsContained(nodeCount, rstw.xyz)) return false; + [unroll] for (int i = 0; i < 8; ++i) rstw.w += W[i] * w[i]; - return contained; + return true; } // Not particularly efficient at the moment, meant to be general and easy to maintain. [shader("compute")] [numthreads(256, 1, 1)] -void SolidBounds(uint3 DispatchThreadID: SV_DispatchThreadID, uniform SolidBoundsParameters record) { +void SolidBounds(uint3 DispatchThreadID: SV_DispatchThreadID, uniform SolidParameters s) { int primID = DispatchThreadID.x; - if (primID >= record.count) + if (primID >= s.count) return; - float4 *vertices = record.vertices; - uint *indices = (uint*)record.indices; - uint8_t *types = record.types; - - uint8_t type = types[primID]; + uint8_t *types = s.types; + uint8_t type = types[s.typesOffset + s.typesStride * primID]; uint32_t numVertices = getVertexCount(type); float3 aabbMin = +float3(FLT_MAX); @@ -428,18 +429,23 @@ void SolidBounds(uint3 DispatchThreadID: SV_DispatchThreadID, uniform SolidBound // TODO: Try to load indices by groups of four. // TODO: Use aligned loads... // TODO: Add some validation and report an error if indices are invalid. 
+ uint8_t *idxstart = ((uint8_t *) s.indices) + (s.indicesOffset + s.indicesStride * primID); + uint *indices = (uint *) idxstart; + float4 *vertices = s.vertices; for (int i = 0; i < numVertices; ++i) { - int index = indices[(record.indicesOffset/4) + (record.indicesStride/4) * primID + i]; - float4 vert = vertices[index]; + int index = indices[i]; + float4 vert = vertices[(s.verticesOffset + s.verticesStride * index) / sizeof(float4)]; aabbMin = min(aabbMin, vert.xyz); aabbMax = max(aabbMax, vert.xyz); densMinMax = min(densMinMax.x, vert.w); densMinMax = max(densMinMax.y, vert.w); } - uint32_t offset = record.offset; - record.aabbs[(offset * 2) + 2 * primID] = float4(aabbMin.xyz, aabbMax.x); - record.aabbs[(offset * 2) + 2 * primID + 1] = float4(aabbMax.yz, densMinMax); + uint32_t offset = s.offset; + s.aabbs[(offset * 2) + 2 * primID] = float4(aabbMin.xyz, aabbMax.x); + s.aabbs[(offset * 2) + 2 * primID + 1] = float4(aabbMax.yz, densMinMax); + + // printf("primID %d bounds %f %f %f %f %f %f\n", primID, aabbMin.x, aabbMin.y, aabbMin.z, aabbMax.x, aabbMax.y, aabbMax.z); } [shader("intersection")] @@ -447,25 +453,55 @@ void SolidIntersection(uniform uint32_t userData[64], uniform SolidParameters s) { uint primID = PrimitiveIndex(); float4 QW[8] = { 0., 0., 0., 0., 0., 0., 0., 0. 
}; - uint8_t type = s.types[s.typeOffet + s.typeStride * primID]; - + uint8_t type = s.types[s.typesOffset + s.typesStride * primID]; uint32_t numVertices = getVertexCount(type); - uint4 i0 = LoadAligned<16>(s.indices + (primID * 2 + 0)); - QW[0] = LoadAligned<16>(s.vertices + i0.x); - QW[1] = LoadAligned<16>(s.vertices + i0.y); - QW[2] = LoadAligned<16>(s.vertices + i0.z); - QW[3] = LoadAligned<16>(s.vertices + i0.w); - - if (numVertices > 4) { - uint4 i1 = LoadAligned<16>(s.indices + (primID * 2 + 1)); - QW[4] = LoadAligned<16>(s.vertices + i1.x); - if (numVertices > 5) QW[5] = LoadAligned<16>(s.vertices + i1.y); - if (numVertices > 6) QW[6] = LoadAligned<16>(s.vertices + i1.z); - if (numVertices > 7) QW[7] = LoadAligned<16>(s.vertices + i1.w); + // uint8_t* indices = (uint8_t *) s.indices; + // uint8_t *idxstart = indices + (s.indicesOffset + s.indicesStride * primID); + // uint4 *u4Indices = (uint4 *) idxstart; + + uint8_t *idxstart = ((uint8_t *)s.indices) + (s.indicesOffset + s.indicesStride * primID); + uint *indices = (uint *)idxstart; + float4 *vertices = s.vertices; + for (int i = 0; i < numVertices; ++i) { + int index = indices[i]; + float4 vert = vertices[(s.verticesOffset + s.verticesStride * index) / sizeof(float4)]; + QW[i] = vert; + // aabbMin = min(aabbMin, vert.xyz); + // aabbMax = max(aabbMax, vert.xyz); + // densMinMax = min(densMinMax.x, vert.w); + // densMinMax = max(densMinMax.y, vert.w); } - float4 rstw; + + + + // uint4 i0 = LoadAligned<16>(u4Indices + 0); + + // // printf("primID %d indices %d %d %d %d\n", primID, i0.x, i0.y, i0.z, i0.w); + // float4 *vertices = s.vertices; + + // uint4 i0s = (s.verticesOffset + s.verticesStride * i0) / sizeof(float4); + // // uint4 i0s = (0 + 16*i0) / sizeof(float4); + + // QW[0] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i0.x)/sizeof(float4)) ); + // QW[1] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i0.y)/sizeof(float4)) ); + // QW[2] = 
LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i0.z)/sizeof(float4)) ); + // QW[3] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i0.w)/sizeof(float4)) ); + // // printf("primID %d offset %d stride %d indices %d %d %d %d\n", primID, s.verticesOffset, s.verticesStride, i0s.x, i0s.y, i0s.z, i0s.w); + + // // printf("primID %d v0 %f %f %f %f\n", primID, QW[0].x, QW[0].y, QW[0].z, QW[0].w); + + // if (numVertices > 4) { + // uint4 i1 = LoadAligned<16>(u4Indices + 1); + // uint4 i1s = (s.verticesOffset + s.verticesStride * i1) / sizeof(float4); + // if (numVertices > 4) QW[4] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i1.x)/sizeof(float4)) ); + // if (numVertices > 5) QW[5] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i1.y)/sizeof(float4)) ); + // if (numVertices > 6) QW[6] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i1.z)/sizeof(float4)) ); + // if (numVertices > 7) QW[7] = LoadAligned<16>(s.vertices + ((s.verticesOffset + s.verticesStride * i1.w)/sizeof(float4)) ); + // } + + float4 rstw = float4(0.f); if (intersectPointSolid(ObjectRayOrigin(), // The query point { QW[0].xyz, QW[1].xyz, QW[2].xyz, QW[3].xyz, QW[4].xyz, QW[5].xyz, QW[6].xyz, QW[7].xyz }, { QW[0].w, QW[1].w, QW[2].w, QW[3].w, QW[4].w, QW[5].w, QW[6].w, QW[7].w }, diff --git a/samples/s2-hitPrograms/s2-4-solids/hostCode.cpp b/samples/s2-hitPrograms/s2-4-solids/hostCode.cpp index ce10577..5c50628 100644 --- a/samples/s2-hitPrograms/s2-4-solids/hostCode.cpp +++ b/samples/s2-hitPrograms/s2-4-solids/hostCode.cpp @@ -41,7 +41,7 @@ int main(int ac, char **av) { // Each solid primitive will read up to eight consecutive indices, depending on the type. 
GPRTBufferOf solidIndices = gprtDeviceBufferCreate(context, indices.size(), indices.data()); - gprtSolidsSetIndices(solidGeom, solidIndices, indices.size(), /*indices stride*/ 2 * sizeof(uint4)); + gprtSolidsSetIndices(solidGeom, solidIndices, indices.size()/2, /*indices stride*/ 2 * sizeof(uint4)); // Solids can vary in their types. If all elements are known to be the same type, one can create a type // array of one element, then set the type stride to 0 when creating the solid geometry.