Skip to content

Commit

Permalink
perf: improve glints performance
Browse files Browse the repository at this point in the history
  • Loading branch information
doodlum committed Feb 12, 2025
1 parent 4606ba9 commit 7ec6b7c
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 95 deletions.
157 changes: 74 additions & 83 deletions package/Shaders/Common/Glints/Glints2023.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ namespace Glints
return p * x;
}

float sampleNormalDistribution(float u, float mu, float sigma)
{
	// Scalar overload: maps a single random number `u` to a sample with mean `mu`
	// and spread `sigma` through erfinv (presumably the inverse error function
	// declared elsewhere in this file -- confirm), i.e. inverse-CDF sampling
	// rather than the two-input Box-Muller transform.
	// 1.414213f approximates sqrt(2).
	const float scaledSigma = sigma * 1.414213f;
	const float remapped = 2.0 * u - 1.0;
	return scaledSigma * erfinv(remapped) + mu;
}

float3 sampleNormalDistribution(float3 u, float mu, float sigma)
{
//return mu + sigma * (sqrt(-2.0 * log(u.x))* cos(2.0 * pi * u.y));
Expand Down Expand Up @@ -240,7 +247,7 @@ namespace Glints
return result;
}

float SampleGlintGridSimplex(float logDensity, float roughness, float densityRandomization, GlintCachedVars vars, float2 slope, float targetNDF)
float SampleGlintGridSimplex(float noise, float logDensity, float roughness, float densityRandomization, GlintCachedVars vars, float2 slope, float targetNDF)
{
// Get surface space glint simplex grid cell
const float2x2 gridToSkewedGrid = float2x2(1.0, -0.57735027, 0.0, 1.15470054);
Expand All @@ -255,39 +262,45 @@ namespace Glints
int2 glint2 = baseId + int2(1.0 - s, s);
float3 barycentrics = float3(-temp.z * s2, s - temp.y * s2, s - temp.x * s2);

// Generate per surface cell random numbers
float3 rand0 = Random::pcg3d(uint3(glint0 + 2147483648, vars.gridSeed)) / 4294967296.0; // TODO : optimize away manual seeds
float3 rand1 = Random::pcg3d(uint3(glint1 + 2147483648, vars.gridSeed)) / 4294967296.0;
float3 rand2 = Random::pcg3d(uint3(glint2 + 2147483648, vars.gridSeed)) / 4294967296.0;
// Generate per surface cell random number to pick sample
int selectedSample = 0;

// Get per surface cell per slope cell random numbers
float4 rand0SlopesB, rand1SlopesB, rand2SlopesB, rand0SlopesG, rand1SlopesG, rand2SlopesG;
float2 slopeLerp0, slopeLerp1, slopeLerp2;
CustomRand4Texture(roughness, slope, rand0.yz, rand0SlopesB, rand0SlopesG, slopeLerp0);
CustomRand4Texture(roughness, slope, rand1.yz, rand1SlopesB, rand1SlopesG, slopeLerp1);
CustomRand4Texture(roughness, slope, rand2.yz, rand2SlopesB, rand2SlopesG, slopeLerp2);
{
float2 accumWeights = barycentrics;
accumWeights.y += accumWeights.x;

if (noise < accumWeights.x)
selectedSample = 0;
else if (noise < accumWeights.y)
selectedSample = 1;
else
selectedSample = 2;
}

int2 selectedGlint = (selectedSample == 0) ? glint0 : (selectedSample == 1) ? glint1 : glint2;
float3 randSelected = Random::pcg3d(uint3(selectedGlint + 2147483648, vars.gridSeed)) / 4294967296.0;

// Get per surface cell per slope cell random numbers
float4 randSlopesB, randSlopesG;
float2 slopeLerp;
CustomRand4Texture(roughness, slope, randSelected.yz, randSlopesB, randSlopesG, slopeLerp);

// Compute microfacet count with randomization
float3 logDensityRand = clamp(sampleNormalDistribution(float3(rand0.x, rand1.x, rand2.x), logDensity.r, densityRandomization), 0.0, 50.0); // TODO : optimize sampleNormalDist
float3 microfacetCount = max(1e-8, vars.footprintArea.rrr * exp(logDensityRand));
float3 microfacetCountBlended = microfacetCount * vars.gridWeight;
float logDensityRand = clamp(sampleNormalDistribution(float(randSelected.x), logDensity.r, densityRandomization), 0.0, 50.0);
float microfacetCount = max(1e-8, vars.footprintArea.r * exp(logDensityRand));
float microfacetCountBlended = microfacetCount * vars.gridWeight;

// Compute binomial properties
float hitProba = roughness * targetNDF; // probability of hitting desired half vector in NDF distribution
float3 footprintOneHitProba = (1.0 - pow(abs(1.0 - hitProba.rrr), microfacetCountBlended)); // probability of hitting at least one microfacet in footprint
float3 footprintMean = (microfacetCountBlended - 1.0) * hitProba.rrr; // Expected value of number of hits in the footprint given already one hit
float3 footprintSTD = sqrt((microfacetCountBlended - 1.0) * hitProba.rrr * (1.0 - hitProba.rrr)); // Standard deviation of number of hits in the footprint given already one hit
float3 binomialSmoothWidth = 0.1 * clamp(footprintOneHitProba * 10, 0.0, 1.0) * clamp((1.0 - footprintOneHitProba) * 10, 0.0, 1.0);
float hitProba = roughness * targetNDF;
float footprintOneHitProba = (1.0 - pow(abs(1.0 - hitProba), microfacetCountBlended));
float footprintMean = (microfacetCountBlended - 1.0) * hitProba;
float footprintSTD = sqrt((microfacetCountBlended - 1.0) * hitProba * (1.0 - hitProba));
float binomialSmoothWidth = 0.1 * clamp(footprintOneHitProba * 10, 0.0, 1.0) * clamp((1.0 - footprintOneHitProba) * 10, 0.0, 1.0);

// Generate numbers of reflecting microfacets
float result0, result1, result2;
result0 = GenerateAngularBinomialValueForSurfaceCell(rand0SlopesB, rand0SlopesG, slopeLerp0, footprintOneHitProba.x, binomialSmoothWidth.x, footprintMean.x, footprintSTD.x, microfacetCountBlended.x);
result1 = GenerateAngularBinomialValueForSurfaceCell(rand1SlopesB, rand1SlopesG, slopeLerp1, footprintOneHitProba.y, binomialSmoothWidth.y, footprintMean.y, footprintSTD.y, microfacetCountBlended.y);
result2 = GenerateAngularBinomialValueForSurfaceCell(rand2SlopesB, rand2SlopesG, slopeLerp2, footprintOneHitProba.z, binomialSmoothWidth.z, footprintMean.z, footprintSTD.z, microfacetCountBlended.z);

// Interpolate result for glint grid cell
float3 results = float3(result0, result1, result2) / microfacetCount.xyz;
float result = dot(results, barycentrics);
float result = GenerateAngularBinomialValueForSurfaceCell(randSlopesB, randSlopesG, slopeLerp, footprintOneHitProba, binomialSmoothWidth, footprintMean, footprintSTD, microfacetCountBlended);
result /= microfacetCount;

return result;
}

Expand Down Expand Up @@ -424,7 +437,7 @@ namespace Glints
return;
}

void PrecomputeGlints(float rnd, float2 uv, float2 duvdx, float2 duvdy, float screenSpaceScale, out GlintCachedVars vars[4])
void PrecomputeGlints(float rnd, float2 uv, float2 duvdx, float2 duvdy, float screenSpaceScale, out GlintCachedVars vars)
{
// ACCURATE PIXEL FOOTPRINT ELLIPSE
float2 ellipseMajor, ellipseMinor;
Expand Down Expand Up @@ -480,75 +493,53 @@ namespace Glints
tetraB.x *= 2;
tetraC.x *= 2;
tetraD.x *= 2;
if (centerSpecialCase == true) // Account for center singularity (if center vertex => no rotation)
if (centerSpecialCase)
{
tetraA.x = (tetraA.y == 0) ? 3 : tetraA.x;
tetraB.x = (tetraB.y == 0) ? 3 : tetraB.x;
tetraC.x = (tetraC.y == 0) ? 3 : tetraC.x;
tetraD.x = (tetraD.y == 0) ? 3 : tetraD.x;
}
float2 uvRotA = RotateUV(uv, thetaBins[tetraA.x], 0.0.rr);
float2 uvRotB = RotateUV(uv, thetaBins[tetraB.x], 0.0.rr);
float2 uvRotC = RotateUV(uv, thetaBins[tetraC.x], 0.0.rr);
float2 uvRotD = RotateUV(uv, thetaBins[tetraD.x], 0.0.rr);

// SAMPLE GLINT GRIDS
uint gridSeedA = HashWithoutSine13(float3(log2(divLods[tetraA.z]), fmod(thetaBins[tetraA.x], Math::TAU), ratios[tetraA.y])) * 4294967296.0;
uint gridSeedB = HashWithoutSine13(float3(log2(divLods[tetraB.z]), fmod(thetaBins[tetraB.x], Math::TAU), ratios[tetraB.y])) * 4294967296.0;
uint gridSeedC = HashWithoutSine13(float3(log2(divLods[tetraC.z]), fmod(thetaBins[tetraC.x], Math::TAU), ratios[tetraC.y])) * 4294967296.0;
uint gridSeedD = HashWithoutSine13(float3(log2(divLods[tetraD.z]), fmod(thetaBins[tetraD.x], Math::TAU), ratios[tetraD.y])) * 4294967296.0;

vars[0].uv = uvRotA / divLods[tetraA.z] / float2(1.0, ratios[tetraA.y]);
vars[0].gridSeed = gridSeedA;
vars[0].footprintArea = ratios[tetraA.y] * footprintAreas[tetraA.z];
vars[0].gridWeight = tetraBarycentricWeights.x;
vars[1].uv = uvRotB / divLods[tetraB.z] / float2(1.0, ratios[tetraB.y]);
vars[1].gridSeed = gridSeedB;
vars[1].footprintArea = ratios[tetraB.y] * footprintAreas[tetraB.z];
vars[1].gridWeight = tetraBarycentricWeights.y;
vars[2].uv = uvRotC / divLods[tetraC.z] / float2(1.0, ratios[tetraC.y]);
vars[2].gridSeed = gridSeedC;
vars[2].footprintArea = ratios[tetraC.y] * footprintAreas[tetraC.z];
vars[2].gridWeight = tetraBarycentricWeights.z;
vars[3].uv = uvRotD / divLods[tetraD.z] / float2(1.0, ratios[tetraD.y]);
vars[3].gridSeed = gridSeedD;
vars[3].footprintArea = ratios[tetraD.y] * footprintAreas[tetraD.z];
vars[3].gridWeight = tetraBarycentricWeights.w;

[branch] if (SharedData::FrameCount != 0) // has TAA
{
// importance sampling as if linear interp (not ideal but good enough)
float3 accumWeights = tetraBarycentricWeights.xyz;
accumWeights.y += accumWeights.x;
accumWeights.z += accumWeights.y;
if (rnd > accumWeights.x && rnd < accumWeights.y)
vars[0] = vars[1];
else if (rnd < accumWeights.z)
vars[0] = vars[2];
else
vars[0] = vars[3];
}
}

float4 SampleGlints2023NDF(float logDensity, float roughness, float densityRandomization, GlintCachedVars vars[4], float3 H, float targetNDF, float maxNDF)
{
float2 slope = H.xy; // Orthogrtaphic slope projected grid
float rescaledTargetNDF = targetNDF / maxNDF;
float3 accumWeights = normalize(tetraBarycentricWeights);
accumWeights.y += accumWeights.x;
accumWeights.z += accumWeights.y;

float sampleContribution;
[branch] if (SharedData::FrameCount == 0) // no TAA
if (rnd < accumWeights.x)
{
float sampleA = SampleGlintGridSimplex(logDensity, roughness, densityRandomization, vars[0], slope, rescaledTargetNDF);
float sampleB = SampleGlintGridSimplex(logDensity, roughness, densityRandomization, vars[1], slope, rescaledTargetNDF);
float sampleC = SampleGlintGridSimplex(logDensity, roughness, densityRandomization, vars[2], slope, rescaledTargetNDF);
float sampleD = SampleGlintGridSimplex(logDensity, roughness, densityRandomization, vars[3], slope, rescaledTargetNDF);
sampleContribution = sampleA + sampleB + sampleC + sampleD;
vars.uv = RotateUV(uv, thetaBins[tetraA.x], 0.0.rr) / divLods[tetraA.z] / float2(1.0, ratios[tetraA.y]);
vars.gridSeed = HashWithoutSine13(float3(log2(divLods[tetraA.z]), fmod(thetaBins[tetraA.x], Math::TAU), ratios[tetraA.y])) * 4294967296.0;
vars.footprintArea = ratios[tetraA.y] * footprintAreas[tetraA.z];
vars.gridWeight = tetraBarycentricWeights.x;
}
else if (rnd < accumWeights.y)
{
vars.uv = RotateUV(uv, thetaBins[tetraB.x], 0.0.rr) / divLods[tetraB.z] / float2(1.0, ratios[tetraB.y]);
vars.gridSeed = HashWithoutSine13(float3(log2(divLods[tetraB.z]), fmod(thetaBins[tetraB.x], Math::TAU), ratios[tetraB.y])) * 4294967296.0;
vars.footprintArea = ratios[tetraB.y] * footprintAreas[tetraB.z];
vars.gridWeight = tetraBarycentricWeights.y;
}
else if (rnd < accumWeights.z)
{
vars.uv = RotateUV(uv, thetaBins[tetraC.x], 0.0.rr) / divLods[tetraC.z] / float2(1.0, ratios[tetraC.y]);
vars.gridSeed = HashWithoutSine13(float3(log2(divLods[tetraC.z]), fmod(thetaBins[tetraC.x], Math::TAU), ratios[tetraC.y])) * 4294967296.0;
vars.footprintArea = ratios[tetraC.y] * footprintAreas[tetraC.z];
vars.gridWeight = tetraBarycentricWeights.z;
}
else
{
sampleContribution = SampleGlintGridSimplex(logDensity, roughness, densityRandomization, vars[0], slope, rescaledTargetNDF) / vars[0].gridWeight;
vars.uv = RotateUV(uv, thetaBins[tetraD.x], 0.0.rr) / divLods[tetraD.z] / float2(1.0, ratios[tetraD.y]);
vars.gridSeed = HashWithoutSine13(float3(log2(divLods[tetraD.z]), fmod(thetaBins[tetraD.x], Math::TAU), ratios[tetraD.y])) * 4294967296.0;
vars.footprintArea = ratios[tetraD.y] * footprintAreas[tetraD.z];
vars.gridWeight = tetraBarycentricWeights.w;
}
}

float4 SampleGlints2023NDF(float noise, float logDensity, float roughness, float densityRandomization, GlintCachedVars vars, float3 H, float targetNDF, float maxNDF)
{
	// The glint grid is evaluated on the xy components of H (orthographic slope projection).
	const float2 slope = H.xy;

	// Express the target NDF as a fraction of maxNDF before sampling the grid.
	const float rescaledTargetNDF = targetNDF / maxNDF;

	// Evaluate the single cached grid and divide by its weight.
	const float gridSample = SampleGlintGridSimplex(noise, logDensity, roughness, densityRandomization, vars, slope, rescaledTargetNDF);
	const float sampleContribution = gridSample / vars.gridWeight;

	// Somewhat brute-force clamp (to 20) that prevents extreme values at grazing angles,
	// applied before scaling the result back up by maxNDF.
	const float clampedContribution = min(sampleContribution * (1.0 / roughness), 20);
	return clampedContribution * maxNDF;
}
}
21 changes: 10 additions & 11 deletions package/Shaders/Common/PBR.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ namespace PBR
float GlintLogMicrofacetDensity;
float GlintMicrofacetRoughness;
float GlintDensityRandomization;
Glints::GlintCachedVars GlintCache[4];
Glints::GlintCachedVars GlintCache;
float Noise;
};

SurfaceProperties InitSurfaceProperties()
Expand Down Expand Up @@ -103,12 +104,10 @@ namespace PBR
surfaceProperties.GlintDensityRandomization = 2.0;

#ifdef GLINT
surfaceProperties.GlintCache[0].uv = 0;
surfaceProperties.GlintCache[0].gridSeed = 0;
surfaceProperties.GlintCache[0].footprintArea = surfaceProperties.GlintCache[0].gridWeight = 0;
surfaceProperties.GlintCache[1] = surfaceProperties.GlintCache[2] = surfaceProperties.GlintCache[3] = surfaceProperties.GlintCache[0];
#else
surfaceProperties.GlintCache[0] = surfaceProperties.GlintCache[1] = surfaceProperties.GlintCache[2] = surfaceProperties.GlintCache[3] = 0;
surfaceProperties.GlintCache.uv = 0;
surfaceProperties.GlintCache.gridSeed = 0;
surfaceProperties.GlintCache.footprintArea = surfaceProperties.GlintCache.gridWeight = 0;
surfaceProperties.Noise = 0;
#endif

return surfaceProperties;
Expand Down Expand Up @@ -190,15 +189,15 @@ namespace PBR
}

#if defined(GLINT)
float3 GetSpecularDirectLightMultiplierMicrofacetWithGlint(float roughness, float3 specularColor, float NdotL, float NdotV, float NdotH, float VdotH, float glintH,
float logDensity, float microfacetRoughness, float densityRandomization, Glints::GlintCachedVars glintCache[4],
float3 GetSpecularDirectLightMultiplierMicrofacetWithGlint(float noise, float roughness, float3 specularColor, float NdotL, float NdotV, float NdotH, float VdotH, float glintH,
float logDensity, float microfacetRoughness, float densityRandomization, Glints::GlintCachedVars glintCache,
out float3 F)
{
float D = GetNormalDistributionFunctionGGX(roughness, NdotH);
[branch] if (logDensity > 1.1)
{
float D_max = GetNormalDistributionFunctionGGX(roughness, 1);
D = Glints::SampleGlints2023NDF(logDensity, microfacetRoughness, densityRandomization, glintCache, glintH, D, D_max);
D = Glints::SampleGlints2023NDF(noise, logDensity, microfacetRoughness, densityRandomization, glintCache, glintH, D, D_max);
}
float G = GetVisibilityFunctionSmithJointApprox(roughness, NdotV, NdotL);
F = GetFresnelFactorSchlick(specularColor, VdotH);
Expand Down Expand Up @@ -440,7 +439,7 @@ namespace PBR

float3 F;
#if defined(GLINT)
specular += GetSpecularDirectLightMultiplierMicrofacetWithGlint(surfaceProperties.Roughness, surfaceProperties.F0, satNdotL, satNdotV, satNdotH, satVdotH, mul(tbnTr, H),
specular += GetSpecularDirectLightMultiplierMicrofacetWithGlint(surfaceProperties.Noise, surfaceProperties.Roughness, surfaceProperties.F0, satNdotL, satNdotV, satNdotH, satVdotH, mul(tbnTr, H),
surfaceProperties.GlintLogMicrofacetDensity, surfaceProperties.GlintMicrofacetRoughness, surfaceProperties.GlintDensityRandomization, surfaceProperties.GlintCache, F) *
lightProperties.LightColor * satNdotL;
#else
Expand Down
4 changes: 3 additions & 1 deletion package/Shaders/Lighting.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,8 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace
# if defined(TRUE_PBR)
PBR::SurfaceProperties pbrSurfaceProperties = PBR::InitSurfaceProperties();

pbrSurfaceProperties.Noise = screenNoise;

pbrSurfaceProperties.Roughness = saturate(rawRMAOS.x);
pbrSurfaceProperties.Metallic = saturate(rawRMAOS.y);
pbrSurfaceProperties.AO = rawRMAOS.z;
Expand All @@ -1691,7 +1693,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace
pbrSurfaceProperties.GlintDensityRandomization = clamp(glintParameters.w, 0, 5);

# if defined(GLINT)
float glintNoise = Random::R1Modified(SharedData::FrameCountAlwaysActive, Random::pcg2d(uint2(input.Position.xy)) / 4294967296.0);
float glintNoise = Random::R1Modified(SharedData::FrameCount, Random::pcg2d(uint2(input.Position.xy)) / 4294967296.0);
PBR::Glints::PrecomputeGlints(glintNoise, uvOriginal, ddx(uvOriginal), ddy(uvOriginal), pbrSurfaceProperties.GlintScreenSpaceScale, pbrSurfaceProperties.GlintCache);
# endif

Expand Down

0 comments on commit 7ec6b7c

Please sign in to comment.