Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Hineven committed May 21, 2024
1 parent a0a7e61 commit 94303d4
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 69 deletions.
130 changes: 94 additions & 36 deletions src/core/src/render_techniques/migi/migi.comp
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,16 @@ void SSRC_AllocateUniformProbes (int DispatchID : SV_DispatchThreadID) {
g_RWTileAdaptiveProbeCountTexture[ProbeIndex] = 0;
}

float3 GetUniformScreenProbePositionFromGBuffer (int2 TileCoords, float LinearDepth, bool bPrevious) {
float2 UV = GetUniformScreenProbeScreenUV(TileCoords, bPrevious);
float3 RecoverScreenProbePosition (int2 TileCoords, float LinearDepth, bool bPrevious) {
// We are using the camera coordinate system to recover probe position, which is placed with G-Buffer samples
// We need to take care of the jittering as the camera coordinate system is not affected by TAA (but the G-Buffer is).
float2 UV = GetUniformScreenProbeScreenUV(TileCoords, bPrevious) + (bPrevious ? MI.PreviousTAAJitterUV : MI.TAAJitterUV);
float2 NDC2 = UV2NDC2(UV);
return LinearDepth * (NDC2.x * MI.CameraRight + NDC2.y * MI.CameraUp + MI.CameraDirection) + MI.CameraPosition;
if(!bPrevious) {
return LinearDepth * (NDC2.x * MI.CameraRight + NDC2.y * MI.CameraUp + MI.CameraDirection) + MI.CameraPosition;
} else {
return LinearDepth * (NDC2.x * MI.PreviousCameraRight + NDC2.y * MI.PreviousCameraUp + MI.PreviousCameraDirection) + MI.PreviousCameraPosition;
}
}

void CalculateSSRCSampleWeightsForUniformScreenProbes (
Expand Down Expand Up @@ -214,10 +220,10 @@ void CalculateSSRCSampleWeightsForUniformScreenProbes (
);
float4 DepthWeights;
float4 PixelPlane = float4(Normal, dot(Position, Normal));
float3 PositionX00 = GetUniformScreenProbePositionFromGBuffer(TileCoordsX00 + int2(0, 0), CornerLinearDepths.x, bPrevious);
float3 PositionX10 = GetUniformScreenProbePositionFromGBuffer(TileCoordsX00 + int2(1, 0), CornerLinearDepths.y, bPrevious);
float3 PositionX01 = GetUniformScreenProbePositionFromGBuffer(TileCoordsX00 + int2(0, 1), CornerLinearDepths.z, bPrevious);
float3 PositionX11 = GetUniformScreenProbePositionFromGBuffer(TileCoordsX00 + int2(1, 1), CornerLinearDepths.w, bPrevious);
float3 PositionX00 = RecoverScreenProbePosition(TileCoordsX00 + int2(0, 0), CornerLinearDepths.x, bPrevious);
float3 PositionX10 = RecoverScreenProbePosition(TileCoordsX00 + int2(1, 0), CornerLinearDepths.y, bPrevious);
float3 PositionX01 = RecoverScreenProbePosition(TileCoordsX00 + int2(0, 1), CornerLinearDepths.z, bPrevious);
float3 PositionX11 = RecoverScreenProbePosition(TileCoordsX00 + int2(1, 1), CornerLinearDepths.w, bPrevious);
float4 PlaneDistances;
PlaneDistances.x = abs(dot(PixelPlane, float4(PositionX00, -1.f)));
PlaneDistances.y = abs(dot(PixelPlane, float4(PositionX10, -1.f)));
Expand Down Expand Up @@ -256,9 +262,8 @@ void CalculateSSRCSampleWeights (
Sample.Index[2] = TileCoordsX00 + int2(0, 1);
Sample.Index[3] = TileCoordsX00 + int2(1, 1);

// FIXME
// Weight the adaptive probes and search within the nearest tiles
if(false) {
{
float Epsilon = .01f;
float4 PixelPlane = float4(Normal, dot(Position, Normal));
for (uint CornerIndex = 0; CornerIndex < 4; CornerIndex++)
Expand Down Expand Up @@ -310,6 +315,7 @@ groupshared int LocalAdaptiveProbeOffset;
groupshared int2 LocalProbeScreenPositionsToAllocate[WAVE_SIZE];
[numthreads(WAVE_SIZE, 1, 1)]
void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int LocalID : SV_GroupThreadID) {
if(MI.NoAdaptiveProbes) return ;
// This macro is passed in by compiler arguments
#ifndef SSRC_ADAPTIVE_PROBE_LAYER
#define SSRC_ADAPTIVE_PROBE_LAYER 0
Expand Down Expand Up @@ -349,13 +355,12 @@ void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int Loca

float Epsilon = .01f;
Sample.Weights /= max(dot(Sample.Weights, 1), Epsilon);

// g_RWDebugOutput[AdaptiveTileCoords] = float4(Sample.Weights);

float LightingIsValid = (dot(Sample.Weights, 1) < 1.0f - Epsilon) ? 0.0f : 1.0f;

if (!LightingIsValid)
{
// g_RWDebugOutput[AdaptiveTileCoords] = 1.f.xxxx;
int ListIndex;
InterlockedAdd(LocalNumProbesToAllocate, 1, ListIndex);
LocalProbeScreenPositionsToAllocate[ListIndex] = AdaptiveProbeScreenPosition;
Expand All @@ -378,7 +383,7 @@ void SSRC_AllocateAdaptiveProbes (int DispatchID : SV_DispatchThreadID, int Loca

int AdaptiveProbeIndex = LocalAdaptiveProbeOffset + LocalID;

if(!MI.NoAdaptiveProbes && LocalID < NumProbeToAllocateClipped) {
if(LocalID < NumProbeToAllocateClipped) {
// Allocate the index slot and record the index of the adaptive probe
int TileAdaptiveProbeRank;
// Set the tile coords to the tile that the LocalID th probe belongs to
Expand Down Expand Up @@ -482,6 +487,8 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S
float Epsilon = .01f;
Sample.Weights /= max(dot(Sample.Weights, 1), Epsilon);

// Reproject SG

int BasisOffsets[4];
BasisOffsets[0] = GetScreenProbeBasisOffset(Sample.Index[0], true);
BasisOffsets[1] = GetScreenProbeBasisOffset(Sample.Index[1], true);
Expand Down Expand Up @@ -667,14 +674,14 @@ void SSRC_ReprojectProbeHistory (int LocalID : SV_GroupThreadID, int GroupID : S
#if SSRC_MAX_BASIS_PER_TILE > WAVE_SIZE
#error "SSRC_MAX_BASIS_PER_TILE must be less than or equal to WAVE_SIZE"
#endif
// Reproject irradiance
if(WaveIsFirstLane()) {
float3 Irradiance =
GetScreenProbeIrradiance(Sample.Index[0]) * Sample.Weights[0] +
GetScreenProbeIrradiance(Sample.Index[1]) * Sample.Weights[1] +
GetScreenProbeIrradiance(Sample.Index[2]) * Sample.Weights[2] +
GetScreenProbeIrradiance(Sample.Index[3]) * Sample.Weights[3];
// FIXME cause of nan?
WriteScreenProbeIrradiance(ProbeIndex, GIDenoiser_RemoveNaNs(Irradiance));
GetScreenProbeIrradiance(Sample.Index[0], true) * Sample.Weights[0] +
GetScreenProbeIrradiance(Sample.Index[1], true) * Sample.Weights[1] +
GetScreenProbeIrradiance(Sample.Index[2], true) * Sample.Weights[2] +
GetScreenProbeIrradiance(Sample.Index[3], true) * Sample.Weights[3];
WriteScreenProbeIrradiance(ProbeIndex, Irradiance);
}
int BasisOffset = Header.BasisOffset;
if(LocalID < NumBasis) {
Expand Down Expand Up @@ -745,14 +752,15 @@ void SSRC_SampleUpdateRays (int LocalID : SV_GroupThreadID, int GroupID : SV_Gro
}
}
GroupMemoryBarrierWithGroupSync();
float IrradianceSize = TWO_PI * dot(g_RWProbeIrradianceTexture[ProbeIndex].xyz, 1.f.xxx) + Epsilon;
float IrradianceSize = TWO_PI * dot(GetScreenProbeIrradiance(ProbeIndex), 1.f.xxx) + Epsilon;
float ThreadSizeSums[SSRC_MAX_NUM_BASIS_PER_PROBE];
[unroll(SSRC_MAX_NUM_BASIS_PER_PROBE)]
for(int i = 0; i < BasisCount; i++) {
ThreadSizeSums[i] = i == 0 ? 0 : ThreadSizeSums[i-1];
ThreadSizeSums[i] += LocalSGSize[i];
ThreadSizeSums[0] = 0;
if(BasisCount > 0) ThreadSizeSums[0] = LocalSGSize[0];
[unroll(SSRC_MAX_NUM_BASIS_PER_PROBE-1)]
for(int i = 1; i < BasisCount; i++) {
ThreadSizeSums[i] = ThreadSizeSums[i-1] + LocalSGSize[i];
}
float SumSizeBasis = ThreadSizeSums[BasisCount - 1];
float SumSizeBasis = BasisCount > 0 ? ThreadSizeSums[BasisCount - 1] : 0;
float SumSize = SumSizeBasis + IrradianceSize;

Random rng = MakeRandom(GroupID * WAVE_SIZE + LocalID, MI.FrameSeed);
Expand Down Expand Up @@ -1019,8 +1027,7 @@ void SSRC_TraceUpdateRays (uint DispatchID) {
);
// Fallback to sky sample if no intersection
g_RWUpdateRayRadianceInvPdfBuffer[DispatchID] = PackFp16x4Safe(float4(payload.sky_sample, InvPdf));
// FIXME
g_RWUpdateRayRadianceInvPdfBuffer[DispatchID] = PackFp16x4Safe(float4(float3(RayOrigin + 1.f), InvPdf));
// g_RWUpdateRayRadianceInvPdfBuffer[DispatchID] = PackFp16x4Safe(float4(float3(RayOrigin + 1.f), InvPdf));

g_RWUpdateRayLinearDepthBuffer[DispatchID] = MI.CameraFar;
}
Expand Down Expand Up @@ -1191,7 +1198,8 @@ void GenerateReservoirs(in uint DispatchID : SV_DispatchThreadID)
// effectively enlarges the area light (due to the spatial nature of the grid) and leads to
// light leaks and generally poorer visuals.

if (visibility.is_front_face && dot(material.emissivity.xyz, material.emissivity.xyz) > 0.0f)
// always assume 2-sided emissive surfaces
if (/*visibility.is_front_face &&*/ dot(material.emissivity.xyz, material.emissivity.xyz) > 0.0f)
{
MaterialEmissive emissive = MakeMaterialEmissive(material, mesh_uv);
float4 RadianceInvPdf = UnpackFp16x4(g_RWUpdateRayRadianceInvPdfBuffer[query_index]);
Expand Down Expand Up @@ -1757,14 +1765,14 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID
int BasisCount = GetProbeBasisCountFromClass(Header.Class);
int ProbeRayCount = g_RWProbeUpdateRayCountBuffer[GroupID];
int ProbeRayOffset = g_RWProbeUpdateRayOffsetBuffer[GroupID];
float3 ProbeIrradiance = g_RWProbeIrradianceTexture[ProbeIndex].xyz;
float3 ProbeIrradiance = GetScreenProbeIrradiance(ProbeIndex);

if(LocalID < BasisCount) {
LocalSGData[LocalID] = FetchBasisData(Header.BasisOffset + LocalID);
}
GroupMemoryBarrierWithGroupSync();

float SumSampleWeight = Epsilon;
float SumSampleWeight = 0;
float3 SumWeightedDiffRadiance = 0.f.xxx;
float3 SumWeightedRadiance = 0.f.xxx;
#if SSRC_MAX_NUM_UPDATE_RAY_PER_PROBE % WAVE_SIZE != 0
Expand All @@ -1785,11 +1793,12 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID
EvaluatedRadiance += EvaluateSG(LocalSGData[i], RayDirection);
LocalEvaluatedRadiance[RayRank] = EvaluatedRadiance + ProbeIrradiance;
SumSampleWeight += InvPdf;
// RayRadiance = GIDenoiser_RemoveNaNs(RayRadiance);
SumWeightedDiffRadiance += InvPdf * (RayRadiance - (EvaluatedRadiance + ProbeIrradiance));
SumWeightedRadiance += InvPdf * RayRadiance;
}
}
SumSampleWeight = WaveActiveSum(SumSampleWeight);
SumSampleWeight = WaveActiveSum(SumSampleWeight) + Epsilon;
SumWeightedDiffRadiance = WaveActiveSum(SumWeightedDiffRadiance);
SumWeightedRadiance = WaveActiveSum(SumWeightedRadiance);
GroupMemoryBarrierWithGroupSync();
Expand All @@ -1798,11 +1807,35 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID
// Update the basis
// FIXME temporary resort
// TODO this must be unstable for now
float3 ImpactFactors = g_RWProbeHistoryTrustTexture[ProbeIndex].x;
float ReprojectionTrust = g_RWProbeHistoryTrustTexture[ProbeIndex].x;
float3 ImpactFactors = 1.f - ReprojectionTrust;

// Re-initialize the probe if it is not trusted at all
[branch]
if(ReprojectionTrust == 0) {
if(WaveIsFirstLane()) WriteScreenProbeIrradiance(ProbeIndex, 0.f.xxx);
if(LocalID < BasisCount) {
SGData SG = LocalSGData[LocalID];
SG.Color = 0.001f.xxx;
// TODO better initialization
SG.Lambda = 0.85f * (0.8f + 0.2f * BasisCount);

float3 Direction = FibonacciSphere(LocalID, BasisCount);
Direction.z = abs(Direction.z);
float3 Normal = Header.Normal;
float3 Tangent, Bitangent;
GetOrthoVectors(Normal, Tangent, Bitangent);
SG.Direction = normalize(Direction.x * Tangent + Direction.y * Bitangent + Direction.z * Normal);

SG.Depth = 1.f;
LocalSGData[LocalID] = SG;
}
}

// The chosen thread to update the irradiance
if(WaveIsFirstLane()) {
float3 NewProbeIrradiance = lerp(ProbeIrradiance, SumWeightedRadiance / SumSampleWeight, min(MI.CacheUpdateLearningRate + ImpactFactors.x, 1.f));
g_RWProbeIrradianceTexture[ProbeIndex] = float4(NewProbeIrradiance, 0.f);
WriteScreenProbeIrradiance(ProbeIndex, NewProbeIrradiance);
}

int ThreadPerBasis = WAVE_SIZE / BasisCount;
Expand Down Expand Up @@ -1903,6 +1936,9 @@ void SSRC_UpdateProbes (int LocalID : SV_GroupThreadID, int GroupID : SV_GroupID
SG.Lambda += SumStepSize.dLambda * MI.CacheUpdateLearningRate * (2 - ImpactFactors.z);
SG.Lambda = clamp(SG.Lambda, 0.8f, 100.f); // Lambda is in [0.8f, 100.f]
}

// Write back
WriteBasisData(Header.BasisOffset + BasisRank, SG);
}
}
}
Expand Down Expand Up @@ -1983,10 +2019,10 @@ void SSRC_IntegrateASG (int2 GroupID : SV_GroupID, int LocalID : SV_GroupThreadI
Sample.Weights /= max(dot(Sample.Weights, 1.f.xxxx), 0.01f);

// FIXME invalid probe?
float3 InterpolatedIrradiance = g_RWProbeIrradianceTexture[Sample.Index[0]].xyz * Sample.Weights[0] +
g_RWProbeIrradianceTexture[Sample.Index[1]].xyz * Sample.Weights[1] +
g_RWProbeIrradianceTexture[Sample.Index[2]].xyz * Sample.Weights[2] +
g_RWProbeIrradianceTexture[Sample.Index[3]].xyz * Sample.Weights[3];
float3 InterpolatedIrradiance = GetScreenProbeIrradiance(Sample.Index[0]) * Sample.Weights[0] +
GetScreenProbeIrradiance(Sample.Index[1]) * Sample.Weights[1] +
GetScreenProbeIrradiance(Sample.Index[2]) * Sample.Weights[2] +
GetScreenProbeIrradiance(Sample.Index[3]) * Sample.Weights[3];

float3 SumRadiance = 0.f.xxx;

Expand Down Expand Up @@ -2149,6 +2185,28 @@ void DebugSSRC_FetchCursorPos (uint DipspatchID : SV_DispatchThreadID) {
g_RWDebugCursorWorldPosBuffer[0] = WorldPosition;
}

// Debug kernel: marks screen probe locations in g_RWDebugOutput.
// Each thread handles one tile: it marks the tile's uniform probe (when it is on screen and
// reported valid) and then marks the screen positions of adaptive probes looked up for that tile.
// Marked pixels are written as float4(10, 10, 10, 1).
[numthreads(WAVE_SIZE, 1, 1)]
void DebugSSRC_VisualizeProbePlacement (int DispatchID : SV_DispatchThreadID) {
// Unflatten the 1D dispatch index into 2D tile coordinates (row width = MI.TileDimensions.x).
int2 ProbeIndex = int2(DispatchID % MI.TileDimensions.x, DispatchID / MI.TileDimensions.x);
// Threads whose tile coordinates fall outside the tile grid have nothing to draw.
if(ProbeIndex.x >= MI.TileDimensions.x || ProbeIndex.y >= MI.TileDimensions.y) {
return;
}
int2 UniformProbeScreenCoords = GetUniformScreenProbeScreenPosition(ProbeIndex);

// Only mark the uniform probe when its pixel lies inside the screen and the probe is valid.
if(all(UniformProbeScreenCoords >= 0) && all(UniformProbeScreenCoords < MI.ScreenDimensions)
&& IsScreenProbeValid(ProbeIndex))
g_RWDebugOutput[UniformProbeScreenCoords] = float4(10.f.xxx, 1.f);
// Visualize adaptive probes
// NOTE(review): the loop bound is read from element 0 of g_RWAdaptiveProbeCountBuffer, so every
// thread uses the same count regardless of its tile. If GetAdaptiveProbeIndex(ProbeIndex, i)
// expects i to be a rank within this tile, a per-tile count may have been intended - confirm.
int AdaptiveProbeCount = g_RWAdaptiveProbeCountBuffer[0];
for(int i = 0; i < AdaptiveProbeCount; i++) {
// Offset the looked-up adaptive index by the uniform probe count, then unflatten it with the
// same row width as above to obtain the 2D index passed to GetScreenProbeHeader.
int ScreenProbeIndex1 = GetAdaptiveProbeIndex(ProbeIndex, i) + MI.UniformScreenProbeCount;
int2 AdaptiveProbeIndex = int2(ScreenProbeIndex1 % MI.TileDimensions.x, ScreenProbeIndex1 / MI.TileDimensions.x);
ProbeHeader Header = GetScreenProbeHeader(AdaptiveProbeIndex);
int2 AdaptiveProbeScreenPosition = Header.ScreenPosition;
// NOTE(review): unlike the uniform probe above, this write is not checked against
// MI.ScreenDimensions - presumably the stored screen position is always on screen; verify.
g_RWDebugOutput[AdaptiveProbeScreenPosition] = float4(10.f.xxx, 1.f);
}
}

// [numthreads(WAVE_SIZE, 1, 1)]
// void DebugSSRC_PrecomputeIncidentRadiance (uint DispatchID : SV_DispatchThreadID) {
// if(DispatchID >= MI.DebugVisualizeIncidentRadianceNumPoints) {
Expand Down
51 changes: 45 additions & 6 deletions src/core/src/render_techniques/migi/migi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,26 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept
{
if (need_reload_kernel_)
{
terminate();
init(capsaicin);
need_reload_kernel_ = false;
releaseKernels();
initKernels(capsaicin);
}

if(need_reload_memory_)
{
releaseResources();
initResources(capsaicin);
}

// Make sure the frame buffers for graphics kernels are initialized before reloading them.
if(need_reload_kernel_ || need_reload_memory_)
{
initGraphicsKernels(capsaicin);
}

need_reload_kernel_ = false;
need_reload_memory_ = false;


// Clear the hash-grid cache if user's changed the cell size
if (need_reset_hash_grid_cache_)
{
Expand Down Expand Up @@ -222,6 +237,14 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept
C.MaxBasisCount = options_.SSRC_max_basis_count;

C.FrameIndex = capsaicin.getFrameIndex();

C.PreviousCameraRight = previous_constants_.CameraRight;
C.PreviousCameraUp = previous_constants_.CameraUp;

glm::vec2 jitter = {camera_matrices.projection[2][0], camera_matrices.projection[2][1]};
C.TAAJitterUV = jitter;
C.PreviousTAAJitterUV = previous_constants_.TAAJitterUV;

C.FrameSeed = options_.debug_freeze_frame_seed ? 123 : C.FrameIndex;
C.PreviousFrameSeed = previous_constants_.FrameSeed;

Expand All @@ -237,6 +260,7 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept
C.MaxAdaptiveProbeCount = options_.SSRC_max_adaptive_probe_count;
C.NoImportanceSampling = options_.no_importance_sampling;
C.NoAdaptiveProbes = options_.no_adaptive_probes;
// need_reset_screen_space_cache_ is cleared at the end of the render() function
C.ResetCache = need_reset_screen_space_cache_;

C.CacheUpdateLearningRate = options_.cache_update_learing_rate;
Expand Down Expand Up @@ -714,7 +738,6 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept
{
// Resolving requires the wrap sampler for material textures
gfxProgramSetParameter(gfx_, kernels_.program, "g_TextureSampler", capsaicin.getLinearWrapSampler());

const TimedSection timed_section(*this, "SSRC_IntegrateASG");
gfxCommandBindKernel(gfx_, kernels_.SSRC_IntegrateASG);
uint32_t dispatch_size[] = {options_.width / SSRC_TILE_SIZE, options_.height / SSRC_TILE_SIZE};
Expand Down Expand Up @@ -743,10 +766,23 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept

// Specify whether the GI output is copied to debug drawing as a background
bool debug_buffer_copied = false;
(void)debug_buffer_copied;

if(options_.active_debug_view == "SSRC_ProbeAllocation") {
// TODO
const TimedSection timed_section(*this, "SSRC_ProbeAllocation");

if(!debug_buffer_copied)
{
// Copy the depth buffer to the depth buffer for debug visualization
gfxCommandCopyTexture(gfx_, tex_.depth, capsaicin.getAOVBuffer("VisibilityDepth"));
gfxCommandCopyTexture(gfx_, capsaicin.getAOVBuffer("Debug"), gi_output_aov);
debug_buffer_copied = true;
}

gfxCommandBindKernel(gfx_, kernels_.DebugSSRC_VisualizeProbePlacement);
auto threads = gfxKernelGetNumThreads(gfx_, kernels_.DebugSSRC_VisualizeProbePlacement);
int tile_count = divideAndRoundUp(options_.width, SSRC_TILE_SIZE) * divideAndRoundUp(options_.height, SSRC_TILE_SIZE);
uint32_t dispatch_size[] = {(tile_count + threads[0] - 1) / threads[0]};
gfxCommandDispatch(gfx_, dispatch_size[0], 1, 1);
} else if(options_.active_debug_view == "SSRC_Complexity") {
// const TimedSection timed_section(*this, "SSRC_Complexity");
// gfxCommandBindKernel(gfx_, kernels_.DebugSSRC_show_difference);
Expand Down Expand Up @@ -839,6 +875,9 @@ void MIGI::render(CapsaicinInternal &capsaicin) noexcept
// Increment internal frame index, which is different from the frame index in Capsaicin
internal_frame_index_ ++;

// Clear flags
need_reset_screen_space_cache_ = false;

#ifndef NDEBUG
fflush(stdout);
#endif
Expand Down
1 change: 1 addition & 0 deletions src/core/src/render_techniques/migi/migi.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class MIGI : public RenderTechnique
GfxKernel SSRC_UpdateProbes {};
GfxKernel SSRC_IntegrateASG {};
GfxKernel DebugSSRC_FetchCursorPos {};
GfxKernel DebugSSRC_VisualizeProbePlacement {};
GfxKernel DebugSSRC_PrepareUpdateRays {};

GfxKernel GenerateDispatch {};
Expand Down
Loading

0 comments on commit 94303d4

Please sign in to comment.