From 05188aca692924a6c5ad312fc819fa1d5aa9c787 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 29 Jun 2020 10:55:50 +0200 Subject: [PATCH 1/2] Fix bug with control dependent expression tracking. For a direct branch without merge, we lost control dependent expressions. --- .../avoid-expression-lowering-to-loop.frag | 28 +++++++++++++++++++ .../inliner-dominator-inside-loop.asm.frag | 6 ++-- ...schain-invalid-expression.asm.invalid.frag | 6 ++-- .../frag/array-copy-error.asm.invalid.frag | 4 ++- .../phi-variable-declaration.asm.invalid.frag | 4 ++- .../avoid-expression-lowering-to-loop.frag | 26 +++++++++++++++++ .../avoid-expression-lowering-to-loop.frag | 23 +++++++++++++++ spirv_glsl.cpp | 4 +++ 8 files changed, 95 insertions(+), 6 deletions(-) create mode 100644 reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag create mode 100644 reference/shaders/frag/avoid-expression-lowering-to-loop.frag create mode 100644 shaders/frag/avoid-expression-lowering-to-loop.frag diff --git a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 000000000..f7b576636 --- /dev/null +++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,28 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() +{ + highp float _34 = dFdx(vertex.x); + float _62; + _62 = 0.0; + for (float _61 = 0.0; _61 < _44.count; ) + { + _62 += ((1.0 / float(textureSize(tex, 0).x)) * _34); + _61 += 1.0; + continue; + } + fragColor = vec4(_62); +} + diff --git a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag index 29653cbb4..ac262ad70 100644 --- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag @@ -167,12 +167,14 @@ void main() vec2 _223 = vec2(1.0); vec2 _224 = (_220.wy * 2.0) - _223; vec3 _232 = vec3(_224, sqrt(clamp(1.0 + dot(-_224, _224), 0.0, 1.0))); - vec2 _240 = (texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0).wy * 2.0) - _223; + vec4 _237 = texture(SPIRV_Cross_CombinedNormalDetailMapTextureNormalDetailMapSampler, _166 * 0.0); + vec2 _240 = (_237.wy * 2.0) - _223; vec2 _252 = _232.xy + (vec3(_240, sqrt(clamp(1.0 + dot(-_240, _240), 0.0, 1.0))).xy * 0.0); vec3 _253 = vec3(_252.x, _252.y, _232.z); vec2 _255 = _253.xy * _165; vec3 _256 = vec3(_255.x, _255.y, _253.z); - vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds).x * 2.0); + vec4 _268 = texture(SPIRV_Cross_CombinedStudsMapTextureStudsMapSampler, _156.UvStuds); + vec3 _271 = ((IN_Color.xyz * (_193 * 1.0).xyz) * (1.0 + (_256.x * 0.300000011920928955078125))) * (_268.x * 2.0); vec4 _298; for (;;) { diff --git a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag index 1af8db758..35822f1a8 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -217,10 +217,12 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu main0_out out = {}; float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w); float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz); - float2 _190 = (Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))).xy * float2(2.0)) - float2(1.0); + float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))); + float2 _190 = (_187.xy * float2(2.0)) - float2(1.0); float3 _206 = normalize(float3x3(float3(1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, 0.0, 1.0)) * (((float4(_190, sqrt(fast::clamp(1.0 - dot(_190, _190), 0.0, 1.0)), 1.0).xyz * float3(0.300000011920928955078125, 0.300000011920928955078125, 1.0)) * float3(View.View_NormalOverrideParameter.w)) + View.View_NormalOverrideParameter.xyz)); float _208 = dot(_206, _181); - float _219 = mix(0.4000000059604644775390625, 1.0, Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(20.0))).x); + float4 _217 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(20.0))); + float _219 = mix(0.4000000059604644775390625, 1.0, _217.x); float4 _223 = Material_Texture2D_1.sample(Material_Texture2D_1Sampler, (in.in_var_TEXCOORD0 * float2(5.0))); float _224 = _177.w; float _228 = fast::min(fast::max((_224 - 24.0) * 0.000666666659526526927947998046875, 0.0), 1.0); diff --git a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag index 73dc2eee1..ce92e3621 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -276,6 +276,8 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); float _172 = 1.0 / _170; float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); float _180_copy; float2 _183; _183 = float2(0.0); @@ -290,7 +292,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { if (float(_185) < (floor(_170) + 2.0)) { - _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)), dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)))).y; + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; if (_180 < _188) { float _201 = _188 - _180; diff --git a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag index 73dc2eee1..ce92e3621 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -276,6 +276,8 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); float _172 = 1.0 / _170; float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); + float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); + float2 _176 = dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); float _180_copy; float2 _183; _183 = float2(0.0); @@ -290,7 +292,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { if (float(_185) < (floor(_170) + 2.0)) { - _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)), dfdy(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)))).y; + _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; if (_180 < _188) { float _201 = _188 - _180; diff --git a/reference/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 000000000..6313d896e --- /dev/null +++ b/reference/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,26 @@ +#version 310 es +precision mediump float; +precision highp int; + +layout(binding = 1, std140) uniform Count +{ + float count; +} _44; + +layout(binding = 0) uniform mediump sampler2D tex; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() +{ + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < _44.count; i += 1.0) + { + r += (size * d); + } + fragColor = vec4(r); +} + diff --git a/shaders/frag/avoid-expression-lowering-to-loop.frag b/shaders/frag/avoid-expression-lowering-to-loop.frag new file mode 100644 index 000000000..3473875a4 --- /dev/null +++ b/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -0,0 +1,23 @@ +#version 310 es +precision mediump float; +precision mediump int; + +layout(binding = 0) uniform mediump sampler2D tex; +layout(binding = 1) uniform Count +{ + float count; +}; + +layout(location = 0) in highp vec4 vertex; +layout(location = 0) out vec4 fragColor; + +void main() { + + highp float size = 1.0 / float(textureSize(tex, 0).x); + float r = 0.0; + float d = dFdx(vertex.x); + for (float i = 0.0; i < count ; i += 1.0) + r += size * d; + + fragColor = vec4(r); +} diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 7dd9f8f91..b4542b484 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -13492,7 +13492,11 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // If we hit this case, we're dealing with an unconditional branch, which means we will output // that block after this. If we had selection merge, we already flushed phi variables. if (block.merge != SPIRBlock::MergeSelection) + { flush_phi(block.self, block.next_block); + // For a direct branch, need to remember to invalidate expressions in the next linear block instead. + get(block.next_block).invalidate_expressions = block.invalidate_expressions; + } // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. if (!current_emitting_switch_fallthrough) From 3afbfdb0909e218b7c6b66bafa367dfc9a0e387e Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 29 Jun 2020 12:20:35 +0200 Subject: [PATCH 2/2] Implement context-sensitive expression read tracking. When inside a loop, treat any read of outer expressions to happen multiple times, forcing a temporary of said outer expressions. This avoids the problem where we can end up relying on loop-invariant code motion to happen in the compiler when converting optimized shaders. --- .../asm/frag/loop-header-to-continue.asm.frag | 6 +++-- .../avoid-expression-lowering-to-loop.frag | 3 ++- .../asm/comp/variable-pointers-2.asm.comp | 3 ++- .../inliner-dominator-inside-loop.asm.frag | 3 ++- ...schain-invalid-expression.asm.invalid.frag | 3 ++- .../frag/array-copy-error.asm.invalid.frag | 3 ++- .../phi-variable-declaration.asm.invalid.frag | 3 ++- .../asm/frag/texture-atomics.asm.frag | 3 ++- ...re-atomics.asm.graphics-robust-access.frag | 3 ++- .../asm/frag/loop-header-to-continue.asm.frag | 6 +++-- spirv_common.hpp | 26 +++++++++++++++++++ spirv_cross.cpp | 5 ++++ spirv_cross.hpp | 14 ++++++++++ spirv_glsl.cpp | 25 ++++++++++++++++++ spirv_glsl.hpp | 1 + 15 files changed, 95 insertions(+), 12 deletions(-) diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag index c2dba928d..6497ad77a 100644 --- a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,8 +33,8 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp(_50 * 0.06399999558925628662109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53); _55 += (_72.x * _78); _58 += _78; _60++; diff --git a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag index f7b576636..9019ac0d3 100644 --- a/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag +++ b/reference/opt/shaders/frag/avoid-expression-lowering-to-loop.frag @@ -14,12 +14,13 @@ layout(location = 0) out vec4 fragColor; void main() { + highp float _24 = 1.0 / float(textureSize(tex, 0).x); highp float _34 = dFdx(vertex.x); float _62; _62 = 0.0; for (float _61 = 0.0; _61 < _44.count; ) { - _62 += ((1.0 / float(textureSize(tex, 0).x)) * _34); + _62 += (_24 * _34); _61 += 1.0; continue; } diff --git a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp index 4bf34ff4b..dafd6d5d4 100644 --- a/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/shaders-msl/asm/comp/variable-pointers-2.asm.comp @@ -33,6 +33,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], { device foo* _46 = select_buffer(buf, cb); device foo* _45 = _46; + thread uint3* _47 = select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb); device foo* _48 = _45; device int* _52; device int* _55; @@ -46,7 +47,7 @@ kernel void main0(device foo& buf [[buffer(0)]], constant bar& cb [[buffer(1)]], _58 = *_55; if (_57 != _58) { - int _66 = (_57 + _58) + int((*select_input(gl_GlobalInvocationID, gl_LocalInvocationID, cb)).x); + int _66 = (_57 + _58) + int((*_47).x); *_52 = _66; *_55 = _66; _52 = &_52[1u]; diff --git a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag index ac262ad70..4049c4826 100644 --- a/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag +++ b/reference/shaders-no-opt/asm/frag/inliner-dominator-inside-loop.asm.frag @@ -125,6 +125,7 @@ void main() _151.UvStuds = IN_UvStuds_EdgeDistance2.xy; SurfaceInput _156 = _151; _156.UvStuds.y = (fract(_151.UvStuds.y) + IN_studIndex) * 0.25; + float _160 = clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0); float _163 = _146.View_Depth.w * _19.CB0.RefractionBias_FadeDistance_GlowFactor.y; float _165 = clamp(1.0 - _163, 0.0, 1.0); vec2 _166 = IN_Uv_EdgeDistance1.xy * 1.0; @@ -141,7 +142,7 @@ void main() else { float _180 = 1.0 / (1.0 - 0.0); - _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((clamp(1.0 - (_146.View_Depth.w * 0.00333332992158830165863037109375), 0.0, 1.0) * _180) - (0.0 * _180), 0.0, 1.0))); + _193 = mix(texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166 * 0.25), texture(SPIRV_Cross_CombinedDiffuseMapTextureDiffuseMapSampler, _166), vec4(clamp((_160 * _180) - (0.0 * _180), 0.0, 1.0))); break; } _193 = _192; diff --git a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag index 35822f1a8..3a13024df 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/accesschain-invalid-expression.asm.invalid.frag @@ -216,6 +216,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { main0_out out = {}; float4 _177 = float4((((gl_FragCoord.xy - View.View_ViewRectMin.xy) * View.View_ViewSizeAndInvSize.zw) - float2(0.5)) * float2(2.0, -2.0), _138, 1.0) * float4(gl_FragCoord.w); + float3 _179 = in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation); float3 _181 = normalize(-in.in_var_TEXCOORD8.xyz); float4 _187 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (in.in_var_TEXCOORD0 * float2(10.0))); float2 _190 = (_187.xy * float2(2.0)) - float2(1.0); @@ -330,7 +331,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu { if (_491 < _Globals.NumDynamicPointLights) { - float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - (in.in_var_TEXCOORD8.xyz - float3(View.View_PreViewTranslation)); + float3 _501 = _Globals.LightPositionAndInvRadius[_491].xyz - _179; float _502 = dot(_501, _501); float3 _505 = _501 * float3(rsqrt(_502)); _507 = normalize(_181 + _505); diff --git a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag index ce92e3621..bc40c7ccf 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/array-copy-error.asm.invalid.frag @@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float3 _151 = normalize(-_148); float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); float _172 = 1.0 / _170; float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); @@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _189 = 1.0; for (;;) { - if (float(_185) < (floor(_170) + 2.0)) + if (float(_185) < (_171 + 2.0)) { _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; if (_180 < _188) diff --git a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag index ce92e3621..bc40c7ccf 100644 --- a/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag +++ b/reference/shaders-ue4-no-opt/asm/frag/phi-variable-declaration.asm.invalid.frag @@ -274,6 +274,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float3 _151 = normalize(-_148); float3 _152 = _151 * float3x3(in.in_var_TEXCOORD10_centroid.xyz, cross(in.in_var_TEXCOORD11_centroid.xyz, in.in_var_TEXCOORD10_centroid.xyz) * float3(in.in_var_TEXCOORD11_centroid.w), in.in_var_TEXCOORD11_centroid.xyz); float _170 = mix(Material.Material_ScalarExpressions[0].y, Material.Material_ScalarExpressions[0].z, fast::min(fast::max(abs(dot(_151, in.in_var_TEXCOORD11_centroid.xyz)), 0.0), 1.0)); + float _171 = floor(_170); float _172 = 1.0 / _170; float2 _174 = (float2(Material.Material_ScalarExpressions[0].x) * ((_152.xy * float2(-1.0)) / float2(_152.z))) * float2(_172); float2 _175 = dfdx(float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y)); @@ -290,7 +291,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_View& View [[bu float _189 = 1.0; for (;;) { - if (float(_185) < (floor(_170) + 2.0)) + if (float(_185) < (_171 + 2.0)) { _188 = Material_Texture2D_0.sample(Material_Texture2D_0Sampler, (float2(in_var_TEXCOORD0[0].x, in_var_TEXCOORD0[0].y) + _183), gradient2d(_175, _176)).y; if (_180 < _188) diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag index 98cdda44d..d73c30cbd 100644 --- a/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.frag @@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB uint _107 = _103 + 1u; if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz); float _122 = _96.x; float _123 = _96.y; spvUnsafeArray _73; @@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB _158 = float3(500000.0); for (int _160 = 0; _160 < 8; ) { - float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _166 = _73[_160] - _121; float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); _155 = fast::max(_155, _170); _158 = fast::min(_158, _170); diff --git a/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag index 98cdda44d..d73c30cbd 100644 --- a/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag +++ b/reference/shaders-ue4/asm/frag/texture-atomics.asm.graphics-robust-access.frag @@ -84,6 +84,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB uint _107 = _103 + 1u; if (all(CulledObjectBoxBounds._m0[_107].xy > _96.xy) && all(CulledObjectBoxBounds._m0[_103].xyz < _102)) { + float3 _121 = float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz); float _122 = _96.x; float _123 = _96.y; spvUnsafeArray _73; @@ -103,7 +104,7 @@ fragment main0_out main0(main0_in in [[stage_in]], const device type_StructuredB _158 = float3(500000.0); for (int _160 = 0; _160 < 8; ) { - float3 _166 = _73[_160] - (float3(0.5) * (CulledObjectBoxBounds._m0[_103].xyz + CulledObjectBoxBounds._m0[_107].xyz)); + float3 _166 = _73[_160] - _121; float3 _170 = float3(dot(_166, CulledObjectBoxBounds._m0[_103 + 2u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 3u].xyz), dot(_166, CulledObjectBoxBounds._m0[_103 + 4u].xyz)); _155 = fast::max(_155, _170); _158 = fast::min(_158, _170); diff --git a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag index a99322d67..8a3b664bc 100644 --- a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag +++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -22,8 +22,10 @@ layout(location = 0) out vec4 _entryPointOutput; void main() { + vec2 _45 = vec2(0.0, _8.CB1.TextureSize.w); vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); float _50 = _49.y; + float _53 = clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375); float _55; float _58; _55 = 0.0; @@ -31,8 +33,8 @@ void main() for (int _60 = -3; _60 <= 3; ) { float _64 = float(_60); - vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)); - float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)); + vec4 _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (_45 * _64)); + float _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < _53); _55 += (_72.x * _78); _58 += _78; _60++; diff --git a/spirv_common.hpp b/spirv_common.hpp index af0d0767f..4f9bcab99 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -262,6 +262,29 @@ inline std::string convert_to_string(double t, char locale_radix_point) return buf; } +template +struct ValueSaver +{ + explicit ValueSaver(T ¤t_) + : current(current_) + , saved(current_) + { + } + + void release() + { + current = saved; + } + + ~ValueSaver() + { + release(); + } + + T ¤t; + T saved; +}; + #if defined(__clang__) || defined(__GNUC__) #pragma GCC diagnostic pop #elif defined(_MSC_VER) @@ -699,6 +722,9 @@ struct SPIRExpression : IVariant // Used by access chain Store and Load since we read multiple expressions in this case. SmallVector implied_read_expressions; + // The expression was emitted at a certain scope. Lets us track when an expression read means multiple reads. + uint32_t emitted_loop_level = 0; + SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 8abe19a4a..19c9e1a95 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -4674,3 +4674,8 @@ bool Compiler::flush_phi_required(BlockID from, BlockID to) const return true; return false; } + +void Compiler::add_loop_level() +{ + current_loop_level++; +} diff --git a/spirv_cross.hpp b/spirv_cross.hpp index e452ca6f2..17c081882 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -513,9 +513,22 @@ class Compiler SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; + uint32_t current_loop_level = 0; std::unordered_set active_interface_variables; bool check_active_interface_variables = false; + void add_loop_level(); + + void set_initializers(SPIRExpression &e) + { + e.emitted_loop_level = current_loop_level; + } + + template + void set_initializers(const T &) + { + } + // If our IDs are out of range here as part of opcodes, throw instead of // undefined behavior. template @@ -524,6 +537,7 @@ class Compiler ir.add_typed_id(static_cast(T::type), id); auto &var = variant_set(ir.ids[id], std::forward

(args)...); var.self = id; + set_initializers(var); return var; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index b4542b484..8d8241e60 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -341,6 +341,7 @@ void CompilerGLSL::reset() statement_count = 0; indent = 0; + current_loop_level = 0; } void CompilerGLSL::remap_pls_variables() @@ -4547,6 +4548,17 @@ bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const return suppressed_usage_tracking.count(id) != 0; } +bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const +{ + auto *expr = maybe_get(id); + if (!expr) + return false; + + // If we're emitting code at a deeper loop level than when we emitted the expression, + // we're probably reading the same expression over and over. + return current_loop_level > expr->emitted_loop_level; +} + SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, bool suppress_usage_tracking) { @@ -8169,6 +8181,13 @@ void CompilerGLSL::track_expression_read(uint32_t id) auto &v = expression_usage_counts[id]; v++; + // If we create an expression outside a loop, + // but access it inside a loop, we're implicitly reading it multiple times. + // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion + // working inside the backend compiler. + if (expression_read_implies_multiple_reads(id)) + v++; + if (v >= 2) { //if (v == 2) @@ -13000,6 +13019,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) bool skip_direct_branch = false; bool emitted_loop_header_variables = false; bool force_complex_continue_block = false; + ValueSaver loop_level_saver(current_loop_level); + + if (block.merge == SPIRBlock::MergeLoop) + add_loop_level(); emit_hoisted_temporaries(block.declare_temporary); @@ -13550,6 +13573,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) else end_scope(); + loop_level_saver.release(); + // We cannot break out of two loops at once, so don't check for break; here. // Using block.self as the "from" block isn't quite right, but it has the same scope // and dominance structure, so it's fine. diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 351adae15..47b6d0b92 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -549,6 +549,7 @@ class CompilerGLSL : public Compiler void emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op); bool expression_is_forwarded(uint32_t id) const; bool expression_suppresses_usage_tracking(uint32_t id) const; + bool expression_read_implies_multiple_reads(uint32_t id) const; SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs, bool suppress_usage_tracking = false);