Skip to content

Commit

Permalink
[LLPC] Fully scalarize compares and selects
Browse files Browse the repository at this point in the history
A while back, we switched to no longer always fully scalarizing in order
to preserve packed 16-bit operations.

This had the effect that comparisons and selects were no longer
scalarized due to limitations in LLVM's ScalarizerPass. It also seems
that there is some correctness error, most likely somewhere else in the
backend, which unfortunately hasn't been fully root-caused.

In any case, there are no packed compares or (vector-condition)
selects, and fully scalarizing them tends to produce better code. So
let's do that explicitly in the frontend as a quick fix.

Fixes: Red Dead Redemption with FSR 2.0
Fixes: c8f8da9 ("lgc: set scalarizer min-bits to 32")
  • Loading branch information
nhaehnle committed Nov 21, 2023
1 parent 1d85fb2 commit 6d724b7
Show file tree
Hide file tree
Showing 29 changed files with 823 additions and 386 deletions.
12 changes: 6 additions & 6 deletions llpc/test/shaderdb/core/OpAny_TestBvec2_lit.frag
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ void main()
// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) (...) @lgc.create.load.buffer.desc.p7(i64 0, i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
// CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
// CHECK-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP6]]
// CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP7]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
// CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i64 1
// CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP5]]
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[DOTFR]], 0
// CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[DOTNOT]], <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
// CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[SPEC_SELECT]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
// CHECK-NEXT: ret void
//
34 changes: 26 additions & 8 deletions llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py
// RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s

#version 450

layout(binding = 0) uniform Uniforms
Expand All @@ -11,11 +14,26 @@ void main()
{
fragColor = (f3_0 == f3_1) ? vec4(1.0) : vec4(0.5);
}
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results
; SHADERTEST: fcmp oeq <3 x float>
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

// CHECK-LABEL: @lgc.shader.FS.main(
// CHECK-NEXT: .entry:
// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) (...) @lgc.create.load.buffer.desc.p7(i64 0, i32 0, i32 0, i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP0]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <{ [3 x float], [4 x i8], [3 x float] }>, ptr addrspace(7) [[TMP0]], i32 0, i32 2
// CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP3]], align 16
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[TMP2]], i64 0
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x float> [[TMP4]], i64 0
// CHECK-NEXT: [[TMP7:%.*]] = fcmp oeq float [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <3 x float> [[TMP2]], i64 1
// CHECK-NEXT: [[TMP9:%.*]] = extractelement <3 x float> [[TMP4]], i64 1
// CHECK-NEXT: [[TMP10:%.*]] = fcmp oeq float [[TMP8]], [[TMP9]]
// CHECK-NEXT: [[TMP11:%.*]] = extractelement <3 x float> [[TMP2]], i64 2
// CHECK-NEXT: [[TMP12:%.*]] = extractelement <3 x float> [[TMP4]], i64 2
// CHECK-NEXT: [[TMP13:%.*]] = fcmp oeq float [[TMP11]], [[TMP12]]
// CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP7]], [[TMP10]]
// CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
// CHECK-NEXT: [[TMP16:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP15]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
// CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[TMP16]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
// CHECK-NEXT: ret void
//
78 changes: 67 additions & 11 deletions llpc/test/shaderdb/core/OpFOrdGreaterThanEqual_TestFloat_lit.frag
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py
// RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s

#version 450 core

layout(location = 0) in vec4 colorIn1;
Expand All @@ -12,14 +15,67 @@ void main()
bvec4 q = notEqual(w,z);
color = vec4(q);
}
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results
; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results
; SHADERTEST: icmp uge <4 x i32>
; SHADERTEST: fcmp ult <4 x float>
; SHADERTEST: icmp sge <4 x i32>
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

// CHECK-LABEL: @lgc.shader.FS.main(
// CHECK-NEXT: .entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 1, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = fcmp ult float [[TMP2]], [[TMP3]]
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
// CHECK-NEXT: [[TMP7:%.*]] = fcmp ult float [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
// CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
// CHECK-NEXT: [[TMP10:%.*]] = fcmp ult float [[TMP8]], [[TMP9]]
// CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT: [[TMP13:%.*]] = fcmp ult float [[TMP11]], [[TMP12]]
// CHECK-NEXT: [[TMP14:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: [[TMP15:%.*]] = fptoui <4 x float> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
// CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP15]], i64 0
// CHECK-NEXT: [[TMP18:%.*]] = icmp uge i32 [[TMP16]], [[TMP17]]
// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP14]], i64 1
// CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP15]], i64 1
// CHECK-NEXT: [[TMP21:%.*]] = icmp uge i32 [[TMP19]], [[TMP20]]
// CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP14]], i64 2
// CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP15]], i64 2
// CHECK-NEXT: [[TMP24:%.*]] = icmp uge i32 [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP14]], i64 3
// CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP15]], i64 3
// CHECK-NEXT: [[TMP27:%.*]] = icmp uge i32 [[TMP25]], [[TMP26]]
// CHECK-NEXT: [[TMP28:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: [[TMP29:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i64 0
// CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP29]], i64 0
// CHECK-NEXT: [[TMP32:%.*]] = icmp sge i32 [[TMP30]], [[TMP31]]
// CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP28]], i64 1
// CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP29]], i64 1
// CHECK-NEXT: [[TMP35:%.*]] = icmp sge i32 [[TMP33]], [[TMP34]]
// CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP28]], i64 2
// CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[TMP29]], i64 2
// CHECK-NEXT: [[TMP38:%.*]] = icmp sge i32 [[TMP36]], [[TMP37]]
// CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i32> [[TMP28]], i64 3
// CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[TMP29]], i64 3
// CHECK-NEXT: [[TMP41:%.*]] = icmp sge i32 [[TMP39]], [[TMP40]]
// CHECK-NEXT: [[TMP42:%.*]] = xor i1 [[TMP4]], [[TMP18]]
// CHECK-NEXT: [[TMP43:%.*]] = xor i1 [[TMP7]], [[TMP21]]
// CHECK-NEXT: [[TMP44:%.*]] = xor i1 [[TMP10]], [[TMP24]]
// CHECK-NEXT: [[TMP45:%.*]] = xor i1 [[TMP13]], [[TMP27]]
// CHECK-NEXT: [[TMP46:%.*]] = xor i1 [[TMP32]], [[TMP42]]
// CHECK-NEXT: [[TMP47:%.*]] = xor i1 [[TMP35]], [[TMP43]]
// CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP38]], [[TMP44]]
// CHECK-NEXT: [[TMP49:%.*]] = xor i1 [[TMP45]], [[TMP41]]
// CHECK-NEXT: [[TMP50:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP46]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x float> poison, float [[TMP50]], i64 0
// CHECK-NEXT: [[TMP52:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP47]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP52]], i64 1
// CHECK-NEXT: [[TMP54:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP48]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x float> [[TMP53]], float [[TMP54]], i64 2
// CHECK-NEXT: [[TMP56:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP49]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x float> [[TMP55]], float [[TMP56]], i64 3
// CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[TMP57]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
// CHECK-NEXT: ret void
//
77 changes: 67 additions & 10 deletions llpc/test/shaderdb/core/OpFOrdGreaterThan_TestFloat_lit.frag
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py
// RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s

#version 450 core

layout(location = 0) in vec4 colorIn1;
Expand All @@ -13,13 +16,67 @@ void main()
bvec4 q = notEqual(w,z);
color = vec4(q);
}
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results
; SHADERTEST: icmp ugt <4 x i32>
; SHADERTEST: fcmp ule <4 x float>
; SHADERTEST: icmp sgt <4 x i32>
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

// CHECK-LABEL: @lgc.shader.FS.main(
// CHECK-NEXT: .entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 1, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = fcmp ule float [[TMP2]], [[TMP3]]
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
// CHECK-NEXT: [[TMP7:%.*]] = fcmp ule float [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
// CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
// CHECK-NEXT: [[TMP10:%.*]] = fcmp ule float [[TMP8]], [[TMP9]]
// CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT: [[TMP13:%.*]] = fcmp ule float [[TMP11]], [[TMP12]]
// CHECK-NEXT: [[TMP14:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: [[TMP15:%.*]] = fptoui <4 x float> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
// CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP15]], i64 0
// CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP16]], [[TMP17]]
// CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[TMP14]], i64 1
// CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP15]], i64 1
// CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i32 [[TMP19]], [[TMP20]]
// CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP14]], i64 2
// CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP15]], i64 2
// CHECK-NEXT: [[TMP24:%.*]] = icmp ugt i32 [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP14]], i64 3
// CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP15]], i64 3
// CHECK-NEXT: [[TMP27:%.*]] = icmp ugt i32 [[TMP25]], [[TMP26]]
// CHECK-NEXT: [[TMP28:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT: [[TMP29:%.*]] = fptosi <4 x float> [[TMP0]] to <4 x i32>
// CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i64 0
// CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP29]], i64 0
// CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]]
// CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[TMP28]], i64 1
// CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP29]], i64 1
// CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
// CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x i32> [[TMP28]], i64 2
// CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[TMP29]], i64 2
// CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]]
// CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i32> [[TMP28]], i64 3
// CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[TMP29]], i64 3
// CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
// CHECK-NEXT: [[TMP42:%.*]] = xor i1 [[TMP4]], [[TMP18]]
// CHECK-NEXT: [[TMP43:%.*]] = xor i1 [[TMP7]], [[TMP21]]
// CHECK-NEXT: [[TMP44:%.*]] = xor i1 [[TMP10]], [[TMP24]]
// CHECK-NEXT: [[TMP45:%.*]] = xor i1 [[TMP13]], [[TMP27]]
// CHECK-NEXT: [[TMP46:%.*]] = xor i1 [[TMP32]], [[TMP42]]
// CHECK-NEXT: [[TMP47:%.*]] = xor i1 [[TMP35]], [[TMP43]]
// CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP38]], [[TMP44]]
// CHECK-NEXT: [[TMP49:%.*]] = xor i1 [[TMP45]], [[TMP41]]
// CHECK-NEXT: [[TMP50:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP46]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP51:%.*]] = insertelement <4 x float> poison, float [[TMP50]], i64 0
// CHECK-NEXT: [[TMP52:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP47]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x float> [[TMP51]], float [[TMP52]], i64 1
// CHECK-NEXT: [[TMP54:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP48]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x float> [[TMP53]], float [[TMP54]], i64 2
// CHECK-NEXT: [[TMP56:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP49]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x float> [[TMP55]], float [[TMP56]], i64 3
// CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[TMP57]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
// CHECK-NEXT: ret void
//
39 changes: 31 additions & 8 deletions llpc/test/shaderdb/core/OpFOrdLessThanEqual_TestFloat_lit.frag
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py
// RUN: amdllpc -o - -gfxip 11.0 -emit-lgc %s | FileCheck -check-prefixes=CHECK %s

#version 450 core

layout(location = 0) in vec4 colorIn1;
Expand All @@ -8,11 +11,31 @@ void main()
bvec4 x = lessThanEqual (colorIn1, colorIn2);
color = vec4(x);
}
// BEGIN_SHADERTEST
/*
; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s
; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results
; SHADERTEST: fcmp ole <4 x float>
; SHADERTEST: AMDLLPC SUCCESS
*/
// END_SHADERTEST

// CHECK-LABEL: @lgc.shader.FS.main(
// CHECK-NEXT: .entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 1, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> (...) @lgc.create.read.generic.input.v4f32(i32 0, i32 0, i32 0, i32 0, i32 16, i32 poison)
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = fcmp ole float [[TMP2]], [[TMP3]]
// CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
// CHECK-NEXT: [[TMP7:%.*]] = fcmp ole float [[TMP5]], [[TMP6]]
// CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
// CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
// CHECK-NEXT: [[TMP10:%.*]] = fcmp ole float [[TMP8]], [[TMP9]]
// CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
// CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT: [[TMP13:%.*]] = fcmp ole float [[TMP11]], [[TMP12]]
// CHECK-NEXT: [[TMP14:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP4]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0
// CHECK-NEXT: [[TMP16:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP7]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP16]], i64 1
// CHECK-NEXT: [[TMP18:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP10]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP18]], i64 2
// CHECK-NEXT: [[TMP20:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[TMP13]], float 1.000000e+00, float 0.000000e+00
// CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP20]], i64 3
// CHECK-NEXT: call void (...) @lgc.create.write.generic.output(<4 x float> [[TMP21]], i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison)
// CHECK-NEXT: ret void
//
Loading

0 comments on commit 6d724b7

Please sign in to comment.