Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add baseline test for add64 with constant test #122048

Merged
merged 1 commit into from
Jan 8, 2025

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Jan 8, 2025

Add baseline test for 64-bit adds when the low half of
an operand is known 0.

Copy link
Contributor Author

arsenm commented Jan 8, 2025

@llvmbot
Copy link
Member

llvmbot commented Jan 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Add baseline test for 64-bit adds when the low half of
an operand is known 0.


Full diff: https://github.com/llvm/llvm-project/pull/122048.diff

2 Files Affected:

  • (added) llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll (+213)
  • (added) llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll (+213)
diff --git a/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll
new file mode 100644
index 00000000000000..981e33f89d956e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+; Reduce a 64-bit add by a constant if we know the low 32-bits are all
+; zero.
+
+; add i64:x, K if computeTrailingZeros(K) >= 32
+; => build_pair (add x.hi, K.hi), x.lo
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x40000
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 2
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40000
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, -1
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 4294967295 ; (1 << 31)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 4294967295 ; (1 << 31)
+  ret i64 %add
+}
+
+define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %add
+}
+
+define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, 2
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %add = add i64 %reg, %in.high.bits
+  ret i64 %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, s2
+; GFX9-NEXT:    ; return to shader part epilog
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %add = add i64 %reg, %in.high.bits
+  ret i64 %add
+}
diff --git a/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll
new file mode 100644
index 00000000000000..779c4aef647b41
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+; Reduce a 64-bit sub by a constant if we know the low 32-bits are all
+; zero.
+
+; sub i64:x, K if computeTrailingZeros(K) >= 32
+; => build_pair (sub x.hi, K.hi), x.lo
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0xfffc0000
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_1(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_2(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -2
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_3(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_4(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xfffc0000
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_1(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_2(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_3(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_4(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 1
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 4294967295 ; (1 << 31)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 4294967295 ; (1 << 31)
+  ret i64 %sub
+}
+
+define <2 x i64> @v_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %sub
+}
+
+define <2 x i64> @v_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, -2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, -2
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %sub = sub i64 %reg, %in.high.bits
+  ret i64 %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_sub_u32 s0, s0, 0
+; GFX9-NEXT:    s_subb_u32 s1, s1, s2
+; GFX9-NEXT:    ; return to shader part epilog
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %sub = sub i64 %reg, %in.high.bits
+  ret i64 %sub
+}

@arsenm arsenm marked this pull request as ready for review January 8, 2025 05:19
Copy link
Contributor Author

arsenm commented Jan 8, 2025

Merge activity

  • Jan 8, 10:24 AM EST: A user started a stack merge that includes this pull request via Graphite.
  • Jan 8, 10:27 AM EST: Graphite rebased this pull request as part of a merge.
  • Jan 8, 10:30 AM EST: A user merged this pull request with Graphite.

Add baseline test for 64-bit adds when the low half of
an operand is known 0.
@arsenm arsenm force-pushed the users/arsenm/rocm-llvm-237/add-baseline-test branch from 4aa270b to 12b7f07 Compare January 8, 2025 15:26
@arsenm arsenm merged commit 6376418 into main Jan 8, 2025
5 of 7 checks passed
@arsenm arsenm deleted the users/arsenm/rocm-llvm-237/add-baseline-test branch January 8, 2025 15:30
shenhanc78 pushed a commit to shenhanc78/llvm-project that referenced this pull request Jan 8, 2025
Add baseline test for 64-bit adds when the low half of
an operand is known 0.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants