[AIE2P] Instruction selection support for G_FADD/G_FSUB

Xilinx · Jan 30, 2025 · b8f9873 · b8f9873
1 parent ed34d7c
commit b8f9873
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 0 deletions.
diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
@@ -27,6 +27,39 @@ def : Pat<(int_aie2p_sched_barrier), (SCHED_BARRIER)>;
 // A bare frameindex. Placeholder until eliminateFrameIndex.
 def : Pat<(ptr0 frameindex:$fi), (PseudoFI(frameindex_to_targetframeindex $fi))>;
 
+class VecConf {  // Describes one 32-bit configuration word; derived records override individual fields with `let` and read the packed result from `all` / `ConfBits`.
+  int AMODE_I32  = 0;  // AMODE_* are the named values intended for the `amode` field.
+  int AMODE_I64  = 1;
+  int AMODE_FP32 = 2;
+  // BMODE_* are the named values intended for the `bmode` field. NOTE(review): several names share an encoding (16x16 and 16x16_a are 3, 8x8 and 16x16_b are 1, 8x4 and 32x16 are 0); presumably the applicable name depends on amode -- confirm.
+  int BMODE_8x4     = 0;
+  int BMODE_8x8     = 1;
+  int BMODE_16x8    = 2;
+  int BMODE_16x16   = 3;
+  int BMODE_16x16_a = 3;
+  int BMODE_16x16_b = 1;
+  int BMODE_32x16   = 0;
+  // Fields of the word, declared from least to most significant bit (positions follow from the concatenation in `all`). Every field defaults to 0.
+  bits<1> dynZeroAccum = 0;  // Bit 0. 0 – Use default first accumulator input to the post-adder. 1 – Replace default first accumulator with zeros.
+  bits<2> amode        = 0;  // Bits 2:1. Accumulator width (one of the AMODE_* values)
+  bits<2> bmode        = 0;  // Bits 4:3. Multiplication precision (one of the BMODE_* values)
+  bits<3> cmode        = 0;  // Bits 7:5. Multiplication mode
+  bits<1> signY        = 0;  // Bit 8. 0 – Y buffer has an unsigned datatype. 1 – Signed
+  bits<1> signX        = 0;  // Bit 9. 0 – X buffer has an unsigned datatype. 1 – Signed
+  bits<1> accShift     = 0;  // Bit 10. Accumulator left shift by 16 bits
+  bits<1> dynMulNeg    = 0;  // Bit 11. 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the multiplier results.
+  bits<1> dynAcc0Neg   = 0;  // Bit 12. 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the first accumulator input.
+  bits<1> dynAcc1Neg   = 0;  // Bit 13. 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the second accumulator input.
+  bits<2> reserved1    = 0;  // Bits 15:14.
+  bits<8> dynTermNeg   = 0;  // Bits 23:16. Negation of terms in complex multiplications to allow complex handling.
+  bits<8> reserved2    = 0;  // Bits 31:24.
+  // The packed word. The concatenation lists fields most-significant first, so reserved2 lands in bits 31:24 and dynZeroAccum in bit 0; the widths sum to 32.
+  bits<32> all = {reserved2, dynTermNeg, reserved1, dynAcc1Neg, dynAcc0Neg, dynMulNeg, accShift, signX, signY, cmode, bmode, amode, dynZeroAccum};
+  dag ConfBits = (MOV_RLC_imm11_pseudo (i32 all));  // Output-pattern fragment that materializes `all` with MOV_RLC_imm11_pseudo. NOTE(review): the opcode name suggests an 11-bit immediate -- confirm that words with bit 10 or higher set can still be produced this way.
+}
+
+def accfp32_vecconf : VecConf { let amode = AMODE_FP32; let bmode = BMODE_16x16; }  // amode = 2, bmode = 3, all other fields 0, so all = (3 << 3) | (2 << 1) = 28.
+
 /// Generic pattern classes
 class PatGpr<SDPatternOperator OpNode, AIE2PInst Inst, ValueType type>
     : Pat<(type (OpNode (type eR:$rs))), (Inst eR:$rs)>;
@@ -184,6 +217,11 @@ def : Pat<(int_aie2p_ACC2048_sub_conf ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
 def : PatInaccessibleMem<(int_aie2p_ACC2048_accfloat_sub_conf ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
            (VSUB_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, eR:$acc)>;
 
+def : Pat<(fadd ACC2048:$acc1, ACC2048:$acc2),  // Generic fadd on two ACC2048 operands selects VADD_f_vmac_cm2_add_reg; its third operand is built from accfp32_vecconf.ConfBits.
+          (VADD_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, (i32 accfp32_vecconf.ConfBits))>;
+def : Pat<(fsub ACC2048:$acc1, ACC2048:$acc2),  // Generic fsub on two ACC2048 operands selects VSUB_f_vmac_cm2_add_reg with the same configuration operand.
+          (VSUB_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, (i32 accfp32_vecconf.ConfBits))>;
+
 // VMUL/VMAC Intrinsics
 
 def : Pat<(int_aie2p_I1024_I1024_ACC2048_addmac_conf VEC1024:$s1, VEC1024:$s2, ACC2048:$acc1, ACC2048:$acc2, eR:$acc),

diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fadd.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fadd.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+# Instruction selection of G_FADD on <64 x s32> values in accregbank: the expected
+# output is VADD_f_vmac_cm2_add_reg whose third operand comes from MOV_RLC_imm11_pseudo 28.
+---
+name:            test_fadd
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $dm0, $dm1
+    ; CHECK-LABEL: name: test_fadd
+    ; CHECK: liveins: $dm0, $dm1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm1
+    ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
+    ; CHECK-NEXT: [[VADD_f_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_f_vmac_cm2_add_reg [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: $dm0 = COPY [[VADD_f_vmac_cm2_add_reg]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0
+    %0:accregbank(<64 x s32>) = COPY $dm0
+    %1:accregbank(<64 x s32>) = COPY $dm1
+    %2:accregbank(<64 x s32>) = G_FADD %0, %1
+    $dm0 = COPY %2:accregbank(<64 x s32>)
+    PseudoRET implicit $lr, implicit $dm0
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fsub.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fsub.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+# Instruction selection of G_FSUB on <64 x s32> values in accregbank: the expected
+# output is VSUB_f_vmac_cm2_add_reg whose third operand comes from MOV_RLC_imm11_pseudo 28.
+---
+name:            test_fsub
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $dm0, $dm1
+    ; CHECK-LABEL: name: test_fsub
+    ; CHECK: liveins: $dm0, $dm1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm1
+    ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
+    ; CHECK-NEXT: [[VSUB_f_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VSUB_f_vmac_cm2_add_reg [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: $dm0 = COPY [[VSUB_f_vmac_cm2_add_reg]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0
+    %0:accregbank(<64 x s32>) = COPY $dm0
+    %1:accregbank(<64 x s32>) = COPY $dm1
+    %2:accregbank(<64 x s32>) = G_FSUB %0, %1
+    $dm0 = COPY %2:accregbank(<64 x s32>)
+    PseudoRET implicit $lr, implicit $dm0
+...