diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
index 8b35540e418c..3fea08efe3bb 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
@@ -27,6 +27,39 @@ def : Pat<(int_aie2p_sched_barrier), (SCHED_BARRIER)>;
 // A bare frameindex. Placeholder until eliminateFrameIndex.
 def : Pat<(ptr0 frameindex:$fi), (PseudoFI(frameindex_to_targetframeindex $fi))>;
 
+class VecConf {
+  int AMODE_I32 = 0; // Encodings for the amode field below.
+  int AMODE_I64 = 1;
+  int AMODE_FP32 = 2;
+
+  int BMODE_8x4 = 0; // Encodings for the bmode field below.
+  int BMODE_8x8 = 1;
+  int BMODE_16x8 = 2;
+  int BMODE_16x16 = 3;
+  int BMODE_16x16_a = 3; // NOTE(review): same value as BMODE_16x16 - confirm intended.
+  int BMODE_16x16_b = 1; // NOTE(review): same value as BMODE_8x8 - confirm intended.
+  int BMODE_32x16 = 0; // NOTE(review): same value as BMODE_8x4 - confirm intended.
+
+  bits<1> dynZeroAccum = 0; // 0 – Use default first accumulator input to the post-adder. 1 – Replace default first accumulator with zeros.
+  bits<2> amode = 0; // Accumulator width (one of AMODE_* above)
+  bits<2> bmode = 0; // Multiplication precision (one of BMODE_* above)
+  bits<3> cmode = 0; // Multiplication mode
+  bits<1> signY = 0; // 0 – Y buffer has an unsigned datatype. 1 – Signed
+  bits<1> signX = 0; // 0 – X buffer has an unsigned datatype. 1 – Signed
+  bits<1> accShift = 0; // Accumulator left shift by 16 bits
+  bits<1> dynMulNeg = 0; // 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the multiplier results.
+  bits<1> dynAcc0Neg = 0; // 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the first accumulator input.
+  bits<1> dynAcc1Neg = 0; // 0 – Do nothing. 1 – Invert instruction behavior regarding negation of the second accumulator input.
+  bits<2> reserved1 = 0; // Reserved, kept at 0.
+  bits<8> dynTermNeg = 0; // Negation of terms in complex multiplications to allow complex handling.
+  bits<8> reserved2 = 0; // Reserved, kept at 0.
+
+  bits<32> all = {reserved2, dynTermNeg, reserved1, dynAcc1Neg, dynAcc0Neg, dynMulNeg, accShift, signX, signY, cmode, bmode, amode, dynZeroAccum}; // Packed word; fields are listed MSB-first, so dynZeroAccum is bit 0.
+  dag ConfBits = (MOV_RLC_imm11_pseudo (i32 all)); // Result-pattern operand that materializes the packed word. NOTE(review): opcode name suggests an 11-bit immediate - confirm wider configs are handled.
+}
+
+def accfp32_vecconf : VecConf { let amode = AMODE_FP32; let bmode = BMODE_16x16; } // Packs to 28: amode=2 at bits 2:1, bmode=3 at bits 4:3.
+
 /// Generic pattern classes
 class PatGpr
     : Pat<(type (OpNode (type eR:$rs))), (Inst eR:$rs)>;
@@ -184,6 +217,11 @@ def : Pat<(int_aie2p_ACC2048_sub_conf ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
 def : PatInaccessibleMem<(int_aie2p_ACC2048_accfloat_sub_conf ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
           (VSUB_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, eR:$acc)>;
 
+def : Pat<(fadd ACC2048:$acc1, ACC2048:$acc2), // Generic fadd on ACC2048: VADD with the accfp32_vecconf word as the conf operand.
+          (VADD_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, (i32 accfp32_vecconf.ConfBits))>;
+def : Pat<(fsub ACC2048:$acc1, ACC2048:$acc2), // Generic fsub on ACC2048: VSUB with the accfp32_vecconf word as the conf operand.
+          (VSUB_f_vmac_cm2_add_reg ACC2048:$acc1, ACC2048:$acc2, (i32 accfp32_vecconf.ConfBits))>;
+
 // VMUL/VMAC Intrinsics
 def : Pat<(int_aie2p_I1024_I1024_ACC2048_addmac_conf VEC1024:$s1, VEC1024:$s2,
                 ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fadd.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fadd.mir
new file mode 100644
index 000000000000..fcf7d94feb91
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fadd.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+
+
+---
+name: test_fadd
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $dm0, $dm1
+    ; CHECK-LABEL: name: test_fadd
+    ; CHECK: liveins: $dm0, $dm1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm1
+    ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
+    ; CHECK-NEXT: [[VADD_f_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VADD_f_vmac_cm2_add_reg [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: $dm0 = COPY [[VADD_f_vmac_cm2_add_reg]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0
+    %0:accregbank(<64 x s32>) = COPY $dm0
+    %1:accregbank(<64 x s32>) = COPY $dm1
+    %2:accregbank(<64 x s32>) = G_FADD %0, %1
+    $dm0 = COPY %2:accregbank(<64 x s32>)
+    PseudoRET implicit $lr, implicit $dm0
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fsub.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fsub.mir
new file mode 100644
index 000000000000..5ab36c64d878
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-fsub.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+
+# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s
+
+
+---
+name: test_fsub
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $dm0, $dm1
+    ; CHECK-LABEL: name: test_fsub
+    ; CHECK: liveins: $dm0, $dm1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm1
+    ; CHECK-NEXT: [[MOV_RLC_imm11_pseudo:%[0-9]+]]:er = MOV_RLC_imm11_pseudo 28
+    ; CHECK-NEXT: [[VSUB_f_vmac_cm2_add_reg:%[0-9]+]]:acc2048 = VSUB_f_vmac_cm2_add_reg [[COPY]], [[COPY1]], [[MOV_RLC_imm11_pseudo]], implicit-def dead $srfpflags, implicit $crfpmask
+    ; CHECK-NEXT: $dm0 = COPY [[VSUB_f_vmac_cm2_add_reg]]
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit $dm0
+    %0:accregbank(<64 x s32>) = COPY $dm0
+    %1:accregbank(<64 x s32>) = COPY $dm1
+    %2:accregbank(<64 x s32>) = G_FSUB %0, %1
+    $dm0 = COPY %2:accregbank(<64 x s32>)
+    PseudoRET implicit $lr, implicit $dm0
+...