Skip to content

Commit

Permalink
[AIE2] Skip copies and bitcasts when combining loads/stores
Browse files Browse the repository at this point in the history
  • Loading branch information
andcarminati committed Oct 11, 2024
1 parent af61e7b commit 86a2d53
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 8 deletions.
12 changes: 6 additions & 6 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1601,7 +1601,7 @@ void AIE2InstructionSelector::makeDeadMI(MachineInstr &MI,
bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK(
MachineInstr &UNPACKI, MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(UNPACKI.uses().begin()))->getReg();
MachineInstr *LoadOp = MRI.getUniqueVRegDef(LoadResult);
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);

assert(LoadOp && "Expected SSA.");

Expand Down Expand Up @@ -2771,7 +2771,7 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UPS(MachineInstr &UPSI,

// First use is the G_INTRINSIC_W_SIDE_EFFECTS ID
Register LoadResult = (std::next(UPSI.uses().begin()))->getReg();
MachineInstr *LoadOp = MRI.getUniqueVRegDef(LoadResult);
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);

assert(LoadOp && "Expected SSA.");

Expand Down Expand Up @@ -3993,7 +3993,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_PACK(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register PackResult = (StoreI.uses().begin())->getReg();
MachineInstr *PackOp = MRI.getUniqueVRegDef(PackResult);
MachineInstr *PackOp = getDefIgnoringCopiesAndBitcasts(PackResult, MRI);

assert(PackOp && "Expected SSA.");

Expand Down Expand Up @@ -4145,7 +4145,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_SRS(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register SrsResult = (StoreI.uses().begin())->getReg();
MachineInstr *SrsOp = MRI.getUniqueVRegDef(SrsResult);
MachineInstr *SrsOp = getDefIgnoringCopiesAndBitcasts(SrsResult, MRI);

assert(SrsOp && "Expected SSA.");

Expand Down Expand Up @@ -4252,7 +4252,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_CONV(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register ConvResult = (StoreI.uses().begin())->getReg();
MachineInstr *ConvOp = MRI.getUniqueVRegDef(ConvResult);
MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI);

assert(ConvOp && "Expected SSA.");

Expand Down Expand Up @@ -4511,7 +4511,7 @@ bool canCombineCONVLoad(MachineInstr &MemOp, MachineInstr &CombOp) {
bool AIE2InstructionSelector::selectG_AIE_LOAD_CONV(MachineInstr &CONVI,
MachineRegisterInfo &MRI) {
Register LoadResult = (std::next(CONVI.uses().begin()))->getReg();
MachineInstr *LoadOp = MRI.getUniqueVRegDef(LoadResult);
MachineInstr *LoadOp = getDefIgnoringCopiesAndBitcasts(LoadResult, MRI);

assert(LoadOp && "Expected SSA.");

Expand Down
54 changes: 53 additions & 1 deletion llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,32 @@ bool isUseOf(const MachineInstr &MI, const MachineInstr &Use) {
return false;
}

/// Check for dead \a InBetweenMI MI and copy-like instructions that can be
/// coalesced once \a MemI and \a Dest are combined.
bool isNonCoalesceableUseOf(const MachineInstr &MemI,
const MachineInstr &InBetweenMI,
const MachineInstr &Dest,
const MachineRegisterInfo &MRI) {

if (isTriviallyDead(InBetweenMI, MRI))
return false;

// We can delay an instruction after a copy, if the copy just
// connects MemI and Dest. After combining, this copy will be dead.
if (InBetweenMI.isCopy() &&
MRI.hasOneNonDBGUse(InBetweenMI.getOperand(1).getReg()) &&
MRI.hasOneNonDBGUse(InBetweenMI.getOperand(0).getReg())) {
const MachineInstr *CopyOrignMI =
MRI.getVRegDef(InBetweenMI.getOperand(1).getReg());
const MachineInstr *CopyDestMI =
&*MRI.use_instr_nodbg_begin(InBetweenMI.getOperand(0).getReg());
if (CopyOrignMI == &MemI && CopyDestMI == &Dest)
return false;
}

return isUseOf(InBetweenMI, MemI);
}

/// \return true if \a MemI can be moved just before \a Dest in order to allow
/// post-increment combining
bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
Expand All @@ -87,12 +113,38 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
auto InstrRange = make_range(MII, MIE);
bool SawStore = MemI.mayStore();
auto UnsafeToMovePast = [&](const MachineInstr &MI) {
return (isUseOf(MI, MemI) && !isTriviallyDead(MI, MRI)) ||
return isNonCoalesceableUseOf(MemI, MI, Dest, MRI) ||
!MI.isSafeToMove(nullptr, SawStore);
};
return none_of(InstrRange, UnsafeToMovePast);
}

/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts.
///
/// Starting at the definition of \p Reg, the walk steps to the definition of
/// the source operand for as long as the current instruction is a COPY or a
/// G_BITCAST whose result has exactly one non-debug use and whose source is a
/// virtual register. The first instruction that does not satisfy these
/// conditions is returned.
///
/// \return the instruction found, or nullptr when MRI.getVRegDef() yields no
/// definition somewhere along the chain.
MachineInstr *
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg,
                                      const MachineRegisterInfo &MRI) {

  // Judge the instruction that is passed in, not whatever the walk currently
  // points at, so the predicate stays correct if it is reused elsewhere.
  auto IsSingleUseCopyOrBitcast = [&MRI](const MachineInstr &MI) {
    const bool IsCopyLike =
        MI.isCopy() || MI.getOpcode() == TargetOpcode::G_BITCAST;
    return IsCopyLike && MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  };

  auto ReadsVirtReg = [](const MachineInstr &MI) {
    return MI.getOperand(1).getReg().isVirtual();
  };

  MachineInstr *DefInstr = MRI.getVRegDef(Reg);

  // Skip copies/bitcasts that have no other use.
  // Stop if we reach a use of a physical register.
  while (DefInstr && IsSingleUseCopyOrBitcast(*DefInstr) &&
         ReadsVirtReg(*DefInstr))
    DefInstr = MRI.getVRegDef(DefInstr->getOperand(1).getReg());

  return DefInstr;
}

MachineInstr *findLastRegUseInBB(Register Reg, MachineInstr &IgnoreUser,
MachineRegisterInfo &MRI,
CombinerHelper &Helper,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
MachineRegisterInfo &MRI);
/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. Only COPY / G_BITCAST instructions whose result has a single
/// non-debug use are skipped, and the walk stops at a copy that reads a
/// physical register. May return nullptr if \p Reg is not a generic virtual
/// register.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg,
const MachineRegisterInfo &MRI);

class InstrNode {
MachineInstr *BaseNode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,27 @@ body: |
%11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I4.I8), %1:vregbank(<32 x s16>), %6:gprregbank(s32)
G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>))
...

# The pack intrinsic result reaches G_AIE_OFFSET_STORE through a G_BITCAST
# followed by a COPY. The CHECK lines expect the whole chain to be selected as
# a single VST_PACK_D8_D16_ag_idx_imm.
---
name: VST_PACK_D8_D16_COPY_BITCAST
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0, $r0, $x0
; CHECK-LABEL: name: VST_PACK_D8_D16_COPY_BITCAST
; CHECK: liveins: $p0, $r0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: VST_PACK_D8_D16_ag_idx_imm [[COPY1]], 96, [[COPY]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>))
%0:vregbank(<32 x s16>) = COPY $x0
%2:ptrregbank(p0) = COPY $p0
%6:gprregbank(s32) = G_CONSTANT i32 0
%5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
%3:modregbank(s20) = G_CONSTANT i20 96
%100:vregbank(<8 x s32>) = G_BITCAST %5(<32 x s8>)
%110:vregbank(<8 x s32>) = COPY %100(<8 x s32>)
G_AIE_OFFSET_STORE %110:vregbank(<8 x s32>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<8 x s32>))
...
Original file line number Diff line number Diff line change
Expand Up @@ -1188,3 +1188,29 @@ body: |
G_AIE_OFFSET_STORE %107, %0, %11 :: (store (<16 x s32>) into stack - 64)
G_AIE_OFFSET_STORE %108, %0, %12 :: (store (<16 x s32>) into stack - 64)
...

# The srs intrinsic result reaches G_AIE_OFFSET_STORE through a G_BITCAST
# followed by a COPY. The CHECK lines expect the chain to be selected as a
# single VST_SRS_D8_S32_ag_idx.
# NOTE(review): %100 is used by the srs intrinsic but has no definition in this
# body (the CHECK line shows an undefined %5:acc1024), and $m0 is read although
# liveins lists $cm0, $p0, $r1. Presumably a COPY of $cm0 into %100 and a $m0
# live-in were intended -- confirm.
---
name: VST_SRS_D8_S32_COPY_BITCAST
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $cm0, $p0, $r1
; CHECK-LABEL: name: VST_SRS_D8_S32_COPY_BITCAST
; CHECK: liveins: $cm0, $p0, $r1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:mss = COPY [[COPY2]]
; CHECK-NEXT: VST_SRS_D8_S32_ag_idx [[COPY]], [[COPY1]], %5:acc1024, [[COPY3]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>))
%0:ptrregbank(p0) = COPY $p0
%7:modregbank(s20) = COPY $m0
%101:gprregbank(s32) = COPY $r1
%102:gprregbank(s32) = G_CONSTANT i32 0
%103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.I256.v32.acc32.srs), %100:accregbank(<16 x s64>), %101:gprregbank(s32), %102:gprregbank(s32)
%144:vregbank(<8 x s32>) = G_BITCAST %103(<32 x s8>)
%201:vregbank(<8 x s32>) = COPY %144(<8 x s32>)
G_AIE_OFFSET_STORE %201(<8 x s32>), %0, %7 :: (store (<8 x s32>))
...
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vlda_conv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,46 @@ body: |
%3:accregbank(<8 x s64>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bf16.to.v16accfloat), %2:vregbank(<16 x s16>)
PseudoRET implicit $lr, implicit %3
...

# The G_LOAD result reaches the bf16-to-accfloat conversion intrinsic through a
# single-use COPY. The CHECK lines expect load and conversion to be selected
# as one VLDA_CONV_FP32_BF16_ag_idx_imm.
---
name: VLDA_CONV_COPY
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $r0
; CHECK-LABEL: name: VLDA_CONV_COPY
; CHECK: liveins: $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[VLDA_CONV_FP32_BF16_ag_idx_imm:%[0-9]+]]:acc512 = VLDA_CONV_FP32_BF16_ag_idx_imm [[COPY]], 0 :: (load (<16 x s16>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_CONV_FP32_BF16_ag_idx_imm]]
%0:ptrregbank(p0) = COPY $p0
%1:vregbank(<16 x s16>) = G_LOAD %0:ptrregbank(p0) :: (load (<16 x s16>))
%2:vregbank(<16 x s16>) = COPY %1
%3:accregbank(<8 x s64>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bf16.to.v16accfloat), %2:vregbank(<16 x s16>)
PseudoRET implicit $lr, implicit %3
...

# Negative case: the COPY result %2 has a second user (PseudoRET), so the copy
# is not single-use. The CHECK lines expect a separate VLD_idx_imm_3x32_pseudo
# and VCONV_FP32_BF16 instead of a combined load+conv instruction.
---
name: VLDA_CONV_COPY_no_single_use
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $r0
; CHECK-LABEL: name: VLDA_CONV_COPY_no_single_use
; CHECK: liveins: $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[VLD_idx_imm_3x32_pseudo:%[0-9]+]]:vec256 = VLD_idx_imm_3x32_pseudo [[COPY]], 0 :: (load (<16 x s16>))
; CHECK-NEXT: [[VCONV_FP32_BF16_:%[0-9]+]]:acc512 = VCONV_FP32_BF16 [[VLD_idx_imm_3x32_pseudo]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_FP32_BF16_]], implicit [[VLD_idx_imm_3x32_pseudo]]
%0:ptrregbank(p0) = COPY $p0
%1:vregbank(<16 x s16>) = G_LOAD %0:ptrregbank(p0) :: (load (<16 x s16>))
%2:vregbank(<16 x s16>) = COPY %1
%3:accregbank(<8 x s64>) = G_INTRINSIC intrinsic(@llvm.aie2.v16bf16.to.v16accfloat), %2:vregbank(<16 x s16>)
PseudoRET implicit $lr, implicit %3, implicit %2
...
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vlda_ups.mir
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,28 @@ body: |
%7:accregbank(<16 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.acc32.v32.I512.ups), %5:vregbank(<32 x s16>), %2:gprregbank(s32), %6:gprregbank(s32)
PseudoRET implicit $lr, implicit %7
...

# The G_LOAD result reaches the ups intrinsic through a single-use COPY. The
# CHECK lines expect load and ups to be selected as one
# VLDA_UPS_S32_D16_ag_idx_imm.
---
name: VLDA_UPS_S32_D16_COPY
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0, $r0
; CHECK-LABEL: name: VLDA_UPS_S32_D16_COPY
; CHECK: liveins: $p0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:mss = COPY [[COPY1]]
; CHECK-NEXT: [[VLDA_UPS_S32_D16_ag_idx_imm:%[0-9]+]]:acc512 = VLDA_UPS_S32_D16_ag_idx_imm [[COPY2]], [[COPY]], 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<16 x s16>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_UPS_S32_D16_ag_idx_imm]]
%1:ptrregbank(p0) = COPY $p0
%2:gprregbank(s32) = COPY $r0
%6:gprregbank(s32) = G_CONSTANT i32 0
%5:vregbank(<16 x s16>) = G_LOAD %1:ptrregbank(p0) :: (load (<16 x s16>))
%8:vregbank(<16 x s16>) = COPY %5
%7:accregbank(<8 x s64>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.acc32.v16.I256.ups), %8:vregbank(<16 x s16>), %2:gprregbank(s32), %6:gprregbank(s32)
PseudoRET implicit $lr, implicit %7
...
Original file line number Diff line number Diff line change
Expand Up @@ -171,4 +171,28 @@ body: |
PseudoRET implicit $lr, implicit $x0
...


# The G_AIE_OFFSET_LOAD result reaches the unpack intrinsic through a
# single-use COPY. The CHECK lines expect load and unpack to be selected as
# one VLDB_UNPACK_S8_S4_ag_idx.
# NOTE(review): $m0 is read by a COPY but liveins lists only $p0 -- confirm
# whether $m0 should be added to the live-ins.
---
name: VLDB_UNPACK_S8_S4_COPY
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0
; CHECK-LABEL: name: VLDB_UNPACK_S8_S4_COPY
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0
; CHECK-NEXT: [[VLDB_UNPACK_S8_S4_ag_idx:%[0-9]+]]:vec512 = VLDB_UNPACK_S8_S4_ag_idx [[COPY]], [[COPY1]] :: (load (<32 x s8>))
; CHECK-NEXT: $x0 = COPY [[VLDB_UNPACK_S8_S4_ag_idx]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:ptrregbank(p0) = COPY $p0
%8:modregbank(s20) = COPY $m0
%6:gprregbank(s32) = G_CONSTANT i32 1
%5:vregbank(<32 x s8>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %8:modregbank(s20) :: (load (<32 x s8>))
%9:vregbank(<32 x s8>) = COPY %5
%7:vregbank(<32 x s16>) = G_INTRINSIC intrinsic(@llvm.aie2.unpack.I8.I4), %9:vregbank(<32 x s8>), %6:gprregbank(s32)
$x0 = COPY %7:vregbank(<32 x s16>)
PseudoRET implicit $lr, implicit $x0
...
74 changes: 74 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,77 @@ body: |
%21:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %0, %8 :: (store (<16 x s16>))
PseudoRET implicit $lr
...

# The accfloat-to-bf16 conversion result reaches G_STORE through a G_BITCAST
# followed by a COPY, each with a single use. The CHECK lines expect the chain
# to be selected as one VST_CONV_BF16_FP32_ag_idx_imm.
---
name: VST_CONV_COPY_BITCAST
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $bml0
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST
; CHECK: liveins: $p0, $bml0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bml0
; CHECK-NEXT: VST_CONV_BF16_FP32_ag_idx_imm [[COPY]], 0, [[COPY1]], implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>))
; CHECK-NEXT: PseudoRET implicit $lr
%0:ptrregbank(p0) = COPY $p0
%100:accregbank(<8 x s64>) = COPY $bml0
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>)
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
G_STORE %200, %0 :: (store (<8 x s32>))
PseudoRET implicit $lr
...

# Negative case: the G_BITCAST result %150 has a second user (PseudoRET), so
# the bitcast is not single-use. The CHECK lines expect a separate
# VCONV_BF16_FP32 and VST_dmw_sts_w_ag_idx_imm instead of a combined store.
---
name: VST_CONV_COPY_BITCAST_no_single_use_bit_cast
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $bml0
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST_no_single_use_bit_cast
; CHECK: liveins: $p0, $bml0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bml0
; CHECK-NEXT: [[VCONV_BF16_FP32_:%[0-9]+]]:vec256 = VCONV_BF16_FP32 [[COPY1]], implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crrnd
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[VCONV_BF16_FP32_]], [[COPY]], 0 :: (store (<8 x s32>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_BF16_FP32_]]
%0:ptrregbank(p0) = COPY $p0
%100:accregbank(<8 x s64>) = COPY $bml0
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>)
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
G_STORE %200, %0 :: (store (<8 x s32>))
PseudoRET implicit $lr, implicit %150
...

# Negative case: the COPY result %200 has a second user (PseudoRET), so the
# copy is not single-use. The CHECK lines expect a separate VCONV_BF16_FP32
# and VST_dmw_sts_w_ag_idx_imm instead of a combined store.
---
name: VST_CONV_COPY_BITCAST_no_single_use_copy
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $bml0
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST_no_single_use_copy
; CHECK: liveins: $p0, $bml0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bml0
; CHECK-NEXT: [[VCONV_BF16_FP32_:%[0-9]+]]:vec256 = VCONV_BF16_FP32 [[COPY1]], implicit-def dead $srf2fflags, implicit $crf2fmask, implicit $crrnd
; CHECK-NEXT: VST_dmw_sts_w_ag_idx_imm [[VCONV_BF16_FP32_]], [[COPY]], 0 :: (store (<8 x s32>))
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VCONV_BF16_FP32_]]
%0:ptrregbank(p0) = COPY $p0
%100:accregbank(<8 x s64>) = COPY $bml0
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>)
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
G_STORE %200, %0 :: (store (<8 x s32>))
PseudoRET implicit $lr, implicit %200
...

0 comments on commit 86a2d53

Please sign in to comment.