Skip to content

Commit

Permalink
[CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ array to vector
Browse files Browse the repository at this point in the history
According to the specification in
ARM-software/acle#309 this adds the intrinsics

Move and zero multiple ZA single-vector groups to vector registers

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
__arm_streaming __arm_inout("za");

// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
// _za64_s64, _za64_u64 and _za64_f64
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
__arm_streaming __arm_inout("za");
  • Loading branch information
CarolineConcatto committed Apr 16, 2024
1 parent 66453f2 commit 560f36d
Show file tree
Hide file tree
Showing 9 changed files with 1,082 additions and 6 deletions.
12 changes: 12 additions & 0 deletions clang/include/clang/Basic/arm_sme.td
Original file line number Diff line number Diff line change
Expand Up @@ -674,3 +674,15 @@ let TargetGuard = "sme2" in {
def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
}

multiclass ZAReadz<string vg_num>{
let TargetGuard = "sme2p1" in {
def NAME # _B : SInst<"svreadz_za8_{d}_vg1x" # vg_num, vg_num # "m", "cUc", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _H : SInst<"svreadz_za16_{d}_vg1x" # vg_num, vg_num # "m", "sUsbh", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _S : SInst<"svreadz_za32_{d}_vg1x" # vg_num, vg_num # "m", "iUif", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
def NAME # _D : SInst<"svreadz_za64_{d}_vg1x" # vg_num, vg_num # "m", "lUld", MergeNone, "aarch64_sme_readz_x" # vg_num, [IsStreaming, IsInOutZA]>;
}
}

defm SVREADZ_VG2 : ZAReadz<"2">;
defm SVREADZ_VG4 : ZAReadz<"4">;
711 changes: 711 additions & 0 deletions clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_movaz.c

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -2839,6 +2839,17 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;


def int_aarch64_sme_readz_x2
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

def int_aarch64_sme_readz_x4
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_i32_ty],
[IntrNoMem, IntrHasSideEffects]>;

def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;

class SME_OuterProduct_Intrinsic
Expand Down Expand Up @@ -3646,4 +3657,4 @@ def int_aarch64_sve_pmov_to_pred_lane_zero : SVE2_1VectorArg_Pred_Intrinsic;

def int_aarch64_sve_pmov_to_vector_lane_merging : SVE2_Pred_1VectorArgIndexed_Intrinsic;

def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;
def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic;
75 changes: 74 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,9 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
unsigned Op);

template <unsigned MaxIdx, unsigned Scale>
void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, unsigned BaseReg,
unsigned Op);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
template <int64_t Min, int64_t Max>
Expand Down Expand Up @@ -1985,6 +1987,35 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}

template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
unsigned BaseReg,
unsigned Op) {

SDValue SliceBase = N->getOperand(2);

SDValue Base, Offset;
if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
return;
// The correct Za tile number is computed in Machine Instruction
// See EmitTileMovaz
// DAG cannot select Za tile as an output register with ZReg
SDLoc DL(N);
SDValue Ops[] = {Base, Offset, /*Chain*/ N->getOperand(0)};
SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

EVT VT = N->getValueType(0);
for (unsigned I = 0; I < NumVecs; ++I)
ReplaceUses(SDValue(N, I),
CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
SDValue(Mov, 0)));

// Copy chain
unsigned ChainIdx = NumVecs;
ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
unsigned NumOutVecs,
bool IsTupleInput,
Expand Down Expand Up @@ -5175,6 +5206,48 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::MOVA_VG4_4ZMXI);
return;
}
case Intrinsic::aarch64_sme_readz_x2: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ<7, 1>(Node, 2, AArch64::ZA,
AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO);
return;
}
break;
}
case Intrinsic::aarch64_sme_readz_x4: {
if (VT == MVT::nxv16i8) {
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
VT == MVT::nxv8bf16) {
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectMultiVectorMoveZ<7, 1>(Node, 4, AArch64::ZA,
AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO);
return;
}
break;
}
case Intrinsic::swift_async_context_addr: {
SDLoc DL(Node);
SDValue Chain = Node->getOperand(0);
Expand Down
26 changes: 26 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,22 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
return BB;
}

MachineBasicBlock *
AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));

MIB.add(MI.getOperand(0)); // ZReg
MIB.addReg(BaseReg, RegState::Define); // add as output
MIB.addReg(BaseReg); // add as input
MIB.add(MI.getOperand(1)); // slice index register
MIB.add(MI.getOperand(2)); // slice index offset
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}

MachineBasicBlock *
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
Expand Down Expand Up @@ -2992,6 +3008,16 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
return EmitZero(MI, BB);
case AArch64::ZERO_T_PSEUDO:
return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true);
case AArch64::MOVAZ_VG2_2ZMXI_B_PSEUDO:
case AArch64::MOVAZ_VG2_2ZMXI_H_PSEUDO:
case AArch64::MOVAZ_VG2_2ZMXI_S_PSEUDO:
case AArch64::MOVAZ_VG2_2ZMXI_D_PSEUDO:
return EmitTileMovaz(AArch64::MOVAZ_VG2_2ZMXI, AArch64::ZA, MI, BB);
case AArch64::MOVAZ_VG4_4ZMXI_B_PSEUDO:
case AArch64::MOVAZ_VG4_4ZMXI_H_PSEUDO:
case AArch64::MOVAZ_VG4_4ZMXI_S_PSEUDO:
case AArch64::MOVAZ_VG4_4ZMXI_D_PSEUDO:
return EmitTileMovaz(AArch64::MOVAZ_VG4_4ZMXI, AArch64::ZA, MI, BB);
}
}

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,9 @@ class AArch64TargetLowering : public TargetLowering {
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitTileMovaz(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB,
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -780,8 +780,8 @@ let Predicates = [HasSME2p1] in {
defm MOVAZ_ZMI : sme2p1_movaz_tile_to_vec<"movaz">;
defm MOVAZ_2ZMI : sme2p1_movaz_tile_to_vec_vg2<"movaz">;
defm MOVAZ_4ZMI : sme2p1_movaz_tile_to_vec_vg4<"movaz">;
defm MOVAZ_VG2_2ZM : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
defm MOVAZ_VG4_4ZM : sme2_mova_array_to_vec_vg4_multi<0b1100, "movaz">;
defm MOVAZ_VG2_2ZMXI : sme2_movaz_array_to_vec_vg2_multi<"movaz">;
defm MOVAZ_VG4_4ZMXI : sme2_movaz_array_to_vec_vg4_multi<"movaz">;

defm ZERO_MXI : sme2p1_zero_matrix<"zero">;

Expand Down
22 changes: 20 additions & 2 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, Re
let usesCustomInserter = 1;
}

class sme2_movaz_array_to_tile_pseudo<string name, Operand index_ty, RegisterOperand multi_vector_ty,
SMEMatrixTypeEnum za_flag>
: SMEPseudo2Instr<name, 0>,
Pseudo<(outs multi_vector_ty:$Zd), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3), []> {
let SMEMatrixType = za_flag;
let usesCustomInserter = 1;
}
//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4263,8 +4270,11 @@ multiclass sme2_mova_array_to_vec_vg2_multi<bits<3> opc, string mnemonic> {
}

multiclass sme2_movaz_array_to_vec_vg2_multi<string mnemonic> {
def NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
def NAME # _PSEUDO : sme2_movaz_to_tile_multi_pseudo
defm NAME : sme2_mova_array_to_vec_vg2_multi<0b010, mnemonic>;
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, SMEMatrixArray>;
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_h_mul_r, SMEMatrixArray>;
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_s_mul_r, SMEMatrixArray>;
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
}

// move array to vector, four registers
Expand Down Expand Up @@ -4342,6 +4352,14 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
}
}

multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
defm NAME : sme2_mova_array_to_vec_vg4_multi<0b1100, mnemonic>;
def NAME # _B_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_b_mul_r, SMEMatrixArray>;
def NAME # _H_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_h_mul_r, SMEMatrixArray>;
def NAME # _S_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_s_mul_r, SMEMatrixArray>;
def NAME # _D_PSEUDO : sme2_movaz_array_to_tile_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
}

//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
Expand Down
Loading

0 comments on commit 560f36d

Please sign in to comment.