Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add v_mfma_ld_scale_b32 for gfx950 #116722

Open
wants to merge 1 commit into
base: users/arsenm/gfx950/buffer-load-lds-96-128
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1991,13 +1991,14 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,

class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
bit HasNeg,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;

dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));

dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
dag ret = !con(base, vop3pFields);
Expand Down Expand Up @@ -2191,22 +2192,22 @@ class getAsmVOPDPart <int NumSrcArgs, string XorY> {

// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
bit HasClamp, bit HasOpSel> {
string dst = "$vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), "");
string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1",
" $src1,"));
string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
string clamp = !if(HasClamp, "$clamp", "");
string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

// Each modifier is printed as an array of bits for each operand, so
// all operands are printed as part of src0_modifiers.
string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
string ret = dst#src0#src1#src2#opsel#mods#clamp;
}

// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
Expand Down Expand Up @@ -2267,7 +2268,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT

class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
bit HasOpSel, bit HasOMod, bit IsVOP3P,
bit HasModifiers, bit Src0HasMods,
bit HasNeg, bit Src0HasMods,
bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
bit HasByteSel = 0> {
string dst = !if(HasDst,
Expand All @@ -2294,7 +2295,7 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
string bytesel = !if(HasByteSel, "$byte_sel", "");
string 3PMods = !if(IsVOP3P,
!if(HasOpSel, "$op_sel_hi", "")
#!if(HasModifiers, "$neg_lo$neg_hi", ""),
#!if(HasNeg, "$neg_lo$neg_hi", ""),
"");
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");
Expand Down Expand Up @@ -2554,6 +2555,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
isModifierType<Src1VT>.ret,
isModifierType<Src2VT>.ret,
HasOMod);
field bit HasNeg = HasModifiers;

field bit HasSrc0Mods = HasModifiers;
field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
Expand Down Expand Up @@ -2589,7 +2591,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
HasClamp, HasModifiers, HasSrc2Mods,
HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOpSel,
NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
Expand All @@ -2607,7 +2609,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;

field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
Expand Down Expand Up @@ -2635,10 +2637,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
// the asm operand name via this HasModifiers flag
field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
HasModifiers, DstVT, IsFP8ByteSel>.ret;
field string Asm64 = AsmVOP3Base;
field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
HasClamp,
HasOMod,
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
let AsmVOP3Base = AsmVOP3P;
}

// Operand profile used by V_MFMA_LD_SCALE_B32. The value-type list is
// [untyped, i32, i32, untyped]; presumably this means "no result, two i32
// sources, no src2" (NOTE(review): confirm against VOPProfile's ArgVT order).
//
// HasNeg defaults to HasModifiers in VOPProfile, so the two are set
// separately here: HasModifiers stays on, while HasNeg = 0 makes
// getInsVOP3P/getAsmVOP3P omit the neg_lo/neg_hi operands from the operand
// list and the assembly string.
// NOTE(review): HasModifiers = 1 is presumably needed so the source-modifier
// operands that carry op_sel/op_sel_hi exist -- confirm.
def VOP_MFMA_LD_SCALE : VOP3P_Profile<VOPProfile<[untyped, i32, i32, untyped]>, VOP3P_LD_SCALE> {
let HasModifiers = 1;
let HasNeg = 0;
}

// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
Expand Down Expand Up @@ -753,6 +758,10 @@ defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16
defm V_MFMA_F32_32X32X16_BF16 : MAIInst<"v_mfma_f32_32x32x16bf16", "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
}

// Pseudo definition of v_mfma_ld_scale_b32, built from the
// VOP_MFMA_LD_SCALE profile and only available when the subtarget has
// HasGFX950Insts.
let SubtargetPredicate = HasGFX950Insts in {
defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
}

let Predicates = [isGFX90APlus] in {
let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
Expand Down Expand Up @@ -1792,6 +1801,10 @@ defm V_MFMA_F32_16X16X32_F16 : VOP3P_Real_MFMA_gfx950 <0x54, "v_mfma_f32_16x
defm V_MFMA_F32_32X32X16_F16 : VOP3P_Real_MFMA_gfx950 <0x55, "v_mfma_f32_32x32x16_f16">;
defm V_MFMA_F32_32X32X16_BF16 : VOP3P_Real_MFMA_gfx950 <0x37, "v_mfma_f32_32x32x16_bf16">;

// Real (encoded) definition of v_mfma_ld_scale_b32 with VOP3P opcode 0x2c,
// gated on HasGFX950Insts. It goes through VOP3P_Real_vi rather than the
// VOP3P_Real_MFMA_* multiclasses used by the neighbouring MFMA instructions.
let SubtargetPredicate = HasGFX950Insts in {
defm V_MFMA_LD_SCALE_B32 : VOP3P_Real_vi <0x2c>;
}

defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
let SubtargetPredicate = HasXF32Insts in {
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/VOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
bits<2> index_key_8bit;
bits<1> index_key_16bit;

let Inst{7-0} = vdst;
let Inst{7-0} = !if(P.HasDst, vdst, 0);
let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
Expand Down Expand Up @@ -1365,6 +1365,10 @@ def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
def VOP3_MAI : VOP3Features<0, 0, 0, 1>;

// Feature set for the VOP_MFMA_LD_SCALE profile. Packed is misleading,
// but it enables the appropriate op_sel modifiers. The flags match
// VOP3_PACKED except that the first one is 0 instead of 1.
// NOTE(review): the first flag is presumably HasClamp (i.e. clamp is not
// accepted) -- confirm against the VOP3Features parameter order.
def VOP3P_LD_SCALE : VOP3Features<0, 1, 1, 0>;

class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {

let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
Expand Down
31 changes: 31 additions & 0 deletions llvm/test/MC/AMDGPU/mai-gfx950-err.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --implicit-check-not=error: %s

v_mfma_ld_scale_b32 v0, 65
// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported

v_mfma_ld_scale_b32 65, v0
// CHECK: :[[@LINE-1]]:21: error: literal operands are not supported

v_mfma_ld_scale_b32 65, 65
// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported

v_mfma_ld_scale_b32 s0, s1
// CHECK: :[[@LINE-1]]:25: error: invalid operand (violates constant bus restrictions)

v_mfma_ld_scale_b32 v0, v0 clamp
// CHECK: :[[@LINE-1]]:28: error: invalid operand for instruction

v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1]
// CHECK: :[[@LINE-1]]:28: error: not a valid operand

v_mfma_ld_scale_b32 v0, v0 neg_lo:[1,1]
// CHECK: :[[@LINE-1]]:28: error: not a valid operand

v_mfma_ld_scale_b32 v0, v0 neg_hi:[1,1]
// CHECK: :[[@LINE-1]]:28: error: not a valid operand

v_mfma_ld_scale_b32 v0, v0 neg_hi:[0,1]
// CHECK: :[[@LINE-1]]:28: error: not a valid operand

v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1] neg_hi:[0,1]
// CHECK: :[[@LINE-1]]:28: error: not a valid operand
117 changes: 117 additions & 0 deletions llvm/test/MC/AMDGPU/mai-gfx950.s
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,120 @@ v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] abid:1
// GFX950: v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xb7,0xd3,0x00,0x01,0x02,0x1c]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1

//===----------------------------------------------------------------------===//
// v_mfma_ld_scale_b32
//===----------------------------------------------------------------------===//

// GFX950: v_mfma_ld_scale_b32 v0, 64 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x81,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v0, 64

// GFX950: v_mfma_ld_scale_b32 64, v0 ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x00,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 64, v0

// GFX950: v_mfma_ld_scale_b32 64, 64 ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x80,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 64, 64

// GFX950: v_mfma_ld_scale_b32 s0, s0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x00,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 s0, s0

// GFX950: v_mfma_ld_scale_b32 s0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 s0, v0

// GFX950: v_mfma_ld_scale_b32 v0, s0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x00,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v0, s0

// GFX950: v_mfma_ld_scale_b32 vcc_lo, vcc_lo ; encoding: [0x00,0x40,0xac,0xd3,0x6a,0xd4,0x00,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 vcc_lo, vcc_lo

// GFX950: v_mfma_ld_scale_b32 m0, m0 ; encoding: [0x00,0x40,0xac,0xd3,0x7c,0xf8,0x00,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 m0, m0

// GFX950: v_mfma_ld_scale_b32 src_vccz, src_vccz ; encoding: [0x00,0x40,0xac,0xd3,0xfb,0xf6,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 vccz, vccz

// GFX950: v_mfma_ld_scale_b32 src_execz, src_execz ; encoding: [0x00,0x40,0xac,0xd3,0xfc,0xf8,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 execz, execz

// GFX950: v_mfma_ld_scale_b32 v0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v0, v0

// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1

// GFX950: v_mfma_ld_scale_b32 0, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x80,0x00,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 0, 0

// GFX950: v_mfma_ld_scale_b32 1, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x81,0x00,0x01,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 1, 0

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 0]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[0, 1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x08]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 0]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x10]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0, 1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x00]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,0] op_sel_hi:[0,0]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x08]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x10]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1]

// op_sel:[1,0] combined with an explicit all-ones op_sel_hi. The default
// op_sel_hi:[1,1] is not printed, so the output matches the plain
// op_sel:[1,0] form. (The op_sel:[0,1] op_sel_hi:[1,1] combination is
// covered by the next case.)
// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] op_sel_hi:[1,1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x10]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1]

// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x08]
// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0]

Loading
Loading