Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Add v_mfma_ld_scale_b32 for gfx950 #116722

Open
wants to merge 1 commit into
base: users/arsenm/gfx950/buffer-load-lds-96-128
Choose a base branch
from

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 19, 2024

No description provided.

@arsenm arsenm added the following labels: backend:AMDGPU; clang (Clang issues not falling into any other category); clang:frontend (Language frontend issues, e.g. anything involving "Sema"); llvm:analysis; llvm:ir; mc (Machine (object) code) — Nov 19, 2024, with Graphite App
@arsenm arsenm marked this pull request as ready for review November 19, 2024 01:30
@llvmbot
Copy link

llvmbot commented Nov 19, 2024

@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Patch is 30.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116722.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+14-12)
  • (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+13)
  • (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+5-1)
  • (added) llvm/test/MC/AMDGPU/mai-gfx950-err.s (+31)
  • (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+117)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+102-29)
  • (modified) llvm/test/tools/llvm-mca/AMDGPU/gfx950.s (+6-3)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2079b34d0448f4..d2024cf915874d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1991,13 +1991,14 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
 
 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                    RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
+                   bit HasNeg,
                    Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
   dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                     HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                     0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
 
   dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
-  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+  dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));
 
   dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
   dag ret = !con(base, vop3pFields);
@@ -2191,22 +2192,22 @@ class getAsmVOPDPart <int NumSrcArgs, string XorY> {
 
 // Returns the assembly string for the inputs and outputs of a VOP3P
 // instruction.
-class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
                    bit HasClamp, bit HasOpSel> {
-  string dst = "$vdst";
-  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), "");
+  string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
   string src1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
   string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
 
-  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+  string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
   string clamp = !if(HasClamp, "$clamp", "");
   string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
 
   // Each modifier is printed as an array of bits for each operand, so
   // all operands are printed as part of src0_modifiers.
-  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
+  string ret = dst#src0#src1#src2#opsel#mods#clamp;
 }
 
 // FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
@@ -2267,7 +2268,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
 
 class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                        bit HasOpSel, bit HasOMod, bit IsVOP3P,
-                       bit HasModifiers, bit Src0HasMods,
+                       bit HasNeg, bit Src0HasMods,
                        bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
                        bit HasByteSel = 0> {
   string dst = !if(HasDst,
@@ -2294,7 +2295,7 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
   string bytesel = !if(HasByteSel, "$byte_sel", "");
   string 3PMods = !if(IsVOP3P,
                       !if(HasOpSel, "$op_sel_hi", "")
-                        #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+                        #!if(HasNeg, "$neg_lo$neg_hi", ""),
                       "");
   string clamp = !if(HasClamp, "$clamp", "");
   string omod = !if(HasOMod, "$omod", "");
@@ -2554,6 +2555,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                                isModifierType<Src1VT>.ret,
                                isModifierType<Src2VT>.ret,
                                HasOMod);
+  field bit HasNeg = HasModifiers;
 
   field bit HasSrc0Mods = HasModifiers;
   field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2589,7 +2591,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                              HasClamp, HasModifiers, HasSrc2Mods,
                              HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
-                                   NumSrcArgs, HasClamp, HasOpSel,
+                                   NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
   field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                 NumSrcArgs, HasClamp, HasOMod,
@@ -2607,7 +2609,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
   defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
-                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
+                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
 
   field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
@@ -2635,10 +2637,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   // the asm operand name via this HasModifiers flag
   field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
   field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
-   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
+   HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
    HasModifiers, DstVT, IsFP8ByteSel>.ret;
   field string Asm64 = AsmVOP3Base;
-  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
+  field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
   field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                               HasClamp,
                                               HasOMod,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 08882e41d863a1..3a6202ea435222 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -20,6 +20,11 @@ class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
   let AsmVOP3Base = AsmVOP3P;
 }
 
+def VOP_MFMA_LD_SCALE : VOP3P_Profile<VOPProfile<[untyped, i32, i32, untyped]>, VOP3P_LD_SCALE> {
+  let HasModifiers = 1;
+  let HasNeg = 0;
+}
+
 // Used for FMA_MIX* and MAD_MIX* insts
 // Their operands are only sort of f16 operands. Depending on
 // op_sel_hi, these may be interpreted as f32. The inline immediate
@@ -753,6 +758,10 @@ defm V_MFMA_F32_32X32X16_F16   : MAIInst<"v_mfma_f32_32x32x16f16",    "F32_V8F16
 defm V_MFMA_F32_32X32X16_BF16  : MAIInst<"v_mfma_f32_32x32x16bf16",   "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
 }
 
+let SubtargetPredicate = HasGFX950Insts in {
+defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
+}
+
 let Predicates = [isGFX90APlus] in {
   let is_gfx940_xdl = 1 in {
   defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k",  "F32_V4I16_X32",  int_amdgcn_mfma_f32_32x32x4bf16_1k>;
@@ -1792,6 +1801,10 @@ defm V_MFMA_F32_16X16X32_F16     : VOP3P_Real_MFMA_gfx950 <0x54, "v_mfma_f32_16x
 defm V_MFMA_F32_32X32X16_F16     : VOP3P_Real_MFMA_gfx950 <0x55, "v_mfma_f32_32x32x16_f16">;
 defm V_MFMA_F32_32X32X16_BF16    : VOP3P_Real_MFMA_gfx950 <0x37, "v_mfma_f32_32x32x16_bf16">;
 
+let SubtargetPredicate = HasGFX950Insts in {
+defm V_MFMA_LD_SCALE_B32 : VOP3P_Real_vi <0x2c>;
+}
+
 defm V_MFMA_I32_32X32X16I8       : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
 defm V_MFMA_I32_16X16X32I8       : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
 let SubtargetPredicate = HasXF32Insts in {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 1be434c2c11f7e..a6e6adac04e5a9 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -423,7 +423,7 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
   bits<2> index_key_8bit;
   bits<1> index_key_16bit;
 
-  let Inst{7-0} = vdst;
+  let Inst{7-0} = !if(P.HasDst, vdst, 0);
   let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
   let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
   let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
@@ -1365,6 +1365,10 @@ def VOP3_OPSEL   : VOP3Features<1, 1, 0, 0>;
 def VOP3_PACKED  : VOP3Features<1, 1, 1, 0>;
 def VOP3_MAI     : VOP3Features<0, 0, 0, 1>;
 
+// Packed is misleading, but it enables the appropriate op_sel
+// modifiers.
+def VOP3P_LD_SCALE : VOP3Features<0, 1, 1, 0>;
+
 class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
 
   let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
diff --git a/llvm/test/MC/AMDGPU/mai-gfx950-err.s b/llvm/test/MC/AMDGPU/mai-gfx950-err.s
new file mode 100644
index 00000000000000..a6dff076392c85
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mai-gfx950-err.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --implicit-check-not=error: %s
+
+v_mfma_ld_scale_b32 v0, 65
+// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 65, v0
+// CHECK: :[[@LINE-1]]:21: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 65, 65
+// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 s0, s1
+// CHECK: :[[@LINE-1]]:25: error: invalid operand (violates constant bus restrictions)
+
+v_mfma_ld_scale_b32 v0, v0 clamp
+// CHECK: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[1,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_hi:[1,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_hi:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1] neg_hi:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s
index 1d4902e293bb10..a692693638c692 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx950.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx950.s
@@ -158,3 +158,120 @@ v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] abid:1
 // GFX950: v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xb7,0xd3,0x00,0x01,0x02,0x1c]
 // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1
+
+//===----------------------------------------------------------------------===//
+// v_mfma_ld_scale_b32
+//===----------------------------------------------------------------------===//
+
+// GFX950: v_mfma_ld_scale_b32 v0, 64             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x81,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, 64
+
+// GFX950: v_mfma_ld_scale_b32 64, v0             ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x00,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 64, v0
+
+// GFX950: v_mfma_ld_scale_b32 64, 64             ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x80,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 64, 64
+
+// GFX950: v_mfma_ld_scale_b32 s0, s0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 s0, s0
+
+// GFX950: v_mfma_ld_scale_b32 s0, v0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 s0, v0
+
+// GFX950: v_mfma_ld_scale_b32 v0, s0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, s0
+
+// GFX950: v_mfma_ld_scale_b32 vcc_lo, vcc_lo     ; encoding: [0x00,0x40,0xac,0xd3,0x6a,0xd4,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 vcc_lo, vcc_lo
+
+// GFX950: v_mfma_ld_scale_b32 m0, m0             ; encoding: [0x00,0x40,0xac,0xd3,0x7c,0xf8,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 m0, m0
+
+// GFX950: v_mfma_ld_scale_b32 src_vccz, src_vccz ; encoding: [0x00,0x40,0xac,0xd3,0xfb,0xf6,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 vccz, vccz
+
+// GFX950: v_mfma_ld_scale_b32 src_execz, src_execz ; encoding: [0x00,0x40,0xac,0xd3,0xfc,0xf8,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 execz, execz
+
+// GFX950:  v_mfma_ld_scale_b32 v0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, v0
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1
+
+// GFX950: v_mfma_ld_scale_b32 0, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x80,0x00,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 0, 0
+
+// GFX950: v_mfma_ld_scale_b32 1, 0               ; encoding: [0x00,0x40,0xac,0xd3,0x81,0x00,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 1, 0
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1             ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x00]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,0] op_sel_hi:[0,0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0]
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
index 292f2a348df2ef..1fa48fca80fb45 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
@@ -1,88 +1,161 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX950 %s
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] abid:1 ; encoding: [0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] abid:1 ; encoding: [0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 ; encoding: [0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 ; encoding: [0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], a[0:3], 1.0 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], a[0:3], 1.0 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], a[0:3], v[4:7] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], a[0:3], v[4:7] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c]
 0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], v[0:3], 1.0 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b]
+# GFX950:   v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], v[0:3], 1.0 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b]
 0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 ...
[truncated]

@llvmbot
Copy link

llvmbot commented Nov 19, 2024

@llvm/pr-subscribers-llvm-ir

Author: Matt Arsenault (arsenm)

Changes

Patch is 30.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116722.diff

7 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+14-12)
  • (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+13)
  • (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+5-1)
  • (added) llvm/test/MC/AMDGPU/mai-gfx950-err.s (+31)
  • (modified) llvm/test/MC/AMDGPU/mai-gfx950.s (+117)
  • (modified) llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt (+102-29)
  • (modified) llvm/test/tools/llvm-mca/AMDGPU/gfx950.s (+6-3)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2079b34d0448f4..d2024cf915874d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1991,13 +1991,14 @@ class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
 
 class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                    RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
+                   bit HasNeg,
                    Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
   dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                     HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                     0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
 
   dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
-  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+  dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));
 
   dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
   dag ret = !con(base, vop3pFields);
@@ -2191,22 +2192,22 @@ class getAsmVOPDPart <int NumSrcArgs, string XorY> {
 
 // Returns the assembly string for the inputs and outputs of a VOP3P
 // instruction.
-class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
                    bit HasClamp, bit HasOpSel> {
-  string dst = "$vdst";
-  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+  string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), "");
+  string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
   string src1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
   string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
 
-  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+  string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
   string clamp = !if(HasClamp, "$clamp", "");
   string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");
 
   // Each modifier is printed as an array of bits for each operand, so
   // all operands are printed as part of src0_modifiers.
-  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
+  string ret = dst#src0#src1#src2#opsel#mods#clamp;
 }
 
 // FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all
@@ -2267,7 +2268,7 @@ class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT
 
 class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                        bit HasOpSel, bit HasOMod, bit IsVOP3P,
-                       bit HasModifiers, bit Src0HasMods,
+                       bit HasNeg, bit Src0HasMods,
                        bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
                        bit HasByteSel = 0> {
   string dst = !if(HasDst,
@@ -2294,7 +2295,7 @@ class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
   string bytesel = !if(HasByteSel, "$byte_sel", "");
   string 3PMods = !if(IsVOP3P,
                       !if(HasOpSel, "$op_sel_hi", "")
-                        #!if(HasModifiers, "$neg_lo$neg_hi", ""),
+                        #!if(HasNeg, "$neg_lo$neg_hi", ""),
                       "");
   string clamp = !if(HasClamp, "$clamp", "");
   string omod = !if(HasOMod, "$omod", "");
@@ -2554,6 +2555,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                                isModifierType<Src1VT>.ret,
                                isModifierType<Src2VT>.ret,
                                HasOMod);
+  field bit HasNeg = HasModifiers;
 
   field bit HasSrc0Mods = HasModifiers;
   field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
@@ -2589,7 +2591,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                              HasClamp, HasModifiers, HasSrc2Mods,
                              HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
   field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
-                                   NumSrcArgs, HasClamp, HasOpSel,
+                                   NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                                    Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
   field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                 NumSrcArgs, HasClamp, HasOMod,
@@ -2607,7 +2609,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
   defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
-                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
+                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
 
   field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
@@ -2635,10 +2637,10 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   // the asm operand name via this HasModifiers flag
   field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
   field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
-   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
+   HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
    HasModifiers, DstVT, IsFP8ByteSel>.ret;
   field string Asm64 = AsmVOP3Base;
-  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
+  field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
   field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                               HasClamp,
                                               HasOMod,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 08882e41d863a1..3a6202ea435222 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -20,6 +20,11 @@ class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
   let AsmVOP3Base = AsmVOP3P;
 }
 
+def VOP_MFMA_LD_SCALE : VOP3P_Profile<VOPProfile<[untyped, i32, i32, untyped]>, VOP3P_LD_SCALE> {
+  let HasModifiers = 1;
+  let HasNeg = 0;
+}
+
 // Used for FMA_MIX* and MAD_MIX* insts
 // Their operands are only sort of f16 operands. Depending on
 // op_sel_hi, these may be interpreted as f32. The inline immediate
@@ -753,6 +758,10 @@ defm V_MFMA_F32_32X32X16_F16   : MAIInst<"v_mfma_f32_32x32x16f16",    "F32_V8F16
 defm V_MFMA_F32_32X32X16_BF16  : MAIInst<"v_mfma_f32_32x32x16bf16",   "F32_V8BF16_X16", int_amdgcn_mfma_f32_32x32x16_bf16>;
 }
 
+let SubtargetPredicate = HasGFX950Insts in {
+defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
+}
+
 let Predicates = [isGFX90APlus] in {
   let is_gfx940_xdl = 1 in {
   defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k",  "F32_V4I16_X32",  int_amdgcn_mfma_f32_32x32x4bf16_1k>;
@@ -1792,6 +1801,10 @@ defm V_MFMA_F32_16X16X32_F16     : VOP3P_Real_MFMA_gfx950 <0x54, "v_mfma_f32_16x
 defm V_MFMA_F32_32X32X16_F16     : VOP3P_Real_MFMA_gfx950 <0x55, "v_mfma_f32_32x32x16_f16">;
 defm V_MFMA_F32_32X32X16_BF16    : VOP3P_Real_MFMA_gfx950 <0x37, "v_mfma_f32_32x32x16_bf16">;
 
+let SubtargetPredicate = HasGFX950Insts in {
+defm V_MFMA_LD_SCALE_B32 : VOP3P_Real_vi <0x2c>;
+}
+
 defm V_MFMA_I32_32X32X16I8       : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
 defm V_MFMA_I32_16X16X32I8       : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
 let SubtargetPredicate = HasXF32Insts in {
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 1be434c2c11f7e..a6e6adac04e5a9 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -423,7 +423,7 @@ class VOP3Pe <bits<7> op, VOPProfile P> : Enc64 {
   bits<2> index_key_8bit;
   bits<1> index_key_16bit;
 
-  let Inst{7-0} = vdst;
+  let Inst{7-0} = !if(P.HasDst, vdst, 0);
   let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
   let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
   let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
@@ -1365,6 +1365,10 @@ def VOP3_OPSEL   : VOP3Features<1, 1, 0, 0>;
 def VOP3_PACKED  : VOP3Features<1, 1, 1, 0>;
 def VOP3_MAI     : VOP3Features<0, 0, 0, 1>;
 
+// Packed is misleading, but it enables the appropriate op_sel
+// modifiers.
+def VOP3P_LD_SCALE : VOP3Features<0, 1, 1, 0>;
+
 class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOPProfile<P.ArgVT> {
 
   let HasClamp = !if(Features.HasClamp, 1, P.HasClamp);
diff --git a/llvm/test/MC/AMDGPU/mai-gfx950-err.s b/llvm/test/MC/AMDGPU/mai-gfx950-err.s
new file mode 100644
index 00000000000000..a6dff076392c85
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/mai-gfx950-err.s
@@ -0,0 +1,31 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --implicit-check-not=error: %s
+
+v_mfma_ld_scale_b32 v0, 65
+// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 65, v0
+// CHECK: :[[@LINE-1]]:21: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 65, 65
+// CHECK: :[[@LINE-1]]:25: error: literal operands are not supported
+
+v_mfma_ld_scale_b32 s0, s1
+// CHECK: :[[@LINE-1]]:25: error: invalid operand (violates constant bus restrictions)
+
+v_mfma_ld_scale_b32 v0, v0 clamp
+// CHECK: :[[@LINE-1]]:28: error: invalid operand for instruction
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[1,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_hi:[1,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_hi:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
+
+v_mfma_ld_scale_b32 v0, v0 neg_lo:[0,1] neg_hi:[0,1]
+// CHECK: :[[@LINE-1]]:28: error: not a valid operand
diff --git a/llvm/test/MC/AMDGPU/mai-gfx950.s b/llvm/test/MC/AMDGPU/mai-gfx950.s
index 1d4902e293bb10..a692693638c692 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx950.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx950.s
@@ -158,3 +158,120 @@ v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] abid:1
 // GFX950: v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xb7,0xd3,0x00,0x01,0x02,0x1c]
 // ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] cbsz:3 abid:1
+
+//===----------------------------------------------------------------------===//
+// v_mfma_ld_scale_b32
+//===----------------------------------------------------------------------===//
+
+// GFX950: v_mfma_ld_scale_b32 v0, 64             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x81,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, 64
+
+// GFX950: v_mfma_ld_scale_b32 64, v0             ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x00,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 64, v0
+
+// GFX950: v_mfma_ld_scale_b32 64, 64             ; encoding: [0x00,0x40,0xac,0xd3,0xc0,0x80,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 64, 64
+
+// GFX950: v_mfma_ld_scale_b32 s0, s0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 s0, s0
+
+// GFX950: v_mfma_ld_scale_b32 s0, v0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x00,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 s0, v0
+
+// GFX950: v_mfma_ld_scale_b32 v0, s0             ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, s0
+
+// GFX950: v_mfma_ld_scale_b32 vcc_lo, vcc_lo     ; encoding: [0x00,0x40,0xac,0xd3,0x6a,0xd4,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 vcc_lo, vcc_lo
+
+// GFX950: v_mfma_ld_scale_b32 m0, m0             ; encoding: [0x00,0x40,0xac,0xd3,0x7c,0xf8,0x00,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 m0, m0
+
+// GFX950: v_mfma_ld_scale_b32 src_vccz, src_vccz ; encoding: [0x00,0x40,0xac,0xd3,0xfb,0xf6,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 vccz, vccz
+
+// GFX950: v_mfma_ld_scale_b32 src_execz, src_execz ; encoding: [0x00,0x40,0xac,0xd3,0xfc,0xf8,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 execz, execz
+
+// GFX950:  v_mfma_ld_scale_b32 v0, v0 ; encoding: [0x00,0x40,0xac,0xd3,0x00,0x01,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v0, v0
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1
+
+// GFX950: v_mfma_ld_scale_b32 0, 0 ; encoding: [0x00,0x40,0xac,0xd3,0x80,0x00,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 0, 0
+
+// GFX950: v_mfma_ld_scale_b32 1, 0               ; encoding: [0x00,0x40,0xac,0xd3,0x81,0x00,0x01,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 1, 0
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1             ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[1, 1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel_hi:[0,0] ; encoding: [0x00,0x40,0xac,0xd3,0x01,0x03,0x02,0x00]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,0] op_sel_hi:[0,0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[1,0]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[0,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] ; encoding: [0x00,0x58,0xac,0xd3,0x01,0x03,0x02,0x18]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,1] op_sel_hi:[1,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x00,0x48,0xac,0xd3,0x01,0x03,0x02,0x10]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[1,0] op_sel_hi:[0,1]
+
+// GFX950: v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x00,0x50,0xac,0xd3,0x01,0x03,0x02,0x08]
+// ERR: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+v_mfma_ld_scale_b32 v1, v1 op_sel:[0,1] op_sel_hi:[1,0]
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
index 292f2a348df2ef..1fa48fca80fb45 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx950_mai.txt
@@ -1,88 +1,161 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx950 -show-encoding -disassemble %s | FileCheck -check-prefix=GFX950 %s
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] abid:1 ; encoding: [0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] abid:1 ; encoding: [0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x88,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x02,0x3c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 ; encoding: [0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 ; encoding: [0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x83,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] cbsz:3 abid:1 ; encoding: [0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c]
 0x00,0x8b,0xd4,0xd3,0x00,0x01,0x02,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], a[0:3], 1.0 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], a[0:3], 1.0 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0xca,0x13
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04]
+# GFX950:   v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04]
 0x00,0x80,0xd4,0xd3,0x00,0x01,0x12,0x04
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], a[0:3], v[4:7] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c]
+# GFX950:   v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], a[0:3], v[4:7] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c]
 0x00,0x00,0xd4,0xd3,0x00,0x01,0x12,0x1c
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], v[0:3], 1.0 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b]
+# GFX950:   v_mfma_f32_16x16x32_f16 v[0:3], a[0:3], v[0:3], 1.0 ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b]
 0x00,0x00,0xd4,0xd3,0x00,0x01,0xca,0x0b
 
-# GFX950: 	v_mfma_f32_16x16x32_f16 ...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:AMDGPU clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category llvm:analysis llvm:ir mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants