forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Add intrinsics for non-widening FMOPA/FMOPS
According to the specification in ARM-software/acle#309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants.
- Loading branch information
1 parent
8461d90
commit 2b0befb
Showing
7 changed files
with
220 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 | ||
// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK | ||
// RUN: %clang_cc1 -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX | ||
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK | ||
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX | ||
|
||
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s | ||
|
||
// REQUIRES: aarch64-registered-target | ||
|
||
#include <arm_sme.h> | ||
|
||
#ifdef SME_OVERLOADED_FORMS | ||
#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 | ||
#else | ||
#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 | ||
#endif | ||
|
||
// CHECK-LABEL: define dso_local void @test_svmopa_za16_bf16( | ||
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) | ||
// CHECK-NEXT: ret void | ||
// | ||
// CHECK-CXX-LABEL: define dso_local void @_Z21test_svmopa_za16_bf16u10__SVBool_tS_u14__SVBfloat16_tS0_( | ||
// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
// CHECK-CXX-NEXT: entry: | ||
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) | ||
// CHECK-CXX-NEXT: ret void | ||
// | ||
void test_svmopa_za16_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { | ||
SME_ACLE_FUNC(svmopa_za16, _bf16, _m)(0, pn, pm, zn, zm); | ||
} | ||
|
||
// CHECK-LABEL: define dso_local void @test_svmops_za16_bf16( | ||
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) | ||
// CHECK-NEXT: ret void | ||
// | ||
// CHECK-CXX-LABEL: define dso_local void @_Z21test_svmops_za16_bf16u10__SVBool_tS_u14__SVBfloat16_tS0_( | ||
// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-CXX-NEXT: entry: | ||
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]]) | ||
// CHECK-CXX-NEXT: ret void | ||
// | ||
void test_svmops_za16_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { | ||
SME_ACLE_FUNC(svmops_za16, _bf16, _m)(0, pn, pm, zn, zm); | ||
} | ||
|
||
// CHECK-LABEL: define dso_local void @test_svmopa_za16_f16( | ||
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) | ||
// CHECK-NEXT: ret void | ||
// | ||
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svmopa_za16_f16u10__SVBool_tS_u13__SVFloat16_tS0_( | ||
// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-CXX-NEXT: entry: | ||
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) | ||
// CHECK-CXX-NEXT: ret void | ||
// | ||
void test_svmopa_za16_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { | ||
SME_ACLE_FUNC(svmopa_za16, _f16, _m)(0, pn, pm, zn, zm); | ||
} | ||
|
||
// CHECK-LABEL: define dso_local void @test_svmops_za16_f16( | ||
// CHECK-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) | ||
// CHECK-NEXT: ret void | ||
// | ||
// CHECK-CXX-LABEL: define dso_local void @_Z20test_svmops_za16_f16u10__SVBool_tS_u13__SVFloat16_tS0_( | ||
// CHECK-CXX-SAME: <vscale x 16 x i1> [[PN:%.*]], <vscale x 16 x i1> [[PM:%.*]], <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-CXX-NEXT: entry: | ||
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PN]]) | ||
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PM]]) | ||
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv8f16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]]) | ||
// CHECK-CXX-NEXT: ret void | ||
// | ||
void test_svmops_za16_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { | ||
SME_ACLE_FUNC(svmops_za16, _f16, _m)(0, pn, pm, zn, zm); | ||
} |
34 changes: 34 additions & 0 deletions
34
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify -emit-llvm %s | ||
|
||
// REQUIRES: aarch64-registered-target | ||
|
||
#include <arm_sme.h> | ||
|
||
void test_features(svbool_t pn, svbool_t pm, | ||
svfloat16_t zn, svfloat16_t zm, | ||
svbfloat16_t znb, svbfloat16_t zmb) | ||
__arm_streaming __arm_inout("za") { | ||
// expected-error@+1 {{'svmopa_za16_bf16_m' needs target feature sme2,b16b16}} | ||
svmopa_za16_bf16_m(0, pn, pm, znb, zmb); | ||
// expected-error@+1 {{'svmops_za16_bf16_m' needs target feature sme2,b16b16}} | ||
svmops_za16_bf16_m(0, pn, pm, znb, zmb); | ||
// expected-error@+1 {{'svmopa_za16_f16_m' needs target feature sme2p1,sme-f16f16}} | ||
svmopa_za16_f16_m(0, pn, pm, zn, zm); | ||
// expected-error@+1 {{'svmops_za16_f16_m' needs target feature sme2p1,sme-f16f16}} | ||
svmops_za16_f16_m(0, pn, pm, zn, zm); | ||
} | ||
|
||
void test_imm(svbool_t pn, svbool_t pm, | ||
svfloat16_t zn, svfloat16_t zm, | ||
svbfloat16_t znb, svbfloat16_t zmb) | ||
__arm_streaming __arm_inout("za") { | ||
// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} | ||
svmopa_za16_bf16_m(-1, pn, pm, znb, zmb); | ||
// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} | ||
svmops_za16_bf16_m(-1, pn, pm, znb, zmb); | ||
// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} | ||
svmopa_za16_f16_m(-1, pn, pm, zn, zm); | ||
// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} | ||
svmops_za16_f16_m(-1, pn, pm, zn, zm); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s | ||
|
||
target triple = "aarch64-linux" | ||
|
||
define void @mopa_bf16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) #0 { | ||
; CHECK-LABEL: mopa_bf16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: bfmopa za0.h, p0/m, p1/m, z0.h, z1.h | ||
; CHECK-NEXT: ret | ||
call void @llvm.aarch64.sme.mopa.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) | ||
ret void | ||
} | ||
|
||
define void @mopa_f16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) #0 { | ||
; CHECK-LABEL: mopa_f16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: fmopa za1.h, p0/m, p1/m, z0.h, z1.h | ||
; CHECK-NEXT: ret | ||
call void @llvm.aarch64.sme.mopa.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) | ||
ret void | ||
} | ||
|
||
define void @mops_bf16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) #0 { | ||
; CHECK-LABEL: mops_bf16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: bfmops za0.h, p0/m, p1/m, z0.h, z1.h | ||
; CHECK-NEXT: ret | ||
call void @llvm.aarch64.sme.mops.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) | ||
ret void | ||
} | ||
|
||
define void @mops_f16(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) #0 { | ||
; CHECK-LABEL: mops_f16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: fmops za1.h, p0/m, p1/m, z0.h, z1.h | ||
; CHECK-NEXT: ret | ||
call void @llvm.aarch64.sme.mops.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) | ||
ret void | ||
} | ||
|
||
attributes #0 = {nounwind "target-features" = "+sme,+sme2p1,+bf16,+sme-f16f16,+b16b16" } |