Skip to content

Commit

Permalink
update ruapu to detect zfh zvfh xtheadvector
Browse files Browse the repository at this point in the history
  • Loading branch information
nihui committed Dec 22, 2024
1 parent 78aca5d commit b9bdb74
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 17 deletions.
30 changes: 20 additions & 10 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,14 @@ static int g_cpu_support_x86_avx512_bf16;
static int g_cpu_support_x86_avx512_fp16;
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)

#if defined __ANDROID__ || defined __linux__
#if __riscv
static int g_cpu_support_riscv_zfh;
static int g_cpu_support_riscv_zvfh;
static int g_cpu_support_riscv_xtheadvector;
#endif // __riscv
#endif // defined __ANDROID__ || defined __linux__

static int g_cpu_level2_cachesize;
static int g_cpu_level3_cachesize;

Expand Down Expand Up @@ -1988,7 +1996,7 @@ static void initialize_global_cpu_info()
g_powersave = 0;
initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big);

#if (defined _WIN32 && (__aarch64__ || __arm__))
#if (defined _WIN32 && (__aarch64__ || __arm__)) || ((defined __ANDROID__ || defined __linux__) && __riscv)
if (!is_being_debugged())
{
ruapu_init();
Expand Down Expand Up @@ -2045,6 +2053,14 @@ static void initialize_global_cpu_info()
g_cpu_support_x86_avx512_fp16 = get_cpu_support_x86_avx512_fp16();
#endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)

#if defined __ANDROID__ || defined __linux__
#if __riscv
g_cpu_support_riscv_zfh = ruapu_supports("zfh") || ruapu_supports("xtheadvector"); // xtheadvector implies zfh
g_cpu_support_riscv_zvfh = ruapu_supports("zvfh") || ruapu_supports("xtheadvector"); // xtheadvector implies zvfh
g_cpu_support_riscv_xtheadvector = ruapu_supports("xtheadvector");
#endif // __riscv
#endif // defined __ANDROID__ || defined __linux__

g_cpu_level2_cachesize = get_cpu_level2_cachesize();
g_cpu_level3_cachesize = get_cpu_level3_cachesize();

Expand Down Expand Up @@ -2706,9 +2722,7 @@ int cpu_support_riscv_zfh()
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
// v + f does not imply zfh, but how to discover zfh properly ?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
return g_cpu_support_riscv_zfh;
#else
return 0;
#endif
Expand All @@ -2722,9 +2736,7 @@ int cpu_support_riscv_zvfh()
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
// v + f does not imply zfh, but how to discover zvfh properly ?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
return g_cpu_support_riscv_zvfh;
#else
return 0;
#endif
Expand All @@ -2738,9 +2750,7 @@ int cpu_support_riscv_xtheadvector()
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
// v + f does not imply zfh, but how to discover zvfh properly ?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
return g_cpu_support_riscv_xtheadvector;
#else
return 0;
#endif
Expand Down
56 changes: 49 additions & 7 deletions src/ruapu.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,37 +82,39 @@ static int ruapu_detect_isa(ruapu_some_inst some_inst)
#include <signal.h>
#include <setjmp.h>

static int g_ruapu_sigill_caught = 0;
static int g_ruapu_sig_caught = 0;
static sigjmp_buf g_ruapu_jmpbuf;

static void ruapu_catch_sigill(int signo, siginfo_t* si, void* data)
static void ruapu_catch_sig(int signo, siginfo_t* si, void* data)
{
(void)signo;
(void)si;
(void)data;

g_ruapu_sigill_caught = 1;
g_ruapu_sig_caught = 1;
siglongjmp(g_ruapu_jmpbuf, -1);
}

static int ruapu_detect_isa(ruapu_some_inst some_inst)
{
g_ruapu_sigill_caught = 0;
g_ruapu_sig_caught = 0;

struct sigaction sa = { 0 };
struct sigaction old_sa;
sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
sa.sa_sigaction = ruapu_catch_sigill;
sa.sa_sigaction = ruapu_catch_sig;
sigaction(SIGILL, &sa, &old_sa);
sigaction(SIGSEGV, &sa, &old_sa);

if (sigsetjmp(g_ruapu_jmpbuf, 1) == 0)
{
some_inst();
}

sigaction(SIGILL, &old_sa, NULL);
sigaction(SIGSEGV, &old_sa, NULL);

return g_ruapu_sigill_caught ? 0 : 1;
return g_ruapu_sig_caught ? 0 : 1;
}

#elif defined __SYTERKIT__
Expand Down Expand Up @@ -219,10 +221,24 @@ RUAPU_INSTCODE(avxvnni, 0xc4, 0xe2, 0x7d, 0x52, 0xc0) // vpdpwssd ymm0,ymm0,ymm0
RUAPU_INSTCODE(avxvnniint8, 0xc4, 0xe2, 0x7f, 0x50, 0xc0) // vpdpbssd ymm0,ymm0,ymm0
RUAPU_INSTCODE(avxvnniint16, 0xc4, 0xe2, 0x7e, 0xd2, 0xc0) // vpdpwsud ymm0,ymm0,ymm0
RUAPU_INSTCODE(avxifma, 0xc4, 0xe2, 0xfd, 0xb4, 0xc0) // vpmadd52luq ymm0,ymm0,ymm0
RUAPU_INSTCODE(avxneconvert, 0xc4, 0xe2, 0x7e, 0x72, 0xc0) // vcvtneps2bf16 xmm0,ymm0
RUAPU_INSTCODE(amxfp16, 0xc4, 0xe2, 0x7b, 0x5c, 0xd1) // tdpfp16ps %tmm0, %tmm1, %tmm2
RUAPU_INSTCODE(amxbf16, 0xc4, 0xe2, 0x7a, 0x5c, 0xd1) // tdpbf16ps %tmm0, %tmm1, %tmm2
RUAPU_INSTCODE(amxint8, 0xc4, 0xe2, 0x7b, 0x5e, 0xd1) // tdpbssd %tmm0, %tmm1, %tmm2
RUAPU_INSTCODE(amxtile, 0xc4, 0xe2, 0x7a, 0x49, 0xc0) // tilezero %tmm0
RUAPU_INSTCODE(bmi1, 0xc4, 0xe2, 0x78, 0xf2, 0xc0) // andn eax,eax,eax
RUAPU_INSTCODE(bmi2, 0xc4, 0xe2, 0x7b, 0xf6, 0xc0) // mulx eax,eax,eax
RUAPU_INSTCODE(gfni, 0x66, 0x0f, 0x38, 0xcf, 0xc0) // gf2p8mulb xmm0,xmm0
RUAPU_INSTCODE(aesni, 0x66, 0x0f, 0x38, 0xdc, 0xc0) // aesenc xmm0,xmm0
RUAPU_INSTCODE(vaes, 0xc4, 0xe2, 0x7d, 0xdc, 0xc0) // vaesenc ymm0,ymm0,ymm0
RUAPU_INSTCODE(sha1, 0x0f, 0x38, 0xc9, 0xc0) // sha1msg1 xmm0,xmm0
RUAPU_INSTCODE(sha256, 0x0f, 0x38, 0xcc, 0xc0) // sha256msg1 xmm0, xmm0
RUAPU_INSTCODE(sha512, 0xc4, 0xe2, 0x7f, 0xcd, 0xc0) // vsha512msg2 ymm0, ymm0
RUAPU_INSTCODE(sm3, 0xc4, 0xe2, 0x78, 0xda, 0xc0) // vsm3msg1 xmm0,xmm0,xmm0
RUAPU_INSTCODE(sm4, 0xc4, 0xe2, 0x7e, 0xda, 0xc0) // vsm4key4 ymm0,ymm0,ymm0
RUAPU_INSTCODE(rdrand, 0x0f, 0xc7, 0xf0) // rdrand eax
RUAPU_INSTCODE(rdseed, 0x0f, 0xc7, 0xf8) // rdseed eax
RUAPU_INSTCODE(tsx, 0x0f, 0x01, 0xd6) // xtest

#elif __aarch64__ || defined(_M_ARM64)
RUAPU_INSTCODE(neon, 0x4e20d400) // fadd v0.4s,v0.4s,v0.4s
Expand Down Expand Up @@ -314,6 +330,7 @@ RUAPU_INSTCODE(zbs, 0x48a51533) // bclr a0,a0,a0
RUAPU_INSTCODE(zbkb, 0x08a54533) // pack a0,a0,a0
RUAPU_INSTCODE(zbkc, 0x0aa53533) // clmulh a0,a0,a0
RUAPU_INSTCODE(zbkx, 0x28a52533) // xperm.n a0,a0,a0
RUAPU_INSTCODE(zcb, 0x9d759d75) // c.not a0 c.not a0
RUAPU_INSTCODE(zfa, 0xf0108053) // fli.s ft0, min
RUAPU_INSTCODE(zfbfmin, 0x44807053) // fcvt.bf16.s ft0,ft0
RUAPU_INSTCODE(zfh, 0x04007053); // fadd.hs ft0, ft0, ft0
Expand All @@ -333,7 +350,12 @@ RUAPU_INSTCODE(xtheadmac, 0x20a5150b) // th.mula a0,a0,a0
RUAPU_INSTCODE(xtheadmemidx, 0x1801450b) // th.lbia a0,(sp),#0,#0
RUAPU_INSTCODE(xtheadmempair, 0xe0a1450b) // th.lwd a0,a0,(sp),#0,3
RUAPU_INSTCODE(xtheadsync, 0x0180000b) // th.sync
RUAPU_INSTCODE(xtheadvdot, 0x8000600b) // th.vmaqa.vv v0,v0,v0
RUAPU_INSTCODE(xtheadvector, 0x32052557) // th.vext.x.v a0,v0,a0
RUAPU_INSTCODE(xtheadvdot, 0x8200600b) // th.vmaqa.vv v0,v0,v0

RUAPU_INSTCODE(spacemitvmadot, 0xe200312b) // vmadot v2,v0,v0
RUAPU_INSTCODE(spacemitvmadotn, 0xe600b12b) // vmadot3 v2,v0,v1 //vmadot2 vmadot1
RUAPU_INSTCODE(spacemitvfmadot, 0xea00012b) // vfmadot v2,v0,v0

// RVV 1.0 support
// unimp (csrrw x0, cycle, x0)
Expand Down Expand Up @@ -425,10 +447,24 @@ RUAPU_ISAENTRY(avxvnni)
RUAPU_ISAENTRY(avxvnniint8)
RUAPU_ISAENTRY(avxvnniint16)
RUAPU_ISAENTRY(avxifma)
RUAPU_ISAENTRY(avxneconvert)
RUAPU_ISAENTRY(amxfp16)
RUAPU_ISAENTRY(amxbf16)
RUAPU_ISAENTRY(amxint8)
RUAPU_ISAENTRY(amxtile)
RUAPU_ISAENTRY(bmi1)
RUAPU_ISAENTRY(bmi2)
RUAPU_ISAENTRY(gfni)
RUAPU_ISAENTRY(aesni)
RUAPU_ISAENTRY(vaes)
RUAPU_ISAENTRY(sha1)
RUAPU_ISAENTRY(sha256)
RUAPU_ISAENTRY(sha512)
RUAPU_ISAENTRY(sm3)
RUAPU_ISAENTRY(sm4)
RUAPU_ISAENTRY(rdrand)
RUAPU_ISAENTRY(rdseed)
RUAPU_ISAENTRY(tsx)

#elif __aarch64__ || defined(_M_ARM64)
RUAPU_ISAENTRY(neon)
Expand Down Expand Up @@ -512,6 +548,7 @@ RUAPU_ISAENTRY(zbs)
RUAPU_ISAENTRY(zbkb)
RUAPU_ISAENTRY(zbkc)
RUAPU_ISAENTRY(zbkx)
RUAPU_ISAENTRY(zcb)
RUAPU_ISAENTRY(zfa)
RUAPU_ISAENTRY(zfbfmin)
RUAPU_ISAENTRY(zfh)
Expand Down Expand Up @@ -544,8 +581,13 @@ RUAPU_ISAENTRY(xtheadmac)
RUAPU_ISAENTRY(xtheadmemidx)
RUAPU_ISAENTRY(xtheadmempair)
RUAPU_ISAENTRY(xtheadsync)
RUAPU_ISAENTRY(xtheadvector)
RUAPU_ISAENTRY(xtheadvdot)

RUAPU_ISAENTRY(spacemitvmadot)
RUAPU_ISAENTRY(spacemitvmadotn)
RUAPU_ISAENTRY(spacemitvfmadot)

#elif __openrisc__
RUAPU_ISAENTRY(orbis32)
RUAPU_ISAENTRY(orbis64)
Expand Down

0 comments on commit b9bdb74

Please sign in to comment.