Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add A64FX to the list of CPUs supported by DYNAMIC_ARCH #4811

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,7 @@ ifneq ($(NO_SVE), 1)
DYNAMIC_CORE += NEOVERSEV1
DYNAMIC_CORE += NEOVERSEN2
DYNAMIC_CORE += ARMV8SVE
DYNAMIC_CORE += A64FX
endif
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
Expand Down
2 changes: 1 addition & 1 deletion cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE)
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
Expand Down
31 changes: 31 additions & 0 deletions cmake/prebuild.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,37 @@ endif ()
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "A64FX")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t256\n"
"#define L1_CODE_ASSOCIATIVE\t8\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t256\n"
"#define L1_DATA_ASSOCIATIVE\t8\n"
"#define L2_SIZE\t8388608\n\n"
"#define L2_LINESIZE\t256\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define L3_SIZE\t0\n\n"
"#define L3_LINESIZE\t0\n\n"
"#define L3_ASSOCIATIVE\t0\n\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define HAVE_SVE\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 8)
set(DGEMM_UNROLL_M 2)
set(DGEMM_UNROLL_N 8)
set(CGEMM_UNROLL_M 2)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 2)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "P5600")
file(APPEND ${TARGET_CONF_TEMP}
"#define L2_SIZE 1048576\n"
Expand Down
12 changes: 12 additions & 0 deletions cmake/system.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,18 @@ if (${TARGET} STREQUAL NEOVERSEV1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
endif()
endif()
if (${TARGET} STREQUAL A64FX)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve -mtune=a64fx")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve -mtune=a64fx")
else ()
message(FATAL_ERROR "Compiler $${CMAKE_C_COMPILER} {GCC_VERSION} does not support A64FX.")
endif()
endif()
endif()

endif()

Expand Down
21 changes: 20 additions & 1 deletion driver/others/dynamic_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ extern gotoblas_t gotoblas_CORTEXA55;
#else
#define gotoblas_CORTEXA55 gotoblas_ARMV8
#endif
#ifdef DYN_A64FX
extern gotoblas_t gotoblas_A64FX;
#else
#define gotoblas_A64FX gotoblas_ARMV8
#endif
#else
extern gotoblas_t gotoblas_CORTEXA53;
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
Expand All @@ -136,10 +141,12 @@ extern gotoblas_t gotoblas_NEOVERSEN1;
extern gotoblas_t gotoblas_NEOVERSEV1;
extern gotoblas_t gotoblas_NEOVERSEN2;
extern gotoblas_t gotoblas_ARMV8SVE;
extern gotoblas_t gotoblas_A64FX;
#else
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
#define gotoblas_ARMV8SVE gotoblas_ARMV8
#define gotoblas_A64FX gotoblas_ARMV8
#endif
extern gotoblas_t gotoblas_THUNDERX3T110;
#endif
Expand All @@ -149,7 +156,7 @@ extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"

#define NUM_CORETYPES 17
#define NUM_CORETYPES 18

/*
* In case asm/hwcap.h is outdated on the build system, make sure
Expand Down Expand Up @@ -184,6 +191,7 @@ static char *corename[] = {
"thunderx3t110",
"cortexa55",
"armv8sve",
"a64fx",
"unknown"
};

Expand All @@ -205,6 +213,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
if (gotoblas == &gotoblas_A64FX) return corename[17];
return corename[NUM_CORETYPES];
}

Expand Down Expand Up @@ -241,6 +250,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 14: return (&gotoblas_THUNDERX3T110);
case 15: return (&gotoblas_CORTEXA55);
case 16: return (&gotoblas_ARMV8SVE);
case 17: return (&gotoblas_A64FX);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
Expand Down Expand Up @@ -346,6 +356,15 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_THUNDERX3T110;
}
break;
case 0x46: // Fujitsu
switch (part)
{
#ifndef NO_SVE
case 0x001: // A64FX
return &gotoblas_A64FX;
#endif
}
break;
case 0x48: // HiSilicon
switch (part)
{
Expand Down
4 changes: 2 additions & 2 deletions kernel/arm64/gemv_n_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SV_COUNT svcntd
#define SV_TYPE svfloat64_t
#define SV_TRUE svptrue_b64
#define SV_WHILE svwhilelt_b64
#define SV_WHILE svwhilelt_b64_s64
#define SV_DUP svdup_f64
#else
#define SV_COUNT svcntw
#define SV_TYPE svfloat32_t
#define SV_TRUE svptrue_b32
#define SV_WHILE svwhilelt_b32
#define SV_WHILE svwhilelt_b32_s64
#define SV_DUP svdup_f32
#endif

Expand Down
4 changes: 2 additions & 2 deletions kernel/arm64/gemv_t_sve.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SV_COUNT svcntd
#define SV_TYPE svfloat64_t
#define SV_TRUE svptrue_b64
#define SV_WHILE svwhilelt_b64
#define SV_WHILE svwhilelt_b64_s64
#define SV_DUP svdup_f64
#else
#define SV_COUNT svcntw
#define SV_TYPE svfloat32_t
#define SV_TRUE svptrue_b32
#define SV_WHILE svwhilelt_b32
#define SV_WHILE svwhilelt_b32_s64
#define SV_DUP svdup_f32
#endif

Expand Down
Loading