Skip to content

Commit

Permalink
Make memcpy_nontemporal with SVE work without crashing
Browse files Browse the repository at this point in the history
getauxval is not safe to use in an ifunc resolver, because the resolver runs too early.
But the ABI for AArch64 passes the HWCAPS as an argument.
  • Loading branch information
bmerry committed Nov 6, 2024
1 parent 2124947 commit 8bad35f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
17 changes: 13 additions & 4 deletions src/common_memcpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,11 @@ void *memcpy_nontemporal_sve(void * __restrict__ dest, const void * __restrict__
}
#endif // SPEAD2_USE_SVE_STREAM

void *(*resolve_memcpy_nontemporal())(void *, const void *, std::size_t) noexcept
void *(*resolve_memcpy_nontemporal(
#ifdef __aarch64__
std::uint64_t hwcaps // See System V ABI for AArch64
#endif
))(void *, const void *, std::size_t) noexcept
{
/* x86 options */
#if SPEAD2_USE_AVX512_STREAM || SPEAD2_USE_AVX_STREAM || SPEAD2_USE_SSE2_STREAM
Expand Down Expand Up @@ -137,7 +141,6 @@ void *(*resolve_memcpy_nontemporal())(void *, const void *, std::size_t) noexcep

/* aarch64 options */
#if SPEAD2_USE_SVE_STREAM
unsigned long hwcaps = getauxval(AT_HWCAP);
if (hwcaps & HWCAP_SVE)
return memcpy_nontemporal_sve;
#endif
Expand All @@ -150,14 +153,20 @@ void *(*resolve_memcpy_nontemporal())(void *, const void *, std::size_t) noexcep

#if SPEAD2_USE_FMV

[[gnu::ifunc("_ZN6spead226resolve_memcpy_nontemporalEv")]]
[[gnu::ifunc("_ZN6spead226resolve_memcpy_nontemporalEm")]]
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept;

#else

/**
 * Fallback definition of memcpy_nontemporal, compiled when SPEAD2_USE_FMV is
 * not set (i.e. when the ifunc-based dispatch is unavailable).
 *
 * The implementation is chosen by resolve_memcpy_nontemporal the first time
 * this function is called and cached in a function-local static; every call
 * then forwards to the cached pointer.
 *
 * @param dest  Destination buffer
 * @param src   Source buffer
 * @param n     Number of bytes to copy
 * @return Whatever the selected implementation returns.
 */
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
#ifdef __aarch64__
    /* On AArch64 the resolver takes the hardware capability bits as an
     * argument. When it is not being invoked as an ifunc resolver, fetch
     * them from the auxiliary vector. The key is AT_HWCAP: that is the
     * word the HWCAP_SVE bit is tested against.
     */
    static void *(*memcpy_nontemporal_ptr)(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept =
        resolve_memcpy_nontemporal(getauxval(AT_HWCAP));
#else
    static void *(*memcpy_nontemporal_ptr)(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept =
        resolve_memcpy_nontemporal();
#endif
    return memcpy_nontemporal_ptr(dest, src, n);
}


Expand Down
7 changes: 6 additions & 1 deletion src/unittest_memcpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ std::ostream &operator<<(std::ostream &o, const memcpy_function &func)
return o << func.name;
}

/**
 * Plain forwarding wrapper with the same signature as
 * spead2::memcpy_nontemporal: passes all three arguments through and returns
 * the callee's result unchanged.
 *
 * NOTE(review): presumably this exists so that test tables can hold the
 * address of an ordinary function rather than that of the ifunc-dispatched
 * symbol itself - confirm.
 */
static void *wrap_memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
    void *const copied = spead2::memcpy_nontemporal(dest, src, n);
    return copied;
}

static const memcpy_function memcpy_functions[] =
{
{ "default", spead2::memcpy_nontemporal, true },
{ "default", wrap_memcpy_nontemporal, true },
#if SPEAD2_USE_SSE2_STREAM
{ "sse2", spead2::memcpy_nontemporal_sse2, bool(__builtin_cpu_supports("sse2")) },
#endif
Expand Down

0 comments on commit 8bad35f

Please sign in to comment.