Skip to content

Commit

Permalink
Add detection of function multi-versioning in the compiler
Browse files Browse the repository at this point in the history
This support is used so that the SSE2 version of memcpy_nontemporal is
selected only if the CPU supports SSE2 at runtime.

On x86-64 there is guaranteed to be runtime support, so this only
affects i386. Note that i386 is not currently tested. The real
motivation is that it lays the groundwork for adding more specialised
variants e.g. AVX, AVX-512, MOVDIR64B etc.
  • Loading branch information
bmerry committed Aug 7, 2023
1 parent 5f1dead commit 10bac19
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 5 deletions.
15 changes: 15 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ SPEAD2_ARG_WITH(
)]
)

# -Werror is used to get an error (rather than a warning) if the compiler
# doesn't support the feature.
SPEAD2_ARG_WITH(
[fmv],
[AS_HELP_STRING([--without-fmv], [Do not use compiler function multi-versioning support])],
[SPEAD2_USE_FMV],
[SPEAD2_CHECK_FEATURE(
[fmv], [function multi-versioning], [], [], [],
[SPEAD2_USE_FMV=1], [],
[__attribute__((target("default"))) void foo() {}],
[-Werror]
)]
)

SPEAD2_ARG_WITH(
[movntdq],
[AS_HELP_STRING([--without-movntdq], [Do not use MOVNTDQ instruction for non-temporal copies])],
Expand Down Expand Up @@ -275,6 +289,7 @@ SPEAD2_PRINT_CONDITION([compiler optimization], [OPTIMIZED])
SPEAD2_PRINT_CONDITION([link-time optimization], [LTO])
SPEAD2_PRINT_CONDITION([coverage], [COVERAGE])
SPEAD2_PRINT_CONDITION([shared library], [SHARED_LIBRARY])
SPEAD2_PRINT_FEATURE([function multi-versioning], [test "x$SPEAD2_USE_FMV" = "x1"])
SPEAD2_PRINT_FEATURE([MOVNTDQ instruction], [test "x$SPEAD2_USE_MOVNTDQ" = "x1"])
echo ""
echo "System calls:"
Expand Down
9 changes: 8 additions & 1 deletion include/spead2/common_defines.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2015, 2020 National Research Foundation (SARAO)
/* Copyright 2015, 2020, 2023 National Research Foundation (SARAO)
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
Expand Down Expand Up @@ -28,6 +28,7 @@
#include <utility>
#include <string>
#include <functional>
#include <spead2/common_features.h>

#ifndef SPEAD2_MAX_LOG_LEVEL
#define SPEAD2_MAX_LOG_LEVEL (spead2::log_level::info)
Expand All @@ -43,6 +44,12 @@
# define SPEAD2_DEPRECATED(msg)
#endif

/* Attribute marking one version of a multi-versioned function.
 *
 * When SPEAD2_USE_FMV is non-zero this expands to [[gnu::target(x)]], where
 * x is the target string (e.g. "default" or "sse2"). Otherwise it expands to
 * nothing, so the annotated function is an ordinary, unversioned function.
 */
#if SPEAD2_USE_FMV
# define SPEAD2_FMV_TARGET(x) [[gnu::target(x)]]
#else
# define SPEAD2_FMV_TARGET(x)
#endif

/**
* SPEAD protocol sending and receiving. All SPEAD-64-* flavours are
* supported.
Expand Down
6 changes: 5 additions & 1 deletion include/spead2/common_features.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@
#define SPEAD2_USE_SENDMMSG @SPEAD2_USE_SENDMMSG@
#define SPEAD2_USE_EVENTFD @SPEAD2_USE_EVENTFD@
#define SPEAD2_USE_PTHREAD_SETAFFINITY_NP @SPEAD2_USE_PTHREAD_SETAFFINITY_NP@
#define SPEAD2_USE_FMV @SPEAD2_USE_FMV@
/* Python on MacOS likes to build universal binaries, so even if it was
* detected at configure time, it might not be available for a particular
* build architecture.
*
* Additionally, on i386 it is not guaranteed to exist at runtime, and we
* need function multi-versioning to make it safe to use.
*/
#if defined(__i386__) || defined(__i386) || defined(__x86_64__) || defined(__x86_64)
#if (SPEAD2_USE_FMV && (defined(__i386__) || defined(__i386))) || defined(__x86_64__) || defined(__x86_64)
# define SPEAD2_USE_MOVNTDQ @SPEAD2_USE_MOVNTDQ@
#else
# define SPEAD2_USE_MOVNTDQ 0
Expand Down
9 changes: 9 additions & 0 deletions include/spead2/common_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstddef>
#include <spead2/common_features.h>
#include <spead2/common_defines.h>

/**
* Variant of memcpy that uses a non-temporal hint for the destination.
Expand All @@ -30,7 +31,15 @@
namespace spead2
{

/* Fallback version. It is declared either as the "default" target when
 * function multi-versioning is in use, or as the only version when the
 * MOVNTDQ implementation is disabled.
 */
#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("default")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept;
#endif

/* MOVNTDQ version, tagged for the "sse2" target. If function
 * multi-versioning is not in use, SPEAD2_FMV_TARGET expands to nothing and
 * the guard above is false, so this is then the sole declaration.
 */
#if SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("sse2")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept;
#endif

} // namespace spead2

Expand Down
13 changes: 10 additions & 3 deletions src/common_memcpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,18 @@
namespace spead2
{

#if SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("default")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
#if !SPEAD2_USE_MOVNTDQ
return std::memcpy(dest, src, n);
#else
}
#endif // SPEAD2_USE_FMV || !SPEAD2_USE_MOVNTDQ

#if SPEAD2_USE_MOVNTDQ
SPEAD2_FMV_TARGET("sse2")
void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src, std::size_t n) noexcept
{
char * __restrict__ dest_c = (char *) dest;
const char * __restrict__ src_c = (const char *) src;
// Align the destination to a cache-line boundary
Expand Down Expand Up @@ -73,7 +80,7 @@ void *memcpy_nontemporal(void * __restrict__ dest, const void * __restrict__ src
std::memcpy(dest_c + offset, src_c + offset, tail);
_mm_sfence();
return dest;
#endif // SPEAD2_USE_MOVNTDQ
}
#endif // SPEAD2_USE_MOVNTDQ

} // namespace spead2

0 comments on commit 10bac19

Please sign in to comment.