From 96d67ed53b1820fe651fd9ecd723ecf3d7805ff3 Mon Sep 17 00:00:00 2001 From: Timur Doumler Date: Tue, 12 Nov 2024 16:34:07 +0000 Subject: [PATCH] Added bytewise_atomic_memcpy, updated seqlock_object accordingly; added macros to detect compiler; added CRILL_PRE macro (currently just resolving to C assert) --- CMakeLists.txt | 1 + include/crill/bytewise_atomic_memcpy.h | 91 ++++++++++++++++++++++++++ include/crill/contracts.h | 15 +++++ include/crill/platform.h | 11 ++++ include/crill/seqlock_object.h | 27 ++------ tests/bytewise_atomic_memcpy_test.cpp | 42 ++++++++++++ 6 files changed, 166 insertions(+), 21 deletions(-) create mode 100644 include/crill/bytewise_atomic_memcpy.h create mode 100644 include/crill/contracts.h create mode 100644 tests/bytewise_atomic_memcpy_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c4da99..997363a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories(include) set(TESTS_SOURCES tests/main.cpp + tests/bytewise_atomic_memcpy_test.cpp tests/progressive_backoff_wait_test.cpp tests/spin_mutex_test.cpp tests/seqlock_object_test.cpp) diff --git a/include/crill/bytewise_atomic_memcpy.h b/include/crill/bytewise_atomic_memcpy.h new file mode 100644 index 0000000..449aaae --- /dev/null +++ b/include/crill/bytewise_atomic_memcpy.h @@ -0,0 +1,91 @@ +// crill - the Cross-platform Real-time, I/O, and Low-Latency Library +// Copyright (c) 2022 - Timur Doumler and Fabian Renn-Giles +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt) + +#ifndef CRILL_BYTEWISE_ATOMIC_MEMCPY_H +#define CRILL_BYTEWISE_ATOMIC_MEMCPY_H + +#include +#include +#include + +namespace crill { + // These are implementations of the corresponding functions + // atomic_load/store_per_byte_memcpy from the Concurrency TS 2. + // They behave as if the source and dest bytes respectively + // were individual atomic objects. 
+ // The implementations provided below is portable, but slow. + // PRs with platform-optimised versions are welcome :) + // The implementations provided below are also *technically* + // UB because C++ does not let us loop over the bytes of + // an object representation, but that is a known wording bug that + // will be fixed by P1839; the technique should work on any + // major compiler. + + // Preconditions: + // - order is std::memory_order::acquire or std::memory_order::relaxed + // - (char*)dest + [0, count) and (const char*)source + [0, count) + // are valid ranges that do not overlap + // Effects: + // Copies count consecutive bytes pointed to by source into consecutive + // bytes pointed to by dest. Each individual load operation from a source + // byte is atomic with memory order order. These individual loads are + // unsequenced with respect to each other. + inline void* atomic_load_per_byte_memcpy + (void* dest, const void* source, size_t count, std::memory_order order) + { + CRILL_PRE(order == std::memory_order_acquire || order == std::memory_order_relaxed); + + char* dest_bytes = reinterpret_cast(dest); + const char* src_bytes = reinterpret_cast(source); + + for (std::size_t i = 0; i < count; ++i) { + #if __cpp_lib_atomic_ref + dest_bytes[i] = std::atomic_ref(src_bytes[i]).load(std::memory_order_relaxed); + #elif CRILL_CLANG || CRILL_GCC + dest_bytes[i] = __atomic_load_n(src_bytes + i, __ATOMIC_RELAXED); + #else + // No atomic_ref or equivalent functionality available on this platform! + #endif + } + + std::atomic_thread_fence(order); + + return dest; + } + + // Preconditions: + // - order is std::memory_order::release or std::memory_order::relaxed + // - (char*)dest + [0, count) and (const char*)source + [0, count) + // are valid ranges that do not overlap + // Effects: + // Copies count consecutive bytes pointed to by source into consecutive + // bytes pointed to by dest. 
Each individual store operation to a + // destination byte is atomic with memory order order. These individual + // stores are unsequenced with respect to each other. + inline void* atomic_store_per_byte_memcpy + (void* dest, const void* source, size_t count, std::memory_order order) + { + CRILL_PRE(order == std::memory_order_release || order == std::memory_order_relaxed); + + std::atomic_thread_fence(order); + + char* dest_bytes = reinterpret_cast(dest); + const char* src_bytes = reinterpret_cast(source); + + for (size_t i = 0; i < count; ++i) { + #if __cpp_lib_atomic_ref + std::atomic_ref(dest_bytes[i]).store(src_bytes[i], std::memory_order_relaxed); + #elif CRILL_CLANG || CRILL_GCC + __atomic_store_n(dest_bytes + i, src_bytes[i], __ATOMIC_RELAXED); + #else + // No atomic_ref or equivalent functionality available on this platform! + #endif + } + + return dest; + } +} + +#endif //CRILL_BYTEWISE_ATOMIC_MEMCPY_H diff --git a/include/crill/contracts.h b/include/crill/contracts.h new file mode 100644 index 0000000..90bd8cf --- /dev/null +++ b/include/crill/contracts.h @@ -0,0 +1,15 @@ +// crill - the Cross-platform Real-time, I/O, and Low-Latency Library +// Copyright (c) 2022 - Timur Doumler and Fabian Renn-Giles +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt) + +#ifndef CRILL_CONTRACTS_H +#define CRILL_CONTRACTS_H + +#include + +// This will eventually be a proper macro-based Contracts facility +// but at the moment is just an alias for C assert. 
+#define CRILL_PRE(x) assert(x) + +#endif //CRILL_CONTRACTS_H diff --git a/include/crill/platform.h b/include/crill/platform.h index 952d1c2..a25fb99 100644 --- a/include/crill/platform.h +++ b/include/crill/platform.h @@ -6,6 +6,7 @@ #ifndef CRILL_PLATFORM_H #define CRILL_PLATFORM_H +// Macros to query target hardware architecture #if defined (__arm__) #define CRILL_ARM 1 #define CRILL_32BIT 1 @@ -24,4 +25,14 @@ #define CRILL_INTEL_64BIT 1 #endif +// Macros to query current compiler +#if defined(__clang__) + #define CRILL_CLANG 1 +#elif defined(__GNUC__) || defined(__GNUG__) + #define CRILL_GCC 1 +#elif defined(_MSC_VER) + #define CRILL_MSVC 1 +#endif + + #endif //CRILL_PLATFORM_H diff --git a/include/crill/seqlock_object.h b/include/crill/seqlock_object.h index a09663a..56e5599 100644 --- a/include/crill/seqlock_object.h +++ b/include/crill/seqlock_object.h @@ -6,8 +6,9 @@ #ifndef CRILL_SEQLOCK_OBJECT_H #define CRILL_SEQLOCK_OBJECT_H -#include +#include #include +#include namespace crill { @@ -51,22 +52,16 @@ class seqlock_object // Non-blocking guarantees: wait-free. bool try_load(T& t) const noexcept { - std::size_t buffer[buffer_size]; - std::size_t seq1 = seq.load(std::memory_order_acquire); if (seq1 % 2 != 0) return false; - for (std::size_t i = 0; i < buffer_size; ++i) - buffer[i] = data[i].load(std::memory_order_relaxed); - - std::atomic_thread_fence(std::memory_order_acquire); + crill::atomic_load_per_byte_memcpy(&t, &data, sizeof(data), std::memory_order_acquire); // NOTE(review): unlike the removed buffer-based code, this writes into t before the seq re-check, so t may hold a torn value when false is returned. std::size_t seq2 = seq.load(std::memory_order_relaxed); if (seq1 != seq2) return false; - std::memcpy(&t, buffer, sizeof(T)); return true; } @@ -74,28 +69,18 @@ class seqlock_object // Non-blocking guarantees: wait-free.
void store(T t) noexcept { - std::size_t buffer[buffer_size]; - if constexpr (sizeof(T) % sizeof(std::size_t) != 0) - buffer[buffer_size - 1] = 0; - - std::memcpy(&buffer, &t, sizeof(T)); - + // Note: load + store usually has better performance characteristics than fetch_add(1); the pair is not a single atomic read-modify-write, so this presumably relies on only one writer at a time (NOTE(review): confirm). std::size_t old_seq = seq.load(std::memory_order_relaxed); seq.store(old_seq + 1, std::memory_order_relaxed); - std::atomic_thread_fence(std::memory_order_release); - - for (std::size_t i = 0; i < buffer_size; ++i) - data[i].store(buffer[i], std::memory_order_relaxed); + crill::atomic_store_per_byte_memcpy(&data, &t, sizeof(data), std::memory_order_release); seq.store(old_seq + 2, std::memory_order_release); } private: - static constexpr std::size_t buffer_size = (sizeof(T) + sizeof(std::size_t) - 1) / sizeof(std::size_t); - std::atomic data[buffer_size]; + char data[sizeof(T)]; std::atomic seq = 0; - static_assert(decltype(seq)::is_always_lock_free); }; diff --git a/tests/bytewise_atomic_memcpy_test.cpp b/tests/bytewise_atomic_memcpy_test.cpp new file mode 100644 index 0000000..859130f --- /dev/null +++ b/tests/bytewise_atomic_memcpy_test.cpp @@ -0,0 +1,42 @@ +// crill - the Cross-platform Real-time, I/O, and Low-Latency Library +// Copyright (c) 2022 - Timur Doumler and Fabian Renn-Giles +// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE.md or copy at http://boost.org/LICENSE_1_0.txt) + +#include +#include + +TEST_CASE("atomic_load_per_byte_memcpy with nullptrs") +{ + REQUIRE(crill::atomic_load_per_byte_memcpy(nullptr, nullptr, 0, std::memory_order_relaxed) == nullptr); // count is 0, so neither pointer is dereferenced +} + +TEST_CASE("atomic_store_per_byte_memcpy with nullptrs") +{ + REQUIRE(crill::atomic_store_per_byte_memcpy(nullptr, nullptr, 0, std::memory_order_relaxed) == nullptr); // count is 0, so neither pointer is dereferenced +} + +struct TestData { + double x, y, z; + bool operator==(const TestData& other) const { + return x == other.x && y == other.y && z == other.z; + } +}; + +TEST_CASE("atomic_load_per_byte_memcpy with struct") +{ + TestData td1 = { 1, 2, 3 }; + TestData td2 = { 3, 4, 5 }; + REQUIRE(crill::atomic_load_per_byte_memcpy(&td2, &td1, sizeof(TestData), std::memory_order_relaxed) == &td2); + REQUIRE(td1 == td2); // every member of td1 must have been copied into td2 +} + +TEST_CASE("atomic_store_per_byte_memcpy with struct") +{ + TestData td1 = { 1, 2, 3 }; + TestData td2 = { 3, 4, 5 }; + REQUIRE(crill::atomic_store_per_byte_memcpy(&td2, &td1, sizeof(TestData), std::memory_order_relaxed) == &td2); + REQUIRE(td1 == td2); // every member of td1 must have been copied into td2 +} + +// TODO: More thorough tests (only memory_order_relaxed and single-threaded use are covered so far; acquire/release orders and concurrent readers/writers are untested) \ No newline at end of file