Skip to content

Commit

Permalink
Factoring out allocator cache
Browse files Browse the repository at this point in the history
  • Loading branch information
hkaiser committed Sep 25, 2024
1 parent f8d92b0 commit e12bc1c
Show file tree
Hide file tree
Showing 32 changed files with 432 additions and 221 deletions.
5 changes: 3 additions & 2 deletions libs/core/allocator_support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ set(allocator_support_compat_headers
)
# cmake-format: on

set(allocator_support_sources)
set(allocator_support_sources thread_local_caching_allocator.cpp)

include(HPX_AddModule)
add_hpx_module(
Expand All @@ -52,6 +52,7 @@ add_hpx_module(
HEADERS ${allocator_support_headers}
COMPAT_HEADERS ${allocator_support_compat_headers}
DEPENDENCIES hpx_dependencies_allocator
MODULE_DEPENDENCIES hpx_concepts hpx_config hpx_preprocessor hpx_type_support
MODULE_DEPENDENCIES hpx_assertion hpx_concepts hpx_config hpx_preprocessor
hpx_type_support
CMAKE_SUBDIRS examples tests
)
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

#include <hpx/config.hpp>
#include <hpx/allocator_support/config/defines.hpp>
#include <hpx/assert.hpp>

#include <cstddef>
#include <functional>
#include <memory>
#include <new>
#include <type_traits>
Expand All @@ -21,15 +23,41 @@ namespace hpx::util {
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

namespace detail {

// Entry points of the thread-local block cache. The first argument of each
// function is the index of the cache to operate on; the implementation
// asserts that it is smaller than max_number_of_caches.

// Install the callback used to release all blocks held by the selected
// cache. Only the first callback passed for a given cache is stored, later
// calls leave the existing one in place.
HPX_CORE_EXPORT void init_allocator_cache(
std::size_t, std::function<void()>&& clear_cache);
// Take the most recently returned block out of the selected cache. Yields
// the block and the size it was returned with, or {nullptr, 0} if the cache
// holds no blocks.
HPX_CORE_EXPORT std::pair<void*, std::size_t> allocate_from_cache(
std::size_t) noexcept;
// Report whether the selected cache currently holds no blocks.
[[nodiscard]] HPX_CORE_EXPORT bool cache_empty(std::size_t) noexcept;
// Hand the block p (recorded with size n) to the selected cache for later
// reuse. Once returns clearly outnumber reuses, the cache invokes the
// installed clear callback (if any) and resets its counters.
HPX_CORE_EXPORT void return_to_cache(
std::size_t, void* p, std::size_t n);

// Number of available caches; valid cache indices are
// [0, max_number_of_caches).
inline constexpr int max_number_of_caches = 16;

///////////////////////////////////////////////////////////////////////
// Returns the exponent e of the smallest power of two that is not less than
// n, i.e. the smallest e with (1 << e) >= n. Note that the result is the
// exponent, not the power itself. Yields 0 for every n <= 1.
constexpr int next_power_of_two(std::int64_t n) noexcept
{
    int exponent = 0;

    // Count how many right shifts it takes to exhaust n - 1; that is the
    // number of bits needed to represent n - 1.
    std::int64_t remaining = n - 1;
    while (remaining > 0)
    {
        remaining >>= 1;
        ++exponent;
    }
    return exponent;
}
} // namespace detail

///////////////////////////////////////////////////////////////////////////
template <template <typename, typename> class Stack, typename T = char,
typename Allocator = std::allocator<T>>
template <typename T = char, typename Allocator = std::allocator<T>>
struct thread_local_caching_allocator
{
HPX_NO_UNIQUE_ADDRESS Allocator alloc;

private:
using traits = std::allocator_traits<Allocator>;

public:
using value_type = typename traits::value_type;
using pointer = typename traits::pointer;
using const_pointer = typename traits::const_pointer;
Expand All @@ -39,7 +67,7 @@ namespace hpx::util {
template <typename U>
struct rebind
{
using other = thread_local_caching_allocator<Stack, U,
using other = thread_local_caching_allocator<U,
typename traits::template rebind_alloc<U>>;
};

Expand All @@ -51,91 +79,43 @@ namespace hpx::util {
using propagate_on_container_swap =
typename traits::propagate_on_container_swap;

private:
struct allocated_cache
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
explicit allocated_cache(Allocator const& a) noexcept(
noexcept(std::is_nothrow_copy_constructible_v<Allocator>))
: alloc(a)
, data(0)
{
}

allocated_cache(allocated_cache const&) = delete;
allocated_cache(allocated_cache&&) = delete;
allocated_cache& operator=(allocated_cache const&) = delete;
allocated_cache& operator=(allocated_cache&&) = delete;

~allocated_cache()
{
clear_cache();
}

pointer allocate(size_type n)
{
pointer p;
std::pair<T*, size_type> pair;
if (data.pop(pair))
{
p = pair.first;
}
else
// Note: capturing the allocator is safe only as long as it has no
// state, because this lambda may be invoked very late, during the
// destruction of the thread_local cache.
static_assert(std::is_empty_v<Allocator>,
"Please don't use allocators with state in conjunction with "
"the thread_local_caching_allocator");

constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));

static_assert(num_cache < detail::max_number_of_caches,
"This allocator does not support allocating objects larger "
"than 2^16 bytes");

auto f = [=]() mutable {
while (!detail::cache_empty(num_cache))
{
p = traits::allocate(alloc, n);
if (p == nullptr)
auto [p, n] = detail::allocate_from_cache(num_cache);
if (p != nullptr)
{
throw std::bad_alloc();
traits::deallocate(const_cast<Allocator&>(alloc),
static_cast<char*>(p), n);
}
}
};

++allocated;
return p;
}

void deallocate(pointer p, size_type n) noexcept
{
data.push(std::make_pair(p, n));
if (++deallocated > 2 * (allocated + 16))
{
clear_cache();
allocated = 0;
deallocated = 0;
}
}

private:
void clear_cache() noexcept
{
std::pair<T*, size_type> p;
while (data.pop(p))
{
traits::deallocate(alloc, p.first, p.second);
}
}

HPX_NO_UNIQUE_ADDRESS Allocator alloc;
Stack<std::pair<T*, size_type>, Allocator> data;
std::size_t allocated = 0;
std::size_t deallocated = 0;
};

allocated_cache& cache()
{
thread_local allocated_cache allocated_data(alloc);
return allocated_data;
}

public:
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
detail::init_allocator_cache(num_cache, HPX_MOVE(f));
}

template <typename U, typename Alloc>
explicit thread_local_caching_allocator(
thread_local_caching_allocator<Stack, U, Alloc> const&
thread_local_caching_allocator<U, Alloc> const&
rhs) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Alloc>))
: alloc(rhs.alloc)
Expand All @@ -155,16 +135,32 @@ namespace hpx::util {

[[nodiscard]] pointer allocate(size_type n, void const* = nullptr)
{
if (max_size() < n)
constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));
std::size_t N = n * (1ull << num_cache);

if (max_size() < N)
{
throw std::bad_array_new_length();
}
return cache().allocate(n);

auto [p, _] = detail::allocate_from_cache(num_cache);

Check notice

Code scanning / CodeQL

Unused local variable Note

Variable _ is not used.
if (p == nullptr)
{
p = traits::allocate(alloc, N);
if (p == nullptr)
{
throw std::bad_alloc();
}
}
return static_cast<pointer>(p);
}

void deallocate(pointer p, size_type n) noexcept
void deallocate(pointer p, size_type n)
{
cache().deallocate(p, n);
constexpr std::size_t num_cache =
detail::next_power_of_two(sizeof(T));
detail::return_to_cache(num_cache, p, n * (1ull << num_cache));
}

[[nodiscard]] constexpr size_type max_size() noexcept
Expand Down Expand Up @@ -199,8 +195,7 @@ namespace hpx::util {
}
};
#else
template <template <typename, typename> class Stack, typename T = char,
typename Allocator = std::allocator<T>>
template <typename T = char, typename Allocator = std::allocator<T>>
using thread_local_caching_allocator = Allocator;
#endif
} // namespace hpx::util
123 changes: 123 additions & 0 deletions libs/core/allocator_support/src/thread_local_caching_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// Copyright (c) 2023-2024 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/config.hpp>
#include <hpx/allocator_support/config/defines.hpp>

#if defined(HPX_ALLOCATOR_SUPPORT_HAVE_CACHING) && \
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

#include <hpx/allocator_support/thread_local_caching_allocator.hpp>
#include <hpx/assert.hpp>
#include <hpx/type_support/static_reinit_interface.hpp>

#include <cstddef>
#include <functional>
#include <stack>
#include <utility>

namespace hpx::util::detail {

///////////////////////////////////////////////////////////////////////////
// LIFO store of memory blocks that were handed back for later reuse. The
// cache does not release memory itself; it relies on a callback installed
// through init() that is expected to drain and free the stored blocks.
struct allocated_cache
{
    explicit allocated_cache() noexcept = default;

    // a cache is neither copyable nor movable
    allocated_cache(allocated_cache const&) = delete;
    allocated_cache(allocated_cache&&) = delete;
    allocated_cache& operator=(allocated_cache const&) = delete;
    allocated_cache& operator=(allocated_cache&&) = delete;

    // Gives the installed callback (if any) a last chance to run before the
    // stored blocks are forgotten.
    ~allocated_cache()
    {
        if (clear_cache)
        {
            clear_cache();
        }
    }

    // Installs the clear callback. Only the first call has an effect; it
    // also registers the callback through util::reinit_register.
    void init(std::function<void()>&& clear)
    {
        if (clear_cache)
        {
            return;    // already initialized
        }

        clear_cache = HPX_MOVE(clear);
        util::reinit_register(std::function<void()>(), clear_cache);
    }

    // Removes and returns the most recently stored block together with its
    // recorded size, or {nullptr, 0} if nothing is stored. Only successful
    // removals are counted in 'allocated'.
    std::pair<void*, std::size_t> allocate() noexcept
    {
        if (data.empty())
        {
            return std::pair<void*, std::size_t>{nullptr, 0};
        }

        std::pair<void*, std::size_t> const block = data.top();
        data.pop();
        ++allocated;
        return block;
    }

    // Stores the block p with its size n. Once the number of stored blocks
    // exceeds twice the number of reused ones (plus a slack of 16), the
    // clear callback is invoked (if installed) and both counters restart.
    void deallocate(void* p, std::size_t n)
    {
        data.emplace(p, n);

        ++deallocated;
        if (deallocated <= 2 * (allocated + 16))
        {
            return;
        }

        if (clear_cache)
        {
            clear_cache();
        }
        allocated = 0;
        deallocated = 0;
    }

    // True if no blocks are currently stored.
    [[nodiscard]] bool empty() const noexcept
    {
        return data.empty();
    }

private:
    std::stack<std::pair<void*, std::size_t>> data;    // block, size
    std::size_t allocated = 0;      // blocks taken out of the cache
    std::size_t deallocated = 0;    // blocks put into the cache
    std::function<void()> clear_cache;
};

///////////////////////////////////////////////////////////////////////////
// Returns the calling thread's cache with index n. Every thread owns its
// own array of max_number_of_caches caches; n must be a valid index into
// that array (checked by assertion only).
allocated_cache& cache(std::size_t n)
{
    HPX_ASSERT(n < max_number_of_caches);

    thread_local allocated_cache per_thread_caches[max_number_of_caches];

    allocated_cache& selected = per_thread_caches[n];
    return selected;
}

void init_allocator_cache(
std::size_t n, std::function<void()>&& clear_cache)
{
cache(n).init(HPX_MOVE(clear_cache));
}

std::pair<void*, std::size_t> allocate_from_cache(std::size_t n) noexcept
{
return cache(n).allocate();
}

void return_to_cache(std::size_t n, void* p, std::size_t const size)
{
cache(n).deallocate(p, size);
}

bool cache_empty(std::size_t n) noexcept
{
return cache(n).empty();
}
} // namespace hpx::util::detail

#endif
4 changes: 2 additions & 2 deletions libs/core/async_base/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024 The STE||AR-Group
# Copyright (c) 2020-2022 The STE||AR-Group
#
# SPDX-License-Identifier: BSL-1.0
# Distributed under the Boost Software License, Version 1.0. (See accompanying
Expand Down Expand Up @@ -34,6 +34,6 @@ add_hpx_module(
COMPAT_HEADERS ${async_base_compat_headers}
SOURCES ${async_base_sources}
MODULE_DEPENDENCIES hpx_allocator_support hpx_concepts hpx_config
hpx_concurrency hpx_coroutines hpx_tag_invoke
hpx_coroutines hpx_tag_invoke
CMAKE_SUBDIRS examples tests
)
Loading

0 comments on commit e12bc1c

Please sign in to comment.