Improve / refactor anonymous mmap capabilities (facebook#10810)
Summary:
The motivation for this change is a planned feature (related to HyperClockCache) that will depend on a large array that can essentially grow automatically, up to some bound, without the pointer address changing and with guaranteed zero-initialization of the data. Anonymous mmaps provide such functionality, and this change provides an internal API for that.
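As a rough illustration of that pattern (a minimal POSIX-only sketch, not the API added by this change; the class and member names below are hypothetical): reserve one large anonymous mapping up front so the base address never moves, and rely on the kernel to lazily back pages with zero-filled memory on first touch.

#include <sys/mman.h>

#include <cassert>
#include <cstddef>

// Hypothetical sketch: reserve `max_bytes` of anonymous memory once; the
// usable prefix can then "grow" up to that bound without the base address
// ever changing, and untouched pages cost no physical memory.
class GrowableZeroedBuffer {
 public:
  explicit GrowableZeroedBuffer(size_t max_bytes) : max_bytes_(max_bytes) {
    // MAP_ANONYMOUS pages are guaranteed to read as zero.
    base_ = mmap(nullptr, max_bytes_, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    assert(base_ != MAP_FAILED);
  }
  ~GrowableZeroedBuffer() { munmap(base_, max_bytes_); }
  char* Data() const { return static_cast<char*>(base_); }

 private:
  void* base_;
  size_t max_bytes_;
};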

The other existing use of anonymous mmap in RocksDB is for allocating in huge pages. That code and other related Arena code used some awkward non-RAII and pre-C++11 idioms, so I cleaned up much of that as well, with RAII, move semantics, constexpr, etc.
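For context, the RAII/move-semantics shape referred to here is roughly the following. This is a standalone sketch of the idiom only, not the actual MemMapping class added in port/mmap.h, whose interface differs:

#include <sys/mman.h>

#include <cstddef>
#include <utility>

// Move-only owner of an anonymous mapping: exactly one instance is ever
// responsible for the munmap, and cleanup happens in the destructor rather
// than in hand-written loops.
class AnonMapping {
 public:
  static AnonMapping Allocate(size_t length) {
    AnonMapping m;
    void* addr = mmap(nullptr, length, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr != MAP_FAILED) {
      m.addr_ = addr;
      m.length_ = length;
    }
    return m;  // addr_ stays nullptr on failure
  }
  AnonMapping(AnonMapping&& other) noexcept
      : addr_(std::exchange(other.addr_, nullptr)),
        length_(std::exchange(other.length_, 0)) {}
  AnonMapping& operator=(AnonMapping&& other) noexcept {
    std::swap(addr_, other.addr_);
    std::swap(length_, other.length_);
    return *this;
  }
  AnonMapping(const AnonMapping&) = delete;
  AnonMapping& operator=(const AnonMapping&) = delete;
  ~AnonMapping() {
    if (addr_ != nullptr) {
      munmap(addr_, length_);
    }
  }
  void* Get() const { return addr_; }

 private:
  AnonMapping() = default;
  void* addr_ = nullptr;
  size_t length_ = 0;
};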

More specifics:
* Minimize conditional compilation
* Add Windows support for anonymous mmaps
* Use std::deque instead of std::vector for a more efficient bag of allocated blocks (see the sketch just below)
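Sketch of the deque point (illustrative only, not RocksDB code): unlike std::vector, std::deque never relocates existing elements on push_back, so pointers into earlier blocks stay valid and move-only element types are appended without a wholesale reallocate-and-move.

#include <deque>
#include <memory>

int main() {
  std::deque<std::unique_ptr<char[]>> blocks;
  blocks.push_back(std::make_unique<char[]>(4096));
  char* first = blocks.front().get();  // stays valid as the bag grows
  for (int i = 0; i < 1000; ++i) {
    blocks.push_back(std::make_unique<char[]>(4096));
  }
  return first == blocks.front().get() ? 0 : 1;  // address unchanged
}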

Pull Request resolved: facebook#10810

Test Plan: unit test added for new functionality

Reviewed By: riversand963

Differential Revision: D40347204

Pulled By: pdillinger

fbshipit-source-id: ca83fcc47e50fabf7595069380edd2954f4f879c
pdillinger authored and facebook-github-bot committed Oct 18, 2022
1 parent 11c0d13 commit 8367f0d
Showing 11 changed files with 285 additions and 121 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -584,7 +584,7 @@ jobs:
name: "Test RocksDB"
shell: powershell.exe
command: |
build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
build_tools\run_ci_db_test.ps1 -SuiteRun arena_test,db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
build-linux-java:
executor: linux-docker
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -796,6 +796,7 @@ set(SOURCES
        options/options.cc
        options/options_helper.cc
        options/options_parser.cc
+       port/mmap.cc
        port/stack_trace.cc
        table/adaptive/adaptive_table_factory.cc
        table/block_based/binary_search_index_reader.cc
2 changes: 2 additions & 0 deletions TARGETS
@@ -163,6 +163,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"options/options.cc",
"options/options_helper.cc",
"options/options_parser.cc",
"port/mmap.cc",
"port/port_posix.cc",
"port/stack_trace.cc",
"port/win/env_default.cc",
@@ -502,6 +503,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"options/options.cc",
"options/options_helper.cc",
"options/options_parser.cc",
"port/mmap.cc",
"port/port_posix.cc",
"port/stack_trace.cc",
"port/win/env_default.cc",
3 changes: 3 additions & 0 deletions db/db_test_util.h
@@ -49,6 +49,9 @@
#include "util/string_util.h"
#include "utilities/merge_operators.h"

// In case defined by Windows headers
#undef small

namespace ROCKSDB_NAMESPACE {
class MockEnv;

2 changes: 1 addition & 1 deletion db/memtable.cc
@@ -76,7 +76,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
    : comparator_(cmp),
      moptions_(ioptions, mutable_cf_options),
      refs_(0),
-     kArenaBlockSize(OptimizeBlockSize(moptions_.arena_block_size)),
+     kArenaBlockSize(Arena::OptimizeBlockSize(moptions_.arena_block_size)),
      mem_tracker_(write_buffer_manager),
      arena_(moptions_.arena_block_size,
             (write_buffer_manager != nullptr &&
111 changes: 23 additions & 88 deletions memory/arena.cc
@@ -8,9 +8,7 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "memory/arena.h"
-#ifndef OS_WIN
-#include <sys/mman.h>
-#endif

#include <algorithm>

#include "logging/logging.h"
@@ -22,16 +20,7 @@

namespace ROCKSDB_NAMESPACE {

-// MSVC complains that it is already defined since it is static in the header.
-#ifndef _MSC_VER
-const size_t Arena::kInlineSize;
-#endif
-
-const size_t Arena::kMinBlockSize = 4096;
-const size_t Arena::kMaxBlockSize = 2u << 30;
-static const int kAlignUnit = alignof(max_align_t);
-
-size_t OptimizeBlockSize(size_t block_size) {
+size_t Arena::OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in optimal range
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);
@@ -53,14 +42,12 @@ Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
-#ifdef MAP_HUGETLB
-  hugetlb_size_ = huge_page_size;
-  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
-    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
+  if (MemMapping::kHugePageSupported) {
+    hugetlb_size_ = huge_page_size;
+    if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
+      hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
+    }
  }
-#else
-  (void)huge_page_size;
-#endif
if (tracker_ != nullptr) {
tracker_->Allocate(kInlineSize);
}
@@ -71,21 +58,6 @@ Arena::~Arena() {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
-  for (const auto& block : blocks_) {
-    delete[] block;
-  }
-
-#ifdef MAP_HUGETLB
-  for (const auto& mmap_info : huge_blocks_) {
-    if (mmap_info.addr_ == nullptr) {
-      continue;
-    }
-    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
-    if (ret != 0) {
-      // TODO(sdong): Better handling
-    }
-  }
-#endif
}

char* Arena::AllocateFallback(size_t bytes, bool aligned) {
@@ -99,12 +71,10 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
-#ifdef MAP_HUGETLB
-  if (hugetlb_size_) {
+  if (MemMapping::kHugePageSupported && hugetlb_size_ > 0) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
-#endif
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
@@ -123,45 +93,22 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
}

char* Arena::AllocateFromHugePage(size_t bytes) {
-#ifdef MAP_HUGETLB
-  if (hugetlb_size_ == 0) {
-    return nullptr;
-  }
-  // Reserve space in `huge_blocks_` before calling `mmap`.
-  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
-  // own memory and do fewer reallocations.
-  //
-  // - If `emplace_back` throws, no memory leaks because we haven't called
-  //   `mmap` yet.
-  // - If `mmap` throws, no memory leaks because the vector will be cleaned up
-  //   via RAII.
-  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);
-
-  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
-                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);
-
-  if (addr == MAP_FAILED) {
-    return nullptr;
-  }
-  huge_blocks_.back() = MmapInfo(addr, bytes);
-  blocks_memory_ += bytes;
-  if (tracker_ != nullptr) {
-    tracker_->Allocate(bytes);
+  MemMapping mm = MemMapping::AllocateHuge(bytes);
+  auto addr = static_cast<char*>(mm.Get());
+  if (addr) {
+    huge_blocks_.push_back(std::move(mm));
+    blocks_memory_ += bytes;
+    if (tracker_ != nullptr) {
+      tracker_->Allocate(bytes);
+    }
  }
-  return reinterpret_cast<char*>(addr);
-#else
-  (void)bytes;
-  return nullptr;
-#endif
+  return addr;
}

char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
-  assert((kAlignUnit & (kAlignUnit - 1)) ==
-         0);  // Pointer size should be a power of 2
-
-#ifdef MAP_HUGETLB
-  if (huge_page_size > 0 && bytes > 0) {
+  if (MemMapping::kHugePageSupported && hugetlb_size_ > 0 &&
+      huge_page_size > 0 && bytes > 0) {
    // Allocate from a huge page TLB table.
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
@@ -177,10 +124,6 @@ char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
      return addr;
    }
  }
-#else
-  (void)huge_page_size;
-  (void)logger;
-#endif

  size_t current_mod =
      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
@@ -200,17 +143,10 @@ char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
}

char* Arena::AllocateNewBlock(size_t block_bytes) {
-  // Reserve space in `blocks_` before allocating memory via new.
-  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
-  // own memory and do fewer reallocations.
-  //
-  // - If `emplace_back` throws, no memory leaks because we haven't called `new`
-  //   yet.
-  // - If `new` throws, no memory leaks because the vector will be cleaned up
-  //   via RAII.
-  blocks_.emplace_back(nullptr);
-
-  char* block = new char[block_bytes];
+  auto uniq = std::make_unique<char[]>(block_bytes);
+  char* block = uniq.get();
+  blocks_.push_back(std::move(uniq));

  size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
@@ -227,7 +163,6 @@ char* Arena::AllocateNewBlock(size_t block_bytes) {
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
-  blocks_.back() = block;
  return block;
}

56 changes: 25 additions & 31 deletions memory/arena.h
@@ -12,16 +12,13 @@
// size, it uses malloc to directly get the requested size.

#pragma once
-#ifndef OS_WIN
-#include <sys/mman.h>
-#endif
-#include <assert.h>
-#include <stdint.h>
-#include <cerrno>

#include <cstddef>
-#include <vector>
+#include <deque>

#include "memory/allocator.h"
-#include "util/mutexlock.h"
+#include "port/mmap.h"
#include "rocksdb/env.h"

namespace ROCKSDB_NAMESPACE {

@@ -31,9 +28,13 @@ class Arena : public Allocator {
  Arena(const Arena&) = delete;
  void operator=(const Arena&) = delete;

-  static const size_t kInlineSize = 2048;
-  static const size_t kMinBlockSize;
-  static const size_t kMaxBlockSize;
+  static constexpr size_t kInlineSize = 2048;
+  static constexpr size_t kMinBlockSize = 4096;
+  static constexpr size_t kMaxBlockSize = 2u << 30;
+
+  static constexpr unsigned kAlignUnit = alignof(std::max_align_t);
+  static_assert((kAlignUnit & (kAlignUnit - 1)) == 0,
+                "Pointer size should be power of 2");

  // huge_page_size: if 0, don't use huge page TLB. If > 0 (should set to the
  // supported hugepage size of the system), block allocation will try huge
@@ -63,7 +64,7 @@ class Arena : public Allocator {
  // by the arena (exclude the space allocated but not yet used for future
  // allocations).
  size_t ApproximateMemoryUsage() const {
-    return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
+    return blocks_memory_ + blocks_.size() * sizeof(char*) -
           alloc_bytes_remaining_;
  }

@@ -81,21 +82,19 @@ class Arena : public Allocator {
    return blocks_.empty() && huge_blocks_.empty();
  }

+  // check and adjust the block_size so that the return value is
+  // 1. in the range of [kMinBlockSize, kMaxBlockSize].
+  // 2. the multiple of align unit.
+  static size_t OptimizeBlockSize(size_t block_size);
+
 private:
-  char inline_block_[kInlineSize] __attribute__((__aligned__(alignof(max_align_t))));
+  alignas(std::max_align_t) char inline_block_[kInlineSize];
  // Number of bytes allocated in one block
  const size_t kBlockSize;
-  // Array of new[] allocated memory blocks
-  using Blocks = std::vector<char*>;
-  Blocks blocks_;
-
-  struct MmapInfo {
-    void* addr_;
-    size_t length_;
-
-    MmapInfo(void* addr, size_t length) : addr_(addr), length_(length) {}
-  };
-  std::vector<MmapInfo> huge_blocks_;
+  // Allocated memory blocks
+  std::deque<std::unique_ptr<char[]>> blocks_;
+  // Huge page allocations
+  std::deque<MemMapping> huge_blocks_;
  size_t irregular_block_num = 0;

// Stats for current active block.
@@ -108,15 +107,15 @@ class Arena : public Allocator {
  // How many bytes left in currently active block?
  size_t alloc_bytes_remaining_ = 0;

-#ifdef MAP_HUGETLB
  size_t hugetlb_size_ = 0;
-#endif  // MAP_HUGETLB

  char* AllocateFromHugePage(size_t bytes);
  char* AllocateFallback(size_t bytes, bool aligned);
  char* AllocateNewBlock(size_t block_bytes);

  // Bytes of memory in blocks allocated so far
  size_t blocks_memory_ = 0;
+  // Non-owned
  AllocTracker* tracker_;
};

@@ -133,9 +132,4 @@ inline char* Arena::Allocate(size_t bytes) {
  return AllocateFallback(bytes, false /* unaligned */);
}

-// check and adjust the block_size so that the return value is
-// 1. in the range of [kMinBlockSize, kMaxBlockSize].
-// 2. the multiple of align unit.
-extern size_t OptimizeBlockSize(size_t block_size);
-
}  // namespace ROCKSDB_NAMESPACE