Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable cf uses separete write buffer manager #343

Merged
merged 42 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5079707
update
SpadeA-Tang Jul 19, 2023
e3ada2d
update
SpadeA-Tang Jul 19, 2023
fadfef3
update
SpadeA-Tang Jul 19, 2023
6ae46ff
update
SpadeA-Tang Jul 26, 2023
cd5a294
update wbm
SpadeA-Tang Jul 26, 2023
af33283
update
SpadeA-Tang Jul 27, 2023
ab43653
update
SpadeA-Tang Jul 27, 2023
b25567e
update
SpadeA-Tang Jul 27, 2023
033d847
update
SpadeA-Tang Jul 27, 2023
199d704
update
SpadeA-Tang Jul 27, 2023
c47e7d8
can pass original wbm tests
SpadeA-Tang Aug 9, 2023
004be9b
update
SpadeA-Tang Aug 14, 2023
30b7862
add test
SpadeA-Tang Aug 14, 2023
41775e2
format
SpadeA-Tang Aug 14, 2023
0095aa8
fix test
SpadeA-Tang Aug 15, 2023
964cf81
update
SpadeA-Tang Aug 15, 2023
64fbf24
update
SpadeA-Tang Aug 15, 2023
e373067
fix test
SpadeA-Tang Aug 15, 2023
4ce4cef
update
SpadeA-Tang Aug 15, 2023
e67df36
format
SpadeA-Tang Aug 15, 2023
755c10f
update
SpadeA-Tang Aug 16, 2023
79ac364
format
SpadeA-Tang Aug 16, 2023
62cbf09
update
SpadeA-Tang Aug 16, 2023
6f6d326
set write buffer manager in cf option
SpadeA-Tang Aug 17, 2023
6b5aabd
fix test use after free
SpadeA-Tang Aug 18, 2023
d621893
update
SpadeA-Tang Aug 21, 2023
e29df90
update
SpadeA-Tang Aug 21, 2023
37d455c
fix test
SpadeA-Tang Aug 21, 2023
9d24a9f
fix test
SpadeA-Tang Aug 21, 2023
a1ca8a9
fix test
SpadeA-Tang Aug 21, 2023
c38c417
fix test
SpadeA-Tang Aug 21, 2023
9924742
fix test
SpadeA-Tang Aug 21, 2023
8fbaf5c
update comment
SpadeA-Tang Aug 22, 2023
85d43bf
update
SpadeA-Tang Aug 23, 2023
21b9ae5
fix test
SpadeA-Tang Aug 24, 2023
4f509cf
update
SpadeA-Tang Aug 24, 2023
8ecc649
format
SpadeA-Tang Aug 24, 2023
947c94b
update::
SpadeA-Tang Aug 24, 2023
ebb40a1
address comment
SpadeA-Tang Aug 24, 2023
d79241f
address comment
SpadeA-Tang Aug 24, 2023
5e404af
address comment
SpadeA-Tang Aug 24, 2023
35caf14
Merge branch '6.29.tikv' into lock-cf-manager
SpadeA-Tang Aug 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion db/column_family.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1672,8 +1672,11 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
const std::string& name, uint32_t id, Version* dummy_versions,
const ColumnFamilyOptions& options) {
assert(column_families_.find(name) == column_families_.end());
auto* write_buffer_manager = options.cf_write_buffer_manager != nullptr
? options.cf_write_buffer_manager.get()
: write_buffer_manager_;
ColumnFamilyData* new_cfd = new ColumnFamilyData(
id, name, dummy_versions, table_cache_, write_buffer_manager_, options,
id, name, dummy_versions, table_cache_, write_buffer_manager, options,
*db_options_, &file_options_, this, block_cache_tracer_, io_tracer_,
db_session_id_);
column_families_.insert({name, id});
Expand Down
20 changes: 19 additions & 1 deletion db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,11 @@ Status DBImpl::CloseHelper() {
delete txn_entry.second;
}

// We can only access cf_based_write_buffer_manager_ before versions_.reset(),
// after which all cf write buffer managers will be freed.
for (auto m : cf_based_write_buffer_manager_) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment the ordering.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

m->UnregisterDB(this);
}
// versions need to be destroyed before table_cache since it can hold
// references to table_cache.
versions_.reset();
Expand Down Expand Up @@ -2832,7 +2837,20 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options,
if (s.ok()) {
NewThreadStatusCfInfo(
static_cast_with_check<ColumnFamilyHandleImpl>(*handle)->cfd());
if (write_buffer_manager_ != nullptr) {
if (cf_options.cf_write_buffer_manager != nullptr) {
auto* write_buffer_manager = cf_options.cf_write_buffer_manager.get();
bool exist = false;
for (auto m : cf_based_write_buffer_manager_) {
if (m == write_buffer_manager) {
exist = true;
}
}
if (!exist) {
return Status::NotSupported(
"New cf write buffer manager is not supported after Open");
}
write_buffer_manager->RegisterColumnFamily(this, *handle);
} else if (write_buffer_manager_ != nullptr) {
write_buffer_manager_->RegisterColumnFamily(this, *handle);
}
}
Expand Down
4 changes: 4 additions & 0 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2288,6 +2288,10 @@ class DBImpl : public DB {
Directories directories_;

WriteBufferManager* write_buffer_manager_;
// For simplicity, CF based write buffer manager does not support stall the
// write.
// Note: It's only modifed in Open, so mutex is not needed.
autovector<WriteBufferManager*> cf_based_write_buffer_manager_;

WriteThread write_thread_;
WriteBatch tmp_batch_;
Expand Down
1 change: 1 addition & 0 deletions db/db_impl/db_impl_compaction_flush.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,7 @@ Status DBImpl::Flush(const FlushOptions& flush_options,
ROCKS_LOG_INFO(immutable_db_options_.info_log, "[%s] Manual flush start.",
cfh->GetName().c_str());
Status s;
TEST_SYNC_POINT_CALLBACK("DBImpl::Flush:ScheduleFlushReq", column_family);
if (immutable_db_options_.atomic_flush) {
s = AtomicFlushMemTables({cfh->cfd()}, flush_options,
FlushReason::kManualFlush);
Expand Down
46 changes: 38 additions & 8 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1625,6 +1625,22 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
}

DBImpl* impl = new DBImpl(db_options, dbname, seq_per_batch, batch_per_txn);
for (auto cf : column_families) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic of this block of code is quite isolated. I think it's better to wrap a private function for it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer not, adding a member function has both mental overhead and compilation overhead. Plus this block can't be exactly reused elsewhere.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the major benefit of a private function is better readability: people can skip the detail when they don't care the very detail of the block. Other benefits include better testability, and more modular code.
Of course adding a comment for the block serves the same purpose in terms of readability, but old-school programmers typically prefer a function and even chat-gpt also recommends a wrap function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it is always a trade-off to extract a sub-routine. Reading function has a significant context switch overhead, and for non-pure function that modifies states it is mostly needed to read the implementation of the function. In RocksDB codebase it is generally preferred to use inlined code rather than many small functions. Some believe it helps with code quality.

if (cf.options.cf_write_buffer_manager != nullptr) {
auto* write_buffer_manager = cf.options.cf_write_buffer_manager.get();
bool already_exist = false;
for (auto m : impl->cf_based_write_buffer_manager_) {
if (m == write_buffer_manager) {
already_exist = true;
break;
}
}
if (!already_exist) {
impl->cf_based_write_buffer_manager_.push_back(write_buffer_manager);
}
}
}

s = impl->env_->CreateDirIfMissing(impl->immutable_db_options_.GetWalDir());
if (s.ok()) {
std::vector<std::string> paths;
Expand Down Expand Up @@ -1896,20 +1912,34 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
}
if (s.ok()) {
impl->StartPeriodicWorkScheduler();

// Newly created handles are already registered during
// `CreateColumnFamily`. We must clear them all to avoid duplicate
// registration.
if (impl->write_buffer_manager_) {
// Newly created handles are already registered during
// `CreateColumnFamily`. We must clear them all to avoid duplicate
// registration.
impl->write_buffer_manager_->UnregisterDB(impl);
for (auto* cf : *handles) {
}
for (auto m : impl->cf_based_write_buffer_manager_) {
m->UnregisterDB(impl);
}

for (size_t i = 0; i < (*handles).size(); ++i) {
auto cf_opt = column_families[i].options;

auto* cf = (*handles)[i];
std::string cf_name = cf->GetName();
auto* write_buffer_manager = cf_opt.cf_write_buffer_manager != nullptr
? cf_opt.cf_write_buffer_manager.get()
: impl->write_buffer_manager_;
if (write_buffer_manager) {
if (cf->GetName() == kDefaultColumnFamilyName) {
impl->write_buffer_manager_->RegisterColumnFamily(
impl, impl->default_cf_handle_);
write_buffer_manager->RegisterColumnFamily(impl,
impl->default_cf_handle_);
} else if (cf->GetName() == kPersistentStatsColumnFamilyName) {
impl->write_buffer_manager_->RegisterColumnFamily(
write_buffer_manager->RegisterColumnFamily(
impl, impl->persist_stats_cf_handle_);
} else {
impl->write_buffer_manager_->RegisterColumnFamily(impl, cf);
write_buffer_manager->RegisterColumnFamily(impl, cf);
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions db/db_impl/db_impl_write.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,11 @@ Status DBImpl::PreprocessWrite(const WriteOptions& write_options,
if (UNLIKELY(status.ok() && write_buffer_manager_->ShouldFlush())) {
write_buffer_manager_->MaybeFlush(this);
}
for (auto write_buffer_manager : cf_based_write_buffer_manager_) {
if (UNLIKELY(status.ok() && write_buffer_manager->ShouldFlush())) {
write_buffer_manager->MaybeFlush(this);
}
}

if (UNLIKELY(status.ok() && !trim_history_scheduler_.Empty())) {
InstrumentedMutexLock l(&mutex_);
Expand Down Expand Up @@ -2278,6 +2283,9 @@ size_t DBImpl::GetWalPreallocateBlockSize(uint64_t write_buffer_size) const {
if (immutable_db_options_.write_buffer_manager) {
size_t buffer_size =
immutable_db_options_.write_buffer_manager->flush_size();
for (auto manager : cf_based_write_buffer_manager_) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to update buffer_size for WAL here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the flush size of write buffer manager is small compared with write buffer size, we will switch memtable before write bytes up to write buffer size. Use min here can avoid wasty pre allocation.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but here you're using += instead of min<> of cf_based_write_buffer_manager_

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is because all CF will write in the same WAL file. Imaging we have 1M write-buffer-limit for CF1 but 1GB write-buffer-limit for CF2.

buffer_size += manager->flush_size();
}
if (buffer_size > 0) {
bsize = std::min<size_t>(bsize, buffer_size);
}
Expand Down
2 changes: 1 addition & 1 deletion db/db_merge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class DBMergeTest : public testing::Test {
std::to_string(cf_id), ColumnFamilyOptions(options_)));
}
}
return std::move(column_families);
return column_families;
}

std::string GenDBPath(uint32_t db_id) {
Expand Down
16 changes: 16 additions & 0 deletions db/db_test_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,22 @@ void DBTestBase::ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
}

void DBTestBase::OpenWithCFWriteBufferManager(
const std::vector<std::string>& cfs,
const std::vector<std::shared_ptr<WriteBufferManager>> wbms,
const Options& options) {
CreateColumnFamilies(cfs, options);
std::vector<std::string> cfs_plus_default = cfs;
cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName);
std::vector<Options> cf_options;
for (size_t i = 0; i < wbms.size(); ++i) {
auto o = options;
o.cf_write_buffer_manager = wbms[i];
cf_options.push_back(o);
}
ReopenWithColumnFamilies(cfs_plus_default, cf_options);
}

void DBTestBase::SetTimeElapseOnlySleepOnReopen(DBOptions* options) {
time_elapse_only_sleep_on_reopen_ = true;

Expand Down
5 changes: 5 additions & 0 deletions db/db_test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,11 @@ class DBTestBase : public testing::Test {
Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
const Options& options);

void OpenWithCFWriteBufferManager(
const std::vector<std::string>& cfs,
const std::vector<std::shared_ptr<WriteBufferManager>> wbms,
const Options& options);

void Reopen(const Options& options);

void Close();
Expand Down
Loading
Loading