Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kangaroo module #50

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions cachelib/allocator/nvmcache/NavyConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,52 @@ BigHashConfig& BigHashConfig::setSizePctAndMaxItemSize(
return *this;
}

// Stores the share of the device given to Kangaroo together with the largest
// item size it accepts. A share of 0 is accepted and merely logged as
// "not configured".
// @throw std::invalid_argument if sizePct is greater than 100.
KangarooConfig& KangarooConfig::setSizePctAndMaxItemSize(
    unsigned int sizePct, uint64_t smallItemMaxSize) {
  const bool pctOutOfRange = sizePct > 100;
  if (pctOutOfRange) {
    throw std::invalid_argument(folly::sformat(
        "to enable Kangaroo, Kangaroo size pct should be in the range of [0, 100]"
        ", but {} is set",
        sizePct));
  }

  const bool disabled = (sizePct == 0);
  if (disabled) {
    XLOG(INFO) << "Kangaroo is not configured";
  }

  // Both fields are independent of each other; commit them together once the
  // percentage has been accepted.
  smallItemMaxSize_ = smallItemMaxSize;
  sizePct_ = sizePct;
  return *this;
}

// Configures KangarooLog.
// @param sizePct                     percent of Kangaroo dedicated to
//                                    KangarooLog; 0 is accepted and logged as
//                                    "not configured".
// @param physicalPartitions          number of physical partitions (>= 1).
// @param indexPerPhysicalPartitions  index partitions per physical partition
//                                    (>= 1).
// @param threshold                   threshold for moving items from
//                                    KangarooLog to sets.
// @throw std::invalid_argument if sizePct is greater than 100, or if either
//        partition count is 0. No member is modified when this throws.
KangarooConfig& KangarooConfig::setLog(unsigned int sizePct,
                                       uint64_t physicalPartitions,
                                       uint64_t indexPerPhysicalPartitions,
                                       uint32_t threshold) {
  if (sizePct > 100) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, KangarooLog size pct should be in the range of [0, 100]"
        ", but {} is set",
        sizePct));
  }
  if (sizePct == 0) {
    XLOG(INFO) << "KangarooLog is not configured";
  }
  if (indexPerPhysicalPartitions == 0) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, need >=1 index partitions per physical partition, {} is set",
        indexPerPhysicalPartitions));
  }
  if (physicalPartitions == 0) {
    throw std::invalid_argument(folly::sformat(
        "to enable KangarooLog, need >=1 physical partitions, {} is set",
        physicalPartitions));
  }

  // Commit only after every argument has been validated, so that a rejected
  // call cannot leave the log size updated while the partition settings still
  // hold their previous values.
  logSizePct_ = sizePct;
  physicalPartitions_ = physicalPartitions;
  indexPerPhysicalPartitions_ = indexPerPhysicalPartitions;
  threshold_ = threshold;
  return *this;
}

void NavyConfig::setBigHash(unsigned int bigHashSizePct,
uint32_t bigHashBucketSize,
uint64_t bigHashBucketBfSize,
Expand All @@ -238,6 +284,22 @@ void NavyConfig::setBigHash(unsigned int bigHashSizePct,
.setBucketSize(bigHashBucketSize)
.setBucketBfSize(bigHashBucketBfSize);
}

// Convenience setter that forwards every Kangaroo parameter to kangarooConfig_.
// @throw std::invalid_argument propagated from
//        KangarooConfig::setSizePctAndMaxItemSize and KangarooConfig::setLog
//        (size percentages above 100, or a partition count of 0).
void NavyConfig::setKangaroo(unsigned int kangarooSizePct,
                             uint32_t kangarooBucketSize,
                             uint64_t kangarooBucketBfSize,
                             uint64_t kangarooSmallItemMaxSize,
                             uint64_t kangarooLogSizePct,
                             uint64_t kangarooLogThreshold,
                             uint64_t kangarooLogPhysicalPartitions,
                             uint32_t kangarooLogIndexPerPhysicalPartitions) {
  // KangarooConfig::setLog expects
  //   (sizePct, physicalPartitions, indexPerPhysicalPartitions, threshold).
  // The threshold therefore goes LAST; passing it second would store it as the
  // physical partition count and shift the remaining arguments by one.
  // NOTE(review): kangarooLogSizePct and kangarooLogThreshold are uint64_t here
  // but narrower in setLog, so they are implicitly narrowed at this call.
  kangarooConfig_
      .setSizePctAndMaxItemSize(kangarooSizePct, kangarooSmallItemMaxSize)
      .setBucketSize(kangarooBucketSize)
      .setBucketBfSize(kangarooBucketBfSize)
      .setLog(kangarooLogSizePct,
              kangarooLogPhysicalPartitions,
              kangarooLogIndexPerPhysicalPartitions,
              kangarooLogThreshold);
}
// job scheduler settings
void NavyConfig::setNavyReqOrderingShards(uint64_t navyReqOrderingShards) {
if (navyReqOrderingShards == 0) {
Expand Down
113 changes: 111 additions & 2 deletions cachelib/allocator/nvmcache/NavyConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,90 @@ class BigHashConfig {
uint64_t smallItemMaxSize_{};
};

/**
 * KangarooConfig provides APIs for users to configure Kangaroo engine, which is
 * one part of NavyConfig.
 *
 * By this class, users can:
 * - enable Kangaroo by setting sizePct > 0
 * - set maximum item size
 * - set bucket size
 * - set bloom filter size (0 to disable bloom filter)
 * - set log percent, number of partitions and log threshold
 * - get the values of all the above parameters
 */
class KangarooConfig {
 public:
  // Set Kangaroo device percentage and maximum item size(in bytes) to enable
  // Kangaroo engine. Default value of sizePct and smallItemMaxSize is 0,
  // meaning Kangaroo is not enabled.
  // @throw std::invalid_argument if sizePct is not in the range of
  //        [0, 100].
  KangarooConfig& setSizePctAndMaxItemSize(unsigned int sizePct,
                                           uint64_t smallItemMaxSize);

  // Set the bucket size in bytes for Kangaroo engine.
  // Default value is 4096.
  KangarooConfig& setBucketSize(uint32_t bucketSize) noexcept {
    bucketSize_ = bucketSize;
    return *this;
  }

  // Set bloom filter size per bucket in bytes for Kangaroo engine.
  // 0 means bloom filter will not be applied. Default value is 8.
  KangarooConfig& setBucketBfSize(uint64_t bucketBfSize) noexcept {
    bucketBfSize_ = bucketBfSize;
    return *this;
  }

  // Set the KangarooLog parameters:
  // - sizePct: percent of Kangaroo to dedicate to KangarooLog (default 0).
  // - physicalPartitions: number of physical partitions of KangarooLog
  //   (default 1).
  // - indexPerPhysicalPartitions: number of index partitions per physical
  //   partition (default 1).
  // - threshold: threshold for moving items from KangarooLog to sets
  //   (default 1).
  // @throw std::invalid_argument if sizePct is not in the range of [0, 100],
  //        or if physicalPartitions or indexPerPhysicalPartitions is 0.
  KangarooConfig& setLog(unsigned int sizePct,
                         uint64_t physicalPartitions,
                         uint64_t indexPerPhysicalPartitions,
                         uint32_t threshold);

  bool isBloomFilterEnabled() const { return bucketBfSize_ > 0; }

  unsigned int getSizePct() const { return sizePct_; }

  uint32_t getBucketSize() const { return bucketSize_; }

  uint64_t getBucketBfSize() const { return bucketBfSize_; }

  uint64_t getSmallItemMaxSize() const { return smallItemMaxSize_; }

  unsigned int getLogSizePct() const { return logSizePct_; }

  uint64_t getPhysicalPartitions() const { return physicalPartitions_; }

  uint64_t getIndexPerPhysicalPartitions() const {
    return indexPerPhysicalPartitions_;
  }

  uint32_t getLogThreshold() const { return threshold_; }

 private:
  // Percentage of how much of the device out of all is given to Kangaroo
  // engine in Navy, e.g. 50.
  unsigned int sizePct_{0};
  // Navy Kangaroo engine's bucket size (must be multiple of the minimum
  // device io block size).
  // This size determines how big each bucket is and what is the physical
  // write granularity onto the device.
  uint32_t bucketSize_{4096};
  // The bloom filter size per bucket in bytes for Navy Kangaroo engine
  uint64_t bucketBfSize_{8};
  // The maximum item size to put into Navy Kangaroo engine.
  uint64_t smallItemMaxSize_{};
  // Percent of Kangaroo to dedicate to KangarooLog
  unsigned int logSizePct_{0};
  // Number of physical partitions of KangarooLog
  uint64_t physicalPartitions_{1};
  // Number of index partitions per physical partition of KangarooLog
  uint64_t indexPerPhysicalPartitions_{1};
  // Threshold for moving items from KangarooLog to sets
  uint32_t threshold_{1};
};

/**
* NavyConfig provides APIs for users to set up Navy related settings for
* NvmCache.
Expand All @@ -341,6 +425,7 @@ class NavyConfig {
bool usesSimpleFile() const noexcept { return !fileName_.empty(); }
bool usesRaidFiles() const noexcept { return raidPaths_.size() > 0; }
bool isBigHashEnabled() const { return bigHashConfig_.getSizePct() > 0; }
bool isKangarooEnabled() const { return kangarooConfig_.getSizePct() > 0; }
std::map<std::string, std::string> serialize() const;

// Getters:
Expand Down Expand Up @@ -371,15 +456,21 @@ class NavyConfig {
// Returns the threshold of classifying an item as small item or large item
// for Navy engine.
uint64_t getSmallItemThreshold() const {
if (!isBigHashEnabled()) {
if (isBigHashEnabled()) {
return bigHashConfig_.getSmallItemMaxSize();
} else if (isKangarooEnabled()) {
return kangarooConfig_.getSmallItemMaxSize();
} else {
return 0;
}
return bigHashConfig_.getSmallItemMaxSize();
}

// Return a const BigHashConfig to read values of its parameters.
const BigHashConfig& bigHash() const { return bigHashConfig_; }

// Return a const KangarooConfig to read values of its parameters.
const KangarooConfig& kangaroo() const { return kangarooConfig_; }

// Return a const BlockCacheConfig to read values of its parameters.
const BlockCacheConfig& blockCache() const { return blockCacheConfig_; }

Expand Down Expand Up @@ -506,6 +597,21 @@ class NavyConfig {
uint64_t bigHashSmallItemMaxSize);
// Return BigHashConfig for configuration.
BigHashConfig& bigHash() noexcept { return bigHashConfig_; }

// ============ Kangaroo settings =============
// (Deprecated) Set the parameters for Kangaroo.
// @throw std::invalid_argument if kangarooSizePct is not in the range of
// 0~100.
void setKangaroo(unsigned int kangarooSizePct,
uint32_t kangarooBucketSize,
uint64_t kangarooBucketBfSize,
uint64_t kangarooSmallItemMaxSize,
uint64_t kangarooLogSizePct,
uint64_t kangarooLogThreshold,
uint64_t kangarooLogPhysicalPartitions,
uint32_t kangarooLogIndexPerPhysicalPartitions);
// Return KangarooConfig for configuration.
KangarooConfig& kangaroo() noexcept { return kangarooConfig_; }

// ============ Job scheduler settings =============
void setReaderAndWriterThreads(unsigned int readerThreads,
Expand Down Expand Up @@ -556,6 +662,9 @@ class NavyConfig {

// ============ BigHash settings =============
BigHashConfig bigHashConfig_{};

// ============ Kangaroo settings =============
KangarooConfig kangarooConfig_{};

// ============ Job scheduler settings =============
// Number of asynchronous worker thread for read operation.
Expand Down
70 changes: 70 additions & 0 deletions cachelib/allocator/nvmcache/NavySetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,66 @@ uint64_t setupBigHash(const navy::BigHashConfig& bigHashConfig,
return bigHashCacheOffset;
}

// Lays out the Kangaroo engine at the tail of the cache device and registers
// it with `proto`.
//
// @param kangarooConfig  user-supplied Kangaroo settings
// @param ioAlignSize     device IO alignment; the bucket size must be a
//                        multiple of it
// @param totalCacheSize  size of the whole cache device region
// @param metadataSize    bytes reserved for metadata at the start of the region
// @param proto           cache proto that receives the Kangaroo proto
// @return the offset at which Kangaroo's storage begins
// @throw std::invalid_argument if the bucket size is not aligned to
//        ioAlignSize, or if Kangaroo's offset does not lie beyond the metadata
uint64_t setupKangaroo(const navy::KangarooConfig& kangarooConfig,
                       uint32_t ioAlignSize,
                       uint64_t totalCacheSize,
                       uint64_t metadataSize,
                       cachelib::navy::CacheProto& proto) {
  auto bucketSize = kangarooConfig.getBucketSize();
  if (alignUp(bucketSize, ioAlignSize) != bucketSize) {
    throw std::invalid_argument(
        folly::sformat("Bucket size: {} is not aligned to ioAlignSize: {}",
                       bucketSize, ioAlignSize));
  }

  // Kangaroo takes its share from the end of the device; whatever precedes
  // its offset is left to the caller.
  const auto reservedBytes =
      totalCacheSize * kangarooConfig.getSizePct() / 100ul;
  const uint64_t offset = alignUp(totalCacheSize - reservedBytes, bucketSize);
  const uint64_t size = alignDown(totalCacheSize - offset, bucketSize);

  auto engine = cachelib::navy::createKangarooProto();
  engine->setLayout(offset, size, bucketSize);

  // Per-bucket bloom filter. Experiments showed that with 16 bytes of BF for
  // 25 entries, 4 hash functions is optimal and keeps the false positive rate
  // below 10%, so 4 hashes are used unconditionally. If the BF-size to
  // bucket-size ratio gets lower, try reducing the number of hashes.
  if (kangarooConfig.isBloomFilterEnabled()) {
    constexpr uint32_t kNumHashes = 4;
    const uint32_t bitsPerHash =
        kangarooConfig.getBucketBfSize() * 8 / kNumHashes;
    engine->setBloomFilter(kNumHashes, bitsPerHash);
  }

  // Optional KangarooLog: a percentage of Kangaroo's space, rounded down to a
  // multiple of 64 buckets.
  const auto logSizePct = kangarooConfig.getLogSizePct();
  if (logSizePct) {
    const uint64_t logSize =
        alignDown(size * logSizePct / 100ul, bucketSize * 64);
    const uint32_t threshold = kangarooConfig.getLogThreshold();
    const uint64_t indexPerPhysical =
        kangarooConfig.getIndexPerPhysicalPartitions();
    const uint64_t physical = kangarooConfig.getPhysicalPartitions();
    engine->setLog(logSize, threshold, physical, indexPerPhysical);
  }

  proto.setKangaroo(std::move(engine), kangarooConfig.getSmallItemMaxSize());

  // Kangaroo must start strictly after the metadata region.
  if (metadataSize >= offset) {
    throw std::invalid_argument("NVM cache size is not big enough!");
  }
  XLOG(INFO) << "metadataSize: " << metadataSize
             << " kangarooCacheOffset: " << offset
             << " kangarooCacheSize: " << size;
  return offset;
}

void setupBlockCache(const navy::BlockCacheConfig& blockCacheConfig,
uint64_t blockCacheSize,
uint32_t ioAlignSize,
Expand Down Expand Up @@ -204,6 +264,16 @@ void setupCacheProtos(const navy::NavyConfig& config,
XLOG(INFO) << "metadataSize: " << metadataSize << ". No bighash.";
blockCacheSize = totalCacheSize - metadataSize;
}

// Set up Kangaroo if enabled. When it is, BlockCache is limited to the space
// between the metadata and Kangaroo's offset.
if (config.isKangarooEnabled()) {
  auto kangarooCacheOffset = setupKangaroo(config.kangaroo(), ioAlignSize,
                                           totalCacheSize, metadataSize, proto);
  blockCacheSize = kangarooCacheOffset - metadataSize;
} else {
  // Do NOT reassign blockCacheSize here: the BigHash step just above has
  // already set it (its "No bighash" branch uses totalCacheSize -
  // metadataSize). Resetting it to the full size would discard the space
  // carved out for BigHash and let BlockCache overlap it.
  // NOTE(review): the BigHash-enabled branch is outside this view — confirm it
  // assigns blockCacheSize as well.
  XLOG(INFO) << "metadataSize: " << metadataSize << ". No kangaroo.";
}

// Set up BlockCache if enabled
if (blockCacheSize > 0) {
Expand Down
12 changes: 12 additions & 0 deletions cachelib/cachebench/cache/Cache-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,17 @@ Cache<Allocator>::Cache(const CacheConfig& config,
config_.navyBloomFilterPerBucketSize,
config_.navySmallItemMaxSize);
}

if (config_.navyKangarooSizePct > 0) {
nvmConfig.navyConfig.setKangaroo(config_.navyKangarooSizePct,
config_.navyKangarooBucketSize,
config_.navyBloomFilterPerBucketSize,
config_.navySmallItemMaxSize,
config_.navyKangarooLogSizePct,
config_.navyKangarooLogThreshold,
config_.navyKangarooLogPhysicalPartitions,
config_.navyKangarooLogIndexPerPhysicalPartitions);
}

nvmConfig.navyConfig.setMaxParcelMemoryMB(config_.navyParcelMemoryMB);

Expand Down Expand Up @@ -524,6 +535,7 @@ Stats Cache<Allocator>::getStats() const {
ret.numNvmItems = lookup("navy_bh_items") + lookup("navy_bc_items");
ret.numNvmBytesWritten = lookup("navy_device_bytes_written");
uint64_t now = fetchNandWrites();

if (now > nandBytesBegin_) {
ret.numNvmNandBytesWritten = now - nandBytesBegin_;
}
Expand Down
1 change: 1 addition & 0 deletions cachelib/cachebench/cache/Cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ class Cache {

// reading of the nand bytes written for the benchmark if enabled.
const uint64_t nandBytesBegin_{0};
uint64_t writtenBytes_{0};

// latency stats of cachelib APIs inside cachebench
mutable util::PercentileStats cacheFindLatency_;
Expand Down
10 changes: 9 additions & 1 deletion cachelib/cachebench/util/CacheConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
JSONSetVal(configJson, navyWriterThreads);
JSONSetVal(configJson, navyCleanRegions);
JSONSetVal(configJson, navyAdmissionWriteRateMB);
JSONSetVal(configJson, navyAdmissionProb);
JSONSetVal(configJson, navyMaxConcurrentInserts);
JSONSetVal(configJson, navyDataChecksum);
JSONSetVal(configJson, navyNumInmemBuffers);
Expand All @@ -85,6 +86,13 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {

JSONSetVal(configJson, mlNvmAdmissionPolicy);
JSONSetVal(configJson, mlNvmAdmissionPolicyLocation);

JSONSetVal(configJson, navyKangarooSizePct);
JSONSetVal(configJson, navyKangarooBucketSize);
JSONSetVal(configJson, navyKangarooLogSizePct);
JSONSetVal(configJson, navyKangarooLogThreshold);
JSONSetVal(configJson, navyKangarooLogPhysicalPartitions);
JSONSetVal(configJson, navyKangarooLogIndexPerPhysicalPartitions);

JSONSetVal(configJson, useTraceTimeStamp);
JSONSetVal(configJson, printNvmCounters);
Expand All @@ -95,7 +103,7 @@ CacheConfig::CacheConfig(const folly::dynamic& configJson) {
// if you added new fields to the configuration, update the JSONSetVal
// to make them available for the json configs and increment the size
// below
checkCorrectSize<CacheConfig, 680>();
checkCorrectSize<CacheConfig, 736>();

if (numPools != poolSizes.size()) {
throw std::invalid_argument(folly::sformat(
Expand Down
Loading