From 2158144f5d2e2476fe8a45cfb1dc3649ff05d1cd Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 17 Sep 2023 16:31:17 -0500 Subject: [PATCH 01/54] Partially fix file size inconsistency --- CMakeLists.txt | 2 +- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 3 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 55 +++++++++++-------- .../hermes_bucket_mdm_tasks.h | 2 +- .../src/hermes_bucket_mdm.cc | 12 ++-- tasks/posix_bdev/src/posix_bdev.cc | 10 ++-- 6 files changed, 48 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b42e4d49..481df1597 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.10) -project(labstor) +project(hermes) #----------------------------------------------------------------------------- # Define Options diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index e8813efcb..f5d8e1b64 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -213,7 +213,8 @@ class PutBlobPhase { #define HERMES_BLOB_REPLACE BIT_OPT(u32, 0) #define HERMES_BLOB_APPEND BIT_OPT(u32, 1) #define HERMES_DID_STAGE_IN BIT_OPT(u32, 2) -#define HERMES_BLOB_DID_CREATE BIT_OPT(u32, 3) +#define HERMES_IS_FILE BIT_OPT(u32, 3) +#define HERMES_BLOB_DID_CREATE BIT_OPT(u32, 4) /** A task to put data in a blob */ diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 049445810..78928210d 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -186,30 +186,39 @@ class Server : public TaskLib { // Stage in blob data from FS task->data_ptr_.ptr_ = LABSTOR_CLIENT->GetPrivatePointer(task->data_); task->data_ptr_.shm_ = task->data_; - if (task->filename_->size() > 0 && blob_info.blob_size_ == 0) { + if (task->filename_->size() > 0) { adapter::BlobPlacement plcmnt; plcmnt.DecodeBlobName(*task->blob_name_); - HILOG(kDebug, "Attempting to stage {} bytes from the backend file {} at offset {}", - task->page_size_, task->filename_->str(), plcmnt.bucket_off_); - LPointer new_data_ptr = LABSTOR_CLIENT->AllocateBuffer(task->page_size_); - int fd = HERMES_POSIX_API->open(task->filename_->c_str(), O_RDONLY); - if (fd < 0) { - HELOG(kError, "Failed to open file {}", task->filename_->str()); - } - int ret = HERMES_POSIX_API->pread(fd, new_data_ptr.ptr_, task->page_size_, (off_t)plcmnt.bucket_off_); - if (ret < 0) { - // TODO(llogan): ret != page_size_ will require knowing file size before-hand - HELOG(kError, "Failed to stage in {} bytes from {}", task->page_size_, task->filename_->str()); - } - HERMES_POSIX_API->close(fd); - memcpy(new_data_ptr.ptr_ + plcmnt.blob_off_, task->data_ptr_.ptr_, task->data_size_); - task->data_ptr_ = new_data_ptr; - task->blob_off_ = 0; - task->data_size_ = ret; + task->flags_.SetBits(HERMES_IS_FILE); task->data_off_ = plcmnt.bucket_off_ + task->blob_off_ + task->data_size_; - task->flags_.SetBits(HERMES_DID_STAGE_IN); - HILOG(kDebug, "Staged {} bytes from the backend file {}", - task->data_size_, task->filename_->str()); + if (blob_info.blob_size_ == 0 && + task->blob_off_ == 0 && + task->data_size_ < task->page_size_) { + HILOG(kDebug, "Attempting to stage {} bytes from the backend file {} at offset {}", + task->page_size_, task->filename_->str(), plcmnt.bucket_off_); + LPointer new_data_ptr = LABSTOR_CLIENT->AllocateBuffer(task->page_size_); + int fd = HERMES_POSIX_API->open(task->filename_->c_str(), O_RDONLY); + if (fd < 0) { + HELOG(kError, "Failed to open file {}", task->filename_->str()); + } + int ret = HERMES_POSIX_API->pread(fd, new_data_ptr.ptr_, task->page_size_, (off_t)plcmnt.bucket_off_); + if (ret < 0) { + // TODO(llogan): ret != page_size_ will require knowing file size before-hand + HELOG(kError, "Failed to stage in {} bytes from {}", task->page_size_, task->filename_->str()); + } + HERMES_POSIX_API->close(fd); + memcpy(new_data_ptr.ptr_ + plcmnt.blob_off_, task->data_ptr_.ptr_, task->data_size_); + task->data_ptr_ = new_data_ptr; + task->blob_off_ = 0; + if (ret < task->blob_off_ + task->data_size_) { + task->data_size_ = task->blob_off_ + task->data_size_; + } else { + task->data_size_ = ret; + } + task->flags_.SetBits(HERMES_DID_STAGE_IN); + HILOG(kDebug, "Staged {} bytes from the backend file {}", + task->data_size_, task->filename_->str()); + } } // Determine amount of additional buffering space needed @@ -218,7 +227,7 @@ class Server : public TaskLib { if (needed_space > blob_info.max_blob_size_) { size_diff = needed_space - blob_info.max_blob_size_; } - if (!task->flags_.Any(HERMES_DID_STAGE_IN)) { + if (!task->flags_.Any(HERMES_IS_FILE)) { task->data_off_ = size_diff; } blob_info.blob_size_ += size_diff; @@ -345,7 +354,7 @@ class Server : public TaskLib { } // Update the bucket statistics int update_mode = bucket_mdm::UpdateSizeMode::kAdd; - if (task->flags_.Any(HERMES_DID_STAGE_IN)) { + if (task->flags_.Any(HERMES_IS_FILE)) { update_mode = bucket_mdm::UpdateSizeMode::kCap; } bkt_mdm_.AsyncUpdateSize(task->task_node_ + 1, diff --git a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h index f40c18ea2..5e81e71ac 100644 --- a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h +++ b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h @@ -129,7 +129,7 @@ struct SetBlobMdmTask : public Task, TaskFlags { class UpdateSizeMode { public: TASK_METHOD_T kAdd = 0; - TASK_METHOD_T kCap = 0; + TASK_METHOD_T kCap = 1; }; /** Update bucket size */ diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index e03639c1a..8c9bcecf7 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -55,6 +55,8 @@ class Server : public TaskLib { } else { internal_size = std::max(task->update_, internal_size); } + HILOG(kInfo, "Updating size of tag {} from {} to {} with update {} (mode={})", + task->tag_id_, tag_info.internal_size_, internal_size, task->update_, task->mode_) tag_info.internal_size_ = (size_t) internal_size; task->SetModuleComplete(); } @@ -200,11 +202,11 @@ class Server : public TaskLib { HILOG(kDebug, "Creating tag for the first time: {} {}", tag_name.str(), tag_id) tag_id_map_.emplace(tag_name, tag_id); tag_map_.emplace(tag_id, TagInfo()); - TagInfo &info = tag_map_[tag_id]; - info.name_ = tag_name; - info.tag_id_ = tag_id; - info.owner_ = task->blob_owner_; - info.internal_size_ = task->backend_size_; + TagInfo &tag_info = tag_map_[tag_id]; + tag_info.name_ = tag_name; + tag_info.tag_id_ = tag_id; + tag_info.owner_ = task->blob_owner_; + tag_info.internal_size_ = task->backend_size_; } else { if (tag_name.size()) { HILOG(kDebug, "Found existing tag: {}", tag_name.str()) diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index f33cc8609..c6f87a83a 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -31,7 +31,7 @@ class Server : public TaskLib { dev_info.mount_point_ = canon; path_ = canon; fd_ = open(dev_info.mount_point_.c_str(), - O_TRUNC | O_CREAT, 0666); + O_TRUNC | O_CREAT | O_RDWR, 0666); if (fd_ < 0) { HELOG(kError, "Failed to open file: {}", dev_info.mount_point_); } @@ -57,17 +57,17 @@ class Server : public TaskLib { } void Write(WriteTask *task) { - size_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); + ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { - HELOG(kError, "BORG: wrote {} bytes, but expected {}", - count, task->size_); + HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", + count, task->size_, strerror(errno)); } task->SetModuleComplete(); } void Read(ReadTask *task) { memcpy(task->buf_, mem_ptr_ + task->disk_off_, task->size_); - size_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); + ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { HELOG(kError, "BORG: read {} bytes, but expected {}", count, task->size_); From fce5da27f22122cbe8f0fbca3cdf22844f95ba75 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Wed, 20 Sep 2023 07:21:35 -0500 Subject: [PATCH 02/54] Use catch2 instaed of specific version --- ci/hermes/packages/__init__.py | 0 ci/hermes/packages/hermes/package.py | 5 +++-- ci/hermes/packages/hermes_shm/package.py | 3 ++- tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) delete mode 100644 ci/hermes/packages/__init__.py diff --git a/ci/hermes/packages/__init__.py b/ci/hermes/packages/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index 8d0cda713..8e110b39b 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -19,8 +19,9 @@ class Hermes(CMakePackage): depends_on('mochi-thallium~cereal@0.10.1') depends_on('cereal') - depends_on('catch2@3.0.1') - depends_on('mpich@3.3.2:') + # depends_on('catch2@3.0.1') + depends_on('catch2') + depends_on('mpich@3.3.2') depends_on('yaml-cpp') depends_on('boost@1.7:') depends_on('hermes_shm') diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 58cfbea28..9225ceb29 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -5,7 +5,8 @@ class HermesShm(CMakePackage): git = "https://github.com/lukemartinlogan/hermes_shm.git" version('master', branch='master') depends_on('mochi-thallium~cereal@0.10.1') - depends_on('catch2@3.0.1') + # depends_on('catch2@3.0.1') + depends_on('catch2') depends_on('mpi') depends_on('boost@1.7:') depends_on('cereal') diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index 8c9bcecf7..656a4fccc 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -55,7 +55,7 @@ class Server : public TaskLib { } else { internal_size = std::max(task->update_, internal_size); } - HILOG(kInfo, "Updating size of tag {} from {} to {} with update {} (mode={})", + HILOG(kDebug, "Updating size of tag {} from {} to {} with update {} (mode={})", task->tag_id_, tag_info.internal_size_, internal_size, task->update_, task->mode_) tag_info.internal_size_ = (size_t) internal_size; task->SetModuleComplete(); From 92a32751b1be1cd1f080852868b7fd0687435b1c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 16:23:19 -0500 Subject: [PATCH 03/54] hermes_api_bench --- benchmark/CMakeLists.txt | 8 ++-- benchmark/hermes_api_bench.cc | 71 ++++++++++++++++++++++++++-- ci/hermes/packages/hermes/package.py | 1 - 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 4eb76ca54..28c07cf82 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -29,11 +29,11 @@ target_link_libraries(test_performance_exec # ${Labstor_CLIENT_LIBRARIES} hermes Catch2::Catch2 # MPI::MPI_CXX ${ZMQ_LIBRARIES}) -add_executable(test_hermes_api +add_executable(hermes_api_bench hermes_api_bench.cc) -add_dependencies(test_hermes_api +add_dependencies(hermes_api_bench ${Labstor_CLIENT_DEPS} hermes) -target_link_libraries(test_hermes_api +target_link_libraries(hermes_api_bench ${Labstor_CLIENT_LIBRARIES} hermes Catch2::Catch2 MPI::MPI_CXX) @@ -50,7 +50,7 @@ add_test(NAME test_performance COMMAND #------------------------------------------------------------------------------ install(TARGETS test_performance_exec - test_hermes_api + hermes_api_bench EXPORT ${LABSTOR_EXPORTED_TARGETS} LIBRARY DESTINATION ${LABSTOR_INSTALL_LIB_DIR} diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 928d53053..3ba8d0cb0 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -69,9 +69,7 @@ void GetTest(int nprocs, int rank, for (size_t i = 0; i < blobs_per_rank; ++i) { size_t blob_name_int = rank * blobs_per_rank + i; std::string name = std::to_string(blob_name_int); - hermes::Blob ret; - hermes::BlobId blob_id = bkt.GetBlobId(name); - bkt.Get(blob_id, ret, ctx); + bkt.Get(name, ret, ctx); } } t.Pause(); @@ -86,6 +84,62 @@ void PutGetTest(int nprocs, int rank, int repeat, GetTest(nprocs, rank, repeat, blobs_per_rank, blob_size); } +/** Each process PUTS into the same bucket, but with different blob names */ +void PartialPutTest(int nprocs, int rank, + int repeat, size_t blobs_per_rank, + size_t blob_size, size_t part_size) { + Timer t; + hermes::Context ctx; + hermes::Bucket bkt("hello", ctx); + hermes::Blob blob(blob_size); + t.Resume(); + for (int j = 0; j < repeat; ++j) { + for (size_t i = 0; i < blobs_per_rank; ++i) { + size_t blob_name_int = rank * blobs_per_rank + i; + std::string name = std::to_string(blob_name_int); + for (size_t cur_size = 0; cur_size < blob_size; cur_size += part_size) { + bkt.PartialPut(name, blob, cur_size, ctx); + } + } + } + t.Pause(); + GatherTimes("PartialPut", nprocs * blobs_per_rank * blob_size * repeat, t); +} + +/** + * Each process GETS from the same bucket, but with different blob names + * MUST run PutTest first. + * */ +void PartialGetTest(int nprocs, int rank, + int repeat, size_t blobs_per_rank, + size_t blob_size, size_t part_size) { + Timer t; + hermes::Context ctx; + hermes::Bucket bkt("hello", ctx); + t.Resume(); + for (int j = 0; j < repeat; ++j) { + for (size_t i = 0; i < blobs_per_rank; ++i) { + size_t blob_name_int = rank * blobs_per_rank + i; + std::string name = std::to_string(blob_name_int); + hermes::Blob ret(blob_size); + for (size_t cur_size = 0; cur_size < blob_size; cur_size += part_size) { + bkt.PartialGet(name, ret, cur_size, ctx); + } + } + } + t.Pause(); + GatherTimes("PartialGet", nprocs * blobs_per_rank * blob_size * repeat, t); +} + +/** Each process PUTs then GETs */ +void PartialPutGetTest(int nprocs, int rank, int repeat, + size_t blobs_per_rank, size_t blob_size, + size_t part_size) { + PartialPutTest(nprocs, rank, repeat, blobs_per_rank, blob_size); + MPI_Barrier(MPI_COMM_WORLD); + PartialGetTest(nprocs, rank, repeat, blobs_per_rank, blob_size); +} + /** Each process creates a set of buckets */ void CreateBucketTest(int nprocs, int rank, size_t bkts_per_rank) { @@ -120,7 +174,7 @@ void GetBucketTest(int nprocs, int rank, hapi::Bucket bkt(std::to_string(bkt_name), ctx); } t.Pause(); - GatherTimes("CreateBucket", bkts_per_rank * nprocs, t); + GatherTimes("GetBucket", bkts_per_rank * nprocs, t); } /** Each process deletes a number of buckets */ @@ -172,6 +226,7 @@ void help() { printf("USAGE: ./api_bench [mode] ...\n"); printf("USAGE: ./api_bench put [blob_size (K/M/G)] [blobs_per_rank]\n"); printf("USAGE: ./api_bench putget [blob_size (K/M/G)] [blobs_per_rank]\n"); + printf("USAGE: ./api_bench pputget [blob_size (K/M/G)] [part_size (K/M/G)] [blobs_per_rank]\n"); printf("USAGE: ./api_bench create_bkt [bkts_per_rank]\n"); printf("USAGE: ./api_bench get_bkt [bkts_per_rank]\n"); printf("USAGE: ./api_bench create_blob_1bkt [blobs_per_rank]\n"); @@ -212,7 +267,13 @@ int main(int argc, char **argv) { size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); size_t blobs_per_rank = atoi(argv[3]); PutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size); - } else if (mode == "create_bkt") { + } else if (mode == "pputget") { + REQUIRE_ARGC(5) + size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); + size_t part_size = hshm::ConfigParse::ParseSize(argv[3]); + size_t blobs_per_rank = atoi(argv[4]); + PartialPutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size, part_size); + } else if (mode == "create_bkt") { REQUIRE_ARGC(3) size_t bkts_per_rank = atoi(argv[2]); CreateBucketTest(nprocs, rank, bkts_per_rank); diff --git a/ci/hermes/packages/hermes/package.py b/ci/hermes/packages/hermes/package.py index 8e110b39b..184922e1b 100644 --- a/ci/hermes/packages/hermes/package.py +++ b/ci/hermes/packages/hermes/package.py @@ -14,7 +14,6 @@ class Hermes(CMakePackage): variant('vfd', default=False, description='Enable HDF5 VFD') variant('ares', default=False, description='Enable full libfabric install') variant('debug', default=False, description='Enable debug mode') - variant('debug', default=False, description='Build shared libraries') variant('zmq', default=False, description='Build ZeroMQ tests') depends_on('mochi-thallium~cereal@0.10.1') From 447aad09e0293d3e20baa19092d3a43138c8a8ee Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 16:24:28 -0500 Subject: [PATCH 04/54] Fix minor compile error --- benchmark/hermes_api_bench.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 3ba8d0cb0..ef57ff695 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -67,6 +67,7 @@ void GetTest(int nprocs, int rank, t.Resume(); for (int j = 0; j < repeat; ++j) { for (size_t i = 0; i < blobs_per_rank; ++i) { + hermes::Blob ret; size_t blob_name_int = rank * blobs_per_rank + i; std::string name = std::to_string(blob_name_int); bkt.Get(name, ret, ctx); @@ -135,9 +136,9 @@ void PartialGetTest(int nprocs, int rank, void PartialPutGetTest(int nprocs, int rank, int repeat, size_t blobs_per_rank, size_t blob_size, size_t part_size) { - PartialPutTest(nprocs, rank, repeat, blobs_per_rank, blob_size); + PartialPutTest(nprocs, rank, repeat, blobs_per_rank, blob_size, part_size); MPI_Barrier(MPI_COMM_WORLD); - PartialGetTest(nprocs, rank, repeat, blobs_per_rank, blob_size); + PartialGetTest(nprocs, rank, repeat, blobs_per_rank, blob_size, part_size); } /** Each process creates a set of buckets */ From d1cbfe5c2cd93b465ed0e7db1fb1a4a5fdb9b5c8 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 21:09:54 -0500 Subject: [PATCH 05/54] Add debug statement to remote_queue --- tasks_required/remote_queue/src/remote_queue.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 6ae81d883..039ef67f1 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -57,6 +57,8 @@ class Server : public TaskLib { /** Construct remote queue */ void Construct(ConstructTask *task) { + HILOG(kInfo, "(node {}) Constructing remote queue (task_node={}, task_state={}, method={})", + LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_); LABSTOR_THALLIUM->RegisterRpc("RpcPushSmall", [this](const tl::request &req, TaskStateId state_id, u32 method, From 1b0d1eb9463b765b827a5dff7ff8adddf039d401 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 21:16:13 -0500 Subject: [PATCH 06/54] Change worker catch statement --- src/worker.cc | 3 +-- .../remote_queue/src/remote_queue.cc | 26 ------------------- 2 files changed, 1 insertion(+), 28 deletions(-) diff --git a/src/worker.cc b/src/worker.cc index ee514fdae..15e8ebbff 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -15,8 +15,7 @@ void Worker::Loop() { try { Run(); } catch (hshm::Error &e) { - e.print(); - exit(1); + HELOG(kFatal, "(node {}) Worker {} caught an error: {}", LABSTOR_CLIENT->node_id_, id_, e.what()); } // Yield(); } diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 039ef67f1..fd4916fad 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -142,14 +142,6 @@ class Server : public TaskLib { tl_task->done_ = true; }, tl_task); task->tl_future_.emplace_back(tl_task); - - -// tl::async_response future = LABSTOR_THALLIUM->AsyncCall(domain_id.id_, -// "RpcPushSmall", -// task->exec_->id_, -// task->exec_method_, -// params); -// task->tl_future_.emplace_back(std::move(future)); } } @@ -193,30 +185,12 @@ class Server : public TaskLib { tl_task->done_ = true; }, tl_task); task->tl_future_.emplace_back(tl_task); - -// tl::async_response future = LABSTOR_THALLIUM->AsyncIoCall(domain_id.id_, -// "RpcPushBulk", -// io_type, -// (char *) xfer[0].data_, -// xfer[0].data_size_, -// task->exec_->id_, -// task->exec_method_, -// params, -// xfer[0].data_size_, -// io_type); -// task->tl_future_.emplace_back(std::move(future)); } } /** Wait for client to finish message */ void ClientWaitForMessage(PushTask *task) { for (; task->replica_ < task->tl_future_.size(); ++task->replica_) { -// tl::async_response &future = task->tl_future_[task->replica_]; -// if (!LABSTOR_THALLIUM->IsDone(future)) { -// return; -// } -// std::string ret = LABSTOR_THALLIUM->Wait(future); -// HandlePushReplicaOutput(task->replica_, ret, task); ThalliumTask *tl_task = (ThalliumTask *) task->tl_future_[task->replica_]; if (!tl_task->IsDone()) { return; From 25c2234dcffba2d8617d8c24e7a090563ec12f18 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 21:28:36 -0500 Subject: [PATCH 07/54] Add node_id to thallium --- include/labstor/network/rpc_thallium.h | 4 +-- .../remote_queue/src/remote_queue.cc | 28 +++++++------------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/include/labstor/network/rpc_thallium.h b/include/labstor/network/rpc_thallium.h index 3a722e3b3..c48d24b28 100644 --- a/include/labstor/network/rpc_thallium.h +++ b/include/labstor/network/rpc_thallium.h @@ -139,8 +139,8 @@ class ThalliumRpc { return remote_proc.on(server).async(std::forward(args)...); } } catch (tl::margo_exception &err) { - HELOG(kFatal, "Thallium failed on function: {}\n{}", - func_name, err.what()) + HELOG(kFatal, "(node {}) Thallium failed on function: {}: {}", + rpc_->node_id_, func_name, err.what()) exit(1); } } diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index fd4916fad..8118cb87f 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -84,24 +84,16 @@ class Server : public TaskLib { /** Handle output from replica PUSH */ static void HandlePushReplicaOutput(int replica, std::string &ret, PushTask *task) { - try { - std::vector xfer(1); - xfer[0].data_ = ret.data(); - xfer[0].data_size_ = ret.size(); - HILOG(kDebug, "Wait got {} bytes of data (task_node={}, task_state={}, method={})", - xfer[0].data_size_, - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_); - BinaryInputArchive ar(xfer); - task->exec_->LoadEnd(replica, task->exec_method_, ar, task->orig_task_); - } catch (std::exception &e) { - HILOG(kFatal, "Error LoadEnd (task_node={}, task_state={}, method={}): {}", - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_, - e.what()); - } + std::vector xfer(1); + xfer[0].data_ = ret.data(); + xfer[0].data_size_ = ret.size(); + HILOG(kDebug, "Wait got {} bytes of data (task_node={}, task_state={}, method={})", + xfer[0].data_size_, + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_); + BinaryInputArchive ar(xfer); + task->exec_->LoadEnd(replica, task->exec_method_, ar, task->orig_task_); } /** Handle finalization of PUSH replicate */ From b951b316ae84f9e9647613c163184307110e615f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 21:29:46 -0500 Subject: [PATCH 08/54] Print more node info for thallium error --- include/labstor/network/rpc_thallium.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/labstor/network/rpc_thallium.h b/include/labstor/network/rpc_thallium.h index c48d24b28..7f7abe10d 100644 --- a/include/labstor/network/rpc_thallium.h +++ b/include/labstor/network/rpc_thallium.h @@ -139,8 +139,8 @@ class ThalliumRpc { return remote_proc.on(server).async(std::forward(args)...); } } catch (tl::margo_exception &err) { - HELOG(kFatal, "(node {}) Thallium failed on function: {}: {}", - rpc_->node_id_, func_name, err.what()) + HELOG(kFatal, "(node {} -> {}) Thallium failed on function: {}: {}", + rpc_->node_id_, node_id, func_name, err.what()) exit(1); } } From c18ad6157ce19d604037c0c3cf5e0f2b6acac8ec Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Thu, 21 Sep 2023 21:58:27 -0500 Subject: [PATCH 09/54] Use const std::string --- include/labstor/network/rpc_thallium.h | 78 ++++++++++++++------------ 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/include/labstor/network/rpc_thallium.h b/include/labstor/network/rpc_thallium.h index 7f7abe10d..bc504fede 100644 --- a/include/labstor/network/rpc_thallium.h +++ b/include/labstor/network/rpc_thallium.h @@ -93,7 +93,7 @@ class ThalliumRpc { tl::endpoint server = client_engine_->lookup(server_name.c_str()); client_engine_->shutdown_remote_engine(server); } catch (std::exception &e) { - HELOG(kFatal, e.what()); + HELOG(kFatal, "Stop daemon failed: {}", e.what()); } } @@ -120,7 +120,7 @@ class ThalliumRpc { /** RPC call */ template - RetT Call(u32 node_id, const char *func_name, Args&&... args) { + RetT Call(u32 node_id, const std::string &func_name, Args&&... args) { HILOG(kDebug, "Calling {} {} -> {}", func_name, rpc_->node_id_, node_id) try { std::string server_name = GetServerName(node_id); @@ -147,65 +147,71 @@ class ThalliumRpc { /** RPC call */ template - RetT SyncCall(u32 node_id, const char *func_name, Args&&... args) { + RetT SyncCall(u32 node_id, const std::string &func_name, Args&&... args) { return Call( node_id, func_name, std::forward(args)...); } /** Async RPC call */ template - thallium::async_response AsyncCall(u32 node_id, const char *func_name, Args&&... args) { + thallium::async_response AsyncCall(u32 node_id, const std::string &func_name, Args&&... args) { return Call( node_id, func_name, std::forward(args)...); } /** I/O transfers */ template - RetT IoCall(i32 node_id, const char *func_name, + RetT IoCall(i32 node_id, const std::string &func_name, IoType type, char *data, size_t size, Args&& ...args) { HILOG(kDebug, "Calling {} {} -> {}", func_name, rpc_->node_id_, node_id) - std::string server_name = GetServerName(node_id); - tl::bulk_mode flag; - switch (type) { - case IoType::kRead: { - // The "bulk" object will be modified - flag = tl::bulk_mode::write_only; - break; - } - case IoType::kWrite: { - // The "bulk" object will only be read from - flag = tl::bulk_mode::read_only; - break; - } - case IoType::kNone: { - // TODO(llogan) - HELOG(kFatal, "Cannot have none I/O type") - exit(1); + try { + std::string server_name = GetServerName(node_id); + tl::bulk_mode flag; + switch (type) { + case IoType::kRead: { + // The "bulk" object will be modified + flag = tl::bulk_mode::write_only; + break; + } + case IoType::kWrite: { + // The "bulk" object will only be read from + flag = tl::bulk_mode::read_only; + break; + } + case IoType::kNone: { + // TODO(llogan) + HELOG(kFatal, "Cannot have none I/O type") + exit(1); + } } - } - tl::remote_procedure remote_proc = client_engine_->define(func_name); - tl::endpoint server = client_engine_->lookup(server_name); + tl::remote_procedure remote_proc = client_engine_->define(func_name); + tl::endpoint server = client_engine_->lookup(server_name); - std::vector> segments(1); - segments[0].first = data; - segments[0].second = size; + std::vector> segments(1); + segments[0].first = data; + segments[0].second = size; - tl::bulk bulk = client_engine_->expose(segments, flag); - if constexpr (!ASYNC) { - if constexpr (std::is_same_v) { - remote_proc.on(server)(bulk, std::forward(args)...); + tl::bulk bulk = client_engine_->expose(segments, flag); + if constexpr (!ASYNC) { + if constexpr (std::is_same_v) { + remote_proc.on(server)(bulk, std::forward(args)...); + } else { + return remote_proc.on(server)(bulk, std::forward(args)...); + } } else { - return remote_proc.on(server)(bulk, std::forward(args)...); + return remote_proc.on(server).async(bulk, std::forward(args)...); } - } else { - return remote_proc.on(server).async(bulk, std::forward(args)...); + } catch (tl::margo_exception &err) { + HELOG(kFatal, "(node {} -> {}) Thallium failed on function: {}: {}", + rpc_->node_id_, node_id, func_name, err.what()) + exit(1); } } /** Synchronous I/O transfer */ template - RetT SyncIoCall(i32 node_id, const char *func_name, + RetT SyncIoCall(i32 node_id, const std::string &func_name, IoType type, char *data, size_t size, Args&& ...args) { return IoCall( node_id, func_name, type, data, size, std::forward(args)...); From f24cb5159cb2331a06fa4c11ac436273b354563c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 14:02:45 -0500 Subject: [PATCH 10/54] Add run context --- benchmark/hermes_api_bench.cc | 70 ++-- codegen/refresh_methods | 342 +++++++++--------- include/labstor/labstor_namespace.h | 1 + include/labstor/task_registry/task_lib.h | 16 +- src/worker.cc | 4 +- tasks/bdev/include/bdev/bdev_lib_exec.h | 34 +- tasks/bdev/include/bdev/bdev_methods.h | 2 +- tasks/bdev/include/bdev/bdev_methods.yaml | 2 +- tasks/bdev/include/bdev/bdev_tasks.h | 2 +- .../hermes_adapters_lib_exec.h | 14 +- .../hermes_adapters/hermes_adapters_methods.h | 6 +- tasks/hermes_adapters/src/hermes_adapters.cc | 6 +- .../hermes_blob_mdm_lib_exec.h | 36 +- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 66 ++-- .../hermes_bucket_mdm_lib_exec.h | 32 +- .../src/hermes_bucket_mdm.cc | 30 +- .../include/hermes_mdm/hermes_mdm_lib_exec.h | 6 +- tasks/hermes_mdm/src/hermes_mdm.cc | 4 +- tasks/posix_bdev/src/posix_bdev.cc | 16 +- tasks/ram_bdev/src/ram_bdev.cc | 16 +- .../include/TASK_NAME/TASK_NAME_lib_exec.h | 8 +- tasks_required/TASK_NAME/src/TASK_NAME.cc | 6 +- .../labstor_admin/labstor_admin_lib_exec.h | 20 +- .../labstor_admin/src/labstor_admin.cc | 18 +- .../include/proc_queue/proc_queue_lib_exec.h | 8 +- tasks_required/proc_queue/src/proc_queue.cc | 6 +- .../remote_queue/remote_queue_lib_exec.h | 8 +- .../remote_queue/src/remote_queue.cc | 6 +- .../small_message/small_message_lib_exec.h | 12 +- .../small_message/src/small_message.cc | 10 +- .../worch_proc_round_robin_lib_exec.h | 8 +- .../src/worch_proc_round_robin.cc | 6 +- .../worch_queue_round_robin_lib_exec.h | 8 +- .../src/worch_queue_round_robin.cc | 6 +- 34 files changed, 432 insertions(+), 403 deletions(-) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index ef57ff695..18871c95a 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -258,39 +258,43 @@ int main(int argc, char **argv) { HIPRINT("Beginning {}\n", mode) // Run tests - if (mode == "put") { - REQUIRE_ARGC(4) - size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); - size_t blobs_per_rank = atoi(argv[3]); - PutTest(nprocs, rank, 1, blobs_per_rank, blob_size); - } else if (mode == "putget") { - REQUIRE_ARGC(4) - size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); - size_t blobs_per_rank = atoi(argv[3]); - PutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size); - } else if (mode == "pputget") { - REQUIRE_ARGC(5) - size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); - size_t part_size = hshm::ConfigParse::ParseSize(argv[3]); - size_t blobs_per_rank = atoi(argv[4]); - PartialPutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size, part_size); - } else if (mode == "create_bkt") { - REQUIRE_ARGC(3) - size_t bkts_per_rank = atoi(argv[2]); - CreateBucketTest(nprocs, rank, bkts_per_rank); - } else if (mode == "get_bkt") { - REQUIRE_ARGC(3) - size_t bkts_per_rank = atoi(argv[2]); - GetBucketTest(nprocs, rank, bkts_per_rank); - } else if (mode == "del_bkt") { - REQUIRE_ARGC(4) - size_t bkt_per_rank = atoi(argv[2]); - size_t blobs_per_bkt = atoi(argv[3]); - DeleteBucketTest(nprocs, rank, bkt_per_rank, blobs_per_bkt); - } else if (mode == "del_blobs") { - REQUIRE_ARGC(4) - size_t blobs_per_rank = atoi(argv[2]); - DeleteBlobOneBucket(nprocs, rank, blobs_per_rank); + try { + if (mode == "put") { + REQUIRE_ARGC(4) + size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); + size_t blobs_per_rank = atoi(argv[3]); + PutTest(nprocs, rank, 1, blobs_per_rank, blob_size); + } else if (mode == "putget") { + REQUIRE_ARGC(4) + size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); + size_t blobs_per_rank = atoi(argv[3]); + PutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size); + } else if (mode == "pputget") { + REQUIRE_ARGC(5) + size_t blob_size = hshm::ConfigParse::ParseSize(argv[2]); + size_t part_size = hshm::ConfigParse::ParseSize(argv[3]); + size_t blobs_per_rank = atoi(argv[4]); + PartialPutGetTest(nprocs, rank, 1, blobs_per_rank, blob_size, part_size); + } else if (mode == "create_bkt") { + REQUIRE_ARGC(3) + size_t bkts_per_rank = atoi(argv[2]); + CreateBucketTest(nprocs, rank, bkts_per_rank); + } else if (mode == "get_bkt") { + REQUIRE_ARGC(3) + size_t bkts_per_rank = atoi(argv[2]); + GetBucketTest(nprocs, rank, bkts_per_rank); + } else if (mode == "del_bkt") { + REQUIRE_ARGC(4) + size_t bkt_per_rank = atoi(argv[2]); + size_t blobs_per_bkt = atoi(argv[3]); + DeleteBucketTest(nprocs, rank, bkt_per_rank, blobs_per_bkt); + } else if (mode == "del_blobs") { + REQUIRE_ARGC(4) + size_t blobs_per_rank = atoi(argv[2]); + DeleteBlobOneBucket(nprocs, rank, blobs_per_rank); + } + } catch (hshm::Error &err) { + HELOG(kFatal, "Error: {}", err.what()); } MPI_Finalize(); } diff --git a/codegen/refresh_methods b/codegen/refresh_methods index 600e355d8..404f6a705 100755 --- a/codegen/refresh_methods +++ b/codegen/refresh_methods @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -USAGE: ./referesh_methods [TASK_ROOT] +USAGE: ./referesh_methods [TASK_DIR] """ import yaml @@ -9,168 +9,178 @@ import os import sys from codegen.util.paths import LABSTOR_ROOT -TASK_ROOT = sys.argv[1] -TASK_NAME = os.path.basename(TASK_ROOT) -METHODS_H = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_methods.h' -METHODS_YAML = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_methods.yaml' -LIB_EXEC_H = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_lib_exec.h' -METHOD_MACRO = f'LABSTOR_{TASK_NAME.upper()}_METHODS_H_' -LIB_EXEC_MACRO = f'LABSTOR_{TASK_NAME.upper()}_LIB_EXEC_H_' - -with open(METHODS_YAML) as fp: - methods = yaml.load(fp, Loader=yaml.FullLoader) -if methods is None: - methods = {} -if 'kLast' in methods: - del methods['kLast'] -methods = sorted(methods.items(), key=lambda x: x[1]) -if TASK_NAME != 'labstor_admin': - methods.insert(0, ('kConstruct', -2)) - methods.insert(1, ('kDestruct', -1)) - -# Produce the TASK_NAME_methods.h file -lines = [] -lines += [f'#ifndef {METHOD_MACRO}', - f'#define {METHOD_MACRO}', - '', - '/** The set of methods in the admin task */', - 'struct Method : public TaskMethod {'] -for method_enum_name, method_off in methods: - if method_enum_name == 'kConstruct': - continue - if method_enum_name == 'kDestruct': - continue - lines += f' TASK_METHOD_T {method_enum_name} = kLast + {method_off};', -lines += ['};', '', f'#endif // {METHOD_MACRO}'] -with open(METHODS_H, 'w') as fp: - fp.write('\n'.join(lines)) - - -# Produce the TASK_NAME_lib_exec.h file -lines = [] -lines += [f'#ifndef {LIB_EXEC_MACRO}', - f'#define {LIB_EXEC_MACRO}', - ''] -## Create the Run method -lines += ['/** Execute a task */', - 'void Run(u32 method, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' {method_name}(reinterpret_cast<{task_name} *>(task));', - f' break;', - f' }}'] -lines += [' }'] -lines += ['}'] - -## Create the ReplicateStart method -lines += ['/** Ensure there is space to store replicated outputs */', - 'void ReplicateStart(u32 method, u32 count, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' labstor::CALL_REPLICA_START(count, reinterpret_cast<{task_name}*>(task));', - f' break;', - f' }}'] -lines += [' }'] -lines += ['}'] - -## Create the ReplicateEnd method -lines += ['/** Determine success and handle failures */', - 'void ReplicateEnd(u32 method, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' labstor::CALL_REPLICA_END(reinterpret_cast<{task_name}*>(task));', - f' break;', - f' }}'] -lines += [' }'] -lines += ['}'] - -## Create the SaveStart Method -lines += ['/** Serialize a task when initially pushing into remote */', - 'std::vector SaveStart(u32 method, BinaryOutputArchive &ar, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' ar << *reinterpret_cast<{task_name}*>(task);', - f' break;', - f' }}'] -lines += [' }'] -lines += [' return ar.Get();'] -lines += ['}'] - -## Create the LoadStart Method -lines += ['/** Deserialize a task when popping from remote queue */', - 'TaskPointer LoadStart(u32 method, BinaryInputArchive &ar) override {', - ' TaskPointer task_ptr;', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' task_ptr.task_ = LABSTOR_CLIENT->NewEmptyTask<{task_name}>(task_ptr.p_);', - f' ar >> *reinterpret_cast<{task_name}*>(task_ptr.task_);', - f' break;', - f' }}'] -lines += [' }'] -lines += [' return task_ptr;'] -lines += ['}'] - -## Create the SaveEnd Method -lines += ['/** Serialize a task when returning from remote queue */', - 'std::vector SaveEnd(u32 method, BinaryOutputArchive &ar, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' ar << *reinterpret_cast<{task_name}*>(task);', - f' break;', - f' }}'] -lines += [' }'] -lines += [' return ar.Get();'] -lines += ['}'] - -## Create the LoadEnd Method -lines += ['/** Deserialize a task when returning from remote queue */', - 'void LoadEnd(u32 replica, u32 method, BinaryInputArchive &ar, Task *task) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' ar.Deserialize(replica, *reinterpret_cast<{task_name}*>(task));', - f' break;', - f' }}'] -lines += [' }'] -lines += ['}'] - -## Create the CheckIfConcurrent Method -lines += ['/** Get the grouping of the task */', - 'u32 GetGroup(u32 method, Task *task, hshm::charbuf &group) override {', - ' switch (method) {'] -for method_enum_name, method_off in methods: - method_name = method_enum_name.replace('k', '', 1) - task_name = method_name + "Task" - lines += [f' case Method::{method_enum_name}: {{', - f' return reinterpret_cast<{task_name}*>(task)->GetGroup(group);', - f' }}'] -lines += [' }'] -lines += [' return -1;'] -lines += ['}'] - -## Finish the file -lines += ['', f'#endif // {METHOD_MACRO}'] - -## Write TASK_NAME_lib_exec.h -with open(LIB_EXEC_H, 'w') as fp: - fp.write('\n'.join(lines)) +def refresh_methods(TASK_ROOT): + if not os.path.exists(f'{TASK_ROOT}/include'): + return + TASK_NAME = os.path.basename(TASK_ROOT) + METHODS_H = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_methods.h' + METHODS_YAML = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_methods.yaml' + LIB_EXEC_H = f'{TASK_ROOT}/include/{TASK_NAME}/{TASK_NAME}_lib_exec.h' + METHOD_MACRO = f'LABSTOR_{TASK_NAME.upper()}_METHODS_H_' + LIB_EXEC_MACRO = f'LABSTOR_{TASK_NAME.upper()}_LIB_EXEC_H_' + + with open(METHODS_YAML) as fp: + methods = yaml.load(fp, Loader=yaml.FullLoader) + if methods is None: + methods = {} + if 'kLast' in methods: + del methods['kLast'] + methods = sorted(methods.items(), key=lambda x: x[1]) + if TASK_NAME != 'labstor_admin': + methods.insert(0, ('kConstruct', -2)) + methods.insert(1, ('kDestruct', -1)) + + # Produce the TASK_NAME_methods.h file + lines = [] + lines += [f'#ifndef {METHOD_MACRO}', + f'#define {METHOD_MACRO}', + '', + '/** The set of methods in the admin task */', + 'struct Method : public TaskMethod {'] + for method_enum_name, method_off in methods: + if method_enum_name == 'kConstruct': + continue + if method_enum_name == 'kDestruct': + continue + lines += f' TASK_METHOD_T {method_enum_name} = kLast + {method_off};', + lines += ['};', '', f'#endif // {METHOD_MACRO}'] + with open(METHODS_H, 'w') as fp: + fp.write('\n'.join(lines)) + + + # Produce the TASK_NAME_lib_exec.h file + lines = [] + lines += [f'#ifndef {LIB_EXEC_MACRO}', + f'#define {LIB_EXEC_MACRO}', + ''] + ## Create the Run method + lines += ['/** Execute a task */', + 'void Run(u32 method, Task *task, RunContext &ctx) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' {method_name}(reinterpret_cast<{task_name} *>(task), ctx);', + f' break;', + f' }}'] + lines += [' }'] + lines += ['}'] + + ## Create the ReplicateStart method + lines += ['/** Ensure there is space to store replicated outputs */', + 'void ReplicateStart(u32 method, u32 count, Task *task) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' labstor::CALL_REPLICA_START(count, reinterpret_cast<{task_name}*>(task));', + f' break;', + f' }}'] + lines += [' }'] + lines += ['}'] + + ## Create the ReplicateEnd method + lines += ['/** Determine success and handle failures */', + 'void ReplicateEnd(u32 method, Task *task) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' labstor::CALL_REPLICA_END(reinterpret_cast<{task_name}*>(task));', + f' break;', + f' }}'] + lines += [' }'] + lines += ['}'] + + ## Create the SaveStart Method + lines += ['/** Serialize a task when initially pushing into remote */', + 'std::vector SaveStart(u32 method, BinaryOutputArchive &ar, Task *task) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' ar << *reinterpret_cast<{task_name}*>(task);', + f' break;', + f' }}'] + lines += [' }'] + lines += [' return ar.Get();'] + lines += ['}'] + + ## Create the LoadStart Method + lines += ['/** Deserialize a task when popping from remote queue */', + 'TaskPointer LoadStart(u32 method, BinaryInputArchive &ar) override {', + ' TaskPointer task_ptr;', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' task_ptr.task_ = LABSTOR_CLIENT->NewEmptyTask<{task_name}>(task_ptr.p_);', + f' ar >> *reinterpret_cast<{task_name}*>(task_ptr.task_);', + f' break;', + f' }}'] + lines += [' }'] + lines += [' return task_ptr;'] + lines += ['}'] + + ## Create the SaveEnd Method + lines += ['/** Serialize a task when returning from remote queue */', + 'std::vector SaveEnd(u32 method, BinaryOutputArchive &ar, Task *task) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' ar << *reinterpret_cast<{task_name}*>(task);', + f' break;', + f' }}'] + lines += [' }'] + lines += [' return ar.Get();'] + lines += ['}'] + + ## Create the LoadEnd Method + lines += ['/** Deserialize a task when returning from remote queue */', + 'void LoadEnd(u32 replica, u32 method, BinaryInputArchive &ar, Task *task) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' ar.Deserialize(replica, *reinterpret_cast<{task_name}*>(task));', + f' break;', + f' }}'] + lines += [' }'] + lines += ['}'] + + ## Create the CheckIfConcurrent Method + lines += ['/** Get the grouping of the task */', + 'u32 GetGroup(u32 method, Task *task, hshm::charbuf &group) override {', + ' switch (method) {'] + for method_enum_name, method_off in methods: + method_name = method_enum_name.replace('k', '', 1) + task_name = method_name + "Task" + lines += [f' case Method::{method_enum_name}: {{', + f' return reinterpret_cast<{task_name}*>(task)->GetGroup(group);', + f' }}'] + lines += [' }'] + lines += [' return -1;'] + lines += ['}'] + + ## Finish the file + lines += ['', f'#endif // {METHOD_MACRO}'] + + ## Write TASK_NAME_lib_exec.h + with open(LIB_EXEC_H, 'w') as fp: + fp.write('\n'.join(lines)) + +TASK_DIR = sys.argv[1] +TASK_ROOTS = [os.path.join(TASK_DIR, item) for item in os.listdir(TASK_DIR)] +for TASK_ROOT in TASK_ROOTS: + try: + refresh_methods(TASK_ROOT) + except: + pass diff --git a/include/labstor/labstor_namespace.h b/include/labstor/labstor_namespace.h index d80e18f6f..230834691 100644 --- a/include/labstor/labstor_namespace.h +++ b/include/labstor/labstor_namespace.h @@ -22,6 +22,7 @@ using labstor::TaskLib; using labstor::TaskLibClient; using labstor::config::QueueManagerInfo; using labstor::TaskPrio; +using labstor::RunContext; using hshm::RwLock; using hshm::Mutex; diff --git a/include/labstor/task_registry/task_lib.h b/include/labstor/task_registry/task_lib.h index e67de5852..5dc4eda98 100644 --- a/include/labstor/task_registry/task_lib.h +++ b/include/labstor/task_registry/task_lib.h @@ -53,6 +53,17 @@ struct TaskPointer { } }; +/** Context passed to the Run method of a task */ +struct RunContext { + u32 lane_id_; /**< The lane id of the task */ + + /** Default constructor */ + RunContext() {} + + /** Emplace constructor */ + RunContext(u32 lane_id) : lane_id_(lane_id) {} +}; + /** * Represents a custom operation to perform. * Tasks are independent of Hermes. @@ -77,7 +88,7 @@ class TaskLib { virtual ~TaskLib() = default; /** Run a method of the task */ - virtual void Run(u32 method, Task *task) = 0; + virtual void Run(u32 method, Task *task, RunContext &ctx) = 0; /** Allow task to store replicas of completion */ virtual void ReplicateStart(u32 method, u32 count, Task *task) = 0; @@ -132,7 +143,8 @@ typedef const char* (*get_task_lib_name_t)(void); labstor::TaskState *exec = reinterpret_cast( \ new TYPE_UNWRAP(TRAIT_CLASS)()); \ exec->Init(task->id_, state_name); \ - exec->Run(labstor::TaskMethod::kConstruct, task); \ + RunContext ctx(0); \ + exec->Run(labstor::TaskMethod::kConstruct, task, ctx); \ return exec; \ } \ const char* get_task_lib_name(void) { return TASK_NAME; } \ diff --git a/src/worker.cc b/src/worker.cc index 15e8ebbff..37c696d27 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -45,6 +45,8 @@ void Worker::PollGrouped(WorkEntry &work_entry) { Task *task; LaneData *entry; int off = 0; + RunContext ctx; + ctx.lane_id_ = work_entry.lane_id_; for (int i = 0; i < 1024; ++i) { // Get the task message if (lane->peek(entry, off).IsNull()) { @@ -75,7 +77,7 @@ void Worker::PollGrouped(WorkEntry &work_entry) { task->SetUnordered(); } else { task->SetStarted(); - exec->Run(task->method_, task); + exec->Run(task->method_, task, ctx); } } // Cleanup on task completion diff --git a/tasks/bdev/include/bdev/bdev_lib_exec.h b/tasks/bdev/include/bdev/bdev_lib_exec.h index d154585ad..943c61ed2 100644 --- a/tasks/bdev/include/bdev/bdev_lib_exec.h +++ b/tasks/bdev/include/bdev/bdev_lib_exec.h @@ -2,38 +2,38 @@ #define LABSTOR_BDEV_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kWrite: { - Write(reinterpret_cast(task)); + Write(reinterpret_cast(task), ctx); break; } case Method::kRead: { - Read(reinterpret_cast(task)); + Read(reinterpret_cast(task), ctx); break; } - case Method::kAlloc: { - Alloc(reinterpret_cast(task)); + case Method::kAllocate: { + Allocate(reinterpret_cast(task), ctx); break; } case Method::kFree: { - Free(reinterpret_cast(task)); + Free(reinterpret_cast(task), ctx); break; } case Method::kMonitor: { - Monitor(reinterpret_cast(task)); + Monitor(reinterpret_cast(task), ctx); break; } case Method::kUpdateCapacity: { - UpdateCapacity(reinterpret_cast(task)); + UpdateCapacity(reinterpret_cast(task), ctx); break; } } @@ -57,7 +57,7 @@ void ReplicateStart(u32 method, u32 count, Task *task) override { labstor::CALL_REPLICA_START(count, reinterpret_cast(task)); break; } - case Method::kAlloc: { + case Method::kAllocate: { labstor::CALL_REPLICA_START(count, reinterpret_cast(task)); break; } @@ -94,7 +94,7 @@ void ReplicateEnd(u32 method, Task *task) override { labstor::CALL_REPLICA_END(reinterpret_cast(task)); break; } - case Method::kAlloc: { + case Method::kAllocate: { labstor::CALL_REPLICA_END(reinterpret_cast(task)); break; } @@ -131,7 +131,7 @@ std::vector SaveStart(u32 method, BinaryOutputArchive &ar, T ar << *reinterpret_cast(task); break; } - case Method::kAlloc: { + case Method::kAllocate: { ar << *reinterpret_cast(task); break; } @@ -174,7 +174,7 @@ TaskPointer LoadStart(u32 method, BinaryInputArchive &ar) override { ar >> *reinterpret_cast(task_ptr.task_); break; } - case Method::kAlloc: { + case Method::kAllocate: { task_ptr.task_ = LABSTOR_CLIENT->NewEmptyTask(task_ptr.p_); ar >> *reinterpret_cast(task_ptr.task_); break; @@ -216,7 +216,7 @@ std::vector SaveEnd(u32 method, BinaryOutputArchive &ar, Ta ar << *reinterpret_cast(task); break; } - case Method::kAlloc: { + case Method::kAllocate: { ar << *reinterpret_cast(task); break; } @@ -254,7 +254,7 @@ void LoadEnd(u32 replica, u32 method, BinaryInputArchive &ar, Task *task) ar.Deserialize(replica, *reinterpret_cast(task)); break; } - case Method::kAlloc: { + case Method::kAllocate: { ar.Deserialize(replica, *reinterpret_cast(task)); break; } @@ -287,7 +287,7 @@ u32 GetGroup(u32 method, Task *task, hshm::charbuf &group) override { case Method::kRead: { return reinterpret_cast(task)->GetGroup(group); } - case Method::kAlloc: { + case Method::kAllocate: { return reinterpret_cast(task)->GetGroup(group); } case Method::kFree: { diff --git a/tasks/bdev/include/bdev/bdev_methods.h b/tasks/bdev/include/bdev/bdev_methods.h index 715545386..7ca9b12f4 100644 --- a/tasks/bdev/include/bdev/bdev_methods.h +++ b/tasks/bdev/include/bdev/bdev_methods.h @@ -5,7 +5,7 @@ struct Method : public TaskMethod { TASK_METHOD_T kWrite = kLast + 0; TASK_METHOD_T kRead = kLast + 1; - TASK_METHOD_T kAlloc = kLast + 2; + TASK_METHOD_T kAllocate = kLast + 2; TASK_METHOD_T kFree = kLast + 3; TASK_METHOD_T kMonitor = kLast + 4; TASK_METHOD_T kUpdateCapacity = kLast + 5; diff --git a/tasks/bdev/include/bdev/bdev_methods.yaml b/tasks/bdev/include/bdev/bdev_methods.yaml index 616b06640..70173aa3e 100644 --- a/tasks/bdev/include/bdev/bdev_methods.yaml +++ b/tasks/bdev/include/bdev/bdev_methods.yaml @@ -1,6 +1,6 @@ kWrite: 0 kRead: 1 -kAlloc: 2 +kAllocate: 2 kFree: 3 kMonitor: 4 kUpdateCapacity: 5 diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 4837dd543..6a502882d 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -99,7 +99,7 @@ struct AllocateTask : public Task, TaskFlags { lane_hash_ = 0; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; - method_ = Method::kAlloc; + method_ = Method::kAllocate; task_flags_.SetBits(TASK_UNORDERED); domain_id_ = domain_id; diff --git a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_lib_exec.h b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_lib_exec.h index a491992c9..3ca8e4e03 100644 --- a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_lib_exec.h +++ b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_lib_exec.h @@ -1,19 +1,19 @@ -#ifndef LABSTOR_hermes_adapters_LIB_EXEC_H_ -#define LABSTOR_hermes_adapters_LIB_EXEC_H_ +#ifndef LABSTOR_HERMES_ADAPTERS_LIB_EXEC_H_ +#define LABSTOR_HERMES_ADAPTERS_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kCustom: { - Custom(reinterpret_cast(task)); + Custom(reinterpret_cast(task), ctx); break; } } @@ -143,4 +143,4 @@ u32 GetGroup(u32 method, Task *task, hshm::charbuf &group) override { return -1; } -#endif // LABSTOR_hermes_adapters_METHODS_H_ \ No newline at end of file +#endif // LABSTOR_HERMES_ADAPTERS_METHODS_H_ \ No newline at end of file diff --git a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_methods.h b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_methods.h index e6a468219..19d2b80a6 100644 --- a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_methods.h +++ b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_methods.h @@ -1,9 +1,9 @@ -#ifndef LABSTOR_hermes_adapters_METHODS_H_ -#define LABSTOR_hermes_adapters_METHODS_H_ +#ifndef LABSTOR_HERMES_ADAPTERS_METHODS_H_ +#define LABSTOR_HERMES_ADAPTERS_METHODS_H_ /** The set of methods in the admin task */ struct Method : public TaskMethod { TASK_METHOD_T kCustom = kLast + 0; }; -#endif // LABSTOR_hermes_adapters_METHODS_H_ \ No newline at end of file +#endif // LABSTOR_HERMES_ADAPTERS_METHODS_H_ \ No newline at end of file diff --git a/tasks/hermes_adapters/src/hermes_adapters.cc b/tasks/hermes_adapters/src/hermes_adapters.cc index 9192ef849..03d166b76 100644 --- a/tasks/hermes_adapters/src/hermes_adapters.cc +++ b/tasks/hermes_adapters/src/hermes_adapters.cc @@ -12,15 +12,15 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Custom(CustomTask *task) { + void Custom(CustomTask *task, RunContext &ctx) { task->SetModuleComplete(); } diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_lib_exec.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_lib_exec.h index 251fafc96..55f37fe6d 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_lib_exec.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_lib_exec.h @@ -2,74 +2,74 @@ #define LABSTOR_HERMES_BLOB_MDM_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kPutBlob: { - PutBlob(reinterpret_cast(task)); + PutBlob(reinterpret_cast(task), ctx); break; } case Method::kGetBlob: { - GetBlob(reinterpret_cast(task)); + GetBlob(reinterpret_cast(task), ctx); break; } case Method::kTruncateBlob: { - TruncateBlob(reinterpret_cast(task)); + TruncateBlob(reinterpret_cast(task), ctx); break; } case Method::kDestroyBlob: { - DestroyBlob(reinterpret_cast(task)); + DestroyBlob(reinterpret_cast(task), ctx); break; } case Method::kTagBlob: { - TagBlob(reinterpret_cast(task)); + TagBlob(reinterpret_cast(task), ctx); break; } case Method::kBlobHasTag: { - BlobHasTag(reinterpret_cast(task)); + BlobHasTag(reinterpret_cast(task), ctx); break; } case Method::kGetBlobId: { - GetBlobId(reinterpret_cast(task)); + GetBlobId(reinterpret_cast(task), ctx); break; } case Method::kGetOrCreateBlobId: { - GetOrCreateBlobId(reinterpret_cast(task)); + GetOrCreateBlobId(reinterpret_cast(task), ctx); break; } case Method::kGetBlobName: { - GetBlobName(reinterpret_cast(task)); + GetBlobName(reinterpret_cast(task), ctx); break; } case Method::kGetBlobSize: { - GetBlobSize(reinterpret_cast(task)); + GetBlobSize(reinterpret_cast(task), ctx); break; } case Method::kGetBlobScore: { - GetBlobScore(reinterpret_cast(task)); + GetBlobScore(reinterpret_cast(task), ctx); break; } case Method::kGetBlobBuffers: { - GetBlobBuffers(reinterpret_cast(task)); + GetBlobBuffers(reinterpret_cast(task), ctx); break; } case Method::kRenameBlob: { - RenameBlob(reinterpret_cast(task)); + RenameBlob(reinterpret_cast(task), ctx); break; } case Method::kReorganizeBlob: { - ReorganizeBlob(reinterpret_cast(task)); + ReorganizeBlob(reinterpret_cast(task), ctx); break; } case Method::kSetBucketMdm: { - SetBucketMdm(reinterpret_cast(task)); + SetBucketMdm(reinterpret_cast(task), ctx); break; } } diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 78928210d..fa8812f01 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -50,7 +50,7 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { id_alloc_ = 0; node_id_ = LABSTOR_CLIENT->node_id_; switch (task->phase_) { @@ -97,7 +97,7 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } @@ -116,7 +116,7 @@ class Server : public TaskLib { /** * Set the Bucket MDM * */ - void SetBucketMdm(SetBucketMdmTask *task) { + void SetBucketMdm(SetBucketMdmTask *task, RunContext &ctx) { bkt_mdm_.Init(task->bkt_mdm_); task->SetModuleComplete(); } @@ -124,24 +124,24 @@ class Server : public TaskLib { /** * Create a blob's metadata * */ - void PutBlob(PutBlobTask *task) { + void PutBlob(PutBlobTask *task, RunContext &ctx) { if (task->phase_ == PutBlobPhase::kCreate) { - PutBlobCreatePhase(task); + PutBlobCreatePhase(task, ctx); } if (task->phase_ == PutBlobPhase::kAllocate) { - PutBlobAllocatePhase(task); + PutBlobAllocatePhase(task, ctx); } if (task->phase_ == PutBlobPhase::kWaitAllocate) { if (!task->cur_bdev_alloc_->IsComplete()){ return; } - PutBlobWaitAllocatePhase(task); + PutBlobWaitAllocatePhase(task, ctx); } if (task->phase_ == PutBlobPhase::kModify) { - PutBlobModifyPhase(task); + PutBlobModifyPhase(task, ctx); } if (task->phase_ == PutBlobPhase::kWaitModify) { - PutBlobWaitModifyPhase(task); + PutBlobWaitModifyPhase(task, ctx); if (task->phase_ == PutBlobPhase::kWaitModify) { return; } @@ -149,7 +149,7 @@ class Server : public TaskLib { } /** Create blob / update metadata for the PUT */ - void PutBlobCreatePhase(PutBlobTask *task) { + void PutBlobCreatePhase(PutBlobTask *task, RunContext &ctx) { HILOG(kDebug, "PutBlobPhase::kCreate {}", task->blob_id_); // Get the blob info data structure hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); @@ -180,7 +180,7 @@ class Server : public TaskLib { blob_info.UpdateWriteStats(); } if (task->flags_.Any(HERMES_BLOB_REPLACE)) { - PutBlobFreeBuffersPhase(blob_info, task); + PutBlobFreeBuffersPhase(blob_info, task, ctx); } // Stage in blob data from FS @@ -250,7 +250,7 @@ class Server : public TaskLib { } /** Release buffers */ - void PutBlobFreeBuffersPhase(BlobInfo &blob_info, PutBlobTask *task) { + void PutBlobFreeBuffersPhase(BlobInfo &blob_info, PutBlobTask *task, RunContext &ctx) { for (BufferInfo &buf : blob_info.buffers_) { TargetInfo &target = *target_map_[buf.tid_]; std::vector buf_vec = {buf}; @@ -262,7 +262,7 @@ class Server : public TaskLib { } /** Resolve the current sub-placement using BPM */ - void PutBlobAllocatePhase(PutBlobTask *task) { + void PutBlobAllocatePhase(PutBlobTask *task, RunContext &ctx) { BlobInfo &blob_info = blob_map_[task->blob_id_]; PlacementSchema &schema = (*task->schema_)[task->plcmnt_idx_]; SubPlacement &placement = schema.plcmnts_[task->sub_plcmnt_idx_]; @@ -275,7 +275,7 @@ class Server : public TaskLib { } /** Wait for the current-subplacement to complete */ - void PutBlobWaitAllocatePhase(PutBlobTask *task) { + void PutBlobWaitAllocatePhase(PutBlobTask *task, RunContext &ctx) { BlobInfo &blob_info = blob_map_[task->blob_id_]; PlacementSchema &schema = (*task->schema_)[task->plcmnt_idx_]; ++task->sub_plcmnt_idx_; @@ -301,7 +301,7 @@ class Server : public TaskLib { } /** Update the data on storage */ - void PutBlobModifyPhase(PutBlobTask *task) { + void PutBlobModifyPhase(PutBlobTask *task, RunContext &ctx) { BlobInfo &blob_info = blob_map_[task->blob_id_]; char *blob_buf = task->data_ptr_.ptr_; std::vector &write_tasks = *task->bdev_writes_; @@ -336,7 +336,7 @@ class Server : public TaskLib { } /** Wait for the update to complete */ - void PutBlobWaitModifyPhase(PutBlobTask *task) { + void PutBlobWaitModifyPhase(PutBlobTask *task, RunContext &ctx) { std::vector &write_tasks = *task->bdev_writes_; for (int i = (int)write_tasks.size() - 1; i >= 0; --i) { bdev::WriteTask *write_task = write_tasks[i]; @@ -370,18 +370,18 @@ class Server : public TaskLib { } /** Get a blob's data */ - void GetBlob(GetBlobTask *task) { + void GetBlob(GetBlobTask *task, RunContext &ctx) { switch (task->phase_) { case GetBlobPhase::kStart: { - GetBlobGetPhase(task); + GetBlobGetPhase(task, ctx); } case GetBlobPhase::kWait: { - GetBlobWaitPhase(task); + GetBlobWaitPhase(task, ctx); } } } - void GetBlobGetPhase(GetBlobTask *task) { + void GetBlobGetPhase(GetBlobTask *task, RunContext &ctx) { BlobInfo &blob_info = blob_map_[task->blob_id_]; HSHM_MAKE_AR0(task->bdev_reads_, nullptr); std::vector &read_tasks = *task->bdev_reads_; @@ -417,7 +417,7 @@ class Server : public TaskLib { task->phase_ = GetBlobPhase::kWait; } - void GetBlobWaitPhase(GetBlobTask *task) { + void GetBlobWaitPhase(GetBlobTask *task, RunContext &ctx) { std::vector &read_tasks = *task->bdev_reads_; for (auto it = read_tasks.rbegin(); it != read_tasks.rend(); ++it) { bdev::ReadTask *read_task = *it; @@ -435,7 +435,7 @@ class Server : public TaskLib { /** * Tag a blob * */ - void TagBlob(TagBlobTask *task) { + void TagBlob(TagBlobTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -449,7 +449,7 @@ class Server : public TaskLib { /** * Check if blob has a tag * */ - void BlobHasTag(BlobHasTagTask *task) { + void BlobHasTag(BlobHasTagTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -465,7 +465,7 @@ class Server : public TaskLib { /** * Create \a blob_id BLOB ID * */ - void GetOrCreateBlobId(GetOrCreateBlobIdTask *task) { + void GetOrCreateBlobId(GetOrCreateBlobIdTask *task, RunContext &ctx) { hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); hshm::charbuf blob_name_unique = GetBlobNameWithBucket(task->tag_id_, blob_name); auto it = blob_id_map_.find(blob_name_unique); @@ -483,7 +483,7 @@ class Server : public TaskLib { * Get \a blob_name BLOB from \a bkt_id bucket * */ HSHM_ALWAYS_INLINE - void GetBlobId(GetBlobIdTask *task) { + void GetBlobId(GetBlobIdTask *task, RunContext &ctx) { hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); hshm::charbuf blob_name_unique = GetBlobNameWithBucket(task->tag_id_, blob_name); auto it = blob_id_map_.find(blob_name_unique); @@ -501,7 +501,7 @@ class Server : public TaskLib { /** * Get \a blob_name BLOB name from \a blob_id BLOB id * */ - void GetBlobName(GetBlobNameTask *task) { + void GetBlobName(GetBlobNameTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -515,7 +515,7 @@ class Server : public TaskLib { /** * Get \a score from \a blob_id BLOB id * */ - void GetBlobSize(GetBlobSizeTask *task) { + void GetBlobSize(GetBlobSizeTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -529,7 +529,7 @@ class Server : public TaskLib { /** * Get \a score from \a blob_id BLOB id * */ - void GetBlobScore(GetBlobScoreTask *task) { + void GetBlobScore(GetBlobScoreTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -543,7 +543,7 @@ class Server : public TaskLib { /** * Get \a blob_id blob's buffers * */ - void GetBlobBuffers(GetBlobBuffersTask *task) { + void GetBlobBuffers(GetBlobBuffersTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -558,7 +558,7 @@ class Server : public TaskLib { * Rename \a blob_id blob to \a new_blob_name new blob name * in \a bkt_id bucket. * */ - void RenameBlob(RenameBlobTask *task) { + void RenameBlob(RenameBlobTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -574,7 +574,7 @@ class Server : public TaskLib { /** * Truncate a blob to a new size * */ - void TruncateBlob(TruncateBlobTask *task) { + void TruncateBlob(TruncateBlobTask *task, RunContext &ctx) { auto it = blob_map_.find(task->blob_id_); if (it == blob_map_.end()) { task->SetModuleComplete(); @@ -588,7 +588,7 @@ class Server : public TaskLib { /** * Destroy \a blob_id blob in \a bkt_id bucket * */ - void DestroyBlob(DestroyBlobTask *task) { + void DestroyBlob(DestroyBlobTask *task, RunContext &ctx) { switch (task->phase_) { case DestroyBlobPhase::kFreeBuffers: { auto it = blob_map_.find(task->blob_id_); @@ -635,7 +635,7 @@ class Server : public TaskLib { /** * Reorganize \a blob_id blob in \a bkt_id bucket * */ - void ReorganizeBlob(ReorganizeBlobTask *task) { + void ReorganizeBlob(ReorganizeBlobTask *task, RunContext &ctx) { switch (task->phase_) { case ReorganizeBlobPhase::kGet: { auto it = blob_map_.find(task->blob_id_); diff --git a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_lib_exec.h b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_lib_exec.h index f1d58e0ab..def2f8430 100644 --- a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_lib_exec.h +++ b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_lib_exec.h @@ -2,66 +2,66 @@ #define LABSTOR_HERMES_BUCKET_MDM_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kGetOrCreateTag: { - GetOrCreateTag(reinterpret_cast(task)); + GetOrCreateTag(reinterpret_cast(task), ctx); break; } case Method::kGetTagId: { - GetTagId(reinterpret_cast(task)); + GetTagId(reinterpret_cast(task), ctx); break; } case Method::kGetTagName: { - GetTagName(reinterpret_cast(task)); + GetTagName(reinterpret_cast(task), ctx); break; } case Method::kRenameTag: { - RenameTag(reinterpret_cast(task)); + RenameTag(reinterpret_cast(task), ctx); break; } case Method::kDestroyTag: { - DestroyTag(reinterpret_cast(task)); + DestroyTag(reinterpret_cast(task), ctx); break; } case Method::kTagAddBlob: { - TagAddBlob(reinterpret_cast(task)); + TagAddBlob(reinterpret_cast(task), ctx); break; } case Method::kTagRemoveBlob: { - TagRemoveBlob(reinterpret_cast(task)); + TagRemoveBlob(reinterpret_cast(task), ctx); break; } case Method::kTagClearBlobs: { - TagClearBlobs(reinterpret_cast(task)); + TagClearBlobs(reinterpret_cast(task), ctx); break; } case Method::kUpdateSize: { - UpdateSize(reinterpret_cast(task)); + UpdateSize(reinterpret_cast(task), ctx); break; } case Method::kAppendBlobSchema: { - AppendBlobSchema(reinterpret_cast(task)); + AppendBlobSchema(reinterpret_cast(task), ctx); break; } case Method::kAppendBlob: { - AppendBlob(reinterpret_cast(task)); + AppendBlob(reinterpret_cast(task), ctx); break; } case Method::kGetSize: { - GetSize(reinterpret_cast(task)); + GetSize(reinterpret_cast(task), ctx); break; } case Method::kSetBlobMdm: { - SetBlobMdm(reinterpret_cast(task)); + SetBlobMdm(reinterpret_cast(task), ctx); break; } } diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index 656a4fccc..5db76ac26 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -27,27 +27,27 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { id_alloc_ = 0; node_id_ = LABSTOR_CLIENT->node_id_; bkt_mdm_.Init(id_); task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } /** * Set the Blob MDM * */ - void SetBlobMdm(SetBlobMdmTask *task) { + void SetBlobMdm(SetBlobMdmTask *task, RunContext &ctx) { blob_mdm_.Init(task->blob_mdm_); task->SetModuleComplete(); } /** Update the size of the bucket */ - void UpdateSize(UpdateSizeTask *task) { + void UpdateSize(UpdateSizeTask *task, RunContext &ctx) { TagInfo &tag_info = tag_map_[task->tag_id_]; ssize_t internal_size = (ssize_t) tag_info.internal_size_; if (task->mode_ == UpdateSizeMode::kAdd) { @@ -64,7 +64,7 @@ class Server : public TaskLib { /** * Create the PartialPuts for append operations. * */ - void AppendBlobSchema(AppendBlobSchemaTask *task) { + void AppendBlobSchema(AppendBlobSchemaTask *task, RunContext &ctx) { switch (task->phase_) { case AppendBlobPhase::kGetBlobIds: { HILOG(kDebug, "(node {}) Getting blob IDs for tag {} (task_node={})", @@ -123,7 +123,7 @@ class Server : public TaskLib { * are named 0 ... N. Each blob is assumed to have a certain * fixed page size. * */ - void AppendBlob(AppendBlobTask *task) { + void AppendBlob(AppendBlobTask *task, RunContext &ctx) { switch (task->phase_) { case AppendBlobPhase::kGetBlobIds: { HILOG(kDebug, "(node {}) Appending {} bytes to bucket {} (task_node={})", @@ -184,7 +184,7 @@ class Server : public TaskLib { } /** Get or create a tag */ - void GetOrCreateTag(GetOrCreateTagTask *task) { + void GetOrCreateTag(GetOrCreateTagTask *task, RunContext &ctx) { TagId tag_id; HILOG(kDebug, "Creating a tag") @@ -223,7 +223,7 @@ class Server : public TaskLib { } /** Get tag ID */ - void GetTagId(GetTagIdTask *task) { + void GetTagId(GetTagIdTask *task, RunContext &ctx) { hshm::charbuf tag_name = hshm::to_charbuf(*task->tag_name_); auto it = tag_id_map_.find(tag_name); if (it == tag_id_map_.end()) { @@ -236,7 +236,7 @@ class Server : public TaskLib { } /** Get tag name */ - void GetTagName(GetTagNameTask *task) { + void GetTagName(GetTagNameTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->SetModuleComplete(); @@ -247,7 +247,7 @@ class Server : public TaskLib { } /** Rename tag */ - void RenameTag(RenameTagTask *task) { + void RenameTag(RenameTagTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->SetModuleComplete(); @@ -258,7 +258,7 @@ class Server : public TaskLib { } /** Destroy tag */ - void DestroyTag(DestroyTagTask *task) { + void DestroyTag(DestroyTagTask *task, RunContext &ctx) { switch (task->phase_) { case DestroyTagPhase::kDestroyBlobs: { TagInfo &tag = tag_map_[task->tag_id_]; @@ -292,7 +292,7 @@ class Server : public TaskLib { } /** Add a blob to a tag */ - void TagAddBlob(TagAddBlobTask *task) { + void TagAddBlob(TagAddBlobTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->SetModuleComplete(); @@ -304,7 +304,7 @@ class Server : public TaskLib { } /** Remove a blob from a tag */ - void TagRemoveBlob(TagRemoveBlobTask *task) { + void TagRemoveBlob(TagRemoveBlobTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->SetModuleComplete(); @@ -317,7 +317,7 @@ class Server : public TaskLib { } /** Clear blobs from a tag */ - void TagClearBlobs(TagClearBlobsTask *task) { + void TagClearBlobs(TagClearBlobsTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->SetModuleComplete(); @@ -330,7 +330,7 @@ class Server : public TaskLib { } /** Get size of the bucket */ - void GetSize(GetSizeTask *task) { + void GetSize(GetSizeTask *task, RunContext &ctx) { auto it = tag_map_.find(task->tag_id_); if (it == tag_map_.end()) { task->size_ = 0; diff --git a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_lib_exec.h b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_lib_exec.h index 127126f51..2d5c8cd7b 100644 --- a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_lib_exec.h +++ b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_lib_exec.h @@ -2,14 +2,14 @@ #define LABSTOR_HERMES_MDM_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } } diff --git a/tasks/hermes_mdm/src/hermes_mdm.cc b/tasks/hermes_mdm/src/hermes_mdm.cc index d424753cb..9c5557b88 100644 --- a/tasks/hermes_mdm/src/hermes_mdm.cc +++ b/tasks/hermes_mdm/src/hermes_mdm.cc @@ -25,7 +25,7 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { HILOG(kDebug, "ConstructTaskPhase::kLoadConfig") std::string config_path = task->server_config_path_->str(); HERMES_CONF->LoadServerConfig(config_path); @@ -33,7 +33,7 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index c6f87a83a..11fc832fe 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -22,7 +22,7 @@ class Server : public TaskLib { std::string path_; public: - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { DeviceInfo &dev_info = task->info_; alloc_.Init(id_, dev_info.capacity_, dev_info.slab_sizes_); std::string text = dev_info.mount_dir_ + @@ -40,23 +40,23 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { free(mem_ptr_); task->SetModuleComplete(); } - void Alloc(AllocateTask *task) { + void Allocate(AllocateTask *task, RunContext &ctx) { alloc_.Allocate(task->size_, *task->buffers_, task->alloc_size_); HILOG(kDebug, "Allocated {}/{} bytes ({})", task->alloc_size_, task->size_, path_); task->SetModuleComplete(); } - void Free(FreeTask *task) { + void Free(FreeTask *task, RunContext &ctx) { alloc_.Free(task->buffers_); task->SetModuleComplete(); } - void Write(WriteTask *task) { + void Write(WriteTask *task, RunContext &ctx) { ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", @@ -65,7 +65,7 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Read(ReadTask *task) { + void Read(ReadTask *task, RunContext &ctx) { memcpy(task->buf_, mem_ptr_ + task->disk_off_, task->size_); ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { @@ -75,10 +75,10 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Monitor(MonitorTask *task) { + void Monitor(MonitorTask *task, RunContext &ctx) { } - void UpdateCapacity(UpdateCapacityTask *task) { + void UpdateCapacity(UpdateCapacityTask *task, RunContext &ctx) { task->SetModuleComplete(); } diff --git a/tasks/ram_bdev/src/ram_bdev.cc b/tasks/ram_bdev/src/ram_bdev.cc index 9131018df..d5f2a042a 100644 --- a/tasks/ram_bdev/src/ram_bdev.cc +++ b/tasks/ram_bdev/src/ram_bdev.cc @@ -15,7 +15,7 @@ class Server : public TaskLib { char *mem_ptr_; public: - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { DeviceInfo &dev_info = task->info_; alloc_.Init(id_, dev_info.capacity_, dev_info.slab_sizes_); mem_ptr_ = (char*)malloc(dev_info.capacity_); @@ -24,37 +24,37 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { free(mem_ptr_); task->SetModuleComplete(); } - void Alloc(AllocateTask *task) { + void Allocate(AllocateTask *task, RunContext &ctx) { HILOG(kDebug, "Allocating {} bytes (RAM)", task->size_); alloc_.Allocate(task->size_, *task->buffers_, task->alloc_size_); HILOG(kDebug, "Allocated {} bytes (RAM)", task->alloc_size_); task->SetModuleComplete(); } - void Free(FreeTask *task) { + void Free(FreeTask *task, RunContext &ctx) { alloc_.Free(task->buffers_); task->SetModuleComplete(); } - void Write(WriteTask *task) { + void Write(WriteTask *task, RunContext &ctx) { memcpy(mem_ptr_ + task->disk_off_, task->buf_, task->size_); task->SetModuleComplete(); } - void Read(ReadTask *task) { + void Read(ReadTask *task, RunContext &ctx) { memcpy(task->buf_, mem_ptr_ + task->disk_off_, task->size_); task->SetModuleComplete(); } - void Monitor(MonitorTask *task) { + void Monitor(MonitorTask *task, RunContext &ctx) { } - void UpdateCapacity(UpdateCapacityTask *task) { + void UpdateCapacity(UpdateCapacityTask *task, RunContext &ctx) { task->SetModuleComplete(); } diff --git a/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_lib_exec.h b/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_lib_exec.h index c603dac50..95ba64a49 100644 --- a/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_lib_exec.h +++ b/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_lib_exec.h @@ -2,18 +2,18 @@ #define LABSTOR_TASK_NAME_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kCustom: { - Custom(reinterpret_cast(task)); + Custom(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/TASK_NAME/src/TASK_NAME.cc b/tasks_required/TASK_NAME/src/TASK_NAME.cc index 9142ee436..03ff393dd 100644 --- a/tasks_required/TASK_NAME/src/TASK_NAME.cc +++ b/tasks_required/TASK_NAME/src/TASK_NAME.cc @@ -12,15 +12,15 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Custom(CustomTask *task) { + void Custom(CustomTask *task, RunContext &ctx) { task->SetModuleComplete(); } diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_lib_exec.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_lib_exec.h index 2cfe22c2b..0bcdc2d02 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_lib_exec.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_lib_exec.h @@ -2,42 +2,42 @@ #define LABSTOR_LABSTOR_ADMIN_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kCreateTaskState: { - CreateTaskState(reinterpret_cast(task)); + CreateTaskState(reinterpret_cast(task), ctx); break; } case Method::kDestroyTaskState: { - DestroyTaskState(reinterpret_cast(task)); + DestroyTaskState(reinterpret_cast(task), ctx); break; } case Method::kRegisterTaskLib: { - RegisterTaskLib(reinterpret_cast(task)); + RegisterTaskLib(reinterpret_cast(task), ctx); break; } case Method::kDestroyTaskLib: { - DestroyTaskLib(reinterpret_cast(task)); + DestroyTaskLib(reinterpret_cast(task), ctx); break; } case Method::kGetOrCreateTaskStateId: { - GetOrCreateTaskStateId(reinterpret_cast(task)); + GetOrCreateTaskStateId(reinterpret_cast(task), ctx); break; } case Method::kGetTaskStateId: { - GetTaskStateId(reinterpret_cast(task)); + GetTaskStateId(reinterpret_cast(task), ctx); break; } case Method::kStopRuntime: { - StopRuntime(reinterpret_cast(task)); + StopRuntime(reinterpret_cast(task), ctx); break; } case Method::kSetWorkOrchQueuePolicy: { - SetWorkOrchQueuePolicy(reinterpret_cast(task)); + SetWorkOrchQueuePolicy(reinterpret_cast(task), ctx); break; } case Method::kSetWorkOrchProcPolicy: { - SetWorkOrchProcPolicy(reinterpret_cast(task)); + SetWorkOrchProcPolicy(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/labstor_admin/src/labstor_admin.cc b/tasks_required/labstor_admin/src/labstor_admin.cc index 957756fc9..53bc527c4 100644 --- a/tasks_required/labstor_admin/src/labstor_admin.cc +++ b/tasks_required/labstor_admin/src/labstor_admin.cc @@ -16,25 +16,25 @@ class Server : public TaskLib { public: Server() : queue_sched_(nullptr), proc_sched_(nullptr) {} - void RegisterTaskLib(RegisterTaskLibTask *task) { + void RegisterTaskLib(RegisterTaskLibTask *task, RunContext &ctx) { std::string lib_name = task->lib_name_->str(); LABSTOR_TASK_REGISTRY->RegisterTaskLib(lib_name); task->SetModuleComplete(); } - void DestroyTaskLib(DestroyTaskLibTask *task) { + void DestroyTaskLib(DestroyTaskLibTask *task, RunContext &ctx) { std::string lib_name = task->lib_name_->str(); LABSTOR_TASK_REGISTRY->DestroyTaskLib(lib_name); task->SetModuleComplete(); } - void GetOrCreateTaskStateId(GetOrCreateTaskStateIdTask *task) { + void GetOrCreateTaskStateId(GetOrCreateTaskStateIdTask *task, RunContext &ctx) { std::string state_name = task->state_name_->str(); task->id_ = LABSTOR_TASK_REGISTRY->GetOrCreateTaskStateId(state_name); task->SetModuleComplete(); } - void CreateTaskState(CreateTaskStateTask *task) { + void CreateTaskState(CreateTaskStateTask *task, RunContext &ctx) { switch (task->phase_) { case CreateTaskStatePhase::kIdAllocStart: { std::string lib_name = task->lib_name_->str(); @@ -122,25 +122,25 @@ class Server : public TaskLib { } } - void GetTaskStateId(GetTaskStateIdTask *task) { + void GetTaskStateId(GetTaskStateIdTask *task, RunContext &ctx) { std::string state_name = task->state_name_->str(); task->id_ = LABSTOR_TASK_REGISTRY->GetTaskStateId(state_name); task->SetModuleComplete(); } - void DestroyTaskState(DestroyTaskStateTask *task) { + void DestroyTaskState(DestroyTaskStateTask *task, RunContext &ctx) { LABSTOR_TASK_REGISTRY->DestroyTaskState(task->id_); task->SetModuleComplete(); } - void StopRuntime(StopRuntimeTask *task) { + void StopRuntime(StopRuntimeTask *task, RunContext &ctx) { HILOG(kInfo, "Stopping (server mode)"); LABSTOR_WORK_ORCHESTRATOR->FinalizeRuntime(); LABSTOR_THALLIUM->StopThisDaemon(); task->SetModuleComplete(); } - void SetWorkOrchQueuePolicy(SetWorkOrchQueuePolicyTask *task) { + void SetWorkOrchQueuePolicy(SetWorkOrchQueuePolicyTask *task, RunContext &ctx) { if (queue_sched_) { queue_sched_->SetModuleComplete(); } @@ -155,7 +155,7 @@ class Server : public TaskLib { task->SetModuleComplete(); } - void SetWorkOrchProcPolicy(SetWorkOrchProcPolicyTask *task) { + void SetWorkOrchProcPolicy(SetWorkOrchProcPolicyTask *task, RunContext &ctx) { if (proc_sched_) { proc_sched_->SetModuleComplete(); } diff --git a/tasks_required/proc_queue/include/proc_queue/proc_queue_lib_exec.h b/tasks_required/proc_queue/include/proc_queue/proc_queue_lib_exec.h index bea645d52..f7f4ef23f 100644 --- a/tasks_required/proc_queue/include/proc_queue/proc_queue_lib_exec.h +++ b/tasks_required/proc_queue/include/proc_queue/proc_queue_lib_exec.h @@ -2,18 +2,18 @@ #define LABSTOR_PROC_QUEUE_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kPush: { - Push(reinterpret_cast(task)); + Push(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/proc_queue/src/proc_queue.cc b/tasks_required/proc_queue/src/proc_queue.cc index 31016b865..de580f9a7 100644 --- a/tasks_required/proc_queue/src/proc_queue.cc +++ b/tasks_required/proc_queue/src/proc_queue.cc @@ -12,15 +12,15 @@ class Server : public TaskLib { public: Server() = default; - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Push(PushTask *task) { + void Push(PushTask *task, RunContext &ctx) { switch (task->phase_) { case PushTaskPhase::kSchedule: { task->ptr_ = LABSTOR_CLIENT->GetPrivatePointer(task->subtask_); diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_lib_exec.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_lib_exec.h index e0ff029c2..1bb6e92a9 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_lib_exec.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_lib_exec.h @@ -2,18 +2,18 @@ #define LABSTOR_REMOTE_QUEUE_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kPush: { - Push(reinterpret_cast(task)); + Push(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 8118cb87f..61f4001aa 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -56,7 +56,7 @@ class Server : public TaskLib { Server() = default; /** Construct remote queue */ - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { HILOG(kInfo, "(node {}) Constructing remote queue (task_node={}, task_state={}, method={})", LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, task->method_); LABSTOR_THALLIUM->RegisterRpc("RpcPushSmall", [this](const tl::request &req, @@ -78,7 +78,7 @@ class Server : public TaskLib { } /** Destroy remote queue */ - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } @@ -193,7 +193,7 @@ class Server : public TaskLib { } /** Push operation called on client */ - void Push(PushTask *task) { + void Push(PushTask *task, RunContext &ctx) { switch (task->phase_) { case PushPhase::kStart: { std::vector &xfer = task->xfer_; diff --git a/tasks_required/small_message/include/small_message/small_message_lib_exec.h b/tasks_required/small_message/include/small_message/small_message_lib_exec.h index 2f217b7ba..68aab730b 100644 --- a/tasks_required/small_message/include/small_message/small_message_lib_exec.h +++ b/tasks_required/small_message/include/small_message/small_message_lib_exec.h @@ -2,26 +2,26 @@ #define LABSTOR_SMALL_MESSAGE_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kMd: { - Md(reinterpret_cast(task)); + Md(reinterpret_cast(task), ctx); break; } case Method::kIo: { - Io(reinterpret_cast(task)); + Io(reinterpret_cast(task), ctx); break; } case Method::kMdPush: { - MdPush(reinterpret_cast(task)); + MdPush(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/small_message/src/small_message.cc b/tasks_required/small_message/src/small_message.cc index 25eec506d..16282873d 100644 --- a/tasks_required/small_message/src/small_message.cc +++ b/tasks_required/small_message/src/small_message.cc @@ -13,25 +13,25 @@ class Server : public TaskLib { int count_ = 0; public: - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Md(MdTask *task) { + void Md(MdTask *task, RunContext &ctx) { task->ret_[0] = 1; task->SetModuleComplete(); } - void MdPush(MdPushTask *task) { + void MdPush(MdPushTask *task, RunContext &ctx) { task->ret_[0] = 1; task->SetModuleComplete(); } - void Io(IoTask *task) { + void Io(IoTask *task, RunContext &ctx) { task->ret_ = 1; for (int i = 0; i < 256; ++i) { if (task->data_[i] != 10) { diff --git a/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_lib_exec.h b/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_lib_exec.h index 10565c416..21d3493ee 100644 --- a/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_lib_exec.h +++ b/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_lib_exec.h @@ -2,18 +2,18 @@ #define LABSTOR_WORCH_PROC_ROUND_ROBIN_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kSchedule: { - Schedule(reinterpret_cast(task)); + Schedule(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc b/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc index c51a0eba5..238143a27 100644 --- a/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc +++ b/tasks_required/worch_proc_round_robin/src/worch_proc_round_robin.cc @@ -10,15 +10,15 @@ namespace labstor::worch_proc_round_robin { class Server : public TaskLib { public: - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Schedule(ScheduleTask *task) { + void Schedule(ScheduleTask *task, RunContext &ctx) { int rr = 0; for (Worker &worker : LABSTOR_WORK_ORCHESTRATOR->workers_) { worker.SetCpuAffinity(rr % HERMES_SYSTEM_INFO->ncpu_); diff --git a/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_lib_exec.h b/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_lib_exec.h index 1321d5488..9223e7f1e 100644 --- a/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_lib_exec.h +++ b/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_lib_exec.h @@ -2,18 +2,18 @@ #define LABSTOR_WORCH_QUEUE_ROUND_ROBIN_LIB_EXEC_H_ /** Execute a task */ -void Run(u32 method, Task *task) override { +void Run(u32 method, Task *task, RunContext &ctx) override { switch (method) { case Method::kConstruct: { - Construct(reinterpret_cast(task)); + Construct(reinterpret_cast(task), ctx); break; } case Method::kDestruct: { - Destruct(reinterpret_cast(task)); + Destruct(reinterpret_cast(task), ctx); break; } case Method::kSchedule: { - Schedule(reinterpret_cast(task)); + Schedule(reinterpret_cast(task), ctx); break; } } diff --git a/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc b/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc index 1ea856dcb..10f53ed33 100644 --- a/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc +++ b/tasks_required/worch_queue_round_robin/src/worch_queue_round_robin.cc @@ -13,16 +13,16 @@ class Server : public TaskLib { u32 count_; public: - void Construct(ConstructTask *task) { + void Construct(ConstructTask *task, RunContext &ctx) { count_ = 0; task->SetModuleComplete(); } - void Destruct(DestructTask *task) { + void Destruct(DestructTask *task, RunContext &ctx) { task->SetModuleComplete(); } - void Schedule(ScheduleTask *task) { + void Schedule(ScheduleTask *task, RunContext &ctx) { // Check if any new queues need to be scheduled for (MultiQueue &queue : *LABSTOR_QM_RUNTIME->queue_map_) { if (queue.id_.IsNull()) { From 68189cbd71c5e2d625af29147684f1e58f554470 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 21:00:09 -0500 Subject: [PATCH 11/54] Improve bucket operation throughput by dividing unordered_map into partitions --- config/labstor_server_default.yaml | 3 +- include/labstor/api/labstor_client.h | 2 +- .../labstor_task_node_push_root.template | 2 +- include/labstor/labstor_types.h | 12 +- .../queue_manager/queue_manager_runtime.h | 2 + .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 29 +++-- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 104 +++++++++++------- .../hermes_bucket_mdm_tasks.h | 20 ++-- .../src/hermes_bucket_mdm.cc | 69 +++++++----- .../include/proc_queue/proc_queue.h | 4 +- .../include/proc_queue/proc_queue_tasks.h | 21 ++-- tasks_required/proc_queue/src/proc_queue.cc | 19 ++-- .../include/small_message/small_message.h | 6 +- 13 files changed, 179 insertions(+), 114 deletions(-) diff --git a/config/labstor_server_default.yaml b/config/labstor_server_default.yaml index d0fc09001..2a498ff9b 100644 --- a/config/labstor_server_default.yaml +++ b/config/labstor_server_default.yaml @@ -15,8 +15,9 @@ queue_manager: shm_allocator: kScalablePageAllocator # The name of the shared memory region to create shm_name: "labstor_shm" - # The size of the shared memory region to allocate + # The size of the shared memory region to allocate for general data structures shm_size: 0g + # The size of the shared memory to allocate for data buffers ### Define properties of RPCs rpc: diff --git a/include/labstor/api/labstor_client.h b/include/labstor/api/labstor_client.h index 68c53535a..f3ef03473 100644 --- a/include/labstor/api/labstor_client.h +++ b/include/labstor/api/labstor_client.h @@ -269,7 +269,7 @@ class Client : public ConfigurationManager { hipc::LPointer> push_task =\ LABSTOR_PROCESS_QUEUE->AsyncPush(task_node,\ DomainId::GetLocal(),\ - task.shm_);\ + task);\ return push_task;\ } diff --git a/include/labstor/api/template/labstor_task_node_push_root.template b/include/labstor/api/template/labstor_task_node_push_root.template index 703048125..37b909500 100644 --- a/include/labstor/api/template/labstor_task_node_push_root.template +++ b/include/labstor/api/template/labstor_task_node_push_root.template @@ -28,6 +28,6 @@ hipc::LPointer> Async##CUSTOM##Root(Args&& .. hipc::LPointer> push_task = LABSTOR_PROCESS_QUEUE->AsyncPush(task_node, DomainId::GetLocal(), - task.shm_); + task); return push_task; } \ No newline at end of file diff --git a/include/labstor/labstor_types.h b/include/labstor/labstor_types.h index c6594db35..7ecce655b 100644 --- a/include/labstor/labstor_types.h +++ b/include/labstor/labstor_types.h @@ -225,12 +225,14 @@ struct DomainId { template struct UniqueId { u32 node_id_; /**< The node the content is on */ + u32 hash_; /**< The hash of the content the ID represents */ u64 unique_; /**< A unique id for the blob */ /** Serialization */ template void serialize(Ar &ar) { ar & node_id_; + ar & hash_; ar & unique_; } @@ -239,8 +241,14 @@ struct UniqueId { UniqueId() = default; /** Emplace constructor */ - HSHM_ALWAYS_INLINE - UniqueId(u32 node_id, u64 unique) : node_id_(node_id), unique_(unique) {} + HSHM_ALWAYS_INLINE explicit + UniqueId(u32 node_id, u64 unique) + : node_id_(node_id), unique_(unique) {} + + /** Emplace constructor (+hash) */ + HSHM_ALWAYS_INLINE explicit + UniqueId(u32 node_id, u32 hash, u64 unique) + : node_id_(node_id), hash_(hash), unique_(unique) {} /** Copy constructor */ HSHM_ALWAYS_INLINE diff --git a/include/labstor/queue_manager/queue_manager_runtime.h b/include/labstor/queue_manager/queue_manager_runtime.h index 8164b1520..a489e6d9f 100644 --- a/include/labstor/queue_manager/queue_manager_runtime.h +++ b/include/labstor/queue_manager/queue_manager_runtime.h @@ -18,6 +18,7 @@ class QueueManagerRuntime : public QueueManager { public: ServerConfig *config_; size_t max_queues_; + size_t max_lanes_; hipc::split_ticket_queue *tickets_; u32 node_id_; @@ -35,6 +36,7 @@ class QueueManagerRuntime : public QueueManager { QueueManagerInfo &qm = config_->queue_manager_; // Initialize ticket queue (ticket 0 is for admin queue) max_queues_ = qm.max_queues_; + max_lanes_ = qm.max_lanes_; HSHM_MAKE_AR(shm.tickets_, alloc, max_queues_) for (u64 i = 1; i <= max_queues_; ++i) { shm.tickets_->emplace(i); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index f5d8e1b64..015cbcc2b 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -261,7 +261,7 @@ struct PutBlobTask : public Task, TaskFlags // Initialize task HILOG(kDebug, "Beginning PUT task constructor") task_node_ = task_node; - lane_hash_ = blob_id_.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPutBlob; @@ -365,7 +365,7 @@ struct GetBlobTask : public Task, TaskFlags const Context &ctx) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlob; @@ -382,6 +382,11 @@ struct GetBlobTask : public Task, TaskFlags page_size_ = ctx.page_size_; } + /** Destructor */ + ~GetBlobTask() { + HSHM_DESTROY_AR(filename_); + } + /** (De)serialize message call */ template void SaveStart(Ar &ar) { @@ -439,7 +444,7 @@ struct TagBlobTask : public Task, TaskFlags { const TagId &tag) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kTagBlob; @@ -499,7 +504,7 @@ struct BlobHasTagTask : public Task, TaskFlags { // Initialize task task_node_ = task_node; task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kBlobHasTag; @@ -619,7 +624,7 @@ struct GetBlobNameTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobName; @@ -680,7 +685,7 @@ struct GetBlobSizeTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobSize; @@ -735,7 +740,7 @@ struct GetBlobScoreTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobScore; @@ -790,7 +795,7 @@ struct GetBlobBuffersTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobBuffers; @@ -855,7 +860,7 @@ struct RenameBlobTask : public Task, TaskFlags { const hshm::charbuf &new_blob_name) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kRenameBlob; @@ -917,7 +922,7 @@ struct TruncateBlobTask : public Task, TaskFlags { u64 size) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kTruncateBlob; @@ -979,7 +984,7 @@ struct DestroyBlobTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kDestroyBlob; @@ -1048,7 +1053,7 @@ struct ReorganizeBlobTask : public Task, TaskFlags { u32 node_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.unique_; + lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kReorganizeBlob; diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index fa8812f01..b3a284023 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -27,8 +27,8 @@ class Server : public TaskLib { /**==================================== * Maps * ===================================*/ - BLOB_ID_MAP_T blob_id_map_; - BLOB_MAP_T blob_map_; + std::vector blob_id_map_; + std::vector blob_map_; std::atomic id_alloc_; /**==================================== @@ -55,6 +55,8 @@ class Server : public TaskLib { node_id_ = LABSTOR_CLIENT->node_id_; switch (task->phase_) { case ConstructTaskPhase::kCreateTaskStates: { + blob_id_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); + blob_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); target_tasks_.reserve(HERMES_SERVER_CONF.devices_.size()); for (DeviceInfo &dev : HERMES_SERVER_CONF.devices_) { std::string dev_type; @@ -153,14 +155,15 @@ class Server : public TaskLib { HILOG(kDebug, "PutBlobPhase::kCreate {}", task->blob_id_); // Get the blob info data structure hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->flags_.SetBits(HERMES_BLOB_DID_CREATE); } if (task->flags_.Any(HERMES_BLOB_DID_CREATE)) { - blob_map_.emplace(task->blob_id_, BlobInfo()); + blob_map.emplace(task->blob_id_, BlobInfo()); } - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; // Update the blob info if (task->flags_.Any(HERMES_BLOB_DID_CREATE)) { @@ -263,7 +266,8 @@ class Server : public TaskLib { /** Resolve the current sub-placement using BPM */ void PutBlobAllocatePhase(PutBlobTask *task, RunContext &ctx) { - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; PlacementSchema &schema = (*task->schema_)[task->plcmnt_idx_]; SubPlacement &placement = schema.plcmnts_[task->sub_plcmnt_idx_]; TargetInfo &bdev = *target_map_[placement.tid_]; @@ -276,7 +280,8 @@ class Server : public TaskLib { /** Wait for the current-subplacement to complete */ void PutBlobWaitAllocatePhase(PutBlobTask *task, RunContext &ctx) { - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; PlacementSchema &schema = (*task->schema_)[task->plcmnt_idx_]; ++task->sub_plcmnt_idx_; if (task->sub_plcmnt_idx_ >= schema.plcmnts_.size()) { @@ -302,7 +307,8 @@ class Server : public TaskLib { /** Update the data on storage */ void PutBlobModifyPhase(PutBlobTask *task, RunContext &ctx) { - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; char *blob_buf = task->data_ptr_.ptr_; std::vector &write_tasks = *task->bdev_writes_; size_t blob_off = 0, buf_off = 0; @@ -382,7 +388,8 @@ class Server : public TaskLib { } void GetBlobGetPhase(GetBlobTask *task, RunContext &ctx) { - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; HSHM_MAKE_AR0(task->bdev_reads_, nullptr); std::vector &read_tasks = *task->bdev_reads_; read_tasks.reserve(blob_info.buffers_.size()); @@ -436,8 +443,9 @@ class Server : public TaskLib { * Tag a blob * */ void TagBlob(TagBlobTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -450,8 +458,9 @@ class Server : public TaskLib { * Check if blob has a tag * */ void BlobHasTag(BlobHasTagTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -468,10 +477,11 @@ class Server : public TaskLib { void GetOrCreateBlobId(GetOrCreateBlobIdTask *task, RunContext &ctx) { hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); hshm::charbuf blob_name_unique = GetBlobNameWithBucket(task->tag_id_, blob_name); - auto it = blob_id_map_.find(blob_name_unique); - if (it == blob_id_map_.end()) { - task->blob_id_ = BlobId(node_id_, id_alloc_.fetch_add(1)); - blob_id_map_.emplace(blob_name_unique, task->blob_id_); + BLOB_ID_MAP_T &blob_id_map = blob_id_map_[ctx.lane_id_]; + auto it = blob_id_map.find(blob_name_unique); + if (it == blob_id_map.end()) { + task->blob_id_ = BlobId(node_id_, task->lane_hash_, id_alloc_.fetch_add(1)); + blob_id_map.emplace(blob_name_unique, task->blob_id_); task->SetModuleComplete(); return; } @@ -486,8 +496,9 @@ class Server : public TaskLib { void GetBlobId(GetBlobIdTask *task, RunContext &ctx) { hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); hshm::charbuf blob_name_unique = GetBlobNameWithBucket(task->tag_id_, blob_name); - auto it = blob_id_map_.find(blob_name_unique); - if (it == blob_id_map_.end()) { + BLOB_ID_MAP_T &blob_id_map = blob_id_map_[ctx.lane_id_]; + auto it = blob_id_map.find(blob_name_unique); + if (it == blob_id_map.end()) { task->blob_id_ = BlobId::GetNull(); task->SetModuleComplete(); HILOG(kDebug, "Failed to find blob {} in {}", blob_name.str(), task->tag_id_); @@ -502,8 +513,9 @@ class Server : public TaskLib { * Get \a blob_name BLOB name from \a blob_id BLOB id * */ void GetBlobName(GetBlobNameTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -516,8 +528,9 @@ class Server : public TaskLib { * Get \a score from \a blob_id BLOB id * */ void GetBlobSize(GetBlobSizeTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -530,8 +543,9 @@ class Server : public TaskLib { * Get \a score from \a blob_id BLOB id * */ void GetBlobScore(GetBlobScoreTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -544,8 +558,9 @@ class Server : public TaskLib { * Get \a blob_id blob's buffers * */ void GetBlobBuffers(GetBlobBuffersTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -559,14 +574,16 @@ class Server : public TaskLib { * in \a bkt_id bucket. * */ void RenameBlob(RenameBlobTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } + BLOB_ID_MAP_T &blob_id_map = blob_id_map_[ctx.lane_id_]; BlobInfo &blob = it->second; - blob_id_map_.erase(blob.name_); - blob_id_map_[blob.name_] = task->blob_id_; + blob_id_map.erase(blob.name_); + blob_id_map[blob.name_] = task->blob_id_; blob.name_ = hshm::to_charbuf(*task->new_blob_name_); task->SetModuleComplete(); } @@ -575,8 +592,9 @@ class Server : public TaskLib { * Truncate a blob to a new size * */ void TruncateBlob(TruncateBlobTask *task, RunContext &ctx) { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } @@ -591,14 +609,16 @@ class Server : public TaskLib { void DestroyBlob(DestroyBlobTask *task, RunContext &ctx) { switch (task->phase_) { case DestroyBlobPhase::kFreeBuffers: { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } + BLOB_ID_MAP_T &blob_id_map = blob_id_map_[ctx.lane_id_]; BlobInfo &blob_info = it->second; hshm::charbuf unique_name = GetBlobNameWithBucket(blob_info.tag_id_, blob_info.name_); - blob_id_map_.erase(unique_name); + blob_id_map.erase(unique_name); HSHM_MAKE_AR0(task->free_tasks_, nullptr); task->free_tasks_->reserve(blob_info.buffers_.size()); for (BufferInfo &buf : blob_info.buffers_) { @@ -620,13 +640,14 @@ class Server : public TaskLib { LABSTOR_CLIENT->DelTask(free_task); free_tasks.pop_back(); } - BlobInfo &blob_info = blob_map_[task->blob_id_]; + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + BlobInfo &blob_info = blob_map[task->blob_id_]; bkt_mdm_.AsyncUpdateSize(task->task_node_ + 1, task->tag_id_, -(ssize_t)blob_info.blob_size_, bucket_mdm::UpdateSizeMode::kAdd); HSHM_DESTROY_AR(task->free_tasks_); - blob_map_.erase(task->blob_id_); + blob_map.erase(task->blob_id_); task->SetModuleComplete(); } } @@ -638,8 +659,9 @@ class Server : public TaskLib { void ReorganizeBlob(ReorganizeBlobTask *task, RunContext &ctx) { switch (task->phase_) { case ReorganizeBlobPhase::kGet: { - auto it = blob_map_.find(task->blob_id_); - if (it == blob_map_.end()) { + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + auto it = blob_map.find(task->blob_id_); + if (it == blob_map.end()) { task->SetModuleComplete(); return; } diff --git a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h index 5e81e71ac..2ec787bad 100644 --- a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h +++ b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h @@ -153,7 +153,7 @@ struct UpdateSizeTask : public Task, TaskFlags { int mode) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kUpdateSize; @@ -236,7 +236,7 @@ struct AppendBlobSchemaTask : public Task, TaskFlags { size_t page_size) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kAppendBlobSchema; @@ -305,7 +305,7 @@ struct AppendBlobTask : public Task, TaskFlags { const Context &ctx) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kAppendBlob; @@ -481,7 +481,7 @@ struct GetTagNameTask : public Task, TaskFlags { const TagId &tag_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetTagName; @@ -539,7 +539,7 @@ struct RenameTagTask : public Task, TaskFlags { const hshm::charbuf &tag_name) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kRenameTag; @@ -604,7 +604,7 @@ struct DestroyTagTask : public Task, TaskFlags { TagId tag_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kDestroyTag; @@ -655,7 +655,7 @@ struct TagAddBlobTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kTagAddBlob; @@ -707,7 +707,7 @@ struct TagRemoveBlobTask : public Task, TaskFlags { const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kTagRemoveBlob; @@ -766,7 +766,7 @@ struct TagClearBlobsTask : public Task, TaskFlags { TagId tag_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kTagClearBlobs; @@ -816,7 +816,7 @@ struct GetSizeTask : public Task, TaskFlags { TagId tag_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = tag_id.unique_; + lane_hash_ = tag_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetSize; diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index 5db76ac26..75cae81b3 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -17,8 +17,8 @@ typedef std::unordered_map TAG_MAP_T; class Server : public TaskLib { public: - TAG_ID_MAP_T tag_id_map_; - TAG_MAP_T tag_map_; + std::vector tag_id_map_; + std::vector tag_map_; u32 node_id_; std::atomic id_alloc_; Client bkt_mdm_; @@ -31,6 +31,8 @@ class Server : public TaskLib { id_alloc_ = 0; node_id_ = LABSTOR_CLIENT->node_id_; bkt_mdm_.Init(id_); + tag_id_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); + tag_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); task->SetModuleComplete(); } @@ -48,7 +50,8 @@ class Server : public TaskLib { /** Update the size of the bucket */ void UpdateSize(UpdateSizeTask *task, RunContext &ctx) { - TagInfo &tag_info = tag_map_[task->tag_id_]; + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + TagInfo &tag_info = tag_map[task->tag_id_]; ssize_t internal_size = (ssize_t) tag_info.internal_size_; if (task->mode_ == UpdateSizeMode::kAdd) { internal_size += task->update_; @@ -69,7 +72,8 @@ class Server : public TaskLib { case AppendBlobPhase::kGetBlobIds: { HILOG(kDebug, "(node {}) Getting blob IDs for tag {} (task_node={})", LABSTOR_CLIENT->node_id_, task->tag_id_, task->task_node_) - TagInfo &tag_info = tag_map_[task->tag_id_]; + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + TagInfo &tag_info = tag_map[task->tag_id_]; size_t bucket_size = tag_info.internal_size_; size_t cur_page = bucket_size / task->page_size_; size_t cur_page_off = bucket_size % task->page_size_; @@ -189,20 +193,23 @@ class Server : public TaskLib { HILOG(kDebug, "Creating a tag") // Check if the tag exists + TAG_ID_MAP_T &tag_id_map = tag_id_map_[ctx.lane_id_]; hshm::charbuf tag_name = hshm::to_charbuf(*task->tag_name_); bool did_create = false; if (tag_name.size() > 0) { - did_create = tag_id_map_.find(tag_name) == tag_id_map_.end(); + did_create = tag_id_map.find(tag_name) == tag_id_map.end(); } // Emplace bucket if it does not already exist if (did_create) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; tag_id.unique_ = id_alloc_.fetch_add(1); + tag_id.hash_ = task->lane_hash_; tag_id.node_id_ = LABSTOR_RUNTIME->rpc_.node_id_; HILOG(kDebug, "Creating tag for the first time: {} {}", tag_name.str(), tag_id) - tag_id_map_.emplace(tag_name, tag_id); - tag_map_.emplace(tag_id, TagInfo()); - TagInfo &tag_info = tag_map_[tag_id]; + tag_id_map.emplace(tag_name, tag_id); + tag_map.emplace(tag_id, TagInfo()); + TagInfo &tag_info = tag_map[tag_id]; tag_info.name_ = tag_name; tag_info.tag_id_ = tag_id; tag_info.owner_ = task->blob_owner_; @@ -210,7 +217,7 @@ class Server : public TaskLib { } else { if (tag_name.size()) { HILOG(kDebug, "Found existing tag: {}", tag_name.str()) - tag_id = tag_id_map_[tag_name]; + tag_id = tag_id_map[tag_name]; } else { HILOG(kDebug, "Found existing tag: {}", task->tag_id_) tag_id = task->tag_id_; @@ -224,9 +231,10 @@ class Server : public TaskLib { /** Get tag ID */ void GetTagId(GetTagIdTask *task, RunContext &ctx) { + TAG_ID_MAP_T &tag_id_map = tag_id_map_[ctx.lane_id_]; hshm::charbuf tag_name = hshm::to_charbuf(*task->tag_name_); - auto it = tag_id_map_.find(tag_name); - if (it == tag_id_map_.end()) { + auto it = tag_id_map.find(tag_name); + if (it == tag_id_map.end()) { task->tag_id_ = TagId::GetNull(); task->SetModuleComplete(); return; @@ -237,8 +245,9 @@ class Server : public TaskLib { /** Get tag name */ void GetTagName(GetTagNameTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->SetModuleComplete(); return; } @@ -248,8 +257,9 @@ class Server : public TaskLib { /** Rename tag */ void RenameTag(RenameTagTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->SetModuleComplete(); return; } @@ -261,8 +271,10 @@ class Server : public TaskLib { void DestroyTag(DestroyTagTask *task, RunContext &ctx) { switch (task->phase_) { case DestroyTagPhase::kDestroyBlobs: { - TagInfo &tag = tag_map_[task->tag_id_]; - tag_id_map_.erase(tag.name_); + TAG_ID_MAP_T &tag_id_map = tag_id_map_[ctx.lane_id_]; + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + TagInfo &tag = tag_map[task->tag_id_]; + tag_id_map.erase(tag.name_); HSHM_MAKE_AR0(task->destroy_blob_tasks_, nullptr); std::vector blob_tasks = *task->destroy_blob_tasks_; blob_tasks.reserve(tag.blobs_.size()); @@ -284,8 +296,9 @@ class Server : public TaskLib { LABSTOR_CLIENT->DelTask(blob_task); blob_tasks.pop_back(); } + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; HSHM_DESTROY_AR(task->destroy_blob_tasks_); - tag_map_.erase(task->tag_id_); + tag_map.erase(task->tag_id_); task->SetModuleComplete(); } } @@ -293,8 +306,9 @@ class Server : public TaskLib { /** Add a blob to a tag */ void TagAddBlob(TagAddBlobTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->SetModuleComplete(); return; } @@ -305,8 +319,9 @@ class Server : public TaskLib { /** Remove a blob from a tag */ void TagRemoveBlob(TagRemoveBlobTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->SetModuleComplete(); return; } @@ -318,8 +333,9 @@ class Server : public TaskLib { /** Clear blobs from a tag */ void TagClearBlobs(TagClearBlobsTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->SetModuleComplete(); return; } @@ -331,8 +347,9 @@ class Server : public TaskLib { /** Get size of the bucket */ void GetSize(GetSizeTask *task, RunContext &ctx) { - auto it = tag_map_.find(task->tag_id_); - if (it == tag_map_.end()) { + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; + auto it = tag_map.find(task->tag_id_); + if (it == tag_map.end()) { task->size_ = 0; task->SetModuleComplete(); return; diff --git a/tasks_required/proc_queue/include/proc_queue/proc_queue.h b/tasks_required/proc_queue/include/proc_queue/proc_queue.h index b6aa51aab..527bf3ec1 100644 --- a/tasks_required/proc_queue/include/proc_queue/proc_queue.h +++ b/tasks_required/proc_queue/include/proc_queue/proc_queue.h @@ -59,7 +59,7 @@ class Client : public TaskLibClient { void AsyncPushConstruct(labpq::TypedPushTask *task, const TaskNode &task_node, const DomainId &domain_id, - const hipc::Pointer &subtask) { + const hipc::LPointer &subtask) { LABSTOR_CLIENT->ConstructTask( task, task_node, domain_id, id_, subtask); } @@ -68,7 +68,7 @@ class Client : public TaskLibClient { LPointer> AsyncPush(const TaskNode &task_node, const DomainId &domain_id, - const hipc::Pointer &subtask) { + const hipc::LPointer &subtask) { LPointer> push_task = LABSTOR_CLIENT->AllocateTask>(); AsyncPushConstruct(push_task.ptr_, task_node, domain_id, subtask); diff --git a/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h b/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h index 002f42436..ee5bcf500 100644 --- a/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h +++ b/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h @@ -84,9 +84,8 @@ class PushTaskPhase { * */ template struct TypedPushTask : public Task, TaskFlags { - IN hipc::Pointer subtask_; //< SHM pointer to the subtask - TEMP TaskT *subtask_ptr_; //< Pointer to the subtask (client) - TEMP TaskT *ptr_; //< Pointer to the subtask (server) + IN LPointer sub_cli_; /**< Pointer to the subtask (client + SHM) */ + TEMP LPointer sub_run_; /**< Pointer to the subtask (runtime) */ TEMP int phase_ = PushTaskPhase::kSchedule; /** SHM default constructor */ @@ -99,7 +98,7 @@ struct TypedPushTask : public Task, TaskFlags { const TaskNode &task_node, const DomainId &domain_id, const TaskStateId &state_id, - const hipc::Pointer &subtask) : Task(alloc) { + const hipc::LPointer &subtask) : Task(alloc) { // Initialize task hshm::NodeThreadId tid; task_node_ = task_node; @@ -111,8 +110,16 @@ struct TypedPushTask : public Task, TaskFlags { domain_id_ = domain_id; // Custom params - subtask_ = subtask; - subtask_ptr_ = (TaskT*)LABSTOR_CLIENT->GetPrivatePointer(subtask_); + sub_cli_ = subtask; + } + + /** Destructor */ + ~TypedPushTask() { + if (!IsFireAndForget()) { + LABSTOR_CLIENT->DelTask(sub_cli_); + } else { + LABSTOR_CLIENT->DelTask(sub_run_); + } } /** Create group */ @@ -126,7 +133,7 @@ struct TypedPushTask : public Task, TaskFlags { /** Get the task address */ HSHM_ALWAYS_INLINE TaskT* get() { - return subtask_ptr_; + return sub_cli_.ptr_; } }; diff --git a/tasks_required/proc_queue/src/proc_queue.cc b/tasks_required/proc_queue/src/proc_queue.cc index de580f9a7..7a74ce5d8 100644 --- a/tasks_required/proc_queue/src/proc_queue.cc +++ b/tasks_required/proc_queue/src/proc_queue.cc @@ -23,21 +23,24 @@ class Server : public TaskLib { void Push(PushTask *task, RunContext &ctx) { switch (task->phase_) { case PushTaskPhase::kSchedule: { - task->ptr_ = LABSTOR_CLIENT->GetPrivatePointer(task->subtask_); + task->sub_run_.shm_ = task->sub_cli_.shm_; + task->sub_run_.ptr_ = LABSTOR_CLIENT->GetPrivatePointer(task->sub_cli_.shm_); + Task *&ptr = task->sub_run_.ptr_; HILOG(kDebug, "Scheduling task {} on state {} tid {}", - task->ptr_->task_node_, task->ptr_->task_state_, GetLinuxTid()); - if (task->ptr_->IsFireAndForget()) { - task->ptr_->UnsetFireAndForget(); + ptr->task_node_, ptr->task_state_, GetLinuxTid()); + if (ptr->IsFireAndForget()) { + ptr->UnsetFireAndForget(); } - MultiQueue *real_queue = LABSTOR_CLIENT->GetQueue(QueueId(task->ptr_->task_state_)); - real_queue->Emplace(task->ptr_->prio_, task->ptr_->lane_hash_, task->subtask_); + MultiQueue *real_queue = LABSTOR_CLIENT->GetQueue(QueueId(ptr->task_state_)); + real_queue->Emplace(ptr->prio_, ptr->lane_hash_, task->sub_run_.shm_); task->phase_ = PushTaskPhase::kWaitSchedule; } case PushTaskPhase::kWaitSchedule: { - if (!task->ptr_->IsComplete()) { + Task *&ptr = task->sub_run_.ptr_; + if (!ptr->IsComplete()) { return; } - LABSTOR_CLIENT->DelTask(task->ptr_); + // TODO(llogan): handle fire & forget tasks gracefully task->SetModuleComplete(); } } diff --git a/tasks_required/small_message/include/small_message/small_message.h b/tasks_required/small_message/include/small_message/small_message.h index 6953a076e..bb5f8c1a2 100644 --- a/tasks_required/small_message/include/small_message/small_message.h +++ b/tasks_required/small_message/include/small_message/small_message.h @@ -70,7 +70,7 @@ class Client : public TaskLibClient { LPointer> push_task = AsyncMdPushRoot(domain_id); push_task->Wait(); - MdPushTask *task = push_task->subtask_ptr_; + MdPushTask *task = push_task->get(); int ret = task->ret_[0]; LABSTOR_CLIENT->DelTask(push_task); return ret; @@ -85,8 +85,8 @@ class Client : public TaskLibClient { } int IoRoot(const DomainId &domain_id) { LPointer> push_task = AsyncIoRoot(domain_id); - push_task.ptr_->Wait(); - IoTask *task = push_task.ptr_->subtask_ptr_; + push_task->Wait(); + IoTask *task = push_task->get(); int ret = task->ret_; LABSTOR_CLIENT->DelTask(push_task); return ret; From c421806b983ef42a2166fd071005eca4f76b8278 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 21:18:32 -0500 Subject: [PATCH 12/54] Small-scale, single-node concurrency seems to work again --- include/labstor/labstor_types.h | 8 +++++++- tasks/hermes/include/hermes/bucket.h | 1 + tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/labstor/labstor_types.h b/include/labstor/labstor_types.h index 7ecce655b..e4c8a1faf 100644 --- a/include/labstor/labstor_types.h +++ b/include/labstor/labstor_types.h @@ -243,7 +243,7 @@ struct UniqueId { /** Emplace constructor */ HSHM_ALWAYS_INLINE explicit UniqueId(u32 node_id, u64 unique) - : node_id_(node_id), unique_(unique) {} + : node_id_(node_id), hash_(0), unique_(unique) {} /** Emplace constructor (+hash) */ HSHM_ALWAYS_INLINE explicit @@ -254,6 +254,7 @@ struct UniqueId { HSHM_ALWAYS_INLINE UniqueId(const UniqueId &other) { node_id_ = other.node_id_; + hash_ = other.hash_; unique_ = other.unique_; } @@ -262,6 +263,7 @@ struct UniqueId { HSHM_ALWAYS_INLINE UniqueId(const UniqueId &other) { node_id_ = other.node_id_; + hash_ = other.hash_; unique_ = other.unique_; } @@ -270,6 +272,7 @@ struct UniqueId { UniqueId& operator=(const UniqueId &other) { if (this != &other) { node_id_ = other.node_id_; + hash_ = other.hash_; unique_ = other.unique_; } return *this; @@ -279,6 +282,7 @@ struct UniqueId { HSHM_ALWAYS_INLINE UniqueId(UniqueId &&other) noexcept { node_id_ = other.node_id_; + hash_ = other.hash_; unique_ = other.unique_; } @@ -287,6 +291,7 @@ struct UniqueId { UniqueId& operator=(UniqueId &&other) noexcept { if (this != &other) { node_id_ = other.node_id_; + hash_ = other.hash_; unique_ = other.unique_; } return *this; @@ -309,6 +314,7 @@ struct UniqueId { HSHM_ALWAYS_INLINE void SetNull() { node_id_ = 0; + hash_ = 0; unique_ = 0; } diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 241faf477..62879a53b 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -342,6 +342,7 @@ class Bucket { if (data_size == 0) { data_size = GetBlobSize(blob_id); } + HILOG(kInfo, "Data size: {}", data_size) LPointer data_p = LABSTOR_CLIENT->AllocateBuffer(data_size); data_size = blob_mdm_->GetBlobRoot(id_, blob_id, blob_off, data_size, data_p.shm_, ctx); char *data = data_p.ptr_; diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index b3a284023..ccd5dc763 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -531,6 +531,7 @@ class Server : public TaskLib { BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; auto it = blob_map.find(task->blob_id_); if (it == blob_map.end()) { + task->size_ = 0; task->SetModuleComplete(); return; } From f51aed41addbbb26ed93ed881f77303317420d5d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 22:14:59 -0500 Subject: [PATCH 13/54] Remove mem_ptr --- tasks/posix_bdev/src/posix_bdev.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index 11fc832fe..71cce0b2e 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -17,7 +17,6 @@ namespace hermes::posix_bdev { class Server : public TaskLib { public: SlabAllocator alloc_; - char *mem_ptr_; int fd_; std::string path_; @@ -41,7 +40,6 @@ class Server : public TaskLib { } void Destruct(DestructTask *task, RunContext &ctx) { - free(mem_ptr_); task->SetModuleComplete(); } @@ -66,7 +64,6 @@ class Server : public TaskLib { } void Read(ReadTask *task, RunContext &ctx) { - memcpy(task->buf_, mem_ptr_ + task->disk_off_, task->size_); ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { HELOG(kError, "BORG: read {} bytes, but expected {}", From 93049311ef680605324df5d35799956ff21c641d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 22:27:00 -0500 Subject: [PATCH 14/54] Use delete instead of free --- .../remote_queue/include/remote_queue/remote_queue_tasks.h | 1 - tasks_required/remote_queue/src/remote_queue.cc | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index a3c120771..9aef218a4 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -94,7 +94,6 @@ struct PushTask : public Task, TaskFlags { IN TaskState *exec_; IN u32 exec_method_; IN std::vector xfer_; - // TEMP std::vector tl_future_; TEMP std::vector tl_future_; TEMP int phase_ = PushPhase::kStart; TEMP int replica_; diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 61f4001aa..5dd865054 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -187,7 +187,7 @@ class Server : public TaskLib { if (!tl_task->IsDone()) { return; } - free(tl_task); + delete tl_task; } HandlePushReplicaEnd(task); } From e869db558380273dcef62f9c309a0eee88371edc Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Fri, 22 Sep 2023 22:35:22 -0500 Subject: [PATCH 15/54] Add finished put log --- benchmark/hermes_api_bench.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 18871c95a..15e7c8856 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -52,6 +52,7 @@ void PutTest(int nprocs, int rank, } } t.Pause(); + HILOG(kInfo, "Finished PUT") GatherTimes("Put", nprocs * blobs_per_rank * blob_size * repeat, t); } From fc2405f7d2b2fe2892e33bbb7aa95c7925d7c15f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 09:09:18 -0500 Subject: [PATCH 16/54] Add TestHermesConnect to ipc test --- test/unit/hermes/test_bucket.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index e9d131e96..164031069 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -9,6 +9,15 @@ #include "hermes/bucket.h" #include +TEST_CASE("TestHermesConnect") { + int rank, nprocs; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + HERMES->ClientInit(); + MPI_Barrier(MPI_COMM_WORLD); +} + TEST_CASE("TestHermesPut1n") { int rank, nprocs; MPI_Barrier(MPI_COMM_WORLD); From 58072a95ba3369a719bbe24b10a0cd5fd7f9ecda Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 09:18:11 -0500 Subject: [PATCH 17/54] Try making it so only one RPC group at a time per-node --- .../remote_queue/include/remote_queue/remote_queue.h | 3 ++- .../remote_queue/include/remote_queue/remote_queue_tasks.h | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue.h b/tasks_required/remote_queue/include/remote_queue/remote_queue.h index e733c2b42..f0cf88119 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue.h @@ -35,7 +35,8 @@ class Client : public TaskLibClient { std::vector queue_info = { {1, 1, qm.queue_depth_, 0}, {1, 1, qm.queue_depth_, QUEUE_LONG_RUNNING}, - {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} + // {qm.max_lanes_, qm.max_lanes_, qm.queue_depth_, QUEUE_LOW_LATENCY} + {1, 1, qm.queue_depth_, QUEUE_LOW_LATENCY} }; return LABSTOR_ADMIN->AsyncCreateTaskState( task_node, domain_id, state_name, id_, queue_info); diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 9aef218a4..63c904b0c 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -135,7 +135,10 @@ struct PushTask : public Task, TaskFlags { /** Create group */ HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; + LocalSerialize srl(group); + srl << 0; + // return TASK_UNORDERED; + return 0; } }; From a4dd782f1039154febf88be0349c73d0b09b8022 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 09:24:37 -0500 Subject: [PATCH 18/54] Use a group other than 0? --- .../remote_queue/include/remote_queue/remote_queue_tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 63c904b0c..a589a3652 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -136,7 +136,7 @@ struct PushTask : public Task, TaskFlags { HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { LocalSerialize srl(group); - srl << 0; + srl << 16; // return TASK_UNORDERED; return 0; } From ff3695364c4141997cdca8a574873b37fb157fe0 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 10:28:17 -0500 Subject: [PATCH 19/54] Add more debug logging to remote_queue --- tasks_required/remote_queue/src/remote_queue.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 5dd865054..b188d4f39 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -130,6 +130,13 @@ class Server : public TaskLib { task->exec_->id_, task->exec_method_, task->params_); + HILOG(kDebug, "(SM) Finished {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={})", + task->params_.size(), + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_); HandlePushReplicaOutput(replica, ret, task); tl_task->done_ = true; }, tl_task); @@ -173,6 +180,14 @@ class Server : public TaskLib { task->params_, tl_task->data_size_, io_type); + HILOG(kDebug, "(IO) Finished transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={}, type={})", + tl_task->data_size_, + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_, + static_cast(io_type)); HandlePushReplicaOutput(replica, ret, task); tl_task->done_ = true; }, tl_task); From 6dc06fcb6eb817699952217fe51762823ff80fb0 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 13:37:22 -0500 Subject: [PATCH 20/54] Unset long running --- include/labstor/task_registry/task.h | 5 +++++ tasks_required/labstor_admin/src/labstor_admin.cc | 7 ------- tasks_required/remote_queue/src/remote_queue.cc | 3 ++- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index 94c4932ff..4d300be94 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -335,6 +335,11 @@ struct Task : public hipc::ShmContainer { return task_flags_.Any(TASK_MARKED); } + /** Set this task as started */ + HSHM_ALWAYS_INLINE void UnsetLongRunning() { + task_flags_.UnsetBits(TASK_LONG_RUNNING); + } + /** Wait for task to complete */ template void Wait() { diff --git a/tasks_required/labstor_admin/src/labstor_admin.cc b/tasks_required/labstor_admin/src/labstor_admin.cc index 53bc527c4..eb74f4acd 100644 --- a/tasks_required/labstor_admin/src/labstor_admin.cc +++ b/tasks_required/labstor_admin/src/labstor_admin.cc @@ -48,17 +48,10 @@ class Server : public TaskLib { } // Check global registry for task state if (task->id_.IsNull()) { -// if (task->domain_id_ == DomainId::GetLocal()) { -// HILOG(kDebug, "Domain ID is local for {} (task_node={})", state_name, task->task_node_); -// task->id_ = LABSTOR_TASK_REGISTRY->GetOrCreateTaskStateId(state_name); -// task->phase_ = CreateTaskStatePhase::kStateCreate; -// } else { - HILOG(kDebug, "Domain ID is global for {} (task_node={})", state_name, task->task_node_); DomainId domain = DomainId::GetNode(1); task->get_id_task_ = LABSTOR_ADMIN->AsyncGetOrCreateTaskStateId( task->task_node_ + 1, domain, state_name).ptr_; task->phase_ = CreateTaskStatePhase::kIdAllocWait; -// } } else { HILOG(kDebug, "Domain ID is given as {} for {} (task_node={})", task->id_, state_name, task->task_node_); task->phase_ = CreateTaskStatePhase::kStateCreate; diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index b188d4f39..bbc59f793 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -323,6 +323,7 @@ class Server : public TaskLib { orig_task->UnsetStarted(); orig_task->UnsetMarked(); orig_task->UnsetDataOwner(); + orig_task->UnsetLongRunning(); queue->Emplace(orig_task->prio_, orig_task->lane_hash_, p); HILOG(kDebug, "(node {}) Executing task (task_node={}, task_state={}/{}, state_name={}, method={}, size={}, lane_hash={})", @@ -342,7 +343,6 @@ class Server : public TaskLib { TaskState *exec, TaskStateId state_id) { BinaryOutputArchive ar(DomainId::GetNode(LABSTOR_CLIENT->node_id_)); std::vector out_xfer = exec->SaveEnd(method, ar, orig_task); - LABSTOR_CLIENT->DelTask(orig_task); HILOG(kDebug, "(node {}) Returning {} bytes of data (task_node={}, task_state={}/{}, method={})", LABSTOR_CLIENT->node_id_, out_xfer[0].data_size_, @@ -350,6 +350,7 @@ class Server : public TaskLib { orig_task->task_state_, state_id, method); + LABSTOR_CLIENT->DelTask(orig_task); req.respond(std::string((char *) out_xfer[0].data_, out_xfer[0].data_size_)); } From b42b0b2e53103616bc56e17b7bf7e3ac3355e947 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 15:33:34 -0500 Subject: [PATCH 21/54] Made ConstructTask blocking --- include/labstor/work_orchestrator/worker.h | 2 +- tasks/bdev/include/bdev/bdev_tasks.h | 6 ------ .../include/hermes_adapters/hermes_adapters_tasks.h | 6 ------ .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 6 ------ .../include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h | 6 ------ tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_tasks.h | 6 ------ .../TASK_NAME/include/TASK_NAME/TASK_NAME_tasks.h | 6 ------ .../include/labstor_admin/labstor_admin_tasks.h | 4 +++- .../proc_queue/include/proc_queue/proc_queue_tasks.h | 6 ------ .../remote_queue/include/remote_queue/remote_queue_tasks.h | 6 ------ .../worch_proc_round_robin/worch_proc_round_robin_tasks.h | 6 ------ .../worch_queue_round_robin/worch_queue_round_robin_tasks.h | 6 ------ 12 files changed, 4 insertions(+), 62 deletions(-) diff --git a/include/labstor/work_orchestrator/worker.h b/include/labstor/work_orchestrator/worker.h index 1894df6ad..16907ecc7 100644 --- a/include/labstor/work_orchestrator/worker.h +++ b/include/labstor/work_orchestrator/worker.h @@ -314,7 +314,7 @@ class Worker { return; } int ret = exec->GetGroup(task->method_, task, group_); - if (ret == TASK_UNORDERED || task->IsUnordered() || task->method_ < 2) { + if (ret == TASK_UNORDERED || task->IsUnordered()) { HILOG(kDebug, "(node {}) Decreasing depth of group remains 0 (task_node={} worker={})", LABSTOR_CLIENT->node_id_, task->task_node_, id_); return; diff --git a/tasks/bdev/include/bdev/bdev_tasks.h b/tasks/bdev/include/bdev/bdev_tasks.h index 6a502882d..c7f7be735 100644 --- a/tasks/bdev/include/bdev/bdev_tasks.h +++ b/tasks/bdev/include/bdev/bdev_tasks.h @@ -44,12 +44,6 @@ struct ConstructTask : public CreateTaskStateTask { // Custom params info_ = info; } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy hermes_mdm */ diff --git a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_tasks.h b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_tasks.h index 6df2570a4..cfc03fe9a 100644 --- a/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_tasks.h +++ b/tasks/hermes_adapters/include/hermes_adapters/hermes_adapters_tasks.h @@ -45,12 +45,6 @@ struct ConstructTask : public CreateTaskStateTask { ~ConstructTask() { // Custom params } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy hermes_adapters */ diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 015cbcc2b..0fa2e80c1 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -51,12 +51,6 @@ struct ConstructTask : public CreateTaskStateTask { : CreateTaskStateTask(alloc, task_node, domain_id, state_name, "hermes_blob_mdm", id, queue_info) { } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy hermes_mdm */ diff --git a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h index 2ec787bad..2dca04d1b 100644 --- a/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h +++ b/tasks/hermes_bucket_mdm/include/hermes_bucket_mdm/hermes_bucket_mdm_tasks.h @@ -41,12 +41,6 @@ struct ConstructTask : public CreateTaskStateTask { : CreateTaskStateTask(alloc, task_node, domain_id, state_name, "hermes_bucket_mdm", id, queue_info) { } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy hermes_bucket_mdm */ diff --git a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_tasks.h b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_tasks.h index 143f0b055..3a78e6de6 100644 --- a/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_tasks.h +++ b/tasks/hermes_mdm/include/hermes_mdm/hermes_mdm_tasks.h @@ -60,12 +60,6 @@ struct ConstructTask : public CreateTaskStateTask { /** (De)serialize message return */ template void SerializeEnd(u32 replica, Ar &ar) {} - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy hermes_mdm */ diff --git a/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_tasks.h b/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_tasks.h index 52b4c6bce..0c464e258 100644 --- a/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_tasks.h +++ b/tasks_required/TASK_NAME/include/TASK_NAME/TASK_NAME_tasks.h @@ -45,12 +45,6 @@ struct ConstructTask : public CreateTaskStateTask { ~ConstructTask() { // Custom params } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy TASK_NAME */ diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h index d236af213..5d524e16f 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h @@ -210,7 +210,9 @@ struct CreateTaskStateTask : public Task, TaskFlags { /** Create group */ HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; + LocalSerialize srl(group); + srl << 16; + return 0; } }; diff --git a/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h b/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h index ee5bcf500..9372d1307 100644 --- a/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h +++ b/tasks_required/proc_queue/include/proc_queue/proc_queue_tasks.h @@ -42,12 +42,6 @@ struct ConstructTask : public CreateTaskStateTask { ~ConstructTask() { // Custom params } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy proc_queue */ diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index a589a3652..5007ed42e 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -45,12 +45,6 @@ struct ConstructTask : public CreateTaskStateTask { "remote_queue", id, queue_info) { // Custom params } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy remote_queue */ diff --git a/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_tasks.h b/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_tasks.h index d0926a34b..e9e6a27af 100644 --- a/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_tasks.h +++ b/tasks_required/worch_proc_round_robin/include/worch_proc_round_robin/worch_proc_round_robin_tasks.h @@ -39,12 +39,6 @@ struct ConstructTask : public CreateTaskStateTask { : CreateTaskStateTask(alloc, task_node, domain_id, state_name, "worch_proc_round_robin", id, queue_info) { } - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy worch_proc_round_robin */ diff --git a/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_tasks.h b/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_tasks.h index a5acbbc01..68ebda34b 100644 --- a/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_tasks.h +++ b/tasks_required/worch_queue_round_robin/include/worch_queue_round_robin/worch_queue_round_robin_tasks.h @@ -43,12 +43,6 @@ struct ConstructTask : public CreateTaskStateTask { /** Destructor */ HSHM_ALWAYS_INLINE ~ConstructTask() {} - - /** Create group */ - HSHM_ALWAYS_INLINE - u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; - } }; /** A task to destroy worch_queue_round_robin */ From a1859ccf1aa088b96b94b1b44aad8e7f7c8ce958 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 16:45:12 -0500 Subject: [PATCH 22/54] Begin supporting blocking tasks --- .../labstor/queue_manager/queues/hshm_queue.h | 1 + include/labstor/task_registry/task.h | 12 ++ include/labstor/work_orchestrator/worker.h | 1 + src/worker.cc | 13 ++ tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 71 +++++----- .../labstor_admin/labstor_admin_tasks.h | 6 +- .../labstor_admin/src/labstor_admin.cc | 127 +++++++----------- 7 files changed, 109 insertions(+), 122 deletions(-) diff --git a/include/labstor/queue_manager/queues/hshm_queue.h b/include/labstor/queue_manager/queues/hshm_queue.h index d9c32ba31..a3ee79200 100644 --- a/include/labstor/queue_manager/queues/hshm_queue.h +++ b/include/labstor/queue_manager/queues/hshm_queue.h @@ -13,6 +13,7 @@ namespace labstor { struct LaneData { hipc::Pointer p_; bool complete_; + ABT_thread thread_; LaneData() = default; diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index 4d300be94..3ff606607 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -43,6 +43,8 @@ namespace labstor { #define TASK_DATA_OWNER BIT_OPT(u32, 14) /** This task is marked */ #define TASK_MARKED BIT_OPT(u32, 15) +/** This task uses argobot wait */ +#define TASK_BLOCKING BIT_OPT(u32, 16) /** Used to define task methods */ #define TASK_METHOD_T static inline const u32 @@ -340,6 +342,16 @@ struct Task : public hipc::ShmContainer { task_flags_.UnsetBits(TASK_LONG_RUNNING); } + /** Set this task as blocking */ + HSHM_ALWAYS_INLINE void SetBlocking() { + task_flags_.SetBits(TASK_BLOCKING); + } + + /** Set this task as blocking */ + HSHM_ALWAYS_INLINE bool IsBlocking() { + return task_flags_.Any(TASK_BLOCKING); + } + /** Wait for task to complete */ template void Wait() { diff --git a/include/labstor/work_orchestrator/worker.h b/include/labstor/work_orchestrator/worker.h index 16907ecc7..3a75d7bf0 100644 --- a/include/labstor/work_orchestrator/worker.h +++ b/include/labstor/work_orchestrator/worker.h @@ -364,6 +364,7 @@ class Worker { } void PollGrouped(WorkEntry &entry); + static void RunBlocking(void *data); }; } // namespace labstor diff --git a/src/worker.cc b/src/worker.cc index 37c696d27..4de72da55 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -75,6 +75,9 @@ void Worker::PollGrouped(WorkEntry &work_entry) { LABSTOR_REMOTE_QUEUE->Disperse(task, exec, ids); task->DisableRun(); task->SetUnordered(); + } else if (task->IsBlocking()) { + task->SetStarted(); + entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunBlocking, task); } else { task->SetStarted(); exec->Run(task->method_, task, ctx); @@ -85,6 +88,9 @@ void Worker::PollGrouped(WorkEntry &work_entry) { // HILOG(kDebug, "(node {}) Ending task: task_node={} task_state={} lane={} queue={} worker={}", // LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, lane_id, queue->id_, id_); entry->complete_ = true; + if (task->IsBlocking()) { + ABT_thread_join(entry->thread_); + } RemoveTaskGroup(task, exec, work_entry.lane_id_, is_remote); EndTask(lane, task, off); } else { @@ -93,4 +99,11 @@ void Worker::PollGrouped(WorkEntry &work_entry) { } } +void Worker::RunBlocking(void *data) { + Task *task = reinterpret_cast(data); + TaskState *exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); + RunContext ctx(0); + exec->Run(task->method_, task, ctx); +} + } // namespace labstor \ No newline at end of file diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index ccd5dc763..ba5a2268b 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -53,48 +53,37 @@ class Server : public TaskLib { void Construct(ConstructTask *task, RunContext &ctx) { id_alloc_ = 0; node_id_ = LABSTOR_CLIENT->node_id_; - switch (task->phase_) { - case ConstructTaskPhase::kCreateTaskStates: { - blob_id_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); - blob_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); - target_tasks_.reserve(HERMES_SERVER_CONF.devices_.size()); - for (DeviceInfo &dev : HERMES_SERVER_CONF.devices_) { - std::string dev_type; - if (dev.mount_dir_.empty()) { - dev_type = "ram_bdev"; - dev.mount_point_ = hshm::Formatter::format("{}/{}", dev.mount_dir_, dev.dev_name_); - } else { - dev_type = "posix_bdev"; - } - targets_.emplace_back(); - bdev::Client &client = targets_.back(); - bdev::ConstructTask *create_task = client.AsyncCreate( - task->task_node_ + 1, - DomainId::GetLocal(), - "hermes_" + dev.dev_name_, - dev_type, - dev).ptr_; - target_tasks_.emplace_back(create_task); - } - task->phase_ = ConstructTaskPhase::kWaitForTaskStates; - } - - case ConstructTaskPhase::kWaitForTaskStates: { - for (int i = (int)target_tasks_.size() - 1; i >= 0; --i) { - bdev::ConstructTask *tgt_task = target_tasks_[i]; - if (!tgt_task->IsComplete()) { - return; - } - bdev::Client &client = targets_[i]; - client.AsyncCreateComplete(tgt_task); - target_map_.emplace(client.id_, &client); - target_tasks_.pop_back(); - } - blob_mdm_.Init(id_); + // Initialize blob maps + blob_id_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); + blob_map_.resize(LABSTOR_QM_RUNTIME->max_lanes_); + // Initialize target tasks + target_tasks_.reserve(HERMES_SERVER_CONF.devices_.size()); + for (DeviceInfo &dev : HERMES_SERVER_CONF.devices_) { + std::string dev_type; + if (dev.mount_dir_.empty()) { + dev_type = "ram_bdev"; + dev.mount_point_ = hshm::Formatter::format("{}/{}", dev.mount_dir_, dev.dev_name_); + } else { + dev_type = "posix_bdev"; } - } - - // Create targets + targets_.emplace_back(); + bdev::Client &client = targets_.back(); + bdev::ConstructTask *create_task = client.AsyncCreate( + task->task_node_ + 1, + DomainId::GetLocal(), + "hermes_" + dev.dev_name_, + dev_type, + dev).ptr_; + target_tasks_.emplace_back(create_task); + } + for (int i = 0; i < target_tasks_.size(); ++i) { + bdev::ConstructTask *tgt_task = target_tasks_[i]; + tgt_task->Wait<1>(); + bdev::Client &client = targets_[i]; + client.AsyncCreateComplete(tgt_task); + target_map_.emplace(client.id_, &client); + } + blob_mdm_.Init(id_); HILOG(kInfo, "Created Blob MDM") task->SetModuleComplete(); } diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h index 5d524e16f..7d9b11f42 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h @@ -147,8 +147,6 @@ struct CreateTaskStateTask : public Task, TaskFlags { IN hipc::ShmArchive state_name_; IN hipc::ShmArchive> queue_info_; INOUT TaskStateId id_; - TEMP int phase_ = 0; - TEMP GetOrCreateTaskStateIdTask *get_id_task_; /** SHM default constructor */ HSHM_ALWAYS_INLINE explicit @@ -169,7 +167,7 @@ struct CreateTaskStateTask : public Task, TaskFlags { prio_ = TaskPrio::kAdmin; task_state_ = LABSTOR_QM_CLIENT->admin_task_state_; method_ = Method::kCreateTaskState; - task_flags_.SetBits(0); + task_flags_.SetBits(TASK_BLOCKING); domain_id_ = domain_id; // Initialize @@ -198,7 +196,7 @@ struct CreateTaskStateTask : public Task, TaskFlags { template void SerializeStart(Ar &ar) { task_serialize(ar); - ar(lib_name_, state_name_, id_, queue_info_, phase_); + ar(lib_name_, state_name_, id_, queue_info_); } /** (De)serialize message return */ diff --git a/tasks_required/labstor_admin/src/labstor_admin.cc b/tasks_required/labstor_admin/src/labstor_admin.cc index eb74f4acd..0b7b12264 100644 --- a/tasks_required/labstor_admin/src/labstor_admin.cc +++ b/tasks_required/labstor_admin/src/labstor_admin.cc @@ -35,84 +35,57 @@ class Server : public TaskLib { } void CreateTaskState(CreateTaskStateTask *task, RunContext &ctx) { - switch (task->phase_) { - case CreateTaskStatePhase::kIdAllocStart: { - std::string lib_name = task->lib_name_->str(); - std::string state_name = task->state_name_->str(); - // Check local registry for task state - TaskState *task_state = LABSTOR_TASK_REGISTRY->GetTaskState(state_name, task->id_); - if (task_state) { - task->id_ = task_state->id_; - task->SetModuleComplete(); - return; - } - // Check global registry for task state - if (task->id_.IsNull()) { - DomainId domain = DomainId::GetNode(1); - task->get_id_task_ = LABSTOR_ADMIN->AsyncGetOrCreateTaskStateId( - task->task_node_ + 1, domain, state_name).ptr_; - task->phase_ = CreateTaskStatePhase::kIdAllocWait; - } else { - HILOG(kDebug, "Domain ID is given as {} for {} (task_node={})", task->id_, state_name, task->task_node_); - task->phase_ = CreateTaskStatePhase::kStateCreate; - } - return; - } - case CreateTaskStatePhase::kIdAllocWait: { - if (!task->get_id_task_->IsComplete()) { - return; - } - task->id_ = task->get_id_task_->id_; - task->phase_ = CreateTaskStatePhase::kStateCreate; - LABSTOR_CLIENT->DelTask(task->get_id_task_); - } - case CreateTaskStatePhase::kStateCreate: { - std::string lib_name = task->lib_name_->str(); - std::string state_name = task->state_name_->str(); - HILOG(kInfo, "(node {}) Creating task state {} with id {} (task_node={})", - LABSTOR_CLIENT->node_id_, state_name, task->id_, task->task_node_); - - // Verify the state isn't NULL - if (task->id_.IsNull()) { - HELOG(kError, "(node {}) The task state {} with id {} is NULL.", - LABSTOR_CLIENT->node_id_, state_name, task->id_); - task->SetModuleComplete(); - return; - } - - // Verify the state doesn't exist - if (LABSTOR_TASK_REGISTRY->TaskStateExists(task->id_)) { - HILOG(kInfo, "(node {}) The task state {} with id {} exists", - LABSTOR_CLIENT->node_id_, state_name, task->id_); - task->SetModuleComplete(); - return; - } - - // The state is being created - // NOTE(llogan): this does NOT return since task creations can have phases - task->method_ = Method::kConstruct; - - // Create the task queue for the state - QueueId qid(task->id_); - LABSTOR_QM_RUNTIME->CreateQueue( - qid, task->queue_info_->vec()); - - // Begin creating the task state - task->phase_ = 0; - task->task_state_ = task->id_; - bool ret = LABSTOR_TASK_REGISTRY->CreateTaskState( - lib_name.c_str(), - state_name.c_str(), - task->id_, - task); - if (!ret) { - task->SetModuleComplete(); - return; - } - HILOG(kInfo, "(node {}) Allocated task state {} with id {}", - LABSTOR_CLIENT->node_id_, state_name, task->task_state_); - } + std::string lib_name = task->lib_name_->str(); + std::string state_name = task->state_name_->str(); + // Check local registry for task state + TaskState *task_state = LABSTOR_TASK_REGISTRY->GetTaskState(state_name, task->id_); + if (task_state) { + task->id_ = task_state->id_; + task->SetModuleComplete(); + return; + } + // Check global registry for task state + if (task->id_.IsNull()) { + DomainId domain = DomainId::GetNode(1); + LPointer get_id = + LABSTOR_ADMIN->AsyncGetOrCreateTaskStateId(task->task_node_ + 1, domain, state_name); + get_id->Wait<1>(); + task->id_ = get_id->id_; + LABSTOR_CLIENT->DelTask(get_id); + } + // Create the task state + HILOG(kInfo, "(node {}) Creating task state {} with id {} (task_node={})", + LABSTOR_CLIENT->node_id_, state_name, task->id_, task->task_node_); + if (task->id_.IsNull()) { + HELOG(kError, "(node {}) The task state {} with id {} is NULL.", + LABSTOR_CLIENT->node_id_, state_name, task->id_); + task->SetModuleComplete(); + return; + } + // Verify the state doesn't exist + if (LABSTOR_TASK_REGISTRY->TaskStateExists(task->id_)) { + HILOG(kInfo, "(node {}) The task state {} with id {} exists", + LABSTOR_CLIENT->node_id_, state_name, task->id_); + task->SetModuleComplete(); + return; + } + // Create the task queue for the state + QueueId qid(task->id_); + LABSTOR_QM_RUNTIME->CreateQueue( + qid, task->queue_info_->vec()); + // Run the task state's submethod + task->method_ = Method::kConstruct; + bool ret = LABSTOR_TASK_REGISTRY->CreateTaskState( + lib_name.c_str(), + state_name.c_str(), + task->id_, + task); + if (!ret) { + task->SetModuleComplete(); + return; } + HILOG(kInfo, "(node {}) Allocated task state {} with id {}", + LABSTOR_CLIENT->node_id_, state_name, task->task_state_); } void GetTaskStateId(GetTaskStateIdTask *task, RunContext &ctx) { From 4a8f3ebef8c3221010828cfe529e4b755899a493 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 16:57:30 -0500 Subject: [PATCH 23/54] Make remote queue push a blocking task --- .../include/remote_queue/remote_queue_tasks.h | 2 +- .../remote_queue/src/remote_queue.cc | 194 ++++++------------ 2 files changed, 62 insertions(+), 134 deletions(-) diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 5007ed42e..53b30677a 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -114,7 +114,7 @@ struct PushTask : public Task, TaskFlags { prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPush; - task_flags_.SetBits(TASK_LOW_LATENCY); + task_flags_.SetBits(TASK_LOW_LATENCY | TASK_BLOCKING); domain_id_ = domain_id; // Custom params diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index bbc59f793..2ca1b70f5 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -15,39 +15,6 @@ SERIALIZE_ENUM(labstor::IoType); namespace labstor::remote_queue { -/** Parameters for spawning async thread for thallium */ -struct ThalliumTask { - int replica_; - PushTask *task_; - DomainId domain_id_; - IoType io_type_; - char *data_; - size_t data_size_; - ABT_thread thread_; - bool done_; - - /** Default constructor */ - ThalliumTask() : done_(false) {}; - - /** Emplace constructor Small */ - ThalliumTask(int replica, PushTask *task, DomainId domain_id) : - replica_(replica), task_(task), domain_id_(domain_id), done_(false) {} - - /** Emplace constructor I/O */ - ThalliumTask(int replica, PushTask *task, DomainId domain_id, - IoType io_type, void *data, size_t data_size) : - replica_(replica), task_(task), domain_id_(domain_id), - io_type_(io_type), data_((char*)data), data_size_(data_size), done_(false) {} - - /** Check if the thread is finished */ - bool IsDone() { - if (done_) { - ABT_thread_join(thread_); - } - return done_; - } -}; - class Server : public TaskLib { public: labstor::remote_queue::Client client_; @@ -112,35 +79,26 @@ class Server : public TaskLib { task->params_ = std::string((char *) xfer[0].data_, xfer[0].data_size_); for (int replica = 0; replica < task->domain_ids_.size(); ++replica) { DomainId domain_id = task->domain_ids_[replica]; - ThalliumTask *tl_task = new ThalliumTask(replica, task, domain_id); - tl_task->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread([](void *data) { - ThalliumTask *tl_task = (ThalliumTask *) data; - DomainId &domain_id = tl_task->domain_id_; - PushTask *task = tl_task->task_; - int replica = tl_task->replica_; - HILOG(kDebug, "(SM) Transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={})", - task->params_.size(), - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_, - LABSTOR_CLIENT->node_id_, - domain_id.id_); - std::string ret = LABSTOR_THALLIUM->SyncCall(domain_id.id_, - "RpcPushSmall", - task->exec_->id_, - task->exec_method_, - task->params_); - HILOG(kDebug, "(SM) Finished {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={})", - task->params_.size(), - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_, - LABSTOR_CLIENT->node_id_, - domain_id.id_); - HandlePushReplicaOutput(replica, ret, task); - tl_task->done_ = true; - }, tl_task); - task->tl_future_.emplace_back(tl_task); + HILOG(kDebug, "(SM) Transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={})", + task->params_.size(), + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_); + std::string ret = LABSTOR_THALLIUM->SyncCall(domain_id.id_, + "RpcPushSmall", + task->exec_->id_, + task->exec_method_, + task->params_); + HILOG(kDebug, "(SM) Finished {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={})", + task->params_.size(), + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_); + HandlePushReplicaOutput(replica, ret, task); } } @@ -153,85 +111,55 @@ class Server : public TaskLib { } for (int replica = 0; replica < task->domain_ids_.size(); ++replica) { DomainId domain_id = task->domain_ids_[replica]; - ThalliumTask *tl_task = new ThalliumTask( - replica, task, domain_id, io_type, - xfer[0].data_, xfer[0].data_size_); - tl_task->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread([](void *data) { - ThalliumTask *tl_task = (ThalliumTask *) data; - DomainId &domain_id = tl_task->domain_id_; - PushTask *task = tl_task->task_; - int replica = tl_task->replica_; - IoType &io_type = tl_task->io_type_; - HILOG(kDebug, "(IO) Transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={}, type={})", - tl_task->data_size_, - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_, - LABSTOR_CLIENT->node_id_, - domain_id.id_, - static_cast(io_type)); - std::string ret = LABSTOR_THALLIUM->SyncIoCall(domain_id.id_, - "RpcPushBulk", - io_type, - tl_task->data_, - tl_task->data_size_, - task->exec_->id_, - task->exec_method_, - task->params_, - tl_task->data_size_, - io_type); - HILOG(kDebug, "(IO) Finished transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={}, type={})", - tl_task->data_size_, - task->orig_task_->task_node_, - task->orig_task_->task_state_, - task->orig_task_->method_, - LABSTOR_CLIENT->node_id_, - domain_id.id_, - static_cast(io_type)); - HandlePushReplicaOutput(replica, ret, task); - tl_task->done_ = true; - }, tl_task); - task->tl_future_.emplace_back(tl_task); - } - } - - /** Wait for client to finish message */ - void ClientWaitForMessage(PushTask *task) { - for (; task->replica_ < task->tl_future_.size(); ++task->replica_) { - ThalliumTask *tl_task = (ThalliumTask *) task->tl_future_[task->replica_]; - if (!tl_task->IsDone()) { - return; - } - delete tl_task; + char *data = (char*)xfer[0].data_; + size_t data_size = xfer[0].data_size_; + HILOG(kDebug, "(IO) Transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={}, type={})", + data_size, + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_, + static_cast(io_type)); + std::string ret = LABSTOR_THALLIUM->SyncIoCall(domain_id.id_, + "RpcPushBulk", + io_type, + data, + data_size, + task->exec_->id_, + task->exec_method_, + task->params_, + data_size, + io_type); + HILOG(kDebug, "(IO) Finished transferring {} bytes of data (task_node={}, task_state={}, method={}, from={}, to={}, type={})", + data_size, + task->orig_task_->task_node_, + task->orig_task_->task_state_, + task->orig_task_->method_, + LABSTOR_CLIENT->node_id_, + domain_id.id_, + static_cast(io_type)); + HandlePushReplicaOutput(replica, ret, task); } - HandlePushReplicaEnd(task); } /** Push operation called on client */ void Push(PushTask *task, RunContext &ctx) { - switch (task->phase_) { - case PushPhase::kStart: { - std::vector &xfer = task->xfer_; - task->tl_future_.reserve(task->domain_ids_.size()); - switch (task->xfer_.size()) { - case 1: { - ClientSmallPush(xfer, task); - break; - } - case 2: { - ClientIoPush(xfer, task); - break; - } - default: { - HELOG(kFatal, "The task {}/{} does not support remote calls", task->task_state_, task->method_); - } - } - task->phase_ = PushPhase::kWait; + std::vector &xfer = task->xfer_; + switch (task->xfer_.size()) { + case 1: { + ClientSmallPush(xfer, task); + break; } - case PushPhase::kWait: { - ClientWaitForMessage(task); + case 2: { + ClientIoPush(xfer, task); + break; + } + default: { + HELOG(kFatal, "The task {}/{} does not support remote calls", task->task_state_, task->method_); } } + HandlePushReplicaEnd(task); } private: From 4717b8874f11db533dba76419f0c95d862679945 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 17:10:25 -0500 Subject: [PATCH 24/54] don't constantly spawn async threads --- src/worker.cc | 2 +- .../remote_queue/include/remote_queue/remote_queue_tasks.h | 4 ---- tasks_required/remote_queue/src/remote_queue.cc | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/worker.cc b/src/worker.cc index 4de72da55..54dce53a4 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -75,7 +75,7 @@ void Worker::PollGrouped(WorkEntry &work_entry) { LABSTOR_REMOTE_QUEUE->Disperse(task, exec, ids); task->DisableRun(); task->SetUnordered(); - } else if (task->IsBlocking()) { + } else if (task->IsBlocking() && !task->IsStarted()) { task->SetStarted(); entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunBlocking, task); } else { diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 53b30677a..781b3e285 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -88,9 +88,6 @@ struct PushTask : public Task, TaskFlags { IN TaskState *exec_; IN u32 exec_method_; IN std::vector xfer_; - TEMP std::vector tl_future_; - TEMP int phase_ = PushPhase::kStart; - TEMP int replica_; TEMP std::string params_; /** SHM default constructor */ @@ -123,7 +120,6 @@ struct PushTask : public Task, TaskFlags { exec_ = exec; exec_method_ = exec_method; xfer_ = std::move(xfer); - replica_ = 0; } /** Create group */ diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 2ca1b70f5..096bcfe6a 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -146,7 +146,7 @@ class Server : public TaskLib { /** Push operation called on client */ void Push(PushTask *task, RunContext &ctx) { std::vector &xfer = task->xfer_; - switch (task->xfer_.size()) { + switch (xfer.size()) { case 1: { ClientSmallPush(xfer, task); break; From f67d2302c4185f1f40617358075dd627c0effcba Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 17:11:01 -0500 Subject: [PATCH 25/54] don't constantly spawn async threads --- src/worker.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/worker.cc b/src/worker.cc index 54dce53a4..1813d5a9e 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -75,7 +75,8 @@ void Worker::PollGrouped(WorkEntry &work_entry) { LABSTOR_REMOTE_QUEUE->Disperse(task, exec, ids); task->DisableRun(); task->SetUnordered(); - } else if (task->IsBlocking() && !task->IsStarted()) { + } else if (task->IsBlocking()) { + task->DisableRun(); task->SetStarted(); entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunBlocking, task); } else { From c8038963e85e6e46c7e4773b843bfef48c8d03c5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 17:15:27 -0500 Subject: [PATCH 26/54] Make CreateTaskState unordered again --- .../labstor_admin/include/labstor_admin/labstor_admin_tasks.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h index 7d9b11f42..61a50718a 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h @@ -208,9 +208,7 @@ struct CreateTaskStateTask : public Task, TaskFlags { /** Create group */ HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { - LocalSerialize srl(group); - srl << 16; - return 0; + return TASK_UNORDERED; } }; From 5e2d8e904fd38f61e9233c92f49869453d141641 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 20:42:34 -0500 Subject: [PATCH 27/54] Add cooperative tasking using boost --- CMakeLists.txt | 11 +- ci/hermes/packages/hermes_shm/package.py | 9 +- .../labstor_admin/labstor_admin_tasks.h | 4 +- test/unit/CMakeLists.txt | 3 +- test/unit/boost/CMakeLists.txt | 43 ++++++++ test/unit/boost/test_boost.cc | 100 ++++++++++++++++++ test/unit/boost/test_init.cc | 22 ++++ test/unit/boost/test_init.h | 19 ++++ 8 files changed, 204 insertions(+), 7 deletions(-) create mode 100644 test/unit/boost/CMakeLists.txt create mode 100644 test/unit/boost/test_boost.cc create mode 100644 test/unit/boost/test_init.cc create mode 100644 test/unit/boost/test_init.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 481df1597..f2b98ba34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,14 @@ if(thallium_FOUND) message(STATUS "found thallium at ${thallium_DIR}") endif() +# Boost +find_package(Boost REQUIRED COMPONENTS regex system filesystem fiber REQUIRED) +if (Boost_FOUND) + message(STATUS "found boost at ${Boost_INCLUDE_DIRS}") +endif() +include_directories(${Boost_INCLUDE_DIRS}) +message("Boost: ${Boost_LIBRARIES}") + #------------------------------------------------------------------------------ # Setup CMake Environment #------------------------------------------------------------------------------ @@ -158,7 +166,8 @@ set(Labstor_CLIENT_DEPS labstor_client) set(Labstor_RUNTIME_LIBRARIES ${Labstor_CLIENT_LIBRARIES} - labstor_runtime) + labstor_runtime + ${Boost_LIBRARIES}) set(Labstor_RUNTIME_DEPS labstor_client labstor_runtime) diff --git a/ci/hermes/packages/hermes_shm/package.py b/ci/hermes/packages/hermes_shm/package.py index 9225ceb29..76343ff00 100644 --- a/ci/hermes/packages/hermes_shm/package.py +++ b/ci/hermes/packages/hermes_shm/package.py @@ -5,11 +5,12 @@ class HermesShm(CMakePackage): git = "https://github.com/lukemartinlogan/hermes_shm.git" version('master', branch='master') depends_on('mochi-thallium~cereal@0.10.1') - # depends_on('catch2@3.0.1') - depends_on('catch2') - depends_on('mpi') - depends_on('boost@1.7:') + depends_on('catch2@3.0.1') + # depends_on('mpi') + depends_on('mpich@3.3.2') + depends_on('boost@1.7: +context +fiber') depends_on('cereal') + depends_on('yaml-cpp') depends_on('doxygen@1.9.3') variant('debug', default=False, description='Build shared libraries') diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h index 61a50718a..7d9b11f42 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h @@ -208,7 +208,9 @@ struct CreateTaskStateTask : public Task, TaskFlags { /** Create group */ HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { - return TASK_UNORDERED; + LocalSerialize srl(group); + srl << 16; + return 0; } }; diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index b8b63529d..dbfd08a04 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -7,4 +7,5 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_SOURCE_DIR}/tasks/labstor_admin/include) add_subdirectory(ipc) add_subdirectory(hermes) -add_subdirectory(hermes_adapters) \ No newline at end of file +add_subdirectory(hermes_adapters) +add_subdirectory(boost) \ No newline at end of file diff --git a/test/unit/boost/CMakeLists.txt b/test/unit/boost/CMakeLists.txt new file mode 100644 index 000000000..81746e03f --- /dev/null +++ b/test/unit/boost/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.10) +project(labstor) + +set(CMAKE_CXX_STANDARD 17) + +#------------------------------------------------------------------------------ +# Build Tests +#------------------------------------------------------------------------------ + +add_executable(test_boost_exec + ${TEST_MAIN}/main.cc + test_init.cc + test_boost.cc +) +add_dependencies(test_boost_exec + ${Labstor_RUNTIME_DEPS} hermes) +target_link_libraries(test_boost_exec + ${Labstor_RUNTIME_LIBRARIES} Catch2::Catch2 MPI::MPI_CXX) + +#------------------------------------------------------------------------------ +# Test Cases +#------------------------------------------------------------------------------ + +add_test(NAME test_boost COMMAND + ${CMAKE_BINARY_DIR}/bin/test_messages "TestBoost") + +#------------------------------------------------------------------------------ +# Install Targets +#------------------------------------------------------------------------------ +install(TARGETS + test_boost_exec + EXPORT + ${LABSTOR_EXPORTED_TARGETS} + LIBRARY DESTINATION ${LABSTOR_INSTALL_LIB_DIR} + ARCHIVE DESTINATION ${LABSTOR_INSTALL_LIB_DIR} + RUNTIME DESTINATION ${LABSTOR_INSTALL_BIN_DIR}) + +#----------------------------------------------------------------------------- +# Coverage +#----------------------------------------------------------------------------- +if(LABSTOR_ENABLE_COVERAGE) + set_coverage_flags(test_boost_exec) +endif() diff --git a/test/unit/boost/test_boost.cc b/test/unit/boost/test_boost.cc new file mode 100644 index 000000000..15ae5161b --- /dev/null +++ b/test/unit/boost/test_boost.cc @@ -0,0 +1,100 @@ +// +// Created by llogan on 7/1/23. +// + +#include "basic_test.h" +#include +#include "hermes_shm/util/timer.h" +#include "hermes_shm/util/logging.h" + +void function() { +} + +TEST_CASE("TestBoostFiber") { + hshm::Timer t; + t.Resume(); + size_t ops = (1 << 20); + + for (size_t i = 0; i < ops; ++i) { + int a; + boost::context::fiber source([&a](boost::context::fiber &&sink) { + function(); + return std::move(sink); + }); + } + + t.Pause(); + HILOG(kInfo, "Latency: {} MOps", ops / t.GetUsec()); +} + +template< std::size_t Max, std::size_t Default, std::size_t Min > +class simple_stack_allocator +{ + public: + static std::size_t maximum_stacksize() + { return Max; } + + static std::size_t default_stacksize() + { return Default; } + + static std::size_t minimum_stacksize() + { return Min; } + + void * allocate( std::size_t size) const + { + BOOST_ASSERT( minimum_stacksize() <= size); + BOOST_ASSERT( maximum_stacksize() >= size); + + void * limit = malloc( size); + if ( ! limit) throw std::bad_alloc(); + + return static_cast< char * >( limit) + size; + } + + void deallocate( void * vp, std::size_t size) const + { + BOOST_ASSERT( vp); + BOOST_ASSERT( minimum_stacksize() <= size); + BOOST_ASSERT( maximum_stacksize() >= size); + + void * limit = static_cast< char * >( vp) - size; + free( limit); + } +}; + +typedef simple_stack_allocator< + 8 * 1024 * 1024, + 64 * 1024, + 32> stack_allocator; + +int value1; +namespace ctx = boost::context::detail; + +void f3( ctx::transfer_t t_) { + ++value1; + ctx::transfer_t t = ctx::jump_fcontext( t_.fctx, 0); + ++value1; + ctx::jump_fcontext( t.fctx, t.data); +} + + +TEST_CASE("TestBoostFcontext") { + value1 = 0; + stack_allocator alloc; + int size = 128; + + hshm::Timer t; + t.Resume(); + size_t ops = (1 << 20); + + void *sp = alloc.allocate(size); + for (size_t i = 0; i < ops; ++i) { + ctx::fcontext_t ctx = ctx::make_fcontext(sp, size, f3); + ctx::transfer_t t = ctx::jump_fcontext(ctx, 0); + ctx::jump_fcontext(t.fctx, 0); + } + alloc.deallocate(sp, size); + + t.Pause(); + HILOG(kInfo, "Latency: {} MOps", ops / t.GetUsec()); +} \ No newline at end of file diff --git a/test/unit/boost/test_init.cc b/test/unit/boost/test_init.cc new file mode 100644 index 000000000..e101758d3 --- /dev/null +++ b/test/unit/boost/test_init.cc @@ -0,0 +1,22 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Distributed under BSD 3-Clause license. * + * Copyright by The HDF Group. * + * Copyright by the Illinois Institute of Technology. * + * All rights reserved. * + * * + * This file is part of Hermes. The full Hermes copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the top directory. If you do not * + * have access to the file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + +#include "labstor/api/labstor_client.h" +#include "basic_test.h" +#include "test_init.h" + +void MainPretest() { +} + +void MainPosttest() { +} diff --git a/test/unit/boost/test_init.h b/test/unit/boost/test_init.h new file mode 100644 index 000000000..a6c71f3ec --- /dev/null +++ b/test/unit/boost/test_init.h @@ -0,0 +1,19 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Distributed under BSD 3-Clause license. * + * Copyright by The HDF Group. * + * Copyright by the Illinois Institute of Technology. * + * All rights reserved. * + * * + * This file is part of Hermes. The full Hermes copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the top directory. If you do not * + * have access to the file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + + +#ifndef LABSTOR_TEST_UNIT_IPC_TEST_INIT_H_ +#define LABSTOR_TEST_UNIT_IPC_TEST_INIT_H_ + +#include "labstor/labstor_types.h" + +#endif // LABSTOR_TEST_UNIT_IPC_TEST_INIT_H_ From 576c9a655d77967287570e3d236854ffd35d5b23 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 23:14:10 -0500 Subject: [PATCH 28/54] Add high-performance co-routines for complex tasks --- include/labstor/labstor_types.h | 4 ++ include/labstor/task_registry/task.h | 62 ++++++++++++++----- include/labstor/work_orchestrator/worker.h | 7 ++- src/worker.cc | 41 +++++++++--- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- .../labstor_admin/labstor_admin_tasks.h | 2 +- .../labstor_admin/src/labstor_admin.cc | 2 +- .../include/remote_queue/remote_queue_tasks.h | 2 +- .../remote_queue/src/remote_queue.cc | 2 +- test/unit/boost/test_boost.cc | 38 +++++++++--- 10 files changed, 124 insertions(+), 38 deletions(-) diff --git a/include/labstor/labstor_types.h b/include/labstor/labstor_types.h index e4c8a1faf..9d01597b3 100644 --- a/include/labstor/labstor_types.h +++ b/include/labstor/labstor_types.h @@ -28,6 +28,10 @@ #include "hermes_shm/util/singleton.h" #include "hermes_shm/constants/macros.h" +#include + +namespace bctx = boost::context::detail; + typedef uint8_t u8; /**< 8-bit unsigned integer */ typedef uint16_t u16; /**< 16-bit unsigned integer */ typedef uint32_t u32; /**< 32-bit unsigned integer */ diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index 3ff606607..2c6b14aa7 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -43,12 +43,19 @@ namespace labstor { #define TASK_DATA_OWNER BIT_OPT(u32, 14) /** This task is marked */ #define TASK_MARKED BIT_OPT(u32, 15) +/** This task uses co-routine wait */ +#define TASK_COROUTINE BIT_OPT(u32, 16) /** This task uses argobot wait */ -#define TASK_BLOCKING BIT_OPT(u32, 16) +#define TASK_PREEMPTIVE BIT_OPT(u32, 17) /** Used to define task methods */ #define TASK_METHOD_T static inline const u32 +/** Used to indicate Yield to use */ +#define TASK_YIELD_STD 0 +#define TASK_YIELD_CO 1 +#define TASK_YIELD_ABT 2 + /** The baseline set of tasks */ struct TaskMethod { TASK_METHOD_T kConstruct = 0; /**< The constructor of the task */ @@ -232,6 +239,9 @@ struct Task : public hipc::ShmContainer { u32 lane_hash_; /**< Determine the lane a task is keyed to */ u32 method_; /**< The method to call in the state */ bitfield32_t task_flags_; /**< Properties of the task */ + bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ + size_t stack_size_ = KILOBYTES(256); /**< The size of the stack for the task (runtime) */ + void *stack_ptr_; /**< The pointer to the stack (runtime) */ /**==================================== * Task Helpers @@ -343,29 +353,51 @@ struct Task : public hipc::ShmContainer { } /** Set this task as blocking */ - HSHM_ALWAYS_INLINE void SetBlocking() { - task_flags_.SetBits(TASK_BLOCKING); + HSHM_ALWAYS_INLINE bool IsCoroutine() { + return task_flags_.Any(TASK_COROUTINE); } /** Set this task as blocking */ - HSHM_ALWAYS_INLINE bool IsBlocking() { - return task_flags_.Any(TASK_BLOCKING); + HSHM_ALWAYS_INLINE bool IsPreemptive() { + return task_flags_.Any(TASK_PREEMPTIVE); + } + + /** Yield the task */ + template + HSHM_ALWAYS_INLINE + void Yield() { + if constexpr (THREAD_MODEL == TASK_YIELD_STD) { + HERMES_THREAD_MODEL->Yield(); + } else if constexpr (THREAD_MODEL == TASK_YIELD_CO) { + jmp_ = bctx::jump_fcontext(jmp_.fctx, nullptr); + } else if constexpr (THREAD_MODEL == TASK_YIELD_ABT) { + ABT_thread_yield(); + } } /** Wait for task to complete */ template void Wait() { while (!IsComplete()) { - for (int i = 0; i < 100000; ++i) { - if (IsComplete()) { - return; - } - } - if constexpr(THREAD_MODEL == 0) { - HERMES_THREAD_MODEL->Yield(); - } else { - ABT_thread_yield(); - } +// for (int i = 0; i < 100000; ++i) { +// if (IsComplete()) { +// return; +// } +// } + Yield(); + } + } + + /** Wait for task to complete */ + template + void Wait(Task *yield_task) { + while (!IsComplete()) { +// for (int i = 0; i < 100000; ++i) { +// if (IsComplete()) { +// return; +// } +// } + yield_task->Yield(); } } diff --git a/include/labstor/work_orchestrator/worker.h b/include/labstor/work_orchestrator/worker.h index 3a75d7bf0..1e46009a3 100644 --- a/include/labstor/work_orchestrator/worker.h +++ b/include/labstor/work_orchestrator/worker.h @@ -29,6 +29,10 @@ struct WorkEntry { LaneGroup *group_; MultiQueue *queue_; + TaskState *exec_; + Task *task_; + RunContext ctx_; + /** Default constructor */ HSHM_ALWAYS_INLINE WorkEntry() = default; @@ -364,7 +368,8 @@ class Worker { } void PollGrouped(WorkEntry &entry); - static void RunBlocking(void *data); + static void RunBlocking(bctx::transfer_t t); + static void RunPreemptive(void *data); }; } // namespace labstor diff --git a/src/worker.cc b/src/worker.cc index 1813d5a9e..575916f5b 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -41,12 +41,13 @@ void Worker::Run() { } void Worker::PollGrouped(WorkEntry &work_entry) { - Lane *lane = work_entry.lane_; - Task *task; - LaneData *entry; int off = 0; - RunContext ctx; + Lane *&lane = work_entry.lane_; + Task *&task = work_entry.task_; + TaskState *&exec = work_entry.exec_; + RunContext &ctx = work_entry.ctx_; ctx.lane_id_ = work_entry.lane_id_; + LaneData *entry; for (int i = 0; i < 1024; ++i) { // Get the task message if (lane->peek(entry, off).IsNull()) { @@ -58,7 +59,7 @@ void Worker::PollGrouped(WorkEntry &work_entry) { } task = LABSTOR_CLIENT->GetPrivatePointer(entry->p_); // Get the task state - TaskState *exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); + exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); if (!exec) { HELOG(kFatal, "(node {}) Could not find the task state: {}", LABSTOR_CLIENT->node_id_, task->task_state_); @@ -75,10 +76,19 @@ void Worker::PollGrouped(WorkEntry &work_entry) { LABSTOR_REMOTE_QUEUE->Disperse(task, exec, ids); task->DisableRun(); task->SetUnordered(); - } else if (task->IsBlocking()) { + } else if (task->IsCoroutine()) { + if (!task->IsStarted()) { + task->stack_ptr_ = malloc(task->stack_size_); + task->jmp_.fctx = bctx::make_fcontext( + (char*)task->stack_ptr_ + task->stack_size_, + task->stack_size_, &RunBlocking); + task->SetStarted(); + } + task->jmp_ = bctx::jump_fcontext(task->jmp_.fctx, task); + HILOG(kInfo, "Jumping into function") + } else if (task->IsPreemptive()) { task->DisableRun(); - task->SetStarted(); - entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunBlocking, task); + entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunPreemptive, task); } else { task->SetStarted(); exec->Run(task->method_, task, ctx); @@ -89,7 +99,9 @@ void Worker::PollGrouped(WorkEntry &work_entry) { // HILOG(kDebug, "(node {}) Ending task: task_node={} task_state={} lane={} queue={} worker={}", // LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, lane_id, queue->id_, id_); entry->complete_ = true; - if (task->IsBlocking()) { + if (task->IsCoroutine()) { + free(task->stack_ptr_); + } else if (task->IsPreemptive()) { ABT_thread_join(entry->thread_); } RemoveTaskGroup(task, exec, work_entry.lane_id_, is_remote); @@ -100,7 +112,16 @@ void Worker::PollGrouped(WorkEntry &work_entry) { } } -void Worker::RunBlocking(void *data) { +void Worker::RunBlocking(bctx::transfer_t t) { + Task *task = reinterpret_cast(t.data); + task->jmp_ = t; + TaskState *exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); + RunContext ctx(0); + exec->Run(task->method_, task, ctx); + task->Yield(); +} + +void Worker::RunPreemptive(void *data) { Task *task = reinterpret_cast(data); TaskState *exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); RunContext ctx(0); diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index ba5a2268b..33cd20c70 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -78,7 +78,7 @@ class Server : public TaskLib { } for (int i = 0; i < target_tasks_.size(); ++i) { bdev::ConstructTask *tgt_task = target_tasks_[i]; - tgt_task->Wait<1>(); + tgt_task->Wait(task); bdev::Client &client = targets_[i]; client.AsyncCreateComplete(tgt_task); target_map_.emplace(client.id_, &client); diff --git a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h index 7d9b11f42..dd28394e9 100644 --- a/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h +++ b/tasks_required/labstor_admin/include/labstor_admin/labstor_admin_tasks.h @@ -167,7 +167,7 @@ struct CreateTaskStateTask : public Task, TaskFlags { prio_ = TaskPrio::kAdmin; task_state_ = LABSTOR_QM_CLIENT->admin_task_state_; method_ = Method::kCreateTaskState; - task_flags_.SetBits(TASK_BLOCKING); + task_flags_.SetBits(TASK_COROUTINE); domain_id_ = domain_id; // Initialize diff --git a/tasks_required/labstor_admin/src/labstor_admin.cc b/tasks_required/labstor_admin/src/labstor_admin.cc index 0b7b12264..d80e4998a 100644 --- a/tasks_required/labstor_admin/src/labstor_admin.cc +++ b/tasks_required/labstor_admin/src/labstor_admin.cc @@ -49,7 +49,7 @@ class Server : public TaskLib { DomainId domain = DomainId::GetNode(1); LPointer get_id = LABSTOR_ADMIN->AsyncGetOrCreateTaskStateId(task->task_node_ + 1, domain, state_name); - get_id->Wait<1>(); + get_id->Wait(task); task->id_ = get_id->id_; LABSTOR_CLIENT->DelTask(get_id); } diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 781b3e285..47e5ff634 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -111,7 +111,7 @@ struct PushTask : public Task, TaskFlags { prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPush; - task_flags_.SetBits(TASK_LOW_LATENCY | TASK_BLOCKING); + task_flags_.SetBits(TASK_LOW_LATENCY | TASK_COROUTINE); domain_id_ = domain_id; // Custom params diff --git a/tasks_required/remote_queue/src/remote_queue.cc b/tasks_required/remote_queue/src/remote_queue.cc index 096bcfe6a..b0718a7c7 100644 --- a/tasks_required/remote_queue/src/remote_queue.cc +++ b/tasks_required/remote_queue/src/remote_queue.cc @@ -263,7 +263,7 @@ class Server : public TaskLib { method, data_size, orig_task->lane_hash_); - orig_task->Wait<1>(); + orig_task->Wait(); } void RpcComplete(const tl::request &req, diff --git a/test/unit/boost/test_boost.cc b/test/unit/boost/test_boost.cc index 15ae5161b..78146590c 100644 --- a/test/unit/boost/test_boost.cc +++ b/test/unit/boost/test_boost.cc @@ -6,6 +6,8 @@ #include #include "hermes_shm/util/timer.h" #include "hermes_shm/util/logging.h" +#include + void function() { } @@ -68,13 +70,13 @@ typedef simple_stack_allocator< 32> stack_allocator; int value1; -namespace ctx = boost::context::detail; +namespace bctx = boost::context::detail; -void f3( ctx::transfer_t t_) { +void f3( bctx::transfer_t t_) { ++value1; - ctx::transfer_t t = ctx::jump_fcontext( t_.fctx, 0); + bctx::transfer_t t = bctx::jump_fcontext( t_.fctx, 0); ++value1; - ctx::jump_fcontext( t.fctx, t.data); + bctx::jump_fcontext( t.fctx, t.data); } @@ -89,12 +91,34 @@ TEST_CASE("TestBoostFcontext") { void *sp = alloc.allocate(size); for (size_t i = 0; i < ops; ++i) { - ctx::fcontext_t ctx = ctx::make_fcontext(sp, size, f3); - ctx::transfer_t t = ctx::jump_fcontext(ctx, 0); - ctx::jump_fcontext(t.fctx, 0); + bctx::fcontext_t ctx = bctx::make_fcontext(sp, size, f3); + bctx::transfer_t t = bctx::jump_fcontext(ctx, 0); + bctx::jump_fcontext(t.fctx, 0); } alloc.deallocate(sp, size); + t.Pause(); + HILOG(kInfo, "Latency: {} MOps", ops / t.GetUsec()); +} + +using namespace boost::coroutines2; + +void myCoroutine(coroutine::push_type& yield) { + for (int i = 1; i <= 5; ++i) { + yield(); + } +} + +TEST_CASE("TestBoostCoroutine") { + hshm::Timer t; + t.Resume(); + size_t ops = (1 << 20); + + for (size_t i = 0; i < ops; ++i) { + coroutine::pull_type myCoroutineInstance(myCoroutine); + myCoroutineInstance(); + } + t.Pause(); HILOG(kInfo, "Latency: {} MOps", ops / t.GetUsec()); } \ No newline at end of file From 57abb5b848c9bdfc54f97ada2654299e32c724ac Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sat, 23 Sep 2023 23:19:02 -0500 Subject: [PATCH 29/54] Use a 16KB stack size --- include/labstor/task_registry/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index 2c6b14aa7..d7b142043 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -240,7 +240,7 @@ struct Task : public hipc::ShmContainer { u32 method_; /**< The method to call in the state */ bitfield32_t task_flags_; /**< Properties of the task */ bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ - size_t stack_size_ = KILOBYTES(256); /**< The size of the stack for the task (runtime) */ + size_t stack_size_ = KILOBYTES(16); /**< The size of the stack for the task (runtime) */ void *stack_ptr_; /**< The pointer to the stack (runtime) */ /**==================================== From 75e3e14ef92473971237923a8fd0da05a7ec68be Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 01:29:38 -0500 Subject: [PATCH 30/54] Keep work entry when jumping to new context --- src/worker.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/worker.cc b/src/worker.cc index 575916f5b..146f6e9f5 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -84,7 +84,8 @@ void Worker::PollGrouped(WorkEntry &work_entry) { task->stack_size_, &RunBlocking); task->SetStarted(); } - task->jmp_ = bctx::jump_fcontext(task->jmp_.fctx, task); + task->jmp_.data = &work_entry; + task->jmp_ = bctx::jump_fcontext(task->jmp_.fctx, &work_entry); HILOG(kInfo, "Jumping into function") } else if (task->IsPreemptive()) { task->DisableRun(); @@ -113,10 +114,11 @@ void Worker::PollGrouped(WorkEntry &work_entry) { } void Worker::RunBlocking(bctx::transfer_t t) { - Task *task = reinterpret_cast(t.data); + WorkEntry *work_entry = reinterpret_cast(t.data); + Task *&task = work_entry->task_; + TaskState *&exec = work_entry->exec_; + RunContext &ctx = work_entry->ctx_; task->jmp_ = t; - TaskState *exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); - RunContext ctx(0); exec->Run(task->method_, task, ctx); task->Yield(); } From 530e17bd40267bbc8a7075e6516c344b94647c06 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 01:37:02 -0500 Subject: [PATCH 31/54] Make network unordered again --- .../remote_queue/include/remote_queue/remote_queue_tasks.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h index 47e5ff634..c3e79057d 100644 --- a/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h +++ b/tasks_required/remote_queue/include/remote_queue/remote_queue_tasks.h @@ -111,7 +111,7 @@ struct PushTask : public Task, TaskFlags { prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPush; - task_flags_.SetBits(TASK_LOW_LATENCY | TASK_COROUTINE); + task_flags_.SetBits(TASK_LOW_LATENCY | TASK_PREEMPTIVE); domain_id_ = domain_id; // Custom params @@ -125,10 +125,7 @@ struct PushTask : public Task, TaskFlags { /** Create group */ HSHM_ALWAYS_INLINE u32 GetGroup(hshm::charbuf &group) { - LocalSerialize srl(group); - srl << 16; - // return TASK_UNORDERED; - return 0; + return TASK_UNORDERED; } }; From 8d8523587991ed83968e15f7612da23a0f96b54a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:06:27 -0500 Subject: [PATCH 32/54] Print the nodes which finished blob_mdm --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 33cd20c70..43c0eb968 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -84,7 +84,7 @@ class Server : public TaskLib { target_map_.emplace(client.id_, &client); } blob_mdm_.Init(id_); - HILOG(kInfo, "Created Blob MDM") + HILOG(kInfo, "(node {}) Created Blob MDM") task->SetModuleComplete(); } From 35b0f5bec641ae4cc4b465d746378a527c033771 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:06:47 -0500 Subject: [PATCH 33/54] Print the nodes which finished blob_mdm --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 43c0eb968..4ee237633 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -84,7 +84,7 @@ class Server : public TaskLib { target_map_.emplace(client.id_, &client); } blob_mdm_.Init(id_); - HILOG(kInfo, "(node {}) Created Blob MDM") + HILOG(kInfo, "(node {}) Created Blob MDM", LABSTOR_CLIENT->node_id_); task->SetModuleComplete(); } From 954374a333e3786955a060d939182de4c1de4c1d Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:21:00 -0500 Subject: [PATCH 34/54] Use 64KB stack size --- include/labstor/task_registry/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index d7b142043..e28730080 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -240,7 +240,7 @@ struct Task : public hipc::ShmContainer { u32 method_; /**< The method to call in the state */ bitfield32_t task_flags_; /**< Properties of the task */ bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ - size_t stack_size_ = KILOBYTES(16); /**< The size of the stack for the task (runtime) */ + size_t stack_size_ = KILOBYTES(64); /**< The size of the stack for the task (runtime) */ void *stack_ptr_; /**< The pointer to the stack (runtime) */ /**==================================== From 192dbd98f5fc17af67ce67a25d792c53225abb73 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:48:45 -0500 Subject: [PATCH 35/54] Use 256KB stack size --- include/labstor/task_registry/task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index e28730080..2c6b14aa7 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -240,7 +240,7 @@ struct Task : public hipc::ShmContainer { u32 method_; /**< The method to call in the state */ bitfield32_t task_flags_; /**< Properties of the task */ bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ - size_t stack_size_ = KILOBYTES(64); /**< The size of the stack for the task (runtime) */ + size_t stack_size_ = KILOBYTES(256); /**< The size of the stack for the task (runtime) */ void *stack_ptr_; /**< The pointer to the stack (runtime) */ /**==================================== From 0208c3d58a8c8f6f8ea11a785ec820e002cbc318 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:49:00 -0500 Subject: [PATCH 36/54] Use 256KB stack size --- src/worker.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/worker.cc b/src/worker.cc index 146f6e9f5..00955490b 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -84,7 +84,6 @@ void Worker::PollGrouped(WorkEntry &work_entry) { task->stack_size_, &RunBlocking); task->SetStarted(); } - task->jmp_.data = &work_entry; task->jmp_ = bctx::jump_fcontext(task->jmp_.fctx, &work_entry); HILOG(kInfo, "Jumping into function") } else if (task->IsPreemptive()) { From 719adabe9c1d719dc7a19b7efc7edc64e00ff8ea Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 09:51:46 -0500 Subject: [PATCH 37/54] Use 256KB stack size --- tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index 75cae81b3..a39dc86a9 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -190,7 +190,7 @@ class Server : public TaskLib { /** Get or create a tag */ void GetOrCreateTag(GetOrCreateTagTask *task, RunContext &ctx) { TagId tag_id; - HILOG(kDebug, "Creating a tag") + HILOG(kDebug, "Creating a tag on lane {}", ctx.lane_id_); // Check if the tag exists TAG_ID_MAP_T &tag_id_map = tag_id_map_[ctx.lane_id_]; From d5e4c4572b7ced1a221787156ad7ee4a55856a96 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 10:15:26 -0500 Subject: [PATCH 38/54] Make RunContext apart of task for now --- include/labstor/task_registry/task.h | 23 ++++++++++++--- include/labstor/task_registry/task_lib.h | 11 -------- include/labstor/work_orchestrator/worker.h | 4 --- src/worker.cc | 33 ++++++++++++---------- test/unit/asan.supp | 10 +++++++ test/unit/boost/test_boost.cc | 22 +++++++++------ 6 files changed, 60 insertions(+), 43 deletions(-) create mode 100644 test/unit/asan.supp diff --git a/include/labstor/task_registry/task.h b/include/labstor/task_registry/task.h index 2c6b14aa7..19a834109 100644 --- a/include/labstor/task_registry/task.h +++ b/include/labstor/task_registry/task.h @@ -11,6 +11,8 @@ namespace labstor { +class TaskLib; + /** This task reads a state */ #define TASK_READ BIT_OPT(u32, 0) /** This task writes to a state */ @@ -228,6 +230,21 @@ class TaskPrio { TASK_PRIO_T kLowLatency = 2; }; +/** Context passed to the Run method of a task */ +struct RunContext { + u32 lane_id_; /**< The lane id of the task */ + bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ + size_t stack_size_ = KILOBYTES(64); /**< The size of the stack for the task (runtime) */ + void *stack_ptr_; /**< The pointer to the stack (runtime) */ + TaskLib *exec_; + + /** Default constructor */ + RunContext() {} + + /** Emplace constructor */ + RunContext(u32 lane_id) : lane_id_(lane_id) {} +}; + /** A generic task base class */ struct Task : public hipc::ShmContainer { SHM_CONTAINER_TEMPLATE((Task), (Task)) @@ -239,9 +256,7 @@ struct Task : public hipc::ShmContainer { u32 lane_hash_; /**< Determine the lane a task is keyed to */ u32 method_; /**< The method to call in the state */ bitfield32_t task_flags_; /**< Properties of the task */ - bctx::transfer_t jmp_; /**< Current execution state of the task (runtime) */ - size_t stack_size_ = KILOBYTES(256); /**< The size of the stack for the task (runtime) */ - void *stack_ptr_; /**< The pointer to the stack (runtime) */ + RunContext ctx_; /**==================================== * Task Helpers @@ -369,7 +384,7 @@ struct Task : public hipc::ShmContainer { if constexpr (THREAD_MODEL == TASK_YIELD_STD) { HERMES_THREAD_MODEL->Yield(); } else if constexpr (THREAD_MODEL == TASK_YIELD_CO) { - jmp_ = bctx::jump_fcontext(jmp_.fctx, nullptr); + ctx_.jmp_ = bctx::jump_fcontext(ctx_.jmp_.fctx, nullptr); } else if constexpr (THREAD_MODEL == TASK_YIELD_ABT) { ABT_thread_yield(); } diff --git a/include/labstor/task_registry/task_lib.h b/include/labstor/task_registry/task_lib.h index 5dc4eda98..acf82efad 100644 --- a/include/labstor/task_registry/task_lib.h +++ b/include/labstor/task_registry/task_lib.h @@ -53,17 +53,6 @@ struct TaskPointer { } }; -/** Context passed to the Run method of a task */ -struct RunContext { - u32 lane_id_; /**< The lane id of the task */ - - /** Default constructor */ - RunContext() {} - - /** Emplace constructor */ - RunContext(u32 lane_id) : lane_id_(lane_id) {} -}; - /** * Represents a custom operation to perform. * Tasks are independent of Hermes. diff --git a/include/labstor/work_orchestrator/worker.h b/include/labstor/work_orchestrator/worker.h index 1e46009a3..23555bd31 100644 --- a/include/labstor/work_orchestrator/worker.h +++ b/include/labstor/work_orchestrator/worker.h @@ -29,10 +29,6 @@ struct WorkEntry { LaneGroup *group_; MultiQueue *queue_; - TaskState *exec_; - Task *task_; - RunContext ctx_; - /** Default constructor */ HSHM_ALWAYS_INLINE WorkEntry() = default; diff --git a/src/worker.cc b/src/worker.cc index 00955490b..d4842c9b6 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -43,10 +43,7 @@ void Worker::Run() { void Worker::PollGrouped(WorkEntry &work_entry) { int off = 0; Lane *&lane = work_entry.lane_; - Task *&task = work_entry.task_; - TaskState *&exec = work_entry.exec_; - RunContext &ctx = work_entry.ctx_; - ctx.lane_id_ = work_entry.lane_id_; + Task *task; LaneData *entry; for (int i = 0; i < 1024; ++i) { // Get the task message @@ -58,7 +55,10 @@ void Worker::PollGrouped(WorkEntry &work_entry) { continue; } task = LABSTOR_CLIENT->GetPrivatePointer(entry->p_); + RunContext &ctx = task->ctx_; + ctx.lane_id_ = work_entry.lane_id_; // Get the task state + TaskState *&exec = ctx.exec_; exec = LABSTOR_TASK_REGISTRY->GetTaskState(task->task_state_); if (!exec) { HELOG(kFatal, "(node {}) Could not find the task state: {}", @@ -78,13 +78,17 @@ void Worker::PollGrouped(WorkEntry &work_entry) { task->SetUnordered(); } else if (task->IsCoroutine()) { if (!task->IsStarted()) { - task->stack_ptr_ = malloc(task->stack_size_); - task->jmp_.fctx = bctx::make_fcontext( - (char*)task->stack_ptr_ + task->stack_size_, - task->stack_size_, &RunBlocking); + ctx.stack_ptr_ = malloc(ctx.stack_size_); + if (ctx.stack_ptr_ == nullptr) { + HILOG(kFatal, "The stack pointer of size {} is NULL", + ctx.stack_size_, ctx.stack_ptr_); + } + ctx.jmp_.fctx = bctx::make_fcontext( + (char*)ctx.stack_ptr_ + ctx.stack_size_, + ctx.stack_size_, &RunBlocking); task->SetStarted(); } - task->jmp_ = bctx::jump_fcontext(task->jmp_.fctx, &work_entry); + ctx.jmp_ = bctx::jump_fcontext(ctx.jmp_.fctx, task); HILOG(kInfo, "Jumping into function") } else if (task->IsPreemptive()) { task->DisableRun(); @@ -100,7 +104,7 @@ void Worker::PollGrouped(WorkEntry &work_entry) { // LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, lane_id, queue->id_, id_); entry->complete_ = true; if (task->IsCoroutine()) { - free(task->stack_ptr_); + free(ctx.stack_ptr_); } else if (task->IsPreemptive()) { ABT_thread_join(entry->thread_); } @@ -113,11 +117,10 @@ void Worker::PollGrouped(WorkEntry &work_entry) { } void Worker::RunBlocking(bctx::transfer_t t) { - WorkEntry *work_entry = reinterpret_cast(t.data); - Task *&task = work_entry->task_; - TaskState *&exec = work_entry->exec_; - RunContext &ctx = work_entry->ctx_; - task->jmp_ = t; + Task *task = reinterpret_cast(t.data); + RunContext &ctx = task->ctx_; + TaskState *&exec = ctx.exec_; + ctx.jmp_ = t; exec->Run(task->method_, task, ctx); task->Yield(); } diff --git a/test/unit/asan.supp b/test/unit/asan.supp new file mode 100644 index 000000000..c17008e89 --- /dev/null +++ b/test/unit/asan.supp @@ -0,0 +1,10 @@ +# Ignore leaks from external libraries +leak:libfabric.so +leak:libabt.so +leak:libmargo.so +leak:libmpi.so +leak:librdmacm.so +leak:libhwloc.so +leak:libmpich.so +leak:aiori-HDF5.c +leak:ior.c \ No newline at end of file diff --git a/test/unit/boost/test_boost.cc b/test/unit/boost/test_boost.cc index 78146590c..6e53672b9 100644 --- a/test/unit/boost/test_boost.cc +++ b/test/unit/boost/test_boost.cc @@ -72,30 +72,34 @@ typedef simple_stack_allocator< int value1; namespace bctx = boost::context::detail; -void f3( bctx::transfer_t t_) { +bctx::transfer_t shared_xfer; + +void f3( bctx::transfer_t t) { ++value1; - bctx::transfer_t t = bctx::jump_fcontext( t_.fctx, 0); + shared_xfer = t; + HILOG(kInfo, "Aasfasfak;asdf {}", value1) + shared_xfer = bctx::jump_fcontext(shared_xfer.fctx, 0); ++value1; - bctx::jump_fcontext( t.fctx, t.data); + shared_xfer = bctx::jump_fcontext( shared_xfer.fctx, shared_xfer.data); } TEST_CASE("TestBoostFcontext") { value1 = 0; stack_allocator alloc; - int size = 128; + int size = KILOBYTES(64); hshm::Timer t; t.Resume(); size_t ops = (1 << 20); - void *sp = alloc.allocate(size); for (size_t i = 0; i < ops; ++i) { - bctx::fcontext_t ctx = bctx::make_fcontext(sp, size, f3); - bctx::transfer_t t = bctx::jump_fcontext(ctx, 0); - bctx::jump_fcontext(t.fctx, 0); + void *sp = alloc.allocate(size); + shared_xfer.fctx = bctx::make_fcontext(sp, size, f3); + shared_xfer = bctx::jump_fcontext(shared_xfer.fctx, 0); + shared_xfer = bctx::jump_fcontext(shared_xfer.fctx, 0); + alloc.deallocate(sp, size); } - alloc.deallocate(sp, size); t.Pause(); HILOG(kInfo, "Latency: {} MOps", ops / t.GetUsec()); From 41eb1a3830c13c3f944ee2822bff4927694e105f Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 10:30:57 -0500 Subject: [PATCH 39/54] Irresponsibly destroy DelTask --- include/labstor/api/labstor_client.h | 7 +++++-- src/worker.cc | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/labstor/api/labstor_client.h b/include/labstor/api/labstor_client.h index f3ef03473..54e9d9f99 100644 --- a/include/labstor/api/labstor_client.h +++ b/include/labstor/api/labstor_client.h @@ -139,7 +139,8 @@ class Client : public ConfigurationManager { template HSHM_ALWAYS_INLINE void DelTask(TaskT *task) { - main_alloc_->DelObj(task); + // TODO(llogan): verify leak + // main_alloc_->DelObj(task); } /** Destroy a task */ @@ -187,12 +188,14 @@ class Client : public ConfigurationManager { /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(hipc::Pointer &p) { - main_alloc_->Free(p); + // TODO(llogan): verify leak + // main_alloc_->Free(p); } /** Free a buffer */ HSHM_ALWAYS_INLINE void FreeBuffer(LPointer &p) { + // TODO(llogan): verify leak main_alloc_->FreeLocalPtr(p); } }; diff --git a/src/worker.cc b/src/worker.cc index d4842c9b6..1cabb5987 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -104,7 +104,8 @@ void Worker::PollGrouped(WorkEntry &work_entry) { // LABSTOR_CLIENT->node_id_, task->task_node_, task->task_state_, lane_id, queue->id_, id_); entry->complete_ = true; if (task->IsCoroutine()) { - free(ctx.stack_ptr_); + // TODO(llogan): verify leak + // free(ctx.stack_ptr_); } else if (task->IsPreemptive()) { ABT_thread_join(entry->thread_); } From 61d0d60bc988a12ff82048e3bcc8ce796f244ac5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 14:14:25 -0500 Subject: [PATCH 40/54] Make PUT and GET have async option --- tasks/hermes/include/hermes/bucket.h | 275 +++++++++++++----- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 16 +- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 41 ++- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 65 +++-- .../src/hermes_bucket_mdm.cc | 8 +- tasks/posix_bdev/src/posix_bdev.cc | 2 + tasks/ram_bdev/src/ram_bdev.cc | 2 + test/unit/hermes/test_bucket.cc | 7 +- 8 files changed, 298 insertions(+), 118 deletions(-) diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 62879a53b..70438e28c 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -11,6 +11,10 @@ namespace hermes { +#include "labstor/labstor_namespace.h" +using hermes::blob_mdm::PutBlobTask; +using hermes::blob_mdm::GetBlobTask; + #define HERMES_BUCKET_IS_FILE BIT_OPT(u32, 1) class Bucket { @@ -211,6 +215,7 @@ class Bucket { const BlobId &orig_blob_id, Context &ctx) { BlobId blob_id = orig_blob_id; + bitfield32_t flags, task_flags(TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY); // Copy data to shared memory LPointer p = LABSTOR_CLIENT->AllocateBuffer(blob.size()); char *data = p.ptr_; @@ -218,19 +223,25 @@ class Bucket { // Put to shared memory hshm::charbuf blob_name_buf = hshm::to_charbuf(blob_name); if (blob_id.IsNull()) { - blob_id = blob_mdm_->GetOrCreateBlobIdRoot(id_, blob_name_buf); + flags.SetBits(HERMES_GET_BLOB_ID); + task_flags.UnsetBits(TASK_FIRE_AND_FORGET); } - HILOG(kDebug, "The bucket's ID is: {}", blob_id); + LPointer> push_task; if constexpr(!PARTIAL) { - blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, - blob_id, 0, blob.size(), p.shm_, ctx.blob_score_, - bitfield32_t(HERMES_BLOB_REPLACE), - ctx); + flags.SetBits(HERMES_BLOB_REPLACE); + push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, + blob_id, 0, blob.size(), p.shm_, ctx.blob_score_, + flags, ctx, task_flags); } else { - blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, - blob_id, blob_off, blob.size(), p.shm_, ctx.blob_score_, - bitfield32_t(0), - ctx); + push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, + blob_id, blob_off, blob.size(), p.shm_, ctx.blob_score_, + flags, ctx, task_flags); + } + if (flags.Any(HERMES_GET_BLOB_ID)) { + push_task->Wait(); + PutBlobTask *task = push_task->get(); + blob_id = task->blob_id_; + LABSTOR_CLIENT->DelTask(push_task); } return blob_id; } @@ -238,24 +249,72 @@ class Bucket { /** * Put \a blob_name Blob into the bucket * */ + template + HSHM_ALWAYS_INLINE + BlobId SrlBasePut(const std::string &blob_name, + const T &data, + const BlobId &orig_blob_id, + Context &ctx) { + std::stringstream ss; + cereal::BinaryOutputArchive ar(ss); + ar << data; + Blob blob(ss.str()); + return BasePut(blob_name, blob, 0, orig_blob_id, ctx); + } + + /** + * Put \a blob_name Blob into the bucket + * */ + template BlobId Put(const std::string &blob_name, - const Blob &blob, + const T &blob, Context &ctx) { - return BasePut(blob_name, blob, 0, BlobId::GetNull(), ctx); + if (std::is_same_v) { + return BasePut(blob_name, blob, 0, BlobId::GetNull(), ctx); + } else { + return SrlBasePut(blob_name, blob, BlobId::GetNull(), ctx); + } } /** * Put \a blob_id Blob into the bucket * */ + template BlobId Put(const BlobId &blob_id, - const Blob &blob, + const T &blob, Context &ctx) { - return BasePut("", blob, 0, blob_id, ctx); + if (std::is_same_v) { + return BasePut("", blob, 0, blob_id, ctx); + } else { + return SrlBasePut("", blob, blob_id, ctx); + } } /** * Put \a blob_name Blob into the bucket * */ + template + HSHM_ALWAYS_INLINE + void AsyncPut(const std::string &blob_name, + const T &blob, + Context &ctx) { + Put(blob_name, blob, ctx); + } + + /** + * Put \a blob_id Blob into the bucket + * */ + template + HSHM_ALWAYS_INLINE + void AsyncPut(const BlobId &blob_id, + const T &blob, + Context &ctx) { + Put(blob_id, blob, ctx); + } + + /** + * PartialPut \a blob_name Blob into the bucket + * */ BlobId PartialPut(const std::string &blob_name, const Blob &blob, size_t blob_off, @@ -264,7 +323,7 @@ class Bucket { } /** - * Put \a blob_id Blob into the bucket + * PartialPut \a blob_id Blob into the bucket * */ BlobId PartialPut(const BlobId &blob_id, const Blob &blob, @@ -274,28 +333,33 @@ class Bucket { } /** - * Serialized PUT + * AsyncPartialPut \a blob_name Blob into the bucket * */ - template - BlobId Put(const std::string &blob_name, - const T &data, - Context &ctx) { - std::stringstream ss; - cereal::BinaryOutputArchive ar(ss); - ar << data; - Blob blob(ss.str()); - return Put(blob_name, blob, ctx); + void AsyncPartialPut(const std::string &blob_name, + const Blob &blob, + size_t blob_off, + Context &ctx) { + BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); } /** - * Append \a blob_name Blob into the bucket + * AsyncPartialPut \a blob_id Blob into the bucket * */ - Status Append(const Blob &blob, size_t page_size, Context &ctx) { + void AsyncPartialPut(const BlobId &blob_id, + const Blob &blob, + size_t blob_off, + Context &ctx) { + BasePut("", blob, blob_off, blob_id, ctx); + } + + /** + * Append \a blob_name Blob into the bucket (fully asynchronous) + * */ + void Append(const Blob &blob, size_t page_size, Context &ctx) { LPointer p = LABSTOR_CLIENT->AllocateBuffer(blob.size()); char *data = p.ptr_; memcpy(data, blob.data(), blob.size()); bkt_mdm_->AppendBlobRoot(id_, blob.size(), p.shm_, page_size, ctx.blob_score_, ctx.node_id_, ctx); - return Status(); } /** @@ -316,51 +380,119 @@ class Bucket { } /** - * Get \a blob_id Blob from the bucket + * Get \a blob_id Blob from the bucket (async) * */ - BlobId BaseGet(const std::string &blob_name, - const BlobId &orig_blob_id, - Blob &blob, - size_t blob_off, - Context &ctx) { - BlobId blob_id = orig_blob_id; + LPointer> + HSHM_ALWAYS_INLINE + AsyncBaseGet(const std::string &blob_name, + const BlobId &blob_id, + Blob &blob, + size_t blob_off, + Context &ctx) { + bitfield32_t flags; // Get the blob ID if (blob_id.IsNull()) { - blob_id = blob_mdm_->GetBlobIdRoot(id_, hshm::to_charbuf(blob_name)); - } - if (blob_id.IsNull()) { - if (ctx.filename_.size() == 0) { - return blob_id; - } else { - // StageIn using PUT of an empty blob - hermes::Blob emtpy_blob; - blob_id = PartialPut(blob_name, emtpy_blob, 0, ctx); - } + flags.SetBits(HERMES_GET_BLOB_ID); } // Get from shared memory size_t data_size = blob.size(); - if (data_size == 0) { - data_size = GetBlobSize(blob_id); - } - HILOG(kInfo, "Data size: {}", data_size) LPointer data_p = LABSTOR_CLIENT->AllocateBuffer(data_size); - data_size = blob_mdm_->GetBlobRoot(id_, blob_id, blob_off, data_size, data_p.shm_, ctx); - char *data = data_p.ptr_; - // Copy data to blob + LPointer> push_task; + push_task = blob_mdm_->AsyncGetBlobRoot(id_, blob_name, blob_id, blob_off, + data_size, data_p.shm_, + ctx, flags); + return push_task; + } + + /** + * Get \a blob_id Blob from the bucket (sync) + * */ + BlobId BaseGet(const std::string &blob_name, + const BlobId &orig_blob_id, + Blob &blob, + size_t blob_off, + Context &ctx) { // TODO(llogan): intercept mmap to avoid copy - blob.resize(data_size); + // TODO(llogan): make GetBlobSize work with blob_name + size_t data_size = blob.size(); + if (blob.size() == 0) { + data_size = blob_mdm_->GetBlobSizeRoot(id_, orig_blob_id); + blob.resize(data_size); + } + BlobId blob_id; + LPointer> push_task; + push_task = AsyncBaseGet(blob_name, orig_blob_id, blob, blob_off, ctx); + push_task->Wait(); + GetBlobTask *task = push_task->get(); + blob_id = task->blob_id_; + char *data = LABSTOR_CLIENT->GetPrivatePointer(task->data_); memcpy(blob.data(), data, data_size); - LABSTOR_CLIENT->FreeBuffer(data_p); + LABSTOR_CLIENT->FreeBuffer(task->data_); + LABSTOR_CLIENT->DelTask(task); return blob_id; } + /** + * Get \a blob_id Blob from the bucket (sync) + * */ + template + BlobId SrlBaseGet(const std::string &blob_name, + const BlobId &orig_blob_id, + T &data, + Context &ctx) { + std::stringstream ss; + cereal::BinaryInputArchive ar(ss); + ar >> data; + Blob blob(ss.str()); + return BaseGet(blob_name, orig_blob_id, blob, 0, ctx); + } + /** * Get \a blob_id Blob from the bucket * */ + template BlobId Get(const std::string &blob_name, - Blob &blob, + T &blob, Context &ctx) { - return BaseGet(blob_name, BlobId::GetNull(), blob, 0, ctx); + if (std::is_same_v) { + return BaseGet(blob_name, BlobId::GetNull(), blob, 0, ctx); + } else { + return SrlBaseGet(blob_name, BlobId::GetNull(), blob, ctx); + } + } + + /** + * Get \a blob_id Blob from the bucket + * */ + template + BlobId Get(const BlobId &blob_id, + T &blob, + Context &ctx) { + if (std::is_same_v) { + return BaseGet("", blob_id, blob, 0, ctx); + } else { + return SrlBaseGet("", blob_id, blob, ctx); + } + } + + /** + * AsyncGet \a blob_name Blob from the bucket + * */ + LPointer> + AsyncGet(const std::string &blob_name, + Blob &blob, + Context &ctx) { + return AsyncBaseGet(blob_name, BlobId::GetNull(), blob, 0, ctx); + } + + /** + * AsyncGet \a blob_id Blob from the bucket + * */ + LPointer> + AsyncGet(const BlobId &blob_id, + Blob &blob, + Context &ctx) { + return AsyncBaseGet("", blob_id, blob, 0, ctx); } /** @@ -373,15 +505,6 @@ class Bucket { return BaseGet(blob_name, BlobId::GetNull(), blob, blob_off, ctx); } - /** - * Get \a blob_id Blob from the bucket - * */ - BlobId Get(const BlobId &blob_id, - Blob &blob, - Context &ctx) { - return BaseGet("", blob_id, blob, 0, ctx); - } - /** * Put \a blob_name Blob into the bucket * */ @@ -392,6 +515,28 @@ class Bucket { return BaseGet("", blob_id, blob, blob_off, ctx); } + /** + * AsyncGet \a blob_name Blob from the bucket + * */ + LPointer> + AsyncPartialGet(const std::string &blob_name, + Blob &blob, + size_t blob_off, + Context &ctx) { + return AsyncBaseGet(blob_name, BlobId::GetNull(), blob, blob_off, ctx); + } + + /** + * AsyncGet \a blob_id Blob from the bucket + * */ + LPointer> + AsyncPartialGet(const BlobId &blob_id, + Blob &blob, + size_t blob_off, + Context &ctx) { + return AsyncBaseGet("", blob_id, blob, blob_off, ctx); + } + /** * Determine if the bucket contains \a blob_id BLOB * */ diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index abb5be9cf..16c8c3f70 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -127,14 +127,13 @@ class Client : public TaskLibClient { const hipc::Pointer &blob, float score, bitfield32_t flags, Context ctx = Context(), - bitfield32_t task_flags = bitfield32_t(TASK_FIRE_AND_FORGET | TASK_DATA_OWNER)) { + bitfield32_t task_flags = bitfield32_t(TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY)) { HILOG(kDebug, "Beginning PUT (task_node={})", task_node); LABSTOR_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(blob_id.node_id_), id_, tag_id, blob_name, blob_id, blob_off, blob_size, - blob, score, flags, ctx); - task->task_flags_ = task_flags; + blob, score, flags, ctx, task_flags); HILOG(kDebug, "Constructed PUT (task_node={})", task_node); } LABSTOR_TASK_NODE_PUSH_ROOT(PutBlob); @@ -143,24 +142,27 @@ class Client : public TaskLibClient { void AsyncGetBlobConstruct(GetBlobTask *task, const TaskNode &task_node, const TagId &tag_id, + const std::string &blob_name, const BlobId &blob_id, size_t off, ssize_t data_size, hipc::Pointer &data, - Context ctx = Context()) { + Context ctx = Context(), + bitfield32_t flags = bitfield32_t(0)) { HILOG(kDebug, "Beginning GET (task_node={})", task_node); LABSTOR_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(blob_id.node_id_), id_, - tag_id, blob_id, off, data_size, data, ctx); + tag_id, blob_name, blob_id, off, data_size, data, ctx, flags); } size_t GetBlobRoot(const TagId &tag_id, const BlobId &blob_id, size_t off, ssize_t data_size, hipc::Pointer &data, - Context ctx = Context()) { + Context ctx = Context(), + bitfield32_t flags = bitfield32_t(0)) { LPointer> push_task = - AsyncGetBlobRoot(tag_id, blob_id, off, data_size, data); + AsyncGetBlobRoot(tag_id, "", blob_id, off, data_size, data, ctx, flags); push_task->Wait(); GetBlobTask *task = push_task->get(); data = task->data_; diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 0fa2e80c1..1f9213847 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -209,7 +209,7 @@ class PutBlobPhase { #define HERMES_DID_STAGE_IN BIT_OPT(u32, 2) #define HERMES_IS_FILE BIT_OPT(u32, 3) #define HERMES_BLOB_DID_CREATE BIT_OPT(u32, 4) - +#define HERMES_GET_BLOB_ID BIT_OPT(u32, 5) /** A task to put data in a blob */ struct PutBlobTask : public Task, TaskFlags { @@ -251,15 +251,20 @@ struct PutBlobTask : public Task, TaskFlags const hipc::Pointer &data, float score, bitfield32_t flags, - const Context &ctx) : Task(alloc) { + const Context &ctx, + bitfield32_t task_flags) : Task(alloc) { // Initialize task HILOG(kDebug, "Beginning PUT task constructor") task_node_ = task_node; - lane_hash_ = blob_id.hash_; + if (!blob_id.IsNull()) { + lane_hash_ = blob_id.hash_; + } else { + lane_hash_ = std::hash{}(blob_name); + } prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPutBlob; - task_flags_.SetBits(TASK_LOW_LATENCY); + task_flags_ = task_flags; domain_id_ = domain_id; // Custom params @@ -309,7 +314,11 @@ struct PutBlobTask : public Task, TaskFlags /** (De)serialize message return */ template - void SerializeEnd(u32 replica, Ar &ar) {} + void SerializeEnd(u32 replica, Ar &ar) { + if (flags_.Any(HERMES_GET_BLOB_ID)) { + ar(blob_id_); + } + } /** Create group */ HSHM_ALWAYS_INLINE @@ -331,12 +340,14 @@ class GetBlobPhase { /** A task to get data from a blob */ struct GetBlobTask : public Task, TaskFlags { IN TagId tag_id_; - IN BlobId blob_id_; + IN hipc::ShmArchive blob_name_; + INOUT BlobId blob_id_; IN size_t blob_off_; IN hipc::Pointer data_; IN hipc::ShmArchive filename_; IN size_t page_size_; INOUT ssize_t data_size_; + IN bitfield32_t flags_; TEMP int phase_ = GetBlobPhase::kStart; TEMP hipc::ShmArchive> bdev_reads_; TEMP PutBlobTask *stage_task_ = nullptr; @@ -352,11 +363,13 @@ struct GetBlobTask : public Task, TaskFlags const DomainId &domain_id, const TaskStateId &state_id, const TagId &tag_id, + const std::string &blob_name, const BlobId &blob_id, size_t off, ssize_t data_size, hipc::Pointer &data, - const Context &ctx) : Task(alloc) { + const Context &ctx, + bitfield32_t flags) : Task(alloc) { // Initialize task task_node_ = task_node; lane_hash_ = blob_id.hash_; @@ -372,25 +385,27 @@ struct GetBlobTask : public Task, TaskFlags blob_off_ = off; data_size_ = data_size; data_ = data; + flags_ = flags; + HSHM_MAKE_AR(blob_name_, alloc, blob_name); HSHM_MAKE_AR(filename_, alloc, ctx.filename_); page_size_ = ctx.page_size_; } /** Destructor */ ~GetBlobTask() { + HSHM_DESTROY_AR(blob_name_); HSHM_DESTROY_AR(filename_); } /** (De)serialize message call */ template void SaveStart(Ar &ar) { - // TODO(llogan): Make it so Get takes as input a buffer, instead of returning one DataTransfer xfer(DT_RECEIVER_WRITE, HERMES_MEMORY_MANAGER->Convert(data_), data_size_, domain_id_); task_serialize(ar); ar & xfer; - ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_); + ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); } /** Deserialize message call */ @@ -400,12 +415,16 @@ struct GetBlobTask : public Task, TaskFlags task_serialize(ar); ar & xfer; data_ = HERMES_MEMORY_MANAGER->Convert(xfer.data_); - ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_); + ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); } /** (De)serialize message return */ template - void SerializeEnd(u32 replica, Ar &ar) {} + void SerializeEnd(u32 replica, Ar &ar) { + if (flags_.Any(HERMES_GET_BLOB_ID)) { + ar(blob_id_); + } + } /** Create group */ HSHM_ALWAYS_INLINE diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 4ee237633..b324db3b3 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -142,16 +142,14 @@ class Server : public TaskLib { /** Create blob / update metadata for the PUT */ void PutBlobCreatePhase(PutBlobTask *task, RunContext &ctx) { HILOG(kDebug, "PutBlobPhase::kCreate {}", task->blob_id_); + // Get the blob info data structure hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); - BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; - auto it = blob_map.find(task->blob_id_); - if (it == blob_map.end()) { - task->flags_.SetBits(HERMES_BLOB_DID_CREATE); - } - if (task->flags_.Any(HERMES_BLOB_DID_CREATE)) { - blob_map.emplace(task->blob_id_, BlobInfo()); + if (task->flags_.Any(HERMES_GET_BLOB_ID)) { + task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, + blob_name, ctx, task->flags_); } + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; BlobInfo &blob_info = blob_map[task->blob_id_]; // Update the blob info @@ -333,13 +331,13 @@ class Server : public TaskLib { /** Wait for the update to complete */ void PutBlobWaitModifyPhase(PutBlobTask *task, RunContext &ctx) { std::vector &write_tasks = *task->bdev_writes_; - for (int i = (int)write_tasks.size() - 1; i >= 0; --i) { - bdev::WriteTask *write_task = write_tasks[i]; + for (bdev::WriteTask *&write_task : write_tasks) { if (!write_task->IsComplete()) { return; } + } + for (bdev::WriteTask *&write_task : write_tasks) { LABSTOR_CLIENT->DelTask(write_task); - write_tasks.pop_back(); } HILOG(kDebug, "PutBlobTask complete"); HSHM_DESTROY_AR(task->schema_); @@ -377,14 +375,16 @@ class Server : public TaskLib { } void GetBlobGetPhase(GetBlobTask *task, RunContext &ctx) { + if (task->flags_.Any(HERMES_GET_BLOB_ID)) { + hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); + task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, + blob_name, ctx, task->flags_); + } BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; BlobInfo &blob_info = blob_map[task->blob_id_]; HSHM_MAKE_AR0(task->bdev_reads_, nullptr); std::vector &read_tasks = *task->bdev_reads_; read_tasks.reserve(blob_info.buffers_.size()); - if (task->data_size_ < 0) { - task->data_size_ = (ssize_t)(blob_info.blob_size_ - task->blob_off_); - } HILOG(kDebug, "Getting blob {} of size {} starting at offset {} (total_blob_size={}, buffers={})", task->blob_id_, task->data_size_, task->blob_off_, blob_info.blob_size_, blob_info.buffers_.size()); size_t blob_off = 0, buf_off = 0; @@ -415,13 +415,13 @@ class Server : public TaskLib { void GetBlobWaitPhase(GetBlobTask *task, RunContext &ctx) { std::vector &read_tasks = *task->bdev_reads_; - for (auto it = read_tasks.rbegin(); it != read_tasks.rend(); ++it) { - bdev::ReadTask *read_task = *it; + for (bdev::ReadTask *&read_task : read_tasks) { if (!read_task->IsComplete()) { return; } + } + for (bdev::ReadTask *&read_task : read_tasks) { LABSTOR_CLIENT->DelTask(read_task); - read_tasks.pop_back(); } HSHM_DESTROY_AR(task->bdev_reads_); HILOG(kDebug, "GetBlobTask complete"); @@ -463,18 +463,26 @@ class Server : public TaskLib { /** * Create \a blob_id BLOB ID * */ - void GetOrCreateBlobId(GetOrCreateBlobIdTask *task, RunContext &ctx) { - hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); - hshm::charbuf blob_name_unique = GetBlobNameWithBucket(task->tag_id_, blob_name); + BlobId GetOrCreateBlobId(TagId &tag_id, u32 lane_hash, + const hshm::charbuf &blob_name, RunContext &ctx, + bitfield32_t &flags) { + hshm::charbuf blob_name_unique = GetBlobNameWithBucket(tag_id, blob_name); BLOB_ID_MAP_T &blob_id_map = blob_id_map_[ctx.lane_id_]; auto it = blob_id_map.find(blob_name_unique); if (it == blob_id_map.end()) { - task->blob_id_ = BlobId(node_id_, task->lane_hash_, id_alloc_.fetch_add(1)); - blob_id_map.emplace(blob_name_unique, task->blob_id_); - task->SetModuleComplete(); - return; - } - task->blob_id_ = it->second; + BlobId blob_id = BlobId(node_id_, lane_hash, id_alloc_.fetch_add(1)); + blob_id_map.emplace(blob_name_unique, blob_id); + flags.SetBits(HERMES_BLOB_DID_CREATE); + BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; + blob_map.emplace(blob_id, BlobInfo()); + return blob_id; + } + return it->second; + } + void GetOrCreateBlobId(GetOrCreateBlobIdTask *task, RunContext &ctx) { + hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); + bitfield32_t flags; + task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, blob_name, ctx, flags); task->SetModuleComplete(); } @@ -622,13 +630,13 @@ class Server : public TaskLib { } case DestroyBlobPhase::kWaitFreeBuffers: { std::vector &free_tasks = *task->free_tasks_; - for (auto it = free_tasks.rbegin(); it != free_tasks.rend(); ++it) { - bdev::FreeTask *free_task = *it; + for (bdev::FreeTask *&free_task : free_tasks) { if (!free_task->IsComplete()) { return; } + } + for (bdev::FreeTask *&free_task : free_tasks) { LABSTOR_CLIENT->DelTask(free_task); - free_tasks.pop_back(); } BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; BlobInfo &blob_info = blob_map[task->blob_id_]; @@ -660,6 +668,7 @@ class Server : public TaskLib { task->data_size_ = blob_info.blob_size_; task->get_task_ = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, task->tag_id_, + "", task->blob_id_, 0, task->data_size_, diff --git a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc index a39dc86a9..5d1a96fc0 100644 --- a/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc +++ b/tasks/hermes_bucket_mdm/src/hermes_bucket_mdm.cc @@ -288,16 +288,16 @@ class Server : public TaskLib { } case DestroyTagPhase::kWaitDestroyBlobs: { std::vector blob_tasks = *task->destroy_blob_tasks_; - for (auto it = blob_tasks.rbegin(); it != blob_tasks.rend(); ++it) { - blob_mdm::DestroyBlobTask *blob_task = *it; + for (blob_mdm::DestroyBlobTask *&blob_task : blob_tasks) { if (!blob_task->IsComplete()) { return; } + } + for (blob_mdm::DestroyBlobTask *&blob_task : blob_tasks) { LABSTOR_CLIENT->DelTask(blob_task); - blob_tasks.pop_back(); } - TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; HSHM_DESTROY_AR(task->destroy_blob_tasks_); + TAG_MAP_T &tag_map = tag_map_[ctx.lane_id_]; tag_map.erase(task->tag_id_); task->SetModuleComplete(); } diff --git a/tasks/posix_bdev/src/posix_bdev.cc b/tasks/posix_bdev/src/posix_bdev.cc index 71cce0b2e..038debf86 100644 --- a/tasks/posix_bdev/src/posix_bdev.cc +++ b/tasks/posix_bdev/src/posix_bdev.cc @@ -55,6 +55,7 @@ class Server : public TaskLib { } void Write(WriteTask *task, RunContext &ctx) { + HILOG(kDebug, "Writing {} bytes to {}", task->size_, path_); ssize_t count = pwrite(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { HELOG(kError, "BORG: wrote {} bytes, but expected {}: {}", @@ -64,6 +65,7 @@ class Server : public TaskLib { } void Read(ReadTask *task, RunContext &ctx) { + HILOG(kDebug, "Reading {} bytes from {}", task->size_, path_); ssize_t count = pread(fd_, task->buf_, task->size_, (off_t)task->disk_off_); if (count != task->size_) { HELOG(kError, "BORG: read {} bytes, but expected {}", diff --git a/tasks/ram_bdev/src/ram_bdev.cc b/tasks/ram_bdev/src/ram_bdev.cc index d5f2a042a..ec1ae7107 100644 --- a/tasks/ram_bdev/src/ram_bdev.cc +++ b/tasks/ram_bdev/src/ram_bdev.cc @@ -42,11 +42,13 @@ class Server : public TaskLib { } void Write(WriteTask *task, RunContext &ctx) { + HILOG(kDebug, "Writing {} bytes to RAM", task->size_); memcpy(mem_ptr_ + task->disk_off_, task->buf_, task->size_); task->SetModuleComplete(); } void Read(ReadTask *task, RunContext &ctx) { + HILOG(kDebug, "Reading {} bytes from RAM", task->size_); memcpy(task->buf_, mem_ptr_ + task->disk_off_, task->size_); task->SetModuleComplete(); } diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 164031069..a074e884d 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -109,12 +109,13 @@ TEST_CASE("TestHermesPutGet") { memset(blob.data(), i % 256, blob.size()); hermes::BlobId blob_id = bkt.Put(std::to_string(i), blob, ctx); HILOG(kInfo, "(iteration {}) Using BlobID: {}", i, blob_id); + } + for (size_t i = off; i < proc_count; ++i) { + HILOG(kInfo, "Iteration: {} with blob name {}", i, std::to_string(i)); // Get a blob hermes::Blob blob2; - bkt.Get(blob_id, blob2, ctx); - REQUIRE(blob.size() == blob2.size()); - REQUIRE(blob == blob2); + bkt.Get(std::to_string(i), blob2, ctx); } } } From 49a3e2356594dd0ad4b4b9d8501e4fe5376f186a Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 14:49:05 -0500 Subject: [PATCH 41/54] Use kDebug for jumping into function --- src/worker.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/worker.cc b/src/worker.cc index 1cabb5987..7a943839a 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -89,7 +89,7 @@ void Worker::PollGrouped(WorkEntry &work_entry) { task->SetStarted(); } ctx.jmp_ = bctx::jump_fcontext(ctx.jmp_.fctx, task); - HILOG(kInfo, "Jumping into function") + HILOG(kDebug, "Jumping into function") } else if (task->IsPreemptive()) { task->DisableRun(); entry->thread_ = LABSTOR_WORK_ORCHESTRATOR->SpawnAsyncThread(&Worker::RunPreemptive, task); From 3d263192500b4a5eb9d652ba0ace73c8e5599cd5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 14:51:08 -0500 Subject: [PATCH 42/54] Add flags_ to putblob --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 1f9213847..8cbc0be86 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -299,7 +299,7 @@ struct PutBlobTask : public Task, TaskFlags data_size_, domain_id_); task_serialize(ar); ar & xfer; - ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_, flags_); } /** Deserialize message call */ @@ -309,7 +309,7 @@ struct PutBlobTask : public Task, TaskFlags task_serialize(ar); ar & xfer; data_ = HERMES_MEMORY_MANAGER->Convert(xfer.data_); - ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_, flags_); } /** (De)serialize message return */ From c22b4abdde2ec51a7230e48d4a77b26273468c6e Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 14:54:20 -0500 Subject: [PATCH 43/54] Lower debug logging --- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 2 -- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 9 ++++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 16c8c3f70..9d82c52a6 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -128,13 +128,11 @@ class Client : public TaskLibClient { bitfield32_t flags, Context ctx = Context(), bitfield32_t task_flags = bitfield32_t(TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY)) { - HILOG(kDebug, "Beginning PUT (task_node={})", task_node); LABSTOR_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(blob_id.node_id_), id_, tag_id, blob_name, blob_id, blob_off, blob_size, blob, score, flags, ctx, task_flags); - HILOG(kDebug, "Constructed PUT (task_node={})", task_node); } LABSTOR_TASK_NODE_PUSH_ROOT(PutBlob); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 8cbc0be86..6e1e11717 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -254,7 +254,6 @@ struct PutBlobTask : public Task, TaskFlags const Context &ctx, bitfield32_t task_flags) : Task(alloc) { // Initialize task - HILOG(kDebug, "Beginning PUT task constructor") task_node_ = task_node; if (!blob_id.IsNull()) { lane_hash_ = blob_id.hash_; @@ -269,7 +268,6 @@ struct PutBlobTask : public Task, TaskFlags // Custom params tag_id_ = tag_id; - HILOG(kDebug, "Setting blob name {}", blob_name.str()); HSHM_MAKE_AR(blob_name_, alloc, blob_name); blob_id_ = blob_id; blob_off_ = blob_off; @@ -279,7 +277,8 @@ struct PutBlobTask : public Task, TaskFlags flags_ = flags; HSHM_MAKE_AR(filename_, alloc, ctx.filename_); page_size_ = ctx.page_size_; - HILOG(kDebug, "Finished setting blob name {}", blob_name.str()); + HILOG(kDebug, "Construct PUT task for {}, while getting BlobId is {}", + blob_name.str(), flags_.Any(HERMES_GET_BLOB_ID)); } /** Destructor */ @@ -299,7 +298,7 @@ struct PutBlobTask : public Task, TaskFlags data_size_, domain_id_); task_serialize(ar); ar & xfer; - ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_, flags_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_); } /** Deserialize message call */ @@ -309,7 +308,7 @@ struct PutBlobTask : public Task, TaskFlags task_serialize(ar); ar & xfer; data_ = HERMES_MEMORY_MANAGER->Convert(xfer.data_); - ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_, flags_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, score_, flags_, filename_, page_size_); } /** (De)serialize message return */ From 8a1b8f0e6d7eb1c583e30aa02a11345ee19bfa33 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 14:58:55 -0500 Subject: [PATCH 44/54] Support ASYNC --- tasks/hermes/include/hermes/bucket.h | 34 +++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 70438e28c..9e23c1fd9 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -207,7 +207,7 @@ class Bucket { /** * Put \a blob_name Blob into the bucket * */ - template + template HSHM_ALWAYS_INLINE BlobId BasePut(const std::string &blob_name, const Blob &blob, @@ -222,9 +222,11 @@ class Bucket { memcpy(data, blob.data(), blob.size()); // Put to shared memory hshm::charbuf blob_name_buf = hshm::to_charbuf(blob_name); - if (blob_id.IsNull()) { - flags.SetBits(HERMES_GET_BLOB_ID); - task_flags.UnsetBits(TASK_FIRE_AND_FORGET); + if constexpr (!ASYNC) { + if (blob_id.IsNull()) { + flags.SetBits(HERMES_GET_BLOB_ID); + task_flags.UnsetBits(TASK_FIRE_AND_FORGET); + } } LPointer> push_task; if constexpr(!PARTIAL) { @@ -249,7 +251,7 @@ class Bucket { /** * Put \a blob_name Blob into the bucket * */ - template + template HSHM_ALWAYS_INLINE BlobId SrlBasePut(const std::string &blob_name, const T &data, @@ -259,7 +261,7 @@ class Bucket { cereal::BinaryOutputArchive ar(ss); ar << data; Blob blob(ss.str()); - return BasePut(blob_name, blob, 0, orig_blob_id, ctx); + return BasePut(blob_name, blob, 0, orig_blob_id, ctx); } /** @@ -270,9 +272,9 @@ class Bucket { const T &blob, Context &ctx) { if (std::is_same_v) { - return BasePut(blob_name, blob, 0, BlobId::GetNull(), ctx); + return BasePut(blob_name, blob, 0, BlobId::GetNull(), ctx); } else { - return SrlBasePut(blob_name, blob, BlobId::GetNull(), ctx); + return SrlBasePut(blob_name, blob, BlobId::GetNull(), ctx); } } @@ -284,9 +286,9 @@ class Bucket { const T &blob, Context &ctx) { if (std::is_same_v) { - return BasePut("", blob, 0, blob_id, ctx); + return BasePut("", blob, 0, blob_id, ctx); } else { - return SrlBasePut("", blob, blob_id, ctx); + return SrlBasePut("", blob, blob_id, ctx); } } @@ -298,7 +300,7 @@ class Bucket { void AsyncPut(const std::string &blob_name, const T &blob, Context &ctx) { - Put(blob_name, blob, ctx); + Put(blob_name, blob, ctx); } /** @@ -309,7 +311,7 @@ class Bucket { void AsyncPut(const BlobId &blob_id, const T &blob, Context &ctx) { - Put(blob_id, blob, ctx); + Put(blob_id, blob, ctx); } /** @@ -319,7 +321,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - return BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); + return BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); } /** @@ -329,7 +331,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - return BasePut("", blob, blob_off, blob_id, ctx); + return BasePut("", blob, blob_off, blob_id, ctx); } /** @@ -339,7 +341,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); + BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); } /** @@ -349,7 +351,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - BasePut("", blob, blob_off, blob_id, ctx); + BasePut("", blob, blob_off, blob_id, ctx); } /** From c47e91b36c24f389a359cfd973de5ee69d031208 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 15:05:04 -0500 Subject: [PATCH 45/54] Re-merge putget test --- tasks/hermes/include/hermes/bucket.h | 12 ++++-------- test/unit/hermes/test_bucket.cc | 6 +----- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 9e23c1fd9..3c796c2a4 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -228,17 +228,13 @@ class Bucket { task_flags.UnsetBits(TASK_FIRE_AND_FORGET); } } - LPointer> push_task; if constexpr(!PARTIAL) { flags.SetBits(HERMES_BLOB_REPLACE); - push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, - blob_id, 0, blob.size(), p.shm_, ctx.blob_score_, - flags, ctx, task_flags); - } else { - push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, - blob_id, blob_off, blob.size(), p.shm_, ctx.blob_score_, - flags, ctx, task_flags); } + LPointer> push_task; + push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, + blob_id, blob_off, blob.size(), p.shm_, ctx.blob_score_, + flags, ctx, task_flags); if (flags.Any(HERMES_GET_BLOB_ID)) { push_task->Wait(); PutBlobTask *task = push_task->get(); diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index a074e884d..2ce662919 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -109,13 +109,9 @@ TEST_CASE("TestHermesPutGet") { memset(blob.data(), i % 256, blob.size()); hermes::BlobId blob_id = bkt.Put(std::to_string(i), blob, ctx); HILOG(kInfo, "(iteration {}) Using BlobID: {}", i, blob_id); - } - - for (size_t i = off; i < proc_count; ++i) { - HILOG(kInfo, "Iteration: {} with blob name {}", i, std::to_string(i)); // Get a blob hermes::Blob blob2; - bkt.Get(std::to_string(i), blob2, ctx); + bkt.Get(blob_id, blob2, ctx); } } } From 8edff15dfe114d302ecae85ce164f79448d54bbb Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 15:05:20 -0500 Subject: [PATCH 46/54] Re-merge putget test --- test/unit/hermes/test_bucket.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/hermes/test_bucket.cc b/test/unit/hermes/test_bucket.cc index 2ce662919..b084edb29 100644 --- a/test/unit/hermes/test_bucket.cc +++ b/test/unit/hermes/test_bucket.cc @@ -112,6 +112,8 @@ TEST_CASE("TestHermesPutGet") { // Get a blob hermes::Blob blob2; bkt.Get(blob_id, blob2, ctx); + REQUIRE(blob.size() == blob2.size()); + REQUIRE(blob == blob2); } } } From 289dd564d22387943a3f226038acb5cbf8e818b5 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 15:31:39 -0500 Subject: [PATCH 47/54] Make it so task can be hashed using ID or name --- tasks/hermes/include/hermes/bucket.h | 4 +++- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 4 ++-- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 14 ++++++++++---- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 3c796c2a4..908fb463a 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -396,7 +396,8 @@ class Bucket { size_t data_size = blob.size(); LPointer data_p = LABSTOR_CLIENT->AllocateBuffer(data_size); LPointer> push_task; - push_task = blob_mdm_->AsyncGetBlobRoot(id_, blob_name, blob_id, blob_off, + push_task = blob_mdm_->AsyncGetBlobRoot(id_, hshm::to_charbuf(blob_name), + blob_id, blob_off, data_size, data_p.shm_, ctx, flags); return push_task; @@ -417,6 +418,7 @@ class Bucket { data_size = blob_mdm_->GetBlobSizeRoot(id_, orig_blob_id); blob.resize(data_size); } + HILOG(kInfo, "Getting blob of size {}", data_size); BlobId blob_id; LPointer> push_task; push_task = AsyncBaseGet(blob_name, orig_blob_id, blob, blob_off, ctx); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 9d82c52a6..02cdd90f6 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -140,7 +140,7 @@ class Client : public TaskLibClient { void AsyncGetBlobConstruct(GetBlobTask *task, const TaskNode &task_node, const TagId &tag_id, - const std::string &blob_name, + const hshm::charbuf &blob_name, const BlobId &blob_id, size_t off, ssize_t data_size, @@ -160,7 +160,7 @@ class Client : public TaskLibClient { Context ctx = Context(), bitfield32_t flags = bitfield32_t(0)) { LPointer> push_task = - AsyncGetBlobRoot(tag_id, "", blob_id, off, data_size, data, ctx, flags); + AsyncGetBlobRoot(tag_id, hshm::charbuf(""), blob_id, off, data_size, data, ctx, flags); push_task->Wait(); GetBlobTask *task = push_task->get(); data = task->data_; diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index 6e1e11717..b043127a1 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -257,14 +257,15 @@ struct PutBlobTask : public Task, TaskFlags task_node_ = task_node; if (!blob_id.IsNull()) { lane_hash_ = blob_id.hash_; + domain_id_ = domain_id; } else { lane_hash_ = std::hash{}(blob_name); + domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); } prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kPutBlob; task_flags_ = task_flags; - domain_id_ = domain_id; // Custom params tag_id_ = tag_id; @@ -362,7 +363,7 @@ struct GetBlobTask : public Task, TaskFlags const DomainId &domain_id, const TaskStateId &state_id, const TagId &tag_id, - const std::string &blob_name, + const hshm::charbuf &blob_name, const BlobId &blob_id, size_t off, ssize_t data_size, @@ -371,12 +372,17 @@ struct GetBlobTask : public Task, TaskFlags bitfield32_t flags) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlob; task_flags_.SetBits(TASK_LOW_LATENCY); - domain_id_ = domain_id; + if (!blob_id.IsNull()) { + lane_hash_ = blob_id.hash_; + domain_id_ = domain_id; + } else { + lane_hash_ = std::hash{}(blob_name); + domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); + } // Custom params tag_id_ = tag_id; diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index b324db3b3..0f2d5c604 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -668,7 +668,7 @@ class Server : public TaskLib { task->data_size_ = blob_info.blob_size_; task->get_task_ = blob_mdm_.AsyncGetBlob(task->task_node_ + 1, task->tag_id_, - "", + hshm::charbuf(""), task->blob_id_, 0, task->data_size_, From aff44a6072c2bfadba000fcc92c70155debaefc9 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 15:53:12 -0500 Subject: [PATCH 48/54] Remove some unneeded info logs --- include/labstor/network/rpc_thallium.h | 4 ++-- tasks/hermes/include/hermes/bucket.h | 2 +- test/unit/boost/test_boost.cc | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/labstor/network/rpc_thallium.h b/include/labstor/network/rpc_thallium.h index bc504fede..667ddf278 100644 --- a/include/labstor/network/rpc_thallium.h +++ b/include/labstor/network/rpc_thallium.h @@ -235,7 +235,7 @@ class ThalliumRpc { // The "local_bulk" object will only be read from flag = tl::bulk_mode::read_only; // flag = tl::bulk_mode::read_write; - HILOG(kInfo, "(node {}) Reading {} bytes from the server", + HILOG(kDebug, "(node {}) Reading {} bytes from the server", rpc_->node_id_, size) break; } @@ -243,7 +243,7 @@ class ThalliumRpc { // The "local_bulk" object will only be written to flag = tl::bulk_mode::write_only; // flag = tl::bulk_mode::read_write; - HILOG(kInfo, "(node {}) Writing {} bytes to the server", + HILOG(kDebug, "(node {}) Writing {} bytes to the server", rpc_->node_id_, size) break; } diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 908fb463a..e799f0660 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -418,7 +418,7 @@ class Bucket { data_size = blob_mdm_->GetBlobSizeRoot(id_, orig_blob_id); blob.resize(data_size); } - HILOG(kInfo, "Getting blob of size {}", data_size); + HILOG(kDebug, "Getting blob of size {}", data_size); BlobId blob_id; LPointer> push_task; push_task = AsyncBaseGet(blob_name, orig_blob_id, blob, blob_off, ctx); diff --git a/test/unit/boost/test_boost.cc b/test/unit/boost/test_boost.cc index 6e53672b9..84c517341 100644 --- a/test/unit/boost/test_boost.cc +++ b/test/unit/boost/test_boost.cc @@ -77,7 +77,6 @@ bctx::transfer_t shared_xfer; void f3( bctx::transfer_t t) { ++value1; shared_xfer = t; - HILOG(kInfo, "Aasfasfak;asdf {}", value1) shared_xfer = bctx::jump_fcontext(shared_xfer.fctx, 0); ++value1; shared_xfer = bctx::jump_fcontext( shared_xfer.fctx, shared_xfer.data); From fc128f3ed099f0f7a0b3a5ffdd137780f7d759a4 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 16:11:49 -0500 Subject: [PATCH 49/54] Allow GetBlobSize to be done using the blob name directly --- tasks/hermes/include/hermes/bucket.h | 11 ++++++-- .../include/hermes_blob_mdm/hermes_blob_mdm.h | 6 +++-- .../hermes_blob_mdm/hermes_blob_mdm_tasks.h | 25 ++++++++++++++----- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 6 +++++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index e799f0660..1a97f7bd5 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -374,7 +374,14 @@ class Bucket { * Get the current size of the blob in the bucket * */ size_t GetBlobSize(const BlobId &blob_id) { - return blob_mdm_->GetBlobSizeRoot(id_, blob_id); + return blob_mdm_->GetBlobSizeRoot(id_, hshm::charbuf(""), blob_id); + } + + /** + * Get the current size of the blob in the bucket + * */ + size_t GetBlobSize(const std::string &name) { + return blob_mdm_->GetBlobSizeRoot(id_, hshm::charbuf(name), BlobId::GetNull()); } /** @@ -415,7 +422,7 @@ class Bucket { // TODO(llogan): make GetBlobSize work with blob_name size_t data_size = blob.size(); if (blob.size() == 0) { - data_size = blob_mdm_->GetBlobSizeRoot(id_, orig_blob_id); + data_size = blob_mdm_->GetBlobSizeRoot(id_, hshm::charbuf(blob_name), orig_blob_id); blob.resize(data_size); } HILOG(kDebug, "Getting blob of size {}", data_size); diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h index 02cdd90f6..9b82d620f 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm.h @@ -293,16 +293,18 @@ class Client : public TaskLibClient { void AsyncGetBlobSizeConstruct(GetBlobSizeTask *task, const TaskNode &task_node, const TagId &tag_id, + const hshm::charbuf &blob_name, const BlobId &blob_id) { HILOG(kDebug, "Getting blob size {}", task_node); LABSTOR_CLIENT->ConstructTask( task, task_node, DomainId::GetNode(blob_id.node_id_), id_, - tag_id, blob_id); + tag_id, blob_name, blob_id); } size_t GetBlobSizeRoot(const TagId &tag_id, + const hshm::charbuf &blob_name, const BlobId &blob_id) { LPointer> push_task = - AsyncGetBlobSizeRoot(tag_id, blob_id); + AsyncGetBlobSizeRoot(tag_id, blob_name, blob_id); push_task->Wait(); GetBlobSizeTask *task = push_task->get(); size_t size = task->size_; diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index b043127a1..b3638ee85 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -255,6 +255,10 @@ struct PutBlobTask : public Task, TaskFlags bitfield32_t task_flags) : Task(alloc) { // Initialize task task_node_ = task_node; + prio_ = TaskPrio::kLowLatency; + task_state_ = state_id; + method_ = Method::kPutBlob; + task_flags_ = task_flags; if (!blob_id.IsNull()) { lane_hash_ = blob_id.hash_; domain_id_ = domain_id; @@ -262,10 +266,6 @@ struct PutBlobTask : public Task, TaskFlags lane_hash_ = std::hash{}(blob_name); domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); } - prio_ = TaskPrio::kLowLatency; - task_state_ = state_id; - method_ = Method::kPutBlob; - task_flags_ = task_flags; // Custom params tag_id_ = tag_id; @@ -686,6 +686,7 @@ struct GetBlobNameTask : public Task, TaskFlags { /** Get \a score from \a blob_id BLOB id */ struct GetBlobSizeTask : public Task, TaskFlags { IN TagId tag_id_; + IN hipc::ShmArchive blob_name_; IN BlobId blob_id_; OUT size_t size_; @@ -700,21 +701,33 @@ struct GetBlobSizeTask : public Task, TaskFlags { const DomainId &domain_id, const TaskStateId &state_id, const TagId &tag_id, + const hshm::charbuf &blob_name, const BlobId &blob_id) : Task(alloc) { // Initialize task task_node_ = task_node; - lane_hash_ = blob_id.hash_; prio_ = TaskPrio::kLowLatency; task_state_ = state_id; method_ = Method::kGetBlobSize; task_flags_.SetBits(TASK_LOW_LATENCY); - domain_id_ = domain_id; + if (!blob_id.IsNull()) { + lane_hash_ = blob_id.hash_; + domain_id_ = domain_id; + } else { + lane_hash_ = std::hash{}(blob_name); + domain_id_ = DomainId::GetNode(HASH_TO_NODE_ID(lane_hash_)); + } // Custom tag_id_ = tag_id; + HSHM_MAKE_AR(blob_name_, alloc, blob_name) blob_id_ = blob_id; } + /** Destructor */ + ~GetBlobSizeTask() { + HSHM_DESTROY_AR(blob_name_) + } + /** (De)serialize message call */ template void SerializeStart(Ar &ar) { diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 0f2d5c604..1d3778fea 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -525,6 +525,12 @@ class Server : public TaskLib { * Get \a score from \a blob_id BLOB id * */ void GetBlobSize(GetBlobSizeTask *task, RunContext &ctx) { + if (task->blob_id_.IsNull()) { + bitfield32_t flags; + task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, + hshm::to_charbuf(*task->blob_name_), + ctx, flags); + } BLOB_MAP_T &blob_map = blob_map_[ctx.lane_id_]; auto it = blob_map.find(task->blob_id_); if (it == blob_map.end()) { From 26efc0f076da00a3d88f267349e9fa8aa7b24d89 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 16:33:40 -0500 Subject: [PATCH 50/54] Add blob_name to GetBlob and GetBlobSize --- .../include/hermes_blob_mdm/hermes_blob_mdm_tasks.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h index b3638ee85..e487d23c3 100644 --- a/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h +++ b/tasks/hermes_blob_mdm/include/hermes_blob_mdm/hermes_blob_mdm_tasks.h @@ -410,7 +410,7 @@ struct GetBlobTask : public Task, TaskFlags data_size_, domain_id_); task_serialize(ar); ar & xfer; - ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); } /** Deserialize message call */ @@ -420,7 +420,7 @@ struct GetBlobTask : public Task, TaskFlags task_serialize(ar); ar & xfer; data_ = HERMES_MEMORY_MANAGER->Convert(xfer.data_); - ar(tag_id_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); + ar(tag_id_, blob_name_, blob_id_, blob_off_, data_size_, filename_, page_size_, flags_); } /** (De)serialize message return */ @@ -732,7 +732,7 @@ struct GetBlobSizeTask : public Task, TaskFlags { template void SerializeStart(Ar &ar) { task_serialize(ar); - ar(tag_id_, blob_id_); + ar(tag_id_, blob_name_, blob_id_); } /** (De)serialize message return */ From 5e9b6d7b8fa6fe1bdef180e59c7f912ca719cc92 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 17:22:29 -0500 Subject: [PATCH 51/54] Try asynchronous push --- benchmark/hermes_api_bench.cc | 2 +- tasks/hermes/include/hermes/bucket.h | 44 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index 15e7c8856..a97311a10 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -48,7 +48,7 @@ void PutTest(int nprocs, int rank, for (size_t i = 0; i < blobs_per_rank; ++i) { size_t blob_name_int = rank * blobs_per_rank + i; std::string name = std::to_string(blob_name_int); - bkt.Put(name, blob, ctx); + bkt.AsyncPut(name, blob, ctx); } } t.Pause(); diff --git a/tasks/hermes/include/hermes/bucket.h b/tasks/hermes/include/hermes/bucket.h index 1a97f7bd5..67ade3690 100644 --- a/tasks/hermes/include/hermes/bucket.h +++ b/tasks/hermes/include/hermes/bucket.h @@ -210,9 +210,9 @@ class Bucket { template HSHM_ALWAYS_INLINE BlobId BasePut(const std::string &blob_name, + const BlobId &orig_blob_id, const Blob &blob, size_t blob_off, - const BlobId &orig_blob_id, Context &ctx) { BlobId blob_id = orig_blob_id; bitfield32_t flags, task_flags(TASK_FIRE_AND_FORGET | TASK_DATA_OWNER | TASK_LOW_LATENCY); @@ -235,11 +235,13 @@ class Bucket { push_task = blob_mdm_->AsyncPutBlobRoot(id_, blob_name_buf, blob_id, blob_off, blob.size(), p.shm_, ctx.blob_score_, flags, ctx, task_flags); - if (flags.Any(HERMES_GET_BLOB_ID)) { - push_task->Wait(); - PutBlobTask *task = push_task->get(); - blob_id = task->blob_id_; - LABSTOR_CLIENT->DelTask(push_task); + if constexpr (!ASYNC) { + if (flags.Any(HERMES_GET_BLOB_ID)) { + push_task->Wait(); + PutBlobTask *task = push_task->get(); + blob_id = task->blob_id_; + LABSTOR_CLIENT->DelTask(push_task); + } } return blob_id; } @@ -250,14 +252,14 @@ class Bucket { template HSHM_ALWAYS_INLINE BlobId SrlBasePut(const std::string &blob_name, - const T &data, const BlobId &orig_blob_id, + const T &data, Context &ctx) { std::stringstream ss; cereal::BinaryOutputArchive ar(ss); ar << data; Blob blob(ss.str()); - return BasePut(blob_name, blob, 0, orig_blob_id, ctx); + return BasePut(blob_name, orig_blob_id, blob, 0, ctx); } /** @@ -268,9 +270,9 @@ class Bucket { const T &blob, Context &ctx) { if (std::is_same_v) { - return BasePut(blob_name, blob, 0, BlobId::GetNull(), ctx); + return BasePut(blob_name, BlobId::GetNull(), blob, 0, ctx); } else { - return SrlBasePut(blob_name, blob, BlobId::GetNull(), ctx); + return SrlBasePut(blob_name, BlobId::GetNull(), blob, ctx); } } @@ -282,32 +284,30 @@ class Bucket { const T &blob, Context &ctx) { if (std::is_same_v) { - return BasePut("", blob, 0, blob_id, ctx); + return BasePut("", blob_id, blob, 0, ctx); } else { - return SrlBasePut("", blob, blob_id, ctx); + return SrlBasePut("", blob_id, blob, ctx); } } /** * Put \a blob_name Blob into the bucket * */ - template HSHM_ALWAYS_INLINE void AsyncPut(const std::string &blob_name, - const T &blob, + const Blob &blob, Context &ctx) { - Put(blob_name, blob, ctx); + BasePut(blob_name, BlobId::GetNull(), blob, 0, ctx); } /** * Put \a blob_id Blob into the bucket * */ - template HSHM_ALWAYS_INLINE void AsyncPut(const BlobId &blob_id, - const T &blob, + const Blob &blob, Context &ctx) { - Put(blob_id, blob, ctx); + BasePut("", BlobId::GetNull(), blob, 0, ctx); } /** @@ -317,7 +317,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - return BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); + return BasePut(blob_name, BlobId::GetNull(), blob, blob_off, ctx); } /** @@ -327,7 +327,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - return BasePut("", blob, blob_off, blob_id, ctx); + return BasePut("", blob_id, blob, blob_off, ctx); } /** @@ -337,7 +337,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - BasePut(blob_name, blob, blob_off, BlobId::GetNull(), ctx); + BasePut(blob_name, BlobId::GetNull(), blob, blob_off, ctx); } /** @@ -347,7 +347,7 @@ class Bucket { const Blob &blob, size_t blob_off, Context &ctx) { - BasePut("", blob, blob_off, blob_id, ctx); + BasePut("", blob_id, blob, blob_off, ctx); } /** From 78bf9ac318fdb64e363a14ea6a04b6a2e72ab46c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 17:43:21 -0500 Subject: [PATCH 52/54] dont rely on HERMES_GET_BLOB_ID flag --- tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc index 1d3778fea..df2077126 100644 --- a/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc +++ b/tasks/hermes_blob_mdm/src/hermes_blob_mdm.cc @@ -145,7 +145,7 @@ class Server : public TaskLib { // Get the blob info data structure hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); - if (task->flags_.Any(HERMES_GET_BLOB_ID)) { + if (task->blob_id_.IsNull()) { task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, blob_name, ctx, task->flags_); } @@ -375,7 +375,7 @@ class Server : public TaskLib { } void GetBlobGetPhase(GetBlobTask *task, RunContext &ctx) { - if (task->flags_.Any(HERMES_GET_BLOB_ID)) { + if (task->blob_id_.IsNull()) { hshm::charbuf blob_name = hshm::to_charbuf(*task->blob_name_); task->blob_id_ = GetOrCreateBlobId(task->tag_id_, task->lane_hash_, blob_name, ctx, task->flags_); From b1d8f8ebffdd412b2e704861498d0f988850b0f1 Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Sun, 24 Sep 2023 23:58:28 -0500 Subject: [PATCH 53/54] Include blob size --- benchmark/hermes_api_bench.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/hermes_api_bench.cc b/benchmark/hermes_api_bench.cc index a97311a10..018ac7c11 100644 --- a/benchmark/hermes_api_bench.cc +++ b/benchmark/hermes_api_bench.cc @@ -68,7 +68,7 @@ void GetTest(int nprocs, int rank, t.Resume(); for (int j = 0; j < repeat; ++j) { for (size_t i = 0; i < blobs_per_rank; ++i) { - hermes::Blob ret; + hermes::Blob ret(blob_size); size_t blob_name_int = rank * blobs_per_rank + i; std::string name = std::to_string(blob_name_int); bkt.Get(name, ret, ctx); From 2eb22d1952cd45f324f1587ba5b0a06c1ac6c58c Mon Sep 17 00:00:00 2001 From: lukemartinlogan Date: Mon, 25 Sep 2023 00:25:13 -0500 Subject: [PATCH 54/54] Use async partial put in filesystem adapter --- tasks/hermes_adapters/filesystem/filesystem.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/hermes_adapters/filesystem/filesystem.cc b/tasks/hermes_adapters/filesystem/filesystem.cc index 44d3a0a45..00f2656f8 100644 --- a/tasks/hermes_adapters/filesystem/filesystem.cc +++ b/tasks/hermes_adapters/filesystem/filesystem.cc @@ -130,7 +130,7 @@ size_t Filesystem::Write(File &f, AdapterStat &stat, const void *ptr, const Blob page((const char*)ptr + data_offset, p.blob_size_); if (!is_append) { std::string blob_name(p.CreateBlobName().str()); - bkt.PartialPut(blob_name, page, p.blob_off_, ctx); + bkt.AsyncPartialPut(blob_name, page, p.blob_off_, ctx); } else { bkt.Append(page, stat.page_size_, ctx); }