From 87c8c022e4c7bb2957c4c840578fec4f07f78b34 Mon Sep 17 00:00:00 2001
From: vegetableysm <108774481+vegetableysm@users.noreply.github.com>
Date: Fri, 9 Aug 2024 13:29:01 +0800
Subject: [PATCH] Integration RDMA module with llm cache. (#1963)

Fixes #1942

Signed-off-by: vegetableysm <yuanshumin.ysm@alibaba-inc.com>
---
 modules/llm-cache/ds/kv_cache_manager.cc      |  46 +++
 modules/llm-cache/ds/kv_cache_manager.h       |  14 +
 modules/llm-cache/ds/vineyard_file.cc         | 381 ++++++++++++++++++
 modules/llm-cache/ds/vineyard_file.h          | 109 +++++
 modules/llm-cache/storage/file_storage.cc     | 229 ++++++++++-
 modules/llm-cache/storage/file_storage.h      |  28 +-
 .../llm-cache/storage/local_file_storage.cc   |   4 +-
 .../llm-cache/storage/local_file_storage.h    |   2 +-
 modules/llm-cache/storage/storage.h           |  19 +
 .../storage/vineyard_file_storage.cc          | 262 ++++++++++++
 .../llm-cache/storage/vineyard_file_storage.h | 143 +++++++
 python/vineyard/llm/cache.cc                  |  57 +++
 python/vineyard/llm/cache.py                  |  29 +-
 python/vineyard/llm/tests/test_llm.py         |  72 ++++
 src/client/client.cc                          |   6 +
 src/client/client.h                           |   3 +
 src/client/ds/object_meta.cc                  |   6 +
 src/client/ds/object_meta.h                   |   8 +
 src/client/rpc_client.cc                      |  73 +++-
 src/client/rpc_client.h                       |  13 +-
 20 files changed, 1469 insertions(+), 35 deletions(-)
 create mode 100644 modules/llm-cache/ds/vineyard_file.cc
 create mode 100644 modules/llm-cache/ds/vineyard_file.h
 create mode 100644 modules/llm-cache/storage/vineyard_file_storage.cc
 create mode 100644 modules/llm-cache/storage/vineyard_file_storage.h
diff --git a/modules/llm-cache/ds/kv_cache_manager.cc b/modules/llm-cache/ds/kv_cache_manager.cc
index 659db8c090..88be920f11 100644
--- a/modules/llm-cache/ds/kv_cache_manager.cc
+++ b/modules/llm-cache/ds/kv_cache_manager.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "llm-cache/ds/kv_cache_manager.h"
 #include "llm-cache/storage/blob_storage.h"
 #include "llm-cache/storage/local_file_storage.h"
+#include "llm-cache/storage/vineyard_file_storage.h"
 
 namespace vineyard {
 
@@ -88,6 +89,33 @@ Status KVCacheManager::Make(std::shared_ptr<KVCacheManager>& manager,
   return Status::OK();
 }
 
+Status KVCacheManager::Make(RPCClient& rpc_client, Client& ipc_client,
+                            std::shared_ptr<KVCacheManager>& manager,
+                            FileCacheConfig& config) {
+  const bool badChunking = config.chunkSize <= 0 || config.hashChunkSize <= 0;
+  if (badChunking) {
+    return Status::Invalid("Invalid batch size or split number.");
+  }
+  const bool badShape = config.tensorByte <= 0 ||
+                        config.cacheCapacity <= 0 || config.layer <= 0;
+  if (badShape) {
+    return Status::Invalid("Invalid tensor byte, cache capacity or layer.");
+  }
+  // This overload can only build the vineyard-backed file storage.
+  if (config.filesystemType != FilesystemType::VINEYARD) {
+    return Status::Invalid("Unsupported filesystem type");
+  }
+  std::shared_ptr<FileStorage> backend = std::make_shared<VineyardFileStorage>(
+      rpc_client, ipc_client, config.tensorByte, config.cacheCapacity,
+      config.layer, config.chunkSize, config.hashChunkSize, config.root,
+      config.gcInterval, config.ttl, config.enbaleGlobalGC,
+      config.globalGCInterval, config.globalTTL);
+  manager = std::make_shared<KVCacheManager>(backend);
+  RETURN_ON_ERROR(backend->Init());
+  manager->config = std::make_shared<FileCacheConfig>(config);
+  return Status::OK();
+}
+
 /**
  * @brief Update the kv state with the given token list in the kv state cache
  * manager.
@@ -250,6 +278,17 @@ Status KVCacheManager::Update(
   return storage->Update(tokenList, nextToken, kvState);
 }
 
+Status KVCacheManager::BatchedUpdate(
+    const std::vector<int>& tokenList,
+    const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+    size_t& updated) {
+  // One kv-state entry is expected per token before handing over to storage.
+  if (tokenList.size() != kvCacheList.size()) {
+    return Status::Invalid("Token list size not match kv state list size");
+  }
+  return storage->BatchedUpdate(tokenList, kvCacheList, updated);
+}
+
 /**
  * @brief Query the kv state with the given token list in the kv state cache
  * manager.
@@ -400,6 +439,13 @@ Status KVCacheManager::Query(
   return storage->Query(prefix, tokenList, kvCacheList, matched);
 }
 
+Status KVCacheManager::BatchedQuery(
+    const std::vector<int>& tokenList,
+    std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+    size_t& matched) {  // all arguments are forwarded unchanged to `storage`
+  return storage->BatchedQuery(tokenList, kvCacheList, matched);
+}
+
 Status KVCacheManager::ClearGlobalCache(Client& client,
                                         VineyardCacheConfig& config) {
   return BlobStorage::ClearGlobalCache(client, config.llmCacheSyncLock,
diff --git a/modules/llm-cache/ds/kv_cache_manager.h b/modules/llm-cache/ds/kv_cache_manager.h
index 8cccabc8f4..073994a261 100644
--- a/modules/llm-cache/ds/kv_cache_manager.h
+++ b/modules/llm-cache/ds/kv_cache_manager.h
@@ -41,6 +41,10 @@ class KVCacheManager {
   static Status Make(std::shared_ptr<KVCacheManager>& manager,
                      FileCacheConfig& config);
 
+  static Status Make(RPCClient& rpc_client, Client& ipc_client,
+                     std::shared_ptr<KVCacheManager>& manager,
+                     FileCacheConfig& config);
+
   Status Update(const std::vector<int>& tokenList, int nextToken,
                 const std::vector<std::pair<LLMKV, LLMKV>>& kvState);
 
@@ -54,6 +58,11 @@ class KVCacheManager {
       const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
       size_t& updated);
 
+  Status BatchedUpdate(
+      const std::vector<int>& tokenList,
+      const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& updated);
+
   Status Query(const std::vector<int>& tokenList,
                std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
                size_t& matched);
@@ -66,6 +75,11 @@ class KVCacheManager {
                std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
                size_t& matched);
 
+  Status BatchedQuery(
+      const std::vector<int>& tokenList,
+      std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& matched);
+
   void Close();
 
   void StopGlobalGCThread();
diff --git a/modules/llm-cache/ds/vineyard_file.cc b/modules/llm-cache/ds/vineyard_file.cc
new file mode 100644
index 0000000000..c2b4a7ab58
--- /dev/null
+++ b/modules/llm-cache/ds/vineyard_file.cc
@@ -0,0 +1,381 @@
+/** Copyright 2020-2023 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <regex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "client/client.h"
+#include "client/ds/blob.h"
+#include "client/ds/object_meta.h"
+#include "client/ds/remote_blob.h"
+#include "client/rpc_client.h"
+#include "common/util/logging.h"
+#include "llm-cache/ds/vineyard_file.h"
+#include "llm-cache/thread_group.h"
+
+namespace vineyard {
+
+void VineyardFile::Construct(const ObjectMeta& meta) {
+  Object::Construct(meta);
+  // The fields are only filled in when the metadata is typed as VineyardFile.
+  if (meta_.GetTypeName() == type_name<VineyardFile>()) {
+    path_ = meta_.GetKeyValue("path");
+    access_time_ = meta_.GetKeyValue<uint64_t>("access_time");
+    ObjectMeta bufferMeta;
+    meta_.GetMemberMeta("buffer", bufferMeta);
+    // Ask the incoming `meta` for the buffer stored under the member's id.
+    meta.GetBuffer(bufferMeta.GetId(), buffer_);
+  }
+}
+
+Status VineyardFile::Read(void* buffer, size_t size, size_t offset) {
+  if (buffer == nullptr) return Status::Invalid("Buffer is nullptr");
+  // Bounds are compared without forming offset + size, which could wrap.
+  const size_t total = static_cast<size_t>(buffer_->size());
+  if (offset > total || size > total - offset) {
+    return Status::Invalid("Read out of range");
+  }
+  memcpy(buffer, buffer_->data() + offset, size);
+  return Status::OK();
+}
+
+Status VineyardFile::Make(std::shared_ptr<VineyardFile>& file,
+                          RPCClient& rpc_client, Client& ipc_client,
+                          std::string path) {
+  // The lookup name is `path` with every run of '/' replaced by "\/".
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  ObjectID file_id;
+  ObjectMeta meta;
+  ObjectMeta object_meta;
+  if (ipc_client.Connected()) {
+    if (!ipc_client.GetName(origin_path, file_id, false).ok()) {
+      return Status::IOError("File " + path + " is not exist.");
+    }
+    // If the IPC metadata lookup fails, fall through to the RPC client.
+    if (ipc_client.GetMetaData(file_id, meta, true).ok() &&
+        meta.GetInstanceId() == ipc_client.instance_id()) {
+      file = std::dynamic_pointer_cast<VineyardFile>(
+          ipc_client.GetObject(file_id));
+      if (file == nullptr || file->buffer_ == nullptr) {
+        return Status::IOError("File " + path + " is not exist.");
+      }
+      return Status::OK();
+    }
+    RETURN_ON_ERROR(rpc_client.GetMetaData(file_id, object_meta, true));
+  } else {
+    if (!rpc_client.GetName(origin_path, file_id, false).ok()) {
+      return Status::IOError("File " + path + " is not exist.");
+    }
+    RETURN_ON_ERROR(rpc_client.GetMetaData(file_id, object_meta, true));
+  }
+  const InstanceID owner_id = object_meta.GetInstanceId();
+  if (owner_id == rpc_client.remote_instance_id()) {
+    file =
+        std::dynamic_pointer_cast<VineyardFile>(rpc_client.GetObject(file_id));
+  } else {
+    // The file lives on another instance: connect to it and fetch the blob.
+    std::map<InstanceID, json> cluster_info;
+    RETURN_ON_ERROR(rpc_client.ClusterInfo(cluster_info));
+    auto owner = cluster_info.find(owner_id);
+    if (owner == cluster_info.end()) {
+      return Status::Invalid("Owner instance of " + path + " is unknown.");
+    }
+    RPCClient remote_rpc_client;
+    RETURN_ON_ERROR(remote_rpc_client.Connect(
+        owner->second["rpc_endpoint"].get<std::string>(), "", "",
+        owner->second["rdma_endpoint"].get<std::string>()));
+    file = std::dynamic_pointer_cast<VineyardFile>(
+        remote_rpc_client.GetObject(file_id));
+    std::shared_ptr<RemoteBlob> blob;
+    RETURN_ON_ERROR(remote_rpc_client.GetRemoteBlob(
+        object_meta.GetMember("buffer")->id(), blob));
+    if (file != nullptr && blob != nullptr) file->buffer_ = blob->Buffer();
+  }
+  if (file == nullptr || file->buffer_ == nullptr) {
+    return Status::IOError("File " + path + " is not exist.");
+  }
+  return Status::OK();
+}
+
+Status VineyardFile::BatchedGetObjects(
+    Client& client, RPCClient& rpc_client,
+    std::map<InstanceID, std::vector<ObjectMeta>>& instance_to_metas,
+    std::unordered_map<ObjectID, std::shared_ptr<VineyardFile>>& id_to_files) {
+  std::map<InstanceID, json> cluster_info;
+  rpc_client.ClusterInfo(cluster_info);
+  using FileList = std::vector<std::pair<ObjectID, std::shared_ptr<Object>>>;
+  // One pre-created result slot per instance: every worker writes only to
+  // its own slot, so no container is mutated by two threads at once.
+  std::map<InstanceID, FileList> fetched;
+  for (auto const& instance_to_meta : instance_to_metas) {
+    fetched.emplace(instance_to_meta.first, FileList());
+  }
+  auto fn = [&](std::pair<const InstanceID, std::vector<ObjectMeta>>&
+                    instance_to_meta) -> Status {
+    std::vector<ObjectMeta>& metas = instance_to_meta.second;
+    std::vector<std::shared_ptr<Object>> file_objects;
+    std::vector<ObjectID> ids(metas.size());
+    for (size_t i = 0; i < metas.size(); ++i) {
+      ids[i] = metas[i].GetId();
+    }
+    metas.clear();
+    if (client.Connected() && instance_to_meta.first == client.instance_id()) {
+      RETURN_ON_ERROR(client.GetMetaData(ids, metas, false));
+      file_objects = client.GetObjects(metas);
+    } else if (rpc_client.remote_instance_id() == instance_to_meta.first) {
+      RETURN_ON_ERROR(rpc_client.GetMetaData(ids, metas, false));
+      RETURN_ON_ERROR(rpc_client.BatchedGetObjects(metas, file_objects));
+    } else {
+      // find() rather than operator[]: the map is shared by all workers and
+      // must not grow from inside a task.
+      auto endpoints = cluster_info.find(instance_to_meta.first);
+      if (endpoints == cluster_info.end()) {
+        return Status::Invalid("Instance is missing from the cluster info");
+      }
+      RPCClient remote_rpc_client;
+      RETURN_ON_ERROR(remote_rpc_client.Connect(
+          endpoints->second["rpc_endpoint"].get<std::string>(), "", "",
+          endpoints->second["rdma_endpoint"].get<std::string>()));
+      // Metadata obtained through another client does not carry buffers for
+      // this connection, so it is requested again through the new client.
+      RETURN_ON_ERROR(remote_rpc_client.GetMetaData(ids, metas, false));
+      RETURN_ON_ERROR(remote_rpc_client.BatchedGetObjects(metas, file_objects));
+    }
+    FileList& slot = fetched.find(instance_to_meta.first)->second;
+    for (size_t i = 0; i < metas.size() && i < file_objects.size(); ++i) {
+      slot.emplace_back(metas[i].GetId(), file_objects[i]);
+    }
+    return Status::OK();
+  };
+
+  parallel::ThreadGroup tg(
+      std::min(instance_to_metas.size(),
+               static_cast<size_t>(std::thread::hardware_concurrency())));
+  std::vector<parallel::ThreadGroup::tid_t> tids(instance_to_metas.size());
+  int index = 0;
+  for (auto& instance_to_meta : instance_to_metas) {
+    tids[index] = tg.AddTask(fn, instance_to_meta);
+    index++;
+  }
+  // Best effort: an instance whose task fails contributes no files and the
+  // failure is logged; files from the other instances are still returned.
+  for (size_t i = 0; i < tids.size(); ++i) {
+    Status status = tg.TaskResult(tids[i]);
+    if (!status.ok()) {
+      LOG(WARNING) << "Failed to get files: " << status.ToString();
+    }
+  }
+  for (auto const& slot : fetched) {
+    for (auto const& entry : slot.second) {
+      id_to_files[entry.first] =
+          std::dynamic_pointer_cast<VineyardFile>(entry.second);
+    }
+  }
+  return Status::OK();
+}
+
+Status VineyardFile::BatchedMake(
+    std::vector<std::shared_ptr<VineyardFile>>& files, RPCClient& rpc_client,
+    Client& ipc_client, const std::vector<std::string>& paths) {
+  // Resolve each path to an object id under its stored name ('/' runs become
+  // "\/"). Resolution stops at the first name that is not registered.
+  const std::regex slashes("/+");
+  const bool use_ipc = ipc_client.Connected();
+  std::vector<ObjectID> file_ids;
+  for (auto const& path : paths) {
+    const std::string origin_path = std::regex_replace(path, slashes, "\\/");
+    ObjectID file_id;
+    Status named = use_ipc ? ipc_client.GetName(origin_path, file_id, false)
+                           : rpc_client.GetName(origin_path, file_id, false);
+    if (!named.ok()) {
+      break;
+    }
+    file_ids.push_back(file_id);
+  }
+  // No id was resolved, so there is nothing to fetch or open.
+  if (file_ids.empty()) {
+    return Status::OK();
+  }
+
+  // Fetch the metadata once for the whole batch, after all ids are known.
+  std::vector<ObjectMeta> file_metas;
+  Status got = use_ipc ? ipc_client.GetMetaData(file_ids, file_metas, true)
+                       : rpc_client.GetMetaData(file_ids, file_metas, true);
+  if (!got.ok()) {
+    // Best effort: carry on with whatever metadata was returned.
+    LOG(WARNING) << "Failed to get file metadata: " << got.ToString();
+  }
+
+  // Group the metadata by owning instance for the per-instance fetch.
+  std::map<InstanceID, std::vector<ObjectMeta>> instance_to_metas;
+  for (const auto& meta : file_metas) {
+    instance_to_metas[meta.GetInstanceId()].push_back(meta);
+  }
+  std::unordered_map<ObjectID, std::shared_ptr<VineyardFile>> id_to_files;
+  RETURN_ON_ERROR(BatchedGetObjects(ipc_client, rpc_client, instance_to_metas,
+                                    id_to_files));
+  // Return files in metadata order, stopping at the first one that could
+  // not be opened so callers never receive a null entry.
+  for (auto const& meta : file_metas) {
+    auto entry = id_to_files.find(meta.GetId());
+    if (entry == id_to_files.end() || entry->second == nullptr) {
+      break;
+    }
+    files.push_back(entry->second);
+  }
+  return Status::OK();
+}
+
+Status VineyardFileBuilder::Make(std::shared_ptr<VineyardFileBuilder>& builder,
+                                 RPCClient& rpc_client, Client& ipc_client,
+                                 std::string path, size_t size) {
+  // The builder is keyed by the stored name: '/' runs are rewritten to "\/".
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  builder = std::make_shared<VineyardFileBuilder>(origin_path);
+  ObjectID id;
+  if (ipc_client.Connected()) {
+    if (ipc_client.GetName(origin_path, id).ok()) {
+      return Status::Invalid("File already exists");
+    }
+    RETURN_ON_ERROR(ipc_client.CreateBlob(size, builder->writer_));
+  } else {
+    if (rpc_client.GetName(origin_path, id).ok()) {
+      return Status::Invalid("File already exists");
+    }
+    builder->remote_writer_ = std::make_shared<RemoteBlobWriter>(size);
+  }
+  return Status::OK();
+}
+
+std::shared_ptr<Object> VineyardFileBuilder::SealAndPersist(
+    RPCClient& rpc_client, Client& ipc_client) {
+  VINEYARD_CHECK_OK(this->Build(rpc_client, ipc_client));
+  std::shared_ptr<VineyardFile> vineyardFile = std::make_shared<VineyardFile>();
+  ObjectMeta blob_meta;
+  if (ipc_client.Connected()) {
+    std::shared_ptr<Object> object;
+    // `object` is dereferenced right below, so a failed seal must stop here.
+    VINEYARD_CHECK_OK(writer_->Seal(ipc_client, object));
+    blob_meta = object->meta();
+    ipc_client.Persist(blob_meta.GetId());
+  } else {
+    // Without the blob the file metadata would reference nothing.
+    VINEYARD_CHECK_OK(rpc_client.CreateRemoteBlob(remote_writer_, blob_meta));
+    rpc_client.Persist(blob_meta.GetId());
+  }
+  vineyardFile->meta_.AddMember("buffer", blob_meta);
+  vineyardFile->meta_.AddKeyValue("path", path_);
+  vineyardFile->meta_.SetTypeName(type_name<VineyardFile>());
+  // The access time is recorded as nanoseconds since the clock's epoch.
+  auto access_time = std::chrono::system_clock::now().time_since_epoch();
+  vineyardFile->meta_.AddKeyValue(
+      "access_time",
+      std::chrono::duration_cast<std::chrono::nanoseconds>(access_time)
+          .count());
+  if (ipc_client.Connected()) {
+    VINEYARD_CHECK_OK(
+        ipc_client.CreateMetaData(vineyardFile->meta_, vineyardFile->id_));
+    VINEYARD_CHECK_OK(ipc_client.Persist(vineyardFile->id_));
+    Status status = ipc_client.PutName(vineyardFile->id_, path_);
+  } else {
+    VINEYARD_CHECK_OK(
+        rpc_client.CreateMetaData(vineyardFile->meta_, vineyardFile->id_));
+    VINEYARD_CHECK_OK(rpc_client.Persist(vineyardFile->id_));
+    Status status = rpc_client.PutName(vineyardFile->id_, path_);
+  }
+  return vineyardFile;
+}
+
+std::vector<std::shared_ptr<Object>> VineyardFileBuilder::BatchedSealAndPersist(
+    RPCClient& rpc_client, Client& ipc_client,
+    std::vector<std::shared_ptr<VineyardFileBuilder>>& builders) {
+  std::vector<std::shared_ptr<Object>> vineyard_file_objects;
+  std::vector<ObjectMeta> blob_metas;
+  if (ipc_client.Connected()) {
+    for (const auto& builder : builders) {
+      std::shared_ptr<Object> object;
+      // `object` is dereferenced below, so a failed seal must stop here.
+      VINEYARD_CHECK_OK(builder->writer_->Seal(ipc_client, object));
+      blob_metas.push_back(object->meta());
+    }
+  } else {
+    std::vector<std::shared_ptr<RemoteBlobWriter>> remote_writers;
+    for (const auto& builder : builders) {
+      VINEYARD_CHECK_OK(builder->Build(rpc_client, ipc_client));
+      remote_writers.push_back(builder->remote_writer_);
+    }
+    rpc_client.CreateRemoteBlobs(remote_writers, blob_metas);
+  }
+
+  // blob_metas[i] is paired with builders[i] by position; one file per blob.
+  for (size_t i = 0; i < blob_metas.size() && i < builders.size(); i++) {
+    auto vineyard_file = std::make_shared<VineyardFile>();
+    if (ipc_client.Connected()) {
+      ipc_client.Persist(blob_metas[i].GetId());
+    } else {
+      rpc_client.Persist(blob_metas[i].GetId());
+    }
+    vineyard_file->meta_.AddMember("buffer", blob_metas[i]);
+    vineyard_file->meta_.AddKeyValue("path", builders[i]->path_);
+    vineyard_file->meta_.SetTypeName(type_name<VineyardFile>());
+    auto access_time = std::chrono::system_clock::now().time_since_epoch();
+    vineyard_file->meta_.AddKeyValue(
+        "access_time",
+        std::chrono::duration_cast<std::chrono::nanoseconds>(access_time)
+            .count());
+    if (ipc_client.Connected()) {
+      VINEYARD_CHECK_OK(
+          ipc_client.CreateMetaData(vineyard_file->meta_, vineyard_file->id_));
+      VINEYARD_CHECK_OK(ipc_client.Persist(vineyard_file->id_));
+      Status status =
+          ipc_client.PutName(vineyard_file->id_, builders[i]->path_);
+    } else {
+      VINEYARD_CHECK_OK(
+          rpc_client.CreateMetaData(vineyard_file->meta_, vineyard_file->id_));
+      VINEYARD_CHECK_OK(rpc_client.Persist(vineyard_file->id_));
+      Status status =
+          rpc_client.PutName(vineyard_file->id_, builders[i]->path_);
+    }
+    vineyard_file_objects.push_back(vineyard_file);
+  }
+  return vineyard_file_objects;
+}
+
+Status VineyardFileBuilder::Write(const void* buffer, size_t size,
+                                  size_t offset) {
+  if (writer_ == nullptr && remote_writer_ == nullptr) {
+    return Status::Invalid("VineyardFileBuilder has not been initialized");
+  }
+  // The local writer is used when present, otherwise the remote one.
+  const size_t capacity = writer_ ? writer_->size() : remote_writer_->size();
+  // Compared without forming offset + size, which could wrap around.
+  if (offset > capacity || size > capacity - offset) {
+    return Status::Invalid("Write out of range");
+  }
+  if (writer_ != nullptr) {
+    memcpy(writer_->data() + offset, buffer, size);
+  } else {
+    memcpy(remote_writer_->data() + offset, buffer, size);
+  }
+  return Status::OK();
+}
+
+}  // namespace vineyard
diff --git a/modules/llm-cache/ds/vineyard_file.h b/modules/llm-cache/ds/vineyard_file.h
new file mode 100644
index 0000000000..64d23ce338
--- /dev/null
+++ b/modules/llm-cache/ds/vineyard_file.h
@@ -0,0 +1,109 @@
+/** Copyright 2020-2023 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef MODULES_LLM_CACHE_DS_VINEYARD_FILE_H_
+#define MODULES_LLM_CACHE_DS_VINEYARD_FILE_H_
+
+#include <map>
+#include <memory>
+#include <regex>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "client/client.h"
+#include "client/ds/blob.h"
+#include "client/ds/remote_blob.h"
+#include "client/rpc_client.h"
+
+namespace vineyard {
+
+class VineyardFileBuilder;
+
+class VineyardFile : public vineyard::Registered<VineyardFile> {
+ public:
+  VineyardFile() = default;
+  // Creates an empty, default-constructed VineyardFile owned as an Object.
+  static std::unique_ptr<Object> Create() __attribute__((used)) {
+    return std::unique_ptr<Object>(new VineyardFile());
+  }
+  // Loads path, access time and buffer from `meta` when its type matches.
+  void Construct(const ObjectMeta& meta) override;
+  // Copies `size` bytes starting at byte `offset` of the file into `buffer`.
+  Status Read(void* buffer, size_t size, size_t offset);
+  // Opens the file registered under `path` into `file`.
+  static Status Make(std::shared_ptr<VineyardFile>& file, RPCClient& rpc_client,
+                     Client& ipc_client, std::string path);
+  // Opens the files registered under the given paths into `files`.
+  static Status BatchedMake(std::vector<std::shared_ptr<VineyardFile>>& files,
+                            RPCClient& rpc_client, Client& ipc_client,
+                            const std::vector<std::string>& path);
+  // Buffer size in bytes; 0 when no buffer has been attached yet.
+  size_t Size() { return buffer_ ? buffer_->size() : 0; }
+  // Access time taken from the metadata; 0 until Construct() has set it.
+  uint64_t AccessTime() { return access_time_; }
+
+ private:
+  // Fetches the objects of each instance and maps them by object id.
+  static Status BatchedGetObjects(
+      Client& client, RPCClient& rpc_client,
+      std::map<InstanceID, std::vector<ObjectMeta>>& instance_to_metas,
+      std::unordered_map<ObjectID, std::shared_ptr<VineyardFile>>& id_to_files);
+
+  std::shared_ptr<Buffer> buffer_;  // file contents; null until attached
+  std::string path_;                // value of the "path" metadata key
+  uint64_t access_time_ = 0;        // value of the "access_time" key
+  friend class VineyardFileBuilder;
+};
+
+class VineyardFileBuilder {
+ public:
+  // Creates a builder for a new file of `size` bytes stored under `path`.
+  static Status Make(std::shared_ptr<VineyardFileBuilder>& builder,
+                     RPCClient& rpc_client, Client& ipc_client,
+                     std::string path, size_t size);
+
+  ~VineyardFileBuilder() {}
+  // Currently a no-op that always reports success.
+  Status Build(RPCClient& rpc_client, Client& ipc_client) {
+    return Status::OK();
+  }
+  // Seals the written blob and publishes the file under its path.
+  std::shared_ptr<Object> SealAndPersist(RPCClient& rpc_client,
+                                         Client& ipc_client);
+  // Copies `size` bytes from `buffer` to byte `offset` of the file.
+  Status Write(const void* buffer, size_t size, size_t offset);
+  explicit VineyardFileBuilder(std::string path) : path_(path) {}
+  // Batched counterpart of SealAndPersist() for several builders.
+  static std::vector<std::shared_ptr<Object>> BatchedSealAndPersist(
+      RPCClient& rpc_client, Client& ipc_client,
+      std::vector<std::shared_ptr<VineyardFileBuilder>>& builders);
+  // Capacity of the active writer in bytes; 0 when no writer was created.
+  size_t Size() {
+    if (writer_) {
+      return writer_->size();
+    }
+    return remote_writer_ ? remote_writer_->size() : 0;
+  }
+
+ private:
+  std::shared_ptr<RemoteBlobWriter> remote_writer_;
+  std::unique_ptr<BlobWriter> writer_;
+  std::string path_;
+};
+
+}  // namespace vineyard
+
+#endif  // MODULES_LLM_CACHE_DS_VINEYARD_FILE_H_
diff --git a/modules/llm-cache/storage/file_storage.cc b/modules/llm-cache/storage/file_storage.cc
index 5f29c73e26..2ce9b570ad 100644
--- a/modules/llm-cache/storage/file_storage.cc
+++ b/modules/llm-cache/storage/file_storage.cc
@@ -110,9 +110,8 @@ Status FileStorage::Update(
              &createFileSet, &createFileSetMutex](int i) -> Status {
     int tokenLength = (i + 1) * chunkSize;
     std::shared_ptr<FileDescriptor> fd = CreateFileDescriptor();
-    std::string tmpPathStr = GetTmpFileDir() + "-" + std::to_string(i);
+    std::string tmpPathStr = GetTmpFileDir("-" + std::to_string(i));
     tempFilePaths[i] = tmpPathStr;
-    ghc::filesystem::path tmpPath(tmpPathStr);
     std::string pathStr = this->rootPath + pathList[i];
     ghc::filesystem::path path(pathStr);
 
@@ -134,8 +133,10 @@ Status FileStorage::Update(
       return Status::OK();
     }
 
-    RETURN_ON_ERROR(Mkdir(tmpPath.parent_path().string()));
-    auto status = Open(tmpPathStr, fd, FileOperationType::WRITE);
+    RETURN_ON_ERROR(Mkdir(ghc::filesystem::path(tmpPathStr + pathList[i])
+                              .parent_path()
+                              .string()));
+    auto status = Open(tmpPathStr + pathList[i], fd, FileOperationType::WRITE);
     if (!status.ok()) {
       LOG(WARNING) << "Failed to create temporary cache entry: "
                    << status.ToString();
@@ -157,11 +158,11 @@ Status FileStorage::Update(
 
     VINEYARD_DISCARD(Flush(fd));
     VINEYARD_DISCARD(Close(fd));
-    status = MoveFileAtomic(tmpPathStr, pathStr);
+    status = MoveFileAtomic(tmpPathStr + pathList[i], pathStr);
     if (!status.ok()) {
       // Move failed. There exists a file with the same name.
       LOG(WARNING) << "Failed to move cache entry: " << status.ToString();
-      VINEYARD_SUPPRESS(Delete(tmpPathStr));
+      VINEYARD_SUPPRESS(Delete(tmpPathStr + pathList[i]));
       return Status::Wrap(status, "Failed to move cache entry");
     }
     std::lock_guard<std::mutex> lock(createFileSetMutex);
@@ -300,9 +301,8 @@ Status FileStorage::Update(
              &createFileSetMutex](size_t i) -> Status {
     int tokenLength = (i + 1) * chunkSize;
     std::shared_ptr<FileDescriptor> fd = CreateFileDescriptor();
-    std::string tmpPathStr = GetTmpFileDir() + "-" + std::to_string(i);
+    std::string tmpPathStr = GetTmpFileDir("-" + std::to_string(i));
     tempFilePaths[i] = tmpPathStr;
-    ghc::filesystem::path tmpPath(tmpPathStr);
     std::string pathStr = this->rootPath + pathList[i];
     ghc::filesystem::path path(pathStr);
 
@@ -327,8 +327,10 @@ Status FileStorage::Update(
       return Status::ObjectNotExists("The prefix is not in the file cache");
     }
 
-    RETURN_ON_ERROR(Mkdir(tmpPath.parent_path().string()));
-    auto status = Open(tmpPathStr, fd, FileOperationType::WRITE);
+    RETURN_ON_ERROR(Mkdir(ghc::filesystem::path(tmpPathStr + pathList[i])
+                              .parent_path()
+                              .string()));
+    auto status = Open(tmpPathStr + pathList[i], fd, FileOperationType::WRITE);
     if (!status.ok()) {
       return Status::Wrap(status, "Failed to create temporary cache entry");
     }
@@ -352,9 +354,9 @@ Status FileStorage::Update(
 
     VINEYARD_DISCARD(Flush(fd));
     VINEYARD_DISCARD(Close(fd));
-    if (!MoveFileAtomic(tmpPathStr, pathStr).ok()) {
+    if (!MoveFileAtomic(tmpPathStr + pathList[i], pathStr).ok()) {
       // Move failed. There exists a file with the same name.
-      VINEYARD_SUPPRESS(Delete(tmpPathStr));
+      VINEYARD_SUPPRESS(Delete(tmpPathStr + pathList[i]));
       return Status::Wrap(status, "Failed to move cache entry");
     }
     std::lock_guard<std::mutex> lock(createFileSetMutex);
@@ -413,6 +415,133 @@ Status FileStorage::Update(
   return Status::NotImplemented();
 }
 
+Status FileStorage::BatchedUpdate(
+    const std::vector<int>& tokenList,
+    const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+    size_t& updated) {
+  // Nothing has been written yet; this also covers every early return.
+  updated = 0;
+  if (this->exitFlag) {
+    return Status::Invalid("The file storage has been closed!");
+  }
+  if (tokenList.size() % chunkSize != 0) {
+    return Status::Invalid("Tokens size " + std::to_string(tokenList.size()) +
+                           " should be multiple of batch size " +
+                           std::to_string(chunkSize) + "!");
+  }
+
+  std::vector<std::string> pathList;
+  RETURN_ON_ERROR(hasher->computePathForTokens(tokenList, chunkSize,
+                                               hashChunkSize, pathList));
+  if (pathList.size() == 0) {
+    return Status::OK();
+  }
+  // Find how many leading chunks already have a file with matching tokens.
+  std::vector<std::shared_ptr<FileDescriptor>> read_fd_list;
+  RETURN_ON_ERROR(BatchedOpen(pathList, read_fd_list, FileOperationType::READ));
+
+  auto read_fn = [this, &read_fd_list, &tokenList](int i) -> Status {
+    int tokenLength = (i + 1) * chunkSize;
+    RETURN_ON_ERROR(Read(read_fd_list[i], &tokenLength, sizeof(int)));
+    std::vector<int> tokens;
+    tokens.resize(tokenLength);
+    RETURN_ON_ERROR(
+        Read(read_fd_list[i], tokens.data(), tokenLength * sizeof(int)));
+    if (!CompareTokenList(tokenList, tokens, tokenLength)) {
+      // Token list not match
+      VINEYARD_DISCARD(Close(read_fd_list[i]));
+      return Status::ObjectExists("File exists for another token sequence");
+    }
+    // Skip this kv state
+    VINEYARD_DISCARD(Close(read_fd_list[i]));
+    return Status::OK();
+  };
+
+  int lower_bound = 0;
+  if (read_fd_list.size() > 0) {
+    parallel::ThreadGroup tg(
+        std::min(read_fd_list.size(),
+                 static_cast<size_t>(std::thread::hardware_concurrency())));
+    std::vector<parallel::ThreadGroup::tid_t> tids(read_fd_list.size());
+    for (size_t i = 0; i < read_fd_list.size(); ++i) {
+      tids[i] = tg.AddTask(read_fn, i);
+    }
+    std::vector<Status> taskResults(read_fd_list.size(), Status::OK());
+    for (size_t i = 0; i < read_fd_list.size(); ++i) {
+      taskResults[i] = tg.TaskResult(tids[i]);
+    }
+
+    for (size_t i = 0; i < taskResults.size(); i++) {
+      if (taskResults[i].ok()) {
+        lower_bound += 1;
+      } else {
+        // File exists for another token sequence
+        break;
+      }
+    }
+  }
+
+  BatchedClose(read_fd_list);
+  // Open the chunks after that matching prefix for writing.
+  std::vector<std::shared_ptr<FileDescriptor>> write_fd_list;
+  std::vector<std::string> left_path(pathList.begin() + lower_bound,
+                                     pathList.end());
+  RETURN_ON_ERROR(
+      BatchedOpen(left_path, write_fd_list, FileOperationType::WRITE));
+  auto fn = [this, &write_fd_list, &tokenList, &kvCacheList,
+             lower_bound](int i) -> Status {
+    int tokenLength = (i + 1 + lower_bound) * chunkSize;
+
+    RETURN_ON_ERROR(Write(write_fd_list[i], &tokenLength, sizeof(int)));
+    RETURN_ON_ERROR(
+        Write(write_fd_list[i], tokenList.data(), tokenLength * sizeof(int)));
+    for (int currentTokenIndex = (i + lower_bound) * chunkSize;
+         currentTokenIndex < (i + lower_bound + 1) * chunkSize;
+         currentTokenIndex++) {
+      for (int currentLayer = 0; currentLayer < layer; currentLayer++) {
+        const LLMKV& k = kvCacheList[currentTokenIndex][currentLayer].first;
+        const LLMKV& v = kvCacheList[currentTokenIndex][currentLayer].second;
+        RETURN_ON_ERROR(Write(write_fd_list[i], k.data, k.length));
+        RETURN_ON_ERROR(Write(write_fd_list[i], v.data, v.length));
+      }
+    }
+
+    VINEYARD_DISCARD(Flush(write_fd_list[i]));
+    return Status::OK();
+  };
+
+  if (write_fd_list.size() > 0) {
+    parallel::ThreadGroup tg_write(
+        std::min(write_fd_list.size(),
+                 static_cast<size_t>(std::thread::hardware_concurrency())));
+    std::vector<parallel::ThreadGroup::tid_t> tids_write(write_fd_list.size());
+    for (size_t i = 0; i < write_fd_list.size(); ++i) {
+      tids_write[i] = tg_write.AddTask(fn, i);
+    }
+    std::vector<Status> taskResults_write(write_fd_list.size(), Status::OK());
+    for (size_t i = 0; i < write_fd_list.size(); ++i) {
+      taskResults_write[i] = tg_write.TaskResult(tids_write[i]);
+    }
+    // Only the leading run of successful writes counts; the rest is deleted.
+    size_t upper_bound = 0;
+    for (size_t i = 0; i < write_fd_list.size(); i++) {
+      if (taskResults_write[i].ok()) {
+        upper_bound += 1;
+      } else {
+        break;
+      }
+    }
+
+    for (size_t i = upper_bound; i < write_fd_list.size(); i++) {
+      VINEYARD_SUPPRESS(Delete(this->rootPath + pathList[i + lower_bound]));
+    }
+    updated = upper_bound * chunkSize;
+
+    RETURN_ON_ERROR(BatchedClose(write_fd_list));
+  }
+  return Status::OK();
+}
+
 /**
  * @brief Query the kv state with the given token list in the file storage.
  *
@@ -652,6 +781,78 @@ Status FileStorage::Query(
   return Status::OK();
 }
 
+/**
+ * @brief Batched Query(): open all chunk files of tokenList at once and read
+ * them in parallel. matched receives the count of leading tokens recovered.
+ */
+Status FileStorage::BatchedQuery(
+    const std::vector<int>& tokenList,
+    std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+    size_t& matched) {
+  if (this->exitFlag) {
+    return Status::Invalid("The file storage has been closed!");
+  }
+  std::vector<std::string> paths;
+  RETURN_ON_ERROR(
+      hasher->computePathForTokens(tokenList, chunkSize, hashChunkSize, paths));
+  std::vector<std::shared_ptr<FileDescriptor>> read_fd_list;
+  RETURN_ON_ERROR(BatchedOpen(paths, read_fd_list, FileOperationType::READ));
+  auto read_fn = [this, &read_fd_list, &tokenList, &kvCacheList](
+                     size_t i, size_t matched_start) -> Status {
+    int tokenLength = 0;
+    RETURN_ON_ERROR(Read(read_fd_list[i], &tokenLength, sizeof(int)));
+    // Stored length is untrusted: reject it before sizing a buffer with it.
+    if (tokenLength <= 0 ||
+        static_cast<size_t>(tokenLength) > tokenList.size()) {
+      VINEYARD_DISCARD(Close(read_fd_list[i]));
+      return Status::ObjectNotExists("Token mismatch");
+    }
+    std::vector<int> blockTokenList(tokenLength, 0);
+    RETURN_ON_ERROR(Read(read_fd_list[i], blockTokenList.data(),
+                         tokenLength * sizeof(int)));
+    if (!CompareTokenList(tokenList, blockTokenList, tokenLength)) {
+      VINEYARD_DISCARD(Close(read_fd_list[i]));
+      return Status::ObjectNotExists("Token mismatch");
+    }
+    for (int j = 0; j < chunkSize; j++) {
+      if (matched_start + j >= tokenList.size() ||
+          matched_start + j >= kvCacheList.size()) {
+        break;
+      }
+      auto& kvState = kvCacheList[matched_start + j];
+      for (int currentLayer = 0; currentLayer < layer; currentLayer++) {
+        RETURN_ON_ASSERT(static_cast<int>(kvState.size()) == layer,
+                         "The size of kvState is not equal to layer");
+        LLMKV& k = kvState[currentLayer].first;
+        LLMKV& v = kvState[currentLayer].second;
+        RETURN_ON_ASSERT(
+            k.length == tensorNBytes && v.length == tensorNBytes,
+            "The size of kv tensor doesn't match with the tensorNBytes");
+        RETURN_ON_ERROR(Read(read_fd_list[i], k.data, k.length));
+        RETURN_ON_ERROR(Read(read_fd_list[i], v.data, v.length));
+      }
+    }
+    VINEYARD_DISCARD(Close(read_fd_list[i]));
+    return Status::OK();
+  };
+
+  parallel::ThreadGroup tg(
+      std::min(read_fd_list.size(),
+               static_cast<size_t>(std::thread::hardware_concurrency())));
+  std::vector<parallel::ThreadGroup::tid_t> tids(read_fd_list.size());
+  for (size_t i = 0; i < read_fd_list.size(); ++i) {
+    tids[i] = tg.AddTask(read_fn, i, i * chunkSize);
+  }
+  // Join every task; only the leading run of successful chunks is a match.
+  matched = 0;
+  bool prefix_ok = true;
+  for (size_t i = 0; i < read_fd_list.size(); ++i) {
+    prefix_ok = tg.TaskResult(tids[i]).ok() && prefix_ok;
+    matched += prefix_ok ? chunkSize : 0;
+  }
+  return Status::OK();
+}
+
 bool FileStorage::CompareTokenList(const std::vector<int>& tokenList1,
                                    const std::vector<int>& tokenList2,
                                    size_t length) {
@@ -811,7 +1012,7 @@ void FileStorage::GlobalGCThread(std::shared_ptr<FileStorage> fileStorage) {
   }
 }
 
-void FileStorage::CloseCache() {
+void FileStorage::CloseGCThread() {
   std::lock_guard<std::mutex> gcLock(gcMutex);
   if (!exitFlag) {
     exitFlag = true;
@@ -821,4 +1022,6 @@ void FileStorage::CloseCache() {
   }
 }
 
+void FileStorage::CloseCache() { CloseGCThread(); }
+
 }  // namespace vineyard
diff --git a/modules/llm-cache/storage/file_storage.h b/modules/llm-cache/storage/file_storage.h
index a3be5c1f80..dd74f42efd 100644
--- a/modules/llm-cache/storage/file_storage.h
+++ b/modules/llm-cache/storage/file_storage.h
@@ -40,6 +40,7 @@ struct FileDescriptor {};
 
 enum FilesystemType {
   LOCAL,
+  VINEYARD,
 };
 
 enum FileOperationType {
@@ -58,6 +59,14 @@ class FileStorage : public IStorage,
   virtual Status Open(std::string path, std::shared_ptr<FileDescriptor>& fd,
                       FileOperationType fileOperationType) = 0;
 
+  virtual Status BatchedOpen(
+      const std::vector<std::string>& pathList,
+      std::vector<std::shared_ptr<FileDescriptor>>& fdList,
+      FileOperationType fileOperationType) {
+    // Default for backends without batched open: report it, as IStorage does.
+    return Status::NotImplemented();
+  }
+
   virtual Status Seek(std::shared_ptr<FileDescriptor>& fd, size_t offset) = 0;
 
   virtual Status Read(std::shared_ptr<FileDescriptor>& fd, void* data,
@@ -80,6 +89,12 @@ class FileStorage : public IStorage,
 
   virtual Status Close(std::shared_ptr<FileDescriptor>& fd) = 0;
 
+  virtual Status BatchedClose(
+      std::vector<std::shared_ptr<FileDescriptor>>& fdList) {
+    // Default for backends without batched close: report it to the caller.
+    return Status::NotImplemented();
+  }
+
   virtual Status Delete(std::string path) = 0;
 
   virtual bool IsFileExist(const std::string& path) = 0;
@@ -90,7 +105,7 @@ class FileStorage : public IStorage,
 
   virtual Status TouchFile(const std::string& path) = 0;
 
-  virtual std::string GetTmpFileDir() = 0;
+  virtual std::string GetTmpFileDir(std::string surfix) = 0;
 
   Status DefaultGCFunc();
 
@@ -104,6 +119,8 @@ class FileStorage : public IStorage,
 
   static void GlobalGCThread(std::shared_ptr<FileStorage> fileStorage);
 
+  virtual void CloseGCThread();
+
   // for test
   void PrintFileAccessTime(std::string path);
 
@@ -128,6 +145,11 @@ class FileStorage : public IStorage,
       const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
       size_t& updated) override;
 
+  Status BatchedUpdate(
+      const std::vector<int>& tokenList,
+      const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& updated) override;
+
   Status Query(const std::vector<int>& tokenList,
                std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
                size_t& matched) override;
@@ -139,6 +161,10 @@ class FileStorage : public IStorage,
                const std::vector<int>& tokenList,
                std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
                size_t& matched) override;
+  Status BatchedQuery(
+      const std::vector<int>& tokenList,
+      std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& matched) override;
 
   void CloseCache() override;
 
diff --git a/modules/llm-cache/storage/local_file_storage.cc b/modules/llm-cache/storage/local_file_storage.cc
index 022defec89..349bfb2ea0 100644
--- a/modules/llm-cache/storage/local_file_storage.cc
+++ b/modules/llm-cache/storage/local_file_storage.cc
@@ -160,14 +160,14 @@ Status LocalFileStorage::Delete(std::string path) {
   return Status::OK();
 }
 
-std::string LocalFileStorage::GetTmpFileDir() {
+std::string LocalFileStorage::GetTmpFileDir(std::string surfix) {
   pid_t pid = getpid();
   char* pod_name_str = getenv("POD_NAME");
   if (pod_name_str == nullptr || strlen(pod_name_str) == 0) {
-    return this->tempFileDir + std::to_string(pid);
+    return this->tempFileDir + std::to_string(pid) + surfix;
   }
   std::string pod_name = pod_name_str;
-  return this->tempFileDir + pod_name + "/" + std::to_string(pid);
+  return this->tempFileDir + pod_name + "/" + std::to_string(pid) + surfix;
 }
 
 Status LocalFileStorage::MoveFileAtomic(std::string src, std::string dst) {
diff --git a/modules/llm-cache/storage/local_file_storage.h b/modules/llm-cache/storage/local_file_storage.h
index 1396373a9d..2093b9cdf1 100644
--- a/modules/llm-cache/storage/local_file_storage.h
+++ b/modules/llm-cache/storage/local_file_storage.h
@@ -106,7 +106,7 @@ class LocalFileStorage : public FileStorage {
 
   Status TouchFile(const std::string& path) override;
 
-  std::string GetTmpFileDir() override;
+  std::string GetTmpFileDir(std::string surfix) override;
 
   std::list<std::string>& GetGCList() { return this->gcList; }
 };
diff --git a/modules/llm-cache/storage/storage.h b/modules/llm-cache/storage/storage.h
index ce4344edfa..776452f2ec 100644
--- a/modules/llm-cache/storage/storage.h
+++ b/modules/llm-cache/storage/storage.h
@@ -43,6 +43,18 @@ class IStorage {
       const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
       size_t& updated) = 0;
 
+  /*
+   * BatchedUpdate is used to update multiple kvCacheList in one batch. It will
+   * batch open all files or batch close all files to reduce the overhead of
+   * network IO.
+   */
+  virtual Status BatchedUpdate(
+      const std::vector<int>& tokenList,
+      const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& updated) {
+    return Status::NotImplemented();
+  }
+
   virtual Status Query(
       const std::vector<int>& tokenList,
       std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
@@ -56,6 +68,13 @@ class IStorage {
       std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
       size_t& matched) = 0;
 
+  virtual Status BatchedQuery(
+      const std::vector<int>& tokenList,
+      std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kvCacheList,
+      size_t& matched) {
+    return Status::NotImplemented();
+  }
+
   virtual void CloseCache() = 0;
 
   virtual void StartGlobalGCThread() {}
diff --git a/modules/llm-cache/storage/vineyard_file_storage.cc b/modules/llm-cache/storage/vineyard_file_storage.cc
new file mode 100644
index 0000000000..740cdc2a68
--- /dev/null
+++ b/modules/llm-cache/storage/vineyard_file_storage.cc
@@ -0,0 +1,262 @@
+/** Copyright 2020-2023 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <map>
+#include <memory>
+#include <mutex>
+#include <string>
+#include <vector>
+
+#include "common/util/logging.h"
+#include "gulrak/filesystem.hpp"
+#include "llm-cache/ds/vineyard_file.h"
+#include "llm-cache/storage/vineyard_file_storage.h"
+#include "llm-cache/thread_group.h"
+
+namespace vineyard {
+std::shared_ptr<FileDescriptor> VineyardFileStorage::CreateFileDescriptor() {
+  return std::make_shared<VineyardFileDescriptor>();
+}
+
+Status VineyardFileStorage::Open(std::string path,
+                                 std::shared_ptr<FileDescriptor>& fd,
+                                 FileOperationType fileOperationType) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  lfd->path = path;
+  lfd->cur_pos = 0;
+
+  if (fileOperationType & FileOperationType::READ) {
+    RETURN_ON_ERROR(
+        VineyardFile::Make(lfd->vineyard_file, rpc_client_, ipc_client_, path));
+  } else {
+    RETURN_ON_ERROR(VineyardFileBuilder::Make(
+        lfd->builder, rpc_client_, ipc_client_, path, max_file_size_));
+  }
+  lfd->opt_type = fileOperationType;
+  return Status::OK();
+}
+
+Status VineyardFileStorage::BatchedOpen(
+    const std::vector<std::string>& pathList,
+    std::vector<std::shared_ptr<FileDescriptor>>& fdList,
+    FileOperationType fileOperationType) {
+  if (fileOperationType & FileOperationType::READ) {
+    std::vector<std::shared_ptr<VineyardFile>> vineyardFileList;
+    RETURN_ON_ERROR(VineyardFile::BatchedMake(vineyardFileList, rpc_client_,
+                                              ipc_client_, pathList));
+    for (size_t i = 0; i < vineyardFileList.size(); i++) {
+      std::shared_ptr<VineyardFileDescriptor> lfd =
+          std::make_shared<VineyardFileDescriptor>();
+      lfd->path = pathList[i];
+      lfd->cur_pos = 0;
+      lfd->vineyard_file = vineyardFileList[i];
+      lfd->opt_type = fileOperationType;
+      fdList.push_back(lfd);
+    }
+  } else {
+    for (size_t i = 0; i < pathList.size(); i++) {
+      std::shared_ptr<VineyardFileBuilder> builder;
+      RETURN_ON_ERROR(VineyardFileBuilder::Make(
+          builder, rpc_client_, ipc_client_, pathList[i], max_file_size_));
+      std::shared_ptr<VineyardFileDescriptor> lfd =
+          std::make_shared<VineyardFileDescriptor>();
+      lfd->path = pathList[i];
+      lfd->cur_pos = 0;
+      lfd->builder = builder;
+      lfd->opt_type = fileOperationType;
+      fdList.push_back(lfd);
+    }
+  }
+  return Status::OK();
+}
+
+Status VineyardFileStorage::Seek(std::shared_ptr<FileDescriptor>& fd,
+                                 size_t offset) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  size_t size;
+  RETURN_ON_ERROR(GetFileSize(fd, size));
+  if (offset > size) {
+    return Status::Invalid("Seek out of range");
+  }
+  lfd->cur_pos = offset;
+  return Status::OK();
+}
+
+Status VineyardFileStorage::Read(std::shared_ptr<FileDescriptor>& fd,
+                                 void* data, size_t size) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  if (lfd->opt_type == FileOperationType::READ) {
+    RETURN_ON_ERROR(lfd->vineyard_file->Read(data, size, lfd->cur_pos));
+    lfd->cur_pos += size;
+  } else {
+    return Status::Invalid("File is not opened for read");
+  }
+
+  return Status::OK();
+}
+
+Status VineyardFileStorage::Write(std::shared_ptr<FileDescriptor>& fd,
+                                  const void* data, size_t size) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  if (lfd->opt_type == FileOperationType::WRITE) {
+    RETURN_ON_ERROR(lfd->builder->Write(data, size, lfd->cur_pos));
+    lfd->cur_pos += size;
+  } else {
+    return Status::Invalid("File is not opened for write");
+  }
+  return Status::OK();
+}
+
+Status VineyardFileStorage::Mkdir(std::string path) { return Status::OK(); }
+
+Status VineyardFileStorage::Flush(std::shared_ptr<FileDescriptor>& fd) {
+  return Status::OK();
+}
+
+Status VineyardFileStorage::GetCurrentPos(std::shared_ptr<FileDescriptor>& fd,
+                                          size_t& pos) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  pos = lfd->cur_pos;
+  return Status::OK();
+}
+
+Status VineyardFileStorage::Close(std::shared_ptr<FileDescriptor>& fd) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  if (lfd->opt_type == FileOperationType::WRITE) {
+    lfd->builder->SealAndPersist(rpc_client_, ipc_client_);  // result unused
+  }
+  return Status::OK();
+}
+
+Status VineyardFileStorage::BatchedClose(
+    std::vector<std::shared_ptr<FileDescriptor>>& fdList) {
+  if (fdList.empty()) {
+    return Status::OK();
+  }
+  if (std::static_pointer_cast<VineyardFileDescriptor>(fdList[0])->opt_type ==
+      FileOperationType::WRITE) {  // fdList[0]'s mode stands for the list
+    std::vector<std::shared_ptr<VineyardFileBuilder>> builderList;
+    for (auto& fd : fdList) {
+      std::shared_ptr<VineyardFileDescriptor> lfd =
+          std::static_pointer_cast<VineyardFileDescriptor>(fd);
+      builderList.push_back(lfd->builder);
+    }
+    VineyardFileBuilder::BatchedSealAndPersist(rpc_client_, ipc_client_,
+                                               builderList);
+  }
+  return Status::OK();
+}
+
+Status VineyardFileStorage::GetFileSize(std::shared_ptr<FileDescriptor>& fd,
+                                        size_t& size) {
+  std::shared_ptr<VineyardFileDescriptor> lfd =
+      std::static_pointer_cast<VineyardFileDescriptor>(fd);
+  if (lfd->opt_type == FileOperationType::READ) {
+    size = lfd->vineyard_file->Size();
+  } else {
+    size = lfd->builder->Size();
+  }
+  return Status::OK();
+}
+
+bool VineyardFileStorage::IsFileExist(const std::string& path) {
+  ObjectID file_id;
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  if (rpc_client_.GetName(origin_path, file_id, false).ok()) {
+    return true;
+  }
+  return false;
+}
+
+Status VineyardFileStorage::Delete(std::string path) {
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  ObjectID file_id;
+  if (!rpc_client_.GetName(origin_path, file_id, false).ok()) {
+    return Status::OK();  // name not registered: nothing to delete
+  }
+  Status del_status =
+      rpc_client_.DelData(std::vector<ObjectID>{file_id}, true, true);
+  Status drop_status = rpc_client_.DropName(origin_path);
+  return del_status.ok() ? drop_status : del_status;  // first failure wins
+}
+
+std::string VineyardFileStorage::GetTmpFileDir(std::string suffix) {
+  return this->rootPath;  // suffix is not used by this backend
+}
+
+Status VineyardFileStorage::MoveFileAtomic(std::string src, std::string dst) {
+  if (src == dst) {
+    return Status::OK();
+  }
+  return Status::Invalid("Vineyard file storage does not support atomic move");
+}
+
+Status VineyardFileStorage::GetFileAccessTime(
+    const std::string& path,
+    std::chrono::duration<int64_t, std::nano>& accessTime) {
+  ObjectID file_id;
+  ObjectMeta meta;
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  RETURN_ON_ERROR(rpc_client_.GetName(origin_path, file_id, false));
+  RETURN_ON_ERROR(rpc_client_.GetMetaData(file_id, meta, false));
+  uint64_t time = meta.GetKeyValue<uint64_t>("access_time");
+  accessTime = std::chrono::nanoseconds(time);
+  return Status::OK();
+}
+
+Status VineyardFileStorage::TouchFile(const std::string& path) {
+  ObjectID file_id;
+  ObjectMeta meta;
+  std::string origin_path = std::regex_replace(path, std::regex("/+"), "\\/");
+  RETURN_ON_ERROR(rpc_client_.GetName(origin_path, file_id, false));
+  RETURN_ON_ERROR(rpc_client_.GetMetaData(file_id, meta, false));
+  meta.AddKeyValue(
+      "access_time",
+      std::chrono::duration_cast<std::chrono::nanoseconds>(
+          std::chrono::high_resolution_clock::now().time_since_epoch())
+          .count());
+  // Re-create the metadata as a new object, then move the name over to it.
+  ObjectID new_object_id;
+  RETURN_ON_ERROR(rpc_client_.CreateMetaData(meta, new_object_id));
+  RETURN_ON_ERROR(rpc_client_.Persist(new_object_id));
+  RETURN_ON_ERROR(
+      rpc_client_.DelData(std::vector<ObjectID>{file_id}, false, false));
+  RETURN_ON_ERROR(rpc_client_.DropName(origin_path));
+  RETURN_ON_ERROR(rpc_client_.PutName(new_object_id, origin_path));
+  return Status::OK();
+}
+
+Status VineyardFileStorage::GetFileList(std::string dirPath,
+                                        std::vector<std::string>& fileList) {
+  std::string origin_path =
+      std::regex_replace(dirPath, std::regex("/+"), "\\/");
+  std::map<std::string, ObjectID> file_name_to_ids;
+  RETURN_ON_ERROR(
+      rpc_client_.ListNames(origin_path, false, UINT64_MAX, file_name_to_ids));
+  fileList.resize(file_name_to_ids.size());
+  size_t i = 0;
+  for (auto& kv : file_name_to_ids) {
+    fileList[i++] = kv.first;
+  }
+  return Status::OK();
+}
+
+}  // namespace vineyard
diff --git a/modules/llm-cache/storage/vineyard_file_storage.h b/modules/llm-cache/storage/vineyard_file_storage.h
new file mode 100644
index 0000000000..5911c5c6dd
--- /dev/null
+++ b/modules/llm-cache/storage/vineyard_file_storage.h
@@ -0,0 +1,143 @@
+/** Copyright 2020-2023 Alibaba Group Holding Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef MODULES_LLM_CACHE_STORAGE_VINEYARD_FILE_STORAGE_H_
+#define MODULES_LLM_CACHE_STORAGE_VINEYARD_FILE_STORAGE_H_
+
+#include <list>
+#include <memory>
+#include <regex>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include "client/client.h"
+#include "client/rpc_client.h"
+#include "common/util/logging.h"
+#include "llm-cache/ds/vineyard_file.h"
+#include "llm-cache/storage/file_storage.h"
+
+namespace vineyard {
+
+struct VineyardFileDescriptor : public FileDescriptor {
+  std::string path;
+  std::string lock_path;
+  uint64_t cur_pos = 0;  // byte offset used by the next Read()/Write()
+  FileOperationType opt_type{};  // zero until Open()/BatchedOpen() sets it
+  std::shared_ptr<VineyardFileBuilder> builder;  // used when opened for write
+  std::shared_ptr<VineyardFile> vineyard_file;   // used when opened for read
+};
+
+class VineyardFileStorage : public FileStorage {
+ public:
+  VineyardFileStorage(RPCClient& rpc_client, Client& ipc_client,
+                      int tensorNBytes, int cacheCapacity, int layer,
+                      int chunkSize, int hashChunkSize, std::string rootPath,
+                      int64_t gcInterval, int64_t ttl, bool enableGlobalGC,
+                      int64_t globalGCInterval, int64_t globalTTL)
+      : rpc_client_(rpc_client), ipc_client_(ipc_client) {
+    this->hashAlgorithm = std::make_shared<MurmurHash3Algorithm>();
+    this->hasher = std::make_shared<Hasher>(hashAlgorithm.get());
+    this->tensorNBytes = tensorNBytes;
+    this->cacheCapacity = cacheCapacity;
+    this->layer = layer;
+    this->chunkSize = chunkSize;
+    this->hashChunkSize = hashChunkSize;
+    this->rootPath = std::regex_replace(rootPath + "/", std::regex("/+"), "/");
+    this->tempFileDir = this->rootPath;
+    this->gcInterval = std::chrono::seconds(gcInterval);
+    this->fileTTL = std::chrono::seconds(ttl);
+    this->globalGCInterval = std::chrono::seconds(globalGCInterval);
+    this->globalFileTTL = std::chrono::seconds(globalTTL);
+    this->enableGlobalGC = enableGlobalGC;
+    this->max_file_size_ =  // size_t math: the int product could overflow
+        size_t{2} * tensorNBytes * layer * chunkSize + 65536 * sizeof(int);
+  }
+
+  ~VineyardFileStorage() = default;
+
+  Status Init() override {
+    this->gcThread =
+        std::thread(FileStorage::DefaultGCThread, shared_from_this());
+    this->globalGCThread =
+        std::thread(FileStorage::GlobalGCThread, shared_from_this());
+    return Status::OK();
+  }
+
+  void CloseGCThread() override {
+    LOG(INFO) << "Call CloseGCThread";
+    FileStorage::CloseGCThread();
+  }
+
+  std::shared_ptr<FileDescriptor> CreateFileDescriptor() override;
+
+  Status Open(std::string path, std::shared_ptr<FileDescriptor>& fd,
+              FileOperationType fileOperationType) override;
+
+  Status BatchedOpen(const std::vector<std::string>& pathList,
+                     std::vector<std::shared_ptr<FileDescriptor>>& fdList,
+                     FileOperationType fileOperationType) override;
+
+  Status Seek(std::shared_ptr<FileDescriptor>& fd, size_t offset) override;
+
+  Status Read(std::shared_ptr<FileDescriptor>& fd, void* data,
+              size_t size) override;
+
+  Status Write(std::shared_ptr<FileDescriptor>& fd, const void* data,
+               size_t size) override;
+
+  Status Mkdir(std::string path) override;
+
+  Status GetFileSize(std::shared_ptr<FileDescriptor>& fd,
+                     size_t& size) override;
+
+  Status GetCurrentPos(std::shared_ptr<FileDescriptor>& fd,
+                       size_t& pos) override;
+
+  Status MoveFileAtomic(std::string src, std::string dst) override;
+
+  bool IsFileExist(const std::string& path) override;
+
+  Status Flush(std::shared_ptr<FileDescriptor>& fd) override;
+
+  Status Close(std::shared_ptr<FileDescriptor>& fd) override;
+
+  Status BatchedClose(
+      std::vector<std::shared_ptr<FileDescriptor>>& fdList) override;
+
+  Status Delete(std::string path) override;
+
+  Status GetFileList(std::string dirPath,
+                     std::vector<std::string>& fileList) override;
+
+  Status GetFileAccessTime(
+      const std::string& path,
+      std::chrono::duration<int64_t, std::nano>& accessTime) override;
+
+  Status TouchFile(const std::string& path) override;
+
+  std::string GetTmpFileDir(std::string surfix) override;
+
+  std::list<std::string>& GetGCList() { return this->gcList; }
+
+ private:
+  RPCClient& rpc_client_;  // not owned; must outlive this storage
+  Client& ipc_client_;     // not owned; must outlive this storage
+  size_t max_file_size_;   // capacity handed to VineyardFileBuilder::Make()
+};
+
+}  // namespace vineyard
+
+#endif  // MODULES_LLM_CACHE_STORAGE_VINEYARD_FILE_STORAGE_H_
diff --git a/python/vineyard/llm/cache.cc b/python/vineyard/llm/cache.cc
index b3de645bbf..9a0e59ee38 100644
--- a/python/vineyard/llm/cache.cc
+++ b/python/vineyard/llm/cache.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "pybind11/stl.h"
 
 #include "client/client.h"
+#include "client/rpc_client.h"
 
 #include "llm-cache/ds/config.h"
 #include "llm-cache/ds/kv_cache_block.h"
@@ -32,6 +33,7 @@ PYBIND11_MODULE(_llm_C, m) {
 
   pybind11::enum_<FilesystemType>(m, "FilesystemType")
       .value("LOCAL", FilesystemType::LOCAL)
+      .value("VINEYARD", FilesystemType::VINEYARD)
       .export_values();
 
   py::class_<LLMKV, std::shared_ptr<LLMKV>>(m, "KVTensor",
@@ -107,6 +109,32 @@ PYBIND11_MODULE(_llm_C, m) {
            py::arg("enable_global_gc") = false,
            py::arg("global_gc_interval") = 30 * 60,
            py::arg("global_ttl") = 30 * 60)
+      .def(py::init([](py::object rpc_client, py::object ipc_client,
+                       int tensor_nbytes, int cache_capacity, int layer,
+                       int chunk_size, int hash_chunk_size, std::string root,
+                       FilesystemType filesystemType, int gc_interval, int ttl,
+                       bool enable_global_gc, int global_gc_interval,
+                       int global_ttl) -> std::shared_ptr<KVCacheManager> {
+             FileCacheConfig config(
+                 tensor_nbytes, cache_capacity, layer, chunk_size,
+                 hash_chunk_size, root, filesystemType, gc_interval, ttl,
+                 enable_global_gc, global_gc_interval, global_ttl);
+             Client& ipc_client_ = ipc_client.cast<Client&>();
+             RPCClient& rpc_client_ = rpc_client.cast<RPCClient&>();
+             std::shared_ptr<KVCacheManager> manager;
+             VINEYARD_CHECK_OK(vineyard::KVCacheManager::Make(
+                 rpc_client_, ipc_client_, manager, config));
+             return manager;
+           }),
+           py::arg("rpc_client"), py::arg("ipc_client"),
+           py::arg("tensor_nbytes") = 1024, py::arg("cache_capacity") = 1024,
+           py::arg("layer") = 1, py::arg("chunk_size") = 16,
+           py::arg("hash_chunk_size") = 4, py::arg("root") = "root",
+           py::arg("filesystem_type") = FilesystemType::VINEYARD,
+           py::arg("gc_interval") = 30 * 60, py::arg("ttl") = 30 * 60,
+           py::arg("enable_global_gc") = false,
+           py::arg("global_gc_interval") = 30 * 60,
+           py::arg("global_ttl") = 30 * 60)
       .def(
           "update",
           [](KVCacheManager* self, const std::vector<int>& tokenList,
@@ -137,6 +165,16 @@ PYBIND11_MODULE(_llm_C, m) {
             return updated;
           },
           py::arg("prefix"), py::arg("tokens"), py::arg("kv_states"))
+      .def(
+          "batched_update",
+          [](KVCacheManager* self, const std::vector<int>& tokens,
+             const std::vector<std::vector<std::pair<LLMKV, LLMKV>>>& kv_states)
+              -> size_t {
+            size_t updated = 0;
+            VINEYARD_CHECK_OK(self->BatchedUpdate(tokens, kv_states, updated));
+            return updated;
+          },
+          py::arg("tokens"), py::arg("kv_states"))
       .def(
           "query",
           [](KVCacheManager* self, const std::vector<int>& tokens,
@@ -187,6 +225,25 @@ PYBIND11_MODULE(_llm_C, m) {
             return matched;
           },
           py::arg("prefix"), py::arg("tokens"), py::arg("kv_states"))
+      .def(
+          "batched_query",
+          [](KVCacheManager* self, const std::vector<int>& tokens,
+             py::list& kv_cache_list) -> size_t {
+            std::vector<std::vector<std::pair<LLMKV, LLMKV>>> kv_state_vec =
+                kv_cache_list
+                    .cast<std::vector<std::vector<std::pair<LLMKV, LLMKV>>>>();
+            size_t matched = 0;
+            VINEYARD_CHECK_OK(
+                self->BatchedQuery(tokens, kv_state_vec, matched));
+            for (size_t i = 0; i < kv_state_vec.size() && i < matched; ++i) {
+              for (size_t j = 0; j < kv_state_vec[i].size(); ++j) {
+                kv_cache_list[i].cast<py::list>()[j] =
+                    py::cast(kv_state_vec[i][j]);
+              }
+            }
+            return matched;
+          },
+          py::arg("tokens"), py::arg("kv_states"))
       .def("close", [](KVCacheManager* self) { self->Close(); });
 }
 
diff --git a/python/vineyard/llm/cache.py b/python/vineyard/llm/cache.py
index 925a04ea4a..0820ee05d2 100644
--- a/python/vineyard/llm/cache.py
+++ b/python/vineyard/llm/cache.py
@@ -113,6 +113,9 @@ def __init__(
         enable_global_gc: bool = False,
         global_gc_interval: int = 3 * 60 * 60,
         global_ttl: int = 3 * 60 * 60,
+        socket: str = "",
+        rpc_endpoint: str = "",
+        rdma_endpoint: str = "",
     ):
         """Create a file cache config.
 
@@ -153,6 +156,16 @@ def __init__(
         self.global_gc_interval = global_gc_interval
         self.global_ttl = global_ttl
 
+        import vineyard
+
+        if filesystem_type == FilesystemType.VINEYARD:
+            self.ipc_client = vineyard.connect(socket).ipc_client
+            rpc_host = rpc_endpoint.split(":")[0]
+            rpc_port = rpc_endpoint.split(":")[1]
+            self.rpc_client = vineyard.connect(
+                host=rpc_host, port=rpc_port, rdma_endpoint=rdma_endpoint
+            ).rpc_client
+
     def __repr__(self):
         return (
             f'FileCacheConfig('
@@ -164,7 +177,7 @@ def __repr__(self):
             f'ttl={self.ttl}, '
             f'enable_global_gc={self.enable_global_gc}, '
             f'global_gc_interval={self.global_gc_interval}, '
-            f'global_ttl={self.global_ttl})'
+            f'global_ttl={self.global_ttl}), '
         )
 
 
@@ -311,6 +324,13 @@ def update(
         else:
             return self.kv_cache_manager.update(tokens, kv_cache_list)
 
+    def batched_update(
+        self,
+        tokens: List[int],
+        kv_cache_list: List[List[Tuple[KVTensor, KVTensor]]],
+    ) -> int:
+        return self.kv_cache_manager.batched_update(tokens, kv_cache_list)
+
     def query(
         self,
         prefix: List[int],
@@ -371,6 +391,13 @@ def query(
         else:
             return self.kv_cache_manager.query(tokens, kv_cache_list)
 
+    def batched_query(
+        self,
+        tokens: List[int],
+        kv_cache_list: List[List[Tuple[KVTensor, KVTensor]]],
+    ) -> int:
+        return self.kv_cache_manager.batched_query(tokens, kv_cache_list)
+
     def __del__(self):
         if self.kv_cache_manager:
             with contextlib.suppress(Exception):
diff --git a/python/vineyard/llm/tests/test_llm.py b/python/vineyard/llm/tests/test_llm.py
index 8568856364..4ec8676d50 100644
--- a/python/vineyard/llm/tests/test_llm.py
+++ b/python/vineyard/llm/tests/test_llm.py
@@ -21,6 +21,7 @@
 from vineyard.llm import KVCache
 from vineyard.llm import KVTensor
 from vineyard.llm.cache import FileCacheConfig
+from vineyard.llm.cache import FilesystemType
 from vineyard.llm.cache import VineyardCacheConfig
 
 
@@ -162,3 +163,74 @@ def test_kv_cache_update_and_query_on_fs():
         ):
             np.array_equal(k_tensor, queried_k_tensor)
             np.array_equal(v_tensor, queried_v_tensor)
+
+
+def test_kv_cache_update_and_query_on_vineyard_fs(
+    vineyard_ipc_sockets, vineyard_endpoints
+):
+    """Round-trip KV tensors through a FileCacheConfig backed by vineyard."""
+    file_cache_config = FileCacheConfig(
+        chunk_size=2,
+        hash_chunk_size=2,
+        root="/tmp/vineyard/llm_cache",
+        filesystem_type=FilesystemType.VINEYARD,
+        socket=vineyard_ipc_sockets[0],
+        rpc_endpoint=vineyard_endpoints[0],
+        rdma_endpoint='',
+    )
+    cache = KVCache(
+        cache_config=file_cache_config,
+        tensor_nbytes=16,  # should be the same as the nbytes of the tensor
+        cache_capacity=1024,
+        layer=2,
+    )
+
+    tokens = [1, 2, 3, 4]
+    original_kv_tensors = []
+    kv_tensors_to_update = []
+    for _ in range(0, len(tokens), file_cache_config.chunk_size):
+        k_tensor = np.random.rand(2, 2).astype(np.float32)
+        v_tensor = np.random.rand(2, 2).astype(np.float32)
+        for _ in range(file_cache_config.chunk_size):
+            original_kv_tensors.append(
+                [(k_tensor, v_tensor) for _ in range(cache.layer)]
+            )
+            kv_tensors_to_update.append(
+                [
+                    (
+                        KVTensor(k_tensor.ctypes.data, k_tensor.nbytes),
+                        KVTensor(v_tensor.ctypes.data, v_tensor.nbytes),
+                    )
+                    for _ in range(cache.layer)
+                ]
+            )
+
+    updated = cache.batched_update(tokens, kv_tensors_to_update)
+    assert updated == len(tokens)
+
+    kv_tensors_from_cache = []
+    kv_tensors = []
+    for _ in range(len(tokens)):
+        k_tensor = np.empty((2, 2), dtype=np.float32)
+        v_tensor = np.empty((2, 2), dtype=np.float32)
+        kv_tensors_from_cache.append([(k_tensor, v_tensor) for _ in range(cache.layer)])
+        kv_tensors.append(
+            [
+                (
+                    KVTensor(k_tensor.ctypes.data, k_tensor.nbytes),
+                    KVTensor(v_tensor.ctypes.data, v_tensor.nbytes),
+                )
+                for _ in range(cache.layer)
+            ]
+        )
+    matched = cache.batched_query(tokens, kv_tensors)
+    assert matched == len(tokens)
+
+    assert len(kv_tensors) == len(kv_tensors_from_cache)
+    for kv, kv_from_cache in zip(original_kv_tensors, kv_tensors_from_cache):
+        assert len(kv) == len(kv_from_cache)
+        for (k_tensor, v_tensor), (queried_k_tensor, queried_v_tensor) in zip(
+            kv, kv_from_cache
+        ):
+            assert np.array_equal(k_tensor, queried_k_tensor)
+            assert np.array_equal(v_tensor, queried_v_tensor)
diff --git a/src/client/client.cc b/src/client/client.cc
index c7f3c6e48d..4ea6d0709a 100644
--- a/src/client/client.cc
+++ b/src/client/client.cc
@@ -440,6 +440,12 @@ std::vector<std::shared_ptr<Object>> Client::GetObjects(
     }
     return objects;
   }
+  return GetObjects(metas);
+}
+
+std::vector<std::shared_ptr<Object>> Client::GetObjects(
+    const std::vector<ObjectMeta>& metas) {
+  std::vector<std::shared_ptr<Object>> objects(metas.size());
   for (size_t index = 0; index < metas.size(); ++index) {
     if (metas[index].MetaData().empty()) {
       objects[index] = nullptr;
diff --git a/src/client/client.h b/src/client/client.h
index 0ed4dc581d..c96cb27d78 100644
--- a/src/client/client.h
+++ b/src/client/client.h
@@ -658,6 +658,9 @@ class Client final : public BasicIPCClient,
   std::vector<std::shared_ptr<Object>> GetObjects(
       const std::vector<ObjectID>& ids, const bool sync_remote = true);
 
+  std::vector<std::shared_ptr<Object>> GetObjects(
+      const std::vector<ObjectMeta>& metas);
+
   /**
    * @brief List object metadatas in vineyard, using the given typename
    * patterns.
diff --git a/src/client/ds/object_meta.cc b/src/client/ds/object_meta.cc
index eb54418c9c..e09cd6b205 100644
--- a/src/client/ds/object_meta.cc
+++ b/src/client/ds/object_meta.cc
@@ -158,6 +158,12 @@ void ObjectMeta::AddRemoteBlob(const RemoteBlob& blob) {
   VINEYARD_CHECK_OK(buffer_set_->EmplaceBuffer(blob.id(), blob.Buffer()));
 }
 
+void ObjectMeta::AddRemoteBlob(ObjectID id,  // overload: id + raw buffer
+                               const std::shared_ptr<Buffer>& buffer) {
+  VINEYARD_CHECK_OK(buffer_set_->EmplaceBuffer(id));  // id-only call first
+  VINEYARD_CHECK_OK(buffer_set_->EmplaceBuffer(id, buffer));  // then buffer
+}
+
 void ObjectMeta::AddMember(const std::string& name, const ObjectMeta& member) {
   VINEYARD_ASSERT(!meta_.contains(name));
   meta_[name] = member.meta_;
diff --git a/src/client/ds/object_meta.h b/src/client/ds/object_meta.h
index 62da14546d..3928cc3995 100644
--- a/src/client/ds/object_meta.h
+++ b/src/client/ds/object_meta.h
@@ -598,6 +598,14 @@ class ObjectMeta {
    */
   void AddRemoteBlob(const RemoteBlob& blob);
 
+  /**
+   * @brief Add remote blob's buffer to ObjectMeta.
+   *
+   * @param id The object ID of remote blob.
+   * @param buffer The buffer of remote blob.
+   */
+  void AddRemoteBlob(ObjectID id, const std::shared_ptr<Buffer>& buffer);
+
   /**
    * @brief Add member to ObjectMeta.
    *
diff --git a/src/client/rpc_client.cc b/src/client/rpc_client.cc
index 38578a6c22..c574a3addf 100644
--- a/src/client/rpc_client.cc
+++ b/src/client/rpc_client.cc
@@ -360,6 +360,34 @@ Status RPCClient::GetObject(const ObjectID id, std::shared_ptr<Object>& object,
   return Status::OK();
 }
 
+Status RPCClient::BatchedGetObjects(
+    const std::vector<ObjectMeta> metas,
+    std::vector<std::shared_ptr<Object>>& objects) {
+  std::vector<std::shared_ptr<RemoteBlob>> remote_blobs;
+  std::vector<ObjectID> batchedObjectIDVec;
+  for (auto const& meta : metas) {  // collect all blob ids for one request
+    auto const& ids = meta.buffer_set_->AllBufferIds();
+    batchedObjectIDVec.insert(batchedObjectIDVec.end(), ids.begin(),
+                              ids.end());
+  }
+  RETURN_ON_ERROR(GetRemoteBlobs(batchedObjectIDVec, false, remote_blobs));
+  size_t j = 0;  // cursor into remote_blobs; assumes they follow request order
+  for (auto const& meta : metas) {
+    for (size_t i = 0; i < meta.buffer_set_->AllBufferIds().size(); i++, j++) {
+      RETURN_ON_ERROR(meta.buffer_set_->EmplaceBuffer(
+          remote_blobs[j]->id(), remote_blobs[j]->Buffer()));
+    }
+    meta.ForceLocal();
+    std::shared_ptr<Object> object = ObjectFactory::Create(meta.GetTypeName());
+    if (object == nullptr) {  // factory returned nothing: use a plain Object
+      object = std::unique_ptr<Object>(new Object());
+    }
+    object->Construct(meta);
+    objects.push_back(object);
+  }
+  return Status::OK();
+}
+
 std::vector<std::shared_ptr<Object>> RPCClient::GetObjects(
     const std::vector<ObjectID>& ids, const bool sync_remote) {
   std::vector<std::shared_ptr<Object>> objects(ids.size());
@@ -757,16 +785,16 @@ Status RPCClient::GetRemoteBlob(const ObjectID& id, const bool unsafe,
                                  false, message_out);
   }
   RETURN_ON_ERROR(doWrite(message_out));
+  json message_in;
+  RETURN_ON_ERROR(doRead(message_in));
+  RETURN_ON_ERROR(ReadGetBuffersReply(message_in, payloads, fd_sent));
+  RETURN_ON_ASSERT(payloads.size() == 1, "Expects only one payload");
   if (rdma_connected_) {
     std::unordered_set<ObjectID> ids{payloads[0].object_id};
     std::function<void(std::unordered_set<ObjectID>)> func = std::bind(
         &RPCClient::doReleaseBlobsWithRDMARequest, this, std::placeholders::_1);
     rdmaBlobScopeGuard.set(func, ids);
   }
-  json message_in;
-  RETURN_ON_ERROR(doRead(message_in));
-  RETURN_ON_ERROR(ReadGetBuffersReply(message_in, payloads, fd_sent));
-  RETURN_ON_ASSERT(payloads.size() == 1, "Expects only one payload");
 
   buffer = std::shared_ptr<RemoteBlob>(new RemoteBlob(
       payloads[0].object_id, remote_instance_id_, payloads[0].data_size));
@@ -874,11 +902,6 @@ Status RPCClient::GetRemoteBlobs(
                                  message_out);
   }
   RETURN_ON_ERROR(doWrite(message_out));
-  if (rdma_connected_) {
-    std::function<void(std::unordered_set<ObjectID>)> func = std::bind(
-        &RPCClient::doReleaseBlobsWithRDMARequest, this, std::placeholders::_1);
-    rdmaBlobScopeGuard.set(func, id_set);
-  }
   json message_in;
   RETURN_ON_ERROR(doRead(message_in));
   RETURN_ON_ERROR(ReadGetBuffersReply(message_in, payloads, fd_sent));
@@ -886,6 +909,11 @@ Status RPCClient::GetRemoteBlobs(
                    "The result size doesn't match with the requested sizes: " +
                        std::to_string(payloads.size()) + " vs. " +
                        std::to_string(id_set.size()));
+  if (rdma_connected_) {
+    std::function<void(std::unordered_set<ObjectID>)> func = std::bind(
+        &RPCClient::doReleaseBlobsWithRDMARequest, this, std::placeholders::_1);
+    rdmaBlobScopeGuard.set(func, id_set);
+  }
 
   std::unordered_map<ObjectID, std::shared_ptr<RemoteBlob>> id_payload_map;
   if (rdma_connected_) {
@@ -1020,4 +1048,31 @@ Status RPCClient::doReleaseBlobsWithRDMARequest(
   return Status::OK();
 }
 
+Status RPCClient::TryAcquireLock(std::string key, bool& result,
+                                 std::string& actural_key) {
+  ENSURE_CONNECTED(this);
+  // Send the lock request; a failed write is returned to the caller.
+  std::string message_out;
+  WriteTryAcquireLockRequest(key, message_out);
+  RETURN_ON_ERROR(doWrite(message_out));
+  // Read the reply, which fills `result` and `actural_key`.
+  json message_in;
+  RETURN_ON_ERROR(doRead(message_in));
+  RETURN_ON_ERROR(ReadTryAcquireLockReply(message_in, result, actural_key));
+  return Status::OK();
+}
+
+Status RPCClient::TryReleaseLock(std::string key, bool& result) {
+  ENSURE_CONNECTED(this);
+  // Send the unlock request; a failed write is returned to the caller.
+  std::string message_out;
+  WriteTryReleaseLockRequest(key, message_out);
+  RETURN_ON_ERROR(doWrite(message_out));
+  // Read the reply, which fills `result`.
+  json message_in;
+  RETURN_ON_ERROR(doRead(message_in));
+  RETURN_ON_ERROR(ReadTryReleaseLockReply(message_in, result));
+  return Status::OK();
+}
+
 }  // namespace vineyard
diff --git a/src/client/rpc_client.h b/src/client/rpc_client.h
index 2f2448ab75..d42f901b24 100644
--- a/src/client/rpc_client.h
+++ b/src/client/rpc_client.h
@@ -217,6 +217,9 @@ class RPCClient final : public ClientBase {
   Status GetObject(const ObjectID id, std::shared_ptr<Object>& object,
                    const bool sync_remote = true);
 
+  Status BatchedGetObjects(const std::vector<ObjectMeta> metas,
+                           std::vector<std::shared_ptr<Object>>& objects);
+
   /**
    * @brief Get multiple objects from vineyard.
    *
@@ -414,10 +417,7 @@ class RPCClient final : public ClientBase {
    * @return Status that indicates whether the lock process succeeds.
    */
   Status TryAcquireLock(std::string key, bool& result,
-                        std::string& actual_key) override {
-    // TBD
-    return Status::NotImplemented("TryAcquireLock is not implemented yet.");
-  }
+                        std::string& actural_key) override;
 
   /**
    * @brief Try to release a distributed lock.
@@ -426,10 +426,7 @@ class RPCClient final : public ClientBase {
    *
    * @return Status that indicates whether the unlock process succeeds.
    */
-  Status TryReleaseLock(std::string key, bool& result) override {
-    // TBD
-    return Status::NotImplemented("TryAcquireLock is not implemented yet.");
-  }
+  Status TryReleaseLock(std::string key, bool& result) override;
 
   /**
    * @brief Get the RDMA endpoint of the connected vineyard server.