Skip to content

Commit

Permalink
Add tensor memory allocator
Browse files Browse the repository at this point in the history
  • Loading branch information
keith2018 committed Jan 12, 2025
1 parent d7c884c commit 4c9ea57
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 12 deletions.
84 changes: 84 additions & 0 deletions TinyTorch/Allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* TinyTorch
* @author : [email protected]
*
*/

#include "Allocator.h"

#include <cassert>
#include <cstdlib>

#include "Logger.h"

namespace TinyTorch {

// Construct a caching allocator with a cache budget of `maxCacheSize` bytes.
// The cache starts empty; nothing is preallocated.
CachedAllocator::CachedAllocator(size_t maxCacheSize)
    : maxCacheSize_(maxCacheSize), currentCacheSize_(0) {}

// Destructor: release cached blocks, then any blocks callers never returned.
CachedAllocator::~CachedAllocator() {
  // Qualified call avoids virtual dispatch during destruction.
  // NOTE(review): clear() asserts that allocatedList_ is empty, so in debug
  // builds destroying the allocator while allocations are outstanding trips
  // the assert before the backstop loop below runs — confirm this is intended.
  CachedAllocator::clear();
  // Leak backstop (release builds): free blocks still marked as allocated.
  for (auto& pair : allocatedList_) {
    std::free(pair.first);
  }
}

// Allocate `size` bytes, preferring a cached block of exactly that size.
// Returns nullptr on system allocation failure (after logging).
void* CachedAllocator::malloc(size_t size) {
  // Exact-size reuse: pop a previously freed block of this size, if any.
  auto it = freedList_.find(size);
  if (it != freedList_.end() && !it->second.empty()) {
    void* ptr = it->second.front();
    it->second.pop_front();
    if (it->second.empty()) {
      // Keep freedList_ free of empty buckets so eviction in shrink()
      // never has to step over them.
      freedList_.erase(it);
    }
    allocatedList_[ptr] = size;
    currentCacheSize_ -= size;
    return ptr;
  }

  // Cache miss: fall back to the system allocator.
  void* ptr = std::malloc(size);
  if (ptr) {
    allocatedList_[ptr] = size;
  } else {
    // Bug fix: size_t must be printed with %zu, not %lld (undefined
    // behavior / wrong output on platforms where they differ).
    LOGE("std::malloc failed with size: %zu", size);
  }
  return ptr;
}

// Return `ptr` to the cache instead of the system heap. The block is parked
// in the per-size free list and may be handed out again by malloc(); the
// cache is trimmed afterwards if it exceeds its budget.
void CachedAllocator::free(void* ptr) {
  const auto found = allocatedList_.find(ptr);
  if (found == allocatedList_.end()) {
    // Pointer was not produced by this allocator (or was already freed).
    LOGE("error: ptr not valid: %p", ptr);
    return;
  }

  const size_t blockSize = found->second;
  allocatedList_.erase(found);
  freedList_[blockSize].push_back(ptr);
  currentCacheSize_ += blockSize;

  shrink();
}

void CachedAllocator::clear() {
assert(allocatedList_.empty());
for (auto& pair : freedList_) {
for (void* ptr : pair.second) {
std::free(ptr);
}
}
}

// Evict cached blocks (eldest bucket first, per map iteration order) until
// the cache size fits within maxCacheSize_.
void CachedAllocator::shrink() {
  while (!freedList_.empty() && currentCacheSize_ > maxCacheSize_) {
    auto it = freedList_.begin();
    if (it->second.empty()) {
      // Bug fix: malloc() can drain a bucket and leave an empty list in the
      // map. The original loop skipped such a bucket without erasing it and
      // begin() kept returning it — an infinite loop. Drop it and continue.
      freedList_.erase(it);
      continue;
    }
    void* ptr = it->second.front();
    it->second.pop_front();
    std::free(ptr);
    currentCacheSize_ -= it->first;

    if (it->second.empty()) {
      freedList_.erase(it);
    }
  }
}

} // namespace TinyTorch
34 changes: 34 additions & 0 deletions TinyTorch/Allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* TinyTorch
* @author : [email protected]
*
*/

#pragma once

#include <list>
#include <unordered_map>

#include "TensorImpl.h"

namespace TinyTorch {

// Size-bucketed caching allocator. free() parks blocks in per-size free
// lists so later malloc() calls of the same exact size reuse them without
// touching the system heap; the cache is capped at maxCacheSize_ bytes.
// NOTE(review): no synchronization members are declared — presumably
// intended for single-threaded use; confirm with callers.
class CachedAllocator : public Allocator {
public:
// Cache budget in bytes; blocks beyond it are evicted back to the system.
explicit CachedAllocator(size_t maxCacheSize = 256 * 1024 * 1024); // 256 MB
~CachedAllocator() override;

void* malloc(size_t size) override;
void free(void* ptr) override;
void clear() override;

private:
// Evicts cached blocks until currentCacheSize_ <= maxCacheSize_.
void shrink();

size_t maxCacheSize_;      // cache budget (bytes)
size_t currentCacheSize_;  // total bytes currently parked in freedList_
// Live allocations: pointer -> requested size.
std::unordered_map<void*, size_t> allocatedList_;
// Cached (freed) blocks, bucketed by exact allocation size.
std::unordered_map<size_t, std::list<void*>> freedList_;
};

} // namespace TinyTorch
4 changes: 4 additions & 0 deletions TinyTorch/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ class Tensor {

const TensorImpl &data() const { return *data_; }

// Install a process-wide allocator for all tensor storage; forwards to
// TensorImpl. The caller retains ownership of `allocator` and must keep it
// alive for as long as any tensor may allocate or free memory.
static void setAllocator(Allocator *allocator) {
  TensorImpl::setAllocator(allocator);
}

private:
void initAutograd(bool requiresGrad,
const std::shared_ptr<Function> &gradFunc = nullptr);
Expand Down
23 changes: 20 additions & 3 deletions TinyTorch/TensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ namespace TinyTorch {
std::optional<unsigned int> RandomGenerator::seed_;
std::default_random_engine RandomGenerator::randomEngine_;

// Process-wide fallback allocator (plain std::malloc/std::free) used until
// a custom one is installed via TensorImpl::setAllocator().
static Allocator defaultAllocator{};
Allocator *TensorImpl::allocator_ = &defaultAllocator;

// clang-format off
#define TENSOR_CHECK_EMPTY(t, ret) \
do { \
Expand Down Expand Up @@ -148,7 +151,7 @@ TensorImpl TensorImpl::scalar(const float &value) {
ret.elemCount_ = 1;
ret.shape_.clear();
ret.strides_.clear();
ret.data_ = new float[1];
ret.data_ = (float *)allocator_->malloc(sizeof(float));
ret.data_[0] = value;
return ret;
}
Expand Down Expand Up @@ -254,12 +257,26 @@ void TensorImpl::initMeta() {
}

// Allocate storage for elemCount_ floats through the pluggable allocator,
// optionally copying initial contents from `from` (may be nullptr).
// NOTE(review): allocator_->malloc can return nullptr on failure; the
// memcpy below would then be UB — confirm callers guarantee success or add
// a guard.
void TensorImpl::initData(const float *from) {
  data_ = (float *)allocator_->malloc(sizeof(float) * elemCount_);
  if (from) {
    memcpy(data_, from, elemCount_ * sizeof(float));
  }
}

// Release the tensor's storage and reset all shape metadata. Safe to call
// repeatedly: an empty tensor returns immediately and data_ is nulled after
// freeing.
// NOTE(review): assumes empty() is false whenever data_ holds an allocation
// (e.g. scalars created with dimCount_ == 0 and elemCount_ == 1) — verify
// empty()'s definition so scalar storage is not leaked here.
void TensorImpl::dispose() {
  if (empty()) {
    return;
  }
  dimCount_ = 0;
  elemCount_ = 0;
  shape_.clear();
  strides_.clear();
  if (data_ != nullptr) {
    // Return memory through the same pluggable allocator that produced it.
    allocator_->free(data_);
    data_ = nullptr;
  }
}

TensorImpl TensorImpl::reshape(const std::vector<int32_t> &shape) {
shape_.resize(shape.size());

Expand Down Expand Up @@ -689,7 +706,7 @@ TensorImpl TensorImpl::col2im(const Shape &inputShape, Size2D kernelSize,
auto outH = (height - kernelSize.h + 2 * padding.h) / stride.h + 1;
auto outW = (width - kernelSize.w + 2 * padding.w) / stride.w + 1;

int32_t colH = outH * outW;
// int32_t colH = outH * outW;
int32_t colW = channels * kernelSize.h * kernelSize.w;

TensorImpl retTensor = zeros(inputShape);
Expand Down
23 changes: 14 additions & 9 deletions TinyTorch/TensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,15 @@ typedef std::vector<float> Array1d;
typedef std::vector<std::vector<float>> Array2d;
typedef std::vector<std::vector<std::vector<float>>> Array3d;

// Allocator interface for tensor storage. The default implementation goes
// straight to the system heap; subclasses (e.g. a caching allocator) can
// override malloc/free/clear to pool memory.
class Allocator {
public:
virtual ~Allocator() = default;

// Allocate `size` bytes; returns nullptr on failure (std::malloc contract).
virtual void *malloc(size_t size) { return std::malloc(size); }
// Release a block previously returned by malloc(); nullptr is a no-op.
virtual void free(void *ptr) { std::free(ptr); }
// Release any internally cached memory; no-op for the base allocator.
virtual void clear() {}
};

class RandomGenerator {
public:
static void setSeed(const unsigned int seed) {
Expand Down Expand Up @@ -130,17 +139,10 @@ class TensorImpl {
data_ = other.data_;
}

void dispose() {
dimCount_ = 0;
elemCount_ = 0;
shape_.clear();
strides_.clear();
delete[] data_;
data_ = nullptr;
}

~TensorImpl() { dispose(); }

static void setAllocator(Allocator *allocator) { allocator_ = allocator; }

static TensorImpl shape(const Shape &shape);

static TensorImpl scalar(const float &value);
Expand Down Expand Up @@ -530,6 +532,7 @@ class TensorImpl {
protected:
void initMeta();
void initData(const float *from = nullptr);
void dispose();

void traverse(const std::shared_ptr<UFuncSingle> &func, int32_t start,
int32_t stride, int32_t cnt) const;
Expand Down Expand Up @@ -563,6 +566,8 @@ class TensorImpl {
Shape shape_;
Shape strides_;
float *data_ = nullptr;

static Allocator *allocator_;
};

template <typename T>
Expand Down
1 change: 1 addition & 0 deletions TinyTorch/Torch.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <fstream>

#include "Allocator.h"
#include "Data.h"
#include "Function.h"
#include "Logger.h"
Expand Down
3 changes: 3 additions & 0 deletions demo/demo_mnist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ void demo_mnist() {

manualSeed(0);

auto allocator = std::make_shared<CachedAllocator>();
Tensor::setAllocator(allocator.get());

// config
auto lr = 1.f;
auto epochs = 2;
Expand Down

0 comments on commit 4c9ea57

Please sign in to comment.