diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp
index 5012af4b..89414593 100644
--- a/include/layers/FCLayer.hpp
+++ b/include/layers/FCLayer.hpp
@@ -1,23 +1,26 @@
 #pragma once
 #include <algorithm>
 #include <stdexcept>
+#include <utility>
 #include <vector>
 
 #include "layers/Layer.hpp"
 
 namespace itlab_2023 {
 
+constexpr size_t kDepth1 = 128;  // blocks narrower than this are not split
+constexpr size_t kDepth2 = 5;    // recursion depth beyond which splitting stops
+
 class FCLayer : public Layer {
  private:
   Tensor weights_;
   Tensor bias_;
+  ImplType implType_ = kDefault;  // keeps a default-constructed layer usable
 
  public:
   FCLayer() = default;
-  FCLayer(const Tensor& weights, const Tensor& bias) {
-    weights_ = weights;
-    bias_ = bias;
-  }
+  FCLayer(Tensor weights, const Tensor& bias, ImplType implType = kDefault)
+      : weights_(std::move(weights)), bias_(bias), implType_(implType) {}
   static std::string get_name() { return "Fully-connected layer"; }
   void run(const Tensor& input, Tensor& output) override;
 #ifdef ENABLE_STATISTIC_WEIGHTS
@@ -32,7 +35,7 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
   if (mat_shape.dims() != 2) {
     throw std::invalid_argument("Not a matrix in argument");
   }
-  if (vec.size() != mat_shape[1]) {
+  if (vec.size() < mat_shape[1]) {
     throw std::invalid_argument("Invalid vector size");
   }
   Shape res_shape(1);
@@ -50,6 +53,119 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
   return res;
 }
 
+template <typename ValueType>  // mat(i, j), or 0 when (i, j) is out of shape
+inline ValueType get_from(size_t i, size_t j, const std::vector<ValueType>& mat,
+                          const Shape& mat_shape) {
+  if (i >= mat_shape[0] || j >= mat_shape[1]) {
+    return ValueType(0);
+  }
+  return mat[i * mat_shape[1] + j];
+}
+
+template <typename ValueType>  // res += mat * vec on one size x size block
+void m_mult(const std::vector<ValueType>& mat,
+            const std::vector<ValueType>& vec, const Shape& mat_shape,
+            std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
+            size_t size, size_t depth) {
+  if (depth > kDepth2 || size < kDepth1) {
+    // Leaf: clip the block to the real matrix/vector so padding is skipped.
+    const size_t cols = std::min<size_t>(mat_shape[1], vec.size());
+    const size_t row_end = std::min<size_t>(ind_y + size, mat_shape[0]);
+    const size_t col_end = std::min<size_t>(ind_x + size, cols);
+    for (size_t row = ind_y; row < row_end; row++) {
+      for (size_t col = ind_x; col < col_end; col++) {
+        res[row] += mat[row * mat_shape[1] + col] * vec[col];
+      }
+    }
+  } else {
+    const size_t half = size / 2;
+    for (size_t q = 0; q < 4; q++) {
+      m_mult<ValueType>(mat, vec, mat_shape, res, ind_x + (q % 2) * half,
+                        ind_y + (q / 2) * half, half, depth + 1);
+    }
+  }
+}
+
+template <typename ValueType>  // task-parallel variant of the blocked product
+void m_mult_tbb(const std::vector<ValueType>& mat,
+                const std::vector<ValueType>& vec, const Shape& mat_shape,
+                std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
+                size_t size, size_t depth) {
+  if (depth > kDepth2 || size < kDepth1) {
+    // Leaf: clip the block to the real matrix/vector so padding is skipped.
+    const size_t cols = std::min<size_t>(mat_shape[1], vec.size());
+    const size_t row_end = std::min<size_t>(ind_y + size, mat_shape[0]);
+    const size_t col_end = std::min<size_t>(ind_x + size, cols);
+    for (size_t row = ind_y; row < row_end; row++) {
+      for (size_t col = ind_x; col < col_end; col++) {
+        res[row] += mat[row * mat_shape[1] + col] * vec[col];
+      }
+    }
+  } else {
+    const size_t half = size / 2;
+    oneapi::tbb::task_group g;
+    // Left quadrants first: the two tasks write disjoint row ranges of res.
+    g.run([&]() {
+      m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y, half,
+                            depth + 1);
+    });
+    g.run([&]() {
+      m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y + half,
+                            half, depth + 1);
+    });
+    g.wait();  // the right quadrants add to the same rows, so finish these
+    g.run([&]() {
+      m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x + half, ind_y,
+                            half, depth + 1);
+    });
+    g.run([&]() {
+      m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x + half,
+                            ind_y + half, half, depth + 1);
+    });
+    g.wait();
+  }
+}
+
+template <typename ValueType>  // blocked matrix-vector product, sequential
+std::vector<ValueType> mat_vec_mul_upd(const std::vector<ValueType>& mat,
+                                       const Shape& mat_shape,
+                                       const std::vector<ValueType>& vec) {
+  if (mat_shape.dims() != 2) {
+    throw std::invalid_argument("Not a matrix in argument");
+  }
+  if (mat_shape[1] > vec.size()) {
+    throw std::invalid_argument("Invalid vector size");
+  }
+  size_t padded = 1;  // smallest power of two covering both matrix extents
+  while (!(padded >= mat_shape[0] && padded >= mat_shape[1])) {
+    padded *= 2;
+  }
+  std::vector<ValueType> product(padded);
+  m_mult<ValueType>(mat, vec, mat_shape, product, 0, 0, padded, 1);
+  product.resize(mat_shape[0]);  // drop the padding rows
+  return product;
+}
+
+template <typename ValueType>  // blocked matrix-vector product on TBB tasks
+std::vector<ValueType> mat_vec_mul_upd_tbb(const std::vector<ValueType>& mat,
+                                           const Shape& mat_shape,
+                                           const std::vector<ValueType>& vec) {
+  if (mat_shape.dims() != 2) {
+    throw std::invalid_argument("Not a matrix in argument");
+  }
+  if (mat_shape[1] > vec.size()) {
+    throw std::invalid_argument("Invalid vector size");
+  }
+  size_t padded = 1;  // smallest power of two covering both matrix extents
+  while (!(padded >= mat_shape[0] && padded >= mat_shape[1])) {
+    padded *= 2;
+  }
+  std::vector<ValueType> product(padded);
+  m_mult_tbb<ValueType>(mat, vec, mat_shape, product, 0, 0, padded, 1);
+  product.resize(mat_shape[0]);  // drop the padding rows
+  return product;
+}
+
 template <typename ValueType>
 class FCLayerImpl : public LayerImpl<ValueType> {
  public:
@@ -86,7 +202,7 @@ class FCLayerImpl : public LayerImpl<ValueType> {
   std::vector<ValueType> run(
       const std::vector<ValueType>& input) const override;
 
- private:
+ protected:
   std::vector<ValueType> weights_;
   std::vector<ValueType> bias_;
 };
@@ -129,4 +245,32 @@ std::vector<ValueType> FCLayerImpl<ValueType>::run(
                  output_values.begin(), std::plus<ValueType>());
   return output_values;
 }
+
+template <typename ValueType>  // FCLayerImpl whose run() calls the TBB kernel
+class FCLayerImplTBB : public FCLayerImpl<ValueType> {
+ public:
+  FCLayerImplTBB(const std::vector<ValueType>& input_weights,
+                 const Shape& input_weights_shape,
+                 const std::vector<ValueType>& input_bias)
+      : FCLayerImpl<ValueType>(input_weights, input_weights_shape, input_bias) {
+  }  // no members of its own: every argument is forwarded to the base class
+  std::vector<ValueType> run(
+      const std::vector<ValueType>& input) const override;
+};
+
+template <typename ValueType>  // returns weights * input + bias
+std::vector<ValueType> FCLayerImplTBB<ValueType>::run(
+    const std::vector<ValueType>& input) const {
+  if (this->inputShape_[0] != input.size()) {
+    throw std::invalid_argument("Input size doesn't fit FCLayer");
+  }
+  const Shape weights_shape({this->outputShape_[0], this->inputShape_[0]});
+  auto result = mat_vec_mul_upd_tbb(this->weights_, weights_shape, input);
+  auto bias_it = this->bias_.begin();
+  for (auto& value : result) {
+    value += *bias_it++;
+  }
+  return result;
+}
+
 }  // namespace itlab_2023
diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp
index 5c84a7c7..0157803b 100644
--- a/include/layers/Layer.hpp
+++ b/include/layers/Layer.hpp
@@ -7,6 +7,7 @@
 
 #include "layers/Shape.hpp"
 #include "layers/Tensor.hpp"
+#include "oneapi/tbb.h"
 
 namespace itlab_2023 {
 
@@ -21,6 +22,8 @@ enum LayerType {
   kOutput,
 };
 
+enum ImplType { kDefault, kTBB };  // picks the LayerImpl used by run()
+
 class Layer {
  public:
   Layer() = default;
diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp
index f446f7f0..79b22ce5 100644
--- a/include/layers/PoolingLayer.hpp
+++ b/include/layers/PoolingLayer.hpp
@@ -13,8 +13,11 @@ enum PoolingType { kAverage, kMax };
 class PoolingLayer : public Layer {
  public:
   PoolingLayer() = default;
-  PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average")
-      : poolingShape_(pooling_shape), poolingType_(std::move(pooling_type)) {}
+  PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average",
+               ImplType implType = kDefault)
+      : poolingShape_(pooling_shape),
+        poolingType_(std::move(pooling_type)),
+        implType_(implType) {}
   static std::string get_name() { return "Pooling layer"; }
   void run(const Tensor& input, Tensor& output) override;
 #ifdef ENABLE_STATISTIC_WEIGHTS
@@ -28,13 +31,14 @@ class PoolingLayer : public Layer {
  private:
   Shape poolingShape_;
   std::string poolingType_;
+  ImplType implType_ = kDefault;  // keeps a default-constructed layer usable
 };
 
-inline bool isOutOfBounds(size_t index, int coord, const Shape& shape) {
+inline size_t coord_size(int coord, const Shape& shape) {  // extent of an axis
   if (coord >= 0 && static_cast<size_t>(coord) < shape.dims()) {
-    return (index >= shape[coord]);
+    return shape[coord];
   }
-  return (index > 0);
+  return 1;  // an axis the shape does not have counts as a single element
 }
 
 template <typename ValueType>
@@ -65,7 +69,7 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
   std::vector<ValueType> run(
       const std::vector<ValueType>& input) const override;
 
- private:
+ protected:
   Shape poolingShape_;
   PoolingType poolingType_;
 };
@@ -120,15 +124,14 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
   int input_h_index = this->inputShape_.dims() > 2
                           ? (static_cast<int>(this->inputShape_.dims()) - 2)
                           : 0;
-  // O(N^2)
-  for (size_t n = 0; !isOutOfBounds(n, input_h_index - 2, this->outputShape_);
+  for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_);
        n++) {
-    for (size_t c = 0; !isOutOfBounds(c, input_h_index - 1, this->outputShape_);
+    for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_);
          c++) {
-      for (size_t i = 0; !isOutOfBounds(i, input_h_index, this->outputShape_);
+      for (size_t i = 0; i < coord_size(input_h_index, this->outputShape_);
            i++) {
         for (size_t j = 0;
-             !isOutOfBounds(j, input_h_index + 1, this->outputShape_); j++) {
+             j < coord_size(input_h_index + 1, this->outputShape_); j++) {
           tmpheight = poolingShape_[0] * i;
           if (poolingShape_.dims() == 1) {
             tmpwidth = j;
@@ -136,8 +139,8 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
             tmpwidth = poolingShape_[1] * j;
           }
           // to get matrix block for pooling
-          for (size_t k = 0; !isOutOfBounds(k, 0, poolingShape_); k++) {
-            for (size_t l = 0; !isOutOfBounds(l, 1, poolingShape_); l++) {
+          for (size_t k = 0; k < coord_size(0, poolingShape_); k++) {
+            for (size_t l = 0; l < coord_size(1, poolingShape_); l++) {
               if (this->inputShape_.dims() == 1) {
                 pooling_buf.push_back(input[tmpheight + k]);
               } else {
@@ -166,4 +169,101 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
   }
   return res;
 }
+
+template <typename ValueType>  // pooling whose run() uses tbb::parallel_for
+class PoolingLayerImplTBB : public PoolingLayerImpl<ValueType> {
+ public:  // constructor arguments are forwarded to PoolingLayerImpl unchanged
+  PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape,
+                      const std::string& pooling_type = "average")
+      : PoolingLayerImpl<ValueType>(input_shape, pooling_shape, pooling_type) {}
+  std::vector<ValueType> run(
+      const std::vector<ValueType>& input) const override;
+};
+
+template <typename ValueType>
+std::vector<ValueType> PoolingLayerImplTBB<ValueType>::run(
+    const std::vector<ValueType>& input) const {
+  // Pools `input` window by window and returns outputShape_.count() values.
+  // Work is spread over TBB tasks by two nested parallel_for loops.
+  // Throws std::invalid_argument when input.size() differs from
+  // inputShape_.count(), and std::runtime_error for an unknown pooling type.
+  if (input.size() != this->inputShape_.count()) {
+    throw std::invalid_argument("Input size doesn't fit pooling layer");
+  }
+  std::vector<ValueType> res(this->outputShape_.count());
+  const auto dims = this->inputShape_.dims();
+  // First pooled axis: dims - 2 when there are more than two axes, else 0.
+  int input_h_index = dims > 2 ? (static_cast<int>(dims) - 2) : 0;
+  // Outer range: the two axes in front of the pooled ones; coord_size yields
+  // 1 for an axis the output shape does not have.
+  oneapi::tbb::parallel_for(
+      oneapi::tbb::blocked_range2d<size_t>(
+          0, coord_size(input_h_index - 2, this->outputShape_), 0,
+          coord_size(input_h_index - 1, this->outputShape_)),
+      [&](oneapi::tbb::blocked_range2d<size_t> r) {
+        for (size_t n = r.rows().begin(); n < r.rows().end(); n++) {
+          for (size_t c = r.cols().begin(); c < r.cols().end(); c++) {
+            // Inner range: one iteration per output cell (i, j).
+            oneapi::tbb::parallel_for(
+                oneapi::tbb::blocked_range2d<size_t>(
+                    0, coord_size(input_h_index, this->outputShape_), 0,
+                    coord_size(input_h_index + 1, this->outputShape_)),
+                [&](oneapi::tbb::blocked_range2d<size_t> r1) {
+                  for (size_t i = r1.rows().begin(); i < r1.rows().end(); i++) {
+                    for (size_t j = r1.cols().begin(); j < r1.cols().end();
+                         j++) {
+                      // Scratch buffers are local to the cell; `input` is
+                      // only read.
+                      std::vector<ValueType> pooling_buf;
+                      std::vector<size_t> coords;
+                      const size_t tmpheight = this->poolingShape_[0] * i;
+                      const size_t tmpwidth =
+                          this->poolingShape_.dims() == 1
+                              ? j
+                              : this->poolingShape_[1] * j;
+                      // Collect the input window that feeds this cell.
+                      for (size_t k = 0; k < coord_size(0, this->poolingShape_);
+                           k++) {
+                        for (size_t l = 0;
+                             l < coord_size(1, this->poolingShape_); l++) {
+                          if (dims == 1) {
+                            pooling_buf.push_back(input[tmpheight + k]);
+                          } else {
+                            // Keep only the trailing `dims` coordinates.
+                            coords = {n, c, tmpheight + k, tmpwidth + l};
+                            pooling_buf.push_back(
+                                input[this->inputShape_.get_index(
+                                    std::vector<size_t>(coords.end() - dims,
+                                                        coords.end()))]);
+                          }
+                        }
+                      }
+                      // Flat output position of cell (n, c, i, j).
+                      size_t out_index = i;
+                      if (dims != 1) {
+                        coords = {n, c, i, j};
+                        out_index = this->outputShape_.get_index(
+                            std::vector<size_t>(coords.end() - dims,
+                                                coords.end()));
+                      }
+                      // Reduce the window with the configured pooling type.
+                      switch (this->poolingType_) {
+                        case kAverage:
+                          res[out_index] = avg_pooling(pooling_buf);
+                          break;
+                        case kMax:
+                          res[out_index] = max_pooling(pooling_buf);
+                          break;
+                        default:
+                          throw std::runtime_error("Unknown pooling type");
+                      }
+                    }
+                  }
+                });
+          }
+        }
+      });
+  return res;
+}
+
 }  // namespace itlab_2023
diff --git a/src/graph/CMakeLists.txt b/src/graph/CMakeLists.txt
index b89fe0f9..d093efe6 100644
--- a/src/graph/CMakeLists.txt
+++ b/src/graph/CMakeLists.txt
@@ -1,2 +1,3 @@
 file(GLOB_RECURSE graph_src *.cpp)
 add_library(graph_lib STATIC "${GRAPH_HEADERS}" "${graph_src}")
+target_link_libraries(graph_lib PUBLIC TBB::tbb)  # NOTE(review): presumably for layers/Layer.hpp, which includes oneapi/tbb.h - confirm
diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt
index fd8cfa4e..4990eb2f 100644
--- a/src/layers/CMakeLists.txt
+++ b/src/layers/CMakeLists.txt
@@ -1,2 +1,3 @@
 file(GLOB_RECURSE layers_src *.cpp)
 add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}")
+target_link_libraries(layers_lib PUBLIC TBB::tbb)  # layers/Layer.hpp includes oneapi/tbb.h
diff --git a/src/layers/FCLayer.cpp b/src/layers/FCLayer.cpp
index 6e57c206..e002a748 100644
--- a/src/layers/FCLayer.cpp
+++ b/src/layers/FCLayer.cpp
@@ -11,17 +11,41 @@ void FCLayer::run(const Tensor& input, Tensor& output) {
   }
   switch (input.get_type()) {
     case Type::kInt: {
-      FCLayerImpl<int> used_impl(*weights_.as<int>(), weights_.get_shape(),
-                                 *bias_.as<int>());
-      output = make_tensor(used_impl.run(*input.as<int>()),
-                           used_impl.get_output_shape());
+      switch (implType_) {
+        case kDefault: {
+          FCLayerImpl<int> used_impl(*weights_.as<int>(), weights_.get_shape(),
+                                     *bias_.as<int>());
+          output = make_tensor(used_impl.run(*input.as<int>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+        case kTBB: {
+          FCLayerImplTBB<int> used_impl(*weights_.as<int>(),
+                                        weights_.get_shape(), *bias_.as<int>());
+          output = make_tensor(used_impl.run(*input.as<int>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+      }
       break;
     }
     case Type::kFloat: {
-      FCLayerImpl<float> used_impl(*weights_.as<float>(), weights_.get_shape(),
-                                   *bias_.as<float>());
-      output = make_tensor(used_impl.run(*input.as<float>()),
-                           used_impl.get_output_shape());
+      switch (implType_) {
+        case kDefault: {
+          FCLayerImpl<float> used_impl(
+              *weights_.as<float>(), weights_.get_shape(), *bias_.as<float>());
+          output = make_tensor(used_impl.run(*input.as<float>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+        case kTBB: {
+          FCLayerImplTBB<float> used_impl(
+              *weights_.as<float>(), weights_.get_shape(), *bias_.as<float>());
+          output = make_tensor(used_impl.run(*input.as<float>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+      }
       break;
     }
     default: {
diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp
index ff010a54..0336f4a0 100644
--- a/src/layers/PoolingLayer.cpp
+++ b/src/layers/PoolingLayer.cpp
@@ -4,18 +4,42 @@ namespace itlab_2023 {
 
 void PoolingLayer::run(const Tensor& input, Tensor& output) {
   switch (input.get_type()) {
-    case Type::kFloat: {
-      PoolingLayerImpl<float> used_impl(input.get_shape(), poolingShape_,
-                                        poolingType_);
-      output = make_tensor(used_impl.run(*input.as<float>()),
-                           used_impl.get_output_shape());
+    case Type::kInt: {
+      switch (implType_) {
+        case kDefault: {
+          PoolingLayerImpl<int> used_impl(input.get_shape(), poolingShape_,
+                                          poolingType_);
+          output = make_tensor(used_impl.run(*input.as<int>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+        case kTBB: {
+          PoolingLayerImplTBB<int> used_impl(input.get_shape(), poolingShape_,
+                                             poolingType_);
+          output = make_tensor(used_impl.run(*input.as<int>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+      }
       break;
     }
-    case Type::kInt: {
-      PoolingLayerImpl<int> used_impl(input.get_shape(), poolingShape_,
-                                      poolingType_);
-      output = make_tensor(used_impl.run(*input.as<int>()),
-                           used_impl.get_output_shape());
+    case Type::kFloat: {
+      switch (implType_) {
+        case kDefault: {
+          PoolingLayerImpl<float> used_impl(input.get_shape(), poolingShape_,
+                                            poolingType_);
+          output = make_tensor(used_impl.run(*input.as<float>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+        case kTBB: {
+          PoolingLayerImplTBB<float> used_impl(input.get_shape(), poolingShape_,
+                                               poolingType_);
+          output = make_tensor(used_impl.run(*input.as<float>()),
+                               used_impl.get_output_shape());
+          break;
+        }
+      }
       break;
     }
     default: {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c5327fa5..810d73bc 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,11 +22,7 @@ target_include_directories(run_test PRIVATE "${CMAKE_SOURCE_DIR}/app/ReaderImage
 if (WIN32)
     add_custom_command(TARGET run_test POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy_directory
-            "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}/Debug/."
-            "${CMAKE_BINARY_DIR}/bin/")
-    add_custom_command(TARGET run_test POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E copy_directory
-            "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}/Release/."
+            "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}"
             "${CMAKE_BINARY_DIR}/bin/")
     if(TENSORFLOW_FOUND)
         add_custom_command(TARGET run_test POST_BUILD
diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp
new file mode 100644
index 00000000..b2891e7a
--- /dev/null
+++ b/test/benchmarking/test_layers_time.cpp
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <random>
+
+#include "gtest/gtest.h"
+#include "layers/FCLayer.hpp"
+#include "layers/PoolingLayer.hpp"
+#include "perf/benchmarking.hpp"
+
+using namespace itlab_2023;
+
+void test_func(PoolingLayer& p, const Tensor& input, Tensor& output) {
+  p.run(input, output);  // plain-function wrapper handed to elapsed_time below
+}
+
+TEST(time_test, mat_vec_mul_comp) {
+  size_t k = 7000;
+  std::vector<int> mat(k * k);
+  std::vector<int> vec(k);
+  // Fixed seed: every run feeds both kernels the same operands in [0, 499].
+  std::mt19937 gen(42);
+  std::uniform_int_distribution<int> dist(0, 499);
+  auto draw = [&]() { return dist(gen); };
+  std::generate(vec.begin(), vec.end(), draw);
+  std::generate(mat.begin(), mat.end(), draw);
+  double count1 = elapsed_time_avg<double, std::milli>(10, mat_vec_mul<int>,
+                                                       mat, Shape({k, k}), vec);
+  double count2 = elapsed_time_avg<double, std::milli>(
+      10, mat_vec_mul_upd_tbb<int>, mat, Shape({k, k}), vec);
+  auto tmp1 = mat_vec_mul<int>(mat, Shape{k, k}, vec);
+  auto tmp2 = mat_vec_mul_upd_tbb<int>(mat, Shape{k, k}, vec);
+  for (size_t i = 0; i < k; i++) {
+    EXPECT_EQ(tmp1[i], tmp2[i]);
+  }
+  EXPECT_GE(count1, count2);
+}
+
+TEST(pooling_test, is_parallel_ok) {
+  Shape test_shape = {50, 3, 224, 224};  // n, c, h, w
+  std::vector<int> a1(test_shape.count());
+  std::mt19937 gen(42);  // fixed seed keeps the input reproducible
+  std::uniform_int_distribution<int> dist(0, 1000);
+  std::generate(a1.begin(), a1.end(), [&]() { return dist(gen); });
+  Tensor input = make_tensor(a1, test_shape);
+  Tensor output;
+  PoolingLayer p1(Shape({2, 2}), "max", kDefault);
+  PoolingLayer p2(Shape({2, 2}), "max", kTBB);
+  double count1 =
+      elapsed_time<double, std::milli>(test_func, p1, input, output);
+  double count2 =
+      elapsed_time<double, std::milli>(test_func, p2, input, output);
+  Tensor output_tbb;
+  p1.run(input, output);
+  p2.run(input, output_tbb);
+  EXPECT_EQ(*output.as<int>(), *output_tbb.as<int>());  // same pooled values
+  EXPECT_GE(count1, count2);
+}
diff --git a/test/single_layer/test_fclayer.cpp b/test/single_layer/test_fclayer.cpp
index 50d63ab8..18de8064 100644
--- a/test/single_layer/test_fclayer.cpp
+++ b/test/single_layer/test_fclayer.cpp
@@ -154,9 +154,9 @@ TEST(fclayer, matvecmul_works) {
   EXPECT_EQ(res, true_res);
 }
 
-TEST(fclayer, matvecmul_throws_when_big_vector) {
+TEST(fclayer, matvecmul_throws_when_small_vector) {
   std::vector<int> mat = {2, 4, 2, 4};
-  std::vector<int> vec = {1, 2, 3};
+  std::vector<int> vec = {1};  // one entry for a matrix with two columns
   Shape mat_shape({2, 2});
   ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec));
 }
@@ -168,6 +168,20 @@ TEST(fclayer, matvecmul_throws_when_not_matrix) {
   ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec));
 }
 
+TEST(fclayer, matvecmul_tbb_throws_when_small_vector) {
+  const Shape shape({2, 2});
+  const std::vector<int> matrix = {2, 4, 2, 4};
+  const std::vector<int> too_short = {1};  // one entry for two columns
+  ASSERT_ANY_THROW(mat_vec_mul_upd_tbb(matrix, shape, too_short));
+}
+
+TEST(fclayer, matvecmul_tbb_throws_when_not_matrix) {
+  const Shape shape({2, 2, 2});  // three axes, so not a matrix
+  const std::vector<int> values = {2, 4, 2, 4, 1, 3, 5, 7};
+  const std::vector<int> operand = {1, 2};
+  ASSERT_ANY_THROW(mat_vec_mul_upd_tbb(values, shape, operand));
+}
+
 TEST(fclayer, new_fc_layer_can_run_float) {
   const std::vector<float> a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F};
   const std::vector<float> a2 = {9.0F, 6.4F, 17.5F};
@@ -196,6 +210,34 @@ TEST(fclayer, new_fc_layer_can_run_int) {
   }
 }
 
+TEST(fclayer, new_fc_layer_tbb_can_run_float) {
+  const std::vector<float> a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F};
+  const std::vector<float> a2 = {9.0F, 6.4F, 17.5F};
+  Tensor weights = make_tensor<float>(a1, {3, 2});
+  Tensor output;
+  // a2 is weights (3x2) times the input {2, 3} plus the bias below.
+  Tensor bias = make_tensor<float>({0.5F, 0.5F, 1.0F});
+  FCLayer layer(weights, bias, itlab_2023::kTBB);
+  layer.run(make_tensor<float>({2.0F, 3.0F}), output);
+  for (size_t i = 0; i < a2.size(); i++) {
+    EXPECT_NEAR((*output.as<float>())[i], a2[i], 1e-5);
+  }
+}
+
+TEST(fclayer, new_fc_layer_tbb_can_run_int) {
+  const std::vector<int> a1 = {2, 1, 0, 2, 0, 5};
+  const std::vector<int> a2 = {7, 6, 16};
+  Tensor weights = make_tensor<int>(a1, {3, 2});
+  Tensor output;
+  // a2 is weights (3x2) times the input {2, 3} plus the bias below.
+  Tensor bias = make_tensor<int>({0, 0, 1});
+  FCLayer layer(weights, bias, itlab_2023::kTBB);
+  layer.run(make_tensor<int>({2, 3}), output);
+  for (size_t i = 0; i < a2.size(); i++) {
+    EXPECT_EQ((*output.as<int>())[i], a2[i]);  // integers compare exactly
+  }
+}
+
 TEST(fclayer, new_fc_layer_throws_when_big_input) {
   const std::vector<float> a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F};
   Tensor weights = make_tensor<float>(a1, {3, 2});
diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp
index 5bafa541..184a9617 100644
--- a/test/single_layer/test_poolinglayer.cpp
+++ b/test/single_layer/test_poolinglayer.cpp
@@ -35,6 +35,15 @@ TEST(poolinglayer, throws_when_big_input) {
   ASSERT_ANY_THROW(a.run(input));
 }
 
+TEST(poolinglayer, tbb_pl_throws_when_big_input) {
+  // Eight values are offered to a layer built for an input of seven.
+  const Shape input_shape = {7};
+  const Shape pool_shape = {3};
+  PoolingLayerImplTBB<double> layer(input_shape, pool_shape, "average");
+  const std::vector<double> values({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0});
+  ASSERT_ANY_THROW(layer.run(values));
+}
+
 TEST(poolinglayer, throws_when_invalid_pooling_type) {
   Shape inpshape = {7};
   Shape poolshape = {3};
@@ -166,6 +175,19 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg) {
   }
 }
 
+TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) {
+  std::vector<int> values({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9});
+  const std::vector<int> expected = {6, 4, 4, 6};
+  Shape input_shape = {4, 4};
+  Shape window = {2, 2};
+  PoolingLayer layer(window, "average", itlab_2023::kTBB);  // TBB path
+  Tensor output = make_tensor<float>({0});
+  layer.run(make_tensor(values, input_shape), output);
+  for (size_t idx = 0; idx < expected.size(); idx++) {
+    EXPECT_NEAR((*output.as<int>())[idx], expected[idx], 1e-5);
+  }
+}
+
 TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) {
   Shape inpshape = {8};
   Shape poolshape = {3};
@@ -178,3 +200,16 @@ TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) {
     EXPECT_NEAR((*output.as<float>())[i], true_output[i], 1e-5);
   }
 }
+
+TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) {
+  std::vector<float> values({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F});
+  const std::vector<float> expected = {8.0F, 5.0F};
+  Shape input_shape = {8};
+  Shape window = {3};
+  PoolingLayer layer(window, "average", itlab_2023::kTBB);  // TBB path
+  Tensor output = make_tensor<float>({0});
+  layer.run(make_tensor(values, input_shape), output);
+  for (size_t idx = 0; idx < expected.size(); idx++) {
+    EXPECT_NEAR((*output.as<float>())[idx], expected[idx], 1e-5);
+  }
+}