From a3944c1186164f49f1e5f559e557daa12d6d1816 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Sun, 26 May 2024 06:11:16 +0300 Subject: [PATCH 01/10] Linked TBB, optimized fc&pool, tests --- include/layers/FCLayer.hpp | 63 ++++++++++++-- include/layers/Layer.hpp | 8 +- include/layers/PoolingLayer.hpp | 113 ++++++++++++++++++++++--- src/graph/CMakeLists.txt | 1 + src/layers/CMakeLists.txt | 1 + src/layers/FCLayer.cpp | 40 +++++++-- src/layers/PoolingLayer.cpp | 44 +++++++--- test/CMakeLists.txt | 6 +- test/benchmarking/test_layers_time.cpp | 52 ++++++++++++ 9 files changed, 286 insertions(+), 42 deletions(-) create mode 100644 test/benchmarking/test_layers_time.cpp diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index fec21ba1..098ef3c3 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -11,13 +11,13 @@ class FCLayer : public Layer { private: Tensor weights_; Tensor bias_; + ImplType implType_; public: FCLayer() = default; - FCLayer(const Tensor& weights, const Tensor& bias) { - weights_ = weights; - bias_ = bias; - } + FCLayer(const Tensor& weights, const Tensor& bias, + ImplType implType = kDefault) + : weights_(weights), bias_(bias), implType_(implType) {} static std::string get_name() { return "Fully-connected layer"; } void run(const Tensor& input, Tensor& output) override; }; @@ -47,6 +47,34 @@ std::vector mat_vec_mul(const std::vector& mat, return res; } +template +std::vector mat_vec_mul_tbb(const std::vector& mat, + const Shape& mat_shape, + const std::vector& vec) { + if (mat_shape.dims() != 2) { + throw std::invalid_argument("Not a matrix in argument"); + } + if (vec.size() != mat_shape[1]) { + throw std::invalid_argument("Invalid vector size"); + } + Shape res_shape(1); + res_shape[0] = mat_shape[0]; + std::vector res(res_shape[0]); + ValueType elem; + oneapi::tbb::parallel_for(oneapi::tbb::blocked_range2d(0, mat_shape[0], 0, mat_shape[1]), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { + elem = ValueType(0); + for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { + // due to 1d indexing + elem += mat[i * mat_shape[1] + j] * vec[j]; + } + res[i] = elem; + } + }); + return res; +} + template class FCLayerImpl : public LayerImpl { public: @@ -82,7 +110,7 @@ class FCLayerImpl : public LayerImpl { } std::vector run(const std::vector& input) const; - private: + protected: std::vector weights_; std::vector bias_; }; @@ -125,4 +153,29 @@ std::vector FCLayerImpl::run( output_values.begin(), std::plus()); return output_values; } + +template +class FCLayerImplTBB : public FCLayerImpl { + public: + FCLayerImplTBB(const std::vector& input_weights, + const Shape& input_weights_shape, + const std::vector& input_bias) + : FCLayerImpl(input_weights, input_weights_shape, input_bias) {} + std::vector run(const std::vector& input) const; +}; + +template +std::vector FCLayerImplTBB::run( + const std::vector& input) const { + if (input.size() != this->inputShape_[0]) { + throw std::invalid_argument("Input size doesn't fit FCLayer"); + } + Shape cur_w_shape({this->outputShape_[0], this->inputShape_[0]}); + std::vector output_values = + mat_vec_mul_tbb(weights_, cur_w_shape, input); + std::transform(output_values.begin(), output_values.end(), bias_.begin(), + output_values.begin(), std::plus()); + return output_values; +} + } // namespace itlab_2023 diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 24105151..295677f8 100644 --- a/include/layers/Layer.hpp +++ 
b/include/layers/Layer.hpp @@ -4,6 +4,7 @@ #include #include #include +#include "oneapi/tbb.h" #include "layers/Shape.hpp" #include "layers/Tensor.hpp" @@ -18,7 +19,12 @@ enum LayerType { kElementWise, kConvolution, kFullyConnected, - kOutput, + kOutput +}; + +enum ImplType { + kDefault, + kTBB }; class Layer { diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index 84f78185..dcc7cf7c 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -13,21 +13,25 @@ enum PoolingType { kAverage, kMax }; class PoolingLayer : public Layer { public: PoolingLayer() = default; - PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average") - : poolingShape_(pooling_shape), poolingType_(std::move(pooling_type)) {} + PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average", + ImplType implType = kDefault) + : poolingShape_(pooling_shape), + poolingType_(std::move(pooling_type)), + implType_(implType) {} static std::string get_name() { return "Pooling layer"; } void run(const Tensor& input, Tensor& output); private: Shape poolingShape_; std::string poolingType_; + ImplType implType_; }; -inline bool isOutOfBounds(size_t index, int coord, const Shape& shape) { +inline size_t coord_size(int coord, const Shape& shape) { if (coord >= 0 && static_cast(coord) < shape.dims()) { - return (index >= shape[coord]); + return shape[coord]; } - return (index > 0); + return 1; } template @@ -57,7 +61,7 @@ class PoolingLayerImpl : public LayerImpl { PoolingLayerImpl& operator=(const PoolingLayerImpl& c) = default; std::vector run(const std::vector& input) const; - private: + protected: Shape poolingShape_; PoolingType poolingType_; }; @@ -112,15 +116,15 @@ std::vector PoolingLayerImpl::run( int input_h_index = this->inputShape_.dims() > 2 ? 
(static_cast(this->inputShape_.dims()) - 2) : 0; - // O(N^2) - for (size_t n = 0; !isOutOfBounds(n, input_h_index - 2, this->outputShape_); + + for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_); n++) { - for (size_t c = 0; !isOutOfBounds(c, input_h_index - 1, this->outputShape_); + for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_); c++) { - for (size_t i = 0; !isOutOfBounds(i, input_h_index, this->outputShape_); + for (size_t i = 0; i < coord_size(input_h_index, this->outputShape_); i++) { for (size_t j = 0; - !isOutOfBounds(j, input_h_index + 1, this->outputShape_); j++) { + j < coord_size(input_h_index + 1, this->outputShape_); j++) { tmpheight = poolingShape_[0] * i; if (poolingShape_.dims() == 1) { tmpwidth = j; @@ -128,8 +132,8 @@ std::vector PoolingLayerImpl::run( tmpwidth = poolingShape_[1] * j; } // to get matrix block for pooling - for (size_t k = 0; !isOutOfBounds(k, 0, poolingShape_); k++) { - for (size_t l = 0; !isOutOfBounds(l, 1, poolingShape_); l++) { + for (size_t k = 0; k < coord_size(0, poolingShape_); k++) { + for (size_t l = 0; l < coord_size(1, poolingShape_); l++) { if (this->inputShape_.dims() == 1) { pooling_buf.push_back(input[tmpheight + k]); } else { @@ -158,4 +162,87 @@ std::vector PoolingLayerImpl::run( } return res; } + +template +class PoolingLayerImplTBB : public PoolingLayerImpl { + public: + PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape, + const std::string& pooling_type = "average") + : PoolingLayerImpl(input_shape, pooling_shape, pooling_type) {} + std::vector run(const std::vector& input) const; +}; + +template +std::vector PoolingLayerImplTBB::run( + const std::vector& input) const { + if (input.size() != this->inputShape_.count()) { + throw std::invalid_argument("Input size doesn't fit pooling layer"); + } + std::vector res(this->outputShape_.count()); + int input_h_index = this->inputShape_.dims() > 2 + ? 
(static_cast(this->inputShape_.dims()) - 2) + : 0; + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range2d( + 0, coord_size(input_h_index - 2, this->outputShape_), 0, + coord_size(input_h_index - 1, this->outputShape_)), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t n = r.rows().begin(); n < r.rows().end(); n++) { + for (size_t c = r.cols().begin(); c < r.cols().end(); c++) { + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range2d( + 0, coord_size(input_h_index, this->outputShape_), 0, + coord_size(input_h_index + 1, this->outputShape_)), + [&](oneapi::tbb::blocked_range2d r1) { + for (size_t i = r1.rows().begin(); i < r1.rows().end(); i++) { + for (size_t j = r1.cols().begin(); j < r1.cols().end(); + j++) { + std::vector pooling_buf; + std::vector coords; + size_t tmpwidth; + size_t tmpheight; + tmpheight = poolingShape_[0] * i; + if (poolingShape_.dims() == 1) { + tmpwidth = j; + } else { + tmpwidth = poolingShape_[1] * j; + } + for (size_t k = 0; k < coord_size(0, poolingShape_); + k++) { + for (size_t l = 0; l < coord_size(1, poolingShape_); + l++) { + if (this->inputShape_.dims() == 1) { + pooling_buf.push_back(input[tmpheight + k]); + } else { + coords = std::vector( + {n, c, tmpheight + k, tmpwidth + l}); + pooling_buf.push_back( + input[this->inputShape_.get_index( + std::vector( + coords.end() - this->inputShape_.dims(), + coords.end()))]); + } + } + } + switch (poolingType_) { + case kAverage: + res[this->outputShape_.get_index(std::vector( + {n, c, i, j}))] = avg_pooling(pooling_buf); + break; + case kMax: + res[this->outputShape_.get_index(std::vector( + {n, c, i, j}))] = max_pooling(pooling_buf); + break; + default: + throw std::runtime_error("Unknown pooling type"); + } + } + } + }); + } + } + }); + return res; +} + } // namespace itlab_2023 diff --git a/src/graph/CMakeLists.txt b/src/graph/CMakeLists.txt index b89fe0f9..d093efe6 100644 --- a/src/graph/CMakeLists.txt +++ b/src/graph/CMakeLists.txt @@ -1,2 +1,3 @@ file(GLOB_RECURSE graph_src *.cpp) add_library(graph_lib STATIC "${GRAPH_HEADERS}" "${graph_src}") +target_link_libraries(graph_lib PUBLIC TBB::tbb) diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt index fd8cfa4e..4990eb2f 100644 --- a/src/layers/CMakeLists.txt +++ b/src/layers/CMakeLists.txt @@ -1,2 +1,3 @@ file(GLOB_RECURSE layers_src *.cpp) add_library(layers_lib STATIC "${LAYERS_HEADERS}" "${layers_src}") +target_link_libraries(layers_lib PUBLIC TBB::tbb) diff --git a/src/layers/FCLayer.cpp b/src/layers/FCLayer.cpp index 6e57c206..e002a748 100644 --- a/src/layers/FCLayer.cpp +++ b/src/layers/FCLayer.cpp @@ -11,17 +11,41 @@ void FCLayer::run(const Tensor& input, Tensor& output) { } switch (input.get_type()) { case Type::kInt: { - FCLayerImpl used_impl(*weights_.as(), weights_.get_shape(), - *bias_.as()); - output = make_tensor(used_impl.run(*input.as()), - used_impl.get_output_shape()); + switch (implType_) { + case kDefault: { + FCLayerImpl used_impl(*weights_.as(), weights_.get_shape(), + *bias_.as()); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + case kTBB: { + FCLayerImplTBB used_impl(*weights_.as(), + weights_.get_shape(), *bias_.as()); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + } break; } case Type::kFloat: { - FCLayerImpl used_impl(*weights_.as(), weights_.get_shape(), - *bias_.as()); - output = make_tensor(used_impl.run(*input.as()), - used_impl.get_output_shape()); + switch (implType_) { + case kDefault: { + 
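// NOTE (editorial annotation, not part of the original patch): the kDefault branch keeps the
// existing sequential FCLayerImpl; the kTBB case below constructs the new FCLayerImplTBB
// variant instead, so the two branches differ only in which implementation runs.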
FCLayerImpl used_impl( + *weights_.as(), weights_.get_shape(), *bias_.as()); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + case kTBB: { + FCLayerImplTBB used_impl( + *weights_.as(), weights_.get_shape(), *bias_.as()); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + } break; } default: { diff --git a/src/layers/PoolingLayer.cpp b/src/layers/PoolingLayer.cpp index ff010a54..0336f4a0 100644 --- a/src/layers/PoolingLayer.cpp +++ b/src/layers/PoolingLayer.cpp @@ -4,18 +4,42 @@ namespace itlab_2023 { void PoolingLayer::run(const Tensor& input, Tensor& output) { switch (input.get_type()) { - case Type::kFloat: { - PoolingLayerImpl used_impl(input.get_shape(), poolingShape_, - poolingType_); - output = make_tensor(used_impl.run(*input.as()), - used_impl.get_output_shape()); + case Type::kInt: { + switch (implType_) { + case kDefault: { + PoolingLayerImpl used_impl(input.get_shape(), poolingShape_, + poolingType_); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + case kTBB: { + PoolingLayerImplTBB used_impl(input.get_shape(), poolingShape_, + poolingType_); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + } break; } - case Type::kInt: { - PoolingLayerImpl used_impl(input.get_shape(), poolingShape_, - poolingType_); - output = make_tensor(used_impl.run(*input.as()), - used_impl.get_output_shape()); + case Type::kFloat: { + switch (implType_) { + case kDefault: { + PoolingLayerImpl used_impl(input.get_shape(), poolingShape_, + poolingType_); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + case kTBB: { + PoolingLayerImplTBB used_impl(input.get_shape(), poolingShape_, + poolingType_); + output = make_tensor(used_impl.run(*input.as()), + used_impl.get_output_shape()); + break; + } + } break; } default: { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c5327fa5..810d73bc 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -22,11 +22,7 @@ target_include_directories(run_test PRIVATE "${CMAKE_SOURCE_DIR}/app/ReaderImage if (WIN32) add_custom_command(TARGET run_test POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory - "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}/Debug/." - "${CMAKE_BINARY_DIR}/bin/") - add_custom_command(TARGET run_test POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}/Release/." 
+ "${CMAKE_SOURCE_DIR}/3rdparty/opencv/build/bin/${CMAKE_BUILD_TYPE}" "${CMAKE_BINARY_DIR}/bin/") if(TENSORFLOW_FOUND) add_custom_command(TARGET run_test POST_BUILD diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp new file mode 100644 index 00000000..463ce49e --- /dev/null +++ b/test/benchmarking/test_layers_time.cpp @@ -0,0 +1,52 @@ +#include +#include + +#include "gtest/gtest.h" +#include "layers/FCLayer.hpp" +#include "layers/PoolingLayer.hpp" +#include "perf/benchmarking.hpp" + +using namespace itlab_2023; + +void test_func(PoolingLayer& p, const Tensor& input, Tensor& output) { + p.run(input, output); +} + +TEST(time_test, mat_vec_mul_comp) { + size_t k = 5000; + std::vector mat(k * k); + std::vector vec(k); + for (size_t i = 0; i < k; i++) { + vec[i] = rand(); + } + for (size_t i = 0; i < k * k; i++) { + mat[i] = rand(); + } + double count1 = elapsed_time_avg(500, mat_vec_mul, mat, + Shape({k, k}), vec); + std::cerr << "Normal:" << count1 << std::endl; + double count2 = elapsed_time_avg(500, mat_vec_mul_tbb, mat, + Shape({k, k}), vec); + std::cerr << "Tbb:" << count2 << std::endl; + EXPECT_GE(count1, count2); +} + +TEST(pooling_test, is_parallel_good) { + size_t n = 1000; + size_t c = 3; + size_t h = 224; + size_t w = 224; + Shape test_shape = {n, c, h, w}; + std::vector a1(n * c * h * w); + for (size_t i = 0; i < n * c * h * w; i++) { + a1[i] = rand(); + } + Tensor input = make_tensor(a1, test_shape); + Tensor output; + PoolingLayer p1(Shape({2, 2}), "max", kDefault); + PoolingLayer p2(Shape({2, 2}), "max", kTBB); + double count1 = elapsed_time(test_func, p1, input, output); + std::cerr << "Normal:" << count1 << std::endl; + double count2 = elapsed_time(test_func, p2, input, output); + std::cerr << "Tbb:" << count2 << std::endl; +} \ No newline at end of file From 267da7fd8121ca8e9de01235d105dbc81b52d051 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Sun, 26 May 2024 06:30:52 +0300 Subject: [PATCH 02/10] Changes --- include/layers/FCLayer.hpp | 33 ++++++++++++++------------ include/layers/Layer.hpp | 7 ++---- include/layers/PoolingLayer.hpp | 16 ++++++------- test/benchmarking/test_layers_time.cpp | 16 +++++++------ 4 files changed, 37 insertions(+), 35 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index 098ef3c3..cf57dc22 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -61,17 +61,18 @@ std::vector mat_vec_mul_tbb(const std::vector& mat, res_shape[0] = mat_shape[0]; std::vector res(res_shape[0]); ValueType elem; - oneapi::tbb::parallel_for(oneapi::tbb::blocked_range2d(0, mat_shape[0], 0, mat_shape[1]), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { - elem = ValueType(0); - for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { - // due to 1d indexing - elem += mat[i * mat_shape[1] + j] * vec[j]; - } - res[i] = elem; - } - }); + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range2d(0, mat_shape[0], 0, mat_shape[1]), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { + elem = ValueType(0); + for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { + // due to 1d indexing + elem += mat[i * mat_shape[1] + j] * vec[j]; + } + res[i] = elem; + } + }); return res; } @@ -160,7 +161,8 @@ class FCLayerImplTBB : public FCLayerImpl { FCLayerImplTBB(const std::vector& input_weights, const Shape& input_weights_shape, const std::vector& input_bias) - : 
FCLayerImpl(input_weights, input_weights_shape, input_bias) {} + : FCLayerImpl(input_weights, input_weights_shape, input_bias) { + } std::vector run(const std::vector& input) const; }; @@ -172,9 +174,10 @@ std::vector FCLayerImplTBB::run( } Shape cur_w_shape({this->outputShape_[0], this->inputShape_[0]}); std::vector output_values = - mat_vec_mul_tbb(weights_, cur_w_shape, input); - std::transform(output_values.begin(), output_values.end(), bias_.begin(), - output_values.begin(), std::plus()); + mat_vec_mul_tbb(this->weights_, cur_w_shape, input); + std::transform(output_values.begin(), output_values.end(), + this->bias_.begin(), output_values.begin(), + std::plus()); return output_values; } diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp index 295677f8..22c34657 100644 --- a/include/layers/Layer.hpp +++ b/include/layers/Layer.hpp @@ -4,10 +4,10 @@ #include #include #include -#include "oneapi/tbb.h" #include "layers/Shape.hpp" #include "layers/Tensor.hpp" +#include "oneapi/tbb.h" namespace itlab_2023 { @@ -22,10 +22,7 @@ enum LayerType { kOutput }; -enum ImplType { - kDefault, - kTBB -}; +enum ImplType { kDefault, kTBB }; class Layer { public: diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index dcc7cf7c..fec16262 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -168,7 +168,7 @@ class PoolingLayerImplTBB : public PoolingLayerImpl { public: PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape, const std::string& pooling_type = "average") - : PoolingLayerImpl(input_shape, pooling_shape, pooling_type) {} + : PoolingLayerImpl(input_shape, pooling_shape, pooling_type) {} std::vector run(const std::vector& input) const; }; @@ -201,16 +201,16 @@ std::vector PoolingLayerImplTBB::run( std::vector coords; size_t tmpwidth; size_t tmpheight; - tmpheight = poolingShape_[0] * i; - if (poolingShape_.dims() == 1) { + tmpheight = this->poolingShape_[0] * i; + if (this->poolingShape_.dims() == 1) { tmpwidth = j; } else { - tmpwidth = poolingShape_[1] * j; + tmpwidth = this->poolingShape_[1] * j; } - for (size_t k = 0; k < coord_size(0, poolingShape_); + for (size_t k = 0; k < coord_size(0, this->poolingShape_); k++) { - for (size_t l = 0; l < coord_size(1, poolingShape_); - l++) { + for (size_t l = 0; + l < coord_size(1, this->poolingShape_); l++) { if (this->inputShape_.dims() == 1) { pooling_buf.push_back(input[tmpheight + k]); } else { @@ -224,7 +224,7 @@ std::vector PoolingLayerImplTBB::run( } } } - switch (poolingType_) { + switch (this->poolingType_) { case kAverage: res[this->outputShape_.get_index(std::vector( {n, c, i, j}))] = avg_pooling(pooling_buf); diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 463ce49e..9bb51f2c 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -22,11 +22,11 @@ TEST(time_test, mat_vec_mul_comp) { for (size_t i = 0; i < k * k; i++) { mat[i] = rand(); } - double count1 = elapsed_time_avg(500, mat_vec_mul, mat, - Shape({k, k}), vec); + double count1 = elapsed_time_avg(500, mat_vec_mul, + mat, Shape({k, k}), vec); std::cerr << "Normal:" << count1 << std::endl; - double count2 = elapsed_time_avg(500, mat_vec_mul_tbb, mat, - Shape({k, k}), vec); + double count2 = elapsed_time_avg( + 500, mat_vec_mul_tbb, mat, Shape({k, k}), vec); std::cerr << "Tbb:" << count2 << std::endl; EXPECT_GE(count1, count2); } @@ -45,8 +45,10 @@ TEST(pooling_test, is_parallel_good) { Tensor output; 
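// NOTE (editorial annotation, not part of the original patch): p1 and p2 below are identical
// max-pooling layers except for their ImplType (kDefault vs kTBB); elapsed_time runs each on
// the same randomly filled NCHW tensor so the sequential and TBB paths can be compared.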
PoolingLayer p1(Shape({2, 2}), "max", kDefault); PoolingLayer p2(Shape({2, 2}), "max", kTBB); - double count1 = elapsed_time(test_func, p1, input, output); + double count1 = + elapsed_time(test_func, p1, input, output); std::cerr << "Normal:" << count1 << std::endl; - double count2 = elapsed_time(test_func, p2, input, output); + double count2 = + elapsed_time(test_func, p2, input, output); std::cerr << "Tbb:" << count2 << std::endl; -} \ No newline at end of file +} From fd4b4264157f9fc455522e18a0ff1a5f11969f16 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Sun, 26 May 2024 13:23:49 +0300 Subject: [PATCH 03/10] temp tests changes --- include/layers/PoolingLayer.hpp | 1 - test/benchmarking/test_layers_time.cpp | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index fec16262..1c94869c 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -116,7 +116,6 @@ std::vector PoolingLayerImpl::run( int input_h_index = this->inputShape_.dims() > 2 ? (static_cast(this->inputShape_.dims()) - 2) : 0; - for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_); n++) { for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_); diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 9bb51f2c..35024c03 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -28,10 +28,10 @@ TEST(time_test, mat_vec_mul_comp) { double count2 = elapsed_time_avg( 500, mat_vec_mul_tbb, mat, Shape({k, k}), vec); std::cerr << "Tbb:" << count2 << std::endl; - EXPECT_GE(count1, count2); + //EXPECT_GE(count1, count2); } -TEST(pooling_test, is_parallel_good) { +TEST(pooling_test, is_parallel_ok) { size_t n = 1000; size_t c = 3; size_t h = 224; @@ -51,4 +51,5 @@ TEST(pooling_test, is_parallel_good) { double count2 = elapsed_time(test_func, p2, input, output); std::cerr << "Tbb:" << count2 << std::endl; + //EXPECT_GE(count1, count2); } From 7b492b483981a5b2ab6c02e96c23cbf4f8b4061f Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 7 Jun 2024 15:04:30 +0300 Subject: [PATCH 04/10] Blocks --- include/layers/FCLayer.hpp | 207 +++++++++++++++++++++++++ test/benchmarking/test_layers_time.cpp | 12 +- 2 files changed, 213 insertions(+), 6 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index cf57dc22..d4f41ab5 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -7,6 +7,8 @@ namespace itlab_2023 { +const size_t DEPTH = 64; + class FCLayer : public Layer { private: Tensor weights_; @@ -47,6 +49,211 @@ std::vector mat_vec_mul(const std::vector& mat, return res; } +template +inline ValueType get_from(size_t i, size_t j, const std::vector& mat, + const Shape& mat_shape) { + if (i < mat_shape[0] && j < mat_shape[1]) { + return mat[i * mat_shape[1] + j]; + } + return ValueType(0); +} + +template +inline std::vector m_plus(const std::vector& mat1, + const std::vector& mat2) { + std::vector res(mat1.size()); + std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(), + std::plus()); + return res; +} + +template +inline std::vector m_minus(const std::vector& mat1, + const std::vector& mat2) { + std::vector res(mat1.size()); + std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(), + std::minus()); + return res; +} + +template +void split_into_blocks(const std::vector& mat, + const Shape& mat_shape, + const std::vector& vec, + std::vector >& tmp, + 
size_t near_pow2) { + for (size_t i = 0; i < near_pow2 / 2; i++) { + for (size_t j = 0; j < near_pow2 / 2; j++) { + tmp[0].push_back(get_from(i, j, mat, mat_shape)); + } + for (size_t j = near_pow2 / 2; j < near_pow2; j++) { + tmp[1].push_back(get_from(i, j, mat, mat_shape)); + } + } + for (size_t i = near_pow2 / 2; i < near_pow2; i++) { + for (size_t j = 0; j < near_pow2 / 2; j++) { + tmp[2].push_back(get_from(i, j, mat, mat_shape)); + } + for (size_t j = near_pow2 / 2; j < near_pow2; j++) { + tmp[3].push_back(get_from(i, j, mat, mat_shape)); + } + } + for (size_t i = 0; i < near_pow2 / 2; i++) { + tmp[4].push_back(get_from(0, i, vec, mat_shape)); + } + for (size_t i = near_pow2 / 2; i < near_pow2; i++) { + tmp[5].push_back(get_from(0, i, vec, mat_shape)); + } +} + +template +std::vector mat_vec_mul_upd(const std::vector& mat, + const Shape& mat_shape, + const std::vector& vec) { + if (mat_shape.dims() != 2) { + throw std::invalid_argument("Not a matrix in argument"); + } + if (vec.size() != mat_shape[1]) { + throw std::invalid_argument("Invalid vector size"); + } + Shape res_shape(1); + res_shape[0] = mat_shape[0]; + std::vector res; + if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) { + return mat_vec_mul(mat, mat_shape, vec); + } else { + size_t near_pow2 = 1; + std::vector > tmp(6); + while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { + near_pow2 = near_pow2 << 1; + } + split_into_blocks(mat, mat_shape, vec, tmp, near_pow2); + Shape cur_shape({near_pow2 / 2, near_pow2 / 2}); + std::vector d = + mat_vec_mul_upd(m_plus(tmp[0], tmp[3]), cur_shape, tmp[4]); + std::vector d1 = + mat_vec_mul_upd(m_minus(tmp[1], tmp[3]), cur_shape, tmp[5]); + std::vector d2 = + mat_vec_mul_upd(m_minus(tmp[2], tmp[0]), cur_shape, tmp[4]); + std::vector h2 = + mat_vec_mul_upd(m_plus(tmp[2], tmp[3]), cur_shape, tmp[4]); + std::vector v1 = + mat_vec_mul_upd(tmp[3], cur_shape, m_minus(tmp[5], tmp[4])); + std::vector r1 = m_plus(m_plus(d1, v1), d); + std::vector r2 = m_plus(v1, h2); + res = r1; + for (size_t i = 0; i < res_shape[0] - r1.size(); i++) { + res.push_back(r2[i]); + } + } + return res; +} + +template +void split_into_blocks_tbb(const std::vector& mat, + const Shape& mat_shape, + const std::vector& vec, + std::vector >& tmp, + size_t near_pow2) { + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range(0, near_pow2 / 2), + [&](oneapi::tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); i++) { + for (size_t j = 0; j < near_pow2 / 2; j++) { + tmp[0][i * (near_pow2 / 2) + j] = + get_from(i, j, mat, mat_shape); + } + for (size_t j = near_pow2 / 2; j < near_pow2; j++) { + tmp[1][i * (near_pow2 / 2) + j - near_pow2 / 2] = + get_from(i, j, mat, mat_shape); + } + } + }); + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range(near_pow2 / 2, near_pow2), + [&](oneapi::tbb::blocked_range r) { + for (size_t i = r.begin(); i < r.end(); i++) { + for (size_t j = 0; j < near_pow2 / 2; j++) { + tmp[2][(i - near_pow2 / 2) * (near_pow2 / 2) + j] = + get_from(i, j, mat, mat_shape); + } + for (size_t j = near_pow2 / 2; j < near_pow2; j++) { + tmp[3][(i - near_pow2 / 2) * (near_pow2 / 2) + j - near_pow2 / 2] = + get_from(i, j, mat, mat_shape); + } + } + }); + for (size_t i = 0; i < near_pow2 / 2; i++) { + tmp[4].push_back(get_from(0, i, vec, mat_shape)); + } + for (size_t i = near_pow2 / 2; i < near_pow2; i++) { + tmp[5].push_back(get_from(0, i, vec, mat_shape)); + } +} + +template +std::vector mat_vec_mul_upd_tbb(const std::vector& mat, + const Shape& mat_shape, + const std::vector& vec) { + 
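// NOTE (editorial annotation, not part of the original patch), describing the body that
// follows as far as the patch shows it: the matrix is zero-padded to the next power of two
// (get_from returns 0 outside the real bounds) and split into four square blocks plus two
// vector halves; the five Strassen-like sub-products (d, d1, d2, h2, v1) are then evaluated
// as recursive oneapi::tbb::task_group tasks, with blocks no larger than DEPTH falling back
// to the serial mat_vec_mul. d2 is computed but not used when the two result halves are
// assembled.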
if (mat_shape.dims() != 2) { + throw std::invalid_argument("Not a matrix in argument"); + } + if (vec.size() != mat_shape[1]) { + throw std::invalid_argument("Invalid vector size"); + } + Shape res_shape(1); + res_shape[0] = mat_shape[0]; + std::vector res; + if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) { + return mat_vec_mul(mat, mat_shape, vec); + } else { + size_t near_pow2 = 1; + while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { + near_pow2 = near_pow2 << 1; + } + std::vector > tmp( + 4, std::vector((near_pow2 / 2) * (near_pow2 / 2))); + tmp.push_back(std::vector()); + tmp.push_back(std::vector()); + split_into_blocks_tbb(mat, mat_shape, vec, tmp, near_pow2); + Shape cur_shape({near_pow2 / 2, near_pow2 / 2}); + oneapi::tbb::task_group g; + std::vector d; + std::vector d1; + std::vector d2; + std::vector h2; + std::vector v1; + g.run([&]() { + d = mat_vec_mul_upd_tbb(m_plus(tmp[0], tmp[3]), cur_shape, + tmp[4]); + }); + g.run([&]() { + d1 = mat_vec_mul_upd_tbb(m_minus(tmp[1], tmp[3]), cur_shape, + tmp[5]); + }); + g.run([&]() { + d2 = mat_vec_mul_upd_tbb(m_minus(tmp[2], tmp[0]), cur_shape, + tmp[4]); + }); + g.run([&]() { + h2 = mat_vec_mul_upd_tbb(m_plus(tmp[2], tmp[3]), cur_shape, + tmp[4]); + }); + g.run([&]() { + v1 = mat_vec_mul_upd_tbb(tmp[3], cur_shape, + m_minus(tmp[5], tmp[4])); + }); + g.wait(); + std::vector r1 = m_plus(m_plus(d1, v1), d); + std::vector r2 = m_plus(v1, h2); + res = r1; + for (size_t i = 0; i < res_shape[0] - r1.size(); i++) { + res.push_back(r2[i]); + } + } + return res; +} + template std::vector mat_vec_mul_tbb(const std::vector& mat, const Shape& mat_shape, diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 35024c03..93cc26e8 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -13,26 +13,26 @@ void test_func(PoolingLayer& p, const Tensor& input, Tensor& output) { } TEST(time_test, mat_vec_mul_comp) { - size_t k = 5000; + size_t k = 2000; std::vector mat(k * k); std::vector vec(k); for (size_t i = 0; i < k; i++) { - vec[i] = rand(); + vec[i] = rand() % 500; } for (size_t i = 0; i < k * k; i++) { - mat[i] = rand(); + mat[i] = rand() % 500; } double count1 = elapsed_time_avg(500, mat_vec_mul, mat, Shape({k, k}), vec); std::cerr << "Normal:" << count1 << std::endl; double count2 = elapsed_time_avg( - 500, mat_vec_mul_tbb, mat, Shape({k, k}), vec); + 500, mat_vec_mul_upd_tbb, mat, Shape({k, k}), vec); std::cerr << "Tbb:" << count2 << std::endl; - //EXPECT_GE(count1, count2); + // EXPECT_GE(count1, count2); } TEST(pooling_test, is_parallel_ok) { - size_t n = 1000; + size_t n = 500; size_t c = 3; size_t h = 224; size_t w = 224; From f9444e1998dc8be486f2c6aab8d02d80ce01a94a Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 14 Jun 2024 18:00:02 +0300 Subject: [PATCH 05/10] Check --- include/layers/FCLayer.hpp | 309 ++++++++++++++----------- src/layers/FCLayer.cpp | 7 + test/benchmarking/test_layers_time.cpp | 17 +- 3 files changed, 189 insertions(+), 144 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index d4f41ab5..0a36ff65 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -7,7 +7,8 @@ namespace itlab_2023 { -const size_t DEPTH = 64; +const size_t kDepth = 64; +void split_into_blocks(std::vector& tmp, size_t near_pow2_2); class FCLayer : public Layer { private: @@ -31,7 +32,7 @@ std::vector mat_vec_mul(const std::vector& mat, if (mat_shape.dims() != 2) { throw std::invalid_argument("Not a 
matrix in argument"); } - if (vec.size() != mat_shape[1]) { + if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } Shape res_shape(1); @@ -59,51 +60,43 @@ inline ValueType get_from(size_t i, size_t j, const std::vector& mat, } template -inline std::vector m_plus(const std::vector& mat1, - const std::vector& mat2) { - std::vector res(mat1.size()); - std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(), - std::plus()); +std::vector m_plus(const std::vector& mat, + const Shape& mat_shape, size_t ind1, + size_t ind2, size_t size) { + std::vector res(size * size); + for (size_t i = 0; i < size; i++) { + for (size_t j = 0; j < size; j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) + + get_from(i, j + ind2, mat, mat_shape); + } + } return res; } template -inline std::vector m_minus(const std::vector& mat1, - const std::vector& mat2) { - std::vector res(mat1.size()); - std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(), - std::minus()); +std::vector m_minus(const std::vector& mat, + const Shape& mat_shape, size_t ind1, + size_t ind2, size_t size) { + std::vector res(size * size); + for (size_t i = 0; i < size; i++) { + for (size_t j = 0; j < size; j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) - + get_from(i, j + ind2, mat, mat_shape); + } + } return res; } template -void split_into_blocks(const std::vector& mat, - const Shape& mat_shape, - const std::vector& vec, - std::vector >& tmp, - size_t near_pow2) { - for (size_t i = 0; i < near_pow2 / 2; i++) { - for (size_t j = 0; j < near_pow2 / 2; j++) { - tmp[0].push_back(get_from(i, j, mat, mat_shape)); - } - for (size_t j = near_pow2 / 2; j < near_pow2; j++) { - tmp[1].push_back(get_from(i, j, mat, mat_shape)); +std::vector m_copy(const std::vector& mat, + const Shape& mat_shape, size_t ind1, size_t size) { + std::vector res(size * size); + for (size_t i = 0; i < size; i++) { + for (size_t j = 0; j < size; j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape); } } - for (size_t i = near_pow2 / 2; i < near_pow2; i++) { - for (size_t j = 0; j < near_pow2 / 2; j++) { - tmp[2].push_back(get_from(i, j, mat, mat_shape)); - } - for (size_t j = near_pow2 / 2; j < near_pow2; j++) { - tmp[3].push_back(get_from(i, j, mat, mat_shape)); - } - } - for (size_t i = 0; i < near_pow2 / 2; i++) { - tmp[4].push_back(get_from(0, i, vec, mat_shape)); - } - for (size_t i = near_pow2 / 2; i < near_pow2; i++) { - tmp[5].push_back(get_from(0, i, vec, mat_shape)); - } + return res; } template @@ -113,82 +106,105 @@ std::vector mat_vec_mul_upd(const std::vector& mat, if (mat_shape.dims() != 2) { throw std::invalid_argument("Not a matrix in argument"); } - if (vec.size() != mat_shape[1]) { + if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } Shape res_shape(1); res_shape[0] = mat_shape[0]; std::vector res; - if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) { + if (mat_shape[0] <= kDepth && mat_shape[1] <= kDepth) { return mat_vec_mul(mat, mat_shape, vec); - } else { - size_t near_pow2 = 1; - std::vector > tmp(6); - while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { + } + size_t near_pow2 = 1; + std::vector tmp(4); + while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { near_pow2 = near_pow2 << 1; - } - split_into_blocks(mat, mat_shape, vec, tmp, near_pow2); - Shape cur_shape({near_pow2 / 2, near_pow2 / 2}); - std::vector d = - mat_vec_mul_upd(m_plus(tmp[0], tmp[3]), cur_shape, tmp[4]); - std::vector d1 = - 
mat_vec_mul_upd(m_minus(tmp[1], tmp[3]), cur_shape, tmp[5]); - std::vector d2 = - mat_vec_mul_upd(m_minus(tmp[2], tmp[0]), cur_shape, tmp[4]); - std::vector h2 = - mat_vec_mul_upd(m_plus(tmp[2], tmp[3]), cur_shape, tmp[4]); - std::vector v1 = - mat_vec_mul_upd(tmp[3], cur_shape, m_minus(tmp[5], tmp[4])); - std::vector r1 = m_plus(m_plus(d1, v1), d); - std::vector r2 = m_plus(v1, h2); - res = r1; - for (size_t i = 0; i < res_shape[0] - r1.size(); i++) { - res.push_back(r2[i]); - } + } + size_t near_pow2_2 = near_pow2 / 2; + split_into_blocks(tmp, near_pow2_2); + Shape cur_shape({near_pow2_2, near_pow2_2}); + std::vector vec_sec_half(vec.begin() + near_pow2_2, vec.end()); + vec_sec_half.resize(near_pow2_2, ValueType(0)); + std::vector vec2_minus_vec1(vec_sec_half.size()); + std::transform(vec_sec_half.begin(), vec_sec_half.end(), vec.begin(), + vec2_minus_vec1.begin(), std::minus()); + std::vector d = mat_vec_mul_upd( + m_plus(mat, mat_shape, tmp[0], tmp[3], near_pow2_2), cur_shape, + vec); + std::vector d1 = mat_vec_mul_upd( + m_minus(mat, mat_shape, tmp[1], tmp[3], near_pow2_2), cur_shape, vec_sec_half); + std::vector d2 = mat_vec_mul_upd( + m_minus(mat, mat_shape, tmp[2], tmp[0], near_pow2_2), cur_shape, vec); + std::vector h2 = mat_vec_mul_upd( + m_plus(mat, mat_shape, tmp[2], tmp[3], near_pow2_2), cur_shape, vec); + std::vector v1 = mat_vec_mul_upd( + m_copy(mat, mat_shape, tmp[3], near_pow2_2), cur_shape, vec2_minus_vec1); + std::vector r1(near_pow2_2); + std::vector r2(near_pow2_2); + std::transform(d1.begin(), d1.end(), v1.begin(), d1.begin(), + std::plus()); + std::transform(d1.begin(), d1.end(), d.begin(), d1.begin(), + std::plus()); + std::transform(v1.begin(), v1.end(), h2.begin(), v1.begin(), + std::plus()); + res = d1; + for (size_t i = 0; i < res_shape[0] - d1.size(); i++) { + res.push_back(v1[i]); } return res; } template -void split_into_blocks_tbb(const std::vector& mat, - const Shape& mat_shape, - const std::vector& vec, - std::vector >& tmp, - size_t near_pow2) { +std::vector m_plus_tbb(const std::vector& mat, + const Shape& mat_shape, size_t ind1, + size_t ind2, size_t size) { + std::vector res(size * size); oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range(0, near_pow2 / 2), - [&](oneapi::tbb::blocked_range r) { - for (size_t i = r.begin(); i < r.end(); i++) { - for (size_t j = 0; j < near_pow2 / 2; j++) { - tmp[0][i * (near_pow2 / 2) + j] = - get_from(i, j, mat, mat_shape); - } - for (size_t j = near_pow2 / 2; j < near_pow2; j++) { - tmp[1][i * (near_pow2 / 2) + j - near_pow2 / 2] = - get_from(i, j, mat, mat_shape); + oneapi::tbb::blocked_range2d(0, size, 0, size), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { + for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) + + get_from(i, j + ind2, mat, mat_shape); } } }); + return res; +} + +template +std::vector m_minus_tbb(const std::vector& mat, + const Shape& mat_shape, size_t ind1, + size_t ind2, size_t size) { + std::vector res(size * size); oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range(near_pow2 / 2, near_pow2), - [&](oneapi::tbb::blocked_range r) { - for (size_t i = r.begin(); i < r.end(); i++) { - for (size_t j = 0; j < near_pow2 / 2; j++) { - tmp[2][(i - near_pow2 / 2) * (near_pow2 / 2) + j] = - get_from(i, j, mat, mat_shape); + oneapi::tbb::blocked_range2d(0, size, 0, size), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { + 
for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) - + get_from(i, j + ind2, mat, mat_shape); } - for (size_t j = near_pow2 / 2; j < near_pow2; j++) { - tmp[3][(i - near_pow2 / 2) * (near_pow2 / 2) + j - near_pow2 / 2] = - get_from(i, j, mat, mat_shape); + } + }); + return res; +} + +template +std::vector m_copy_tbb(const std::vector& mat, + const Shape& mat_shape, size_t ind1, + size_t size) { + std::vector res(size * size); + oneapi::tbb::parallel_for( + oneapi::tbb::blocked_range2d(0, size, 0, size), + [&](oneapi::tbb::blocked_range2d r) { + for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { + for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { + res[i * size + j] = get_from(i, j + ind1, mat, mat_shape); } } }); - for (size_t i = 0; i < near_pow2 / 2; i++) { - tmp[4].push_back(get_from(0, i, vec, mat_shape)); - } - for (size_t i = near_pow2 / 2; i < near_pow2; i++) { - tmp[5].push_back(get_from(0, i, vec, mat_shape)); - } + return res; } template @@ -198,58 +214,73 @@ std::vector mat_vec_mul_upd_tbb(const std::vector& mat, if (mat_shape.dims() != 2) { throw std::invalid_argument("Not a matrix in argument"); } - if (vec.size() != mat_shape[1]) { + if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } Shape res_shape(1); res_shape[0] = mat_shape[0]; std::vector res; - if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) { + if (mat_shape[0] <= kDepth && mat_shape[1] <= kDepth) { return mat_vec_mul(mat, mat_shape, vec); - } else { - size_t near_pow2 = 1; - while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { - near_pow2 = near_pow2 << 1; - } - std::vector > tmp( - 4, std::vector((near_pow2 / 2) * (near_pow2 / 2))); - tmp.push_back(std::vector()); - tmp.push_back(std::vector()); - split_into_blocks_tbb(mat, mat_shape, vec, tmp, near_pow2); - Shape cur_shape({near_pow2 / 2, near_pow2 / 2}); - oneapi::tbb::task_group g; - std::vector d; - std::vector d1; - std::vector d2; - std::vector h2; - std::vector v1; - g.run([&]() { - d = mat_vec_mul_upd_tbb(m_plus(tmp[0], tmp[3]), cur_shape, - tmp[4]); - }); - g.run([&]() { - d1 = mat_vec_mul_upd_tbb(m_minus(tmp[1], tmp[3]), cur_shape, - tmp[5]); - }); - g.run([&]() { - d2 = mat_vec_mul_upd_tbb(m_minus(tmp[2], tmp[0]), cur_shape, - tmp[4]); - }); - g.run([&]() { - h2 = mat_vec_mul_upd_tbb(m_plus(tmp[2], tmp[3]), cur_shape, - tmp[4]); - }); - g.run([&]() { - v1 = mat_vec_mul_upd_tbb(tmp[3], cur_shape, - m_minus(tmp[5], tmp[4])); - }); - g.wait(); - std::vector r1 = m_plus(m_plus(d1, v1), d); - std::vector r2 = m_plus(v1, h2); - res = r1; - for (size_t i = 0; i < res_shape[0] - r1.size(); i++) { - res.push_back(r2[i]); - } + } + size_t near_pow2 = 1; + std::vector tmp(4); + while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { + near_pow2 = near_pow2 << 1; + } + size_t near_pow2_2 = near_pow2 / 2; + // split_into_blocks(tmp, near_pow2_2); + tmp[0] = 0; + tmp[1] = near_pow2_2; + tmp[2] = 2 * near_pow2_2 * near_pow2_2; + tmp[3] = (near_pow2_2) * (2 * near_pow2_2 + 1); + Shape cur_shape({near_pow2_2, near_pow2_2}); + std::vector vec_sec_half(vec.begin() + near_pow2_2, vec.end()); + vec_sec_half.resize(near_pow2_2, ValueType(0)); + std::vector vec2_minus_vec1(vec_sec_half.size()); + std::vector d; + std::vector d1; + std::vector d2; + std::vector h2; + std::vector v1; + oneapi::tbb::task_group g; + g.run([&]() { + std::transform(vec_sec_half.begin(), vec_sec_half.end(), vec.begin(), + vec2_minus_vec1.begin(), 
std::minus()); + }); + g.run([&]() { + d = mat_vec_mul( + m_plus_tbb(mat, mat_shape, tmp[0], tmp[3], near_pow2_2), cur_shape, + vec); + }); + g.run([&]() { + d1 = mat_vec_mul( + m_minus_tbb(mat, mat_shape, tmp[1], tmp[3], near_pow2_2), cur_shape, + vec_sec_half); + }); + g.run([&]() { + d2 = mat_vec_mul( + m_minus_tbb(mat, mat_shape, tmp[2], tmp[0], near_pow2_2), cur_shape, + vec); + }); + g.run([&]() { + h2 = mat_vec_mul( + m_plus_tbb(mat, mat_shape, tmp[2], tmp[3], near_pow2_2), cur_shape, + vec); + }); + g.wait(); + v1 = mat_vec_mul( + m_copy_tbb(mat, mat_shape, tmp[3], near_pow2_2), cur_shape, + vec2_minus_vec1); + std::transform(d1.begin(), d1.end(), v1.begin(), d1.begin(), + std::plus()); + std::transform(d1.begin(), d1.end(), d.begin(), d1.begin(), + std::plus()); + std::transform(v1.begin(), v1.end(), h2.begin(), v1.begin(), + std::plus()); + res = d1; + for (size_t i = 0; i < res_shape[0] - d1.size(); i++) { + res.push_back(v1[i]); } return res; } @@ -261,7 +292,7 @@ std::vector mat_vec_mul_tbb(const std::vector& mat, if (mat_shape.dims() != 2) { throw std::invalid_argument("Not a matrix in argument"); } - if (vec.size() != mat_shape[1]) { + if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } Shape res_shape(1); diff --git a/src/layers/FCLayer.cpp b/src/layers/FCLayer.cpp index e002a748..1d1a6205 100644 --- a/src/layers/FCLayer.cpp +++ b/src/layers/FCLayer.cpp @@ -2,6 +2,13 @@ namespace itlab_2023 { +void split_into_blocks(std::vector& tmp, size_t near_pow2_2) { + tmp[0] = 0; + tmp[1] = near_pow2_2; + tmp[2] = 2 * near_pow2_2 * near_pow2_2; + tmp[3] = (near_pow2_2) * (2 * near_pow2_2 + 1); +} + void FCLayer::run(const Tensor& input, Tensor& output) { if (input.get_type() != weights_.get_type()) { throw std::invalid_argument("Input and weights data type aren't same"); diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 93cc26e8..9384d88a 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -13,7 +13,7 @@ void test_func(PoolingLayer& p, const Tensor& input, Tensor& output) { } TEST(time_test, mat_vec_mul_comp) { - size_t k = 2000; + size_t k = 5000; std::vector mat(k * k); std::vector vec(k); for (size_t i = 0; i < k; i++) { @@ -22,17 +22,24 @@ TEST(time_test, mat_vec_mul_comp) { for (size_t i = 0; i < k * k; i++) { mat[i] = rand() % 500; } - double count1 = elapsed_time_avg(500, mat_vec_mul, + double count1 = elapsed_time_avg(10, mat_vec_mul, mat, Shape({k, k}), vec); std::cerr << "Normal:" << count1 << std::endl; double count2 = elapsed_time_avg( - 500, mat_vec_mul_upd_tbb, mat, Shape({k, k}), vec); + 10, mat_vec_mul_upd_tbb, mat, Shape({k, k}), vec); std::cerr << "Tbb:" << count2 << std::endl; + auto tmp1 = mat_vec_mul(mat, Shape{k, k}, vec); + auto tmp2 = mat_vec_mul_upd_tbb(mat, Shape{k, k}, vec); + for (size_t i = 0; i < k; i++) { + if (tmp1[i] != tmp2[i]) { + std::cerr << tmp1[i] << std::endl << tmp2[i] << std::endl; + } + } // EXPECT_GE(count1, count2); } TEST(pooling_test, is_parallel_ok) { - size_t n = 500; + size_t n = 200; size_t c = 3; size_t h = 224; size_t w = 224; @@ -51,5 +58,5 @@ TEST(pooling_test, is_parallel_ok) { double count2 = elapsed_time(test_func, p2, input, output); std::cerr << "Tbb:" << count2 << std::endl; - //EXPECT_GE(count1, count2); + EXPECT_GE(count1, count2); } From ad6fc800e85ee5c1d56341b736d94d051d50ab29 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 28 Jun 2024 01:09:50 +0300 Subject: [PATCH 06/10] Changes --- 
include/layers/FCLayer.hpp | 248 +++++++------------------ src/layers/FCLayer.cpp | 7 - test/benchmarking/test_layers_time.cpp | 4 +- test/single_layer/test_fclayer.cpp | 12 +- 4 files changed, 71 insertions(+), 200 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index 0a36ff65..77c6b844 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -7,8 +7,8 @@ namespace itlab_2023 { -const size_t kDepth = 64; -void split_into_blocks(std::vector& tmp, size_t near_pow2_2); +const size_t kDepth1 = 128; +const size_t kDepth2 = 5; class FCLayer : public Layer { private: @@ -52,7 +52,7 @@ std::vector mat_vec_mul(const std::vector& mat, template inline ValueType get_from(size_t i, size_t j, const std::vector& mat, - const Shape& mat_shape) { + const Shape& mat_shape) { if (i < mat_shape[0] && j < mat_shape[1]) { return mat[i * mat_shape[1] + j]; } @@ -60,43 +60,67 @@ inline ValueType get_from(size_t i, size_t j, const std::vector& mat, } template -std::vector m_plus(const std::vector& mat, - const Shape& mat_shape, size_t ind1, - size_t ind2, size_t size) { - std::vector res(size * size); - for (size_t i = 0; i < size; i++) { - for (size_t j = 0; j < size; j++) { - res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) + - get_from(i, j + ind2, mat, mat_shape); +void m_mult(const std::vector& mat, + const std::vector& vec, const Shape& mat_shape, + std::vector& res, size_t indX, size_t indY, size_t size, + size_t depth) { + if (depth > kDepth2 || size < kDepth1) { + for (size_t i = 0; i < size; i++) { + for (size_t j = 0; j < size; j++) { + if (indX + j < vec.size()) { + res[indY + i] += + get_from(indY + i, indX + j, mat, mat_shape) * vec[indX + j]; + } + } } - } - return res; -} - -template -std::vector m_minus(const std::vector& mat, - const Shape& mat_shape, size_t ind1, - size_t ind2, size_t size) { - std::vector res(size * size); - for (size_t i = 0; i < size; i++) { - for (size_t j = 0; j < size; j++) { - res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) - - get_from(i, j + ind2, mat, mat_shape); + } else { + std::vector tmpX({0, size / 2, 0, size / 2}); + std::vector tmpY({0, 0, size / 2, size / 2}); + for (size_t i = 0; i < 4; i++) { + m_mult(mat, vec, mat_shape, res, indX + tmpX[i], + indY + tmpY[i], size / 2, depth + 1); } } - return res; } template -std::vector m_copy(const std::vector& mat, - const Shape& mat_shape, size_t ind1, size_t size) { - std::vector res(size * size); - for (size_t i = 0; i < size; i++) { - for (size_t j = 0; j < size; j++) { - res[i * size + j] = get_from(i, j + ind1, mat, mat_shape); +void m_mult_tbb(const std::vector& mat, + const std::vector& vec, const Shape& mat_shape, + std::vector& res, size_t indX, size_t indY, + size_t size, size_t depth) { + if (depth > kDepth2 || size < kDepth1) { + for (size_t i = 0; i < size; i++) { + for (size_t j = 0; j < size; j++) { + if (indX + j < vec.size()) { + res[indY + i] += + get_from(indY + i, indX + j, mat, mat_shape) * vec[indX + j]; + } + } } + } else { + size_t size_2 = size / 2; + std::vector tmpX({0, size_2, 0, size_2}); + std::vector tmpY({0, 0, size_2, size_2}); + oneapi::tbb::task_group g; + g.run([&]() { + m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[0], + indY + tmpY[0], size_2, depth + 1); + }); + g.run([&]() { + m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[2], + indY + tmpY[2], size_2, depth + 1); + }); + g.wait(); + g.run([&]() { + m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[1], + indY + tmpY[1], size_2, depth + 1); + }); + 
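// NOTE (editorial annotation, not part of the original patch): the tasks using tmpX[1] and
// tmpX[3] cover the right half of the matrix and accumulate into the same rows of res as the
// tmpX[0]/tmpX[2] tasks, which is why they start only after the preceding g.wait(); within
// each pair the tasks write disjoint row ranges and can safely run concurrently.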
g.run([&]() { + m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[3], + indY + tmpY[3], size_2, depth + 1); + }); + g.wait(); } - return res; } template @@ -109,101 +133,13 @@ std::vector mat_vec_mul_upd(const std::vector& mat, if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } - Shape res_shape(1); - res_shape[0] = mat_shape[0]; - std::vector res; - if (mat_shape[0] <= kDepth && mat_shape[1] <= kDepth) { - return mat_vec_mul(mat, mat_shape, vec); - } size_t near_pow2 = 1; - std::vector tmp(4); while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { - near_pow2 = near_pow2 << 1; - } - size_t near_pow2_2 = near_pow2 / 2; - split_into_blocks(tmp, near_pow2_2); - Shape cur_shape({near_pow2_2, near_pow2_2}); - std::vector vec_sec_half(vec.begin() + near_pow2_2, vec.end()); - vec_sec_half.resize(near_pow2_2, ValueType(0)); - std::vector vec2_minus_vec1(vec_sec_half.size()); - std::transform(vec_sec_half.begin(), vec_sec_half.end(), vec.begin(), - vec2_minus_vec1.begin(), std::minus()); - std::vector d = mat_vec_mul_upd( - m_plus(mat, mat_shape, tmp[0], tmp[3], near_pow2_2), cur_shape, - vec); - std::vector d1 = mat_vec_mul_upd( - m_minus(mat, mat_shape, tmp[1], tmp[3], near_pow2_2), cur_shape, vec_sec_half); - std::vector d2 = mat_vec_mul_upd( - m_minus(mat, mat_shape, tmp[2], tmp[0], near_pow2_2), cur_shape, vec); - std::vector h2 = mat_vec_mul_upd( - m_plus(mat, mat_shape, tmp[2], tmp[3], near_pow2_2), cur_shape, vec); - std::vector v1 = mat_vec_mul_upd( - m_copy(mat, mat_shape, tmp[3], near_pow2_2), cur_shape, vec2_minus_vec1); - std::vector r1(near_pow2_2); - std::vector r2(near_pow2_2); - std::transform(d1.begin(), d1.end(), v1.begin(), d1.begin(), - std::plus()); - std::transform(d1.begin(), d1.end(), d.begin(), d1.begin(), - std::plus()); - std::transform(v1.begin(), v1.end(), h2.begin(), v1.begin(), - std::plus()); - res = d1; - for (size_t i = 0; i < res_shape[0] - d1.size(); i++) { - res.push_back(v1[i]); + near_pow2 = near_pow2 << 1; } - return res; -} - -template -std::vector m_plus_tbb(const std::vector& mat, - const Shape& mat_shape, size_t ind1, - size_t ind2, size_t size) { - std::vector res(size * size); - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d(0, size, 0, size), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { - for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { - res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) + - get_from(i, j + ind2, mat, mat_shape); - } - } - }); - return res; -} - -template -std::vector m_minus_tbb(const std::vector& mat, - const Shape& mat_shape, size_t ind1, - size_t ind2, size_t size) { - std::vector res(size * size); - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d(0, size, 0, size), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { - for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { - res[i * size + j] = get_from(i, j + ind1, mat, mat_shape) - - get_from(i, j + ind2, mat, mat_shape); - } - } - }); - return res; -} - -template -std::vector m_copy_tbb(const std::vector& mat, - const Shape& mat_shape, size_t ind1, - size_t size) { - std::vector res(size * size); - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d(0, size, 0, size), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { - for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { - res[i * size + j] = get_from(i, j + ind1, 
mat, mat_shape); - } - } - }); + std::vector res(near_pow2); + m_mult(mat, vec, mat_shape, res, 0, 0, near_pow2, 1); + res.resize(mat_shape[0]); return res; } @@ -217,71 +153,13 @@ std::vector mat_vec_mul_upd_tbb(const std::vector& mat, if (vec.size() < mat_shape[1]) { throw std::invalid_argument("Invalid vector size"); } - Shape res_shape(1); - res_shape[0] = mat_shape[0]; - std::vector res; - if (mat_shape[0] <= kDepth && mat_shape[1] <= kDepth) { - return mat_vec_mul(mat, mat_shape, vec); - } size_t near_pow2 = 1; - std::vector tmp(4); while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) { near_pow2 = near_pow2 << 1; } - size_t near_pow2_2 = near_pow2 / 2; - // split_into_blocks(tmp, near_pow2_2); - tmp[0] = 0; - tmp[1] = near_pow2_2; - tmp[2] = 2 * near_pow2_2 * near_pow2_2; - tmp[3] = (near_pow2_2) * (2 * near_pow2_2 + 1); - Shape cur_shape({near_pow2_2, near_pow2_2}); - std::vector vec_sec_half(vec.begin() + near_pow2_2, vec.end()); - vec_sec_half.resize(near_pow2_2, ValueType(0)); - std::vector vec2_minus_vec1(vec_sec_half.size()); - std::vector d; - std::vector d1; - std::vector d2; - std::vector h2; - std::vector v1; - oneapi::tbb::task_group g; - g.run([&]() { - std::transform(vec_sec_half.begin(), vec_sec_half.end(), vec.begin(), - vec2_minus_vec1.begin(), std::minus()); - }); - g.run([&]() { - d = mat_vec_mul( - m_plus_tbb(mat, mat_shape, tmp[0], tmp[3], near_pow2_2), cur_shape, - vec); - }); - g.run([&]() { - d1 = mat_vec_mul( - m_minus_tbb(mat, mat_shape, tmp[1], tmp[3], near_pow2_2), cur_shape, - vec_sec_half); - }); - g.run([&]() { - d2 = mat_vec_mul( - m_minus_tbb(mat, mat_shape, tmp[2], tmp[0], near_pow2_2), cur_shape, - vec); - }); - g.run([&]() { - h2 = mat_vec_mul( - m_plus_tbb(mat, mat_shape, tmp[2], tmp[3], near_pow2_2), cur_shape, - vec); - }); - g.wait(); - v1 = mat_vec_mul( - m_copy_tbb(mat, mat_shape, tmp[3], near_pow2_2), cur_shape, - vec2_minus_vec1); - std::transform(d1.begin(), d1.end(), v1.begin(), d1.begin(), - std::plus()); - std::transform(d1.begin(), d1.end(), d.begin(), d1.begin(), - std::plus()); - std::transform(v1.begin(), v1.end(), h2.begin(), v1.begin(), - std::plus()); - res = d1; - for (size_t i = 0; i < res_shape[0] - d1.size(); i++) { - res.push_back(v1[i]); - } + std::vector res(near_pow2); + m_mult_tbb(mat, vec, mat_shape, res, 0, 0, near_pow2, 1); + res.resize(mat_shape[0]); return res; } diff --git a/src/layers/FCLayer.cpp b/src/layers/FCLayer.cpp index 1d1a6205..e002a748 100644 --- a/src/layers/FCLayer.cpp +++ b/src/layers/FCLayer.cpp @@ -2,13 +2,6 @@ namespace itlab_2023 { -void split_into_blocks(std::vector& tmp, size_t near_pow2_2) { - tmp[0] = 0; - tmp[1] = near_pow2_2; - tmp[2] = 2 * near_pow2_2 * near_pow2_2; - tmp[3] = (near_pow2_2) * (2 * near_pow2_2 + 1); -} - void FCLayer::run(const Tensor& input, Tensor& output) { if (input.get_type() != weights_.get_type()) { throw std::invalid_argument("Input and weights data type aren't same"); diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 9384d88a..c240dec6 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -13,7 +13,7 @@ void test_func(PoolingLayer& p, const Tensor& input, Tensor& output) { } TEST(time_test, mat_vec_mul_comp) { - size_t k = 5000; + size_t k = 7000; std::vector mat(k * k); std::vector vec(k); for (size_t i = 0; i < k; i++) { @@ -32,7 +32,7 @@ TEST(time_test, mat_vec_mul_comp) { auto tmp2 = mat_vec_mul_upd_tbb(mat, Shape{k, k}, vec); for (size_t i = 0; i < k; i++) { 
if (tmp1[i] != tmp2[i]) { - std::cerr << tmp1[i] << std::endl << tmp2[i] << std::endl; + std::cerr << tmp1[i] << ' ' << tmp2[i] << ' ' << i << std::endl; } } // EXPECT_GE(count1, count2); diff --git a/test/single_layer/test_fclayer.cpp b/test/single_layer/test_fclayer.cpp index 50d63ab8..ece3c6ee 100644 --- a/test/single_layer/test_fclayer.cpp +++ b/test/single_layer/test_fclayer.cpp @@ -154,12 +154,12 @@ TEST(fclayer, matvecmul_works) { EXPECT_EQ(res, true_res); } -TEST(fclayer, matvecmul_throws_when_big_vector) { - std::vector mat = {2, 4, 2, 4}; - std::vector vec = {1, 2, 3}; - Shape mat_shape({2, 2}); - ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); -} +//TEST(fclayer, matvecmul_throws_when_big_vector) { +// std::vector mat = {2, 4, 2, 4}; +// std::vector vec = {1, 2, 3}; +// Shape mat_shape({2, 2}); +// ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); +//} TEST(fclayer, matvecmul_throws_when_not_matrix) { std::vector mat = {2, 4, 2, 4, 1, 3, 5, 7}; From 1640bc02fed1f4bf7ad1c3345fa607460c708bf7 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 28 Jun 2024 02:24:48 +0300 Subject: [PATCH 07/10] Clang --- include/layers/FCLayer.hpp | 52 +++++++++++++------------- test/benchmarking/test_layers_time.cpp | 2 +- test/single_layer/test_fclayer.cpp | 12 +++--- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index 77c6b844..7ed89225 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include "layers/Layer.hpp" @@ -18,9 +19,8 @@ class FCLayer : public Layer { public: FCLayer() = default; - FCLayer(const Tensor& weights, const Tensor& bias, - ImplType implType = kDefault) - : weights_(weights), bias_(bias), implType_(implType) {} + FCLayer(Tensor weights, const Tensor& bias, ImplType implType = kDefault) + : weights_(std::move(weights)), bias_(bias), implType_(implType) {} static std::string get_name() { return "Fully-connected layer"; } void run(const Tensor& input, Tensor& output) override; }; @@ -62,23 +62,23 @@ inline ValueType get_from(size_t i, size_t j, const std::vector& mat, template void m_mult(const std::vector& mat, const std::vector& vec, const Shape& mat_shape, - std::vector& res, size_t indX, size_t indY, size_t size, - size_t depth) { + std::vector& res, size_t ind_x, size_t ind_y, + size_t size, size_t depth) { if (depth > kDepth2 || size < kDepth1) { for (size_t i = 0; i < size; i++) { for (size_t j = 0; j < size; j++) { - if (indX + j < vec.size()) { - res[indY + i] += - get_from(indY + i, indX + j, mat, mat_shape) * vec[indX + j]; + if (ind_x + j < vec.size()) { + res[ind_y + i] += + get_from(ind_y + i, ind_x + j, mat, mat_shape) * vec[ind_x + j]; } } } } else { - std::vector tmpX({0, size / 2, 0, size / 2}); - std::vector tmpY({0, 0, size / 2, size / 2}); + std::vector tmp_x({0, size / 2, 0, size / 2}); + std::vector tmp_y({0, 0, size / 2, size / 2}); for (size_t i = 0; i < 4; i++) { - m_mult(mat, vec, mat_shape, res, indX + tmpX[i], - indY + tmpY[i], size / 2, depth + 1); + m_mult(mat, vec, mat_shape, res, ind_x + tmp_x[i], + ind_y + tmp_y[i], size / 2, depth + 1); } } } @@ -86,38 +86,38 @@ void m_mult(const std::vector& mat, template void m_mult_tbb(const std::vector& mat, const std::vector& vec, const Shape& mat_shape, - std::vector& res, size_t indX, size_t indY, + std::vector& res, size_t ind_x, size_t ind_y, size_t size, size_t depth) { if (depth > kDepth2 || size < kDepth1) { for (size_t i = 0; i < size; 
i++) { for (size_t j = 0; j < size; j++) { - if (indX + j < vec.size()) { - res[indY + i] += - get_from(indY + i, indX + j, mat, mat_shape) * vec[indX + j]; + if (ind_x + j < vec.size()) { + res[ind_y + i] += + get_from(ind_y + i, ind_x + j, mat, mat_shape) * vec[ind_x + j]; } } } } else { size_t size_2 = size / 2; - std::vector tmpX({0, size_2, 0, size_2}); - std::vector tmpY({0, 0, size_2, size_2}); + std::vector tmp_x({0, size_2, 0, size_2}); + std::vector tmp_y({0, 0, size_2, size_2}); oneapi::tbb::task_group g; g.run([&]() { - m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[0], - indY + tmpY[0], size_2, depth + 1); + m_mult_tbb(mat, vec, mat_shape, res, ind_x + tmp_x[0], + ind_y + tmp_y[0], size_2, depth + 1); }); g.run([&]() { - m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[2], - indY + tmpY[2], size_2, depth + 1); + m_mult_tbb(mat, vec, mat_shape, res, ind_x + tmp_x[2], + ind_y + tmp_y[2], size_2, depth + 1); }); g.wait(); g.run([&]() { - m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[1], - indY + tmpY[1], size_2, depth + 1); + m_mult_tbb(mat, vec, mat_shape, res, ind_x + tmp_x[1], + ind_y + tmp_y[1], size_2, depth + 1); }); g.run([&]() { - m_mult_tbb(mat, vec, mat_shape, res, indX + tmpX[3], - indY + tmpY[3], size_2, depth + 1); + m_mult_tbb(mat, vec, mat_shape, res, ind_x + tmp_x[3], + ind_y + tmp_y[3], size_2, depth + 1); }); g.wait(); } diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index c240dec6..629d7909 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -58,5 +58,5 @@ TEST(pooling_test, is_parallel_ok) { double count2 = elapsed_time(test_func, p2, input, output); std::cerr << "Tbb:" << count2 << std::endl; - EXPECT_GE(count1, count2); + // EXPECT_GE(count1, count2); } diff --git a/test/single_layer/test_fclayer.cpp b/test/single_layer/test_fclayer.cpp index ece3c6ee..5d17bcf0 100644 --- a/test/single_layer/test_fclayer.cpp +++ b/test/single_layer/test_fclayer.cpp @@ -154,12 +154,12 @@ TEST(fclayer, matvecmul_works) { EXPECT_EQ(res, true_res); } -//TEST(fclayer, matvecmul_throws_when_big_vector) { -// std::vector mat = {2, 4, 2, 4}; -// std::vector vec = {1, 2, 3}; -// Shape mat_shape({2, 2}); -// ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); -//} +// TEST(fclayer, matvecmul_throws_when_big_vector) { +// std::vector mat = {2, 4, 2, 4}; +// std::vector vec = {1, 2, 3}; +// Shape mat_shape({2, 2}); +// ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); +// } TEST(fclayer, matvecmul_throws_when_not_matrix) { std::vector mat = {2, 4, 2, 4, 1, 3, 5, 7}; From 2f7a5f0901d1507c51497c3a40c86faa4a8ffd3f Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 28 Jun 2024 03:15:04 +0300 Subject: [PATCH 08/10] Fixes --- include/layers/FCLayer.hpp | 31 +------------- include/layers/PoolingLayer.hpp | 27 +++++++++---- test/benchmarking/test_layers_time.cpp | 2 +- test/single_layer/test_fclayer.cpp | 54 ++++++++++++++++++++++--- test/single_layer/test_poolinglayer.cpp | 22 ++++++++++ 5 files changed, 92 insertions(+), 44 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index a223efd7..66243bf6 100644 --- a/include/layers/FCLayer.hpp +++ b/include/layers/FCLayer.hpp @@ -163,35 +163,6 @@ std::vector mat_vec_mul_upd_tbb(const std::vector& mat, return res; } -template -std::vector mat_vec_mul_tbb(const std::vector& mat, - const Shape& mat_shape, - const std::vector& vec) { - if (mat_shape.dims() != 2) { - throw std::invalid_argument("Not a matrix in argument"); - 
} - if (vec.size() < mat_shape[1]) { - throw std::invalid_argument("Invalid vector size"); - } - Shape res_shape(1); - res_shape[0] = mat_shape[0]; - std::vector res(res_shape[0]); - ValueType elem; - oneapi::tbb::parallel_for( - oneapi::tbb::blocked_range2d(0, mat_shape[0], 0, mat_shape[1]), - [&](oneapi::tbb::blocked_range2d r) { - for (size_t i = r.rows().begin(); i < r.rows().end(); i++) { - elem = ValueType(0); - for (size_t j = r.cols().begin(); j < r.cols().end(); j++) { - // due to 1d indexing - elem += mat[i * mat_shape[1] + j] * vec[j]; - } - res[i] = elem; - } - }); - return res; -} - template class FCLayerImpl : public LayerImpl { public: @@ -291,7 +262,7 @@ std::vector FCLayerImplTBB::run( } Shape cur_w_shape({this->outputShape_[0], this->inputShape_[0]}); std::vector output_values = - mat_vec_mul_tbb(this->weights_, cur_w_shape, input); + mat_vec_mul_upd_tbb(this->weights_, cur_w_shape, input); std::transform(output_values.begin(), output_values.end(), this->bias_.begin(), output_values.begin(), std::plus()); diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp index 7ea3c3ef..3b187b07 100644 --- a/include/layers/PoolingLayer.hpp +++ b/include/layers/PoolingLayer.hpp @@ -224,17 +224,30 @@ std::vector PoolingLayerImplTBB::run( } } } + coords = std::vector({n, c, i, j}); switch (this->poolingType_) { case kAverage: - res[this->outputShape_.get_index(std::vector( - {n, c, i, j}))] = avg_pooling(pooling_buf); + if (this->inputShape_.dims() == 1) { + res[i] = avg_pooling(pooling_buf); + } else { + res[this->outputShape_.get_index( + std::vector( + coords.end() - this->inputShape_.dims(), + coords.end()))] = avg_pooling(pooling_buf); + } break; case kMax: - res[this->outputShape_.get_index(std::vector( - {n, c, i, j}))] = max_pooling(pooling_buf); - break; - default: - throw std::runtime_error("Unknown pooling type"); + if (this->inputShape_.dims() == 1) { + res[i] = max_pooling(pooling_buf); + } else { + res[this->outputShape_.get_index( + std::vector( + coords.end() - this->inputShape_.dims(), + coords.end()))] = max_pooling(pooling_buf); + break; + default: + throw std::runtime_error("Unknown pooling type"); + } } } } diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp index 629d7909..216e8a28 100644 --- a/test/benchmarking/test_layers_time.cpp +++ b/test/benchmarking/test_layers_time.cpp @@ -39,7 +39,7 @@ TEST(time_test, mat_vec_mul_comp) { } TEST(pooling_test, is_parallel_ok) { - size_t n = 200; + size_t n = 50; size_t c = 3; size_t h = 224; size_t w = 224; diff --git a/test/single_layer/test_fclayer.cpp b/test/single_layer/test_fclayer.cpp index 5d17bcf0..18de8064 100644 --- a/test/single_layer/test_fclayer.cpp +++ b/test/single_layer/test_fclayer.cpp @@ -154,12 +154,12 @@ TEST(fclayer, matvecmul_works) { EXPECT_EQ(res, true_res); } -// TEST(fclayer, matvecmul_throws_when_big_vector) { -// std::vector mat = {2, 4, 2, 4}; -// std::vector vec = {1, 2, 3}; -// Shape mat_shape({2, 2}); -// ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); -// } +TEST(fclayer, matvecmul_throws_when_small_vector) { + std::vector mat = {2, 4, 2, 4}; + std::vector vec = {1}; + Shape mat_shape({2, 2}); + ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); +} TEST(fclayer, matvecmul_throws_when_not_matrix) { std::vector mat = {2, 4, 2, 4, 1, 3, 5, 7}; @@ -168,6 +168,20 @@ TEST(fclayer, matvecmul_throws_when_not_matrix) { ASSERT_ANY_THROW(mat_vec_mul(mat, mat_shape, vec)); } +TEST(fclayer, matvecmul_tbb_throws_when_small_vector) { + 
std::vector mat = {2, 4, 2, 4}; + std::vector vec = {1}; + Shape mat_shape({2, 2}); + ASSERT_ANY_THROW(mat_vec_mul_upd_tbb(mat, mat_shape, vec)); +} + +TEST(fclayer, matvecmul_tbb_throws_when_not_matrix) { + std::vector mat = {2, 4, 2, 4, 1, 3, 5, 7}; + std::vector vec = {1, 2}; + Shape mat_shape({2, 2, 2}); + ASSERT_ANY_THROW(mat_vec_mul_upd_tbb(mat, mat_shape, vec)); +} + TEST(fclayer, new_fc_layer_can_run_float) { const std::vector a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F}; const std::vector a2 = {9.0F, 6.4F, 17.5F}; @@ -196,6 +210,34 @@ TEST(fclayer, new_fc_layer_can_run_int) { } } +TEST(fclayer, new_fc_layer_tbb_can_run_float) { + const std::vector a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F}; + const std::vector a2 = {9.0F, 6.4F, 17.5F}; + Tensor weights = make_tensor(a1, {3, 2}); + Tensor output; + Shape wshape({3, 2}); + Tensor bias = make_tensor({0.5F, 0.5F, 1.0F}); + FCLayer layer(weights, bias, itlab_2023::kTBB); + layer.run(make_tensor({2.0F, 3.0F}), output); + for (size_t i = 0; i < a2.size(); i++) { + EXPECT_NEAR((*output.as())[i], a2[i], 1e-5); + } +} + +TEST(fclayer, new_fc_layer_tbb_can_run_int) { + const std::vector a1 = {2, 1, 0, 2, 0, 5}; + const std::vector a2 = {7, 6, 16}; + Tensor weights = make_tensor(a1, {3, 2}); + Tensor output; + Shape wshape({3, 2}); + Tensor bias = make_tensor({0, 0, 1}); + FCLayer layer(weights, bias, itlab_2023::kTBB); + layer.run(make_tensor({2, 3}), output); + for (size_t i = 0; i < a2.size(); i++) { + EXPECT_NEAR((*output.as())[i], a2[i], 1e-5); + } +} + TEST(fclayer, new_fc_layer_throws_when_big_input) { const std::vector a1 = {2.0F, 1.5F, 0.1F, 1.9F, 0.0F, 5.5F}; Tensor weights = make_tensor(a1, {3, 2}); diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp index 5bafa541..8e5310a9 100644 --- a/test/single_layer/test_poolinglayer.cpp +++ b/test/single_layer/test_poolinglayer.cpp @@ -35,6 +35,15 @@ TEST(poolinglayer, throws_when_big_input) { ASSERT_ANY_THROW(a.run(input)); } +TEST(poolinglayer, tbb_pl_throws_when_big_input) { + Shape inpshape = {7}; + Shape poolshape = {3}; + PoolingLayerImplTBB a = + PoolingLayerImplTBB(inpshape, poolshape, "average"); + std::vector input({9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0}); + ASSERT_ANY_THROW(a.run(input)); +} + TEST(poolinglayer, throws_when_invalid_pooling_type) { Shape inpshape = {7}; Shape poolshape = {3}; @@ -166,6 +175,19 @@ TEST(poolinglayer, new_pooling_layer_can_run_int_avg) { } } +TEST(poolinglayer, new_pooling_layer_can_run_int_avg_tbb) { + Shape inpshape = {4, 4}; + Shape poolshape = {2, 2}; + PoolingLayer a(poolshape, "average", itlab_2023::kTBB); + std::vector input({9, 8, 7, 6, 5, 4, 3, 2, 2, 3, 4, 5, 6, 7, 8, 9}); + Tensor output = make_tensor({0}); + a.run(make_tensor(input, inpshape), output); + std::vector true_output = {6, 4, 4, 6}; + for (size_t i = 0; i < true_output.size(); i++) { + EXPECT_NEAR((*output.as())[i], true_output[i], 1e-5); + } +} + TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) { Shape inpshape = {8}; Shape poolshape = {3}; From ac9260f0dc7be963104e9a3b2ae1418d1d8e4bf3 Mon Sep 17 00:00:00 2001 From: NeiroYT Date: Fri, 28 Jun 2024 04:30:05 +0300 Subject: [PATCH 09/10] Fixes 2 --- include/layers/FCLayer.hpp | 3 ++- include/layers/PoolingLayer.hpp | 3 ++- test/single_layer/test_poolinglayer.cpp | 13 +++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/layers/FCLayer.hpp b/include/layers/FCLayer.hpp index 66243bf6..5384f326 100644 --- a/include/layers/FCLayer.hpp +++ 
b/include/layers/FCLayer.hpp
@@ -251,7 +251,8 @@ class FCLayerImplTBB : public FCLayerImpl {
                  const std::vector& input_bias)
       : FCLayerImpl(input_weights, input_weights_shape, input_bias) {
   }
-  std::vector run(const std::vector& input) const;
+  std::vector run(
+      const std::vector& input) const override;
 };
 
 template
diff --git a/include/layers/PoolingLayer.hpp b/include/layers/PoolingLayer.hpp
index 3b187b07..4b82f40a 100644
--- a/include/layers/PoolingLayer.hpp
+++ b/include/layers/PoolingLayer.hpp
@@ -169,7 +169,8 @@ class PoolingLayerImplTBB : public PoolingLayerImpl {
   PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape,
                       const std::string& pooling_type = "average")
       : PoolingLayerImpl(input_shape, pooling_shape, pooling_type) {}
-  std::vector run(const std::vector& input) const;
+  std::vector run(
+      const std::vector& input) const override;
 };
 
 template
diff --git a/test/single_layer/test_poolinglayer.cpp b/test/single_layer/test_poolinglayer.cpp
index 8e5310a9..184a9617 100644
--- a/test/single_layer/test_poolinglayer.cpp
+++ b/test/single_layer/test_poolinglayer.cpp
@@ -200,3 +200,16 @@ TEST(poolinglayer, new_pooling_layer_can_run_1d_pooling_float) {
     EXPECT_NEAR((*output.as())[i], true_output[i], 1e-5);
   }
 }
+
+TEST(poolinglayer, new_pooling_layer_tbb_can_run_1d_pooling_float) {
+  Shape inpshape = {8};
+  Shape poolshape = {3};
+  PoolingLayer a(poolshape, "average", itlab_2023::kTBB);
+  std::vector input({9.0F, 8.0F, 7.0F, 6.0F, 5.0F, 4.0F, 3.0F, 2.0F});
+  Tensor output = make_tensor({0});
+  a.run(make_tensor(input, inpshape), output);
+  std::vector true_output = {8.0F, 5.0F};
+  for (size_t i = 0; i < true_output.size(); i++) {
+    EXPECT_NEAR((*output.as())[i], true_output[i], 1e-5);
+  }
+}

From b49ab7a6632cd8a95ed460fc0d878678dff1eff4 Mon Sep 17 00:00:00 2001
From: NeiroYT
Date: Fri, 26 Jul 2024 06:37:12 +0300
Subject: [PATCH 10/10] Changes

---
 include/layers/Layer.hpp               |  2 +-
 test/benchmarking/test_layers_time.cpp | 12 +++---------
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/include/layers/Layer.hpp b/include/layers/Layer.hpp
index 22c34657..e7dacfa0 100644
--- a/include/layers/Layer.hpp
+++ b/include/layers/Layer.hpp
@@ -19,7 +19,7 @@ enum LayerType {
   kElementWise,
   kConvolution,
   kFullyConnected,
-  kOutput
+  kOutput,
 };
 
 enum ImplType { kDefault, kTBB };
diff --git a/test/benchmarking/test_layers_time.cpp b/test/benchmarking/test_layers_time.cpp
index 216e8a28..b2891e7a 100644
--- a/test/benchmarking/test_layers_time.cpp
+++ b/test/benchmarking/test_layers_time.cpp
@@ -24,18 +24,14 @@ TEST(time_test, mat_vec_mul_comp) {
   }
   double count1 =
       elapsed_time_avg(10, mat_vec_mul, mat, Shape({k, k}), vec);
-  std::cerr << "Normal:" << count1 << std::endl;
   double count2 = elapsed_time_avg(
       10, mat_vec_mul_upd_tbb, mat, Shape({k, k}), vec);
-  std::cerr << "Tbb:" << count2 << std::endl;
   auto tmp1 = mat_vec_mul(mat, Shape{k, k}, vec);
   auto tmp2 = mat_vec_mul_upd_tbb(mat, Shape{k, k}, vec);
   for (size_t i = 0; i < k; i++) {
-    if (tmp1[i] != tmp2[i]) {
-      std::cerr << tmp1[i] << ' ' << tmp2[i] << ' ' << i << std::endl;
-    }
+    EXPECT_EQ(tmp1[i], tmp2[i]);
   }
-  // EXPECT_GE(count1, count2);
+  EXPECT_GE(count1, count2);
 }
 
 TEST(pooling_test, is_parallel_ok) {
@@ -54,9 +50,7 @@ TEST(pooling_test, is_parallel_ok) {
   PoolingLayer p2(Shape({2, 2}), "max", kTBB);
   double count1 =
       elapsed_time(test_func, p1, input, output);
-  std::cerr << "Normal:" << count1 << std::endl;
   double count2 =
       elapsed_time(test_func, p2, input, output);
-  std::cerr << "Tbb:" << count2 << std::endl;
-  // EXPECT_GE(count1, count2);
+  EXPECT_GE(count1, count2);
 }
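
A side note on the kernel that patch 08 deletes: mat_vec_mul_tbb walked a blocked_range2d while keeping a single ValueType elem captured by reference, so concurrent sub-ranges could race on the accumulator, and whenever TBB split the range along the column dimension each task overwrote res[i] with only a partial dot product. Parallelizing over rows only, with a task-local accumulator, avoids both hazards. The following is a minimal stand-alone sketch of that idea, using plain sizes and std::vector in place of the repository's Shape and Tensor types; the name mat_vec_mul_rows_tbb is illustrative and not part of the library:

#include <cstddef>
#include <stdexcept>
#include <vector>

#include "oneapi/tbb.h"

// Row-parallel matrix-vector product. Each TBB task owns a block of rows,
// keeps its accumulator local, and is the only writer of res[i] for those
// rows, so no element is ever written by two tasks.
template <typename T>
std::vector<T> mat_vec_mul_rows_tbb(const std::vector<T>& mat, size_t rows,
                                    size_t cols, const std::vector<T>& vec) {
  if (mat.size() != rows * cols || vec.size() != cols) {
    throw std::invalid_argument("Shape mismatch");
  }
  std::vector<T> res(rows, T(0));
  oneapi::tbb::parallel_for(
      oneapi::tbb::blocked_range<size_t>(0, rows),
      [&](const oneapi::tbb::blocked_range<size_t>& r) {
        for (size_t i = r.begin(); i < r.end(); i++) {
          T acc = T(0);  // local to this task, unlike the shared accumulator
          for (size_t j = 0; j < cols; j++) {
            acc += mat[i * cols + j] * vec[j];  // row-major 1-D indexing
          }
          res[i] = acc;
        }
      });
  return res;
}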
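
The recursive m_mult_tbb introduced in patch 07 splits the matrix into four quadrants per level and schedules them in two phases: the two left quadrants accumulate into disjoint row halves of res and may run concurrently, then g.wait() acts as a barrier before the two right quadrants, which revisit the same rows, are launched. A compact stand-alone sketch of that scheduling idea follows; sizes are plain integers, the cut-off constants are illustrative stand-ins for kDepth1/kDepth2, and the block size is assumed to be a power of two:

#include <cstddef>
#include <vector>

#include "oneapi/tbb.h"

// Quadrant-recursive matrix-vector product. Because res[row] is accumulated
// with +=, only blocks with disjoint row ranges are allowed to run
// concurrently: left quadrants first, a wait(), then the right quadrants.
template <typename T>
void block_mat_vec(const std::vector<T>& mat, const std::vector<T>& vec,
                   size_t cols, std::vector<T>& res, size_t row0, size_t col0,
                   size_t size, size_t depth) {
  const size_t kMaxDepth = 3;  // illustrative cut-offs, not the library's
  const size_t kMinSize = 64;  // kDepth1 / kDepth2 constants
  if (depth >= kMaxDepth || size <= kMinSize) {
    for (size_t i = 0; i < size && row0 + i < res.size(); i++) {
      for (size_t j = 0; j < size && col0 + j < vec.size(); j++) {
        res[row0 + i] += mat[(row0 + i) * cols + (col0 + j)] * vec[col0 + j];
      }
    }
    return;
  }
  const size_t half = size / 2;
  oneapi::tbb::task_group g;
  // Phase 1: top-left and bottom-left quadrants write disjoint row halves.
  g.run([&] { block_mat_vec(mat, vec, cols, res, row0, col0, half, depth + 1); });
  g.run([&] {
    block_mat_vec(mat, vec, cols, res, row0 + half, col0, half, depth + 1);
  });
  g.wait();
  // Phase 2: the right quadrants reuse the same rows as phase 1, hence the
  // barrier above; between themselves they are again row-disjoint.
  g.run([&] {
    block_mat_vec(mat, vec, cols, res, row0, col0 + half, half, depth + 1);
  });
  g.run([&] {
    block_mat_vec(mat, vec, cols, res, row0 + half, col0 + half, half, depth + 1);
  });
  g.wait();
}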
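
Patch 08 also changes how the pooling kernels build the output index: instead of hard-coding a 4-D coordinate {n, c, i, j}, they keep only the trailing inputShape_.dims() entries of it (with a separate special case for 1-D inputs), so 2-D and 3-D tensors are indexed correctly. Pulled out as a tiny helper for readability (the library does this inline; the function name is illustrative):

#include <cstddef>
#include <vector>

// Keep only the last `dims` entries of a full {n, c, i, j} coordinate,
// e.g. trailing_coords({n, c, i, j}, 2) yields {i, j} for a 2-D input;
// the result is what outputShape_.get_index() is given.
std::vector<size_t> trailing_coords(const std::vector<size_t>& coords,
                                    size_t dims) {
  return std::vector<size_t>(coords.end() - static_cast<std::ptrdiff_t>(dims),
                             coords.end());
}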
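
The benchmark tests above rely on elapsed_time and elapsed_time_avg helpers whose definitions are not part of these patches. A generic std::chrono stand-in, for readers who want to reproduce the sequential-versus-TBB comparison outside the test harness, could look like this:

#include <chrono>
#include <utility>

// Runs a callable once and returns the elapsed wall-clock time in
// milliseconds; a stand-in only, not the repository's helper.
template <typename Func, typename... Args>
double elapsed_ms(Func&& f, Args&&... args) {
  const auto start = std::chrono::steady_clock::now();
  std::forward<Func>(f)(std::forward<Args>(args)...);
  const auto stop = std::chrono::steady_clock::now();
  return std::chrono::duration<double, std::milli>(stop - start).count();
}

Single-shot wall-clock comparisons such as EXPECT_GE(count1, count2) are inherently noisy, which is presumably why the matrix benchmark averages over 10 repetitions and why patch 08 shrinks the pooling benchmark's batch size from 200 to 50.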