From a645d6893e40d2f5a8b037ca6ade7274e0b853f9 Mon Sep 17 00:00:00 2001 From: PhilipDeegan Date: Sun, 11 Aug 2024 18:27:53 +0200 Subject: [PATCH] consider cache lines, and tail non-avx-able ops (#9) --- .github/workflows/build_nix.yml | 6 +-- inc/mkn/avx/lazy.hpp | 86 +++++++++++++++++++-------------- test/test_lazy.cpp | 2 +- 3 files changed, 53 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build_nix.yml b/.github/workflows/build_nix.yml index d5f282c..dc07af2 100644 --- a/.github/workflows/build_nix.yml +++ b/.github/workflows/build_nix.yml @@ -16,6 +16,6 @@ jobs: env: MKN_GCC_PREFERRED: 1 run: | - wget https://github.com/mkn/mkn/releases/download/latest/mkn_nix - chmod +x mkn_nix - KLOG=3 ./mkn_nix clean build run -p test,bench -OtKda "-std=c++20 -fPIC" -l -pthread -g 0 + curl -Lo mkn https://github.com/mkn/mkn/releases/download/latest/mkn_nix + chmod +x mkn + KLOG=3 ./mkn clean build run -p test,bench -OtKda "-std=c++20 -fPIC" -l -pthread -g 0 diff --git a/inc/mkn/avx/lazy.hpp b/inc/mkn/avx/lazy.hpp index 53d9cb5..2df54eb 100644 --- a/inc/mkn/avx/lazy.hpp +++ b/inc/mkn/avx/lazy.hpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "mkn/kul/alloc.hpp" #include "mkn/avx/span.hpp" +#include #include #include @@ -174,59 +175,70 @@ struct LazyEvaluator if (fill) std::copy(t.operands[0].a->data(), t.operands[0].a->data() + size, ret); - for (std::size_t i = 0; i < size / N; ++i) - { + std::size_t tmp = 0; + auto do_avx = [&](auto const& o, auto const& i) { std::size_t const off = i * N; assert(off < size); - std::size_t tmp = 0; - for (std::size_t o = 0; o < t.operands.size(); ++o) - { - auto& op = t.operands[o]; - bool const use_tmp = op.a != t.v; - Span_ct const a{op.a->data() + off, N}; - Span_ct const b{op.b->data() + off, N}; - Span_t r{ret + off, N}; + auto& op = t.operands[o]; + bool const use_tmp = op.a != t.v; + Span_ct const a{op.a->data() + off, N}; + Span_ct const b{op.b->data() + off, N}; + Span_t r{ret + off, N}; - if (op.prev) + if (op.prev) + { + if (use_tmp) { - if (use_tmp) - { - Span_ct const pspan{tmps[op.prev->t].data(), N}; - Span_t tspan{tmps[tmp].data(), N}; - fns[op.op](tspan, a, pspan); + Span_ct const pspan{tmps[op.prev->t].data(), N}; + Span_t tspan{tmps[tmp].data(), N}; + fns[op.op](tspan, a, pspan); - op.t = tmp++; - } - else - { - if (op.prev->a == t.v) - { - fns[op.op](r, r, b); - } - else - { - Span_t tspan{tmps[op.prev->t].data(), N}; - fns[op.op](r, r, tspan); - } - } + op.t = tmp++; } else { - if (use_tmp) + if (op.prev->a == t.v) { - Span_t tspan{tmps[tmp].data(), N}; - fns[op.op](tspan, a, b); - - op.t = tmp++; + fns[op.op](r, r, b); } else { - fns[op.op](r, r, b); + Span_t tspan{tmps[op.prev->t].data(), N}; + fns[op.op](r, r, tspan); } } } - } + else + { + if (use_tmp) + { + Span_t tspan{tmps[tmp].data(), N}; + fns[op.op](tspan, a, b); + + op.t = tmp++; + } + else + { + fns[op.op](r, r, b); + } + } + }; + + auto const cl_size = std::hardware_destructive_interference_size; + + std::size_t const batch = cl_size / sizeof(T) / N; + + std::size_t i = 0; + for (; i < size / N; i += batch, tmp = 0) + for (std::size_t o = 0; o < t.operands.size(); ++o) + for (std::size_t j = 0; j < batch; ++j) + do_avx(o, i + j); + + if (size % N != 0) + for (; i < size / N + 1; i += batch, tmp = 0) + for (std::size_t o = 0; o < t.operands.size(); ++o) + do_avx(o, i); } diff --git a/test/test_lazy.cpp b/test/test_lazy.cpp index 8d1e328..1423426 100644 --- a/test/test_lazy.cpp +++ b/test/test_lazy.cpp @@ -4,7 +4,7 @@ using namespace mkn::avx; -constexpr static std::size_t N = 1e6; +constexpr static std::size_t N = 1e6 + 5; void add() {