Skip to content

Commit

Permalink
consider cache lines, and tail non-avx-able ops (#9)
Browse files (browse the repository at this point in the history)
  • Loading branch information
PhilipDeegan authored Aug 11, 2024
1 parent b83ca47 commit a645d68
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 41 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build_nix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ jobs:
env:
MKN_GCC_PREFERRED: 1
run: |
wget https://github.com/mkn/mkn/releases/download/latest/mkn_nix
chmod +x mkn_nix
KLOG=3 ./mkn_nix clean build run -p test,bench -OtKda "-std=c++20 -fPIC" -l -pthread -g 0
curl -Lo mkn https://github.com/mkn/mkn/releases/download/latest/mkn_nix
chmod +x mkn
KLOG=3 ./mkn clean build run -p test,bench -OtKda "-std=c++20 -fPIC" -l -pthread -g 0
86 changes: 49 additions & 37 deletions inc/mkn/avx/lazy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mkn/kul/alloc.hpp"
#include "mkn/avx/span.hpp"

#include <new>
#include <tuple>
#include <vector>

Expand Down Expand Up @@ -174,59 +175,70 @@ struct LazyEvaluator
if (fill)
std::copy(t.operands[0].a->data(), t.operands[0].a->data() + size, ret);

for (std::size_t i = 0; i < size / N; ++i)
{
std::size_t tmp = 0;
auto do_avx = [&](auto const& o, auto const& i) {
std::size_t const off = i * N;
assert(off < size);

std::size_t tmp = 0;
for (std::size_t o = 0; o < t.operands.size(); ++o)
{
auto& op = t.operands[o];
bool const use_tmp = op.a != t.v;
Span_ct const a{op.a->data() + off, N};
Span_ct const b{op.b->data() + off, N};
Span_t r{ret + off, N};
auto& op = t.operands[o];
bool const use_tmp = op.a != t.v;
Span_ct const a{op.a->data() + off, N};
Span_ct const b{op.b->data() + off, N};
Span_t r{ret + off, N};

if (op.prev)
if (op.prev)
{
if (use_tmp)
{
if (use_tmp)
{
Span_ct const pspan{tmps[op.prev->t].data(), N};
Span_t tspan{tmps[tmp].data(), N};
fns[op.op](tspan, a, pspan);
Span_ct const pspan{tmps[op.prev->t].data(), N};
Span_t tspan{tmps[tmp].data(), N};
fns[op.op](tspan, a, pspan);

op.t = tmp++;
}
else
{
if (op.prev->a == t.v)
{
fns[op.op](r, r, b);
}
else
{
Span_t tspan{tmps[op.prev->t].data(), N};
fns[op.op](r, r, tspan);
}
}
op.t = tmp++;
}
else
{
if (use_tmp)
if (op.prev->a == t.v)
{
Span_t tspan{tmps[tmp].data(), N};
fns[op.op](tspan, a, b);

op.t = tmp++;
fns[op.op](r, r, b);
}
else
{
fns[op.op](r, r, b);
Span_t tspan{tmps[op.prev->t].data(), N};
fns[op.op](r, r, tspan);
}
}
}
}
else
{
if (use_tmp)
{
Span_t tspan{tmps[tmp].data(), N};
fns[op.op](tspan, a, b);

op.t = tmp++;
}
else
{
fns[op.op](r, r, b);
}
}
};

auto const cl_size = std::hardware_destructive_interference_size;

std::size_t const batch = cl_size / sizeof(T) / N;

std::size_t i = 0;
for (; i < size / N; i += batch, tmp = 0)
for (std::size_t o = 0; o < t.operands.size(); ++o)
for (std::size_t j = 0; j < batch; ++j)
do_avx(o, i + j);

if (size % N != 0)
for (; i < size / N + 1; i += batch, tmp = 0)
for (std::size_t o = 0; o < t.operands.size(); ++o)
do_avx(o, i);
}


Expand Down
2 changes: 1 addition & 1 deletion test/test_lazy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

using namespace mkn::avx;

constexpr static std::size_t N = 1e6;
constexpr static std::size_t N = 1e6 + 5;

void add()
{
Expand Down

0 comments on commit a645d68

Please sign in to comment.