Skip to content

Commit

Permalink
Add generic chunked fill and copy algorithms (#1564)
Browse files Browse the repository at this point in the history
These are replacements for `std::ranges::fill` and `std::ranges::copy` that perform their respective loops in chunks of a caller-specified size. After each chunk, a user-defined callback is invoked, which can be used, for example, for cancellation checks.
The helpers are used within the `UNION` and `BIND` operations to add more cancellation checks there.
  • Loading branch information
RobinTF authored Oct 18, 2024
1 parent 264919e commit bf36257
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 37 deletions.
10 changes: 7 additions & 3 deletions src/engine/Bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "engine/QueryExecutionTree.h"
#include "engine/sparqlExpressions/SparqlExpression.h"
#include "engine/sparqlExpressions/SparqlExpressionGenerators.h"
#include "util/ChunkedForLoop.h"
#include "util/Exception.h"

// BIND adds exactly one new column
Expand Down Expand Up @@ -170,9 +171,11 @@ IdTable Bind::computeExpressionBind(
getInternallyVisibleVariableColumns().at(singleResult).columnIndex_;
auto inputColumn = idTable.getColumn(columnIndex);
AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size());
std::ranges::copy(inputColumn, outputColumn.begin());
ad_utility::chunkedCopy(inputColumn, outputColumn.begin(), CHUNK_SIZE,
[this]() { checkCancellation(); });
} else if constexpr (isStrongId) {
std::ranges::fill(outputColumn, singleResult);
ad_utility::chunkedFill(outputColumn, singleResult, CHUNK_SIZE,
[this]() { checkCancellation(); });
} else {
constexpr bool isConstant = sparqlExpression::isConstantResult<T>;

Expand All @@ -187,7 +190,8 @@ IdTable Bind::computeExpressionBind(
sparqlExpression::detail::constantExpressionResultToId(
std::move(*it), *outputLocalVocab);
checkCancellation();
std::ranges::fill(outputColumn, constantId);
ad_utility::chunkedFill(outputColumn, constantId, CHUNK_SIZE,
[this]() { checkCancellation(); });
}
} else {
size_t i = 0;
Expand Down
37 changes: 11 additions & 26 deletions src/engine/Union.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "Union.h"

#include "engine/CallFixedSize.h"
#include "util/ChunkedForLoop.h"
#include "util/TransparentFunctors.h"

const size_t Union::NO_COLUMN = std::numeric_limits<size_t>::max();
Expand Down Expand Up @@ -184,26 +185,6 @@ ProtoResult Union::computeResult(bool requestLaziness) {
Result::getMergedLocalVocab(*subRes1, *subRes2)};
}

// _____________________________________________________________________________
// Copy the elements of `[beg, end)` to the range starting at `target`, at most
// `chunkSize` elements at a time. `checkCancellation()` is called before each
// chunk is copied, so an empty input range triggers no check at all.
void Union::copyChunked(auto beg, auto end, auto target) const {
  const size_t numElements = end - beg;
  size_t chunkBegin = 0;
  while (chunkBegin < numElements) {
    checkCancellation();
    // The last chunk may be shorter than `chunkSize`.
    const size_t chunkEnd = std::min(chunkBegin + chunkSize, numElements);
    std::copy(beg + chunkBegin, beg + chunkEnd, target + chunkBegin);
    chunkBegin += chunkSize;
  }
}

// _____________________________________________________________________________
// Assign `value` to every element of `[beg, end)`, at most `chunkSize`
// elements at a time. `checkCancellation()` is called before each chunk is
// filled, so an empty range triggers no check at all.
void Union::fillChunked(auto beg, auto end, const auto& value) const {
  const size_t numElements = end - beg;
  size_t chunkBegin = 0;
  while (chunkBegin < numElements) {
    checkCancellation();
    // The last chunk may be shorter than `chunkSize`.
    const size_t chunkEnd = std::min(chunkBegin + chunkSize, numElements);
    std::fill(beg + chunkBegin, beg + chunkEnd, value);
    chunkBegin += chunkSize;
  }
}

// _____________________________________________________________________________
IdTable Union::computeUnion(
const IdTable& left, const IdTable& right,
Expand All @@ -220,11 +201,14 @@ IdTable Union::computeUnion(
size_t inputColumnIndex, size_t offset) {
if (inputColumnIndex != NO_COLUMN) {
decltype(auto) input = inputTable.getColumn(inputColumnIndex);
copyChunked(input.begin(), input.end(), targetColumn.begin() + offset);
ad_utility::chunkedCopy(input, targetColumn.begin() + offset, chunkSize,
[this]() { checkCancellation(); });
} else {
fillChunked(targetColumn.begin() + offset,
targetColumn.begin() + offset + inputTable.size(),
Id::makeUndefined());
ad_utility::chunkedFill(
std::ranges::subrange{
targetColumn.begin() + offset,
targetColumn.begin() + offset + inputTable.size()},
Id::makeUndefined(), chunkSize, [this]() { checkCancellation(); });
}
};

Expand Down Expand Up @@ -263,8 +247,9 @@ IdTable Union::transformToCorrectColumnFormat(
IdTable idTable, const std::vector<ColumnIndex>& permutation) const {
while (idTable.numColumns() < getResultWidth()) {
idTable.addEmptyColumn();
auto column = idTable.getColumn(idTable.numColumns() - 1);
fillChunked(column.begin(), column.end(), Id::makeUndefined());
ad_utility::chunkedFill(idTable.getColumn(idTable.numColumns() - 1),
Id::makeUndefined(), chunkSize,
[this]() { checkCancellation(); });
}

idTable.setColumnSubset(permutation);
Expand Down
7 changes: 0 additions & 7 deletions src/engine/Union.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,6 @@ class Union : public Operation {
}

private:
// A drop-in replacement for `std::copy` that performs the copying in chunks
// of `chunkSize` and checks the timeout after each chunk.
void copyChunked(auto beg, auto end, auto target) const;

// A similar timeout-checking replacement for `std::fill`.
void fillChunked(auto beg, auto end, const auto& value) const;

ProtoResult computeResult(bool requestLaziness) override;

VariableToColumnMap computeVariableToColumnMap() const override;
Expand Down
50 changes: 50 additions & 0 deletions src/util/ChunkedForLoop.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,56 @@ inline void chunkedForLoop(std::size_t start, std::size_t end,
std::invoke(chunkOperation);
}
}

// Helper concept that combines the sized range and input range concepts.
template <typename R>
concept SizedInputRange =
    std::ranges::sized_range<R> && std::ranges::input_range<R>;

// Similar to `std::ranges::copy`, but copies the elements of `inputRange` to
// the range starting at `result` in chunks of `chunkSize` elements.
// `chunkOperation` is invoked after every full chunk and once more after the
// (possibly empty) remainder, i.e. `size / chunkSize + 1` times in total. In
// particular it is invoked exactly once for an empty range.
//
// A `chunkSize` that is not positive can never make progress, so it is treated
// as a chunk size of 1 instead of looping forever.
template <SizedInputRange R, std::weakly_incrementable O>
inline void chunkedCopy(R&& inputRange, O result,
                        std::ranges::range_difference_t<R> chunkSize,
                        const std::invocable auto& chunkOperation)
    requires std::indirectly_copyable<std::ranges::iterator_t<R>, O> {
  if (chunkSize <= 0) {
    chunkSize = 1;
  }
  // Query the size exactly once and before calling `begin`: this is O(1) for
  // a sized range and also valid for single-pass ranges, whereas repeatedly
  // measuring the iterator distance is linear for non-random-access iterators.
  std::ranges::range_difference_t<R> remaining =
      std::ranges::distance(inputRange);
  auto source = std::ranges::begin(inputRange);
  while (remaining >= chunkSize) {
    // `copy_n` hands back both advanced iterators, so neither of them has to
    // be copied or traversed a second time.
    auto copied = std::ranges::copy_n(std::move(source), chunkSize,
                                      std::move(result));
    source = std::move(copied.in);
    result = std::move(copied.out);
    remaining -= chunkSize;
    chunkOperation();
  }
  // Copy the remainder, which has fewer than `chunkSize` elements.
  std::ranges::copy_n(std::move(source), remaining, std::move(result));
  chunkOperation();
}

// Helper concept that combines the sized range and output range concepts.
template <typename R, typename T>
concept SizedOutputRange =
    std::ranges::sized_range<R> && std::ranges::output_range<R, T>;

// Similar to `std::ranges::fill`, but assigns `value` to the elements of
// `outputRange` in chunks of `chunkSize` elements. `chunkOperation` is invoked
// after every full chunk and once more after the (possibly empty) remainder,
// i.e. `size / chunkSize + 1` times in total. In particular it is invoked
// exactly once for an empty range.
//
// A `chunkSize` that is not positive can never make progress, so it is treated
// as a chunk size of 1 instead of looping forever.
template <typename T, SizedOutputRange<T> R>
inline void chunkedFill(R&& outputRange, const T& value,
                        std::ranges::range_difference_t<R> chunkSize,
                        const std::invocable auto& chunkOperation) {
  if (chunkSize <= 0) {
    chunkSize = 1;
  }
  // Query the size exactly once and before calling `begin`: this is O(1) for
  // a sized range, whereas repeatedly measuring the iterator distance is
  // linear for non-random-access iterators.
  std::ranges::range_difference_t<R> remaining =
      std::ranges::distance(outputRange);
  auto target = std::ranges::begin(outputRange);
  while (remaining >= chunkSize) {
    // `fill_n` returns the iterator past the last written element.
    target = std::ranges::fill_n(std::move(target), chunkSize, value);
    remaining -= chunkSize;
    chunkOperation();
  }
  // Fill the remainder, which has fewer than `chunkSize` elements.
  std::ranges::fill_n(std::move(target), remaining, value);
  chunkOperation();
}
} // namespace ad_utility

#endif // QLEVER_CHUNKEDFORLOOP_H
43 changes: 42 additions & 1 deletion test/ChunkedForLoopTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
// Chair of Algorithms and Data Structures.
// Author: Robin Textor-Falconi <[email protected]>

#include <gtest/gtest.h>
#include <gmock/gmock.h>

#include <atomic>

#include "util/ChunkedForLoop.h"

using ad_utility::chunkedCopy;
using ad_utility::chunkedFill;
using ad_utility::chunkedForLoop;

TEST(ChunkedForLoop, testEmptyRange) {
Expand Down Expand Up @@ -103,3 +105,42 @@ TEST(ChunkedForLoop, verifyBreakWorksAsExpected) {
EXPECT_EQ(counter, 4);
EXPECT_EQ(chunkCounter, 1);
}

// _____________________________________________________________________________________________________________________
// For an empty range there is nothing to fill, but the callback is still
// expected to run exactly once.
TEST(ChunkedForLoop, chunkedFillHandlesEmptyRange) {
  size_t numInvocations = 0;
  auto countInvocation = [&numInvocations]() { numInvocations++; };
  chunkedFill(std::array<int, 0>{}, 0, 10, countInvocation);

  EXPECT_EQ(numInvocations, 1);
}

// _____________________________________________________________________________________________________________________
// 21 elements with a chunk size of 10: the callback is expected to run three
// times and every element has to hold the fill value afterwards.
TEST(ChunkedForLoop, chunkedFillFillsCorrectly) {
  std::array<int, 21> values{};
  size_t numInvocations = 0;
  auto countInvocation = [&numInvocations]() { numInvocations++; };
  chunkedFill(values, 42, 10, countInvocation);

  EXPECT_EQ(numInvocations, 3);
  EXPECT_THAT(values, ::testing::Each(::testing::Eq(42)));
}

// _____________________________________________________________________________________________________________________
// For an empty input there is nothing to copy, but the callback is still
// expected to run exactly once.
TEST(ChunkedForLoop, chunkedCopyHandlesEmptyRange) {
  std::array<int, 0> target{};
  size_t numInvocations = 0;
  auto countInvocation = [&numInvocations]() { numInvocations++; };
  chunkedCopy(std::array<int, 0>{}, target.begin(), 2, countInvocation);

  EXPECT_EQ(numInvocations, 1);
}

// _____________________________________________________________________________________________________________________
// 5 elements with a chunk size of 2: the callback is expected to run three
// times and the target has to equal the source afterwards.
TEST(ChunkedForLoop, chunkedCopyCopiesCorrectly) {
  std::array<int, 5> source{5, 4, 3, 2, 1};
  std::array<int, 5> target{};
  size_t numInvocations = 0;
  auto countInvocation = [&numInvocations]() { numInvocations++; };
  chunkedCopy(source, target.begin(), 2, countInvocation);

  EXPECT_EQ(numInvocations, 3);
  EXPECT_EQ(source, target);
}

0 comments on commit bf36257

Please sign in to comment.