From ec9cc5a2b8322b7d23e66f98a1d0340d1a87cc28 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:17:26 +0200 Subject: [PATCH 01/10] Make helper lambdas to actual class members --- src/engine/Union.cpp | 51 +++++++++++++++++++++----------------------- src/engine/Union.h | 9 ++++++++ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index ecccefa539..6c9663c3fc 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -167,7 +167,7 @@ ProtoResult Union::computeResult([[maybe_unused]] bool requestLaziness) { IdTable idTable{getExecutionContext()->getAllocator()}; idTable.setNumColumns(getResultWidth()); - Union::computeUnion(&idTable, subRes1->idTable(), subRes2->idTable(), + computeUnion(&idTable, subRes1->idTable(), subRes2->idTable(), _columnOrigins); LOG(DEBUG) << "Union result computation done" << std::endl; @@ -177,43 +177,40 @@ ProtoResult Union::computeResult([[maybe_unused]] bool requestLaziness) { Result::getMergedLocalVocab(*subRes1, *subRes2)}; } +// _____________________________________________________________________________ +void Union::copyChunked(auto beg, auto end, auto target) const { + size_t total = end - beg; + for (size_t i = 0; i < total; i += chunkSize) { + checkCancellation(); + size_t actualEnd = std::min(i + chunkSize, total); + std::copy(beg + i, beg + actualEnd, target + i); + } +}; + +// _____________________________________________________________________________ +void Union::fillChunked(auto beg, auto end, const auto& value) const { + size_t total = end - beg; + for (size_t i = 0; i < total; i += chunkSize) { + checkCancellation(); + size_t actualEnd = std::min(i + chunkSize, total); + std::fill(beg + i, beg + actualEnd, value); + } +}; + +// _____________________________________________________________________________ void Union::computeUnion( IdTable* resPtr, const IdTable& left, const IdTable& right, const std::vector>& columnOrigins) { IdTable& res = *resPtr; res.resize(left.size() + right.size()); - static constexpr size_t chunkSize = 1'000'000; - - // A drop-in replacement for `std::copy` that performs the copying in chunks - // of `chunkSize` and checks the timeout after each chunk. - auto copyChunked = [this](auto beg, auto end, auto target) { - size_t total = end - beg; - for (size_t i = 0; i < total; i += chunkSize) { - checkCancellation(); - size_t actualEnd = std::min(i + chunkSize, total); - std::copy(beg + i, beg + actualEnd, target + i); - } - }; - - // A similar timeout-checking replacement for `std::fill`. - auto fillChunked = [this](auto beg, auto end, const auto& value) { - size_t total = end - beg; - for (size_t i = 0; i < total; i += chunkSize) { - checkCancellation(); - size_t actualEnd = std::min(i + chunkSize, total); - std::fill(beg + i, beg + actualEnd, value); - } - }; - // Write the column with the `inputColumnIndex` from the `inputTable` into the // `targetColumn`. Always copy the complete input column and start at position // `offset` in the target column. If the `inputColumnIndex` is `NO_COLUMN`, // then the corresponding range in the `targetColumn` will be filled with // UNDEF. - auto writeColumn = [©Chunked, &fillChunked]( - const auto& inputTable, auto& targetColumn, - size_t inputColumnIndex, size_t offset) { + auto writeColumn = [this](const auto& inputTable, auto& targetColumn, + size_t inputColumnIndex, size_t offset) { if (inputColumnIndex != NO_COLUMN) { decltype(auto) input = inputTable.getColumn(inputColumnIndex); copyChunked(input.begin(), input.end(), targetColumn.begin() + offset); diff --git a/src/engine/Union.h b/src/engine/Union.h index 00565e372e..e4fb317b19 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -51,6 +51,8 @@ class Union : public Operation { const static size_t NO_COLUMN; + static constexpr size_t chunkSize = 1'000'000; + // The method is declared here to make it unit testable void computeUnion(IdTable* inputTable, const IdTable& left, const IdTable& right, @@ -61,6 +63,13 @@ class Union : public Operation { } private: + // A drop-in replacement for `std::copy` that performs the copying in chunks + // of `chunkSize` and checks the timeout after each chunk. + void copyChunked(auto beg, auto end, auto target) const; + + // A similar timeout-checking replacement for `std::fill`. + void fillChunked(auto beg, auto end, const auto& value) const; + virtual ProtoResult computeResult( [[maybe_unused]] bool requestLaziness) override; From aa7b72309c7f7dab78a807b6826ca56e953cae4b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:22:44 +0200 Subject: [PATCH 02/10] Return `IdTable` instead of using output variable --- src/engine/Union.cpp | 16 +++++++--------- src/engine/Union.h | 6 +++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 6c9663c3fc..ab2e534b17 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -164,11 +164,8 @@ ProtoResult Union::computeResult([[maybe_unused]] bool requestLaziness) { std::shared_ptr subRes2 = _subtrees[1]->getResult(); LOG(DEBUG) << "Union subresult computation done." << std::endl; - IdTable idTable{getExecutionContext()->getAllocator()}; - - idTable.setNumColumns(getResultWidth()); - computeUnion(&idTable, subRes1->idTable(), subRes2->idTable(), - _columnOrigins); + IdTable idTable = + computeUnion(subRes1->idTable(), subRes2->idTable(), _columnOrigins); LOG(DEBUG) << "Union result computation done" << std::endl; // If only one of the two operands has a non-empty local vocabulary, share @@ -198,10 +195,10 @@ void Union::fillChunked(auto beg, auto end, const auto& value) const { }; // _____________________________________________________________________________ -void Union::computeUnion( - IdTable* resPtr, const IdTable& left, const IdTable& right, - const std::vector>& columnOrigins) { - IdTable& res = *resPtr; +IdTable Union::computeUnion( + const IdTable& left, const IdTable& right, + const std::vector>& columnOrigins) const { + IdTable res{getResultWidth(), getExecutionContext()->getAllocator()}; res.resize(left.size() + right.size()); // Write the column with the `inputColumnIndex` from the `inputTable` into the @@ -230,4 +227,5 @@ void Union::computeUnion( writeColumn(left, targetColumn, leftCol, 0u); writeColumn(right, targetColumn, rightCol, left.size()); } + return res; } diff --git a/src/engine/Union.h b/src/engine/Union.h index e4fb317b19..cbdac15a3b 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -54,9 +54,9 @@ class Union : public Operation { static constexpr size_t chunkSize = 1'000'000; // The method is declared here to make it unit testable - void computeUnion(IdTable* inputTable, const IdTable& left, - const IdTable& right, - const std::vector>& columnOrigins); + IdTable computeUnion( + const IdTable& left, const IdTable& right, + const std::vector>& columnOrigins) const; vector getChildren() override { return {_subtrees[0].get(), _subtrees[1].get()}; From 52c76bc95fb865a76ce756746ec64becd2385bc1 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 15 Oct 2024 18:42:48 +0200 Subject: [PATCH 03/10] Implement lazy union --- src/engine/Union.cpp | 90 +++++++++++++++++++++++++++++++++++++++++--- src/engine/Union.h | 17 ++++++++- test/UnionTest.cpp | 41 ++++++++++++++++++++ 3 files changed, 140 insertions(+), 8 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index ab2e534b17..06bd763dab 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -158,10 +158,20 @@ size_t Union::getCostEstimate() { getSizeEstimateBeforeLimit(); } -ProtoResult Union::computeResult([[maybe_unused]] bool requestLaziness) { +ProtoResult Union::computeResult(bool requestLaziness) { LOG(DEBUG) << "Union result computation..." << std::endl; - std::shared_ptr subRes1 = _subtrees[0]->getResult(); - std::shared_ptr subRes2 = _subtrees[1]->getResult(); + std::shared_ptr subRes1 = + _subtrees[0]->getResult(requestLaziness); + std::shared_ptr subRes2 = + _subtrees[1]->getResult(requestLaziness); + + if (requestLaziness) { + auto localVocab = std::make_shared(); + auto generator = + computeResultLazily(std::move(subRes1), std::move(subRes2), localVocab); + return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + } + LOG(DEBUG) << "Union subresult computation done." << std::endl; IdTable idTable = @@ -179,10 +189,10 @@ void Union::copyChunked(auto beg, auto end, auto target) const { size_t total = end - beg; for (size_t i = 0; i < total; i += chunkSize) { checkCancellation(); - size_t actualEnd = std::min(i + chunkSize, total); - std::copy(beg + i, beg + actualEnd, target + i); + size_t actualEnd = std::min(i + chunkSize, total); + std::copy(beg + i, beg + actualEnd, target + i); } -}; +} // _____________________________________________________________________________ void Union::fillChunked(auto beg, auto end, const auto& value) const { @@ -229,3 +239,71 @@ IdTable Union::computeUnion( } return res; } + +// _____________________________________________________________________________ +template +std::vector Union::computePermutation() const { + size_t startOfUndefColumns = _subtrees[left ? 0 : 1]->getResultWidth(); + std::vector permutation(_columnOrigins.size()); + for (size_t targetColIdx = 0; targetColIdx < _columnOrigins.size(); + ++targetColIdx) { + size_t originIndex = _columnOrigins.at(targetColIdx)[left ? 0 : 1]; + if (originIndex == NO_COLUMN) { + originIndex = startOfUndefColumns; + startOfUndefColumns++; + } + permutation[originIndex] = targetColIdx; + } + return permutation; +} + +// _____________________________________________________________________________ +IdTable Union::transformToCorrectColumnFormat( + IdTable idTable, const std::vector& permutation) const { + while (idTable.numColumns() < getResultWidth()) { + idTable.addEmptyColumn(); + std::ranges::fill(idTable.getColumn(idTable.numColumns() - 1), + Id::makeUndefined()); + } + + // i + 1 because once everything expect the last column is in the correct + // position, the last column is already in the correct position so the last + // iteration would always swap the last column with itself. + for (size_t i = 0; i + 1 < permutation.size(); ++i) { + size_t ind = permutation[i]; + while (ind < i) { + ind = permutation[ind]; + } + idTable.swapColumns(i, ind); + } + return idTable; +} + +// _____________________________________________________________________________ +cppcoro::generator Union::computeResultLazily( + std::shared_ptr result1, + std::shared_ptr result2, + std::shared_ptr localVocab) const { + if (result1->isFullyMaterialized()) { + co_yield computeUnion(result1->idTable(), + IdTable{getResultWidth(), allocator()}, + _columnOrigins); + } else { + std::vector permutation = computePermutation(); + for (IdTable& idTable : result1->idTables()) { + co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + } + } + if (result2->isFullyMaterialized()) { + co_yield computeUnion(IdTable{getResultWidth(), allocator()}, + result2->idTable(), _columnOrigins); + } else { + std::vector permutation = computePermutation(); + for (IdTable& idTable : result2->idTables()) { + co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + } + } + std::array vocabs{&result1->localVocab(), + &result2->localVocab()}; + *localVocab = LocalVocab::merge(vocabs); +} diff --git a/src/engine/Union.h b/src/engine/Union.h index cbdac15a3b..87a7faac68 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -70,8 +70,21 @@ class Union : public Operation { // A similar timeout-checking replacement for `std::fill`. void fillChunked(auto beg, auto end, const auto& value) const; - virtual ProtoResult computeResult( - [[maybe_unused]] bool requestLaziness) override; + virtual ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; + + // Compute the permutation of the `IdTable` being yielded for the left or + // right child depending on `left`. This permutation can then be used to swap + // the columns without any copy operations. + template + std::vector computePermutation() const; + + IdTable transformToCorrectColumnFormat( + IdTable idTable, const std::vector& permutation) const; + + cppcoro::generator computeResultLazily( + std::shared_ptr result1, + std::shared_ptr result2, + std::shared_ptr localVocab) const; }; diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 86cad376d6..08b4a9ed9c 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -71,3 +71,44 @@ TEST(UnionTest, computeUnionLarge) { ASSERT_EQ(result, makeIdTableFromVector(expected)); } + +// _____________________________________________________________________________ +TEST(UnionTest, computeUnionLazy) { + auto runTest = [](bool nonLazyChilds, + ad_utility::source_location loc = + ad_utility::source_location::current()) { + auto l = generateLocationTrace(loc); + auto* qec = ad_utility::testing::getQec(); + IdTable left = makeIdTableFromVector({{V(1)}, {V(2)}, {V(3)}}); + auto leftT = ad_utility::makeExecutionTree( + qec, std::move(left), Vars{Variable{"?x"}}, false, + std::vector{}, LocalVocab{}, std::nullopt, nonLazyChilds); + + IdTable right = makeIdTableFromVector({{V(4), V(5)}, {V(6), V(7)}}); + auto rightT = ad_utility::makeExecutionTree( + qec, std::move(right), Vars{Variable{"?u"}, Variable{"?x"}}, false, + std::vector{}, LocalVocab{}, std::nullopt, nonLazyChilds); + + Union u{ad_utility::testing::getQec(), std::move(leftT), std::move(rightT)}; + auto resultTable = u.computeResultOnlyForTesting(true); + ASSERT_FALSE(resultTable.isFullyMaterialized()); + auto& result = resultTable.idTables(); + + auto U = Id::makeUndefined(); + auto expected1 = makeIdTableFromVector({{V(1), U}, {V(2), U}, {V(3), U}}); + auto expected2 = makeIdTableFromVector({{V(5), V(4)}, {V(7), V(6)}}); + + auto iterator = result.begin(); + ASSERT_NE(iterator, result.end()); + ASSERT_EQ(*iterator, expected1); + + ++iterator; + ASSERT_NE(iterator, result.end()); + ASSERT_EQ(*iterator, expected2); + + ASSERT_EQ(++iterator, result.end()); + }; + + runTest(false); + runTest(true); +} From cade5b9f9f1965177c8fabe5dec132a8ae0f25a7 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 15 Oct 2024 21:30:20 +0200 Subject: [PATCH 04/10] Clear cache for consistent coverage and drop redundant virtual modifier --- src/engine/Union.h | 2 +- test/UnionTest.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine/Union.h b/src/engine/Union.h index 87a7faac68..be9ddcf34a 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -70,7 +70,7 @@ class Union : public Operation { // A similar timeout-checking replacement for `std::fill`. void fillChunked(auto beg, auto end, const auto& value) const; - virtual ProtoResult computeResult(bool requestLaziness) override; + ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 08b4a9ed9c..de7ebb65ae 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -79,6 +79,7 @@ TEST(UnionTest, computeUnionLazy) { ad_utility::source_location::current()) { auto l = generateLocationTrace(loc); auto* qec = ad_utility::testing::getQec(); + qec->getQueryTreeCache().clearAll(); IdTable left = makeIdTableFromVector({{V(1)}, {V(2)}, {V(3)}}); auto leftT = ad_utility::makeExecutionTree( qec, std::move(left), Vars{Variable{"?x"}}, false, From 24176538a3bba1287c852ad144bbdeaa7aed1a68 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 16 Oct 2024 01:16:15 +0200 Subject: [PATCH 05/10] Add test for permutation and fix permutation generation --- src/engine/Union.cpp | 2 +- test/UnionTest.cpp | 59 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 06bd763dab..ef6aaf600f 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -252,7 +252,7 @@ std::vector Union::computePermutation() const { originIndex = startOfUndefColumns; startOfUndefColumns++; } - permutation[originIndex] = targetColIdx; + permutation[targetColIdx] = originIndex; } return permutation; } diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index de7ebb65ae..2bbe554fe6 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -20,7 +20,7 @@ using Vars = std::vector>; } // namespace // A simple test for computing a union. -TEST(UnionTest, computeUnion) { +TEST(Union, computeUnion) { auto* qec = ad_utility::testing::getQec(); IdTable left = makeIdTableFromVector({{V(1)}, {V(2)}, {V(3)}}); auto leftT = ad_utility::makeExecutionTree( @@ -30,7 +30,7 @@ TEST(UnionTest, computeUnion) { auto rightT = ad_utility::makeExecutionTree( qec, right.clone(), Vars{Variable{"?u"}, Variable{"?x"}}); - Union u{ad_utility::testing::getQec(), leftT, rightT}; + Union u{qec, leftT, rightT}; auto resultTable = u.computeResultOnlyForTesting(); const auto& result = resultTable.idTable(); @@ -42,7 +42,7 @@ TEST(UnionTest, computeUnion) { // A test with large inputs to test the chunked writing that is caused by the // timeout checks. -TEST(UnionTest, computeUnionLarge) { +TEST(Union, computeUnionLarge) { auto* qec = ad_utility::testing::getQec(); VectorTable leftInput, rightInput, expected; size_t numInputsL = 1'500'000u; @@ -65,7 +65,7 @@ TEST(UnionTest, computeUnionLarge) { auto rightT = ad_utility::makeExecutionTree( qec, makeIdTableFromVector(rightInput), Vars{Variable{"?u"}}); - Union u{ad_utility::testing::getQec(), leftT, rightT}; + Union u{qec, leftT, rightT}; auto resultTable = u.computeResultOnlyForTesting(); const auto& result = resultTable.idTable(); @@ -73,7 +73,7 @@ TEST(UnionTest, computeUnionLarge) { } // _____________________________________________________________________________ -TEST(UnionTest, computeUnionLazy) { +TEST(Union, computeUnionLazy) { auto runTest = [](bool nonLazyChilds, ad_utility::source_location loc = ad_utility::source_location::current()) { @@ -90,7 +90,7 @@ TEST(UnionTest, computeUnionLazy) { qec, std::move(right), Vars{Variable{"?u"}, Variable{"?x"}}, false, std::vector{}, LocalVocab{}, std::nullopt, nonLazyChilds); - Union u{ad_utility::testing::getQec(), std::move(leftT), std::move(rightT)}; + Union u{qec, std::move(leftT), std::move(rightT)}; auto resultTable = u.computeResultOnlyForTesting(true); ASSERT_FALSE(resultTable.isFullyMaterialized()); auto& result = resultTable.idTables(); @@ -113,3 +113,50 @@ TEST(UnionTest, computeUnionLazy) { runTest(false); runTest(true); } + +// _____________________________________________________________________________ +TEST(Union, ensurePermutationIsAppliedCorrectly) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 2, 3, 4, 5}}), + Vars{Var{"?a"}, Var{"?b"}, Var{"?c"}, Var{"?d"}, Var{"?e"}}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{6, 7, 8}}), + Vars{Var{"?b"}, Var{"?a"}, Var{"?e"}}); + + Union u{qec, std::move(leftT), std::move(rightT)}; + + { + qec->getQueryTreeCache().clearAll(); + auto resultTable = u.computeResultOnlyForTesting(true); + ASSERT_FALSE(resultTable.isFullyMaterialized()); + auto& result = resultTable.idTables(); + + auto U = Id::makeUndefined(); + auto expected1 = makeIdTableFromVector({{1, 2, 3, 4, 5}}); + auto expected2 = makeIdTableFromVector({{V(7), V(6), U, U, V(8)}}); + + auto iterator = result.begin(); + ASSERT_NE(iterator, result.end()); + ASSERT_EQ(*iterator, expected1); + + ++iterator; + ASSERT_NE(iterator, result.end()); + ASSERT_EQ(*iterator, expected2); + + ASSERT_EQ(++iterator, result.end()); + } + + { + qec->getQueryTreeCache().clearAll(); + auto resultTable = u.computeResultOnlyForTesting(); + ASSERT_TRUE(resultTable.isFullyMaterialized()); + + auto U = Id::makeUndefined(); + auto expected = + makeIdTableFromVector({{1, 2, 3, 4, 5}, {V(7), V(6), U, U, V(8)}}); + EXPECT_EQ(resultTable.idTable(), expected); + } +} From e359fae91e1977ca35beccbe75318a2e030917cb Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:41:14 +0200 Subject: [PATCH 06/10] Address PR comments --- src/engine/Union.cpp | 24 +++++++----------------- src/engine/Union.h | 5 +++++ test/UnionTest.cpp | 8 +++++--- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index ef6aaf600f..3368aede3b 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -266,16 +266,7 @@ IdTable Union::transformToCorrectColumnFormat( Id::makeUndefined()); } - // i + 1 because once everything expect the last column is in the correct - // position, the last column is already in the correct position so the last - // iteration would always swap the last column with itself. - for (size_t i = 0; i + 1 < permutation.size(); ++i) { - size_t ind = permutation[i]; - while (ind < i) { - ind = permutation[ind]; - } - idTable.swapColumns(i, ind); - } + idTable.setColumnSubset(permutation); return idTable; } @@ -284,21 +275,20 @@ cppcoro::generator Union::computeResultLazily( std::shared_ptr result1, std::shared_ptr result2, std::shared_ptr localVocab) const { + std::vector permutation = computePermutation(); if (result1->isFullyMaterialized()) { - co_yield computeUnion(result1->idTable(), - IdTable{getResultWidth(), allocator()}, - _columnOrigins); + co_yield transformToCorrectColumnFormat(result1->idTable().clone(), + permutation); } else { - std::vector permutation = computePermutation(); for (IdTable& idTable : result1->idTables()) { co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); } } + permutation = computePermutation(); if (result2->isFullyMaterialized()) { - co_yield computeUnion(IdTable{getResultWidth(), allocator()}, - result2->idTable(), _columnOrigins); + co_yield transformToCorrectColumnFormat(result2->idTable().clone(), + permutation); } else { - std::vector permutation = computePermutation(); for (IdTable& idTable : result2->idTables()) { co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); } diff --git a/src/engine/Union.h b/src/engine/Union.h index be9ddcf34a..1f2eb20a9c 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -80,9 +80,14 @@ class Union : public Operation { template std::vector computePermutation() const; + // Take the given `IdTable`, add any missing columns to it (filled with + // undefined values) and permutate the columns to match the end result. IdTable transformToCorrectColumnFormat( IdTable idTable, const std::vector& permutation) const; + // Create a generator that yields the `IdTable` for the left or right child + // one after another and apply a potential differing permutation to it. Write + // the merged LocalVocab to the given `LocalVocab` object at the end. cppcoro::generator computeResultLazily( std::shared_ptr result1, std::shared_ptr result2, diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 2bbe554fe6..7d563e60c7 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -74,7 +74,7 @@ TEST(Union, computeUnionLarge) { // _____________________________________________________________________________ TEST(Union, computeUnionLazy) { - auto runTest = [](bool nonLazyChilds, + auto runTest = [](bool nonLazyChildren, ad_utility::source_location loc = ad_utility::source_location::current()) { auto l = generateLocationTrace(loc); @@ -83,12 +83,14 @@ TEST(Union, computeUnionLazy) { IdTable left = makeIdTableFromVector({{V(1)}, {V(2)}, {V(3)}}); auto leftT = ad_utility::makeExecutionTree( qec, std::move(left), Vars{Variable{"?x"}}, false, - std::vector{}, LocalVocab{}, std::nullopt, nonLazyChilds); + std::vector{}, LocalVocab{}, std::nullopt, + nonLazyChildren); IdTable right = makeIdTableFromVector({{V(4), V(5)}, {V(6), V(7)}}); auto rightT = ad_utility::makeExecutionTree( qec, std::move(right), Vars{Variable{"?u"}, Variable{"?x"}}, false, - std::vector{}, LocalVocab{}, std::nullopt, nonLazyChilds); + std::vector{}, LocalVocab{}, std::nullopt, + nonLazyChildren); Union u{qec, std::move(leftT), std::move(rightT)}; auto resultTable = u.computeResultOnlyForTesting(true); From 891da22a0f516421d1d164f0c230c0a77aeb765d Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:50:00 +0200 Subject: [PATCH 07/10] Use chunked fill --- src/engine/Union.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 3368aede3b..fe5a6a8ffb 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -262,8 +262,8 @@ IdTable Union::transformToCorrectColumnFormat( IdTable idTable, const std::vector& permutation) const { while (idTable.numColumns() < getResultWidth()) { idTable.addEmptyColumn(); - std::ranges::fill(idTable.getColumn(idTable.numColumns() - 1), - Id::makeUndefined()); + auto column = idTable.getColumn(idTable.numColumns() - 1); + fillChunked(column.begin(), column.end(), Id::makeUndefined()); } idTable.setColumnSubset(permutation); From f03bcf5790d854b526d122a272fa7946ddfa1eca Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:50:20 +0200 Subject: [PATCH 08/10] Use `ColumnIndex` instead of `size_t`to fix macOS build --- src/engine/Union.cpp | 12 ++++++------ src/engine/Union.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index fe5a6a8ffb..97855b470d 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -242,12 +242,12 @@ IdTable Union::computeUnion( // _____________________________________________________________________________ template -std::vector Union::computePermutation() const { - size_t startOfUndefColumns = _subtrees[left ? 0 : 1]->getResultWidth(); - std::vector permutation(_columnOrigins.size()); +std::vector Union::computePermutation() const { + ColumnIndex startOfUndefColumns = _subtrees[left ? 0 : 1]->getResultWidth(); + std::vector permutation(_columnOrigins.size()); for (size_t targetColIdx = 0; targetColIdx < _columnOrigins.size(); ++targetColIdx) { - size_t originIndex = _columnOrigins.at(targetColIdx)[left ? 0 : 1]; + ColumnIndex originIndex = _columnOrigins.at(targetColIdx)[left ? 0 : 1]; if (originIndex == NO_COLUMN) { originIndex = startOfUndefColumns; startOfUndefColumns++; @@ -259,7 +259,7 @@ std::vector Union::computePermutation() const { // _____________________________________________________________________________ IdTable Union::transformToCorrectColumnFormat( - IdTable idTable, const std::vector& permutation) const { + IdTable idTable, const std::vector& permutation) const { while (idTable.numColumns() < getResultWidth()) { idTable.addEmptyColumn(); auto column = idTable.getColumn(idTable.numColumns() - 1); @@ -275,7 +275,7 @@ cppcoro::generator Union::computeResultLazily( std::shared_ptr result1, std::shared_ptr result2, std::shared_ptr localVocab) const { - std::vector permutation = computePermutation(); + std::vector permutation = computePermutation(); if (result1->isFullyMaterialized()) { co_yield transformToCorrectColumnFormat(result1->idTable().clone(), permutation); diff --git a/src/engine/Union.h b/src/engine/Union.h index 1f2eb20a9c..cbd14886b3 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -83,7 +83,7 @@ class Union : public Operation { // Take the given `IdTable`, add any missing columns to it (filled with // undefined values) and permutate the columns to match the end result. IdTable transformToCorrectColumnFormat( - IdTable idTable, const std::vector& permutation) const; + IdTable idTable, const std::vector& permutation) const; // Create a generator that yields the `IdTable` for the left or right child // one after another and apply a potential differing permutation to it. Write From 0cf2e054b6a717dead0326b3e1cb666fd3c89f31 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 16 Oct 2024 18:38:26 +0200 Subject: [PATCH 09/10] Properly change function definition --- src/engine/Union.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine/Union.h b/src/engine/Union.h index cbd14886b3..6932b946d4 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -78,7 +78,7 @@ class Union : public Operation { // right child depending on `left`. This permutation can then be used to swap // the columns without any copy operations. template - std::vector computePermutation() const; + std::vector computePermutation() const; // Take the given `IdTable`, add any missing columns to it (filled with // undefined values) and permutate the columns to match the end result. From bb544baff5e4d88efef2881209c565eea17ec099 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:20:46 +0200 Subject: [PATCH 10/10] Use range-based for loop --- src/engine/Union.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 97855b470d..2661bf5515 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -243,16 +243,17 @@ IdTable Union::computeUnion( // _____________________________________________________________________________ template std::vector Union::computePermutation() const { - ColumnIndex startOfUndefColumns = _subtrees[left ? 0 : 1]->getResultWidth(); - std::vector permutation(_columnOrigins.size()); - for (size_t targetColIdx = 0; targetColIdx < _columnOrigins.size(); - ++targetColIdx) { - ColumnIndex originIndex = _columnOrigins.at(targetColIdx)[left ? 0 : 1]; + constexpr size_t treeIndex = left ? 0 : 1; + ColumnIndex startOfUndefColumns = _subtrees[treeIndex]->getResultWidth(); + std::vector permutation{}; + permutation.reserve(_columnOrigins.size()); + for (const auto& columnOrigin : _columnOrigins) { + ColumnIndex originIndex = columnOrigin[treeIndex]; if (originIndex == NO_COLUMN) { originIndex = startOfUndefColumns; startOfUndefColumns++; } - permutation[targetColIdx] = originIndex; + permutation.push_back(originIndex); } return permutation; }