diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 7ebad1981..241953188 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -99,26 +99,25 @@ ProtoResult Bind::computeResult(bool requestLaziness) { auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) { return computeExpressionBind(localVocab, std::move(idTable), - subRes->localVocab(), _bind._expression.getPimpl()); }; if (subRes->isFullyMaterialized()) { if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) { - auto localVocab = - std::make_shared(subRes->getCopyOfLocalVocab()); - auto generator = [](std::shared_ptr vocab, auto applyBind, - std::shared_ptr result) - -> cppcoro::generator { - size_t size = result->idTable().size(); - for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { - co_yield applyBind( - cloneSubView(result->idTable(), - {offset, std::min(size, offset + CHUNK_SIZE)}), - vocab.get()); - } - }(localVocab, std::move(applyBind), std::move(subRes)); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + return { + [](auto applyBind, + std::shared_ptr result) -> Result::Generator { + size_t size = result->idTable().size(); + for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { + LocalVocab outVocab = result->getCopyOfLocalVocab(); + IdTable idTable = applyBind( + cloneSubView(result->idTable(), + {offset, std::min(size, offset + CHUNK_SIZE)}), + &outVocab); + co_yield {std::move(idTable), std::move(outVocab)}; + } + }(std::move(applyBind), std::move(subRes)), + resultSortedOn()}; } // Make a deep copy of the local vocab from `subRes` and then add to it (in // case BIND adds a new word or words). @@ -132,28 +131,25 @@ ProtoResult Bind::computeResult(bool requestLaziness) { LOG(DEBUG) << "BIND result computation done." << std::endl; return {std::move(result), resultSortedOn(), std::move(localVocab)}; } - auto localVocab = std::make_shared(); auto generator = - [](std::shared_ptr vocab, auto applyBind, - std::shared_ptr result) -> cppcoro::generator { - for (IdTable& idTable : result->idTables()) { - co_yield applyBind(std::move(idTable), vocab.get()); + [](auto applyBind, + std::shared_ptr result) -> Result::Generator { + for (auto& [idTable, localVocab] : result->idTables()) { + IdTable resultTable = applyBind(std::move(idTable), &localVocab); + co_yield {std::move(resultTable), std::move(localVocab)}; } - std::array vocabs{vocab.get(), &result->localVocab()}; - *vocab = LocalVocab::merge(std::span{vocabs}); - }(localVocab, std::move(applyBind), std::move(subRes)); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + }(std::move(applyBind), std::move(subRes)); + return {std::move(generator), resultSortedOn()}; } // _____________________________________________________________________________ IdTable Bind::computeExpressionBind( - LocalVocab* outputLocalVocab, IdTable idTable, - const LocalVocab& inputLocalVocab, + LocalVocab* localVocab, IdTable idTable, const sparqlExpression::SparqlExpression* expression) const { sparqlExpression::EvaluationContext evaluationContext( *getExecutionContext(), _subtree->getVariableColumns(), idTable, - getExecutionContext()->getAllocator(), inputLocalVocab, - cancellationHandle_, deadline_); + getExecutionContext()->getAllocator(), *localVocab, cancellationHandle_, + deadline_); sparqlExpression::ExpressionResult expressionResult = expression->evaluate(&evaluationContext); @@ -188,7 +184,7 @@ IdTable Bind::computeExpressionBind( if (it != resultGenerator.end()) { Id constantId = sparqlExpression::detail::constantExpressionResultToId( - std::move(*it), *outputLocalVocab); + std::move(*it), *localVocab); checkCancellation(); ad_utility::chunkedFill(outputColumn, constantId, CHUNK_SIZE, [this]() { checkCancellation(); }); @@ -199,7 +195,7 @@ IdTable Bind::computeExpressionBind( for (auto& resultValue : resultGenerator) { outputColumn[i] = sparqlExpression::detail::constantExpressionResultToId( - std::move(resultValue), *outputLocalVocab); + std::move(resultValue), *localVocab); i++; checkCancellation(); } diff --git a/src/engine/Bind.h b/src/engine/Bind.h index eeaafaf3e..34c515fb5 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -49,8 +49,7 @@ class Bind : public Operation { // Implementation for the binding of arbitrary expressions. IdTable computeExpressionBind( - LocalVocab* outputLocalVocab, IdTable idTable, - const LocalVocab& inputLocalVocab, + LocalVocab* localVocab, IdTable idTable, const sparqlExpression::SparqlExpression* expression) const; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 873bd21ae..32a874a51 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -13,13 +13,15 @@ #include "util/http/MediaTypes.h" // __________________________________________________________________________ -cppcoro::generator ExportQueryExecutionTrees::getIdTables( - const Result& result) { +cppcoro::generator +ExportQueryExecutionTrees::getIdTables(const Result& result) { if (result.isFullyMaterialized()) { - co_yield result.idTable(); + TableConstRefWithVocab pair{result.idTable(), result.localVocab()}; + co_yield pair; } else { - for (const IdTable& idTable : result.idTables()) { - co_yield idTable; + for (const Result::IdTableVocabPair& pair : result.idTables()) { + TableConstRefWithVocab tableWithVocab{pair.idTable_, pair.localVocab_}; + co_yield tableWithVocab; } } } @@ -33,11 +35,14 @@ ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, if (limitOffset._limit.value_or(1) == 0) { co_return; } - for (const IdTable& idTable : getIdTables(result)) { - uint64_t currentOffset = limitOffset.actualOffset(idTable.numRows()); - uint64_t upperBound = limitOffset.upperBound(idTable.numRows()); + for (TableConstRefWithVocab& tableWithVocab : getIdTables(result)) { + uint64_t currentOffset = + limitOffset.actualOffset(tableWithVocab.idTable_.numRows()); + uint64_t upperBound = + limitOffset.upperBound(tableWithVocab.idTable_.numRows()); if (currentOffset != upperBound) { - co_yield {idTable, std::views::iota(currentOffset, upperBound)}; + co_yield {std::move(tableWithVocab), + std::views::iota(currentOffset, upperBound)}; } limitOffset._offset -= currentOffset; if (limitOffset._limit.has_value()) { @@ -57,9 +62,10 @@ ExportQueryExecutionTrees::constructQueryResultToTriples( const ad_utility::sparql_types::Triples& constructTriples, LimitOffsetClause limitAndOffset, std::shared_ptr result, CancellationHandle cancellationHandle) { - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { + auto& idTable = pair.idTable_; for (uint64_t i : range) { - ConstructQueryExportContext context{i, idTable, result->localVocab(), + ConstructQueryExportContext context{i, idTable, pair.localVocab_, qet.getVariableColumns(), qet.getQec()->getIndex()}; using enum PositionInTriple; @@ -172,10 +178,10 @@ ExportQueryExecutionTrees::idTableToQLeverJSONBindings( std::shared_ptr result, CancellationHandle cancellationHandle) { AD_CORRECTNESS_CHECK(result != nullptr); - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t rowIndex : range) { - co_yield idTableToQLeverJSONRow(qet, columns, result->localVocab(), - rowIndex, idTable) + co_yield idTableToQLeverJSONRow(qet, columns, pair.localVocab_, rowIndex, + pair.idTable_) .dump(); cancellationHandle->throwIfCancelled(); } @@ -405,14 +411,14 @@ ExportQueryExecutionTrees::selectQueryResultToStream( // special case : binary export of IdTable if constexpr (format == MediaType::octetStream) { - for (const auto& [idTable, range] : - getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (const auto& columnIndex : selectedColumnIndices) { if (columnIndex.has_value()) { - co_yield std::string_view{reinterpret_cast(&idTable( - i, columnIndex.value().columnIndex_)), - sizeof(Id)}; + co_yield std::string_view{ + reinterpret_cast( + &pair.idTable_(i, columnIndex.value().columnIndex_)), + sizeof(Id)}; } } cancellationHandle->throwIfCancelled(); @@ -436,15 +442,15 @@ ExportQueryExecutionTrees::selectQueryResultToStream( constexpr auto& escapeFunction = format == MediaType::tsv ? RdfEscaping::escapeForTsv : RdfEscaping::escapeForCsv; - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); + Id id = pair.idTable_(i, val.columnIndex_); auto optionalStringAndType = idToStringAndType( - qet.getQec()->getIndex(), id, result->localVocab(), + qet.getQec()->getIndex(), id, pair.localVocab_, escapeFunction); if (optionalStringAndType.has_value()) [[likely]] { co_yield optionalStringAndType.value().first; @@ -561,15 +567,15 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: auto selectedColumnIndices = qet.selectedVariablesToColumnIndices(selectClause, false); // TODO we could prefilter for the nonexisting variables. - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { co_yield "\n "; for (size_t j = 0; j < selectedColumnIndices.size(); ++j) { if (selectedColumnIndices[j].has_value()) { const auto& val = selectedColumnIndices[j].value(); - Id id = idTable(i, val.columnIndex_); + Id id = pair.idTable_(i, val.columnIndex_); co_yield idToXMLBinding(val.variable_, id, qet.getQec()->getIndex(), - result->localVocab()); + pair.localVocab_); } } co_yield "\n "; @@ -611,12 +617,13 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: co_return; } - auto getBinding = [&](const IdTable& idTable, const uint64_t& i) { + auto getBinding = [&](const IdTable& idTable, const uint64_t& i, + const LocalVocab& localVocab) { nlohmann::ordered_json binding = {}; for (const auto& column : columns) { - auto optionalStringAndType = idToStringAndType( - qet.getQec()->getIndex(), idTable(i, column->columnIndex_), - result->localVocab()); + auto optionalStringAndType = + idToStringAndType(qet.getQec()->getIndex(), + idTable(i, column->columnIndex_), localVocab); if (optionalStringAndType.has_value()) [[likely]] { const auto& [stringValue, xsdType] = optionalStringAndType.value(); binding[column->variable_] = @@ -627,12 +634,12 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: }; bool isFirstRow = true; - for (const auto& [idTable, range] : getRowIndices(limitAndOffset, *result)) { + for (const auto& [pair, range] : getRowIndices(limitAndOffset, *result)) { for (uint64_t i : range) { if (!isFirstRow) [[likely]] { co_yield ","; } - co_yield getBinding(idTable, i); + co_yield getBinding(pair.idTable_, i, pair.localVocab_); cancellationHandle->throwIfCancelled(); isFirstRow = false; } diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index 339e7b2cf..d8a42b4d4 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -171,15 +171,23 @@ class ExportQueryExecutionTrees { const parsedQuery::SelectClause& selectClause, LimitOffsetClause limitAndOffset, CancellationHandle cancellationHandle); + // Public for testing. + public: + struct TableConstRefWithVocab { + const IdTable& idTable_; + const LocalVocab& localVocab_; + }; // Helper type that contains an `IdTable` and a view with related indices to // access the `IdTable` with. struct TableWithRange { - const IdTable& idTable_; + TableConstRefWithVocab tableWithVocab_; std::ranges::iota_view view_; }; + private: // Yield all `IdTables` provided by the given `result`. - static cppcoro::generator getIdTables(const Result& result); + static cppcoro::generator + getIdTables(const Result& result); // Return a range that contains the indices of the rows that have to be // exported from the `idTable` given the `LimitOffsetClause`. It takes into diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 90690c9a7..79ef7572a 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -48,47 +48,55 @@ ProtoResult Filter::computeResult(bool requestLaziness) { LOG(DEBUG) << "Filter result computation..." << endl; checkCancellation(); - auto localVocab = subRes->getSharedLocalVocab(); if (subRes->isFullyMaterialized()) { - IdTable result = filterIdTable(subRes, subRes->idTable()); + IdTable result = filterIdTable(subRes->sortedBy(), subRes->idTable(), + subRes->localVocab()); LOG(DEBUG) << "Filter result computation done." << endl; - return {std::move(result), resultSortedOn(), std::move(localVocab)}; + return {std::move(result), resultSortedOn(), subRes->getSharedLocalVocab()}; } if (requestLaziness) { - return {[](auto subRes, auto* self) -> cppcoro::generator { - for (IdTable& idTable : subRes->idTables()) { - IdTable result = self->filterIdTable(subRes, idTable); - co_yield result; + return {[](auto subRes, auto* self) -> Result::Generator { + for (auto& [idTable, localVocab] : subRes->idTables()) { + IdTable result = self->filterIdTable(subRes->sortedBy(), + idTable, localVocab); + co_yield {std::move(result), std::move(localVocab)}; } }(std::move(subRes), this), - resultSortedOn(), std::move(localVocab)}; + resultSortedOn()}; } // If we receive a generator of IdTables, we need to materialize it into a // single IdTable. size_t width = getSubtree().get()->getResultWidth(); IdTable result{width, getExecutionContext()->getAllocator()}; - ad_utility::callFixedSize(width, [this, &subRes, &result]() { - for (IdTable& idTable : subRes->idTables()) { - computeFilterImpl(result, idTable, subRes->localVocab(), - subRes->sortedBy()); - } - }); + std::vector localVocabs; + ad_utility::callFixedSize( + width, [this, &subRes, &result, &localVocabs]() { + for (Result::IdTableVocabPair& pair : subRes->idTables()) { + computeFilterImpl(result, pair.idTable_, pair.localVocab_, + subRes->sortedBy()); + localVocabs.emplace_back(std::move(pair.localVocab_)); + } + }); + + LocalVocab resultLocalVocab{}; + resultLocalVocab.mergeWith(localVocabs); LOG(DEBUG) << "Filter result computation done." << endl; - return {std::move(result), resultSortedOn(), std::move(localVocab)}; + return {std::move(result), resultSortedOn(), std::move(resultLocalVocab)}; } // _____________________________________________________________________________ -IdTable Filter::filterIdTable(const std::shared_ptr& subRes, - const IdTable& idTable) const { +IdTable Filter::filterIdTable(std::vector sortedBy, + const IdTable& idTable, + const LocalVocab& localVocab) const { size_t width = idTable.numColumns(); IdTable result{width, getExecutionContext()->getAllocator()}; CALL_FIXED_SIZE(width, &Filter::computeFilterImpl, this, result, idTable, - subRes->localVocab(), subRes->sortedBy()); + localVocab, std::move(sortedBy)); return result; } diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 8bf10cb0d..eaa9303bb 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -67,6 +67,7 @@ class Filter : public Operation { std::vector sortedBy) const; // Run `computeFilterImpl` on the provided IdTable - IdTable filterIdTable(const std::shared_ptr& subRes, - const IdTable& idTable) const; + IdTable filterIdTable(std::vector sortedBy, + const IdTable& idTable, + const LocalVocab& localVocab) const; }; diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 2088a1220..3c5399efc 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -339,16 +339,6 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { LOG(DEBUG) << "GroupBy subresult computation done" << std::endl; - // Make a deep copy of the local vocab from `subresult` and then add to it (in - // case GROUP_CONCAT adds a new word or words). - // - // TODO: In most GROUP BY operations, nothing is added to the local - // vocabulary, so it would be more efficient to first share the pointer here - // (like with `shareLocalVocabFrom`) and only copy it when a new word is about - // to be added. Same for BIND. - - auto localVocab = subresult->getCopyOfLocalVocab(); - std::vector groupByColumns; // parse the group by columns @@ -369,6 +359,7 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { } if (useHashMapOptimization) { + auto localVocab = subresult->getCopyOfLocalVocab(); IdTable idTable = CALL_FIXED_SIZE( groupByCols.size(), &GroupBy::computeGroupByForHashMapOptimization, this, metadataForUnsequentialData->aggregateAliases_, @@ -383,23 +374,25 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { if (!subresult->isFullyMaterialized()) { AD_CORRECTNESS_CHECK(metadataForUnsequentialData.has_value()); - auto localVocabPointer = - std::make_shared(std::move(localVocab)); - cppcoro::generator generator = CALL_FIXED_SIZE( + Result::Generator generator = CALL_FIXED_SIZE( (std::array{inWidth, outWidth}), &GroupBy::computeResultLazily, this, std::move(subresult), std::move(aggregates), std::move(metadataForUnsequentialData).value().aggregateAliases_, - std::move(groupByCols), localVocabPointer, !requestLaziness); + std::move(groupByCols), !requestLaziness); return requestLaziness - ? ProtoResult{std::move(generator), resultSortedOn(), - std::move(localVocabPointer)} + ? ProtoResult{std::move(generator), resultSortedOn()} : ProtoResult{cppcoro::getSingleElement(std::move(generator)), - resultSortedOn(), std::move(*localVocabPointer)}; + resultSortedOn()}; } AD_CORRECTNESS_CHECK(subresult->idTable().numColumns() == inWidth); + // Make a copy of the local vocab. Note: the LocalVocab has reference + // semantics via `shared_ptr`, so no actual strings are copied here. + + auto localVocab = subresult->getCopyOfLocalVocab(); + IdTable idTable = CALL_FIXED_SIZE( (std::array{inWidth, outWidth}), &GroupBy::doGroupBy, this, subresult->idTable(), groupByCols, aggregates, &localVocab); @@ -474,14 +467,15 @@ void GroupBy::processEmptyImplicitGroup( // _____________________________________________________________________________ template -cppcoro::generator GroupBy::computeResultLazily( +Result::Generator GroupBy::computeResultLazily( std::shared_ptr subresult, std::vector aggregates, std::vector aggregateAliases, - std::vector groupByCols, std::shared_ptr localVocab, - bool singleIdTable) const { + std::vector groupByCols, bool singleIdTable) const { size_t inWidth = _subtree->getResultWidth(); AD_CONTRACT_CHECK(inWidth == IN_WIDTH || IN_WIDTH == 0); - LazyGroupBy lazyGroupBy{*localVocab, std::move(aggregateAliases), + LocalVocab currentLocalVocab{}; + std::vector storedLocalVocabs; + LazyGroupBy lazyGroupBy{currentLocalVocab, std::move(aggregateAliases), getExecutionContext()->getAllocator(), groupByCols.size()}; @@ -491,12 +485,14 @@ cppcoro::generator GroupBy::computeResultLazily( GroupBlock currentGroupBlock; - for (IdTable& idTable : subresult->idTables()) { + for (Result::IdTableVocabPair& pair : subresult->idTables()) { + auto& idTable = pair.idTable_; if (idTable.empty()) { continue; } AD_CORRECTNESS_CHECK(idTable.numColumns() == inWidth); checkCancellation(); + storedLocalVocabs.emplace_back(std::move(pair.localVocab_)); if (currentGroupBlock.empty()) { for (size_t col : groupByCols) { @@ -505,11 +501,11 @@ cppcoro::generator GroupBy::computeResultLazily( } sparqlExpression::EvaluationContext evaluationContext = - createEvaluationContext(*localVocab, idTable); + createEvaluationContext(currentLocalVocab, idTable); size_t lastBlockStart = searchBlockBoundaries( [this, &groupSplitAcrossTables, &lazyGroupBy, &evaluationContext, - &resultTable, ¤tGroupBlock, &aggregates, &localVocab, + &resultTable, ¤tGroupBlock, &aggregates, ¤tLocalVocab, &groupByCols](size_t blockStart, size_t blockEnd) { if (groupSplitAcrossTables) { lazyGroupBy.processBlock(evaluationContext, blockStart, blockEnd); @@ -521,7 +517,7 @@ cppcoro::generator GroupBy::computeResultLazily( IdTableStatic table = std::move(resultTable).toStatic(); processBlock(table, aggregates, evaluationContext, - blockStart, blockEnd, localVocab.get(), + blockStart, blockEnd, ¤tLocalVocab, groupByCols); resultTable = std::move(table).toDynamic(); } @@ -530,8 +526,16 @@ cppcoro::generator GroupBy::computeResultLazily( groupSplitAcrossTables = true; lazyGroupBy.processBlock(evaluationContext, lastBlockStart, idTable.size()); if (!singleIdTable && !resultTable.empty()) { - co_yield resultTable; + currentLocalVocab.mergeWith(storedLocalVocabs); + Result::IdTableVocabPair outputPair{std::move(resultTable), + std::move(currentLocalVocab)}; + co_yield outputPair; + // Reuse buffer if not moved out + resultTable = std::move(outputPair.idTable_); resultTable.clear(); + // Keep last local vocab for next commit. + currentLocalVocab = std::move(storedLocalVocabs.back()); + storedLocalVocabs.clear(); } } // No need for final commit when loop was never entered. @@ -539,11 +543,11 @@ cppcoro::generator GroupBy::computeResultLazily( // If we have an implicit group by we need to produce one result row if (groupByCols.empty()) { processEmptyImplicitGroup(resultTable, aggregates, - localVocab.get()); - co_yield resultTable; + ¤tLocalVocab); + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } else if (singleIdTable) { // Yield at least a single empty table if requested. - co_yield resultTable; + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } co_return; } @@ -560,9 +564,10 @@ cppcoro::generator GroupBy::computeResultLazily( } sparqlExpression::EvaluationContext evaluationContext = - createEvaluationContext(*localVocab, idTable); + createEvaluationContext(currentLocalVocab, idTable); lazyGroupBy.commitRow(resultTable, evaluationContext, currentGroupBlock); - co_yield resultTable; + currentLocalVocab.mergeWith(storedLocalVocabs); + co_yield {std::move(resultTable), std::move(currentLocalVocab)}; } // _____________________________________________________________________________ diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index d18781f1c..acfc6feac 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -140,12 +140,11 @@ class GroupBy : public Operation { // skipping empty tables unless `singleIdTable` is set which causes the // function to yield a single id table with the complete result. template - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( std::shared_ptr subresult, std::vector aggregates, std::vector aggregateAliases, - std::vector groupByCols, std::shared_ptr localVocab, - bool singleIdTable) const; + std::vector groupByCols, bool singleIdTable) const; template void processGroup(const Aggregate& expression, diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 8702b4c1f..471206f80 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -135,7 +135,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { } // _____________________________________________________________________________ -cppcoro::generator IndexScan::scanInChunks() const { +Result::Generator IndexScan::scanInChunks() const { auto metadata = getMetadataForScan(); if (!metadata.has_value()) { co_return; @@ -145,7 +145,7 @@ cppcoro::generator IndexScan::scanInChunks() const { std::vector blocks{blocksSpan.begin(), blocksSpan.end()}; for (IdTable& idTable : getLazyScan(std::move(blocks))) { - co_yield std::move(idTable); + co_yield {std::move(idTable), LocalVocab{}}; } } @@ -153,7 +153,7 @@ cppcoro::generator IndexScan::scanInChunks() const { ProtoResult IndexScan::computeResult(bool requestLaziness) { LOG(DEBUG) << "IndexScan result computation...\n"; if (requestLaziness) { - return {scanInChunks(), resultSortedOn(), LocalVocab{}}; + return {scanInChunks(), resultSortedOn()}; } IdTable idTable{getExecutionContext()->getAllocator()}; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index bf73b2262..9e75c89d3 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -130,7 +130,7 @@ class IndexScan final : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; - cppcoro::generator scanInChunks() const; + Result::Generator scanInChunks() const; // Helper functions for the public `getLazyScanFor...` functions (see above). Permutation::IdTableGenerator getLazyScan( diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index ee0285c53..afa603fba 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -20,11 +20,11 @@ LocalVocab LocalVocab::clone() const { // _____________________________________________________________________________ LocalVocab LocalVocab::merge(std::span vocabs) { LocalVocab res; - auto inserter = std::back_inserter(res.otherWordSets_); - for (const auto* vocab : vocabs) { - std::ranges::copy(vocab->otherWordSets_, inserter); - *inserter = vocab->primaryWordSet_; - } + res.mergeWith(vocabs | + std::views::transform( + [](const LocalVocab* localVocab) -> const LocalVocab& { + return *localVocab; + })); return res; } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index be9a7c449..5e6c98296 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -91,6 +91,17 @@ class LocalVocab { // of the `vocabs`. The primary word set of the newly created vocab is empty. static LocalVocab merge(std::span vocabs); + // Merge all passed local vocabs to keep alive all the words from each of the + // `vocabs`. + template + void mergeWith(const R& vocabs) { + auto inserter = std::back_inserter(otherWordSets_); + for (const auto& vocab : vocabs) { + std::ranges::copy(vocab.otherWordSets_, inserter); + *inserter = vocab.primaryWordSet_; + } + } + // Return all the words from all the word sets as a vector. std::vector getAllWordsForTesting() const; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index c822943aa..64d2af234 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -180,12 +180,14 @@ CacheValue Operation::runComputationAndPrepareForCache( AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( [maxSize = cache.getMaxSizeSingleEntry()]( - const std::optional& currentIdTable, - const IdTable& newIdTable) { - auto currentSize = currentIdTable.has_value() - ? CacheValue::getSize(currentIdTable.value()) - : 0_B; - return maxSize >= currentSize + CacheValue::getSize(newIdTable); + const std::optional& currentIdTablePair, + const Result::IdTableVocabPair& newIdTable) { + auto currentSize = + currentIdTablePair.has_value() + ? CacheValue::getSize(currentIdTablePair.value().idTable_) + : 0_B; + return maxSize >= + currentSize + CacheValue::getSize(newIdTable.idTable_); }, [runtimeInfo = getRuntimeInfoPointer(), &cache, cacheKey](Result aggregatedResult) { diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index d2aab6637..292a62069 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -51,10 +51,11 @@ auto compareRowsBySortColumns(const std::vector& sortedBy) { // _____________________________________________________________________________ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper localVocab) - : data_{std::move(idTable)}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); + : data_{IdTableSharedLocalVocabPair{std::move(idTable), + std::move(localVocab.localVocab_)}}, + sortedBy_{std::move(sortedBy)} { + AD_CONTRACT_CHECK(std::get(data_).localVocab_ != + nullptr); assertSortOrderIsRespected(this->idTable(), sortedBy_); } @@ -65,40 +66,28 @@ Result::Result(IdTable idTable, std::vector sortedBy, SharedLocalVocabWrapper{std::move(localVocab)}} {} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, - SharedLocalVocabWrapper localVocab) - : data_{GenContainer{ - [](auto idTables, auto sortedBy) -> cppcoro::generator { - std::optional previousId = std::nullopt; - for (IdTable& idTable : idTables) { - if (!idTable.empty()) { - if (previousId.has_value()) { - AD_EXPENSIVE_CHECK(!compareRowsBySortColumns(sortedBy)( - idTable.at(0), previousId.value())); - } - previousId = idTable.at(idTable.size() - 1); - } - assertSortOrderIsRespected(idTable, sortedBy); - co_yield std::move(idTable); - } - }(std::move(idTables), sortedBy)}}, - sortedBy_{std::move(sortedBy)}, - localVocab_{std::move(localVocab.localVocab_)} { - AD_CONTRACT_CHECK(localVocab_ != nullptr); -} - -// _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab) - : Result{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(IdTableVocabPair pair, std::vector sortedBy) + : Result{std::move(pair.idTable_), std::move(sortedBy), + std::move(pair.localVocab_)} {} // _____________________________________________________________________________ -Result::Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab) - : Result{std::move(idTables), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}} {} +Result::Result(Generator idTables, std::vector sortedBy) + : data_{GenContainer{[](auto idTables, auto sortedBy) -> Generator { + std::optional previousId = std::nullopt; + for (IdTableVocabPair& pair : idTables) { + auto& idTable = pair.idTable_; + if (!idTable.empty()) { + if (previousId.has_value()) { + AD_EXPENSIVE_CHECK(!compareRowsBySortColumns(sortedBy)( + idTable.at(0), previousId.value())); + } + previousId = idTable.at(idTable.size() - 1); + } + assertSortOrderIsRespected(idTable, sortedBy); + co_yield pair; + } + }(std::move(idTables), sortedBy)}}, + sortedBy_{std::move(sortedBy)} {} // _____________________________________________________________________________ // Apply `LimitOffsetClause` to given `IdTable`. @@ -131,16 +120,17 @@ void Result::applyLimitOffset( } if (isFullyMaterialized()) { ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; - resizeIdTable(std::get(data_), limitOffset); + resizeIdTable(std::get(data_).idTable_, + limitOffset); limitTimeCallback(limitTimer.msecs(), idTable()); } else { - auto generator = [](cppcoro::generator original, - LimitOffsetClause limitOffset, - auto limitTimeCallback) -> cppcoro::generator { + auto generator = [](Generator original, LimitOffsetClause limitOffset, + auto limitTimeCallback) -> Generator { if (limitOffset._limit.value_or(1) == 0) { co_return; } - for (IdTable& idTable : original) { + for (IdTableVocabPair& pair : original) { + auto& idTable = pair.idTable_; ad_utility::timer::Timer limitTimer{ad_utility::timer::Timer::Started}; size_t originalSize = idTable.numRows(); resizeIdTable(idTable, limitOffset); @@ -152,7 +142,7 @@ void Result::applyLimitOffset( } limitTimeCallback(limitTimer.value(), idTable); if (limitOffset._offset == 0) { - co_yield idTable; + co_yield pair; } if (limitOffset._limit.value_or(1) == 0) { break; @@ -170,15 +160,14 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { auto limit = limitOffset._limit; AD_CONTRACT_CHECK(!limit.has_value() || numRows <= limit.value()); } else { - auto generator = - [](cppcoro::generator original, - LimitOffsetClause limitOffset) -> cppcoro::generator { + auto generator = [](Generator original, + LimitOffsetClause limitOffset) -> Generator { auto limit = limitOffset._limit; uint64_t elementCount = 0; - for (IdTable& idTable : original) { - elementCount += idTable.numRows(); + for (IdTableVocabPair& pair : original) { + elementCount += pair.idTable_.numRows(); AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); - co_yield idTable; + co_yield pair; } AD_CONTRACT_CHECK(!limit.has_value() || elementCount <= limit.value()); }(std::move(idTables()), limitOffset); @@ -200,17 +189,17 @@ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { }); }; if (isFullyMaterialized()) { - AD_EXPENSIVE_CHECK(performCheck(varColMap, std::get(data_))); + AD_EXPENSIVE_CHECK(performCheck( + varColMap, std::get(data_).idTable_)); } else { - auto generator = - [](cppcoro::generator original, - [[maybe_unused]] VariableToColumnMap varColMap, - [[maybe_unused]] auto performCheck) -> cppcoro::generator { - for (IdTable& idTable : original) { + auto generator = [](Generator original, + [[maybe_unused]] VariableToColumnMap varColMap, + [[maybe_unused]] auto performCheck) -> Generator { + for (IdTableVocabPair& pair : original) { // No need to check subsequent idTables assuming the datatypes // don't change mid result. - AD_EXPENSIVE_CHECK(performCheck(varColMap, idTable)); - co_yield idTable; + AD_EXPENSIVE_CHECK(performCheck(varColMap, pair.idTable_)); + co_yield pair; } }(std::move(idTables()), varColMap, std::move(performCheck)); data_.emplace(std::move(generator)); @@ -222,17 +211,17 @@ void Result::runOnNewChunkComputed( std::function onNewChunk, std::function onGeneratorFinished) { AD_CONTRACT_CHECK(!isFullyMaterialized()); - auto generator = [](cppcoro::generator original, auto onNewChunk, - auto onGeneratorFinished) -> cppcoro::generator { + auto generator = [](Generator original, auto onNewChunk, + auto onGeneratorFinished) -> Generator { // Call this within destructor to make sure it is also called when an // operation stops iterating before reaching the end. absl::Cleanup cleanup{ [&onGeneratorFinished]() { onGeneratorFinished(false); }}; try { ad_utility::timer::Timer timer{ad_utility::timer::Timer::Started}; - for (IdTable& idTable : original) { - onNewChunk(idTable, timer.value()); - co_yield idTable; + for (IdTableVocabPair& pair : original) { + onNewChunk(pair.idTable_, timer.value()); + co_yield pair; timer.start(); } } catch (...) { @@ -260,11 +249,11 @@ void Result::assertSortOrderIsRespected( // _____________________________________________________________________________ const IdTable& Result::idTable() const { AD_CONTRACT_CHECK(isFullyMaterialized()); - return std::get(data_); + return std::get(data_).idTable_; } // _____________________________________________________________________________ -cppcoro::generator& Result::idTables() const { +Result::Generator& Result::idTables() const { AD_CONTRACT_CHECK(!isFullyMaterialized()); const auto& container = std::get(data_); AD_CONTRACT_CHECK(!container.consumed_->exchange(true)); @@ -273,33 +262,40 @@ cppcoro::generator& Result::idTables() const { // _____________________________________________________________________________ bool Result::isFullyMaterialized() const noexcept { - return std::holds_alternative(data_); + return std::holds_alternative(data_); } // _____________________________________________________________________________ void Result::cacheDuringConsumption( - std::function&, const IdTable&)> + std::function&, + const IdTableVocabPair&)> fitInCache, std::function storeInCache) { AD_CONTRACT_CHECK(!isFullyMaterialized()); data_.emplace(ad_utility::wrapGeneratorWithCache( std::move(idTables()), - [fitInCache = std::move(fitInCache)](std::optional& aggregate, - const IdTable& newTable) { - bool doBothFitInCache = fitInCache(aggregate, newTable); + [fitInCache = std::move(fitInCache)]( + std::optional& aggregate, + const IdTableVocabPair& newTablePair) { + bool doBothFitInCache = fitInCache(aggregate, newTablePair); if (doBothFitInCache) { if (aggregate.has_value()) { - aggregate.value().insertAtEnd(newTable); + auto& value = aggregate.value(); + value.idTable_.insertAtEnd(newTablePair.idTable_); + value.localVocab_.mergeWith( + std::span{&newTablePair.localVocab_, 1}); } else { - aggregate.emplace(newTable.clone()); + aggregate.emplace(newTablePair.idTable_.clone(), + newTablePair.localVocab_.clone()); } } return doBothFitInCache; }, - [storeInCache = std::move(storeInCache), sortedBy = sortedBy_, - localVocab = localVocab_](IdTable idTable) mutable { - storeInCache(Result{std::move(idTable), std::move(sortedBy), - SharedLocalVocabWrapper{std::move(localVocab)}}); + [storeInCache = std::move(storeInCache), + sortedBy = sortedBy_](IdTableVocabPair pair) mutable { + storeInCache( + Result{std::move(pair.idTable_), std::move(sortedBy), + SharedLocalVocabWrapper{std::move(pair.localVocab_)}}); })); } diff --git a/src/engine/Result.h b/src/engine/Result.h index a119e0561..d3716fb26 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -22,16 +22,37 @@ // through a generator via `idTables()` when it is supposed to be lazily // evaluated. class Result { + public: + struct IdTableVocabPair { + IdTable idTable_; + LocalVocab localVocab_; + + // Explicit constructor to avoid problems with coroutines and temporaries. + // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103909 for details. + IdTableVocabPair(IdTable idTable, LocalVocab localVocab) + : idTable_{std::move(idTable)}, localVocab_{std::move(localVocab)} {} + }; + + using Generator = cppcoro::generator; + private: // Needs to be mutable in order to be consumable from a const result. struct GenContainer { - mutable cppcoro::generator generator_; + mutable Generator generator_; mutable std::unique_ptr consumed_ = std::make_unique(false); - explicit GenContainer(cppcoro::generator generator) + explicit GenContainer(Generator generator) : generator_{std::move(generator)} {} }; - using Data = std::variant; + + using LocalVocabPtr = std::shared_ptr; + + struct IdTableSharedLocalVocabPair { + IdTable idTable_; + // The local vocabulary of the result. + LocalVocabPtr localVocab_; + }; + using Data = std::variant; // The actual entries. Data data_; @@ -39,11 +60,6 @@ class Result { // Empty if the result is not sorted on any column. std::vector sortedBy_; - using LocalVocabPtr = std::shared_ptr; - - // The local vocabulary of the result. - LocalVocabPtr localVocab_ = std::make_shared(); - // Note: If additional members and invariants are added to the class (for // example information about the datatypes in each column) make sure that // those remain valid after calling non-const function like @@ -92,12 +108,8 @@ class Result { SharedLocalVocabWrapper localVocab); Result(IdTable idTable, std::vector sortedBy, LocalVocab&& localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, SharedLocalVocabWrapper localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocab&& localVocab); - Result(cppcoro::generator idTables, - std::vector sortedBy, LocalVocabPtr localVocab); + Result(IdTableVocabPair pair, std::vector sortedBy); + Result(Generator idTables, std::vector sortedBy); // Prevent accidental copying of a result table. Result(const Result& other) = delete; Result& operator=(const Result& other) = delete; @@ -131,7 +143,8 @@ class Result { // Throw an `ad_utility::Exception` if the underlying `data_` member holds the // wrong variant. void cacheDuringConsumption( - std::function&, const IdTable&)> + std::function&, + const IdTableVocabPair&)> fitInCache, std::function storeInCache); @@ -141,7 +154,7 @@ class Result { // Access to the underlying `IdTable`s. Throw an `ad_utility::Exception` // if the underlying `data_` member holds the wrong variant. - cppcoro::generator& idTables() const; + Generator& idTables() const; // Const access to the columns by which the `idTable()` is sorted. const std::vector& sortedBy() const { return sortedBy_; } @@ -157,12 +170,17 @@ class Result { // Filter::computeFilterImpl (evaluationContext) // Variable::evaluate (idToStringAndType) // - const LocalVocab& localVocab() const { return *localVocab_; } + const LocalVocab& localVocab() const { + AD_CONTRACT_CHECK(isFullyMaterialized()); + return *std::get(data_).localVocab_; + } // Get the local vocab as a shared pointer to const. This can be used if one // result has the same local vocab as one of its child results. SharedLocalVocabWrapper getSharedLocalVocab() const { - return SharedLocalVocabWrapper{localVocab_}; + AD_CONTRACT_CHECK(isFullyMaterialized()); + return SharedLocalVocabWrapper{ + std::get(data_).localVocab_}; } // Like `getSharedLocalVocabFrom`, but takes more than one result and merges @@ -176,7 +194,7 @@ class Result { static SharedLocalVocabWrapper getMergedLocalVocab(R&& subResults) { std::vector vocabs; for (const Result& table : subResults) { - vocabs.push_back(std::to_address(table.localVocab_)); + vocabs.push_back(&table.localVocab()); } return SharedLocalVocabWrapper{LocalVocab::merge(vocabs)}; } diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 9533a69e4..4587066ba 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -186,15 +186,12 @@ ProtoResult Service::computeResultImpl([[maybe_unused]] bool requestLaziness) { // Note: The `body`-generator also keeps the complete response connection // alive, so we have no lifetime issue here(see `HttpRequest::send` for // details). - auto localVocabPtr = std::make_shared(); - auto generator = computeResultLazily(expVariableKeys, std::move(body), - localVocabPtr, !requestLaziness); - + auto generator = + computeResultLazily(expVariableKeys, std::move(body), !requestLaziness); return requestLaziness - ? ProtoResult{std::move(generator), resultSortedOn(), - std::move(localVocabPtr)} + ? ProtoResult{std::move(generator), resultSortedOn()} : ProtoResult{cppcoro::getSingleElement(std::move(generator)), - resultSortedOn(), std::move(*localVocabPtr)}; + resultSortedOn()}; } template @@ -242,10 +239,10 @@ void Service::writeJsonResult(const std::vector& vars, } // ____________________________________________________________________________ -cppcoro::generator Service::computeResultLazily( +Result::Generator Service::computeResultLazily( const std::vector vars, - ad_utility::LazyJsonParser::Generator body, - std::shared_ptr localVocab, bool singleIdTable) { + ad_utility::LazyJsonParser::Generator body, bool singleIdTable) { + LocalVocab localVocab{}; IdTable idTable{getResultWidth(), getExecutionContext()->getAllocator()}; size_t rowIdx = 0; @@ -260,10 +257,15 @@ cppcoro::generator Service::computeResultLazily( } CALL_FIXED_SIZE(getResultWidth(), &Service::writeJsonResult, this, vars, - partJson, &idTable, localVocab.get(), rowIdx); + partJson, &idTable, &localVocab, rowIdx); if (!singleIdTable) { - co_yield idTable; + Result::IdTableVocabPair pair{std::move(idTable), + std::move(localVocab)}; + co_yield pair; + // Move back to reuse buffer if not moved out. + idTable = std::move(pair.idTable_); idTable.clear(); + localVocab = LocalVocab{}; rowIdx = 0; } resultExists = true; @@ -294,7 +296,7 @@ cppcoro::generator Service::computeResultLazily( } if (singleIdTable) { - co_yield idTable; + co_yield {std::move(idTable), std::move(localVocab)}; } } diff --git a/src/engine/Service.h b/src/engine/Service.h index d11b0191b..41aa9e5e5 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -148,8 +148,7 @@ class Service : public Operation { // Compute the result lazy as IdTable generator. // If the `singleIdTable` flag is set, the result is yielded as one idTable. - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( const std::vector vars, - ad_utility::LazyJsonParser::Generator body, - std::shared_ptr localVocab, bool singleIdTable); + ad_utility::LazyJsonParser::Generator body, bool singleIdTable); }; diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 07ef2a401..0fac7b31f 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -167,10 +167,8 @@ ProtoResult Union::computeResult(bool requestLaziness) { _subtrees[1]->getResult(requestLaziness); if (requestLaziness) { - auto localVocab = std::make_shared(); - auto generator = - computeResultLazily(std::move(subRes1), std::move(subRes2), localVocab); - return {std::move(generator), resultSortedOn(), std::move(localVocab)}; + return {computeResultLazily(std::move(subRes1), std::move(subRes2)), + resultSortedOn()}; } LOG(DEBUG) << "Union subresult computation done." << std::endl; @@ -257,29 +255,29 @@ IdTable Union::transformToCorrectColumnFormat( } // _____________________________________________________________________________ -cppcoro::generator Union::computeResultLazily( +Result::Generator Union::computeResultLazily( std::shared_ptr result1, - std::shared_ptr result2, - std::shared_ptr localVocab) const { + std::shared_ptr result2) const { std::vector permutation = computePermutation(); if (result1->isFullyMaterialized()) { - co_yield transformToCorrectColumnFormat(result1->idTable().clone(), - permutation); + co_yield { + transformToCorrectColumnFormat(result1->idTable().clone(), permutation), + result1->getCopyOfLocalVocab()}; } else { - for (IdTable& idTable : result1->idTables()) { - co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + for (auto& [idTable, localVocab] : result1->idTables()) { + co_yield {transformToCorrectColumnFormat(std::move(idTable), permutation), + std::move(localVocab)}; } } permutation = computePermutation(); if (result2->isFullyMaterialized()) { - co_yield transformToCorrectColumnFormat(result2->idTable().clone(), - permutation); + co_yield { + transformToCorrectColumnFormat(result2->idTable().clone(), permutation), + result2->getCopyOfLocalVocab()}; } else { - for (IdTable& idTable : result2->idTables()) { - co_yield transformToCorrectColumnFormat(std::move(idTable), permutation); + for (auto& [idTable, localVocab] : result2->idTables()) { + co_yield {transformToCorrectColumnFormat(std::move(idTable), permutation), + std::move(localVocab)}; } } - std::array vocabs{&result1->localVocab(), - &result2->localVocab()}; - *localVocab = LocalVocab::merge(vocabs); } diff --git a/src/engine/Union.h b/src/engine/Union.h index 782c94e46..e71702315 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -81,8 +81,7 @@ class Union : public Operation { // Create a generator that yields the `IdTable` for the left or right child // one after another and apply a potential differing permutation to it. Write // the merged LocalVocab to the given `LocalVocab` object at the end. - cppcoro::generator computeResultLazily( + Result::Generator computeResultLazily( std::shared_ptr result1, - std::shared_ptr result2, - std::shared_ptr localVocab) const; + std::shared_ptr result2) const; }; diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h index 2b4e21978..b349743f9 100644 --- a/src/global/IdTriple.h +++ b/src/global/IdTriple.h @@ -14,15 +14,17 @@ template struct IdTriple { + // A triple has four components: subject, predicate, object, and graph. + static constexpr size_t NumCols = 4; // The three IDs that define the triple. - std::array ids_; + std::array ids_; // Some additional payload of the triple, e.g. which graph it belongs to. std::array payload_; - explicit IdTriple(const std::array& ids) requires(N == 0) + explicit IdTriple(const std::array& ids) requires(N == 0) : ids_(ids), payload_(){}; - explicit IdTriple(const std::array& ids, + explicit IdTriple(const std::array& ids, const std::array& payload) requires(N != 0) : ids_(ids), payload_(payload){}; @@ -34,10 +36,11 @@ struct IdTriple { return os; } - // TODO: default once we drop clang16 with libc++16 + // TODO: use `= default` once we drop Clang 16 with `libc++16`. std::strong_ordering operator<=>(const IdTriple& other) const { - return std::tie(ids_[0], ids_[1], ids_[2]) <=> - std::tie(other.ids_[0], other.ids_[1], other.ids_[2]); + static_assert(NumCols == 4); + return std::tie(ids_[0], ids_[1], ids_[2], ids_[3]) <=> + std::tie(other.ids_[0], other.ids_[1], other.ids_[2], other.ids_[3]); } bool operator==(const IdTriple& other) const = default; @@ -49,8 +52,8 @@ struct IdTriple { // Permutes the ID of this triple according to the given permutation given by // its keyOrder. IdTriple permute(const std::array& keyOrder) const { - std::array newIds{ids_[keyOrder[0]], ids_[keyOrder[1]], - ids_[keyOrder[2]]}; + std::array newIds{ids_[keyOrder[0]], ids_[keyOrder[1]], + ids_[keyOrder[2]], ids_[3]}; if constexpr (N == 0) { return IdTriple(newIds); } else { @@ -60,6 +63,7 @@ struct IdTriple { CompressedBlockMetadata::PermutedTriple toPermutedTriple() const requires(N == 0) { - return {ids_[0], ids_[1], ids_[2]}; + static_assert(NumCols == 4); + return {ids_[0], ids_[1], ids_[2], ids_[3]}; } }; diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index df0daa30e..d5c4d5f84 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -816,12 +816,7 @@ CompressedRelationWriter::compressAndWriteColumn(std::span column) { // in the block metadata. static std::pair>> getGraphInfo( const std::shared_ptr& block) { - // Early bailout if the graph column doesn't exist (should only happen in - // unit tests). - if (block->numColumns() <= ADDITIONAL_COLUMN_GRAPH_ID) { - return {false, std::nullopt}; - } - + AD_CORRECTNESS_CHECK(block->numColumns() > ADDITIONAL_COLUMN_GRAPH_ID); // Return true iff the block contains duplicates when only considering the // actual triple of S, P, and O. auto hasDuplicates = [&block]() { @@ -874,18 +869,13 @@ void CompressedRelationWriter::compressAndWriteBlock( AD_CORRECTNESS_CHECK(lastCol0Id == last[0]); auto [hasDuplicates, graphInfo] = getGraphInfo(block); - // The blocks are written in parallel and possibly out of order. We thus - // can't set the proper block indices here. The proper block indices are set - // in the `getFinishedBlocks` function. - static constexpr size_t blockIndexNotYetSet = 111333555; - blockBuffer_.wlock()->push_back( - CompressedBlockMetadata{std::move(offsets), - numRows, - {first[0], first[1], first[2]}, - {last[0], last[1], last[2]}, - std::move(graphInfo), - hasDuplicates, - blockIndexNotYetSet}); + blockBuffer_.wlock()->emplace_back(CompressedBlockMetadataNoBlockIndex{ + std::move(offsets), + numRows, + {first[0], first[1], first[2], first[3]}, + {last[0], last[1], last[2], last[3]}, + std::move(graphInfo), + hasDuplicates}); if (invokeCallback && smallBlocksCallback_) { std::invoke(smallBlocksCallback_, std::move(block)); } @@ -916,6 +906,10 @@ CompressedRelationReader::getRelevantBlocks( setKey(&PermutedTriple::col1Id_, &ScanSpecification::col1Id); setKey(&PermutedTriple::col2Id_, &ScanSpecification::col2Id); + // We currently don't filter by the graph ID here. + key.firstTriple_.graphId_ = Id::min(); + key.lastTriple_.graphId_ = Id::max(); + // This comparator only returns true if a block stands completely before // another block without any overlap. In other words, the last triple of `a` // must be smaller than the first triple of `b` to return true. @@ -947,14 +941,14 @@ auto CompressedRelationReader::getFirstAndLastTriple( // Note: the following call only returns the part of the block that // actually matches the col0 and col1. return readPossiblyIncompleteBlock(scanSpec, block, std::nullopt, - {{0, 1, 2}}); + {{0, 1, 2, ADDITIONAL_COLUMN_GRAPH_ID}}); }; auto rowToTriple = [&](const auto& row) -> CompressedBlockMetadata::PermutedTriple { AD_CORRECTNESS_CHECK(!scanSpec.col0Id().has_value() || row[0] == scanSpec.col0Id().value()); - return {row[0], row[1], row[2]}; + return {row[0], row[1], row[2], row[ADDITIONAL_COLUMN_GRAPH_ID]}; }; auto firstBlock = scanBlock(relevantBlocks.front()); diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index d980541fa..5b582cf54 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -49,7 +49,7 @@ struct DecompressedBlockSizeGetter { using CompressedBlock = std::vector>; // The metadata of a compressed block of ID triples in an index permutation. -struct CompressedBlockMetadata { +struct CompressedBlockMetadataNoBlockIndex { // Since we have column-based indices, the two columns of each block are // stored separately (but adjacently). struct OffsetAndCompressedSize { @@ -79,13 +79,14 @@ struct CompressedBlockMetadata { Id col0Id_; Id col1Id_; Id col2Id_; + Id graphId_; auto operator<=>(const PermutedTriple&) const = default; // Formatted output for debugging. friend std::ostream& operator<<(std::ostream& str, const PermutedTriple& trip) { str << "Triple: " << trip.col0Id_ << ' ' << trip.col1Id_ << ' ' - << trip.col2Id_ << std::endl; + << trip.col2Id_ << ' ' << trip.graphId_ << std::endl; return str; } @@ -103,6 +104,12 @@ struct CompressedBlockMetadata { // blocks. bool containsDuplicatesWithDifferentGraphs_; + // Two of these are equal if all members are equal. + bool operator==(const CompressedBlockMetadataNoBlockIndex&) const = default; +}; + +// The same as the above struct, but this block additionally knows its index. +struct CompressedBlockMetadata : CompressedBlockMetadataNoBlockIndex { // The index of this block in the permutation. This is required to find // the corresponding block from the `LocatedTriples` when only a subset of // blocks is being used. @@ -182,7 +189,8 @@ AD_SERIALIZE_FUNCTION(CompressedRelationMetadata) { class CompressedRelationWriter { private: ad_utility::Synchronized outfile_; - ad_utility::Synchronized> blockBuffer_; + ad_utility::Synchronized> + blockBuffer_; // If multiple small relations are stored in the same block, keep track of the // first and last `col0Id`. Id currentBlockFirstCol0_ = Id::makeUndefined(); @@ -268,15 +276,19 @@ class CompressedRelationWriter { std::vector getFinishedBlocks() && { finish(); auto blocks = std::move(*(blockBuffer_.wlock())); - std::ranges::sort(blocks, {}, [](const CompressedBlockMetadata& bl) { - return std::tie(bl.firstTriple_.col0Id_, bl.firstTriple_.col1Id_, - bl.firstTriple_.col2Id_); - }); + std::ranges::sort( + blocks, {}, [](const CompressedBlockMetadataNoBlockIndex& bl) { + return std::tie(bl.firstTriple_.col0Id_, bl.firstTriple_.col1Id_, + bl.firstTriple_.col2Id_); + }); + + std::vector result; + result.reserve(blocks.size()); // Write the correct block indices for (size_t i : ad_utility::integerRange(blocks.size())) { - blocks.at(i).blockIndex_ = i; + result.emplace_back(std::move(blocks.at(i)), i); } - return blocks; + return result; } // Compute the multiplicity of given the number of elements and the number of @@ -365,8 +377,7 @@ class CompressedRelationWriter { friend std::pair, std::vector> compressedRelationTestWriteCompressedRelations( - const auto& inputs, std::string filename, - ad_utility::MemorySize blocksize); + auto inputs, std::string filename, ad_utility::MemorySize blocksize); }; using namespace std::string_view_literals; diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index 594ef06ad..d669952cf 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. inline const IndexFormatVersion& indexFormatVersion{ - 1571, DateYearOrDuration{Date{2024, 10, 22}}}; + 1572, DateYearOrDuration{Date{2024, 10, 22}}}; } // namespace qlever diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp index 1adfb9ac2..9bcad6838 100644 --- a/src/index/LocatedTriples.cpp +++ b/src/index/LocatedTriples.cpp @@ -57,36 +57,56 @@ NumAddedAndDeleted LocatedTriplesPerBlock::numTriples(size_t blockIndex) const { return {countInserts, blockUpdateTriples.size() - countInserts}; } -// ____________________________________________________________________________ -// Collect the relevant entries of a LocatedTriple into a triple. -template +// Return a `std::tie` of the relevant entries of a row, according to +// `numIndexColumns` and `includeGraphColumn`. For example, if `numIndexColumns` +// is `2` and `includeGraphColumn` is `true`, the function returns +// `std::tie(row[0], row[1], row[2])`. +template requires(numIndexColumns >= 1 && numIndexColumns <= 3) auto tieIdTableRow(auto& row) { return [&row](std::index_sequence) { return std::tie(row[I]...); - }(std::make_index_sequence{}); + }(std::make_index_sequence(includeGraphColumn)>{}); } -// ____________________________________________________________________________ -// Collect the relevant entries of a LocatedTriple into a triple. -template +// Return a `std::tie` of the relevant entries of a located triple, +// according to `numIndexColumns` and `includeGraphColumn`. For example, if +// `numIndexColumns` is `2` and `includeGraphColumn` is `true`, the function +// returns `std::tie(ids_[1], ids_[2], ids_[3])`, where `ids_` is from +// `lt->triple_`. +template requires(numIndexColumns >= 1 && numIndexColumns <= 3) auto tieLocatedTriple(auto& lt) { + constexpr auto indices = []() { + std::array(includeGraphColumn)> + a; + for (size_t i = 0; i < numIndexColumns; ++i) { + a[i] = 3 - numIndexColumns + i; + } + if (includeGraphColumn) { + // The graph column resides at index `3` of the located triple. + a.back() = 3; + } + return a; + }(); auto& ids = lt->triple_.ids_; - return [&ids](std::index_sequence) { - return std::tie(ids[3 - numIndexColumns + I]...); - }(std::make_index_sequence{}); + return [&ids](ad_utility::ValueSequence) { + return std::tie(ids[I]...); + }(ad_utility::toIntegerSequence()); } // ____________________________________________________________________________ -template +template IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, const IdTable& block) const { // This method should only be called if there are located triples in the // specified block. AD_CONTRACT_CHECK(map_.contains(blockIndex)); - AD_CONTRACT_CHECK(numIndexColumns <= block.numColumns()); + AD_CONTRACT_CHECK(numIndexColumns + static_cast(includeGraphColumn) <= + block.numColumns()); auto numInsertsAndDeletes = numTriples(blockIndex); IdTable result{block.numColumns(), block.getAllocator()}; @@ -95,24 +115,33 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, const auto& locatedTriples = map_.at(blockIndex); auto lessThan = [](const auto& lt, const auto& row) { - return tieLocatedTriple(lt) < - tieIdTableRow(row); + return tieLocatedTriple(lt) < + tieIdTableRow(row); }; auto equal = [](const auto& lt, const auto& row) { - return tieLocatedTriple(lt) == - tieIdTableRow(row); + return tieLocatedTriple(lt) == + tieIdTableRow(row); }; auto rowIt = block.begin(); auto locatedTripleIt = locatedTriples.begin(); auto resultIt = result.begin(); - auto writeTripleToResult = [&result, &resultIt](auto& locatedTriple) { - for (size_t i = 0; i < numIndexColumns; i++) { + // Write the given `locatedTriple` to `result` at position `resultIt` and + // advance `resultIt` by one. See the example in the comment of the + // declaration of `mergeTriples` to understand the behavior of this function. + auto writeLocatedTripleToResult = [&result, &resultIt](auto& locatedTriple) { + // Write part from `locatedTriple` that also occurs in the input `block` to + // the result. + static constexpr auto plusOneIfGraph = + static_cast(includeGraphColumn); + for (size_t i = 0; i < numIndexColumns + plusOneIfGraph; i++) { (*resultIt)[i] = locatedTriple.triple_.ids_[3 - numIndexColumns + i]; } - // Write UNDEF to any additional columns. - for (size_t i = numIndexColumns; i < result.numColumns(); i++) { + // If the input `block` has payload columns (which located triples don't + // have), set their values to UNDEF. + for (size_t i = numIndexColumns + plusOneIfGraph; i < result.numColumns(); + i++) { (*resultIt)[i] = ValueId::makeUndefined(); } resultIt++; @@ -122,7 +151,7 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, if (lessThan(locatedTripleIt, *rowIt)) { if (locatedTripleIt->shouldTripleExist_) { // Insertion of a non-existent triple. - writeTripleToResult(*locatedTripleIt); + writeLocatedTripleToResult(*locatedTripleIt); } locatedTripleIt++; } else if (equal(locatedTripleIt, *rowIt)) { @@ -142,7 +171,7 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, std::ranges::for_each( std::ranges::subrange(locatedTripleIt, locatedTriples.end()) | std::views::filter(&LocatedTriple::shouldTripleExist_), - writeTripleToResult); + writeLocatedTripleToResult); } if (rowIt != block.end()) { AD_CORRECTNESS_CHECK(locatedTripleIt == locatedTriples.end()); @@ -158,14 +187,25 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, // ____________________________________________________________________________ IdTable LocatedTriplesPerBlock::mergeTriples(size_t blockIndex, const IdTable& block, - size_t numIndexColumns) const { - if (numIndexColumns == 3) { - return mergeTriplesImpl<3>(blockIndex, block); - } else if (numIndexColumns == 2) { - return mergeTriplesImpl<2>(blockIndex, block); + size_t numIndexColumns, + bool includeGraphColumn) const { + // The following code does nothing more than turn `numIndexColumns` and + // `includeGraphColumn` into template parameters of `mergeTriplesImpl`. + auto mergeTriplesImplHelper = [numIndexColumns, blockIndex, &block, + this]() { + if (numIndexColumns == 3) { + return mergeTriplesImpl<3, hasGraphColumn>(blockIndex, block); + } else if (numIndexColumns == 2) { + return mergeTriplesImpl<2, hasGraphColumn>(blockIndex, block); + } else { + AD_CORRECTNESS_CHECK(numIndexColumns == 1); + return mergeTriplesImpl<1, hasGraphColumn>(blockIndex, block); + } + }; + if (includeGraphColumn) { + return mergeTriplesImplHelper.template operator()(); } else { - AD_CORRECTNESS_CHECK(numIndexColumns == 1); - return mergeTriplesImpl<1>(blockIndex, block); + return mergeTriplesImplHelper.template operator()(); } } diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h index a254bd87b..c9d82d674 100644 --- a/src/index/LocatedTriples.h +++ b/src/index/LocatedTriples.h @@ -20,15 +20,15 @@ struct NumAddedAndDeleted { bool operator<=>(const NumAddedAndDeleted&) const = default; }; -// A triple and its block in a particular permutation. -// For a detailed definition of all border cases, see the definition at -// the end of this file. +// A triple and its block in a particular permutation. For a detailed definition +// of all border cases, see the definition at the end of this file. struct LocatedTriple { // The index of the block, according to the definition above. size_t blockIndex_; // The `Id`s of the triple in the order of the permutation. For example, - // for an object pertaining to the OPS permutation: `id1` is the object, - // `id2` is the predicate, and `id3` is the subject. + // for an object pertaining to the OPS permutation: `triple_[0]` is the + // object, `triple_[1]` is the predicate, `triple_[2]` is the subject, + // and `triple_[3]` is the graph. IdTriple<0> triple_; // Flag that is true if the given triple is inserted and false if it @@ -81,8 +81,9 @@ class LocatedTriplesPerBlock { FRIEND_TEST(LocatedTriplesTest, numTriplesInBlock); - // Impl function to `mergeTriples`. - template + // Implementation of the `mergeTriples` function (which has `numIndexColumns` + // as a normal argument, and translates it into a template argument). + template IdTable mergeTriplesImpl(size_t blockIndex, const IdTable& block) const; // Stores the block metadata where the block borders have been adjusted for @@ -104,23 +105,27 @@ class LocatedTriplesPerBlock { // `blockIndex`. bool hasUpdates(size_t blockIndex) const; - // Merge located triples for `blockIndex_` with the given index `block` and - // write to `result`, starting from position `offsetInResult`. Return the - // number of rows written to `result`. + // Merge located triples for `blockIndex_` (there must be at least one, + // otherwise this function must not be called) with the given input `block`. + // Return the result as an `IdTable`, which has the same number of columns as + // `block`. // - // PRECONDITIONS: - // - // 1. `mergeTriples` must always be called with all the index columns in the - // input. So the column indices must be `{0, 1, 2, ...}`. + // `numIndexColumns` is the number of columns in `block`, except the graph + // column and payload if any, that is, a number from `{1, 2, 3}`. + // `includeGraphColumn` specifies whether `block` contains the graph column. // - // 2. It is the responsibility of the caller that there is enough space for - // the result of the merge in `result` starting from `offsetInResult`. + // If `block` has payload columns (which currently our located triples never + // have), the value of the merged located triples for these columns is set to + // UNDEF. // - // 3. The set of located triples for `blockIndex_` must be non-empty. - // Otherwise, there is no need for merging and this method shouldn't be - // called for efficiency reasons. + // For example, assume that `block` is from the POS permutation, that + // `numIndexColumns` is 2 (that is, only OS are present in the block), that + // `includeGraphColumn` is `true` (that is, G is also present in the block), + // and that `block` has block has two additional payload columns X and Y. + // Then the result has five columns (like the input `block`), and each merged + // located triple will have values for OSG and UNDEF for X and Y. IdTable mergeTriples(size_t blockIndex, const IdTable& block, - size_t numIndexColumns) const; + size_t numIndexColumns, bool includeGraphColumn) const; // Add `locatedTriples` to the `LocatedTriplesPerBlock`. // Return handles to where they were added (`LocatedTriples` is a sorted set, diff --git a/src/util/ConstexprUtils.h b/src/util/ConstexprUtils.h index 90b40e6ab..12ece671f 100644 --- a/src/util/ConstexprUtils.h +++ b/src/util/ConstexprUtils.h @@ -159,7 +159,6 @@ template auto toIntegerSequence() { return detail::toIntegerSequenceHelper( std::make_index_sequence{}); - // return typename detail::ToIntegerSequenceImpl::type{}; } // Map a single integer `value` that is in the range `[0, ..., (maxValue + 1) ^ diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp index f920d5193..9233cbd1b 100644 --- a/test/CompressedRelationsTest.cpp +++ b/test/CompressedRelationsTest.cpp @@ -23,6 +23,10 @@ Id V(int64_t index) { return Id::makeFromVocabIndex(VocabIndex::make(index)); } +// A default graph IRI that is used in test cases where we don't care about the +// graph. +const Id g = V(1234059); + // A representation of a relation, consisting of the constant `col0_` element // as well as the 2D-vector for the other two columns. `col1And2_` must be // sorted lexicographically. @@ -85,8 +89,7 @@ void checkThatTablesAreEqual(const auto& expected, const IdTable& actual, std::pair, std::vector> compressedRelationTestWriteCompressedRelations( - const auto& inputs, std::string filename, - ad_utility::MemorySize blocksize) { + auto inputs, std::string filename, ad_utility::MemorySize blocksize) { // First check the invariants of the `inputs`. They must be sorted by the // `col0_` and for each of the `inputs` the `col1And2_` must also be sorted. AD_CONTRACT_CHECK(std::ranges::is_sorted( @@ -100,6 +103,17 @@ compressedRelationTestWriteCompressedRelations( // First create the on-disk permutation. size_t numColumns = getNumColumns(inputs) + 1; + // If the input has no graph info, add a dummy graph value to all inputs, + // such that the assertions work. + if (numColumns == 3) { + ++numColumns; + for (auto& input : inputs) { + for (auto& row : input.col1And2_) { + row.push_back(103496581); + } + } + } + AD_CORRECTNESS_CHECK(numColumns >= 4); CompressedRelationWriter writer{numColumns, ad_utility::File{filename, "w"}, blocksize}; vector metaData; @@ -309,9 +323,11 @@ TEST(CompressedRelationWriter, getFirstAndLastTriple) { using namespace ::testing; // Write some triples, and prepare an index std::vector inputs; + // A dummy graph ID. + int g2 = 120349; for (int i = 1; i < 200; ++i) { - inputs.push_back( - RelationInput{i, {{i - 1, i + 1}, {i - 1, i + 2}, {i + 1, i - 1}}}); + inputs.push_back(RelationInput{ + i, {{i - 1, i + 1, g2}, {i - 1, i + 2, g2}, {i + 1, i - 1, g2}}}); } auto filename = "getFirstAndLastTriple.dat"; auto [blocks, metaData, readerPtr] = @@ -486,17 +502,17 @@ TEST(CompressedRelationMetadata, GettersAndSetters) { TEST(CompressedRelationReader, getBlocksForJoinWithColumn) { CompressedBlockMetadata block1{ - {}, 0, {V(16), V(0), V(0)}, {V(38), V(4), V(12)}, {}, false, 0}; + {{}, 0, {V(16), V(0), V(0), g}, {V(38), V(4), V(12), g}, {}, false}, 0}; CompressedBlockMetadata block2{ - {}, 0, {V(42), V(3), V(0)}, {V(42), V(4), V(12)}, {}, false, 1}; + {{}, 0, {V(42), V(3), V(0), g}, {V(42), V(4), V(12), g}, {}, false}, 1}; CompressedBlockMetadata block3{ - {}, 0, {V(42), V(4), V(13)}, {V(42), V(6), V(9)}, {}, false, 2}; + {{}, 0, {V(42), V(4), V(13), g}, {V(42), V(6), V(9), g}, {}, false}, 2}; // We are only interested in blocks with a col0 of `42`. CompressedRelationMetadata relation; relation.col0Id_ = V(42); CompressedRelationReader::ScanSpecAndBlocksAndBounds::FirstAndLastTriple - firstAndLastTriple{{V(42), V(3), V(0)}, {V(42), V(6), V(9)}}; + firstAndLastTriple{{V(42), V(3), V(0), g}, {V(42), V(6), V(9), g}}; std::vector blocks{block1, block2, block3}; CompressedRelationReader::ScanSpecAndBlocksAndBounds metadataAndBlocks{ @@ -527,28 +543,29 @@ TEST(CompressedRelationReader, getBlocksForJoinWithColumn) { metadataAndBlocks.scanSpec_.setCol1Id(V(4)); metadataAndBlocks.firstAndLastTriple_ = CompressedRelationReader::ScanSpecAndBlocksAndBounds::FirstAndLastTriple{ - {V(42), V(4), V(11)}, {V(42), V(4), V(738)}}; + {V(42), V(4), V(11), g}, {V(42), V(4), V(738), g}}; test({V(11), V(27), V(30)}, {block2, block3}); test({V(12)}, {block2}); test({V(13)}, {block3}); } TEST(CompressedRelationReader, getBlocksForJoin) { CompressedBlockMetadata block1{ - {}, 0, {V(16), V(0), V(0)}, {V(38), V(4), V(12)}, {}, false, 0}; + {{}, 0, {V(16), V(0), V(0), g}, {V(38), V(4), V(12), g}, {}, false}, 0}; CompressedBlockMetadata block2{ - {}, 0, {V(42), V(3), V(0)}, {V(42), V(4), V(12)}, {}, false, 1}; + {{}, 0, {V(42), V(3), V(0), g}, {V(42), V(4), V(12), g}, {}, false}, 1}; CompressedBlockMetadata block3{ - {}, 0, {V(42), V(5), V(13)}, {V(42), V(8), V(9)}, {}, false, 2}; + {{}, 0, {V(42), V(5), V(13), g}, {V(42), V(8), V(9), g}, {}, false}, 2}; CompressedBlockMetadata block4{ - {}, 0, {V(42), V(8), V(16)}, {V(42), V(20), V(9)}, {}, false, 3}; + {{}, 0, {V(42), V(8), V(16), g}, {V(42), V(20), V(9), g}, {}, false}, 3}; CompressedBlockMetadata block5{ - {}, 0, {V(42), V(20), V(16)}, {V(42), V(20), V(63)}, {}, false, 4}; + {{}, 0, {V(42), V(20), V(16), g}, {V(42), V(20), V(63), g}, {}, false}, + 4}; // We are only interested in blocks with a col0 of `42`. CompressedRelationMetadata relation; relation.col0Id_ = V(42); CompressedRelationReader::ScanSpecAndBlocksAndBounds::FirstAndLastTriple - firstAndLastTriple{{V(42), V(3), V(0)}, {V(42), V(20), V(63)}}; + firstAndLastTriple{{V(42), V(3), V(0), g}, {V(42), V(20), V(63), g}}; std::vector blocks{block1, block2, block3, block4, block5}; CompressedRelationReader::ScanSpecAndBlocksAndBounds metadataAndBlocks{ @@ -556,17 +573,18 @@ TEST(CompressedRelationReader, getBlocksForJoin) { firstAndLastTriple}; CompressedBlockMetadata blockB1{ - {}, 0, {V(16), V(0), V(0)}, {V(38), V(4), V(12)}, {}, false, 0}; + {{}, 0, {V(16), V(0), V(0), g}, {V(38), V(4), V(12), g}, {}, false}, 0}; CompressedBlockMetadata blockB2{ - {}, 0, {V(47), V(3), V(0)}, {V(47), V(6), V(12)}, {}, false, 1}; + {{}, 0, {V(47), V(3), V(0), g}, {V(47), V(6), V(12), g}, {}, false}, 1}; CompressedBlockMetadata blockB3{ - {}, 0, {V(47), V(7), V(13)}, {V(47), V(9), V(9)}, {}, false, 2}; + {{}, 0, {V(47), V(7), V(13), g}, {V(47), V(9), V(9), g}, {}, false}, 2}; CompressedBlockMetadata blockB4{ - {}, 0, {V(47), V(38), V(7)}, {V(47), V(38), V(8)}, {}, false, 3}; + {{}, 0, {V(47), V(38), V(7), g}, {V(47), V(38), V(8), g}, {}, false}, 3}; CompressedBlockMetadata blockB5{ - {}, 0, {V(47), V(38), V(9)}, {V(47), V(38), V(12)}, {}, false, 4}; + {{}, 0, {V(47), V(38), V(9), g}, {V(47), V(38), V(12), g}, {}, false}, 4}; CompressedBlockMetadata blockB6{ - {}, 0, {V(47), V(38), V(13)}, {V(47), V(38), V(15)}, {}, false, 5}; + {{}, 0, {V(47), V(38), V(13), g}, {V(47), V(38), V(15), g}, {}, false}, + 5}; // We are only interested in blocks with a col0 of `42`. CompressedRelationMetadata relationB; @@ -574,7 +592,7 @@ TEST(CompressedRelationReader, getBlocksForJoin) { std::vector blocksB{blockB1, blockB2, blockB3, blockB4, blockB5, blockB6}; CompressedRelationReader::ScanSpecAndBlocksAndBounds::FirstAndLastTriple - firstAndLastTripleB{{V(47), V(3), V(0)}, {V(47), V(38), V(15)}}; + firstAndLastTripleB{{V(47), V(3), V(0), g}, {V(47), V(38), V(15), g}}; CompressedRelationReader::ScanSpecAndBlocksAndBounds metadataAndBlocksB{ {{V(47), std::nullopt, std::nullopt}, blocksB}, firstAndLastTripleB}; @@ -612,16 +630,16 @@ TEST(CompressedRelationReader, getBlocksForJoin) { using FL = CompressedRelationReader::ScanSpecAndBlocksAndBounds::FirstAndLastTriple; metadataAndBlocks.firstAndLastTriple_ = - FL{{V(42), V(20), V(5)}, {V(42), V(20), V(63)}}; + FL{{V(42), V(20), V(5), g}, {V(42), V(20), V(63), g}}; metadataAndBlocksB.scanSpec_.setCol1Id(V(38)); metadataAndBlocksB.firstAndLastTriple_ = - FL{{V(47), V(38), V(5)}, {V(47), V(38), V(15)}}; + FL{{V(47), V(38), V(5), g}, {V(47), V(38), V(15), g}}; test({std::vector{block4}, std::vector{blockB4, blockB5}}); // Fix only the col1Id of the left input. metadataAndBlocks.scanSpec_.setCol1Id(V(4)); metadataAndBlocks.firstAndLastTriple_ = - FL{{V(42), V(4), V(8)}, {V(42), V(4), V(12)}}; + FL{{V(42), V(4), V(8), g}, {V(42), V(4), V(12), g}}; metadataAndBlocksB.scanSpec_.setCol1Id(std::nullopt); metadataAndBlocksB.firstAndLastTriple_ = firstAndLastTripleB; test({std::vector{block2}, std::vector{blockB3}}); @@ -631,21 +649,22 @@ TEST(CompressedRelationReader, getBlocksForJoin) { metadataAndBlocks.firstAndLastTriple_ = firstAndLastTriple; metadataAndBlocksB.scanSpec_.setCol1Id(V(7)); metadataAndBlocksB.firstAndLastTriple_ = - FL{{V(47), V(7), V(13)}, {V(47), V(7), V(58)}}; + FL{{V(47), V(7), V(13), g}, {V(47), V(7), V(58), g}}; test({std::vector{block4, block5}, std::vector{blockB3}}); } TEST(CompressedRelationReader, PermutedTripleToString) { - auto tr = CompressedBlockMetadata::PermutedTriple{V(12), V(13), V(27)}; + auto tr = + CompressedBlockMetadata::PermutedTriple{V(12), V(13), V(27), V(12345)}; std::stringstream str; str << tr; - ASSERT_EQ(str.str(), "Triple: V:12 V:13 V:27\n"); + ASSERT_EQ(str.str(), "Triple: V:12 V:13 V:27 V:12345\n"); } TEST(CompressedRelationReader, filterDuplicatesAndGraphs) { auto table = makeIdTableFromVector({{3}, {4}, {5}}); CompressedBlockMetadata metadata{ - {}, 0, {V(16), V(0), V(0)}, {V(38), V(4), V(12)}, {}, false, 0}; + {{}, 0, {V(16), V(0), V(0), g}, {V(38), V(4), V(12), g}, {}, false}, 0}; using Filter = CompressedRelationReader::FilterDuplicatesAndGraphs; ScanSpecification::Graphs graphs = std::nullopt; Filter f{graphs, 43, false}; @@ -682,7 +701,7 @@ TEST(CompressedRelationReader, filterDuplicatesAndGraphs) { TEST(CompressedRelationReader, makeCanBeSkippedForBlock) { CompressedBlockMetadata metadata{ - {}, 0, {V(16), V(0), V(0)}, {V(38), V(4), V(12)}, {}, false, 0}; + {{}, 0, {V(16), V(0), V(0), g}, {V(38), V(4), V(12), g}, {}, false}, 0}; using Graphs = ScanSpecification::Graphs; Graphs graphs = std::nullopt; diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index 49cc5c8c3..e1c39e8bf 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -92,11 +92,12 @@ class DeltaTriplesTest : public ::testing::Test { const Index::Vocab& vocab, LocalVocab& localVocab, const std::vector& turtles) { auto toID = [&localVocab, &vocab](TurtleTriple triple) { - std::array ids{ + std::array ids{ std::move(triple.subject_).toValueId(vocab, localVocab), std::move(TripleComponent(triple.predicate_)) .toValueId(vocab, localVocab), - std::move(triple.object_).toValueId(vocab, localVocab)}; + std::move(triple.object_).toValueId(vocab, localVocab), + std::move(triple.graphIri_).toValueId(vocab, localVocab)}; return IdTriple<0>(ids); }; return ad_utility::transform( diff --git a/test/ExportQueryExecutionTreesTest.cpp b/test/ExportQueryExecutionTreesTest.cpp index ed8482d66..83f08b272 100644 --- a/test/ExportQueryExecutionTreesTest.cpp +++ b/test/ExportQueryExecutionTreesTest.cpp @@ -226,10 +226,12 @@ static const std::string xmlTrailer = "\n\n"; // Helper function for easier testing of the `IdTable` generator. std::vector convertToVector( - cppcoro::generator generator) { + cppcoro::generator + generator) { std::vector result; - for (const IdTable& idTable : generator) { - result.push_back(idTable.clone()); + for (const ExportQueryExecutionTrees::TableConstRefWithVocab& pair : + generator) { + result.push_back(pair.idTable_.clone()); } return result; } @@ -239,11 +241,11 @@ auto matchesIdTables(const auto&... tables) { return ElementsAre(matchesIdTable(tables)...); } -// Template is only required because inner class is not visible -template -std::vector convertToVector(cppcoro::generator generator) { +std::vector convertToVector( + cppcoro::generator generator) { std::vector result; - for (const auto& [idTable, range] : generator) { + for (const auto& [pair, range] : generator) { + const auto& idTable = pair.idTable_; result.emplace_back(idTable.numColumns(), idTable.getAllocator()); result.back().insertAtEnd(idTable.begin() + *range.begin(), idTable.begin() + *(range.end() - 1) + 1); @@ -1227,13 +1229,13 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); IdTable idTable2 = makeIdTableFromVector({{42}, {1337}}); auto tableGenerator = [](IdTable idTableA, - IdTable idTableB) -> cppcoro::generator { - co_yield idTableA; + IdTable idTableB) -> Result::Generator { + co_yield {std::move(idTableA), LocalVocab{}}; - co_yield idTableB; + co_yield {std::move(idTableB), LocalVocab{}}; }(idTable1.clone(), idTable2.clone()); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getIdTables(result); EXPECT_THAT(convertToVector(std::move(generator)), @@ -1242,12 +1244,13 @@ TEST(ExportQueryExecutionTrees, getIdTablesMirrorsGenerator) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 1}, result); @@ -1259,15 +1262,17 @@ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfSingleIdTable) { // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstIsSkipped) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = std::nullopt, ._offset = 3}, result); @@ -1280,15 +1285,17 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenLastIsSkipped) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3}, result); @@ -1301,15 +1308,17 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndSecondArePartial) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 3, ._offset = 1}, result); @@ -1323,18 +1332,21 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureCorrectSlicingOfIdTablesWhenFirstAndLastArePartial) { - auto tableGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}, {2}, {3}}); - co_yield idTable1; - - IdTable idTable2 = makeIdTableFromVector({{4}, {5}}); - co_yield idTable2; - - IdTable idTable3 = makeIdTableFromVector({{6}, {7}, {8}, {9}}); - co_yield idTable3; + auto tableGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}, {2}, {3}}), + LocalVocab{}}; + co_yield pair1; + + Result::IdTableVocabPair pair2{makeIdTableFromVector({{4}, {5}}), + LocalVocab{}}; + co_yield pair2; + + Result::IdTableVocabPair pair3{makeIdTableFromVector({{6}, {7}, {8}, {9}}), + LocalVocab{}}; + co_yield pair3; }(); - Result result{std::move(tableGenerator), {}, LocalVocab{}}; + Result result{std::move(tableGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 5, ._offset = 2}, result); @@ -1350,28 +1362,29 @@ TEST(ExportQueryExecutionTrees, // _____________________________________________________________________________ TEST(ExportQueryExecutionTrees, ensureGeneratorIsNotConsumedWhenNotRequired) { { - auto throwingGenerator = []() -> cppcoro::generator { + auto throwingGenerator = []() -> Result::Generator { ADD_FAILURE() << "Generator was started" << std::endl; throw std::runtime_error("Generator was started"); co_return; }(); - Result result{std::move(throwingGenerator), {}, LocalVocab{}}; + Result result{std::move(throwingGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 0, ._offset = 0}, result); EXPECT_NO_THROW(convertToVector(std::move(generator))); } { - auto throwAfterYieldGenerator = []() -> cppcoro::generator { - IdTable idTable1 = makeIdTableFromVector({{1}}); - co_yield idTable1; + auto throwAfterYieldGenerator = []() -> Result::Generator { + Result::IdTableVocabPair pair1{makeIdTableFromVector({{1}}), + LocalVocab{}}; + co_yield pair1; ADD_FAILURE() << "Generator was resumed" << std::endl; throw std::runtime_error("Generator was resumed"); }(); - Result result{std::move(throwAfterYieldGenerator), {}, LocalVocab{}}; + Result result{std::move(throwAfterYieldGenerator), {}}; auto generator = ExportQueryExecutionTrees::getRowIndices( LimitOffsetClause{._limit = 1, ._offset = 0}, result); IdTable referenceTable1 = makeIdTableFromVector({{1}}); diff --git a/test/FilterTest.cpp b/test/FilterTest.cpp index 36915261b..e68350354 100644 --- a/test/FilterTest.cpp +++ b/test/FilterTest.cpp @@ -18,10 +18,14 @@ namespace { ValueId asBool(bool value) { return Id::makeFromBool(value); } // Convert a generator to a vector for easier comparison in assertions -std::vector toVector(cppcoro::generator generator) { +std::vector toVector(Result::Generator generator) { std::vector result; - for (auto& table : generator) { - result.push_back(std::move(table)); + for (auto& pair : generator) { + // IMPORTANT: The `LocalVocab` contained in the pair will be destroyed at + // the end of the iteration. The underlying assumption is that the + // `LocalVocab` will be empty and the `IdTable` won't contain any dangling + // references. + result.push_back(std::move(pair.idTable_)); } return result; } diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index bbfc0ae6e..bb307fecd 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -1952,10 +1952,10 @@ class GroupByLazyFixture : public ::testing::TestWithParam { ASSERT_NE(result.isFullyMaterialized(), lazyResult); if (lazyResult) { size_t counter = 0; - for (const IdTable& idTable : result.idTables()) { + for (const Result::IdTableVocabPair& pair : result.idTables()) { ASSERT_LT(counter, idTables.size()) << "Too many idTables yielded. Expected: " << idTables.size(); - EXPECT_EQ(idTables.at(counter), idTable); + EXPECT_EQ(idTables.at(counter), pair.idTable_); ++counter; } EXPECT_EQ(counter, idTables.size()) @@ -2130,10 +2130,9 @@ TEST_P(GroupByLazyFixture, nestedAggregateFunctionsWork) { auto result = groupBy.computeResultOnlyForTesting(GetParam()); // Acquire the local vocab index for a given string representation if present. - auto makeEntry = [&result](std::string string) { - return result.localVocab().getIndexOrNullopt( - sparqlExpression::detail::LiteralOrIri{ - L::fromStringRepresentation(std::move(string))}); + auto makeEntry = [](std::string string, const LocalVocab& localVocab) { + return localVocab.getIndexOrNullopt(sparqlExpression::detail::LiteralOrIri{ + L::fromStringRepresentation(std::move(string))}); }; auto entryToId = [](std::optional entry) { @@ -2144,30 +2143,30 @@ TEST_P(GroupByLazyFixture, nestedAggregateFunctionsWork) { auto i = IntId; if (GetParam()) { - EXPECT_EQ(result.localVocab().size(), 0); - auto& generator = result.idTables(); auto iterator = generator.begin(); ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(result.localVocab().size(), 2); - auto entry1 = makeEntry("\"1---\""); - auto entry2 = makeEntry("\"6---\""); - EXPECT_EQ(*iterator, makeIdTableFromVector({{i(0), entryToId(entry1)}, - {i(1), entryToId(entry2)}})); + EXPECT_EQ(iterator->localVocab_.size(), 2); + auto entry1 = makeEntry("\"1---\"", iterator->localVocab_); + auto entry2 = makeEntry("\"6---\"", iterator->localVocab_); + EXPECT_EQ(iterator->idTable_, + makeIdTableFromVector( + {{i(0), entryToId(entry1)}, {i(1), entryToId(entry2)}})); ++iterator; ASSERT_NE(iterator, generator.end()); - EXPECT_EQ(result.localVocab().size(), 3); - auto entry3 = makeEntry("\"8---\""); - EXPECT_EQ(*iterator, makeIdTableFromVector({{i(2), entryToId(entry3)}})); + EXPECT_EQ(iterator->localVocab_.size(), 1); + auto entry3 = makeEntry("\"8---\"", iterator->localVocab_); + EXPECT_EQ(iterator->idTable_, + makeIdTableFromVector({{i(2), entryToId(entry3)}})); EXPECT_EQ(++iterator, generator.end()); } else { EXPECT_EQ(result.localVocab().size(), 3); - auto entry1 = makeEntry("\"1---\""); - auto entry2 = makeEntry("\"6---\""); - auto entry3 = makeEntry("\"8---\""); + auto entry1 = makeEntry("\"1---\"", result.localVocab()); + auto entry2 = makeEntry("\"6---\"", result.localVocab()); + auto entry3 = makeEntry("\"8---\"", result.localVocab()); ASSERT_TRUE(entry1.has_value()); ASSERT_TRUE(entry2.has_value()); diff --git a/test/IdTripleTest.cpp b/test/IdTripleTest.cpp index b5de53fbd..56c776bcc 100644 --- a/test/IdTripleTest.cpp +++ b/test/IdTripleTest.cpp @@ -13,8 +13,8 @@ using namespace ad_utility::testing; TEST(IdTripleTest, constructors) { - const std::array ids{Id::makeFromInt(42), VocabId(10), - Id::makeFromBool(false)}; + const std::array ids{Id::makeFromInt(42), VocabId(10), + Id::makeFromBool(false), VocabId(123)}; const std::array payload{Id::makeFromDouble(3.14), Id::makeFromBool(true)}; @@ -34,29 +34,31 @@ TEST(IdTripleTest, constructors) { } TEST(IdTripleTest, permute) { - std::array ids{VocabId(0), VocabId(1), VocabId(2)}; + auto V = VocabId; + std::array ids{V(0), V(1), V(2), V(3)}; // Without a payload { IdTriple<0> idTriple{ids}; EXPECT_THAT(idTriple.permute({1, 0, 2}), - testing::Eq(IdTriple{ - std::array{VocabId(1), VocabId(0), VocabId(2)}})); + testing::Eq(IdTriple{std::array{ + VocabId(1), VocabId(0), VocabId(2), VocabId(3)}})); } // With a payload { IdTriple<2> idTriple(ids, {IntId(10), IntId(5)}); EXPECT_THAT(idTriple.permute({1, 0, 2}), - testing::Eq(IdTriple<2>({VocabId(1), VocabId(0), VocabId(2)}, - {IntId(10), IntId(5)}))); + testing::Eq(IdTriple<2>( + {VocabId(1), VocabId(0), VocabId(2), VocabId(3)}, + {IntId(10), IntId(5)}))); } } TEST(IdTripleTest, toPermutedTriple) { { - IdTriple<0> idTriple({VocabId(0), VocabId(10), VocabId(5)}); + IdTriple<0> idTriple({VocabId(0), VocabId(10), VocabId(5), VocabId(42)}); EXPECT_THAT(idTriple.toPermutedTriple(), testing::Eq(CompressedBlockMetadata::PermutedTriple{ - VocabId(0), VocabId(10), VocabId(5)})); + VocabId(0), VocabId(10), VocabId(5), VocabId(42)})); } } diff --git a/test/IndexMetaDataTest.cpp b/test/IndexMetaDataTest.cpp index 98c36985d..fa6bc07a8 100644 --- a/test/IndexMetaDataTest.cpp +++ b/test/IndexMetaDataTest.cpp @@ -13,16 +13,18 @@ namespace { auto V = ad_utility::testing::VocabId; -} +// A default/dummy graph used for several tests. +Id g = V(123405); +} // namespace TEST(RelationMetaDataTest, writeReadTest) { - CompressedBlockMetadata rmdB{{{12, 34}, {46, 11}}, - 5, - {V(0), V(2), V(13)}, - {V(3), V(24), V(62)}, - std::vector{V(85)}, - true, - 13}; + CompressedBlockMetadata rmdB{{{{12, 34}, {46, 11}}, + 5, + {V(0), V(2), V(13), g}, + {V(3), V(24), V(62), g}, + std::vector{V(85)}, + true}, + 1039}; CompressedRelationMetadata rmdF{V(1), 3, 2.0, 42.0, 16}; ad_utility::serialization::FileWriteSerializer f("_testtmp.rmd"); @@ -46,19 +48,19 @@ TEST(IndexMetaDataTest, writeReadTest2Mmap) { std::string mmapFilename = imdFilename + ".mmap"; vector bs; // A value for the Graph Id. - bs.push_back(CompressedBlockMetadata{{{12, 34}, {42, 17}}, - 5, - {V(0), V(2), V(13)}, - {V(2), V(24), V(62)}, - std::vector{V(512)}, - true, + bs.push_back(CompressedBlockMetadata{{{{12, 34}, {42, 17}}, + 5, + {V(0), V(2), V(13), g}, + {V(2), V(24), V(62), g}, + std::vector{V(512)}, + true}, 17}); - bs.push_back(CompressedBlockMetadata{{{12, 34}, {16, 12}}, - 5, - {V(0), V(2), V(13)}, - {V(3), V(24), V(62)}, - {}, - false, + bs.push_back(CompressedBlockMetadata{{{{12, 34}, {16, 12}}, + 5, + {V(0), V(2), V(13), g}, + {V(3), V(24), V(62), g}, + {}, + false}, 18}); CompressedRelationMetadata rmdF{V(1), 3, 2.0, 42.0, 16}; CompressedRelationMetadata rmdF2{V(2), 5, 3.0, 43.0, 10}; diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp index 297c119b9..c4364023f 100644 --- a/test/LocatedTriplesTest.cpp +++ b/test/LocatedTriplesTest.cpp @@ -15,17 +15,24 @@ namespace { auto V = ad_utility::testing::VocabId; +// A default graph used in this test. +int g = 123948; -auto IT = [](const auto& c1, const auto& c2, const auto& c3) { - return IdTriple{std::array{V(c1), V(c2), V(c3)}}; +void addGraphColumn(IdTable& block) { + block.addEmptyColumn(); + std::ranges::fill(block.getColumn(block.numColumns() - 1), V(g)); +} + +auto IT = [](const auto& c1, const auto& c2, const auto& c3, int graph = g) { + return IdTriple{std::array{V(c1), V(c2), V(c3), V(graph)}}; }; -auto PT = [](const auto& c1, const auto& c2, const auto& c3) { - return CompressedBlockMetadata::PermutedTriple{V(c1), V(c2), V(c3)}; +auto PT = [](const auto& c1, const auto& c2, const auto& c3, int graph = g) { + return CompressedBlockMetadata::PermutedTriple{V(c1), V(c2), V(c3), V(graph)}; }; auto CBM = [](const auto firstTriple, const auto lastTriple) { size_t dummyBlockIndex = 0; - return CompressedBlockMetadata{{}, 0, firstTriple, lastTriple, - {}, false, dummyBlockIndex}; + return CompressedBlockMetadata{{{}, 0, firstTriple, lastTriple, {}, false}, + dummyBlockIndex}; }; auto numBlocks = @@ -237,7 +244,14 @@ TEST_F(LocatedTriplesTest, mergeTriples) { {4, 10, 10} // LT 7 }); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } @@ -272,11 +286,18 @@ TEST_F(LocatedTriplesTest, mergeTriples) { {30, 10} // LT 5 }); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 2); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 2, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 2, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } - // Merge the `LocatesTriples` into a block with 1 index columns. + // Merge the `LocatesTriples` into a block with 1 index column. { IdTable block = makeIdTableFromVector({ {10}, // Row 0 @@ -299,7 +320,14 @@ TEST_F(LocatedTriplesTest, mergeTriples) { {30} // orig. Row 5 }); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 1); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 1, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 1, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } @@ -310,7 +338,14 @@ TEST_F(LocatedTriplesTest, mergeTriples) { makeLocatedTriplesPerBlock({LT{1, IT(1, 3, 5), true}}); IdTable resultExpected = block.clone(); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } @@ -322,7 +357,14 @@ TEST_F(LocatedTriplesTest, mergeTriples) { LT{1, IT(1, 3, 5), false}}); IdTable resultExpected = makeIdTableFromVector({{1, 2, 3}, {1, 7, 9}}); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } @@ -341,7 +383,17 @@ TEST_F(LocatedTriplesTest, mergeTriples) { {1, 3, 6, UndefId(), UndefId()}, {1, 7, 9, IntId(13), IntId(14)}}); - auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3); + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, false); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + + // Run the same test with a constant graph column that is part of the + // result. + addGraphColumn(block); + addGraphColumn(resultExpected); + block.setColumnSubset(std::array{0u, 1u, 2u, 5u, 3u, 4u}); + resultExpected.setColumnSubset( + std::array{0u, 1u, 2u, 5u, 3u, 4u}); + merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); } @@ -366,7 +418,7 @@ TEST_F(LocatedTriplesTest, mergeTriples) { LT{1, IT(4, 10, 10), true}, // Insert after row 5 }); - EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(2, block, 3), + EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(2, block, 3, false), ad_utility::Exception); } @@ -392,7 +444,7 @@ TEST_F(LocatedTriplesTest, mergeTriples) { LT{1, IT(3, 30, 30), false}, // Delete row 5 LT{1, IT(4, 10, 10), true}, // Insert after row 5 }); - EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 3), + EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 3, false), ad_utility::Exception); } @@ -417,7 +469,7 @@ TEST_F(LocatedTriplesTest, mergeTriples) { LT{1, IT(3, 30, 30), false}, // Delete row 5 LT{1, IT(4, 10, 10), true}, // Insert after row 5 }); - EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 4), + EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 4, false), ad_utility::Exception); } @@ -435,11 +487,119 @@ TEST_F(LocatedTriplesTest, mergeTriples) { LT{1, IT(1, 5, 10), true}, // Insert before row 0 LT{1, IT(2, 11, 10), true}, // Insert before row 1 }); - EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 0), + EXPECT_THROW(locatedTriplesPerBlock.mergeTriples(1, block, 0, false), ad_utility::Exception); } } +// Test the `mergeTriples` functions with inputs that contain different graphs. +TEST_F(LocatedTriplesTest, mergeTriplesWithGraph) { + using LT = LocatedTriple; + std::vector emptyMetadata; + + // Merge the `LocatesTriples` into a block with 3 index columns. + { + IdTable block = makeIdTableFromVector({ + {1, 10, 10, 0}, // Row 0 + {2, 15, 20, 0}, // Row 1 + {2, 15, 20, 1}, // Row 2 + {2, 15, 20, 2}, // Row 3 + {2, 15, 30, 1}, // Row 4 + {2, 15, 30, 3}, // Row 5 + {3, 30, 30, 0} // Row 6 + }); + auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock({ + LT{1, IT(1, 5, 10, 3), true}, // Insert before row 0 + LT{1, IT(2, 15, 20, 1), false}, // Delete row 2 + LT{1, IT(2, 15, 20, 3), false}, // Delete non-existent row + LT{1, IT(2, 15, 30, 2), true}, // Insert between 4 and 5 + + }); + IdTable resultExpected = makeIdTableFromVector({ + {1, 5, 10, 3}, // LT 0 + {1, 10, 10, 0}, + {2, 15, 20, 0}, // Row 1 + {2, 15, 20, 2}, // Row 3 + {2, 15, 30, 1}, // Row 4 + {2, 15, 30, 2}, // LT 3 + {2, 15, 30, 3}, // Row 5 + {3, 30, 30, 0} // Row 6 + }); + + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + } + + // Merge the `LocatesTriples` into a block with 2 index columns. This may + // happen if all triples in a block have the same value for the first column. + { + IdTable block = makeIdTableFromVector({ + {10, 10, 1}, // Row 0 + {15, 20, 2}, // Row 1 + {15, 30, 1}, // Row 2 + {20, 10, 2}, // Row 3 + }); + auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock({ + LT{1, IT(1, 10, 10, 1), false}, // Delete row 0 + LT{1, IT(1, 13, 20, 3), true}, // Insert before row 1 + LT{1, IT(1, 15, 20, 1), true}, // Insert before row 1 + LT{1, IT(1, 15, 20, 2), true}, // Insert already existing row + LT{1, IT(1, 20, 10, 1), false}, // Delete non-existent row + }); + + IdTable resultExpected = makeIdTableFromVector({{13, 20, 3}, // LT 1 + {15, 20, 1}, // LT 2 + {15, 20, 2}, // Row 1 + {15, 30, 1}, // Row 2 + {20, 10, 2}}); // Row 3 + + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 2, true); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + } + + // Merge the `LocatesTriples` into a block with 1 index column. + { + IdTable block = makeIdTableFromVector({ + {10, 0}, // Row 0 + {10, 1}, // Row 1 + {12, 1}, // Row 2 + {20, 0}, // Row 3 + }); + auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock({ + LT{1, IT(1, 1, 10, 1), false}, // Delete row 1 + LT{1, IT(1, 1, 12, 0), true}, // Insert before row 2 + }); + IdTable resultExpected = makeIdTableFromVector({ + {10, 0}, // Row 0 + {12, 0}, // LT 1 + {12, 1}, // Row 2 + {20, 0} // Row 3 + }); + + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 1, true); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + } + + // Merging if the block has additional columns. + { + auto IntId = ad_utility::testing::IntId; + auto UndefId = ad_utility::testing::UndefId; + + IdTable block = makeIdTableFromVector({{1, 2, 3, 0, IntId(10), IntId(11)}, + {1, 2, 3, 2, IntId(12), IntId(11)}, + {1, 7, 9, 1, IntId(13), IntId(14)}}); + auto locatedTriplesPerBlock = makeLocatedTriplesPerBlock( + {LT{1, IT(1, 2, 3, 1), true}, LT{1, IT(1, 7, 9, 1), false}}); + IdTable resultExpected = + makeIdTableFromVector({{1, 2, 3, 0, IntId(10), IntId(11)}, + {1, 2, 3, 1, UndefId(), UndefId()}, + {1, 2, 3, 2, IntId(12), IntId(11)}}); + + auto merged = locatedTriplesPerBlock.mergeTriples(1, block, 3, true); + EXPECT_THAT(merged, testing::ElementsAreArray(resultExpected)); + } +} + // Test the locating of triples in a permutation using `locatedTriple`. TEST_F(LocatedTriplesTest, locatedTriple) { using LT = LocatedTriple; @@ -643,9 +803,9 @@ TEST_F(LocatedTriplesTest, debugPrints) { { LocatedTriples lts; EXPECT_THAT(lts, InsertIntoStream(testing::StrEq("{ }"))); - lts.insert(LT(0, IT(1, 1, 1), true)); + lts.insert(LT(0, IT(1, 1, 1, 28), true)); EXPECT_THAT(lts, InsertIntoStream(testing::StrEq( - "{ LT(0 IdTriple(V:1, V:1, V:1, ) 1) }"))); + "{ LT(0 IdTriple(V:1, V:1, V:1, V:28, ) 1) }"))); } { @@ -653,15 +813,15 @@ TEST_F(LocatedTriplesTest, debugPrints) { ltpb.setOriginalMetadata(std::vector{CBM(PT(1, 1, 1), PT(1, 10, 15))}); EXPECT_THAT(ltpb, InsertIntoStream(testing::StrEq(""))); ltpb.add(std::vector{LT(0, IT(1, 1, 1), true)}); - EXPECT_THAT( - ltpb, InsertIntoStream(testing::StrEq( - "LTs in Block #0: { LT(0 IdTriple(V:1, V:1, V:1, ) 1) }\n"))); + EXPECT_THAT(ltpb, InsertIntoStream(testing::StrEq( + "LTs in Block #0: { LT(0 IdTriple(V:1, " + "V:1, V:1, V:123948, ) 1) }\n"))); } { std::vector> idts{IT(0, 0, 0), IT(1, 2, 3)}; - EXPECT_THAT(idts, - InsertIntoStream(testing::StrEq( - "IdTriple(V:0, V:0, V:0, ), IdTriple(V:1, V:2, V:3, ), "))); + EXPECT_THAT(idts, InsertIntoStream(testing::StrEq( + "IdTriple(V:0, V:0, V:0, V:123948, ), IdTriple(V:1, " + "V:2, V:3, V:123948, ), "))); } } diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index e0f779759..5b3591dea 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -406,18 +406,18 @@ TEST(Operation, ensureSignalUpdateIsOnlyCalledEvery50msAndAtTheEnd) { index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; CustomGeneratorOperation operation{ - &context, [](const IdTable& idTable) -> cppcoro::generator { + &context, [](const IdTable& idTable) -> Result::Generator { std::this_thread::sleep_for(50ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; // This one should not trigger because it's below the 50ms threshold std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; // This one should not trigger directly, but trigger because it's the // last one std::this_thread::sleep_for(30ms); - co_yield idTable.clone(); + co_yield {idTable.clone(), LocalVocab{}}; }(idTable)}; ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; @@ -449,9 +449,9 @@ TEST(Operation, ensureSignalUpdateIsCalledAtTheEndOfPartialConsumption) { index, &cache, makeAllocator(ad_utility::MemorySize::megabytes(100)), SortPerformanceEstimator{}, [&](std::string) { ++updateCallCounter; }}; CustomGeneratorOperation operation{ - &context, [](const IdTable& idTable) -> cppcoro::generator { - co_yield idTable.clone(); - co_yield idTable.clone(); + &context, [](const IdTable& idTable) -> Result::Generator { + co_yield {idTable.clone(), LocalVocab{}}; + co_yield {idTable.clone(), LocalVocab{}}; }(idTable)}; { @@ -523,7 +523,8 @@ TEST(Operation, ensureLazyOperationIsCachedIfSmallEnough) { timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } auto aggregatedValue = qec->getQueryTreeCache().getIfContained("test"); @@ -575,7 +576,8 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); qec->getQueryTreeCache().setMaxSizeSingleEntry(originalSize); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); @@ -597,7 +599,8 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - for ([[maybe_unused]] IdTable& _ : cacheValue.resultTable().idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : + cacheValue.resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); diff --git a/test/ResultTest.cpp b/test/ResultTest.cpp index a03b3034d..0dfc6e8bc 100644 --- a/test/ResultTest.cpp +++ b/test/ResultTest.cpp @@ -17,9 +17,8 @@ using ::testing::Values; namespace { // Helper function to generate all possible splits of an IdTable in order to // exhaustively test generator variants. -std::vector> getAllSubSplits( - const IdTable& idTable) { - std::vector> result; +std::vector getAllSubSplits(const IdTable& idTable) { + std::vector result; for (size_t i = 0; i < std::pow(idTable.size() - 1, 2); ++i) { std::vector reverseIndex{}; size_t copy = i; @@ -29,54 +28,48 @@ std::vector> getAllSubSplits( } copy /= 2; } - result.push_back( - [](auto split, IdTable clone) -> cppcoro::generator { - IdTable subSplit{clone.numColumns(), - ad_utility::makeUnlimitedAllocator()}; - size_t splitIndex = 0; - for (size_t i = 0; i < clone.size(); ++i) { - subSplit.push_back(clone[i]); - if (splitIndex < split.size() && split[splitIndex] == i) { - co_yield subSplit; - subSplit.clear(); - ++splitIndex; - } - } - if (subSplit.size() > 0) { - co_yield subSplit; - } - }(std::move(reverseIndex), idTable.clone())); + result.push_back([](auto split, IdTable clone) -> Result::Generator { + IdTable subSplit{clone.numColumns(), + ad_utility::makeUnlimitedAllocator()}; + size_t splitIndex = 0; + for (size_t i = 0; i < clone.size(); ++i) { + subSplit.push_back(clone[i]); + if (splitIndex < split.size() && split[splitIndex] == i) { + Result::IdTableVocabPair pair{std::move(subSplit), LocalVocab{}}; + co_yield pair; + // Move back if not moved out to reuse buffer. + subSplit = std::move(pair.idTable_); + subSplit.clear(); + ++splitIndex; + } + } + if (subSplit.size() > 0) { + co_yield {std::move(subSplit), LocalVocab{}}; + } + }(std::move(reverseIndex), idTable.clone())); } return result; } // _____________________________________________________________________________ -void consumeGenerator(cppcoro::generator& generator) { - for ([[maybe_unused]] IdTable& _ : generator) { +void consumeGenerator(Result::Generator& generator) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : generator) { } } } // namespace TEST(Result, verifyIdTableThrowsWhenActuallyLazy) { - Result result1{ - []() -> cppcoro::generator { co_return; }(), {}, LocalVocab{}}; - EXPECT_FALSE(result1.isFullyMaterialized()); - EXPECT_THROW(result1.idTable(), ad_utility::Exception); - - Result result2{[]() -> cppcoro::generator { co_return; }(), - {}, - result1.getSharedLocalVocab()}; - EXPECT_FALSE(result2.isFullyMaterialized()); - EXPECT_THROW(result2.idTable(), ad_utility::Exception); + Result result{[]() -> Result::Generator { co_return; }(), {}}; + EXPECT_FALSE(result.isFullyMaterialized()); + EXPECT_THROW(result.idTable(), ad_utility::Exception); } // _____________________________________________________________________________ TEST(Result, verifyIdTableThrowsOnSecondAccess) { - const Result result{ - []() -> cppcoro::generator { co_return; }(), {}, LocalVocab{}}; + const Result result{[]() -> Result::Generator { co_return; }(), {}}; // First access should work - for ([[maybe_unused]] IdTable& _ : result.idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& _ : result.idTables()) { ADD_FAILURE() << "Generator is empty"; } // Now it should throw @@ -108,7 +101,7 @@ TEST_P(ResultSortTest, verifyAssertSortOrderIsRespectedSucceedsWhenSorted) { auto idTable = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), std::get<1>(GetParam()), LocalVocab{}}; + Result result{std::move(generator), std::get<1>(GetParam())}; if (std::get<0>(GetParam())) { EXPECT_NO_THROW(consumeGenerator(result.idTables())); } else { @@ -147,7 +140,7 @@ TEST(Result, (Result{idTable.clone(), {3}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {3}, LocalVocab{}}; + Result result{std::move(generator), {3}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), matcher, Exception); } @@ -156,7 +149,7 @@ TEST(Result, (Result{idTable.clone(), {2, 1337}, LocalVocab{}}), matcher, Exception); for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {2, 1337}, LocalVocab{}}; + Result result{std::move(generator), {2, 1337}}; AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(consumeGenerator(result.idTables()), matcher, Exception); } @@ -178,17 +171,15 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { auto idTable2 = makeIdTableFromVector({{3, 4, 0}}); auto idTable3 = makeIdTableFromVector({{1, 6, 0}, {2, 5, 0}, {3, 4, 0}}); - Result result{ - [](auto& t1, auto& t2, auto& t3) -> cppcoro::generator { - std::this_thread::sleep_for(1ms); - co_yield t1; - std::this_thread::sleep_for(3ms); - co_yield t2; - std::this_thread::sleep_for(5ms); - co_yield t3; - }(idTable1, idTable2, idTable3), - {}, - LocalVocab{}}; + Result result{[](auto& t1, auto& t2, auto& t3) -> Result::Generator { + std::this_thread::sleep_for(1ms); + co_yield {t1.clone(), LocalVocab{}}; + std::this_thread::sleep_for(3ms); + co_yield {t2.clone(), LocalVocab{}}; + std::this_thread::sleep_for(5ms); + co_yield {t3.clone(), LocalVocab{}}; + }(idTable1, idTable2, idTable3), + {}}; uint32_t callCounter = 0; bool finishedConsuming = false; @@ -196,13 +187,13 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { [&](const IdTable& idTable, std::chrono::microseconds duration) { ++callCounter; if (callCounter == 1) { - EXPECT_EQ(&idTable1, &idTable); + EXPECT_EQ(idTable1, idTable); EXPECT_GE(duration, 1ms); } else if (callCounter == 2) { - EXPECT_EQ(&idTable2, &idTable); + EXPECT_EQ(idTable2, idTable); EXPECT_GE(duration, 3ms); } else if (callCounter == 3) { - EXPECT_EQ(&idTable3, &idTable); + EXPECT_EQ(idTable3, idTable); EXPECT_GE(duration, 5ms); } }, @@ -220,12 +211,11 @@ TEST(Result, verifyRunOnNewChunkComputedFiresCorrectly) { // _____________________________________________________________________________ TEST(Result, verifyRunOnNewChunkCallsFinishOnError) { Result result{ - []() -> cppcoro::generator { + []() -> Result::Generator { throw std::runtime_error{"verifyRunOnNewChunkCallsFinishOnError"}; co_return; }(), - {}, - LocalVocab{}}; + {}}; uint32_t callCounterGenerator = 0; uint32_t callCounterFinished = 0; @@ -252,11 +242,10 @@ TEST(Result, verifyRunOnNewChunkCallsFinishOnPartialConsumption) { uint32_t callCounterFinished = 0; { - Result result{[](IdTable idTable) -> cppcoro::generator { - co_yield idTable; + Result result{[](IdTable idTable) -> Result::Generator { + co_yield {std::move(idTable), LocalVocab{}}; }(makeIdTableFromVector({{}})), - {}, - LocalVocab{}}; + {}}; result.runOnNewChunkComputed( [&](const IdTable&, std::chrono::microseconds) { @@ -277,11 +266,11 @@ TEST(Result, verifyRunOnNewChunkCallsFinishOnPartialConsumption) { // _____________________________________________________________________________ TEST(Result, verifyCacheDuringConsumptionThrowsWhenFullyMaterialized) { Result result{makeIdTableFromVector({{}}), {}, LocalVocab{}}; - EXPECT_THROW( - result.cacheDuringConsumption( - [](const std::optional&, const IdTable&) { return true; }, - [](Result) {}), - ad_utility::Exception); + EXPECT_THROW(result.cacheDuringConsumption( + [](const std::optional&, + const Result::IdTableVocabPair&) { return true; }, + [](Result) {}), + ad_utility::Exception); } // _____________________________________________________________________________ @@ -290,16 +279,17 @@ TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { // Test positive case for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {0}, LocalVocab{}}; + Result result{std::move(generator), {0}}; result.cacheDuringConsumption( - [predictedSize = 0](const std::optional& aggregator, - const IdTable& newTable) mutable { + [predictedSize = 0]( + const std::optional& aggregator, + const Result::IdTableVocabPair& newTable) mutable { if (aggregator.has_value()) { - EXPECT_EQ(aggregator.value().numColumns(), predictedSize); + EXPECT_EQ(aggregator.value().idTable_.numColumns(), predictedSize); } else { EXPECT_EQ(predictedSize, 0); } - predictedSize += newTable.numColumns(); + predictedSize += newTable.idTable_.numColumns(); return true; }, [&](Result aggregatedResult) { @@ -312,9 +302,10 @@ TEST(Result, verifyCacheDuringConsumptionRespectsPassedParameters) { // Test negative case for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.cacheDuringConsumption( - [&](const std::optional& aggregator, const IdTable&) { + [&](const std::optional& aggregator, + const Result::IdTableVocabPair&) { EXPECT_FALSE(aggregator.has_value()); ++callCounter; return false; @@ -346,7 +337,7 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { for (auto& generator : getAllSubSplits(idTable)) { std::vector colSizes{}; uint32_t totalRows = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { // NOTE: duration can't be tested here, processors are too fast @@ -365,7 +356,7 @@ TEST(Result, verifyApplyLimitOffsetDoesCorrectlyApplyLimitAndOffset) { EXPECT_TRUE(colSizes.empty()); for (const auto& innerTable : result.idTables()) { - for (const auto& row : innerTable) { + for (const auto& row : innerTable.idTable_) { ASSERT_EQ(row.size(), 2); // Make sure we never get values that were supposed to be filtered // out. @@ -396,7 +387,7 @@ TEST(Result, verifyApplyLimitOffsetHandlesZeroLimitCorrectly) { for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable&) { ++callCounter; }); @@ -424,7 +415,7 @@ TEST(Result, verifyApplyLimitOffsetHandlesNonZeroOffsetWithoutLimitCorrectly) { for (auto& generator : getAllSubSplits(idTable)) { uint32_t callCounter = 0; - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable& innerTable) { for (const auto& row : innerTable) { @@ -457,7 +448,7 @@ TEST(Result, verifyApplyLimitOffsetIsNoOpWhenLimitClauseIsRedundant) { } for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.applyLimitOffset( limitOffset, [&](std::chrono::microseconds, const IdTable&) { ++callCounter; }); @@ -487,7 +478,7 @@ TEST_P(ResultLimitTest, } for (auto& generator : getAllSubSplits(idTable)) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.assertThatLimitWasRespected(std::get<1>(GetParam())); if (std::get<0>(GetParam())) { @@ -538,7 +529,7 @@ TEST_P(ResultDefinednessTest, } } for (auto& generator : getAllSubSplits(*std::get<1>(GetParam()))) { - Result result{std::move(generator), {}, LocalVocab{}}; + Result result{std::move(generator), {}}; result.checkDefinedness(map); if (std::get<0>(GetParam())) { EXPECT_NO_THROW(consumeGenerator(result.idTables())); diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index ff66ca993..8ee80703e 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -213,30 +213,42 @@ TEST_F(ServiceTest, computeResult) { // compute resulting idTable IdTable idTable{2, ad_utility::testing::makeAllocator()}; - for (auto& row : result.idTables()) { - idTable.insertAtEnd(row); + std::vector localVocabs{}; + for (auto& pair : result.idTables()) { + idTable.insertAtEnd(pair.idTable_); + localVocabs.emplace_back(std::move(pair.localVocab_)); } // create expected idTable - const auto& localVocab = result.localVocab(); - auto get = [&localVocab](const std::string& s) { - return localVocab.getIndexOrNullopt( - ad_utility::triple_component::LiteralOrIri::iriref(s)); + auto get = + [&localVocabs]( + const std::string& s) -> std::optional { + for (const LocalVocab& localVocab : localVocabs) { + auto index = localVocab.getIndexOrNullopt( + ad_utility::triple_component::LiteralOrIri::iriref(s)); + if (index.has_value()) { + return index; + } + } + return std::nullopt; }; std::vector> idVector; std::map ids; + size_t indexCounter = 0; for (auto& row : expIdTableVector) { auto& idVecRow = idVector.emplace_back(); for (auto& e : row) { if (!ids.contains(e)) { - auto idx = get(absl::StrCat("<", e, ">")); - ASSERT_TRUE(idx); + auto str = absl::StrCat("<", e, ">"); + auto idx = get(str); + ASSERT_TRUE(idx) << '\'' << str << "' not in local vocab"; ids.insert({e, Id::makeFromLocalVocabIndex(idx.value())}); + ++indexCounter; } idVecRow.emplace_back(ids.at(e)); } } - EXPECT_EQ(localVocab.size(), ids.size()); + EXPECT_EQ(indexCounter, ids.size()); EXPECT_EQ(idTable, makeIdTableFromVector(idVector)); }; diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 7d563e60c..b0f52f042 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -103,11 +103,11 @@ TEST(Union, computeUnionLazy) { auto iterator = result.begin(); ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected1); + ASSERT_EQ(iterator->idTable_, expected1); ++iterator; ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected2); + ASSERT_EQ(iterator->idTable_, expected2); ASSERT_EQ(++iterator, result.end()); }; @@ -142,11 +142,11 @@ TEST(Union, ensurePermutationIsAppliedCorrectly) { auto iterator = result.begin(); ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected1); + ASSERT_EQ(iterator->idTable_, expected1); ++iterator; ASSERT_NE(iterator, result.end()); - ASSERT_EQ(*iterator, expected2); + ASSERT_EQ(iterator->idTable_, expected2); ASSERT_EQ(++iterator, result.end()); } diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index d0f4309f5..cecec3c4c 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -44,7 +44,7 @@ void expectBindYieldsIdTable( auto& idTables = result->idTables(); auto iterator = idTables.begin(); ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(*iterator, expected); + EXPECT_EQ(iterator->idTable_, expected); EXPECT_EQ(++iterator, idTables.end()); } } @@ -125,9 +125,9 @@ TEST( auto& idTables = result->idTables(); auto iterator = idTables.begin(); ASSERT_NE(iterator, idTables.end()); - EXPECT_EQ(*iterator, table); + EXPECT_EQ(iterator->idTable_, table); ASSERT_NE(++iterator, idTables.end()); - EXPECT_EQ(*iterator, makeIdTableFromVector({{val, val}})); + EXPECT_EQ(iterator->idTable_, makeIdTableFromVector({{val, val}})); EXPECT_EQ(++iterator, idTables.end()); } } diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 255973b38..58091931c 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -485,8 +485,8 @@ TEST(IndexScan, computeResultCanBeConsumedLazily) { IdTable resultTable{3, ad_utility::makeUnlimitedAllocator()}; - for (IdTable& idTable : result.idTables()) { - resultTable.insertAtEnd(idTable); + for (Result::IdTableVocabPair& pair : result.idTables()) { + resultTable.insertAtEnd(pair.idTable_); } EXPECT_EQ(resultTable, @@ -505,7 +505,7 @@ TEST(IndexScan, computeResultReturnsEmptyGeneratorIfScanIsEmpty) { ASSERT_FALSE(result.isFullyMaterialized()); - for ([[maybe_unused]] IdTable& idTable : result.idTables()) { + for ([[maybe_unused]] Result::IdTableVocabPair& pair : result.idTables()) { ADD_FAILURE() << "Generator should be empty" << std::endl; } } diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 70ee18354..6ebec0fc9 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -88,12 +88,13 @@ class ValuesForTesting : public Operation { for (const IdTable& idTable : tables_) { clones.push_back(idTable.clone()); } - auto generator = [](auto idTables) -> cppcoro::generator { + auto generator = [](auto idTables, + LocalVocab localVocab) -> Result::Generator { for (IdTable& idTable : idTables) { - co_yield std::move(idTable); + co_yield {std::move(idTable), localVocab.clone()}; } - }(std::move(clones)); - return {std::move(generator), resultSortedOn(), localVocab_.clone()}; + }(std::move(clones), localVocab_.clone()); + return {std::move(generator), resultSortedOn()}; } std::optional optionalTable; if (tables_.size() > 1) { diff --git a/test/util/OperationTestHelpers.h b/test/util/OperationTestHelpers.h index 480b76536..a2d0fdc81 100644 --- a/test/util/OperationTestHelpers.h +++ b/test/util/OperationTestHelpers.h @@ -106,19 +106,19 @@ class AlwaysFailOperation : public Operation { if (!requestLaziness) { throw std::runtime_error{"AlwaysFailOperation"}; } - return {[]() -> cppcoro::generator { + return {[]() -> Result::Generator { throw std::runtime_error{"AlwaysFailOperation"}; // Required so that the exception only occurs within the generator co_return; }(), - resultSortedOn(), LocalVocab{}}; + resultSortedOn()}; } }; // Lazy operation that will yield a result with a custom generator you can // provide via the constructor. class CustomGeneratorOperation : public Operation { - cppcoro::generator generator_; + Result::Generator generator_; std::vector getChildren() override { return {}; } string getCacheKeyImpl() const override { AD_FAIL(); } string getDescriptor() const override { @@ -134,11 +134,11 @@ class CustomGeneratorOperation : public Operation { public: CustomGeneratorOperation(QueryExecutionContext* context, - cppcoro::generator generator) + Result::Generator generator) : Operation{context}, generator_{std::move(generator)} {} ProtoResult computeResult(bool requestLaziness) override { AD_CONTRACT_CHECK(requestLaziness); - return {std::move(generator_), resultSortedOn(), LocalVocab{}}; + return {std::move(generator_), resultSortedOn()}; } };