diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index fa1cf6ca3..0f3713753 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -13,5 +13,5 @@ add_library(engine VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp - CountConnectedSubgraphs.cpp) + CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/SpatialJoin.cpp b/src/engine/SpatialJoin.cpp index 8abaf95d7..19757f1ed 100644 --- a/src/engine/SpatialJoin.cpp +++ b/src/engine/SpatialJoin.cpp @@ -20,6 +20,7 @@ #include #include "engine/ExportQueryExecutionTrees.h" +#include "engine/SpatialJoinAlgorithms.h" #include "engine/VariableToColumnMap.h" #include "engine/idTable/IdTable.h" #include "global/Constants.h" @@ -287,70 +288,7 @@ vector SpatialJoin::resultSortedOn() const { } // ____________________________________________________________________________ -std::optional SpatialJoin::getPoint(const IdTable* restable, - size_t row, - ColumnIndex col) const { - auto id = restable->at(row, col); - return id.getDatatype() == Datatype::GeoPoint - ? std::optional{id.getGeoPoint()} - : std::nullopt; -}; - -// ____________________________________________________________________________ -Id SpatialJoin::computeDist(const IdTable* idTableLeft, - const IdTable* idTableRight, size_t rowLeft, - size_t rowRight, ColumnIndex leftPointCol, - ColumnIndex rightPointCol) const { - auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol); - auto point2 = getPoint(idTableRight, rowRight, rightPointCol); - if (!point1.has_value() || !point2.has_value()) { - return Id::makeUndefined(); - } - return Id::makeFromInt( - ad_utility::detail::wktDistImpl(point1.value(), point2.value()) * 1000); -} - -// ____________________________________________________________________________ -void SpatialJoin::addResultTableEntry(IdTable* result, - const IdTable* idTableLeft, - const IdTable* idTableRight, - size_t rowLeft, size_t rowRight, - Id distance) const { - // this lambda function copies elements from copyFrom - // into the table res. It copies them into the row - // rowIndRes and column column colIndRes. It returns the column number - // until which elements were copied - auto addColumns = [](IdTable* res, const IdTable* copyFrom, size_t rowIndRes, - size_t colIndRes, size_t rowIndCopy) { - size_t col = 0; - while (col < copyFrom->numColumns()) { - res->at(rowIndRes, colIndRes) = (*copyFrom).at(rowIndCopy, col); - colIndRes += 1; - col += 1; - } - return colIndRes; - }; - - auto resrow = result->numRows(); - result->emplace_back(); - // add columns to result table - size_t rescol = 0; - rescol = addColumns(result, idTableLeft, resrow, rescol, rowLeft); - rescol = addColumns(result, idTableRight, resrow, rescol, rowRight); - - if (addDistToResult_) { - result->at(resrow, rescol) = distance; - // rescol isn't used after that in this function, but future updates, - // which add additional columns, would need to remember to increase - // rescol at this place otherwise. If they forget to do this, the - // distance column will be overwritten, the variableToColumnMap will - // not work and so on - // rescol += 1; - } -} - -// ____________________________________________________________________________ -SpatialJoin::PreparedJoinParams SpatialJoin::prepareJoin() const { +PreparedSpatialJoinParams SpatialJoin::prepareJoin() const { auto getIdTable = [](std::shared_ptr child) { std::shared_ptr resTable = child->getResult(); auto idTablePtr = &resTable->idTable(); @@ -365,8 +303,8 @@ SpatialJoin::PreparedJoinParams SpatialJoin::prepareJoin() const { }; // Input tables - const auto [idTableLeft, resultLeft] = getIdTable(childLeft_); - const auto [idTableRight, resultRight] = getIdTable(childRight_); + auto [idTableLeft, resultLeft] = getIdTable(childLeft_); + auto [idTableRight, resultRight] = getIdTable(childRight_); // Input table columns for the join ColumnIndex leftJoinCol = getJoinCol(childLeft_, leftChildVariable_); @@ -374,163 +312,21 @@ SpatialJoin::PreparedJoinParams SpatialJoin::prepareJoin() const { // Size of output table size_t numColumns = getResultWidth(); - return PreparedJoinParams{ - idTableLeft, std::move(resultLeft), idTableRight, std::move(resultRight), - leftJoinCol, rightJoinCol, numColumns}; -} - -// ____________________________________________________________________________ -Result SpatialJoin::baselineAlgorithm() { - const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, - rightJoinCol, numColumns] = prepareJoin(); - IdTable result{numColumns, _executionContext->getAllocator()}; - auto maxDist = getMaxDist(); - auto maxResults = getMaxResults(); - - // cartesian product between the two tables, pairs are restricted according to - // `maxDistance_` and `maxResults_` - for (size_t rowLeft = 0; rowLeft < idTableLeft->size(); rowLeft++) { - // This priority queue stores the intermediate best results if `maxResults_` - // is used. Each intermediate result is stored as a pair of its `rowRight` - // and distance. Since the queue will hold at most `maxResults_ + 1` items, - // it is not a memory concern. - auto compare = [](std::pair a, - std::pair b) { - return a.second < b.second; - }; - std::priority_queue, - std::vector>, - decltype(compare)> - intermediate(compare); - - // Inner loop of cartesian product - for (size_t rowRight = 0; rowRight < idTableRight->size(); rowRight++) { - Id dist = computeDist(idTableLeft, idTableRight, rowLeft, rowRight, - leftJoinCol, rightJoinCol); - - // Ensure `maxDist_` constraint - if (dist.getDatatype() != Datatype::Int || - (maxDist.has_value() && - dist.getInt() > static_cast(maxDist.value()))) { - continue; - } - - // If there is no `maxResults_` we can add the result row immediately - if (!maxResults.has_value()) { - addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, - rowRight, dist); - continue; - } - - // Ensure `maxResults_` constraint using priority queue - intermediate.push(std::pair{rowRight, dist.getInt()}); - // Too many results? Drop the worst one - if (intermediate.size() > maxResults.value()) { - intermediate.pop(); - } - } - - // If we are using the priority queue, we didn't add the results in the - // inner loop, so we do it now. - if (maxResults.has_value()) { - size_t numResults = intermediate.size(); - for (size_t item = 0; item < numResults; item++) { - // Get and remove largest item from priority queue - auto [rowRight, dist] = intermediate.top(); - intermediate.pop(); - - addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, - rowRight, Id::makeFromInt(dist)); - } - } - } - return Result(std::move(result), std::vector{}, - Result::getMergedLocalVocab(*resultLeft, *resultRight)); -} - -// ____________________________________________________________________________ -Result SpatialJoin::s2geometryAlgorithm() { - const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, - rightJoinCol, numColumns] = prepareJoin(); - IdTable result{numColumns, _executionContext->getAllocator()}; - auto maxDist = getMaxDist(); - auto maxResults = getMaxResults(); - - // Helper function to convert `GeoPoint` to `S2Point` - auto constexpr toS2Point = [](const GeoPoint& p) { - auto lat = p.getLat(); - auto lng = p.getLng(); - auto latlng = S2LatLng::FromDegrees(lat, lng); - return S2Point{latlng}; - }; - - S2PointIndex s2index; - - // Optimization: If we only search by maximum distance, the operation is - // symmetric, so the larger table can be used for the index - bool indexOfRight = - (maxResults.has_value() || (idTableLeft->size() > idTableRight->size())); - auto indexTable = indexOfRight ? idTableRight : idTableLeft; - auto indexJoinCol = indexOfRight ? rightJoinCol : leftJoinCol; - - // Populate the index - for (size_t row = 0; row < indexTable->size(); row++) { - auto p = getPoint(indexTable, row, indexJoinCol); - if (p.has_value()) { - s2index.Add(toS2Point(p.value()), row); - } - } - - // Performs a nearest neighbor search on the index and returns the closest - // points that satisfy the criteria given by `maxDist_` and `maxResults_`. - - // Construct a query object with the given constraints - auto s2query = S2ClosestPointQuery{&s2index}; - - if (maxResults.has_value()) { - s2query.mutable_options()->set_max_results( - static_cast(maxResults.value())); - } - if (maxDist.has_value()) { - s2query.mutable_options()->set_inclusive_max_distance(S2Earth::ToAngle( - util::units::Meters(static_cast(maxDist.value())))); - } - - auto searchTable = indexOfRight ? idTableLeft : idTableRight; - auto searchJoinCol = indexOfRight ? leftJoinCol : rightJoinCol; - - // Use the index to lookup the points of the other table - for (size_t searchRow = 0; searchRow < searchTable->size(); searchRow++) { - auto p = getPoint(searchTable, searchRow, searchJoinCol); - if (!p.has_value()) { - continue; - } - auto s2target = - S2ClosestPointQuery::PointTarget{toS2Point(p.value())}; - - for (const auto& neighbor : s2query.FindClosestPoints(&s2target)) { - // In this loop we only receive points that already satisfy the given - // criteria - auto indexRow = neighbor.data(); - auto dist = static_cast(S2Earth::ToMeters(neighbor.distance())); - - auto rowLeft = indexOfRight ? searchRow : indexRow; - auto rowRight = indexOfRight ? indexRow : searchRow; - addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, rowRight, - Id::makeFromInt(dist)); - } - } - - return Result(std::move(result), std::vector{}, - Result::getMergedLocalVocab(*resultLeft, *resultRight)); + return PreparedSpatialJoinParams{idTableLeft, std::move(resultLeft), + idTableRight, std::move(resultRight), + leftJoinCol, rightJoinCol, + numColumns, getMaxDist(), + getMaxResults()}; } // ____________________________________________________________________________ Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) { + SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(), + addDistToResult_, config_}; if (useBaselineAlgorithm_) { - return baselineAlgorithm(); + return algorithms.BaselineAlgorithm(); } else { - return s2geometryAlgorithm(); + return algorithms.S2geometryAlgorithm(); } } diff --git a/src/engine/SpatialJoin.h b/src/engine/SpatialJoin.h index eccbd3086..b62978cf9 100644 --- a/src/engine/SpatialJoin.h +++ b/src/engine/SpatialJoin.h @@ -20,6 +20,19 @@ struct MaxDistanceConfig { size_t maxDist_; }; +// helper struct to improve readability in prepareJoin() +struct PreparedSpatialJoinParams { + const IdTable* const idTableLeft_; + std::shared_ptr resultLeft_; + const IdTable* const idTableRight_; + std::shared_ptr resultRight_; + ColumnIndex leftJoinCol_; + ColumnIndex rightJoinCol_; + size_t numColumns_; + std::optional maxDist_; + std::optional maxResults_; +}; + // This class is implementing a SpatialJoin operation. This operations joins // two tables, using their positional column. It supports nearest neighbor // search as well as search of all points within a given range. @@ -107,44 +120,8 @@ class SpatialJoin : public Operation { // helper function, which parses a triple and populates config_ void parseConfigFromTriple(); - // helper function which returns a GeoPoint if the element of the given table - // represents a GeoPoint - std::optional getPoint(const IdTable* restable, size_t row, - ColumnIndex col) const; - - // helper function, which computes the distance of two points, where each - // point comes from a different result table - Id computeDist(const IdTable* resLeft, const IdTable* resRight, - size_t rowLeft, size_t rowRight, ColumnIndex leftPointCol, - ColumnIndex rightPointCol) const; - - // Helper function, which adds a row, which belongs to the result to the - // result table. As inputs it uses a row of the left and a row of the right - // child result table. - void addResultTableEntry(IdTable* result, const IdTable* resultLeft, - const IdTable* resultRight, size_t rowLeft, - size_t rowRight, Id distance) const; - - // helper struct to improve readability in prepareJoin() - struct PreparedJoinParams { - const IdTable* const idTableLeft_; - std::shared_ptr resultLeft_; - const IdTable* const idTableRight_; - std::shared_ptr resultRight_; - ColumnIndex leftJoinCol_; - ColumnIndex rightJoinCol_; - size_t numColumns_; - }; - // helper function, to initialize various required objects for both algorithms - PreparedJoinParams prepareJoin() const; - - // the baseline algorithm, which just checks every combination - Result baselineAlgorithm(); - - // the improved algorithm, which constructs a S2PointIndex, which is usually - // much faster, however requires that the right relation fits into memory - Result s2geometryAlgorithm(); + PreparedSpatialJoinParams prepareJoin() const; SparqlTriple triple_; Variable leftChildVariable_; diff --git a/src/engine/SpatialJoinAlgorithms.cpp b/src/engine/SpatialJoinAlgorithms.cpp new file mode 100644 index 000000000..dd7a9b1c7 --- /dev/null +++ b/src/engine/SpatialJoinAlgorithms.cpp @@ -0,0 +1,225 @@ +#include "engine/SpatialJoinAlgorithms.h" + +#include +#include +#include +#include +#include + +#include "util/GeoSparqlHelpers.h" + +// ____________________________________________________________________________ +SpatialJoinAlgorithms::SpatialJoinAlgorithms( + QueryExecutionContext* qec, PreparedSpatialJoinParams params, + bool addDistToResult, + std::variant config) + : qec_{qec}, + params_{std::move(params)}, + addDistToResult_{addDistToResult}, + config_{std::move(config)} {} + +// ____________________________________________________________________________ +std::optional SpatialJoinAlgorithms::getPoint(const IdTable* restable, + size_t row, + ColumnIndex col) const { + auto id = restable->at(row, col); + return id.getDatatype() == Datatype::GeoPoint + ? std::optional{id.getGeoPoint()} + : std::nullopt; +}; + +// ____________________________________________________________________________ +Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft, + const IdTable* idTableRight, + size_t rowLeft, size_t rowRight, + ColumnIndex leftPointCol, + ColumnIndex rightPointCol) const { + auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol); + auto point2 = getPoint(idTableRight, rowRight, rightPointCol); + if (!point1.has_value() || !point2.has_value()) { + return Id::makeUndefined(); + } + return Id::makeFromInt(static_cast( + ad_utility::detail::wktDistImpl(point1.value(), point2.value()) * 1000)); +} + +// ____________________________________________________________________________ +void SpatialJoinAlgorithms::addResultTableEntry(IdTable* result, + const IdTable* idTableLeft, + const IdTable* idTableRight, + size_t rowLeft, size_t rowRight, + Id distance) const { + // this lambda function copies elements from copyFrom + // into the table res. It copies them into the row + // rowIndRes and column column colIndRes. It returns the column number + // until which elements were copied + auto addColumns = [](IdTable* res, const IdTable* copyFrom, size_t rowIndRes, + size_t colIndRes, size_t rowIndCopy) { + size_t col = 0; + while (col < copyFrom->numColumns()) { + res->at(rowIndRes, colIndRes) = (*copyFrom).at(rowIndCopy, col); + colIndRes += 1; + col += 1; + } + return colIndRes; + }; + + auto resrow = result->numRows(); + result->emplace_back(); + // add columns to result table + size_t rescol = 0; + rescol = addColumns(result, idTableLeft, resrow, rescol, rowLeft); + rescol = addColumns(result, idTableRight, resrow, rescol, rowRight); + + if (addDistToResult_) { + result->at(resrow, rescol) = distance; + // rescol isn't used after that in this function, but future updates, + // which add additional columns, would need to remember to increase + // rescol at this place otherwise. If they forget to do this, the + // distance column will be overwritten, the variableToColumnMap will + // not work and so on + // rescol += 1; + } +} + +// ____________________________________________________________________________ +Result SpatialJoinAlgorithms::BaselineAlgorithm() { + const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, + rightJoinCol, numColumns, maxDist, maxResults] = params_; + IdTable result{numColumns, qec_->getAllocator()}; + + // cartesian product between the two tables, pairs are restricted according to + // `maxDistance_` and `maxResults_` + for (size_t rowLeft = 0; rowLeft < idTableLeft->size(); rowLeft++) { + // This priority queue stores the intermediate best results if `maxResults_` + // is used. Each intermediate result is stored as a pair of its `rowRight` + // and distance. Since the queue will hold at most `maxResults_ + 1` items, + // it is not a memory concern. + auto compare = [](std::pair a, + std::pair b) { + return a.second < b.second; + }; + std::priority_queue, + std::vector>, + decltype(compare)> + intermediate(compare); + + // Inner loop of cartesian product + for (size_t rowRight = 0; rowRight < idTableRight->size(); rowRight++) { + Id dist = computeDist(idTableLeft, idTableRight, rowLeft, rowRight, + leftJoinCol, rightJoinCol); + + // Ensure `maxDist_` constraint + if (dist.getDatatype() != Datatype::Int || + (maxDist.has_value() && + dist.getInt() > static_cast(maxDist.value()))) { + continue; + } + + // If there is no `maxResults_` we can add the result row immediately + if (!maxResults.has_value()) { + addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, + rowRight, dist); + continue; + } + + // Ensure `maxResults_` constraint using priority queue + intermediate.push(std::pair{rowRight, dist.getInt()}); + // Too many results? Drop the worst one + if (intermediate.size() > maxResults.value()) { + intermediate.pop(); + } + } + + // If we are using the priority queue, we didn't add the results in the + // inner loop, so we do it now. + if (maxResults.has_value()) { + size_t numResults = intermediate.size(); + for (size_t item = 0; item < numResults; item++) { + // Get and remove largest item from priority queue + auto [rowRight, dist] = intermediate.top(); + intermediate.pop(); + + addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, + rowRight, Id::makeFromInt(dist)); + } + } + } + return Result(std::move(result), std::vector{}, + Result::getMergedLocalVocab(*resultLeft, *resultRight)); +} + +// ____________________________________________________________________________ +Result SpatialJoinAlgorithms::S2geometryAlgorithm() { + const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, + rightJoinCol, numColumns, maxDist, maxResults] = params_; + IdTable result{numColumns, qec_->getAllocator()}; + + // Helper function to convert `GeoPoint` to `S2Point` + auto constexpr toS2Point = [](const GeoPoint& p) { + auto lat = p.getLat(); + auto lng = p.getLng(); + auto latlng = S2LatLng::FromDegrees(lat, lng); + return S2Point{latlng}; + }; + + S2PointIndex s2index; + + // Optimization: If we only search by maximum distance, the operation is + // symmetric, so the larger table can be used for the index + bool indexOfRight = + (maxResults.has_value() || (idTableLeft->size() > idTableRight->size())); + auto indexTable = indexOfRight ? idTableRight : idTableLeft; + auto indexJoinCol = indexOfRight ? rightJoinCol : leftJoinCol; + + // Populate the index + for (size_t row = 0; row < indexTable->size(); row++) { + auto p = getPoint(indexTable, row, indexJoinCol); + if (p.has_value()) { + s2index.Add(toS2Point(p.value()), row); + } + } + + // Performs a nearest neighbor search on the index and returns the closest + // points that satisfy the criteria given by `maxDist_` and `maxResults_`. + + // Construct a query object with the given constraints + auto s2query = S2ClosestPointQuery{&s2index}; + + if (maxResults.has_value()) { + s2query.mutable_options()->set_max_results( + static_cast(maxResults.value())); + } + if (maxDist.has_value()) { + s2query.mutable_options()->set_inclusive_max_distance(S2Earth::ToAngle( + util::units::Meters(static_cast(maxDist.value())))); + } + + auto searchTable = indexOfRight ? idTableLeft : idTableRight; + auto searchJoinCol = indexOfRight ? leftJoinCol : rightJoinCol; + + // Use the index to lookup the points of the other table + for (size_t searchRow = 0; searchRow < searchTable->size(); searchRow++) { + auto p = getPoint(searchTable, searchRow, searchJoinCol); + if (!p.has_value()) { + continue; + } + auto s2target = + S2ClosestPointQuery::PointTarget{toS2Point(p.value())}; + + for (const auto& neighbor : s2query.FindClosestPoints(&s2target)) { + // In this loop we only receive points that already satisfy the given + // criteria + auto indexRow = neighbor.data(); + auto dist = static_cast(S2Earth::ToMeters(neighbor.distance())); + + auto rowLeft = indexOfRight ? searchRow : indexRow; + auto rowRight = indexOfRight ? indexRow : searchRow; + addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, rowRight, + Id::makeFromInt(dist)); + } + } + + return Result(std::move(result), std::vector{}, + Result::getMergedLocalVocab(*resultLeft, *resultRight)); +} diff --git a/src/engine/SpatialJoinAlgorithms.h b/src/engine/SpatialJoinAlgorithms.h new file mode 100644 index 000000000..00556f1b3 --- /dev/null +++ b/src/engine/SpatialJoinAlgorithms.h @@ -0,0 +1,40 @@ +#pragma once + +#include "engine/Result.h" +#include "engine/SpatialJoin.h" + +class SpatialJoinAlgorithms { + public: + // initialize the Algorithm with the needed parameters + SpatialJoinAlgorithms( + QueryExecutionContext* qec, PreparedSpatialJoinParams params, + bool addDistToResult, + std::variant config); + Result BaselineAlgorithm(); + Result S2geometryAlgorithm(); + Result BoundingBoxAlgorithm(); + + private: + // helper function which returns a GeoPoint if the element of the given table + // represents a GeoPoint + std::optional getPoint(const IdTable* restable, size_t row, + ColumnIndex col) const; + + // helper function, which computes the distance of two points, where each + // point comes from a different result table + Id computeDist(const IdTable* resLeft, const IdTable* resRight, + size_t rowLeft, size_t rowRight, ColumnIndex leftPointCol, + ColumnIndex rightPointCol) const; + + // Helper function, which adds a row, which belongs to the result to the + // result table. As inputs it uses a row of the left and a row of the right + // child result table. + void addResultTableEntry(IdTable* result, const IdTable* resultLeft, + const IdTable* resultRight, size_t rowLeft, + size_t rowRight, Id distance) const; + + QueryExecutionContext* qec_; + PreparedSpatialJoinParams params_; + bool addDistToResult_; + std::variant config_; +}; diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt index 3a7e7d3cf..76ba563b7 100644 --- a/test/engine/CMakeLists.txt +++ b/test/engine/CMakeLists.txt @@ -9,3 +9,4 @@ addLinkAndDiscoverTest(GroupByHashMapOptimizationTest) addLinkAndDiscoverTest(LazyGroupByTest engine) addLinkAndDiscoverTest(CountConnectedSubgraphsTest) addLinkAndDiscoverTest(BindTest engine) +addLinkAndDiscoverTest(SpatialJoinAlgorithmsTest engine) diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp new file mode 100644 index 000000000..f732c0d8e --- /dev/null +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -0,0 +1,787 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "../util/IndexTestHelpers.h" +#include "./../../src/util/GeoSparqlHelpers.h" +#include "./SpatialJoinTestHelpers.h" +#include "engine/ExportQueryExecutionTrees.h" +#include "engine/IndexScan.h" +#include "engine/Join.h" +#include "engine/QueryExecutionTree.h" +#include "engine/SpatialJoin.h" +#include "engine/SpatialJoinAlgorithms.h" +#include "parser/data/Variable.h" + +namespace { // anonymous namespace to avoid linker problems + +using namespace ad_utility::testing; +using namespace SpatialJoinTestHelpers; + +namespace computeResultTest { + +class SpatialJoinParamTest : public ::testing::TestWithParam { + public: + void createAndTestSpatialJoin( + QueryExecutionContext* qec, SparqlTriple spatialJoinTriple, + std::shared_ptr leftChild, + std::shared_ptr rightChild, bool addLeftChildFirst, + std::vector> expectedOutputUnorderedRows, + std::vector columnNames) { + // this function is like transposing a matrix. An entry which has been + // stored at (i, k) is now stored at (k, i). The reason this is needed is + // the following: this function receives the input as a vector of vector of + // strings. Each vector contains a vector, which contains a row of the + // result. This row contains all columns. After swapping, each vector + // contains a vector, which contains all entries of one column. As now each + // of the vectors contain only one column, we can later order them according + // to the variable to column map and then compare the result. + auto swapColumns = [&](std::vector> toBeSwapped) { + std::vector> result; + bool firstIteration = true; + for (size_t i = 0; i < toBeSwapped.size(); i++) { + for (size_t k = 0; k < toBeSwapped.at(i).size(); k++) { + if (firstIteration) { + result.push_back(std::vector{toBeSwapped.at(i).at(k)}); + } else { + result.at(k).push_back(toBeSwapped.at(i).at(k)); + } + } + firstIteration = false; + } + return result; + }; + + std::shared_ptr spatialJoinOperation = + ad_utility::makeExecutionTree(qec, spatialJoinTriple, + std::nullopt, std::nullopt); + + // add first child + std::shared_ptr op = spatialJoinOperation->getRootOperation(); + SpatialJoin* spatialJoin = static_cast(op.get()); + auto firstChild = addLeftChildFirst ? leftChild : rightChild; + auto secondChild = addLeftChildFirst ? rightChild : leftChild; + Variable firstVariable = addLeftChildFirst + ? spatialJoinTriple.s_.getVariable() + : spatialJoinTriple.o_.getVariable(); + Variable secondVariable = addLeftChildFirst + ? spatialJoinTriple.o_.getVariable() + : spatialJoinTriple.s_.getVariable(); + + auto spJoin1 = spatialJoin->addChild(firstChild, firstVariable); + spatialJoin = static_cast(spJoin1.get()); + + // add second child + auto spJoin2 = spatialJoin->addChild(secondChild, secondVariable); + spatialJoin = static_cast(spJoin2.get()); + + // prepare expected output + // swap rows and columns to use the function orderColAccordingToVarColMap + auto expectedMaxDistCols = swapColumns(expectedOutputUnorderedRows); + auto expectedOutputOrdered = + orderColAccordingToVarColMap(spatialJoin->computeVariableToColumnMap(), + expectedMaxDistCols, columnNames); + auto expectedOutput = + createRowVectorFromColumnVector(expectedOutputOrdered); + + // Select algorithm + spatialJoin->selectAlgorithm(GetParam()); + + // At worst quadratic time + ASSERT_LE( + spatialJoin->getCostEstimate(), + std::pow(firstChild->getSizeEstimate() * secondChild->getSizeEstimate(), + 2)); + + auto res = spatialJoin->computeResult(false); + auto vec = printTable(qec, &res); + /* + for (size_t i = 0; i < vec.size(); ++i) { + EXPECT_STREQ(vec.at(i).c_str(), expectedOutput.at(i).c_str()); + }*/ + + EXPECT_THAT(vec, ::testing::UnorderedElementsAreArray(expectedOutput)); + } + + // build the test using the small dataset. Let the SpatialJoin operation be + // the last one (the left and right child are maximally large for this test + // query) the following Query will be simulated, the max distance will be + // different depending on the test: Select * where { + // ?obj1 ?name1 . + // ?obj1 ?geo1 . + // ?geo1 ?point1 + // ?obj2 ?name2 . + // ?obj2 ?geo2 . + // ?geo2 ?point2 + // ?point1 ?point2 . + // } + void buildAndTestSmallTestSetLargeChildren( + std::string specialPredicate, bool addLeftChildFirst, + std::vector> expectedOutput, + std::vector columnNames) { + auto qec = buildTestQEC(); + auto numTriples = qec->getIndex().numTriples().normal; + ASSERT_EQ(numTriples, 15); + // ===================== build the first child + // =============================== + auto leftChild = buildMediumChild( + qec, {"?obj1", std::string{""}, "?name1"}, + {"?obj1", std::string{""}, "?geo1"}, + {"?geo1", std::string{""}, "?point1"}, "?obj1", "?geo1"); + + // ======================= build the second child + // ============================ + auto rightChild = buildMediumChild( + qec, {"?obj2", std::string{""}, "?name2"}, + {"?obj2", std::string{""}, "?geo2"}, + {"?geo2", std::string{""}, "?point2"}, "?obj2", "?geo2"); + + createAndTestSpatialJoin( + qec, + SparqlTriple{TripleComponent{Variable{"?point1"}}, specialPredicate, + TripleComponent{Variable{"?point2"}}}, + leftChild, rightChild, addLeftChildFirst, expectedOutput, columnNames); + } + + // build the test using the small dataset. Let the SpatialJoin operation. + // The following Query will be simulated, the max distance will be different + // depending on the test: + // Select * where { + // ?geo1 ?point1 + // ?geo2 ?point2 + // ?point1 ?point2 . + // } + void buildAndTestSmallTestSetSmallChildren( + std::string specialPredicate, bool addLeftChildFirst, + std::vector> expectedOutput, + std::vector columnNames) { + auto qec = buildTestQEC(); + auto numTriples = qec->getIndex().numTriples().normal; + ASSERT_EQ(numTriples, 15); + // ====================== build inputs =================================== + TripleComponent point1{Variable{"?point1"}}; + TripleComponent point2{Variable{"?point2"}}; + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); + + createAndTestSpatialJoin( + qec, SparqlTriple{point1, specialPredicate, point2}, leftChild, + rightChild, addLeftChildFirst, expectedOutput, columnNames); + } + + // build the test using the small dataset. Let the SpatialJoin operation be + // the last one. the following Query will be simulated, the max distance will + // be different depending on the test: Select * where { + // ?obj1 ?name1 . + // ?obj1 ?geo1 . + // ?geo1 ?point1 + // ?geo2 ?point2 + // ?point1 ?point2 . + // } + void buildAndTestSmallTestSetDiffSizeChildren( + std::string specialPredicate, bool addLeftChildFirst, + std::vector> expectedOutput, + std::vector columnNames, bool bigChildLeft) { + auto qec = buildTestQEC(); + auto numTriples = qec->getIndex().numTriples().normal; + ASSERT_EQ(numTriples, 15); + // ========================= build big child + // ================================= + auto bigChild = buildMediumChild( + qec, {"?obj1", std::string{""}, "?name1"}, + {"?obj1", std::string{""}, "?geo1"}, + {"?geo1", std::string{""}, "?point1"}, "?obj1", "?geo1"); + + // ========================= build small child + // =============================== + TripleComponent point2{Variable{"?point2"}}; + auto smallChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); + + auto firstChild = bigChildLeft ? bigChild : smallChild; + auto secondChild = bigChildLeft ? smallChild : bigChild; + auto firstVariable = + bigChildLeft ? TripleComponent{Variable{"?point1"}} : point2; + auto secondVariable = + bigChildLeft ? point2 : TripleComponent{Variable{"?point1"}}; + + createAndTestSpatialJoin( + qec, SparqlTriple{firstVariable, specialPredicate, secondVariable}, + firstChild, secondChild, addLeftChildFirst, expectedOutput, + columnNames); + } + + protected: + bool useBaselineAlgorithm_; +}; + +std::vector mergeToRow(std::vector part1, + std::vector part2, + std::vector part3) { + std::vector result = part1; + for (size_t i = 0; i < part2.size(); i++) { + result.push_back(part2.at(i)); + } + for (size_t i = 0; i < part3.size(); i++) { + result.push_back(part3.at(i)); + } + return result; +}; + +std::vector> unordered_rows{ + std::vector{"\"Uni Freiburg TF\"", "", "", + "POINT(7.835050 48.012670)"}, + std::vector{"\"Minster Freiburg\"", "", "", + "POINT(7.852980 47.995570)"}, + std::vector{"\"London Eye\"", "", "", + "POINT(-0.119570 51.503330)"}, + std::vector{"\"Statue of liberty\"", "", "", + "POINT(-74.044540 40.689250)"}, + std::vector{"\"eiffel tower\"", "", "", + "POINT(2.294510 48.858250)"}, +}; + +// Shortcuts +auto TF = unordered_rows.at(0); +auto Mun = unordered_rows.at(1); +auto Eye = unordered_rows.at(2); +auto Lib = unordered_rows.at(3); +auto Eif = unordered_rows.at(4); + +std::vector> unordered_rows_small{ + std::vector{"", "POINT(7.835050 48.012670)"}, + std::vector{"", "POINT(7.852980 47.995570)"}, + std::vector{"", "POINT(-0.119570 51.503330)"}, + std::vector{"", "POINT(-74.044540 40.689250)"}, + std::vector{"", "POINT(2.294510 48.858250)"}}; + +// Shortcuts +auto sTF = unordered_rows_small.at(0); +auto sMun = unordered_rows_small.at(1); +auto sEye = unordered_rows_small.at(2); +auto sLib = unordered_rows_small.at(3); +auto sEif = unordered_rows_small.at(4); + +// in all calculations below, the factor 1000 is used to convert from km to m + +// distance from the object to itself should be zero +std::vector expectedDistSelf{"0"}; + +// helper functions +auto P = [](double x, double y) { return GeoPoint(y, x); }; + +auto expectedDist = [](const GeoPoint& p1, const GeoPoint& p2) { + auto p1_ = S2Point{S2LatLng::FromDegrees(p1.getLat(), p1.getLng())}; + auto p2_ = S2Point{S2LatLng::FromDegrees(p2.getLat(), p2.getLng())}; + + return std::to_string(static_cast(S2Earth::ToMeters(S1Angle(p1_, p2_)))); +}; + +// Places for testing +auto PUni = P(7.83505, 48.01267); +auto PMun = P(7.85298, 47.99557); +auto PEif = P(2.29451, 48.85825); +auto PEye = P(-0.11957, 51.50333); +auto PLib = P(-74.04454, 40.68925); +auto testPlaces = std::vector{PUni, PMun, PEif, PEye, PLib}; + +// distance from Uni Freiburg to Freiburger Münster is 2,33 km according to +// google maps +std::vector expectedDistUniMun{expectedDist(PUni, PMun)}; + +// distance from Uni Freiburg to Eiffel Tower is 419,32 km according to +// google maps +std::vector expectedDistUniEif{expectedDist(PUni, PEif)}; + +// distance from Minster Freiburg to eiffel tower is 421,09 km according to +// google maps +std::vector expectedDistMunEif{expectedDist(PMun, PEif)}; + +// distance from london eye to eiffel tower is 340,62 km according to +// google maps +std::vector expectedDistEyeEif{expectedDist(PEye, PEif)}; + +// distance from Uni Freiburg to London Eye is 690,18 km according to +// google maps +std::vector expectedDistUniEye{expectedDist(PUni, PEye)}; + +// distance from Minster Freiburg to London Eye is 692,39 km according to +// google maps +std::vector expectedDistMunEye{expectedDist(PMun, PEye)}; + +// distance from Uni Freiburg to Statue of Liberty is 6249,55 km according to +// google maps +std::vector expectedDistUniLib{expectedDist(PUni, PLib)}; + +// distance from Minster Freiburg to Statue of Liberty is 6251,58 km +// according to google maps +std::vector expectedDistMunLib{expectedDist(PMun, PLib)}; + +// distance from london eye to statue of liberty is 5575,08 km according to +// google maps +std::vector expectedDistEyeLib{expectedDist(PEye, PLib)}; + +// distance from eiffel tower to Statue of liberty is 5837,42 km according to +// google maps +std::vector expectedDistEifLib{expectedDist(PEif, PLib)}; + +std::vector> expectedMaxDist1_rows{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}; + +std::vector> expectedMaxDist5000_rows{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}; + +std::vector> expectedMaxDist500000_rows{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(Eif, TF, expectedDistUniEif), + mergeToRow(Eif, Mun, expectedDistMunEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist1000000_rows{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(TF, Eye, expectedDistUniEye), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(Mun, Eye, expectedDistMunEye), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eye, TF, expectedDistUniEye), + mergeToRow(Eye, Mun, expectedDistMunEye), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(Eif, TF, expectedDistUniEif), + mergeToRow(Eif, Mun, expectedDistMunEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist10000000_rows{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(TF, Eye, expectedDistUniEye), + mergeToRow(TF, Lib, expectedDistUniLib), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(Mun, Eye, expectedDistMunEye), + mergeToRow(Mun, Lib, expectedDistMunLib), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eye, TF, expectedDistUniEye), + mergeToRow(Eye, Mun, expectedDistMunEye), + mergeToRow(Eye, Lib, expectedDistEyeLib), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Lib, TF, expectedDistUniLib), + mergeToRow(Lib, Mun, expectedDistMunLib), + mergeToRow(Lib, Eye, expectedDistEyeLib), + mergeToRow(Lib, Eif, expectedDistEifLib), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(Eif, TF, expectedDistUniEif), + mergeToRow(Eif, Mun, expectedDistMunEif), + mergeToRow(Eif, Eye, expectedDistEyeEif), + mergeToRow(Eif, Lib, expectedDistEifLib)}; + +std::vector> expectedMaxDist1_rows_small{ + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), +}; + +std::vector> expectedMaxDist5000_rows_small{ + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf)}; + +std::vector> expectedMaxDist500000_rows_small{ + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist1000000_rows_small{ + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sTF, sEye, expectedDistUniEye), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sTF, expectedDistUniEye), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist10000000_rows_small{ + mergeToRow(sTF, sTF, expectedDistSelf), + mergeToRow(sTF, sMun, expectedDistUniMun), + mergeToRow(sTF, sEif, expectedDistUniEif), + mergeToRow(sTF, sEye, expectedDistUniEye), + mergeToRow(sTF, sLib, expectedDistUniLib), + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sTF, expectedDistUniMun), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sMun, sLib, expectedDistMunLib), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sTF, expectedDistUniEye), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sEye, sLib, expectedDistEyeLib), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sLib, sTF, expectedDistUniLib), + mergeToRow(sLib, sMun, expectedDistMunLib), + mergeToRow(sLib, sEye, expectedDistEyeLib), + mergeToRow(sLib, sEif, expectedDistEifLib), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sTF, expectedDistUniEif), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif), + mergeToRow(sEif, sLib, expectedDistEifLib)}; + +std::vector> expectedMaxDist1_rows_diff{ + mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf), +}; + +std::vector> expectedMaxDist5000_rows_diff{ + mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf)}; + +std::vector> expectedMaxDist500000_rows_diff{ + mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist1000000_rows_diff{ + mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(TF, sEye, expectedDistUniEye), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Mun, sEye, expectedDistMunEye), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Eye, sTF, expectedDistUniEye), + mergeToRow(Eye, sMun, expectedDistMunEye), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif)}; + +std::vector> expectedMaxDist10000000_rows_diff{ + mergeToRow(TF, sTF, expectedDistSelf), + mergeToRow(TF, sMun, expectedDistUniMun), + mergeToRow(TF, sEif, expectedDistUniEif), + mergeToRow(TF, sEye, expectedDistUniEye), + mergeToRow(TF, sLib, expectedDistUniLib), + mergeToRow(Mun, sMun, expectedDistSelf), + mergeToRow(Mun, sTF, expectedDistUniMun), + mergeToRow(Mun, sEif, expectedDistMunEif), + mergeToRow(Mun, sEye, expectedDistMunEye), + mergeToRow(Mun, sLib, expectedDistMunLib), + mergeToRow(Eye, sEye, expectedDistSelf), + mergeToRow(Eye, sEif, expectedDistEyeEif), + mergeToRow(Eye, sTF, expectedDistUniEye), + mergeToRow(Eye, sMun, expectedDistMunEye), + mergeToRow(Eye, sLib, expectedDistEyeLib), + mergeToRow(Lib, sLib, expectedDistSelf), + mergeToRow(Lib, sTF, expectedDistUniLib), + mergeToRow(Lib, sMun, expectedDistMunLib), + mergeToRow(Lib, sEye, expectedDistEyeLib), + mergeToRow(Lib, sEif, expectedDistEifLib), + mergeToRow(Eif, sEif, expectedDistSelf), + mergeToRow(Eif, sTF, expectedDistUniEif), + mergeToRow(Eif, sMun, expectedDistMunEif), + mergeToRow(Eif, sEye, expectedDistEyeEif), + mergeToRow(Eif, sLib, expectedDistEifLib)}; + +std::vector> expectedNearestNeighbors1{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}; + +std::vector> expectedNearestNeighbors2{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Lib, Eye, expectedDistEyeLib), + mergeToRow(Eif, Eye, expectedDistEyeEif)}; + +std::vector> expectedNearestNeighbors2_400000{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eif, Eye, expectedDistEyeEif)}; + +std::vector> expectedNearestNeighbors2_4000{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun)}; + +std::vector> expectedNearestNeighbors2_40{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf)}; + +std::vector> expectedNearestNeighbors3_500000{ + mergeToRow(TF, TF, expectedDistSelf), + mergeToRow(Mun, Mun, expectedDistSelf), + mergeToRow(Eye, Eye, expectedDistSelf), + mergeToRow(Lib, Lib, expectedDistSelf), + mergeToRow(Eif, Eif, expectedDistSelf), + mergeToRow(TF, Mun, expectedDistUniMun), + mergeToRow(Mun, TF, expectedDistUniMun), + mergeToRow(Mun, Eif, expectedDistMunEif), + mergeToRow(TF, Eif, expectedDistUniEif), + mergeToRow(Eye, Eif, expectedDistEyeEif), + mergeToRow(Eif, Eye, expectedDistEyeEif), + mergeToRow(Eif, TF, expectedDistUniEif)}; + +// test the compute result method on small examples +TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { + std::vector columnNames = { + "?name1", "?obj1", "?geo1", + "?point1", "?name2", "?obj2", + "?geo2", "?point2", "?distOfTheTwoObjectsAddedInternally"}; + buildAndTestSmallTestSetLargeChildren("", true, + expectedMaxDist1_rows, columnNames); + buildAndTestSmallTestSetLargeChildren("", false, + expectedMaxDist1_rows, columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedMaxDist5000_rows, columnNames); + buildAndTestSmallTestSetLargeChildren("", false, + expectedMaxDist5000_rows, columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedMaxDist500000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", + false, expectedMaxDist500000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", + true, expectedMaxDist1000000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", + false, expectedMaxDist1000000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", + true, expectedMaxDist10000000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", + false, expectedMaxDist10000000_rows, + columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors1, columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors2, columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors2_400000, + columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors2_4000, + columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors2_40, + columnNames); + buildAndTestSmallTestSetLargeChildren("", true, + expectedNearestNeighbors3_500000, + columnNames); +} + +TEST_P(SpatialJoinParamTest, computeResultSmallDatasetSmallChildren) { + std::vector columnNames{"?obj1", "?point1", "?obj2", "?point2", + "?distOfTheTwoObjectsAddedInternally"}; + buildAndTestSmallTestSetSmallChildren("", true, + expectedMaxDist1_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", false, + expectedMaxDist1_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", true, + expectedMaxDist5000_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", false, + expectedMaxDist5000_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", true, + expectedMaxDist500000_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", + false, expectedMaxDist500000_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren("", + true, expectedMaxDist1000000_rows_small, + columnNames); + buildAndTestSmallTestSetSmallChildren( + "", false, + expectedMaxDist1000000_rows_small, columnNames); + buildAndTestSmallTestSetSmallChildren( + "", true, + expectedMaxDist10000000_rows_small, columnNames); + buildAndTestSmallTestSetSmallChildren( + "", false, + expectedMaxDist10000000_rows_small, columnNames); +} + +TEST_P(SpatialJoinParamTest, computeResultSmallDatasetDifferentSizeChildren) { + std::vector columnNames{"?name1", + "?obj1", + "?geo1", + "?point1", + "?obj2", + "?point2", + "?distOfTheTwoObjectsAddedInternally"}; + buildAndTestSmallTestSetDiffSizeChildren("", true, + expectedMaxDist1_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren("", false, + expectedMaxDist1_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren("", true, + expectedMaxDist1_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren("", false, + expectedMaxDist1_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren("", + true, expectedMaxDist5000_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren("", + false, expectedMaxDist5000_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren("", + true, expectedMaxDist5000_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren("", + false, expectedMaxDist5000_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, expectedMaxDist500000_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, expectedMaxDist500000_rows_diff, + columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, expectedMaxDist500000_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, expectedMaxDist500000_rows_diff, + columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, + expectedMaxDist1000000_rows_diff, columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, + expectedMaxDist1000000_rows_diff, columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, + expectedMaxDist1000000_rows_diff, columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, + expectedMaxDist1000000_rows_diff, columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, + expectedMaxDist10000000_rows_diff, columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, + expectedMaxDist10000000_rows_diff, columnNames, true); + buildAndTestSmallTestSetDiffSizeChildren( + "", true, + expectedMaxDist10000000_rows_diff, columnNames, false); + buildAndTestSmallTestSetDiffSizeChildren( + "", false, + expectedMaxDist10000000_rows_diff, columnNames, false); +} + +INSTANTIATE_TEST_SUITE_P(SpatialJoin, SpatialJoinParamTest, ::testing::Bool()); + +} // end of Namespace computeResultTest + +} // namespace diff --git a/test/engine/SpatialJoinTest.cpp b/test/engine/SpatialJoinTest.cpp index 4d1d54ee8..4de1de25e 100644 --- a/test/engine/SpatialJoinTest.cpp +++ b/test/engine/SpatialJoinTest.cpp @@ -15,6 +15,7 @@ #include "../util/IndexTestHelpers.h" #include "./../../src/global/ValueId.h" #include "./../../src/util/GeoSparqlHelpers.h" +#include "./SpatialJoinTestHelpers.h" #include "engine/ExportQueryExecutionTrees.h" #include "engine/IndexScan.h" #include "engine/Join.h" @@ -25,908 +26,7 @@ namespace { // anonymous namespace to avoid linker problems using namespace ad_utility::testing; - -auto makePointLiteral = [](std::string_view c1, std::string_view c2) { - return absl::StrCat(" \"POINT(", c1, " ", c2, ")\"^^<", GEO_WKT_LITERAL, ">"); -}; - -namespace localTestHelpers { - -// helper function to create a vector of strings from a result table -std::vector printTable(const QueryExecutionContext* qec, - const Result* table) { - std::vector output; - for (size_t i = 0; i < table->idTable().numRows(); i++) { - std::string line = ""; - for (size_t k = 0; k < table->idTable().numColumns(); k++) { - auto test = ExportQueryExecutionTrees::idToStringAndType( - qec->getIndex(), table->idTable().at(i, k), {}); - line += test.value().first; - line += " "; - } - output.push_back(line.substr(0, line.size() - 1)); // ignore last " " - } - return output; -} - -// this helper function reorders an input vector according to the variable to -// column map to make the string array match the order of the result, which -// should be tested (it uses a vector of vectors (the first vector is containing -// each column of the result, each column consist of a vector, where each entry -// is a row of this column)) -std::vector> orderColAccordingToVarColMap( - VariableToColumnMap varColMaps, - std::vector> columns, - std::vector columnNames) { - std::vector> result; - auto indVariableMap = copySortedByColumnIndex(varColMaps); - for (size_t i = 0; i < indVariableMap.size(); i++) { - for (size_t k = 0; k < columnNames.size(); k++) { - if (indVariableMap.at(i).first.name() == columnNames.at(k)) { - result.push_back(columns.at(k)); - break; - } - } - } - return result; -} - -// helper function to create a vector of strings representing rows, from a -// vector of strings representing columns. Please make sure, that the order of -// the columns is already matching the order of the result. If this is not the -// case call the function orderColAccordingToVarColMap -std::vector createRowVectorFromColumnVector( - std::vector> column_vector) { - std::vector result; - if (column_vector.size() > 0) { - for (size_t i = 0; i < column_vector.at(0).size(); i++) { - std::string str = ""; - for (size_t k = 0; k < column_vector.size(); k++) { - str += column_vector.at(k).at(i); - str += " "; - } - result.push_back(str.substr(0, str.size() - 1)); - } - } - return result; -} - -// create a small test dataset, which focuses on points as geometry objects. -// Note, that some of these objects have a polygon representation, but for -// testing purposes, they get represented as a point here. I took those -// points, such that it is obvious, which pair of objects should be included, -// when the maximum distance is x meters. Please note, that these datapoints -// are not copied from a real input file. Copying the query will therefore -// likely not result in the same results as here (also the names, coordinates, -// etc. might be different in the real datasets) -std::string createSmallDatasetWithPoints() { - auto addPoint = [](std::string& kg, std::string number, std::string name, - std::string point) { - kg += absl::StrCat(" ", name, " . . ", point, " ."); - }; - std::string kg2; - auto p = makePointLiteral; - addPoint(kg2, "1", "\"Uni Freiburg TF\"", p("7.83505", "48.01267")); - addPoint(kg2, "2", "\"Minster Freiburg\"", p("7.85298", "47.99557")); - addPoint(kg2, "3", "\"London Eye\"", p("-0.11957", "51.50333")); - addPoint(kg2, "4", "\"Statue of liberty\"", p("-74.04454", "40.68925")); - addPoint(kg2, "5", "\"eiffel tower\"", p("2.29451", "48.85825")); - - return kg2; -} - -QueryExecutionContext* buildTestQEC() { - std::string kg = localTestHelpers::createSmallDatasetWithPoints(); - ad_utility::MemorySize blocksizePermutations = 16_MB; - auto qec = getQec(kg, true, true, false, blocksizePermutations, false); - return qec; -} - -}; // namespace localTestHelpers - -namespace computeResultTest { - -std::shared_ptr buildIndexScan( - QueryExecutionContext* qec, std::array triple) { - TripleComponent subject{Variable{triple.at(0)}}; - TripleComponent object{Variable{triple.at(2)}}; - return ad_utility::makeExecutionTree( - qec, Permutation::Enum::PSO, SparqlTriple{subject, triple.at(1), object}); -} - -std::shared_ptr buildJoin( - QueryExecutionContext* qec, std::shared_ptr tree1, - std::shared_ptr tree2, Variable joinVariable) { - auto varCol1 = tree1->getVariableColumns(); - auto varCol2 = tree2->getVariableColumns(); - size_t col1 = varCol1[joinVariable].columnIndex_; - size_t col2 = varCol2[joinVariable].columnIndex_; - return ad_utility::makeExecutionTree(qec, tree1, tree2, col1, col2, - true); -} - -std::shared_ptr buildMediumChild( - QueryExecutionContext* qec, std::array triple1, - std::array triple2, std::array triple3, - std::string joinVariable1_, std::string joinVariable2_) { - Variable joinVariable1{joinVariable1_}; - Variable joinVariable2{joinVariable2_}; - auto scan1 = buildIndexScan(qec, triple1); - auto scan2 = buildIndexScan(qec, triple2); - auto scan3 = buildIndexScan(qec, triple3); - auto join = buildJoin(qec, scan1, scan2, joinVariable1); - return buildJoin(qec, join, scan3, joinVariable2); -} - -std::shared_ptr buildSmallChild( - QueryExecutionContext* qec, std::array triple1, - std::array triple2, std::string joinVariable_) { - Variable joinVariable{joinVariable_}; - auto scan1 = buildIndexScan(qec, triple1); - auto scan2 = buildIndexScan(qec, triple2); - return buildJoin(qec, scan1, scan2, joinVariable); -} - -class SpatialJoinParamTest : public ::testing::TestWithParam { - public: - void createAndTestSpatialJoin( - QueryExecutionContext* qec, SparqlTriple spatialJoinTriple, - std::shared_ptr leftChild, - std::shared_ptr rightChild, bool addLeftChildFirst, - std::vector> expectedOutputUnorderedRows, - std::vector columnNames) { - // this function is like transposing a matrix. An entry which has been - // stored at (i, k) is now stored at (k, i). The reason this is needed is - // the following: this function receives the input as a vector of vector of - // strings. Each vector contains a vector, which contains a row of the - // result. This row contains all columns. After swapping, each vector - // contains a vector, which contains all entries of one column. As now each - // of the vectors contain only one column, we can later order them according - // to the variable to column map and then compare the result. - auto swapColumns = [&](std::vector> toBeSwapped) { - std::vector> result; - bool firstIteration = true; - for (size_t i = 0; i < toBeSwapped.size(); i++) { - for (size_t k = 0; k < toBeSwapped.at(i).size(); k++) { - if (firstIteration) { - result.push_back(std::vector{toBeSwapped.at(i).at(k)}); - } else { - result.at(k).push_back(toBeSwapped.at(i).at(k)); - } - } - firstIteration = false; - } - return result; - }; - - std::shared_ptr spatialJoinOperation = - ad_utility::makeExecutionTree(qec, spatialJoinTriple, - std::nullopt, std::nullopt); - - // add first child - std::shared_ptr op = spatialJoinOperation->getRootOperation(); - SpatialJoin* spatialJoin = static_cast(op.get()); - auto firstChild = addLeftChildFirst ? leftChild : rightChild; - auto secondChild = addLeftChildFirst ? rightChild : leftChild; - Variable firstVariable = addLeftChildFirst - ? spatialJoinTriple.s_.getVariable() - : spatialJoinTriple.o_.getVariable(); - Variable secondVariable = addLeftChildFirst - ? spatialJoinTriple.o_.getVariable() - : spatialJoinTriple.s_.getVariable(); - - auto spJoin1 = spatialJoin->addChild(firstChild, firstVariable); - spatialJoin = static_cast(spJoin1.get()); - - // add second child - auto spJoin2 = spatialJoin->addChild(secondChild, secondVariable); - spatialJoin = static_cast(spJoin2.get()); - - // prepare expected output - // swap rows and columns to use the function orderColAccordingToVarColMap - auto expectedMaxDistCols = swapColumns(expectedOutputUnorderedRows); - auto expectedOutputOrdered = localTestHelpers::orderColAccordingToVarColMap( - spatialJoin->computeVariableToColumnMap(), expectedMaxDistCols, - columnNames); - auto expectedOutput = localTestHelpers::createRowVectorFromColumnVector( - expectedOutputOrdered); - - // Select algorithm - spatialJoin->selectAlgorithm(GetParam()); - - // At worst quadratic time - ASSERT_LE( - spatialJoin->getCostEstimate(), - std::pow(firstChild->getSizeEstimate() * secondChild->getSizeEstimate(), - 2)); - - auto res = spatialJoin->computeResult(false); - auto vec = localTestHelpers::printTable(qec, &res); - /* - for (size_t i = 0; i < vec.size(); ++i) { - EXPECT_STREQ(vec.at(i).c_str(), expectedOutput.at(i).c_str()); - }*/ - - EXPECT_THAT(vec, ::testing::UnorderedElementsAreArray(expectedOutput)); - } - - // build the test using the small dataset. Let the SpatialJoin operation be - // the last one (the left and right child are maximally large for this test - // query) the following Query will be simulated, the max distance will be - // different depending on the test: Select * where { - // ?obj1 ?name1 . - // ?obj1 ?geo1 . - // ?geo1 ?point1 - // ?obj2 ?name2 . - // ?obj2 ?geo2 . - // ?geo2 ?point2 - // ?point1 ?point2 . - // } - void buildAndTestSmallTestSetLargeChildren( - std::string specialPredicate, bool addLeftChildFirst, - std::vector> expectedOutput, - std::vector columnNames) { - auto qec = localTestHelpers::buildTestQEC(); - auto numTriples = qec->getIndex().numTriples().normal; - ASSERT_EQ(numTriples, 15); - // ===================== build the first child - // =============================== - auto leftChild = buildMediumChild( - qec, {"?obj1", std::string{""}, "?name1"}, - {"?obj1", std::string{""}, "?geo1"}, - {"?geo1", std::string{""}, "?point1"}, "?obj1", "?geo1"); - - // ======================= build the second child - // ============================ - auto rightChild = buildMediumChild( - qec, {"?obj2", std::string{""}, "?name2"}, - {"?obj2", std::string{""}, "?geo2"}, - {"?geo2", std::string{""}, "?point2"}, "?obj2", "?geo2"); - - createAndTestSpatialJoin( - qec, - SparqlTriple{TripleComponent{Variable{"?point1"}}, specialPredicate, - TripleComponent{Variable{"?point2"}}}, - leftChild, rightChild, addLeftChildFirst, expectedOutput, columnNames); - } - - // build the test using the small dataset. Let the SpatialJoin operation. - // The following Query will be simulated, the max distance will be different - // depending on the test: - // Select * where { - // ?geo1 ?point1 - // ?geo2 ?point2 - // ?point1 ?point2 . - // } - void buildAndTestSmallTestSetSmallChildren( - std::string specialPredicate, bool addLeftChildFirst, - std::vector> expectedOutput, - std::vector columnNames) { - auto qec = localTestHelpers::buildTestQEC(); - auto numTriples = qec->getIndex().numTriples().normal; - ASSERT_EQ(numTriples, 15); - // ====================== build inputs =================================== - TripleComponent point1{Variable{"?point1"}}; - TripleComponent point2{Variable{"?point2"}}; - auto leftChild = - buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); - auto rightChild = - buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); - - createAndTestSpatialJoin( - qec, SparqlTriple{point1, specialPredicate, point2}, leftChild, - rightChild, addLeftChildFirst, expectedOutput, columnNames); - } - - // build the test using the small dataset. Let the SpatialJoin operation be - // the last one. the following Query will be simulated, the max distance will - // be different depending on the test: Select * where { - // ?obj1 ?name1 . - // ?obj1 ?geo1 . - // ?geo1 ?point1 - // ?geo2 ?point2 - // ?point1 ?point2 . - // } - void buildAndTestSmallTestSetDiffSizeChildren( - std::string specialPredicate, bool addLeftChildFirst, - std::vector> expectedOutput, - std::vector columnNames, bool bigChildLeft) { - auto qec = localTestHelpers::buildTestQEC(); - auto numTriples = qec->getIndex().numTriples().normal; - ASSERT_EQ(numTriples, 15); - // ========================= build big child - // ================================= - auto bigChild = buildMediumChild( - qec, {"?obj1", std::string{""}, "?name1"}, - {"?obj1", std::string{""}, "?geo1"}, - {"?geo1", std::string{""}, "?point1"}, "?obj1", "?geo1"); - - // ========================= build small child - // =============================== - TripleComponent point2{Variable{"?point2"}}; - auto smallChild = - buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); - - auto firstChild = bigChildLeft ? bigChild : smallChild; - auto secondChild = bigChildLeft ? smallChild : bigChild; - auto firstVariable = - bigChildLeft ? TripleComponent{Variable{"?point1"}} : point2; - auto secondVariable = - bigChildLeft ? point2 : TripleComponent{Variable{"?point1"}}; - - createAndTestSpatialJoin( - qec, SparqlTriple{firstVariable, specialPredicate, secondVariable}, - firstChild, secondChild, addLeftChildFirst, expectedOutput, - columnNames); - } - - protected: - bool useBaselineAlgorithm_; -}; - -std::vector mergeToRow(std::vector part1, - std::vector part2, - std::vector part3) { - std::vector result = part1; - for (size_t i = 0; i < part2.size(); i++) { - result.push_back(part2.at(i)); - } - for (size_t i = 0; i < part3.size(); i++) { - result.push_back(part3.at(i)); - } - return result; -}; - -std::vector> unordered_rows{ - std::vector{"\"Uni Freiburg TF\"", "", "", - "POINT(7.835050 48.012670)"}, - std::vector{"\"Minster Freiburg\"", "", "", - "POINT(7.852980 47.995570)"}, - std::vector{"\"London Eye\"", "", "", - "POINT(-0.119570 51.503330)"}, - std::vector{"\"Statue of liberty\"", "", "", - "POINT(-74.044540 40.689250)"}, - std::vector{"\"eiffel tower\"", "", "", - "POINT(2.294510 48.858250)"}, -}; - -// Shortcuts -auto TF = unordered_rows.at(0); -auto Mun = unordered_rows.at(1); -auto Eye = unordered_rows.at(2); -auto Lib = unordered_rows.at(3); -auto Eif = unordered_rows.at(4); - -std::vector> unordered_rows_small{ - std::vector{"", "POINT(7.835050 48.012670)"}, - std::vector{"", "POINT(7.852980 47.995570)"}, - std::vector{"", "POINT(-0.119570 51.503330)"}, - std::vector{"", "POINT(-74.044540 40.689250)"}, - std::vector{"", "POINT(2.294510 48.858250)"}}; - -// Shortcuts -auto sTF = unordered_rows_small.at(0); -auto sMun = unordered_rows_small.at(1); -auto sEye = unordered_rows_small.at(2); -auto sLib = unordered_rows_small.at(3); -auto sEif = unordered_rows_small.at(4); - -// in all calculations below, the factor 1000 is used to convert from km to m - -// distance from the object to itself should be zero -std::vector expectedDistSelf{"0"}; - -// helper functions -auto P = [](double x, double y) { return GeoPoint(y, x); }; - -auto expectedDist = [](const GeoPoint& p1, const GeoPoint& p2) { - auto p1_ = S2Point{S2LatLng::FromDegrees(p1.getLat(), p1.getLng())}; - auto p2_ = S2Point{S2LatLng::FromDegrees(p2.getLat(), p2.getLng())}; - - return std::to_string(static_cast(S2Earth::ToMeters(S1Angle(p1_, p2_)))); -}; - -// Places for testing -auto PUni = P(7.83505, 48.01267); -auto PMun = P(7.85298, 47.99557); -auto PEif = P(2.29451, 48.85825); -auto PEye = P(-0.11957, 51.50333); -auto PLib = P(-74.04454, 40.68925); -auto testPlaces = std::vector{PUni, PMun, PEif, PEye, PLib}; - -// distance from Uni Freiburg to Freiburger Münster is 2,33 km according to -// google maps -std::vector expectedDistUniMun{expectedDist(PUni, PMun)}; - -// distance from Uni Freiburg to Eiffel Tower is 419,32 km according to -// google maps -std::vector expectedDistUniEif{expectedDist(PUni, PEif)}; - -// distance from Minster Freiburg to eiffel tower is 421,09 km according to -// google maps -std::vector expectedDistMunEif{expectedDist(PMun, PEif)}; - -// distance from london eye to eiffel tower is 340,62 km according to -// google maps -std::vector expectedDistEyeEif{expectedDist(PEye, PEif)}; - -// distance from Uni Freiburg to London Eye is 690,18 km according to -// google maps -std::vector expectedDistUniEye{expectedDist(PUni, PEye)}; - -// distance from Minster Freiburg to London Eye is 692,39 km according to -// google maps -std::vector expectedDistMunEye{expectedDist(PMun, PEye)}; - -// distance from Uni Freiburg to Statue of Liberty is 6249,55 km according to -// google maps -std::vector expectedDistUniLib{expectedDist(PUni, PLib)}; - -// distance from Minster Freiburg to Statue of Liberty is 6251,58 km -// according to google maps -std::vector expectedDistMunLib{expectedDist(PMun, PLib)}; - -// distance from london eye to statue of liberty is 5575,08 km according to -// google maps -std::vector expectedDistEyeLib{expectedDist(PEye, PLib)}; - -// distance from eiffel tower to Statue of liberty is 5837,42 km according to -// google maps -std::vector expectedDistEifLib{expectedDist(PEif, PLib)}; - -std::vector> expectedMaxDist1_rows{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}; - -std::vector> expectedMaxDist5000_rows{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}; - -std::vector> expectedMaxDist500000_rows{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist1000000_rows{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(TF, Eye, expectedDistUniEye), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Mun, Eye, expectedDistMunEye), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eye, TF, expectedDistUniEye), - mergeToRow(Eye, Mun, expectedDistMunEye), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist10000000_rows{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(TF, Eye, expectedDistUniEye), - mergeToRow(TF, Lib, expectedDistUniLib), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(Mun, Eye, expectedDistMunEye), - mergeToRow(Mun, Lib, expectedDistMunLib), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eye, TF, expectedDistUniEye), - mergeToRow(Eye, Mun, expectedDistMunEye), - mergeToRow(Eye, Lib, expectedDistEyeLib), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Lib, TF, expectedDistUniLib), - mergeToRow(Lib, Mun, expectedDistMunLib), - mergeToRow(Lib, Eye, expectedDistEyeLib), - mergeToRow(Lib, Eif, expectedDistEifLib), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(Eif, TF, expectedDistUniEif), - mergeToRow(Eif, Mun, expectedDistMunEif), - mergeToRow(Eif, Eye, expectedDistEyeEif), - mergeToRow(Eif, Lib, expectedDistEifLib)}; - -std::vector> expectedMaxDist1_rows_small{ - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), -}; - -std::vector> expectedMaxDist5000_rows_small{ - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf)}; - -std::vector> expectedMaxDist500000_rows_small{ - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist1000000_rows_small{ - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sTF, sEye, expectedDistUniEye), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sTF, expectedDistUniEye), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist10000000_rows_small{ - mergeToRow(sTF, sTF, expectedDistSelf), - mergeToRow(sTF, sMun, expectedDistUniMun), - mergeToRow(sTF, sEif, expectedDistUniEif), - mergeToRow(sTF, sEye, expectedDistUniEye), - mergeToRow(sTF, sLib, expectedDistUniLib), - mergeToRow(sMun, sMun, expectedDistSelf), - mergeToRow(sMun, sTF, expectedDistUniMun), - mergeToRow(sMun, sEif, expectedDistMunEif), - mergeToRow(sMun, sEye, expectedDistMunEye), - mergeToRow(sMun, sLib, expectedDistMunLib), - mergeToRow(sEye, sEye, expectedDistSelf), - mergeToRow(sEye, sEif, expectedDistEyeEif), - mergeToRow(sEye, sTF, expectedDistUniEye), - mergeToRow(sEye, sMun, expectedDistMunEye), - mergeToRow(sEye, sLib, expectedDistEyeLib), - mergeToRow(sLib, sLib, expectedDistSelf), - mergeToRow(sLib, sTF, expectedDistUniLib), - mergeToRow(sLib, sMun, expectedDistMunLib), - mergeToRow(sLib, sEye, expectedDistEyeLib), - mergeToRow(sLib, sEif, expectedDistEifLib), - mergeToRow(sEif, sEif, expectedDistSelf), - mergeToRow(sEif, sTF, expectedDistUniEif), - mergeToRow(sEif, sMun, expectedDistMunEif), - mergeToRow(sEif, sEye, expectedDistEyeEif), - mergeToRow(sEif, sLib, expectedDistEifLib)}; - -std::vector> expectedMaxDist1_rows_diff{ - mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf), -}; - -std::vector> expectedMaxDist5000_rows_diff{ - mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf)}; - -std::vector> expectedMaxDist500000_rows_diff{ - mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist1000000_rows_diff{ - mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(TF, sEye, expectedDistUniEye), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Mun, sEye, expectedDistMunEye), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Eye, sTF, expectedDistUniEye), - mergeToRow(Eye, sMun, expectedDistMunEye), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif)}; - -std::vector> expectedMaxDist10000000_rows_diff{ - mergeToRow(TF, sTF, expectedDistSelf), - mergeToRow(TF, sMun, expectedDistUniMun), - mergeToRow(TF, sEif, expectedDistUniEif), - mergeToRow(TF, sEye, expectedDistUniEye), - mergeToRow(TF, sLib, expectedDistUniLib), - mergeToRow(Mun, sMun, expectedDistSelf), - mergeToRow(Mun, sTF, expectedDistUniMun), - mergeToRow(Mun, sEif, expectedDistMunEif), - mergeToRow(Mun, sEye, expectedDistMunEye), - mergeToRow(Mun, sLib, expectedDistMunLib), - mergeToRow(Eye, sEye, expectedDistSelf), - mergeToRow(Eye, sEif, expectedDistEyeEif), - mergeToRow(Eye, sTF, expectedDistUniEye), - mergeToRow(Eye, sMun, expectedDistMunEye), - mergeToRow(Eye, sLib, expectedDistEyeLib), - mergeToRow(Lib, sLib, expectedDistSelf), - mergeToRow(Lib, sTF, expectedDistUniLib), - mergeToRow(Lib, sMun, expectedDistMunLib), - mergeToRow(Lib, sEye, expectedDistEyeLib), - mergeToRow(Lib, sEif, expectedDistEifLib), - mergeToRow(Eif, sEif, expectedDistSelf), - mergeToRow(Eif, sTF, expectedDistUniEif), - mergeToRow(Eif, sMun, expectedDistMunEif), - mergeToRow(Eif, sEye, expectedDistEyeEif), - mergeToRow(Eif, sLib, expectedDistEifLib)}; - -std::vector> expectedNearestNeighbors1{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}; - -std::vector> expectedNearestNeighbors2{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Lib, Eye, expectedDistEyeLib), - mergeToRow(Eif, Eye, expectedDistEyeEif)}; - -std::vector> expectedNearestNeighbors2_400000{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eif, Eye, expectedDistEyeEif)}; - -std::vector> expectedNearestNeighbors2_4000{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun)}; - -std::vector> expectedNearestNeighbors2_40{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf)}; - -std::vector> expectedNearestNeighbors3_500000{ - mergeToRow(TF, TF, expectedDistSelf), - mergeToRow(Mun, Mun, expectedDistSelf), - mergeToRow(Eye, Eye, expectedDistSelf), - mergeToRow(Lib, Lib, expectedDistSelf), - mergeToRow(Eif, Eif, expectedDistSelf), - mergeToRow(TF, Mun, expectedDistUniMun), - mergeToRow(Mun, TF, expectedDistUniMun), - mergeToRow(Mun, Eif, expectedDistMunEif), - mergeToRow(TF, Eif, expectedDistUniEif), - mergeToRow(Eye, Eif, expectedDistEyeEif), - mergeToRow(Eif, Eye, expectedDistEyeEif), - mergeToRow(Eif, TF, expectedDistUniEif)}; - -// test the compute result method on small examples -TEST_P(SpatialJoinParamTest, computeResultSmallDatasetLargeChildren) { - std::vector columnNames = { - "?name1", "?obj1", "?geo1", - "?point1", "?name2", "?obj2", - "?geo2", "?point2", "?distOfTheTwoObjectsAddedInternally"}; - buildAndTestSmallTestSetLargeChildren("", true, - expectedMaxDist1_rows, columnNames); - buildAndTestSmallTestSetLargeChildren("", false, - expectedMaxDist1_rows, columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedMaxDist5000_rows, columnNames); - buildAndTestSmallTestSetLargeChildren("", false, - expectedMaxDist5000_rows, columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedMaxDist500000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", - false, expectedMaxDist500000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", - true, expectedMaxDist1000000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", - false, expectedMaxDist1000000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", - true, expectedMaxDist10000000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", - false, expectedMaxDist10000000_rows, - columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors1, columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors2, columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors2_400000, - columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors2_4000, - columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors2_40, - columnNames); - buildAndTestSmallTestSetLargeChildren("", true, - expectedNearestNeighbors3_500000, - columnNames); -} - -TEST_P(SpatialJoinParamTest, computeResultSmallDatasetSmallChildren) { - std::vector columnNames{"?obj1", "?point1", "?obj2", "?point2", - "?distOfTheTwoObjectsAddedInternally"}; - buildAndTestSmallTestSetSmallChildren("", true, - expectedMaxDist1_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", false, - expectedMaxDist1_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", true, - expectedMaxDist5000_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", false, - expectedMaxDist5000_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", true, - expectedMaxDist500000_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", - false, expectedMaxDist500000_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren("", - true, expectedMaxDist1000000_rows_small, - columnNames); - buildAndTestSmallTestSetSmallChildren( - "", false, - expectedMaxDist1000000_rows_small, columnNames); - buildAndTestSmallTestSetSmallChildren( - "", true, - expectedMaxDist10000000_rows_small, columnNames); - buildAndTestSmallTestSetSmallChildren( - "", false, - expectedMaxDist10000000_rows_small, columnNames); -} - -TEST_P(SpatialJoinParamTest, computeResultSmallDatasetDifferentSizeChildren) { - std::vector columnNames{"?name1", - "?obj1", - "?geo1", - "?point1", - "?obj2", - "?point2", - "?distOfTheTwoObjectsAddedInternally"}; - buildAndTestSmallTestSetDiffSizeChildren("", true, - expectedMaxDist1_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren("", false, - expectedMaxDist1_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren("", true, - expectedMaxDist1_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren("", false, - expectedMaxDist1_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren("", - true, expectedMaxDist5000_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren("", - false, expectedMaxDist5000_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren("", - true, expectedMaxDist5000_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren("", - false, expectedMaxDist5000_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, expectedMaxDist500000_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, expectedMaxDist500000_rows_diff, - columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, expectedMaxDist500000_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, expectedMaxDist500000_rows_diff, - columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, - expectedMaxDist1000000_rows_diff, columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, - expectedMaxDist1000000_rows_diff, columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, - expectedMaxDist1000000_rows_diff, columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, - expectedMaxDist1000000_rows_diff, columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, - expectedMaxDist10000000_rows_diff, columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, - expectedMaxDist10000000_rows_diff, columnNames, true); - buildAndTestSmallTestSetDiffSizeChildren( - "", true, - expectedMaxDist10000000_rows_diff, columnNames, false); - buildAndTestSmallTestSetDiffSizeChildren( - "", false, - expectedMaxDist10000000_rows_diff, columnNames, false); -} - -INSTANTIATE_TEST_SUITE_P(SpatialJoin, SpatialJoinParamTest, ::testing::Bool()); - -} // end of Namespace computeResultTest +using namespace SpatialJoinTestHelpers; namespace maxDistanceParsingTest { @@ -1031,16 +131,16 @@ void testAddChild(bool addLeftChildFirst) { } }; - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build inputs =================================== TripleComponent point1{Variable{"?point1"}}; TripleComponent point2{Variable{"?point2"}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?obj1", std::string{""}, "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?obj2", std::string{""}, "?point2"}); + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); SparqlTriple triple{point1, "", point2}; std::shared_ptr spatialJoinOperation = @@ -1087,16 +187,16 @@ TEST(SpatialJoin, addChild) { } TEST(SpatialJoin, isConstructed) { - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build inputs =================================== TripleComponent point1{Variable{"?point1"}}; TripleComponent point2{Variable{"?point2"}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?obj1", std::string{""}, "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?obj2", std::string{""}, "?point2"}); + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); SparqlTriple triple{point1, "", point2}; std::shared_ptr spatialJoinOperation = @@ -1120,16 +220,16 @@ TEST(SpatialJoin, isConstructed) { } TEST(SpatialJoin, getChildren) { - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build inputs =================================== TripleComponent point1{Variable{"?point1"}}; TripleComponent point2{Variable{"?point2"}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?obj1", std::string{""}, "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?obj2", std::string{""}, "?point2"}); + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); SparqlTriple triple{point1, "", point2}; std::shared_ptr spatialJoinOperation = @@ -1196,14 +296,12 @@ void testGetResultWidthOrVariableToColumnMap(bool leftSideBigChild, std::string geo = absl::StrCat("?geo", numberOfChild); std::string point = absl::StrCat("?point", numberOfChild); if (getBigChild) { - return computeResultTest::buildMediumChild( - qec, {obj, std::string{""}, name}, - {obj, std::string{""}, geo}, - {geo, std::string{""}, point}, obj, geo); + return buildMediumChild(qec, {obj, std::string{""}, name}, + {obj, std::string{""}, geo}, + {geo, std::string{""}, point}, obj, geo); } else { - return computeResultTest::buildSmallChild( - qec, {obj, std::string{""}, geo}, - {geo, std::string{""}, point}, geo); + return buildSmallChild(qec, {obj, std::string{""}, geo}, + {geo, std::string{""}, point}, geo); } }; auto addExpectedColumns = @@ -1222,7 +320,7 @@ void testGetResultWidthOrVariableToColumnMap(bool leftSideBigChild, return expectedColumns; }; - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); @@ -1339,12 +437,12 @@ void testKnownEmptyResult(bool leftSideEmptyChild, bool rightSideEmptyChild, auto getChild = [](QueryExecutionContext* qec, bool emptyChild) { std::string predicate = emptyChild ? "" : ""; - return computeResultTest::buildSmallChild( - qec, {"?obj1", predicate, "?geo1"}, - {"?geo1", std::string{""}, "?point1"}, "?geo1"); + return buildSmallChild(qec, {"?obj1", predicate, "?geo1"}, + {"?geo1", std::string{""}, "?point1"}, + "?geo1"); }; - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); @@ -1398,7 +496,7 @@ TEST(SpatialJoin, knownEmpyResult) { namespace resultSortedOn { TEST(SpatialJoin, resultSortedOn) { - std::string kg = localTestHelpers::createSmallDatasetWithPoints(); + std::string kg = createSmallDatasetWithPoints(); ad_utility::MemorySize blocksizePermutations = 16_MB; auto qec = getQec(kg, true, true, false, blocksizePermutations, false); @@ -1411,10 +509,8 @@ TEST(SpatialJoin, resultSortedOn) { TripleComponent obj1{Variable{"?point1"}}; TripleComponent obj2{Variable{"?point2"}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?geometry1", "", "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?geometry2", "", "?point2"}); + auto leftChild = buildIndexScan(qec, {"?geometry1", "", "?point1"}); + auto rightChild = buildIndexScan(qec, {"?geometry2", "", "?point2"}); std::shared_ptr spatialJoinOperation = ad_utility::makeExecutionTree(qec, spatialJoinTriple, @@ -1457,17 +553,17 @@ TEST(SpatialJoin, getDescriptor) { } TEST(SpatialJoin, getCacheKeyImpl) { - auto qec = localTestHelpers::buildTestQEC(); + auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); // ====================== build inputs =================================== auto spatialJoinTriple = SparqlTriple{TripleComponent{Variable{"?point1"}}, "", TripleComponent{Variable{"?point2"}}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?obj1", std::string{""}, "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?obj2", std::string{""}, "?point2"}); + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); std::shared_ptr spatialJoinOperation = ad_utility::makeExecutionTree(qec, spatialJoinTriple, @@ -1518,7 +614,7 @@ void testMultiplicitiesOrSizeEstimate(bool addLeftChildFirst, ASSERT_TRUE(value1 * 1.00001 > value2); }; - std::string kg = localTestHelpers::createSmallDatasetWithPoints(); + std::string kg = createSmallDatasetWithPoints(); // add multiplicities to test knowledge graph kg += " \"testing multiplicity\" ."; @@ -1534,7 +630,7 @@ void testMultiplicitiesOrSizeEstimate(bool addLeftChildFirst, "", TripleComponent{Variable{"?point2"}}}; // ===================== build the first child =============================== - auto leftChild = computeResultTest::buildMediumChild( + auto leftChild = buildMediumChild( qec, {"?obj1", std::string{""}, "?name1"}, {"?obj1", std::string{""}, "?geo1"}, {"?geo1", std::string{""}, "?point1"}, "?obj1", "?geo1"); @@ -1549,7 +645,7 @@ void testMultiplicitiesOrSizeEstimate(bool addLeftChildFirst, // node_5 eiffel tower geometry5 POINT(2.29451 48.85825) // ======================= build the second child ============================ - auto rightChild = computeResultTest::buildMediumChild( + auto rightChild = buildMediumChild( qec, {"?obj2", std::string{""}, "?name2"}, {"?obj2", std::string{""}, "?geo2"}, {"?geo2", std::string{""}, "?point2"}, "?obj2", "?geo2"); @@ -1639,7 +735,7 @@ void testMultiplicitiesOrSizeEstimate(bool addLeftChildFirst, // ======================== hard coded test ================================ // here the children are only index scans, as they are perfectly predictable // in relation to size and multiplicity estimates - std::string kg = localTestHelpers::createSmallDatasetWithPoints(); + std::string kg = createSmallDatasetWithPoints(); // add multiplicities to test knowledge graph kg += " \"POINT(7.12345 48.12345)\"."; @@ -1659,10 +755,8 @@ void testMultiplicitiesOrSizeEstimate(bool addLeftChildFirst, TripleComponent obj1{Variable{"?point1"}}; TripleComponent subj2{Variable{"?geometry2"}}; TripleComponent obj2{Variable{"?point2"}}; - auto leftChild = computeResultTest::buildIndexScan( - qec, {"?geometry1", "", "?point1"}); - auto rightChild = computeResultTest::buildIndexScan( - qec, {"?geometry2", "", "?point2"}); + auto leftChild = buildIndexScan(qec, {"?geometry1", "", "?point1"}); + auto rightChild = buildIndexScan(qec, {"?geometry2", "", "?point2"}); std::shared_ptr spatialJoinOperation = ad_utility::makeExecutionTree(qec, spatialJoinTriple, diff --git a/test/engine/SpatialJoinTestHelpers.h b/test/engine/SpatialJoinTestHelpers.h new file mode 100644 index 000000000..2b4b8913a --- /dev/null +++ b/test/engine/SpatialJoinTestHelpers.h @@ -0,0 +1,155 @@ +#pragma once + +#include + +#include "../util/IndexTestHelpers.h" +#include "./../../src/util/GeoSparqlHelpers.h" +#include "engine/ExportQueryExecutionTrees.h" +#include "engine/IndexScan.h" +#include "engine/Join.h" +#include "engine/QueryExecutionTree.h" +#include "engine/SpatialJoin.h" +#include "engine/SpatialJoinAlgorithms.h" +#include "parser/data/Variable.h" + +namespace SpatialJoinTestHelpers { + +auto makePointLiteral = [](std::string_view c1, std::string_view c2) { + return absl::StrCat(" \"POINT(", c1, " ", c2, ")\"^^<", GEO_WKT_LITERAL, ">"); +}; + +// helper function to create a vector of strings from a result table +inline std::vector printTable(const QueryExecutionContext* qec, + const Result* table) { + std::vector output; + for (size_t i = 0; i < table->idTable().numRows(); i++) { + std::string line = ""; + for (size_t k = 0; k < table->idTable().numColumns(); k++) { + auto test = ExportQueryExecutionTrees::idToStringAndType( + qec->getIndex(), table->idTable().at(i, k), {}); + line += test.value().first; + line += " "; + } + output.push_back(line.substr(0, line.size() - 1)); // ignore last " " + } + return output; +} + +// this helper function reorders an input vector according to the variable to +// column map to make the string array match the order of the result, which +// should be tested (it uses a vector of vectors (the first vector is containing +// each column of the result, each column consist of a vector, where each entry +// is a row of this column)) +inline std::vector> orderColAccordingToVarColMap( + VariableToColumnMap varColMaps, + std::vector> columns, + std::vector columnNames) { + std::vector> result; + auto indVariableMap = copySortedByColumnIndex(varColMaps); + for (size_t i = 0; i < indVariableMap.size(); i++) { + for (size_t k = 0; k < columnNames.size(); k++) { + if (indVariableMap.at(i).first.name() == columnNames.at(k)) { + result.push_back(columns.at(k)); + break; + } + } + } + return result; +} + +// helper function to create a vector of strings representing rows, from a +// vector of strings representing columns. Please make sure, that the order of +// the columns is already matching the order of the result. If this is not the +// case call the function orderColAccordingToVarColMap +inline std::vector createRowVectorFromColumnVector( + std::vector> column_vector) { + std::vector result; + if (column_vector.size() > 0) { + for (size_t i = 0; i < column_vector.at(0).size(); i++) { + std::string str = ""; + for (size_t k = 0; k < column_vector.size(); k++) { + str += column_vector.at(k).at(i); + str += " "; + } + result.push_back(str.substr(0, str.size() - 1)); + } + } + return result; +} + +// create a small test dataset, which focuses on points as geometry objects. +// Note, that some of these objects have a polygon representation, but for +// testing purposes, they get represented as a point here. I took those +// points, such that it is obvious, which pair of objects should be included, +// when the maximum distance is x meters. Please note, that these datapoints +// are not copied from a real input file. Copying the query will therefore +// likely not result in the same results as here (also the names, coordinates, +// etc. might be different in the real datasets) +inline std::string createSmallDatasetWithPoints() { + auto addPoint = [](std::string& kg, std::string number, std::string name, + std::string point) { + kg += absl::StrCat(" ", name, " . . ", point, " ."); + }; + std::string kg2; + auto p = makePointLiteral; + addPoint(kg2, "1", "\"Uni Freiburg TF\"", p("7.83505", "48.01267")); + addPoint(kg2, "2", "\"Minster Freiburg\"", p("7.85298", "47.99557")); + addPoint(kg2, "3", "\"London Eye\"", p("-0.11957", "51.50333")); + addPoint(kg2, "4", "\"Statue of liberty\"", p("-74.04454", "40.68925")); + addPoint(kg2, "5", "\"eiffel tower\"", p("2.29451", "48.85825")); + + return kg2; +} + +inline QueryExecutionContext* buildTestQEC() { + std::string kg = createSmallDatasetWithPoints(); + ad_utility::MemorySize blocksizePermutations = 16_MB; + auto qec = ad_utility::testing::getQec(kg, true, true, false, + blocksizePermutations, false); + return qec; +} + +inline std::shared_ptr buildIndexScan( + QueryExecutionContext* qec, std::array triple) { + TripleComponent subject{Variable{triple.at(0)}}; + TripleComponent object{Variable{triple.at(2)}}; + return ad_utility::makeExecutionTree( + qec, Permutation::Enum::PSO, SparqlTriple{subject, triple.at(1), object}); +} + +inline std::shared_ptr buildJoin( + QueryExecutionContext* qec, std::shared_ptr tree1, + std::shared_ptr tree2, Variable joinVariable) { + auto varCol1 = tree1->getVariableColumns(); + auto varCol2 = tree2->getVariableColumns(); + size_t col1 = varCol1[joinVariable].columnIndex_; + size_t col2 = varCol2[joinVariable].columnIndex_; + return ad_utility::makeExecutionTree(qec, tree1, tree2, col1, col2, + true); +} + +inline std::shared_ptr buildMediumChild( + QueryExecutionContext* qec, std::array triple1, + std::array triple2, std::array triple3, + std::string joinVariable1_, std::string joinVariable2_) { + Variable joinVariable1{joinVariable1_}; + Variable joinVariable2{joinVariable2_}; + auto scan1 = buildIndexScan(qec, triple1); + auto scan2 = buildIndexScan(qec, triple2); + auto scan3 = buildIndexScan(qec, triple3); + auto join = buildJoin(qec, scan1, scan2, joinVariable1); + return buildJoin(qec, join, scan3, joinVariable2); +} + +inline std::shared_ptr buildSmallChild( + QueryExecutionContext* qec, std::array triple1, + std::array triple2, std::string joinVariable_) { + Variable joinVariable{joinVariable_}; + auto scan1 = buildIndexScan(qec, triple1); + auto scan2 = buildIndexScan(qec, triple2); + return buildJoin(qec, scan1, scan2, joinVariable); +} + +} // namespace SpatialJoinTestHelpers