Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial join bounding box #1546

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
if (childLeft_ && childRight_) {
size_t inputEstimate =
childLeft_->getSizeEstimate() * childRight_->getSizeEstimate();
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return inputEstimate * inputEstimate;
} else {
// Let n be the size of the left table and m the size of the right table.
Expand Down Expand Up @@ -337,10 +337,21 @@
Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(),
addDistToResult_, config_};
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return algorithms.BaselineAlgorithm();
} else {
} else if (algorithm_ == Algorithm::S2Geometry) {
return algorithms.S2geometryAlgorithm();
} else if (algorithm_ == Algorithm::BoundingBox) {
// as the BoundingBoxAlgorithms only works for max distance and not for
// nearest neighbors, S2geometry gets called as a backup, if the query is
// asking for the nearest neighbors
if (std::get_if<MaxDistanceConfig>(&config_)) {
return algorithms.BoundingBoxAlgorithm();
} else {
return algorithms.S2geometryAlgorithm();
}
} else {
AD_THROW("choose a valid Algorithm");

Check warning on line 354 in src/engine/SpatialJoin.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoin.cpp#L354

Added line #L354 was not covered by tests
}
}

Expand Down
14 changes: 10 additions & 4 deletions src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,20 @@ class SpatialJoin : public Operation {
// purposes
std::optional<size_t> getMaxResults() const;

void selectAlgorithm(bool useBaselineAlgorithm) {
useBaselineAlgorithm_ = useBaselineAlgorithm;
}
// options which can be used for the algorithm, which calculates the result
enum Algorithm { Baseline, S2Geometry, BoundingBox };

void selectAlgorithm(Algorithm algorithm) { algorithm_ = algorithm; }

std::pair<size_t, size_t> onlyForTestingGetConfig() const {
return std::pair{getMaxDist().value_or(-1), getMaxResults().value_or(-1)};
}

std::variant<NearestNeighborsConfig, MaxDistanceConfig>
onlyForTestingGetRealConfig() const {
return config_;
}

std::shared_ptr<QueryExecutionTree> onlyForTestingGetLeftChild() const {
return childLeft_;
}
Expand Down Expand Up @@ -135,5 +141,5 @@ class SpatialJoin : public Operation {
// between the two objects
bool addDistToResult_ = true;
const string nameDistanceInternal_ = "?distOfTheTwoObjectsAddedInternally";
bool useBaselineAlgorithm_ = false;
Algorithm algorithm_ = BoundingBox;
};
264 changes: 264 additions & 0 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,267 @@
return Result(std::move(result), std::vector<ColumnIndex>{},
Result::getMergedLocalVocab(*resultLeft, *resultRight));
}

// ____________________________________________________________________________
std::vector<box> SpatialJoinAlgorithms::computeBoundingBox(
const point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 234 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L233-L234

Added lines #L233 - L234 were not covered by tests
// haversine function
auto haversine = [](double theta) { return (1 - std::cos(theta)) / 2; };

// inverse haversine function
auto archaversine = [](double theta) { return std::acos(1 - 2 * theta); };

// safety buffer for numerical inaccuracies
double maxDistInMetersBuffer = static_cast<double>(maxDist.value());
if (maxDist.value() < 10) {
maxDistInMetersBuffer = 10;
} else if (maxDist.value() < std::numeric_limits<long long>::max() / 1.02) {
maxDistInMetersBuffer = 1.01 * maxDist.value();
} else {
maxDistInMetersBuffer = std::numeric_limits<long long>::max();
}

Check warning on line 249 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L248-L249

Added lines #L248 - L249 were not covered by tests

// for large distances, where the lower calculation would just result in
// a single bounding box for the whole planet, do an optimized version
if (maxDist.value() > circumferenceMax_ / 4.0 &&
maxDist.value() < circumferenceMax_ / 2.01) {
return computeAntiBoundingBox(startPoint);
}

// compute latitude bound
double upperLatBound =
startPoint.get<1>() + maxDistInMetersBuffer * (360 / circumferenceMax_);
double lowerLatBound =
startPoint.get<1>() - maxDistInMetersBuffer * (360 / circumferenceMax_);
bool poleReached = false;
// test for "overflows"
if (lowerLatBound <= -90) {
lowerLatBound = -90;
poleReached = true; // south pole reached
}
if (upperLatBound >= 90) {
upperLatBound = 90;
poleReached = true; // north pole reached
}
if (poleReached) {
return {box(point(-180.0f, lowerLatBound), point(180.0f, upperLatBound))};
}

// compute longitude bound. For an explanation of the calculation and the
// naming convention see my master thesis
double alpha = maxDistInMetersBuffer / radius_;
double gamma =
(90 - std::abs(startPoint.get<1>())) * (2 * std::numbers::pi / 360);
double beta = std::acos((std::cos(gamma) / std::cos(alpha)));
double delta = 0;
if (maxDistInMetersBuffer > circumferenceMax_ / 20) {
// use law of cosines
delta = std::acos((std::cos(alpha) - std::cos(gamma) * std::cos(beta)) /
(std::sin(gamma) * std::sin(beta)));
} else {
// use law of haversines for numerical stability
delta = archaversine((haversine(alpha - haversine(gamma - beta))) /
(std::sin(gamma) * std::sin(beta)));
}
double lonRange = delta * 360 / (2 * std::numbers::pi);
double leftLonBound = startPoint.get<0>() - lonRange;
double rightLonBound = startPoint.get<0>() + lonRange;
// test for "overflows" and create two bounding boxes if necessary
if (leftLonBound < -180) {
box box1 =
box(point(-180, lowerLatBound), point(rightLonBound, upperLatBound));
box box2 = box(point(leftLonBound + 360, lowerLatBound),
point(180, upperLatBound));
return {box1, box2};
} else if (rightLonBound > 180) {
box box1 =
box(point(leftLonBound, lowerLatBound), point(180, upperLatBound));
box box2 = box(point(-180, lowerLatBound),
point(rightLonBound - 360, upperLatBound));
return {box1, box2};
}
// default case, when no bound has an "overflow"
return {box(point(leftLonBound, lowerLatBound),
point(rightLonBound, upperLatBound))};
}

// ____________________________________________________________________________
std::vector<box> SpatialJoinAlgorithms::computeAntiBoundingBox(
const point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 322 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L321-L322

Added lines #L321 - L322 were not covered by tests
// point on the opposite side of the globe
point antiPoint(startPoint.get<0>() + 180, startPoint.get<1>() * -1);
if (antiPoint.get<0>() > 180) {
antiPoint.set<0>(antiPoint.get<0>() - 360);
}
// for an explanation of the formula see the master thesis. Divide by two two
// only consider the distance from the point to the antiPoint, subtract
// maxDist and a safety margine from that
double antiDist =
(circumferenceMin_ / 2.0) - maxDist.value() * 1.01; // safety margin
// use the bigger circumference as an additional safety margin, use 2.01
// instead of 2.0 because of rounding inaccuracies in floating point
// operations
double distToAntiPoint = (360 / circumferenceMax_) * (antiDist / 2.01);
double upperBound = antiPoint.get<1>() + distToAntiPoint;
double lowerBound = antiPoint.get<1>() - distToAntiPoint;
double leftBound = antiPoint.get<0>() - distToAntiPoint;
double rightBound = antiPoint.get<0>() + distToAntiPoint;
bool northPoleTouched = false;
bool southPoleTouched = false;
bool boxCrosses180Longitude = false; // if the 180 to -180 line is touched
// if a pole is crossed, ignore the part after the crossing
if (upperBound > 90) {
upperBound = 90;
northPoleTouched = true;
}
if (lowerBound < -90) {
lowerBound = -90;
southPoleTouched = true;
}
if (leftBound < -180) {
leftBound += 360;
}
if (rightBound > 180) {
rightBound -= 360;
}
if (rightBound < leftBound) {
boxCrosses180Longitude = true;
}
// compute bounding boxes using the anti bounding box from above
std::vector<box> boxes;
if (!northPoleTouched) {
// add upper bounding box(es)
if (boxCrosses180Longitude) {
boxes.push_back(box(point(leftBound, upperBound), point(180, 90)));
boxes.push_back(box(point(-180, upperBound), point(rightBound, 90)));
} else {
boxes.push_back(box(point(leftBound, upperBound), point(rightBound, 90)));
}
}
if (!southPoleTouched) {
// add lower bounding box(es)
if (boxCrosses180Longitude) {
boxes.push_back(box(point(leftBound, -90), point(180, lowerBound)));
boxes.push_back(box(point(-180, -90), point(rightBound, lowerBound)));
} else {
boxes.push_back(
box(point(leftBound, -90), point(rightBound, lowerBound)));
}
}
// add the box(es) inbetween the longitude lines
if (boxCrosses180Longitude) {
// only one box needed to cover the longitudes
boxes.push_back(box(point(rightBound, -90), point(leftBound, 90)));
} else {
// two boxes needed, one left and one right of the anti bounding box
boxes.push_back(box(point(-180, -90), point(leftBound, 90)));
boxes.push_back(box(point(rightBound, -90), point(180, 90)));
}
return boxes;
}

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::containedInBoundingBoxes(
const std::vector<box>& bbox, point point1) {
// correct lon bounds if necessary
while (point1.get<0>() < -180) {
point1.set<0>(point1.get<0>() + 360);
}
while (point1.get<0>() > 180) {
point1.set<0>(point1.get<0>() - 360);
}
if (point1.get<1>() < -90) {
point1.set<1>(-90);
} else if (point1.get<1>() > 90) {
point1.set<1>(90);
}

for (size_t i = 0; i < bbox.size(); i++) {
if (boost::geometry::covered_by(point1, bbox.at(i))) {
return true;
}
}
return false;
}

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
IdTable result{numColumns, qec_->getAllocator()};

// create r-tree for smaller result table
auto smallerResult = idTableLeft;
auto otherResult = idTableRight;
bool leftResSmaller = true;
/*auto smallerChild = childLeft_;
auto otherChild = childRight_;
auto smallerVariable = leftChildVariable_;
auto otherVariable = rightChildVariable_;*/
auto smallerResJoinCol = leftJoinCol;
auto otherResJoinCol = rightJoinCol;
if (idTableLeft->numRows() > idTableRight->numRows()) {
smallerResult = idTableRight;
otherResult = idTableLeft;
leftResSmaller = false;

Check warning on line 438 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L436-L438

Added lines #L436 - L438 were not covered by tests
/*smallerChild = childRight_;
otherChild = childLeft_;
smallerVariable = rightChildVariable_;
otherVariable = leftChildVariable_; */
smallerResJoinCol = rightJoinCol;
otherResJoinCol = leftJoinCol;
}

Check warning on line 445 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L443-L445

Added lines #L443 - L445 were not covered by tests

// Todo in the benchmark: use different algorithms and compare their
// performance
bgi::rtree<value, bgi::quadratic<16>> rtree;
for (size_t i = 0; i < smallerResult->numRows(); i++) {
// get point of row i
// ColumnIndex smallerJoinCol = getJoinCol(smallerChild, smallerVariable);
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);
point p(geopoint.value().getLng(), geopoint.value().getLat());

// add every point together with the row number into the rtree
rtree.insert(std::make_pair(p, i));
}
for (size_t i = 0; i < otherResult->numRows(); i++) {
// ColumnIndex otherJoinCol = getJoinCol(otherChild, otherVariable);
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);
point p(geopoint1.value().getLng(), geopoint1.value().getLat());

// query the other rtree for every point using the following bounding box
std::vector<box> bbox = computeBoundingBox(p);
std::vector<value> results;
for (size_t k = 0; k < bbox.size(); k++) {
rtree.query(bgi::intersects(bbox.at(k)), std::back_inserter(results));
}
for (size_t k = 0; k < results.size(); k++) {
size_t rowLeft = results.at(k).second;
size_t rowRight = i;
if (!leftResSmaller) {
rowLeft = i;
rowRight = results.at(k).second;
}

Check warning on line 476 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L474-L476

Added lines #L474 - L476 were not covered by tests
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
if (distance.getDatatype() == Datatype::Int &&
distance.getInt() <= maxDist) {
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
rowRight, distance);
}
}
}
Result resTable =
Result(std::move(result), std::vector<ColumnIndex>{}, LocalVocab{});
return resTable;
}
Loading
Loading