Skip to content

Commit

Permalink
[Backport 2.x] GeoTile, and GeoGrid aggregation (#8295)
Browse files Browse the repository at this point in the history
* Add GeoTile and GeoHash Grid aggregations on GeoShapes. (#5589)

Src files for GeoTile and GeoHash Aggregations on GeoShape with integration
tests.

Signed-off-by: Navneet Verma <[email protected]>

* [opensearch-project/geospatial#212] Fixing the IT for GeoTilesAggrega… (#6120)

Fixing the IT for GeoTilesAggregation.

Signed-off-by: Navneet Verma <[email protected]>

* [#6187, #6222] Fixing the GeoShapes GeoHash and GeoTile Aggregations Integration tests. (#6242)

Changes done:
* Fixed the ArrayIndexOutOfBoundsException.
* Reduced the precision for GeoShapes Aggregation IT testing.

Signed-off-by: Navneet Verma <[email protected]>

* [#7101] Fixing the GeoTileIT#testMultivaluedGeoPointsAggregation test case. (#7166)

The issue occurred because we encode the GeoPoint as a long, and that encoding introduces a small precision error. This error was not accounted for while generating the expected tile counts for the expected output.

Signed-off-by: Navneet Verma <[email protected]>

---------

Signed-off-by: Navneet Verma <[email protected]>
Signed-off-by: Heemin Kim <[email protected]>
Co-authored-by: Navneet Verma <[email protected]>
  • Loading branch information
heemin32 and navneet1v authored Jun 30, 2023
1 parent 04ec607 commit 0dbafea
Show file tree
Hide file tree
Showing 27 changed files with 1,066 additions and 163 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Addition of GeoShape ValueSource level code interfaces for accessing the DocValues.
- Addition of Missing Value feature in the GeoShape Aggregations.
- Enable Point based optimization for custom comparators ([#8168](https://github.com/opensearch-project/OpenSearch/pull/8168))
- Add GeoTile and GeoHash Grid aggregations on GeoShapes. ([#5589](https://github.com/opensearch-project/OpenSearch/pull/5589))

### Dependencies
- Bump `com.azure:azure-storage-common` from 12.21.0 to 12.21.1 (#7566, #7814)
Expand Down
2 changes: 1 addition & 1 deletion modules/geo/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ apply plugin: 'opensearch.yaml-rest-test'
apply plugin: 'opensearch.internal-cluster-test'

opensearchplugin {
description 'Plugin for geospatial features in OpenSearch. Registering the geo_shape and aggregations GeoBounds on Geo_Shape and Geo_Point'
description 'Plugin for geospatial features in OpenSearch. Registering the geo_shape and aggregations on GeoShape and GeoPoint'
classname 'org.opensearch.geo.GeoModulePlugin'
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.opensearch.geo;

import org.opensearch.geometry.utils.StandardValidator;
import org.opensearch.geometry.utils.WellKnownText;
import org.opensearch.index.mapper.GeoShapeFieldMapper;
import org.opensearch.plugins.Plugin;
import org.opensearch.test.OpenSearchIntegTestCase;
Expand All @@ -24,6 +26,8 @@ public abstract class GeoModulePluginIntegTestCase extends OpenSearchIntegTestCa

protected static final double GEOHASH_TOLERANCE = 1E-5D;

protected static final WellKnownText WKT = new WellKnownText(true, new StandardValidator(true));

/**
* Returns a collection of plugins that should be loaded on each node for doing the integration tests. As this
* geo plugin is not getting packaged in a zip, we need to load it before the tests run.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.geo.search.aggregations.bucket;

import com.carrotsearch.hppc.ObjectIntHashMap;
import com.carrotsearch.hppc.ObjectIntMap;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.opensearch.Version;
import org.opensearch.action.index.IndexRequestBuilder;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.geo.GeoShapeDocValue;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.geo.GeoModulePluginIntegTestCase;
import org.opensearch.geo.tests.common.RandomGeoGenerator;
import org.opensearch.geo.tests.common.RandomGeoGeometryGenerator;
import org.opensearch.geometry.Geometry;
import org.opensearch.geometry.Rectangle;
import org.opensearch.test.VersionUtils;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;

/**
 * Base class for the bucket aggregation integration tests of the geo module. It holds the helpers that are shared
 * between the different bucket aggregations: index preparation for GeoShape and GeoPoint fields, book-keeping of the
 * expected per-bucket document counts, and small document-building utilities.
 * <p>
 * Code that is useful to integration tests other than bucket aggregations does not belong here; put it in
 * {@link GeoModulePluginIntegTestCase} instead.
 */
public abstract class AbstractGeoBucketAggregationIntegTest extends GeoModulePluginIntegTestCase {

    protected static final int MAX_PRECISION_FOR_GEO_SHAPES_AGG_TESTING = 2;

    protected static final int MIN_PRECISION_WITHOUT_BB_AGGS = 2;

    /** Number of documents indexed into every index prepared by this class. */
    protected static final int NUM_DOCS = 100;

    protected static final String GEO_SHAPE_INDEX_NAME = "geoshape_index";

    /**
     * Bounding box consulted by {@link #prepareGeoShapeIndexForAggregations(Random)}. It is not assigned in this
     * class, so test classes must set it before preparing the GeoShape index.
     */
    protected static Rectangle boundingRectangleForGeoShapesAgg;

    /** Expected document count per bucket, rebuilt by {@link #prepareGeoShapeIndexForAggregations(Random)}. */
    protected static ObjectIntMap<String> expectedDocsCountForGeoShapes;

    /** Expected document count per bucket, rebuilt by {@link #prepareSingleValueGeoPointIndex(Random)}. */
    protected static ObjectIntMap<String> expectedDocCountsForSingleGeoPoint;

    /** Expected document count per bucket, rebuilt by {@link #prepareMultiValuedGeoPointIndex(Random)}. */
    protected static ObjectIntMap<String> multiValuedExpectedDocCountsGeoPoint;

    protected static final String GEO_SHAPE_FIELD_NAME = "location_geo_shape";

    protected static final String GEO_POINT_FIELD_NAME = "location";

    protected static final String KEYWORD_FIELD_NAME = "city";

    protected static String smallestGeoHash = null;

    /** Randomly chosen index-compatible version that is used as the "created" version of the test indices. */
    protected final Version version = VersionUtils.randomIndexCompatibleVersion(random());

    @Override
    protected boolean forbidPrivateIndexSettings() {
        return false;
    }

    /**
     * Prepares a GeoShape index for testing the GeoShape bucket aggregations. {@link #NUM_DOCS} random geometries are
     * indexed and, for each of them, the buckets returned by {@link #generateBucketsForGeometry} are recorded in
     * {@link #expectedDocsCountForGeoShapes}. Different bucket aggregations create buckets differently, so test
     * classes override {@link AbstractGeoBucketAggregationIntegTest#generateBucketsForGeometry} accordingly.
     * <p>
     * {@link #boundingRectangleForGeoShapesAgg} must be set before calling this method; the method keeps drawing
     * geometries for the last document until at least one indexed shape intersects that rectangle.
     *
     * @param random {@link Random}
     * @throws Exception thrown during index creation.
     */
    protected void prepareGeoShapeIndexForAggregations(final Random random) throws Exception {
        // Fail with a readable message instead of an NPE (or an endless retry loop) further down.
        assertNotNull(
            "boundingRectangleForGeoShapesAgg must be set before preparing the GeoShape index",
            boundingRectangleForGeoShapesAgg
        );
        expectedDocsCountForGeoShapes = new ObjectIntHashMap<>();
        final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build();
        final List<IndexRequestBuilder> geoshapes = new ArrayList<>();
        assertAcked(prepareCreate(GEO_SHAPE_INDEX_NAME).setSettings(settings).setMapping(GEO_SHAPE_FIELD_NAME, "type=geo_shape"));

        boolean isShapeIntersectingBB = false;
        // One request is added per accepted geometry, so the list size doubles as the document counter.
        while (geoshapes.size() < NUM_DOCS) {
            final Geometry geometry = RandomGeoGeometryGenerator.randomGeometry(random);
            final GeoShapeDocValue geometryDocValue = GeoShapeDocValue.createGeometryDocValue(geometry);
            // Make sure that at least one shape intersects the bounding box.
            if (!isShapeIntersectingBB) {
                isShapeIntersectingBB = geometryDocValue.isIntersectingRectangle(boundingRectangleForGeoShapesAgg);
                final boolean isLastDocument = geoshapes.size() == NUM_DOCS - 1;
                if (!isShapeIntersectingBB && isLastDocument) {
                    // Only one slot is left and nothing intersects yet: discard this geometry and draw another one.
                    continue;
                }
            }

            final Set<String> values = generateBucketsForGeometry(geometry, geometryDocValue);
            geoshapes.add(indexGeoShape(GEO_SHAPE_INDEX_NAME, geometry));
            incrementDocCounts(expectedDocsCountForGeoShapes, values);
        }
        indexRandom(true, geoshapes);
        ensureGreen(GEO_SHAPE_INDEX_NAME);
    }

    /**
     * Returns a set of buckets for the shape at different precision level. Override this method for different bucket
     * aggregations.
     *
     * @param geometry {@link Geometry}
     * @param geoShapeDocValue {@link GeoShapeDocValue}
     * @return A {@link Set} of {@link String} which represents the buckets.
     */
    protected abstract Set<String> generateBucketsForGeometry(final Geometry geometry, final GeoShapeDocValue geoShapeDocValue);

    /**
     * Prepares the indices used for testing bucket aggregations on single-valued GeoPoint fields: an empty
     * {@code idx_unmapped} index and an {@code idx} index holding {@link #NUM_DOCS} documents with one random point
     * each. The buckets returned by {@link AbstractGeoBucketAggregationIntegTest#generateBucketsForGeoPoint} for every
     * point are recorded in {@link #expectedDocCountsForSingleGeoPoint}.
     *
     * @param random {@link Random}
     * @throws Exception thrown during index creation.
     */
    protected void prepareSingleValueGeoPointIndex(final Random random) throws Exception {
        expectedDocCountsForSingleGeoPoint = new ObjectIntHashMap<>();
        createIndex("idx_unmapped");
        final Settings settings = Settings.builder()
            .put(IndexMetadata.SETTING_VERSION_CREATED, version)
            .put("index.number_of_shards", 4)
            .put("index.number_of_replicas", 0)
            .build();
        assertAcked(
            prepareCreate("idx").setSettings(settings)
                .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword")
        );
        final List<IndexRequestBuilder> cities = new ArrayList<>();
        for (int doc = 0; doc < NUM_DOCS; doc++) {
            final GeoPoint geoPoint = RandomGeoGenerator.randomPoint(random);
            final String latLon = geoPoint.getLat() + ", " + geoPoint.getLon();
            cities.add(indexGeoPoint("idx", geoPoint.toString(), latLon));
            incrementDocCounts(expectedDocCountsForSingleGeoPoint, generateBucketsForGeoPoint(geoPoint));
        }
        indexRandom(true, cities);
        ensureGreen("idx_unmapped", "idx");
    }

    /**
     * Prepares the {@code multi_valued_idx} index used for testing bucket aggregations on multi-valued GeoPoint
     * fields. {@link #NUM_DOCS} documents are indexed, each carrying between 0 and 3 random points. The buckets of
     * all points of a document are merged into a set first, so a document counts once per bucket in
     * {@link #multiValuedExpectedDocCountsGeoPoint} even when several of its points fall into the same bucket.
     *
     * @param random {@link Random}
     * @throws Exception thrown during index creation.
     */
    protected void prepareMultiValuedGeoPointIndex(final Random random) throws Exception {
        multiValuedExpectedDocCountsGeoPoint = new ObjectIntHashMap<>();
        final Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, version).build();
        final List<IndexRequestBuilder> cities = new ArrayList<>();
        assertAcked(
            prepareCreate("multi_valued_idx").setSettings(settings)
                .setMapping(GEO_POINT_FIELD_NAME, "type=geo_point", KEYWORD_FIELD_NAME, "type=keyword")
        );
        for (int doc = 0; doc < NUM_DOCS; doc++) {
            final int numPoints = random.nextInt(4);
            final List<String> points = new ArrayList<>();
            final Set<String> buckets = new HashSet<>();
            for (int point = 0; point < numPoints; point++) {
                final GeoPoint geoPoint = RandomGeoGenerator.randomPoint(random);
                points.add(geoPoint.getLat() + "," + geoPoint.getLon());
                buckets.addAll(generateBucketsForGeoPoint(geoPoint));
            }
            cities.add(indexGeoPoints("multi_valued_idx", Integer.toString(doc), points));
            incrementDocCounts(multiValuedExpectedDocCountsGeoPoint, buckets);
        }
        indexRandom(true, cities);
        ensureGreen("multi_valued_idx");
    }

    /**
     * Returns a set of buckets for the GeoPoint at different precision level. Override this method for different bucket
     * aggregations.
     *
     * @param geoPoint {@link GeoPoint}
     * @return A {@link Set} of {@link String} which represents the buckets.
     */
    protected abstract Set<String> generateBucketsForGeoPoint(final GeoPoint geoPoint);

    /**
     * Builds the request that indexes a GeoShape, serialized as WKT, in the provided index.
     *
     * @param index {@link String} index name
     * @param geometry {@link Geometry} the Geometry to be indexed
     * @return {@link IndexRequestBuilder}
     * @throws Exception thrown during creation of {@link IndexRequestBuilder}
     */
    protected IndexRequestBuilder indexGeoShape(final String index, final Geometry geometry) throws Exception {
        final XContentBuilder source = jsonBuilder().startObject().field(GEO_SHAPE_FIELD_NAME, WKT.toWKT(geometry)).endObject();
        return client().prepareIndex(index).setSource(source);
    }

    /**
     * Builds the request that indexes a {@link List} of GeoPoints in the provided index.
     *
     * @param index {@link String} index name
     * @param name {@link String} value for the keyword field in the index
     * @param latLon {@link List} of {@link String}, each being the String representation of a GeoPoint; when
     *               {@code null} the GeoPoint field is left out of the document
     * @return {@link IndexRequestBuilder}
     * @throws Exception thrown during creation of {@link IndexRequestBuilder}
     */
    protected IndexRequestBuilder indexGeoPoints(final String index, final String name, final List<String> latLon) throws Exception {
        final XContentBuilder source = jsonBuilder().startObject().field(KEYWORD_FIELD_NAME, name);
        if (latLon != null) {
            source.field(GEO_POINT_FIELD_NAME, latLon);
        }
        source.endObject();
        return client().prepareIndex(index).setSource(source);
    }

    /**
     * Builds the request that indexes a single GeoPoint in the provided index.
     *
     * @param index {@link String} index name
     * @param name {@link String} value for the keyword field in the index
     * @param latLon {@link String} representing the String representation of GeoPoint
     * @return {@link IndexRequestBuilder}
     * @throws Exception thrown during creation of {@link IndexRequestBuilder}
     */
    protected IndexRequestBuilder indexGeoPoint(final String index, final String name, final String latLon) throws Exception {
        return indexGeoPoints(index, name, List.of(latLon));
    }

    /**
     * Generates a Bounding Box of a fixed radius that can be used for shapes aggregations to reduce the size of
     * aggregation results. The radius is taken from {@link #getRadiusOfBoundingBox()} and must be positive.
     *
     * @param random {@link Random}
     * @return {@link Rectangle}
     */
    protected Rectangle getGridAggregationBoundingBox(final Random random) {
        final double radius = getRadiusOfBoundingBox();
        assertTrue("The radius of Bounding Box is less than or equal to 0", radius > 0);
        return RandomGeoGeometryGenerator.randomRectangle(random, radius);
    }

    /**
     * Returns a radius for the Bounding box. Test classes can override this method to change the radius of BBox for
     * the test cases. If we increase this value, it will lead to creation of a lot of buckets that can lead to
     * IndexOutOfBoundsExceptions.
     *
     * @return double
     */
    protected double getRadiusOfBoundingBox() {
        return 5.0;
    }

    /**
     * Encodes and decodes the {@link GeoPoint} with {@link GeoEncodingUtils} to get a {@link GeoPoint} which has the
     * exact precision which is being stored.
     *
     * @param geoPoint {@link GeoPoint}
     * @return {@link GeoPoint}
     */
    protected GeoPoint toStoragePrecision(final GeoPoint geoPoint) {
        final double storedLat = GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(geoPoint.getLat()));
        final double storedLon = GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(geoPoint.getLon()));
        return new GeoPoint(storedLat, storedLon);
    }

    /**
     * Adds one document to the expected count of every given bucket, starting a bucket at 1 when it has not been
     * seen before.
     *
     * @param docCounts map from bucket key to the expected document count; updated in place
     * @param buckets bucket keys that a single document falls into
     */
    private static void incrementDocCounts(final ObjectIntMap<String> docCounts, final Set<String> buckets) {
        for (final String bucket : buckets) {
            docCounts.put(bucket, docCounts.getOrDefault(bucket, 0) + 1);
        }
    }

}
Loading

0 comments on commit 0dbafea

Please sign in to comment.