diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cd8e54c4..62c95b2e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for `log_level` in `create_job()` and `execute_job()` ([#704](https://github.com/Open-EO/openeo-python-client/issues/704)) - Add initial support for "geometry" dimension type in `CubeMetadata` ([#705](https://github.com/Open-EO/openeo-python-client/issues/705)) - Add support for parameterized `bands` argument in `load_stac()` +- Argument `spatial_extent` in `load_collection()`/`load_stac()`: add support for Shapely objects and loading GeoJSON from a local path. ([#678](https://github.com/Open-EO/openeo-python-client/issues/678)) ### Changed diff --git a/openeo/api/process.py b/openeo/api/process.py index 1e2d840ae..0947a47d4 100644 --- a/openeo/api/process.py +++ b/openeo/api/process.py @@ -467,6 +467,9 @@ def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optiona elif isinstance(actual_type, list): if type not in actual_type: return False + elif actual_type is None: + # Without explicit "type", anything is accepted + return True else: raise ValueError(actual_type) if subtype: diff --git a/openeo/rest/_testing.py b/openeo/rest/_testing.py index 7940210d6..63e9460c5 100644 --- a/openeo/rest/_testing.py +++ b/openeo/rest/_testing.py @@ -153,7 +153,7 @@ def setup_collection( json={ "id": collection_id, # define temporal and band dim - "cube:dimensions": {"t": {"type": "temporal"}, "bands": {"type": "bands"}}, + "cube:dimensions": cube_dimensions, }, ) return self diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 5407c8839..f687d5ca1 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1256,7 +1256,7 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = def load_collection( self, collection_id: Union[str, Parameter], - spatial_extent: Union[Dict[str, float], Parameter, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, properties: Union[ @@ -1269,7 +1269,14 @@ def load_collection( Load a DataCube by collection id. :param collection_id: image collection identifier - :param spatial_extent: limit data to specified bounding box or polygons + :param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways: + - a bounding box dictionary + - a Shapely geometry object + - a GeoJSON-style dictionary + - a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + which will be loaded automatically to get the geometries as GeoJSON construct. + - a URL to a publicly accessible GeoJSON document + - a :py:class:`~openeo.api.process.Parameter` instance. :param temporal_extent: limit data to specified temporal interval. Typically, just a two-item list or tuple containing start and end date. See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation. @@ -1288,6 +1295,9 @@ def load_collection( .. versionchanged:: 0.26.0 Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument. + + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ return DataCube.load_collection( collection_id=collection_id, @@ -1346,7 +1356,7 @@ def load_result( def load_stac( self, url: str, - spatial_extent: Union[Dict[str, float], Parameter, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, @@ -1448,6 +1458,9 @@ def load_stac( .. versionchanged:: 0.23.0 Argument ``temporal_extent``: add support for year/month shorthand notation as discussed at :ref:`date-shorthand-handling`. + + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ return DataCube.load_stac( url=url, @@ -1553,7 +1566,7 @@ def load_geojson( return VectorCube.load_geojson(connection=self, data=data, properties=properties) @openeo_process - def load_url(self, url: str, format: str, options: Optional[dict] = None): + def load_url(self, url: str, format: str, options: Optional[dict] = None) -> VectorCube: """ Loads a file from a URL diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index d050e5306..4cbae0db1 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -143,7 +143,7 @@ def load_collection( cls, collection_id: Union[str, Parameter], connection: Optional[Connection] = None, - spatial_extent: Union[Dict[str, float], Parameter, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, pathlib.Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, fetch_metadata: bool = True, @@ -158,7 +158,14 @@ def load_collection( :param collection_id: image collection identifier :param connection: The backend connection to use. Can be ``None`` to work without connection and collection metadata. - :param spatial_extent: limit data to specified bounding box or polygons + :param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways: + - a bounding box dictionary + - a Shapely geometry object + - a GeoJSON-style dictionary + - a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + which will be loaded automatically to get the geometries as GeoJSON construct. + - a URL to a publicly accessible GeoJSON document + - a :py:class:`~openeo.api.process.Parameter` instance. :param temporal_extent: limit data to specified temporal interval. Typically, just a two-item list or tuple containing start and end date. See :ref:`filtering-on-temporal-extent-section` for more details on temporal extent handling and shorthand notation. @@ -177,19 +184,25 @@ def load_collection( .. versionchanged:: 0.26.0 Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument. + + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ if temporal_extent: temporal_extent = cls._get_temporal_extent(extent=temporal_extent) + spatial_extent = _get_geometry_argument( + argument=spatial_extent, + valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"], + connection=connection, + allow_none=True, + allow_parameter=True, + allow_bounding_box=True, + argument_name="spatial_extent", + process_id="load_collection", + ) - if isinstance(spatial_extent, Parameter): - if not schema_supports(spatial_extent.schema, type="object"): - warnings.warn( - "Unexpected parameterized `spatial_extent` in `load_collection`:" - f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}." - ) arguments = { 'id': collection_id, - # TODO: spatial_extent could also be a "geojson" subtype object, so we might want to allow (and convert) shapely shapes as well here. 'spatial_extent': spatial_extent, 'temporal_extent': temporal_extent, } @@ -269,7 +282,7 @@ def load_disk_collection(cls, connection: Connection, file_format: str, glob_pat def load_stac( cls, url: str, - spatial_extent: Union[Dict[str, float], Parameter, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, pathlib.Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, @@ -371,11 +384,22 @@ def load_stac( .. versionadded:: 0.33.0 + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ arguments = {"url": url} - # TODO #425 more normalization/validation of extent/band parameters if spatial_extent: - arguments["spatial_extent"] = spatial_extent + arguments["spatial_extent"] = _get_geometry_argument( + argument=spatial_extent, + valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"], + connection=connection, + allow_none=True, + allow_parameter=True, + allow_bounding_box=True, + argument_name="spatial_extent", + process_id="load_stac", + ) + if temporal_extent: arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent) bands = cls._get_bands(bands, process_id="load_stac") @@ -646,10 +670,16 @@ def filter_spatial( (which will be loaded client-side to get the geometries as GeoJSON construct). """ valid_geojson_types = [ - "Point", "MultiPoint", "LineString", "MultiLineString", - "Polygon", "MultiPolygon", "GeometryCollection", "FeatureCollection" + "Point", + "MultiPoint", + "LineString", + "MultiLineString", + "Polygon", + "MultiPolygon", + "GeometryCollection", + "FeatureCollection", ] - geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=None) + geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection=self.connection, crs=None) return self.process( process_id='filter_spatial', arguments={ @@ -1076,75 +1106,6 @@ def _merge_operator_binary_cubes( } )) - def _get_geometry_argument( - self, - argument: Union[ - shapely.geometry.base.BaseGeometry, - dict, - str, - pathlib.Path, - Parameter, - _FromNodeMixin, - ], - valid_geojson_types: List[str], - crs: Optional[str] = None, - ) -> Union[dict, Parameter, PGNode]: - """ - Convert input to a geometry as "geojson" subtype object or vectorcube. - - :param crs: value that encodes a coordinate reference system. - See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. - """ - if isinstance(argument, Parameter): - return argument - elif isinstance(argument, _FromNodeMixin): - return argument.from_node() - - if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I): - # Geometry provided as URL: load with `load_url` (with best-effort format guess) - url = urllib.parse.urlparse(argument) - suffix = pathlib.Path(url.path.lower()).suffix - format = { - ".json": "GeoJSON", - ".geojson": "GeoJSON", - ".pq": "Parquet", - ".parquet": "Parquet", - ".geoparquet": "Parquet", - }.get(suffix, suffix.split(".")[-1]) - return self.connection.load_url(url=argument, format=format) - - if ( - isinstance(argument, (str, pathlib.Path)) - and pathlib.Path(argument).is_file() - and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"] - ): - geometry = load_json(argument) - elif isinstance(argument, shapely.geometry.base.BaseGeometry): - geometry = mapping(argument) - elif isinstance(argument, dict): - geometry = argument - else: - raise OpenEoClientException(f"Invalid geometry argument: {argument!r}") - - if geometry.get("type") not in valid_geojson_types: - raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format( - t=geometry.get("type"), s=valid_geojson_types - )) - if crs: - # TODO: don't warn when the crs is Lon-Lat like EPSG:4326? - warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.") - # TODO #204 alternative for non-standard CRS in GeoJSON object? - epsg_code = normalize_crs(crs) - if epsg_code is not None: - # proj did recognize the CRS - crs_name = f"EPSG:{epsg_code}" - else: - # proj did not recognise this CRS - warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.") - crs_name = crs - geometry["crs"] = {"type": "name", "properties": {"name": crs_name}} - return geometry - @openeo_process def aggregate_spatial( self, @@ -1216,7 +1177,7 @@ def aggregate_spatial( "Point", "MultiPoint", "LineString", "MultiLineString", "Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection" ] - geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, crs=crs) + geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection= self.connection, crs=crs) reducer = build_child_callback(reducer, parent_parameters=["data"]) return VectorCube( graph=self._build_pgnode( @@ -1496,8 +1457,8 @@ def chunk_polygon( "Feature", "FeatureCollection", ] - chunks = self._get_geometry_argument( - chunks, valid_geojson_types=valid_geojson_types + chunks = _get_geometry_argument( + chunks, valid_geojson_types=valid_geojson_types, connection=self.connection ) mask_value = float(mask_value) if mask_value is not None else None return self.process( @@ -1586,7 +1547,7 @@ def apply_polygon( process = build_child_callback(process, parent_parameters=["data"], connection=self.connection) valid_geojson_types = ["Polygon", "MultiPolygon", "Feature", "FeatureCollection"] - geometries = self._get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types) + geometries = _get_geometry_argument(geometries, valid_geojson_types=valid_geojson_types, connection=self.connection) mask_value = float(mask_value) if mask_value is not None else None return self.process( process_id="apply_polygon", @@ -2074,7 +2035,7 @@ def mask_polygon( (which will be loaded client-side to get the geometries as GeoJSON construct). """ valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"] - mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs) + mask = _get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, connection=self.connection, crs=srs) return self.process( process_id="mask_polygon", arguments=dict_no_none( @@ -2925,3 +2886,102 @@ def unflatten_dimension(self, dimension: str, target_dimensions: List[str], labe label_separator=label_separator, ), ) + + +def _get_geometry_argument( + argument: Union[ + shapely.geometry.base.BaseGeometry, + dict, + str, + pathlib.Path, + Parameter, + _FromNodeMixin, + ], + *, + valid_geojson_types: List[str], + connection: Connection = None, + crs: Optional[str] = None, + allow_parameter: bool = True, + allow_bounding_box: bool = False, + allow_none: bool = False, + argument_name: str = "n/a", + process_id: str = "n/a", +) -> Union[dict, Parameter, PGNode, _FromNodeMixin, None]: + """ + Normalize a user input to a openEO-compatible geometry representation, + like a GeoJSON construct, vector cube reference, bounding box construct, + a parameter reference, ... + + :param crs: value that encodes a coordinate reference system. + See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. + :param allow_parameter: allow argument to be a :py:class:`Parameter` instance, and pass-through as such + :param allow_none: allow argument to be ``None`` and pass-through as such + :param allow_bounding_box: allow argument to be a bounding box dictionary and pass-through as such + """ + # Some quick exit shortcuts + if allow_parameter and isinstance(argument, Parameter): + if not schema_supports(argument.schema, type="object"): + warnings.warn( + f"Schema mismatch with parameter given to `{argument_name}` in `{process_id}`:" + f" expected a schema compatible with type 'object' but got {argument.schema!r}." + ) + return argument + elif isinstance(argument, _FromNodeMixin): + # Typical use case here: VectorCube instance + return argument.from_node() + elif allow_none and argument is None: + return argument + elif ( + allow_bounding_box + and isinstance(argument, dict) + and all(k in argument for k in ["west", "south", "east", "north"]) + ): + return argument + + # Support URL based geometry references (with `load_url` and best-effort format guess) + if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I): + url = urllib.parse.urlparse(argument) + suffix = pathlib.Path(url.path.lower()).suffix + format = { + ".json": "GeoJSON", + ".geojson": "GeoJSON", + ".pq": "Parquet", + ".parquet": "Parquet", + ".geoparquet": "Parquet", + }.get(suffix, suffix.split(".")[-1]) + return connection.load_url(url=argument, format=format) + + # Support loading GeoJSON from local files + if ( + isinstance(argument, (str, pathlib.Path)) + and pathlib.Path(argument).is_file() + and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"] + ): + geometry = load_json(argument) + elif isinstance(argument, shapely.geometry.base.BaseGeometry): + geometry = mapping(argument) + elif isinstance(argument, dict): + geometry = argument + else: + raise OpenEoClientException(f"Invalid geometry argument: {argument!r}") + + # The assumption at this point is that we are working with a GeoJSON style dictionary + assert isinstance(geometry, dict) + if geometry.get("type") not in valid_geojson_types: + raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format( + t=geometry.get("type"), s=valid_geojson_types + )) + if crs: + # TODO: don't warn when the crs is Lon-Lat like EPSG:4326? + warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.") + # TODO #204 alternative for non-standard CRS in GeoJSON object? + epsg_code = normalize_crs(crs) + if epsg_code is not None: + # proj did recognize the CRS + crs_name = f"EPSG:{epsg_code}" + else: + # proj did not recognise this CRS + warnings.warn(f"non-Lon-Lat CRS {crs!r} is not known to the proj library and might not be supported.") + crs_name = crs + geometry["crs"] = {"type": "name", "properties": {"name": crs_name}} + return geometry diff --git a/tests/api/test_process.py b/tests/api/test_process.py index b39b4555c..ddd02899a 100644 --- a/tests/api/test_process.py +++ b/tests/api/test_process.py @@ -291,3 +291,11 @@ def test_schema_supports_list(): assert schema_supports(schema, type="object") is True assert schema_supports(schema, type="object", subtype="datacube") is True assert schema_supports(schema, type="object", subtype="geojson") is False + + +def test_default_parameter_supports_anything(): + parameter = Parameter(name="foo") + assert schema_supports(parameter.schema, type="string") is True + assert schema_supports(parameter.schema, type="number") is True + assert schema_supports(parameter.schema, type="object") is True + assert schema_supports(parameter.schema, type="object", subtype="datacube") is True diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py index 4e4f9f73e..f353b4d42 100644 --- a/tests/rest/datacube/test_datacube.py +++ b/tests/rest/datacube/test_datacube.py @@ -137,6 +137,72 @@ def test_load_collection_connectionless_temporal_extent_shortcut(self): } } + def test_load_collection_spatial_extent_bbox(self, dummy_backend): + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} + cube = DataCube.load_collection("S2", spatial_extent=spatial_extent, connection=dummy_backend.connection) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4}, + "temporal_extent": None, + } + + def test_load_collection_spatial_extent_shapely(self, dummy_backend): + polygon = shapely.geometry.Polygon([(3, 51), (4, 51), (4, 52), (3, 52)]) + cube = DataCube.load_collection("S2", spatial_extent=polygon, connection=dummy_backend.connection) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": { + "type": "Polygon", + "coordinates": [[[3, 51], [4, 51], [4, 52], [3, 52], [3, 51]]], + }, + "temporal_extent": None, + } + + @pytest.mark.parametrize("path_factory", [str, pathlib.Path]) + def test_load_collection_spatial_extent_local_path(self, dummy_backend, path_factory, test_data): + path = path_factory(test_data.get_path("geojson/polygon02.json")) + cube = DataCube.load_collection("S2", spatial_extent=path, connection=dummy_backend.connection) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]}, + "temporal_extent": None, + } + + def test_load_collection_spatial_extent_url(self, dummy_backend): + cube = DataCube.load_collection( + "S2", spatial_extent="https://geo.test/geometry.json", connection=dummy_backend.connection + ) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"format": "GeoJSON", "url": "https://geo.test/geometry.json"}, + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "S2", + "spatial_extent": {"from_node": "loadurl1"}, + "temporal_extent": None, + }, + "result": True, + }, + } + + def test_load_collection_spatial_extent_parameter(self, dummy_backend): + cube = DataCube.load_collection( + "S2", spatial_extent=Parameter.geojson("zpatial_extent"), connection=dummy_backend.connection + ) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + "temporal_extent": None, + } + def test_load_collection_connectionless_save_result(self): cube = DataCube.load_collection("T3").save_result(format="GTiff") assert cube.flat_graph() == { @@ -179,6 +245,71 @@ def test_load_stac_connectionless_save_result(self): }, } + def test_load_stac_spatial_extent_bbox(self, dummy_backend): + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} + cube = DataCube.load_stac( + "https://stac.test/data", spatial_extent=spatial_extent, connection=dummy_backend.connection + ) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4}, + } + + def test_load_stac_spatial_extent_shapely(self, dummy_backend): + polygon = shapely.geometry.Polygon([(3, 51), (4, 51), (4, 52), (3, 52)]) + cube = DataCube.load_stac("https://stac.test/data", spatial_extent=polygon, connection=dummy_backend.connection) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": { + "type": "Polygon", + "coordinates": [[[3, 51], [4, 51], [4, 52], [3, 52], [3, 51]]], + }, + } + + @pytest.mark.parametrize("path_factory", [str, pathlib.Path]) + def test_load_stac_spatial_extent_local_path(self, dummy_backend, path_factory, test_data): + path = path_factory(test_data.get_path("geojson/polygon02.json")) + cube = DataCube.load_stac("https://stac.test/data", spatial_extent=path, connection=dummy_backend.connection) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]}, + } + + def test_load_stac_spatial_extent_url(self, dummy_backend): + cube = DataCube.load_stac( + "https://stac.test/data", + spatial_extent="https://geo.test/geometry.json", + connection=dummy_backend.connection, + ) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"format": "GeoJSON", "url": "https://geo.test/geometry.json"}, + }, + "loadstac1": { + "process_id": "load_stac", + "arguments": { + "url": "https://stac.test/data", + "spatial_extent": {"from_node": "loadurl1"}, + }, + "result": True, + }, + } + + def test_load_stac_spatial_extent_parameter(self, dummy_backend): + spatial_extent = Parameter.geojson("zpatial_extent") + cube = DataCube.load_stac( + "https://stac.test/data", spatial_extent=spatial_extent, connection=dummy_backend.connection + ) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + } def test_filter_temporal_basic_positional_args(s2cube): im = s2cube.filter_temporal("2016-01-01", "2016-03-10") diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index 20c36987a..16f5cb894 100644 --- a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -51,7 +51,44 @@ BASIC_ENDPOINTS = [{"path": "/credentials/basic", "methods": ["GET"]}] + +GEOJSON_POINT_01 = {"type": "Point", "coordinates": [3, 52]} +GEOJSON_LINESTRING_01 = {"type": "LineString", "coordinates": [[3, 50], [4, 51], [5, 53]]} +GEOJSON_POLYGON_01 = { + "type": "Polygon", + "coordinates": [[[3, 51], [4, 51], [4, 52], [3, 52], [3, 51]]], +} +GEOJSON_MULTIPOLYGON_01 = { + "type": "MultiPolygon", + "coordinates": [[[[3, 51], [4, 51], [4, 52], [3, 52], [3, 51]]]], +} +GEOJSON_FEATURE_01 = { + "type": "Feature", + "properties": {}, + "geometry": GEOJSON_POLYGON_01, +} +GEOJSON_FEATURE_02 = { + "type": "Feature", + "properties": {}, + "geometry": GEOJSON_MULTIPOLYGON_01, +} +GEOJSON_FEATURECOLLECTION_01 = { + "type": "FeatureCollection", + "features": [ + GEOJSON_FEATURE_01, + GEOJSON_FEATURE_02, + ], +} +GEOJSON_GEOMETRYCOLLECTION_01 = { + "type": "GeometryCollection", + "geometries": [ + GEOJSON_POINT_01, + GEOJSON_POLYGON_01, + ], +} + # Trick to avoid linting/auto-formatting tools to complain about or fix unused imports of these pytest fixtures +# TODO: use proper way to reuse fixtures instead of this hack auth_config = auth_config refresh_token_store = refresh_token_store @@ -2396,24 +2433,189 @@ def test_authenticate_oidc_access_token_wrong_provider(self): connection.authenticate_oidc_access_token(access_token="Th3Tok3n!@#", provider_id="nope") -def test_load_collection_arguments_100(requests_mock): - requests_mock.get(API_URL, json={"api_version": "1.0.0"}) - conn = Connection(API_URL) - requests_mock.get(API_URL + "collections/FOO", json={ - "summaries": {"eo:bands": [{"name": "red"}, {"name": "green"}, {"name": "blue"}]} - }) - spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} - temporal_extent = ["2019-01-01", "2019-01-22"] - im = conn.load_collection( - "FOO", spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=["red", "green"] +class TestLoadCollection: + def test_load_collection_arguments_basic(self, dummy_backend): + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} + temporal_extent = ["2019-01-01", "2019-01-22"] + cube = dummy_backend.connection.load_collection( + "S2", spatial_extent=spatial_extent, temporal_extent=temporal_extent, bands=["B2", "B3"] + ) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "S2", + "spatial_extent": {"east": 3, "north": 4, "south": 2, "west": 1}, + "temporal_extent": ["2019-01-01", "2019-01-22"], + "bands": ["B2", "B3"], + }, + "result": True, + } + } + + def test_load_collection_spatial_extent_bbox(self, dummy_backend): + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} + cube = dummy_backend.connection.load_collection("S2", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4}, + "temporal_extent": None, + } + + @pytest.mark.parametrize( + "spatial_extent", + [ + GEOJSON_POLYGON_01, + GEOJSON_MULTIPOLYGON_01, + GEOJSON_FEATURE_01, + GEOJSON_FEATURECOLLECTION_01, + ], ) - assert im._pg.process_id == "load_collection" - assert im._pg.arguments == { - "id": "FOO", - "spatial_extent": spatial_extent, - "temporal_extent": temporal_extent, - "bands": ["red", "green"] - } + def test_load_collection_spatial_extent_geojson(self, dummy_backend, spatial_extent): + cube = dummy_backend.connection.load_collection("S2", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": spatial_extent, + "temporal_extent": None, + } + + @pytest.mark.parametrize( + "spatial_extent", + [GEOJSON_POINT_01, GEOJSON_LINESTRING_01, GEOJSON_GEOMETRYCOLLECTION_01], + ) + def test_load_collection_spatial_extent_geojson_wrong_type(self, dummy_backend, spatial_extent): + with pytest.raises(OpenEoClientException, match="Invalid geometry type"): + _ = dummy_backend.connection.load_collection("S2", spatial_extent=spatial_extent) + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_POLYGON_01, + GEOJSON_MULTIPOLYGON_01, + ], + ) + def test_load_collection_spatial_extent_shapely(self, geojson, dummy_backend): + spatial_extent = shapely.geometry.shape(geojson) + cube = dummy_backend.connection.load_collection("S2", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": geojson, + "temporal_extent": None, + } + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_POINT_01, + GEOJSON_GEOMETRYCOLLECTION_01, + ], + ) + def test_load_collection_spatial_extent_shapely_wrong_type(self, geojson, dummy_backend): + spatial_extent = shapely.geometry.shape(geojson) + with pytest.raises(OpenEoClientException, match="Invalid geometry type"): + _ = dummy_backend.connection.load_collection("S2", spatial_extent=spatial_extent) + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_MULTIPOLYGON_01, + GEOJSON_FEATURECOLLECTION_01, + ], + ) + @pytest.mark.parametrize("path_factory", [str, Path]) + def test_load_collection_spatial_extent_local_path(self, geojson, dummy_backend, tmp_path, path_factory): + path = tmp_path / "geometry.json" + with path.open("w") as f: + json.dump(geojson, f) + cube = dummy_backend.connection.load_collection("S2", spatial_extent=path_factory(path)) + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": geojson, + "temporal_extent": None, + } + + def test_load_collection_spatial_extent_url(self, dummy_backend): + cube = dummy_backend.connection.load_collection("S2", spatial_extent="https://geo.test/geometry.json") + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": { + "url": "https://geo.test/geometry.json", + "format": "GeoJSON", + }, + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "S2", + "spatial_extent": {"from_node": "loadurl1"}, + "temporal_extent": None, + }, + "result": True, + }, + } + + @pytest.mark.parametrize( + "parameter", + [ + Parameter("zpatial_extent"), + Parameter.spatial_extent("zpatial_extent"), + Parameter.geojson("zpatial_extent"), + ], + ) + def test_load_collection_spatial_extent_parameter(self, dummy_backend, parameter, recwarn): + cube = dummy_backend.connection.load_collection("S2", spatial_extent=parameter) + assert len(recwarn) == 0 + + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + "temporal_extent": None, + } + + def test_load_collection_spatial_extent_parameter_schema_mismatch(self, dummy_backend, recwarn): + cube = dummy_backend.connection.load_collection( + "S2", spatial_extent=Parameter.number("zpatial_extent", description="foo") + ) + assert [str(w.message) for w in recwarn] == [ + "Schema mismatch with parameter given to `spatial_extent` in `load_collection`: expected a schema compatible with type 'object' but got {'type': 'number'}." + ] + + cube.execute() + assert dummy_backend.get_sync_pg()["loadcollection1"]["arguments"] == { + "id": "S2", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + "temporal_extent": None, + } + + def test_load_collection_spatial_extent_vector_cube(self, dummy_backend): + vector_cube = VectorCube.load_url( + connection=dummy_backend.connection, url="https://geo.test/geometry.json", format="GeoJSON" + ) + cube = dummy_backend.connection.load_collection("S2", spatial_extent=vector_cube) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"format": "GeoJSON", "url": "https://geo.test/geometry.json"}, + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "id": "S2", + "spatial_extent": {"from_node": "loadurl1"}, + "temporal_extent": None, + }, + "result": True, + }, + } def test_load_result(requests_mock): @@ -2727,6 +2929,175 @@ def test_bands_parameterized(self, con120): } } + def test_load_stac_spatial_extent_bbox(self, dummy_backend): + spatial_extent = {"west": 1, "south": 2, "east": 3, "north": 4} + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"west": 1, "south": 2, "east": 3, "north": 4}, + } + + @pytest.mark.parametrize( + "spatial_extent", + [ + GEOJSON_POLYGON_01, + GEOJSON_MULTIPOLYGON_01, + GEOJSON_FEATURE_01, + GEOJSON_FEATURECOLLECTION_01, + ], + ) + def test_load_stac_spatial_extent_geojson(self, dummy_backend, spatial_extent): + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": spatial_extent, + } + + @pytest.mark.parametrize( + "spatial_extent", + [ + GEOJSON_POINT_01, + GEOJSON_LINESTRING_01, + GEOJSON_GEOMETRYCOLLECTION_01, + ], + ) + def test_load_stac_spatial_extent_geojson_wrong_type(self, dummy_backend, spatial_extent): + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + with pytest.raises(OpenEoClientException, match="Invalid geometry type"): + _ = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_POLYGON_01, + GEOJSON_MULTIPOLYGON_01, + ], + ) + def test_load_stac_spatial_extent_shapely(self, dummy_backend, geojson): + spatial_extent = shapely.geometry.shape(geojson) + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": geojson, + } + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_POINT_01, + GEOJSON_GEOMETRYCOLLECTION_01, + ], + ) + def test_load_stac_spatial_extent_shapely_wront_type(self, dummy_backend, geojson): + spatial_extent = shapely.geometry.shape(geojson) + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + with pytest.raises(OpenEoClientException, match="Invalid geometry type"): + _ = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=spatial_extent) + + @pytest.mark.parametrize( + "geojson", + [ + GEOJSON_MULTIPOLYGON_01, + GEOJSON_FEATURECOLLECTION_01, + ], + ) + @pytest.mark.parametrize("path_factory", [str, Path]) + def test_load_stac_spatial_extent_local_path(self, geojson, dummy_backend, tmp_path, path_factory): + path = tmp_path / "geometry.json" + with path.open("w") as f: + json.dump(geojson, f) + + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=path_factory(path)) + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": geojson, + } + + def test_load_stac_spatial_extent_url(self, dummy_backend): + # TODO #694 how to avoid request to dummy STAC URL (without mocking, which is overkill for this test) + cube = dummy_backend.connection.load_stac( + "https://stac.test/data", spatial_extent="https://geo.test/geometry.json" + ) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": { + "url": "https://geo.test/geometry.json", + "format": "GeoJSON", + }, + }, + "loadstac1": { + "process_id": "load_stac", + "arguments": { + "url": "https://stac.test/data", + "spatial_extent": {"from_node": "loadurl1"}, + }, + "result": True, + }, + } + + @pytest.mark.parametrize( + "parameter", + [ + Parameter("zpatial_extent"), + Parameter.spatial_extent("zpatial_extent"), + Parameter.geojson("zpatial_extent"), + ], + ) + def test_load_stac_spatial_extent_parameter(self, dummy_backend, parameter, recwarn): + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=parameter) + assert len(recwarn) == 0 + + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + } + + def test_load_stac_spatial_extent_parameter_schema_mismatch(self, dummy_backend, recwarn): + cube = dummy_backend.connection.load_stac( + "https://stac.test/data", spatial_extent=Parameter.number("zpatial_extent", description="foo") + ) + assert [str(w.message) for w in recwarn] == [ + "Schema mismatch with parameter given to `spatial_extent` in `load_stac`: expected a schema compatible with type 'object' but got {'type': 'number'}." + ] + + cube.execute() + assert dummy_backend.get_sync_pg()["loadstac1"]["arguments"] == { + "url": "https://stac.test/data", + "spatial_extent": {"from_parameter": "zpatial_extent"}, + } + + def test_load_stac_spatial_extent_vector_cube(self, dummy_backend): + vector_cube = VectorCube.load_url( + connection=dummy_backend.connection, url="https://geo.test/geometry.json", format="GeoJSON" + ) + cube = dummy_backend.connection.load_stac("https://stac.test/data", spatial_extent=vector_cube) + cube.execute() + assert dummy_backend.get_sync_pg() == { + "loadurl1": { + "process_id": "load_url", + "arguments": {"format": "GeoJSON", "url": "https://geo.test/geometry.json"}, + }, + "loadstac1": { + "process_id": "load_stac", + "arguments": { + "url": "https://stac.test/data", + "spatial_extent": {"from_node": "loadurl1"}, + }, + "result": True, + }, + } + @pytest.mark.parametrize( "data", diff --git a/tests/rest/test_udp.py b/tests/rest/test_udp.py index 8ac23236d..cd203a5e7 100644 --- a/tests/rest/test_udp.py +++ b/tests/rest/test_udp.py @@ -411,7 +411,7 @@ def test_build_parameterized_cube_load_collection_invalid_bbox_schema(con100): bbox = Parameter.string("bbox", description="Spatial extent") with pytest.warns( UserWarning, - match="Unexpected parameterized `spatial_extent` in `load_collection`: expected schema compatible with type 'object' but got {'type': 'string'}.", + match="Schema mismatch with parameter given to `spatial_extent` in `load_collection`: expected a schema compatible with type 'object' but got {'type': 'string'}.", ): cube = con100.load_collection(layer, spatial_extent=bbox, temporal_extent=dates)