From f95728dcb3865fdfe83a19ad38d1a8fdc0fefa4d Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 16 Jan 2025 15:22:18 +0100 Subject: [PATCH] Issue #678/#682 further finetuning - doc and typing tweaks - push more functionality to _get_geometry_argument - add support in load_stac as well --- CHANGELOG.md | 2 +- openeo/api/process.py | 3 + openeo/rest/connection.py | 15 +++-- openeo/rest/datacube.py | 87 +++++++++++++++++++--------- tests/api/test_process.py | 8 +++ tests/rest/datacube/test_datacube.py | 2 +- 6 files changed, 81 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd63f78bd..4a3aec77e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add support for `log_level` in `create_job()` and `execute_job()` ([#704](https://github.com/Open-EO/openeo-python-client/issues/704)) - Add initial support for "geometry" dimension type in `CubeMetadata` ([#705](https://github.com/Open-EO/openeo-python-client/issues/705)) - Add support for parameterized `bands` argument in `load_stac()` -- Argument `spatial_extent` in `load_collection` supports Shapely objects and loading GeoJSON from a local path. +- Argument `spatial_extent` in `load_collection()`/`load_stac()`: add support for Shapely objects, loading GeoJSON from a local path and loading geometry from GeoJSON/GeoParquet URL. ([#678](https://github.com/Open-EO/openeo-python-client/issues/678)) ### Changed diff --git a/openeo/api/process.py b/openeo/api/process.py index 1e2d840ae..0947a47d4 100644 --- a/openeo/api/process.py +++ b/openeo/api/process.py @@ -467,6 +467,9 @@ def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optiona elif isinstance(actual_type, list): if type not in actual_type: return False + elif actual_type is None: + # Without explicit "type", anything is accepted + return True else: raise ValueError(actual_type) if subtype: diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 615b13f3d..6d087c27b 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1256,7 +1256,7 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = def load_collection( self, collection_id: Union[str, Parameter], - spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, properties: Union[ @@ -1272,8 +1272,8 @@ def load_collection( :param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways: - a bounding box dictionary - a Shapely geometry object - - a GeoJSON-style dictionary, - - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + - a GeoJSON-style dictionary + - a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, which will be loaded automatically to get the geometries as GeoJSON construct. - a :py:class:`~openeo.api.process.Parameter` instance. :param temporal_extent: limit data to specified temporal interval. @@ -1296,7 +1296,7 @@ def load_collection( Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument. .. versionchanged:: 0.37.0 - Add support for passing a Shapely geometry or a local path to a GeoJSON file to the ``spatial_extent`` argument. + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ return DataCube.load_collection( collection_id=collection_id, @@ -1355,7 +1355,7 @@ def load_result( def load_stac( self, url: str, - spatial_extent: Union[Dict[str, float], Parameter, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None, @@ -1457,6 +1457,9 @@ def load_stac( .. versionchanged:: 0.23.0 Argument ``temporal_extent``: add support for year/month shorthand notation as discussed at :ref:`date-shorthand-handling`. + + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ return DataCube.load_stac( url=url, @@ -1562,7 +1565,7 @@ def load_geojson( return VectorCube.load_geojson(connection=self, data=data, properties=properties) @openeo_process - def load_url(self, url: str, format: str, options: Optional[dict] = None): + def load_url(self, url: str, format: str, options: Optional[dict] = None) -> VectorCube: """ Loads a file from a URL diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index 1de00ac3a..7d4d73333 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -143,7 +143,7 @@ def load_collection( cls, collection_id: Union[str, Parameter], connection: Optional[Connection] = None, - spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None, + spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, pathlib.Path, None] = None, temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None, bands: Union[Iterable[str], Parameter, str, None] = None, fetch_metadata: bool = True, @@ -161,8 +161,8 @@ def load_collection( :param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways: - a bounding box dictionary - a Shapely geometry object - - a GeoJSON-style dictionary, - - a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, + - a GeoJSON-style dictionary + - a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file, which will be loaded automatically to get the geometries as GeoJSON construct. - a :py:class:`~openeo.api.process.Parameter` instance. :param temporal_extent: limit data to specified temporal interval. @@ -185,27 +185,20 @@ def load_collection( Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument. .. versionchanged:: 0.37.0 - Add support for passing a Shapely geometry or a local path to a GeoJSON file to the ``spatial_extent`` argument. + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ if temporal_extent: temporal_extent = cls._get_temporal_extent(extent=temporal_extent) - - if isinstance(spatial_extent, Parameter): - if not schema_supports(spatial_extent.schema, type="object"): - warnings.warn( - "Unexpected parameterized `spatial_extent` in `load_collection`:" - f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}." - ) - elif spatial_extent is None or ( - isinstance(spatial_extent, dict) and spatial_extent.keys() & {"west", "east", "north", "south"} - ): - pass - else: - valid_geojson_types = [ - "Polygon", "MultiPolygon", "Feature", "FeatureCollection" - ] - spatial_extent = _get_geometry_argument(argument=spatial_extent, valid_geojson_types=valid_geojson_types, - connection=connection) + spatial_extent = _get_geometry_argument( + argument=spatial_extent, + valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"], + connection=connection, + allow_none=True, + allow_parameter=True, + allow_bounding_box=True, + argument_name="spatial_extent", + process_id="load_collection", + ) arguments = { 'id': collection_id, @@ -390,11 +383,22 @@ def load_stac( .. versionadded:: 0.33.0 + .. versionchanged:: 0.37.0 + Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file. """ arguments = {"url": url} - # TODO #425 more normalization/validation of extent/band parameters if spatial_extent: - arguments["spatial_extent"] = spatial_extent + arguments["spatial_extent"] = _get_geometry_argument( + argument=spatial_extent, + valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"], + connection=connection, + allow_none=True, + allow_parameter=True, + allow_bounding_box=True, + argument_name="spatial_extent", + process_id="load_stac", + ) + if temporal_extent: arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent) bands = cls._get_bands(bands, process_id="load_stac") @@ -2892,23 +2896,47 @@ def _get_geometry_argument( Parameter, _FromNodeMixin, ], + *, valid_geojson_types: List[str], connection: Connection = None, crs: Optional[str] = None, -) -> Union[dict, Parameter, PGNode]: + allow_parameter: bool = True, + allow_bounding_box: bool = False, + allow_none: bool = False, + argument_name: str = "n/a", + process_id: str = "n/a", +) -> Union[dict, Parameter, PGNode, _FromNodeMixin, None]: """ - Convert input to a geometry as "geojson" subtype object or vectorcube. + Convert input to a geometry as "geojson" subtype object or vector cube. :param crs: value that encodes a coordinate reference system. See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument. + :param allow_parameter: allow argument to be a :py:class:`Parameter` instance, and pass-through as such + :param allow_none: allow argument to be ``None`` and pass-through as such + :param allow_bounding_box: allow argument to be a bounding box dictionary and pass-through as such """ - if isinstance(argument, Parameter): + # Some quick exit shortcuts + if allow_parameter and isinstance(argument, Parameter): + if not schema_supports(argument.schema, type="object"): + warnings.warn( + f"Unexpected parameterized `{argument_name}` in `{process_id}`:" + f" expected schema compatible with type 'object' but got {argument.schema!r}." + ) return argument elif isinstance(argument, _FromNodeMixin): + # Typical use case here: VectorCube instance return argument.from_node() + elif allow_none and argument is None: + return argument + elif ( + allow_bounding_box + and isinstance(argument, dict) + and all(k in argument for k in ["west", "south", "east", "north"]) + ): + return argument + # Support URL based geometry references (with `load_url` and best-effort format guess) if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I): - # Geometry provided as URL: load with `load_url` (with best-effort format guess) url = urllib.parse.urlparse(argument) suffix = pathlib.Path(url.path.lower()).suffix format = { @@ -2919,7 +2947,8 @@ def _get_geometry_argument( ".geoparquet": "Parquet", }.get(suffix, suffix.split(".")[-1]) return connection.load_url(url=argument, format=format) - # + + # Support loading GeoJSON from local files if ( isinstance(argument, (str, pathlib.Path)) and pathlib.Path(argument).is_file() @@ -2933,6 +2962,8 @@ def _get_geometry_argument( else: raise OpenEoClientException(f"Invalid geometry argument: {argument!r}") + # The assumption at this point is that we are working with a GeoJSON style dictionary + assert isinstance(geometry, dict) if geometry.get("type") not in valid_geojson_types: raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format( t=geometry.get("type"), s=valid_geojson_types diff --git a/tests/api/test_process.py b/tests/api/test_process.py index b39b4555c..ddd02899a 100644 --- a/tests/api/test_process.py +++ b/tests/api/test_process.py @@ -291,3 +291,11 @@ def test_schema_supports_list(): assert schema_supports(schema, type="object") is True assert schema_supports(schema, type="object", subtype="datacube") is True assert schema_supports(schema, type="object", subtype="geojson") is False + + +def test_default_parameter_supports_anything(): + parameter = Parameter(name="foo") + assert schema_supports(parameter.schema, type="string") is True + assert schema_supports(parameter.schema, type="number") is True + assert schema_supports(parameter.schema, type="object") is True + assert schema_supports(parameter.schema, type="object", subtype="datacube") is True diff --git a/tests/rest/datacube/test_datacube.py b/tests/rest/datacube/test_datacube.py index 48a05444f..67faf2c3f 100644 --- a/tests/rest/datacube/test_datacube.py +++ b/tests/rest/datacube/test_datacube.py @@ -138,7 +138,7 @@ def test_load_collection_connectionless_temporal_extent_shortcut(self): } def test_load_collection_connectionless_shapely_spatial_extent(self): - polygon = shapely.Polygon(((0.0,1.0),(2.0,1.0),(3.0,2.0),(1.5,0.0),(0.0,1.0))) + polygon = shapely.geometry.Polygon(((0.0, 1.0), (2.0, 1.0), (3.0, 2.0), (1.5, 0.0), (0.0, 1.0))) cube = DataCube.load_collection("T3", spatial_extent=polygon) assert cube.flat_graph() == { "loadcollection1": {