Skip to content

Commit

Permalink
Issue #678/#682 further finetuning
Browse files (browse the repository at this point in the history)
- doc and typing tweaks
- push more functionality to _get_geometry_argument
- add support in load_stac as well
  • Loading branch information
soxofaan committed Jan 17, 2025
1 parent a72e3a5 commit f95728d
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 36 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add support for `log_level` in `create_job()` and `execute_job()` ([#704](https://github.com/Open-EO/openeo-python-client/issues/704))
- Add initial support for "geometry" dimension type in `CubeMetadata` ([#705](https://github.com/Open-EO/openeo-python-client/issues/705))
- Add support for parameterized `bands` argument in `load_stac()`
- Argument `spatial_extent` in `load_collection` supports Shapely objects and loading GeoJSON from a local path.
- Argument `spatial_extent` in `load_collection()`/`load_stac()`: add support for Shapely objects, loading GeoJSON from a local path and loading geometry from GeoJSON/GeoParquet URL. ([#678](https://github.com/Open-EO/openeo-python-client/issues/678))

### Changed

Expand Down
3 changes: 3 additions & 0 deletions openeo/api/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,9 @@ def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optiona
elif isinstance(actual_type, list):
if type not in actual_type:
return False
elif actual_type is None:
# Without explicit "type", anything is accepted
return True
else:
raise ValueError(actual_type)
if subtype:
Expand Down
15 changes: 9 additions & 6 deletions openeo/rest/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,7 +1256,7 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] =
def load_collection(
self,
collection_id: Union[str, Parameter],
spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None,
spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None,
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[Iterable[str], Parameter, str, None] = None,
properties: Union[
Expand All @@ -1272,8 +1272,8 @@ def load_collection(
:param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways:
- a bounding box dictionary
- a Shapely geometry object
- a GeoJSON-style dictionary,
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
- a GeoJSON-style dictionary
- a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.api.process.Parameter` instance.
:param temporal_extent: limit data to specified temporal interval.
Expand All @@ -1296,7 +1296,7 @@ def load_collection(
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
.. versionchanged:: 0.37.0
Add support for passing a Shapely geometry or a local path to a GeoJSON file to the ``spatial_extent`` argument.
Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file.
"""
return DataCube.load_collection(
collection_id=collection_id,
Expand Down Expand Up @@ -1355,7 +1355,7 @@ def load_result(
def load_stac(
self,
url: str,
spatial_extent: Union[Dict[str, float], Parameter, None] = None,
spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, Path, None] = None,
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[Iterable[str], Parameter, str, None] = None,
properties: Optional[Dict[str, Union[str, PGNode, Callable]]] = None,
Expand Down Expand Up @@ -1457,6 +1457,9 @@ def load_stac(
.. versionchanged:: 0.23.0
Argument ``temporal_extent``: add support for year/month shorthand notation
as discussed at :ref:`date-shorthand-handling`.
.. versionchanged:: 0.37.0
Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file.
"""
return DataCube.load_stac(
url=url,
Expand Down Expand Up @@ -1562,7 +1565,7 @@ def load_geojson(
return VectorCube.load_geojson(connection=self, data=data, properties=properties)

@openeo_process
def load_url(self, url: str, format: str, options: Optional[dict] = None):
def load_url(self, url: str, format: str, options: Optional[dict] = None) -> VectorCube:
"""
Loads a file from a URL
Expand Down
87 changes: 59 additions & 28 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def load_collection(
cls,
collection_id: Union[str, Parameter],
connection: Optional[Connection] = None,
spatial_extent: Union[Dict[str, float], Parameter, shapely.geometry.base.BaseGeometry, None] = None,
spatial_extent: Union[dict, Parameter, shapely.geometry.base.BaseGeometry, str, pathlib.Path, None] = None,
temporal_extent: Union[Sequence[InputDate], Parameter, str, None] = None,
bands: Union[Iterable[str], Parameter, str, None] = None,
fetch_metadata: bool = True,
Expand All @@ -161,8 +161,8 @@ def load_collection(
:param spatial_extent: limit data to specified bounding box or polygons. Can be provided in different ways:
- a bounding box dictionary
- a Shapely geometry object
- a GeoJSON-style dictionary,
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
- a GeoJSON-style dictionary
- a path (as :py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.api.process.Parameter` instance.
:param temporal_extent: limit data to specified temporal interval.
Expand All @@ -185,27 +185,20 @@ def load_collection(
Add :py:func:`~openeo.rest.graph_building.collection_property` support to ``properties`` argument.
.. versionchanged:: 0.37.0
Add support for passing a Shapely geometry or a local path to a GeoJSON file to the ``spatial_extent`` argument.
Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file.
"""
if temporal_extent:
temporal_extent = cls._get_temporal_extent(extent=temporal_extent)

if isinstance(spatial_extent, Parameter):
if not schema_supports(spatial_extent.schema, type="object"):
warnings.warn(
"Unexpected parameterized `spatial_extent` in `load_collection`:"
f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}."
)
elif spatial_extent is None or (
isinstance(spatial_extent, dict) and spatial_extent.keys() & {"west", "east", "north", "south"}
):
pass
else:
valid_geojson_types = [
"Polygon", "MultiPolygon", "Feature", "FeatureCollection"
]
spatial_extent = _get_geometry_argument(argument=spatial_extent, valid_geojson_types=valid_geojson_types,
connection=connection)
spatial_extent = _get_geometry_argument(
argument=spatial_extent,
valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"],
connection=connection,
allow_none=True,
allow_parameter=True,
allow_bounding_box=True,
argument_name="spatial_extent",
process_id="load_collection",
)

arguments = {
'id': collection_id,
Expand Down Expand Up @@ -390,11 +383,22 @@ def load_stac(
.. versionadded:: 0.33.0
.. versionchanged:: 0.37.0
Argument ``spatial_extent``: add support for passing a Shapely geometry or a local path to a GeoJSON file.
"""
arguments = {"url": url}
# TODO #425 more normalization/validation of extent/band parameters
if spatial_extent:
arguments["spatial_extent"] = spatial_extent
arguments["spatial_extent"] = _get_geometry_argument(
argument=spatial_extent,
valid_geojson_types=["Polygon", "MultiPolygon", "Feature", "FeatureCollection"],
connection=connection,
allow_none=True,
allow_parameter=True,
allow_bounding_box=True,
argument_name="spatial_extent",
process_id="load_stac",
)

if temporal_extent:
arguments["temporal_extent"] = DataCube._get_temporal_extent(extent=temporal_extent)
bands = cls._get_bands(bands, process_id="load_stac")
Expand Down Expand Up @@ -2892,23 +2896,47 @@ def _get_geometry_argument(
Parameter,
_FromNodeMixin,
],
*,
valid_geojson_types: List[str],
connection: Connection = None,
crs: Optional[str] = None,
) -> Union[dict, Parameter, PGNode]:
allow_parameter: bool = True,
allow_bounding_box: bool = False,
allow_none: bool = False,
argument_name: str = "n/a",
process_id: str = "n/a",
) -> Union[dict, Parameter, PGNode, _FromNodeMixin, None]:
"""
Convert input to a geometry as "geojson" subtype object or vectorcube.
Convert input to a geometry as "geojson" subtype object or vector cube.
:param crs: value that encodes a coordinate reference system.
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
:param allow_parameter: allow argument to be a :py:class:`Parameter` instance, and pass-through as such
:param allow_none: allow argument to be ``None`` and pass-through as such
:param allow_bounding_box: allow argument to be a bounding box dictionary and pass-through as such
"""
if isinstance(argument, Parameter):
# Some quick exit shortcuts
if allow_parameter and isinstance(argument, Parameter):
if not schema_supports(argument.schema, type="object"):
warnings.warn(
f"Unexpected parameterized `{argument_name}` in `{process_id}`:"
f" expected schema compatible with type 'object' but got {argument.schema!r}."
)
return argument
elif isinstance(argument, _FromNodeMixin):
# Typical use case here: VectorCube instance
return argument.from_node()
elif allow_none and argument is None:
return argument
elif (
allow_bounding_box
and isinstance(argument, dict)
and all(k in argument for k in ["west", "south", "east", "north"])
):
return argument

# Support URL based geometry references (with `load_url` and best-effort format guess)
if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
# Geometry provided as URL: load with `load_url` (with best-effort format guess)
url = urllib.parse.urlparse(argument)
suffix = pathlib.Path(url.path.lower()).suffix
format = {
Expand All @@ -2919,7 +2947,8 @@ def _get_geometry_argument(
".geoparquet": "Parquet",
}.get(suffix, suffix.split(".")[-1])
return connection.load_url(url=argument, format=format)
#

# Support loading GeoJSON from local files
if (
isinstance(argument, (str, pathlib.Path))
and pathlib.Path(argument).is_file()
Expand All @@ -2933,6 +2962,8 @@ def _get_geometry_argument(
else:
raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

# The assumption at this point is that we are working with a GeoJSON style dictionary
assert isinstance(geometry, dict)
if geometry.get("type") not in valid_geojson_types:
raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format(
t=geometry.get("type"), s=valid_geojson_types
Expand Down
8 changes: 8 additions & 0 deletions tests/api/test_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,11 @@ def test_schema_supports_list():
assert schema_supports(schema, type="object") is True
assert schema_supports(schema, type="object", subtype="datacube") is True
assert schema_supports(schema, type="object", subtype="geojson") is False


def test_default_parameter_supports_anything():
    """A parameter built with only a name must be reported as supporting every requested type."""
    schema = Parameter(name="foo").schema
    # Plain type checks: each of these is expected to be accepted.
    for requested_type in ("string", "number", "object"):
        assert schema_supports(schema, type=requested_type) is True
    # A subtype restriction is expected to be accepted as well.
    assert schema_supports(schema, type="object", subtype="datacube") is True
2 changes: 1 addition & 1 deletion tests/rest/datacube/test_datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_load_collection_connectionless_temporal_extent_shortcut(self):
}

def test_load_collection_connectionless_shapely_spatial_extent(self):
polygon = shapely.Polygon(((0.0,1.0),(2.0,1.0),(3.0,2.0),(1.5,0.0),(0.0,1.0)))
polygon = shapely.geometry.Polygon(((0.0, 1.0), (2.0, 1.0), (3.0, 2.0), (1.5, 0.0), (0.0, 1.0)))
cube = DataCube.load_collection("T3", spatial_extent=polygon)
assert cube.flat_graph() == {
"loadcollection1": {
Expand Down

0 comments on commit f95728d

Please sign in to comment.