From 5eafa442c9d587d4626832ae860220be064b03c9 Mon Sep 17 00:00:00 2001
From: Stefaan Lippens
Date: Fri, 4 Aug 2023 15:56:20 +0200
Subject: [PATCH] Issue #211 add `load_geojson`

---
Review notes (this section is not part of the commit message):
* `validate_geojson_basic` now raises `ValueError` explicitly instead of
  relying on `assert`, so the structural checks are not stripped when Python
  runs with `-O`. The issue messages are unchanged.
* Fixed docstring typos in `TestValidateGeoJSON._checker`.
* Diffstat and hunk size for openeo_driver/util/geometry.py were updated for
  the change above; the commit id and blob ids are those of the original commit.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. If a context line does not match, apply with
  `git apply --ignore-whitespace`.

 openeo_driver/ProcessGraphDeserializer.py |   8 +
 openeo_driver/processes.py                |  20 ++-
 openeo_driver/util/geometry.py            |  72 ++++++++-
 tests/test_processes.py                   |  23 +++
 tests/util/test_geometry.py               | 170 ++++++++++++++++++++++
 5 files changed, 289 insertions(+), 4 deletions(-)

diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py
index 90fb1860..046ff821 100644
--- a/openeo_driver/ProcessGraphDeserializer.py
+++ b/openeo_driver/ProcessGraphDeserializer.py
@@ -1589,6 +1589,14 @@ def to_vector_cube(args: Dict, env: EvalEnv):
     raise FeatureUnsupportedException(f"Converting {type(data)} to vector cube is not supported")
 
 
+@process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/load_geojson.json"))
+def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
+    data = args.get_required("data", validator=ProcessArgs.validator_geojson_dict())
+    properties = args.get_optional("properties", default=[], expected_type=(list, tuple))
+    vector_cube = env.backend_implementation.vector_cube_cls.from_geojson(data, columns_for_cube=properties)
+    return vector_cube
+
+
 @non_standard_process(
     ProcessSpec("get_geometries", description="Reads vector data from a file or a URL or get geometries from a FeatureCollection")
         .param('filename', description="filename or http url of a vector file", schema={"type": "string"}, required=False)
diff --git a/openeo_driver/processes.py b/openeo_driver/processes.py
index 6722a529..e01f0006 100644
--- a/openeo_driver/processes.py
+++ b/openeo_driver/processes.py
@@ -1,10 +1,9 @@
 import functools
 import inspect
-import typing
 import warnings
 from collections import namedtuple
 from pathlib import Path
-from typing import Callable, Dict, List, Tuple, Optional, Any, Union
+from typing import Callable, Dict, List, Tuple, Optional, Any, Union, Collection
 
 from openeo_driver.errors import (
     ProcessUnsupportedException,
@@ -12,6 +11,7 @@
     ProcessParameterInvalidException,
 )
 from openeo_driver.specs import SPECS_ROOT
+from openeo_driver.util.geometry import validate_geojson_basic
 from openeo_driver.utils import read_json, EvalEnv
 
 
@@ -411,7 +411,7 @@ def get_subset(self, names: List[str], aliases: Optional[Dict[str, str]] = None)
                     kwargs[key] = self[alias]
         return kwargs
 
-    def get_enum(self, name: str, options: typing.Container[ArgumentValue]) -> ArgumentValue:
+    def get_enum(self, name: str, options: Collection[ArgumentValue]) -> ArgumentValue:
         """
         Get argument by name and check if it belongs to given set of (enum) values.
 
@@ -440,3 +440,17 @@ def validator(value):
             return True
 
         return validator
+
+    @staticmethod
+    def validator_geojson_dict(
+        allowed_types: Optional[Collection[str]] = None,
+    ):
+        """Build validator to verify that provided structure looks like a GeoJSON-style object"""
+
+        def validator(value):
+            issues = validate_geojson_basic(value=value, allowed_types=allowed_types, raise_exception=False)
+            if issues:
+                raise ValueError(f"Invalid GeoJSON: {', '.join(issues)}.")
+            return True
+
+        return validator
diff --git a/openeo_driver/util/geometry.py b/openeo_driver/util/geometry.py
index ccee107f..8feab9fe 100644
--- a/openeo_driver/util/geometry.py
+++ b/openeo_driver/util/geometry.py
@@ -3,7 +3,7 @@
 import logging
 import re
 from pathlib import Path
-from typing import Union, Tuple, Optional, List, Mapping, Sequence
+from typing import Union, Tuple, Optional, List, Mapping, Sequence, Any, Collection
 
 import pyproj
 import shapely.geometry
@@ -17,6 +17,76 @@
 _log = logging.getLogger(__name__)
 
 
+GEOJSON_GEOMETRY_TYPES_BASIC = frozenset(
+    {"Point", "MultiPoint", "LineString", "MultiLineString", "Polygon", "MultiPolygon"}
+)
+GEOJSON_GEOMETRY_TYPES_EXTENDED = GEOJSON_GEOMETRY_TYPES_BASIC | {"GeometryCollection"}
+
+
+def validate_geojson_basic(
+    value: Any,
+    *,
+    allowed_types: Optional[Collection[str]] = None,
+    raise_exception: bool = True,
+    recurse: bool = True,
+) -> List[str]:
+    """
+    Validate if given value looks like a valid GeoJSON construct.
+
+    Note: this is just for basic inspection to catch simple/obvious structural issues.
+    It is not intended for a full-blown, deep GeoJSON validation and coordinate inspection.
+
+    :param value: the value to inspect
+    :param allowed_types: optional collection of GeoJSON types to accept
+    :param raise_exception: whether to raise an exception when issues are found (default),
+        or just return list of issues
+    :param recurse: whether to recursively validate Feature's geometry and FeatureCollection's features
+    :returns: list of issues found (when `raise_exception` is off)
+    :raises ValueError: when a structural issue is found and `raise_exception` is on
+    """
+    try:
+        # Explicit raises instead of `assert`: assertions are stripped under `python -O`.
+        if not isinstance(value, dict):
+            raise ValueError(f"JSON object (mapping/dictionary) expected, but got {type(value).__name__}")
+        if "type" not in value:
+            raise ValueError("No 'type' field")
+        geojson_type = value["type"]
+        if not isinstance(geojson_type, str):
+            raise ValueError(f"Invalid 'type' type: {type(geojson_type).__name__}")
+        if allowed_types and geojson_type not in allowed_types:
+            raise ValueError(f"Found type {geojson_type!r}, but expects one of {sorted(allowed_types)}")
+        if geojson_type in GEOJSON_GEOMETRY_TYPES_BASIC:
+            if "coordinates" not in value:
+                raise ValueError(f"No 'coordinates' field (type {geojson_type!r})")
+        elif geojson_type in {"GeometryCollection"}:
+            if "geometries" not in value:
+                raise ValueError(f"No 'geometries' field (type {geojson_type!r})")
+            # TODO: recursively check sub-geometries?
+        elif geojson_type in {"Feature"}:
+            if "geometry" not in value:
+                raise ValueError(f"No 'geometry' field (type {geojson_type!r})")
+            if "properties" not in value:
+                raise ValueError(f"No 'properties' field (type {geojson_type!r})")
+            if recurse:
+                validate_geojson_basic(
+                    value["geometry"], recurse=True, allowed_types=GEOJSON_GEOMETRY_TYPES_EXTENDED, raise_exception=True
+                )
+        elif geojson_type in {"FeatureCollection"}:
+            if "features" not in value:
+                raise ValueError(f"No 'features' field (type {geojson_type!r})")
+            if recurse:
+                for f in value["features"]:
+                    validate_geojson_basic(f, recurse=True, allowed_types=["Feature"], raise_exception=True)
+        else:
+            raise ValueError(f"Invalid type {geojson_type!r}")
+
+    except Exception as e:
+        if raise_exception:
+            raise
+        return [str(e)]
+    return []
+
+
 def validate_geojson_coordinates(geojson):
     def _validate_coordinates(coordinates, initial_run=True):
         max_evaluations = 20
diff --git a/tests/test_processes.py b/tests/test_processes.py
index 646f31be..9ed730af 100644
--- a/tests/test_processes.py
+++ b/tests/test_processes.py
@@ -612,3 +612,26 @@ def test_get_enum(self):
             ),
         ):
             _ = args.get_enum("color", options=["R", "G", "B"])
+
+    def test_validator_geojson_dict(self):
+        polygon = {"type": "Polygon", "coordinates": [[1, 2]]}
+        args = ProcessArgs({"geometry": polygon, "color": "red"}, process_id="wibble")
+
+        validator = ProcessArgs.validator_geojson_dict()
+        assert args.get_required("geometry", validator=validator) == polygon
+        with pytest.raises(
+            ProcessParameterInvalidException,
+            match=re.escape(
+                "The value passed for parameter 'color' in process 'wibble' is invalid: Invalid GeoJSON: JSON object (mapping/dictionary) expected, but got str."
+            ),
+        ):
+            _ = args.get_required("color", validator=validator)
+
+        validator = ProcessArgs.validator_geojson_dict(allowed_types=["FeatureCollection"])
+        with pytest.raises(
+            ProcessParameterInvalidException,
+            match=re.escape(
+                "The value passed for parameter 'geometry' in process 'wibble' is invalid: Invalid GeoJSON: Found type 'Polygon', but expects one of ['FeatureCollection']."
+            ),
+        ):
+            _ = args.get_required("geometry", validator=validator)
diff --git a/tests/util/test_geometry.py b/tests/util/test_geometry.py
index cbb2379d..607fb64e 100644
--- a/tests/util/test_geometry.py
+++ b/tests/util/test_geometry.py
@@ -1,3 +1,6 @@
+import contextlib
+from typing import Union, List
+
 import math
 
 import pyproj
@@ -19,6 +22,7 @@
     BoundingBox,
     BoundingBoxException,
     CrsRequired,
+    validate_geojson_basic,
 )
 
 
@@ -746,3 +750,169 @@ def test_best_utm(self):
 
         bbox = BoundingBox(-72, -13, -71, -12, crs="EPSG:4326")
         assert bbox.best_utm() == 32719
+
+
+class TestValidateGeoJSON:
+    @staticmethod
+    @contextlib.contextmanager
+    def _checker(expected_issue: Union[str, None], raise_exception: bool):
+        """
+        Helper context manager to easily check a validate_geojson_basic result
+        for both raise_exception modes:
+
+        - "exception mode": context manager __exit__ phase checks result
+        - "return issue mode": returned `check` function should be used inside context manager body
+        """
+        checked = False
+
+        def check(result: List[str]):
+            """Check validation result in case no actual exception was thrown"""
+            nonlocal checked
+            checked = True
+            if expected_issue:
+                if raise_exception:
+                    pytest.fail("Exception should have been raised")
+                if not result:
+                    pytest.fail("No issue was reported")
+                assert expected_issue in "\n".join(result)
+            else:
+                if result:
+                    pytest.fail(f"Unexpected issue reported: {result}")
+
+        try:
+            yield check
+        except Exception as e:
+            # Check validation result in case of actual exception
+            if not raise_exception:
+                pytest.fail(f"Unexpected {e!r}: issue should be returned")
+            if not expected_issue:
+                pytest.fail(f"Unexpected {e!r}: no issue expected")
+            assert expected_issue in str(e)
+        else:
+            # No exception was thrown: check that the `check` function has been called.
+            if not checked:
+                raise RuntimeError("`check` function was not used")
+
+    @pytest.mark.parametrize(
+        ["value", "expected_issue"],
+        [
+            ("nope nope", "JSON object (mapping/dictionary) expected, but got str"),
+            (123, "JSON object (mapping/dictionary) expected, but got int"),
+            ({}, "No 'type' field"),
+            ({"type": 123}, "Invalid 'type' type: int"),
+            ({"type": {"Poly": "gon"}}, "Invalid 'type' type: dict"),
+            ({"type": "meh"}, "Invalid type 'meh'"),
+            ({"type": "Point"}, "No 'coordinates' field (type 'Point')"),
+            ({"type": "Point", "coordinates": [1, 2]}, None),
+            ({"type": "Polygon"}, "No 'coordinates' field (type 'Polygon')"),
+            ({"type": "Polygon", "coordinates": [[1, 2]]}, None),
+            ({"type": "MultiPolygon"}, "No 'coordinates' field (type 'MultiPolygon')"),
+            ({"type": "MultiPolygon", "coordinates": [[[1, 2]]]}, None),
+            ({"type": "GeometryCollection", "coordinates": []}, "No 'geometries' field (type 'GeometryCollection')"),
+            ({"type": "GeometryCollection", "geometries": []}, None),
+            ({"type": "Feature", "coordinates": []}, "No 'geometry' field (type 'Feature')"),
+            ({"type": "Feature", "geometry": {}}, "No 'properties' field (type 'Feature')"),
+            ({"type": "Feature", "geometry": {}, "properties": {}}, "No 'type' field"),
+            (
+                {"type": "Feature", "geometry": {"type": "Polygon"}, "properties": {}},
+                "No 'coordinates' field (type 'Polygon')",
+            ),
+            (
+                {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                None,
+            ),
+            (
+                {"type": "Feature", "geometry": {"type": "Polygonnnnn", "coordinates": [[1, 2]]}, "properties": {}},
+                "Found type 'Polygonnnnn', but expects one of ",
+            ),
+            ({"type": "FeatureCollection"}, "No 'features' field (type 'FeatureCollection')"),
+            ({"type": "FeatureCollection", "features": []}, None),
+            ({"type": "FeatureCollection", "features": [{"type": "Feature"}]}, "No 'geometry' field (type 'Feature')"),
+            (
+                {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {}}]},
+                "No 'properties' field (type 'Feature')",
+            ),
+            (
+                {"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {}, "properties": {}}]},
+                "No 'type' field",
+            ),
+            (
+                {
+                    "type": "FeatureCollection",
+                    "features": [{"type": "Feature", "geometry": {"type": "Polygon"}, "properties": {}}],
+                },
+                "No 'coordinates' field (type 'Polygon')",
+            ),
+            (
+                {
+                    "type": "FeatureCollection",
+                    "features": [
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[3, 4]]}, "properties": {}},
+                    ],
+                },
+                None,
+            ),
+        ],
+    )
+    @pytest.mark.parametrize("raise_exception", [False, True])
+    def test_validate_geojson_basic(self, value, expected_issue, raise_exception):
+        with self._checker(expected_issue=expected_issue, raise_exception=raise_exception) as check:
+            result = validate_geojson_basic(value, raise_exception=raise_exception)
+            check(result)
+
+    @pytest.mark.parametrize(
+        ["value", "allowed_types", "expected_issue"],
+        [
+            (
+                {"type": "Point", "coordinates": [1, 2]},
+                {"Polygon", "MultiPolygon"},
+                "Found type 'Point', but expects one of ['MultiPolygon', 'Polygon']",
+            ),
+            ({"type": "Polygon", "coordinates": [[1, 2]]}, {"Polygon", "MultiPolygon"}, None),
+            ({"type": "MultiPolygon", "coordinates": [[[1, 2]]]}, {"Polygon", "MultiPolygon"}, None),
+            (
+                {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                {"Polygon", "MultiPolygon"},
+                "Found type 'Feature', but expects one of ['MultiPolygon', 'Polygon']",
+            ),
+            (
+                {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                {"Feature"},
+                None,
+            ),
+            (
+                {
+                    "type": "FeatureCollection",
+                    "features": [
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[3, 4]]}, "properties": {}},
+                    ],
+                },
+                {"Polygon", "MultiPolygon"},
+                "Found type 'FeatureCollection', but expects one of ['MultiPolygon', 'Polygon']",
+            ),
+            (
+                {
+                    "type": "FeatureCollection",
+                    "features": [
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[1, 2]]}, "properties": {}},
+                        {"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[3, 4]]}, "properties": {}},
+                    ],
+                },
+                {"FeatureCollection"},
+                None,
+            ),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "raise_exception",
+        [
+            False,
+            True,
+        ],
+    )
+    def test_validate_geojson_basic_allowed_types(self, value, allowed_types, expected_issue, raise_exception):
+        with self._checker(expected_issue=expected_issue, raise_exception=raise_exception) as check:
+            result = validate_geojson_basic(value, allowed_types=allowed_types, raise_exception=raise_exception)
+            check(result)