From a1d9321694063a7699ea63d3cb51eac77f0c8dfa Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 10:36:13 +0200 Subject: [PATCH 1/6] remove ErrorHandlingMethod in favour of NoDataStrategy --- hydromt/_typing/__init__.py | 2 - hydromt/_typing/error.py | 8 ---- hydromt/data_catalog/data_catalog.py | 12 +++--- hydromt/data_catalog/sources/dataframe.py | 25 +----------- hydromt/data_catalog/sources/dataset.py | 11 +----- hydromt/data_catalog/sources/geodataframe.py | 13 ++----- hydromt/data_catalog/sources/geodataset.py | 12 +----- hydromt/data_catalog/sources/rasterdataset.py | 12 +----- .../sources/test_dataset_source.py | 10 ----- tests/data_catalog/test_data_catalog.py | 39 ------------------- 10 files changed, 18 insertions(+), 126 deletions(-) diff --git a/hydromt/_typing/__init__.py b/hydromt/_typing/__init__.py index 14b0670b8..c17b59f78 100644 --- a/hydromt/_typing/__init__.py +++ b/hydromt/_typing/__init__.py @@ -2,7 +2,6 @@ from .crs import CRS from .error import ( - ErrorHandleMethod, NoDataException, NoDataStrategy, exec_nodata_strat, @@ -55,7 +54,6 @@ "TotalBounds", "XArrayDict", "ModelMode", - "ErrorHandleMethod", "NoDataStrategy", "NoDataException", "exec_nodata_strat", diff --git a/hydromt/_typing/error.py b/hydromt/_typing/error.py index 79fa8d9a9..8dce66a03 100644 --- a/hydromt/_typing/error.py +++ b/hydromt/_typing/error.py @@ -47,11 +47,3 @@ def exec_nodata_strat(msg: str, strategy: NoDataStrategy) -> None: logger.warning(msg) elif strategy == NoDataStrategy.IGNORE: pass - - -class ErrorHandleMethod(Enum): - """Strategies for error handling within hydromt.""" - - RAISE = 1 - SKIP = 2 - COERCE = 3 diff --git a/hydromt/data_catalog/data_catalog.py b/hydromt/data_catalog/data_catalog.py index 7d3b11f4f..ab990681b 100644 --- a/hydromt/data_catalog/data_catalog.py +++ b/hydromt/data_catalog/data_catalog.py @@ -38,7 +38,7 @@ from hydromt import __version__ from hydromt._io.readers import _yml_from_uri_or_path -from hydromt._typing import Bbox, ErrorHandleMethod, SourceSpecDict, StrPath, TimeRange +from hydromt._typing import Bbox, SourceSpecDict, StrPath, TimeRange from hydromt._typing.error import NoDataException, NoDataStrategy, exec_nodata_strat from hydromt._utils import ( _deep_merge, @@ -157,7 +157,7 @@ def to_stac_catalog( description: str = "The stac catalog of hydromt", used_only: bool = False, catalog_type: CatalogType = CatalogType.RELATIVE_PUBLISHED, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ): """Write data catalog to STAC format. @@ -181,7 +181,7 @@ def to_stac_catalog( meta = meta or {} stac_catalog = StacCatalog(id=catalog_name, description=description) for _name, source in self.list_sources(used_only): - stac_child_catalog = source.to_stac_catalog(on_error) + stac_child_catalog = source.to_stac_catalog(handle_nodata) if stac_child_catalog: stac_catalog.add_child(stac_child_catalog) @@ -191,7 +191,7 @@ def to_stac_catalog( def from_stac_catalog( self, stac_like: Union[str, Path, StacCatalog, dict], - on_error: ErrorHandleMethod = ErrorHandleMethod.SKIP, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ): """Write data catalog to STAC format. @@ -199,8 +199,8 @@ def from_stac_catalog( ---------- path: str, Path stac path. - on_error: ErrorHandleMethod - What to do on error when converting from STAC + handle_nodata: NoDataStrategy + What to do when required data is not available when converting from STAC """ if isinstance(stac_like, (str, Path)): stac_catalog = StacCatalog.from_file(stac_like) diff --git a/hydromt/data_catalog/sources/dataframe.py b/hydromt/data_catalog/sources/dataframe.py index 38da14356..9b2cd433d 100644 --- a/hydromt/data_catalog/sources/dataframe.py +++ b/hydromt/data_catalog/sources/dataframe.py @@ -1,18 +1,14 @@ """DataSource class for the DataFrame type.""" -from datetime import datetime from logging import Logger, getLogger from typing import Any, ClassVar, Dict, List, Literal, Optional import pandas as pd from fsspec import filesystem from pydantic import Field -from pystac import Asset as StacAsset from pystac import Catalog as StacCatalog -from pystac import Item as StacItem from hydromt._typing import ( - ErrorHandleMethod, NoDataStrategy, StrPath, TimeRange, @@ -122,7 +118,7 @@ def to_file( def to_stac_catalog( self, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ) -> Optional[StacCatalog]: """ Convert a dataframe into a STAC Catalog representation. @@ -141,29 +137,12 @@ def to_stac_catalog( - Optional[StacCatalog]: The STAC Catalog representation of the dataframe, or None if the dataset was skipped. """ - if on_error == ErrorHandleMethod.SKIP: + if handle_nodata == NoDataStrategy.IGNORE: logger.warning( f"Skipping {self.name} during stac conversion because" "because detecting temporal extent failed." ) return - elif on_error == ErrorHandleMethod.COERCE: - stac_catalog = StacCatalog( - self.name, - description=self.name, - ) - stac_item = StacItem( - self.name, - geometry=None, - bbox=[0, 0, 0, 0], - properties=self.metadata.model_dump(), - datetime=datetime(1, 1, 1), - ) - stac_asset = StacAsset(self.full_uri) - stac_item.add_asset("hydromt_path", stac_asset) - - stac_catalog.add_item(stac_item) - return stac_catalog else: raise NotImplementedError( "DataFrameSource does not support full stac conversion as it lacks" diff --git a/hydromt/data_catalog/sources/dataset.py b/hydromt/data_catalog/sources/dataset.py index 50d7a9080..82100a553 100644 --- a/hydromt/data_catalog/sources/dataset.py +++ b/hydromt/data_catalog/sources/dataset.py @@ -1,6 +1,5 @@ """DataSource class for the Dataset type.""" -from datetime import datetime from logging import Logger, getLogger from os.path import basename, splitext from typing import Any, ClassVar, Dict, List, Literal, Optional, Union @@ -16,7 +15,6 @@ from pystac import MediaType from hydromt._typing import ( - ErrorHandleMethod, NoDataStrategy, StrPath, TimeRange, @@ -202,7 +200,7 @@ def detect_time_range( def to_stac_catalog( self, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ) -> Optional[StacCatalog]: """ Convert a dataset into a STAC Catalog representation. @@ -236,17 +234,12 @@ def to_stac_catalog( f"Unknown extension: {ext} cannot determine media type" ) except (IndexError, KeyError, CRSError) as e: - if on_error == ErrorHandleMethod.SKIP: + if handle_nodata == NoDataStrategy.IGNORE: logger.warning( "Skipping {name} during stac conversion because" "because detecting spacial extent failed." ) return - elif on_error == ErrorHandleMethod.COERCE: - props = self.metadata.model_dump(exclude_none=True, exclude_unset=True) - start_dt = datetime(1, 1, 1) - end_dt = datetime(1, 1, 1) - media_type = MediaType.JSON else: raise e diff --git a/hydromt/data_catalog/sources/geodataframe.py b/hydromt/data_catalog/sources/geodataframe.py index 2f91a49fc..0f1da16dd 100644 --- a/hydromt/data_catalog/sources/geodataframe.py +++ b/hydromt/data_catalog/sources/geodataframe.py @@ -17,7 +17,6 @@ from hydromt._typing import ( Bbox, - ErrorHandleMethod, Geom, NoDataStrategy, StrPath, @@ -206,7 +205,7 @@ def detect_bbox( def to_stac_catalog( self, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ) -> Optional[StacCatalog]: """ Convert a geodataframe into a STAC Catalog representation. @@ -239,19 +238,15 @@ def to_stac_catalog( raise RuntimeError( f"Unknown extension: {ext} cannot determine media type" ) - except (IndexError, KeyError, CRSError): - if on_error == ErrorHandleMethod.SKIP: + except (IndexError, KeyError, CRSError) as e: + if handle_nodata == NoDataStrategy.IGNORE: logger.warning( "Skipping {name} during stac conversion because" "because detecting spacial extent failed." ) return - elif on_error == ErrorHandleMethod.COERCE: - bbox = [0.0, 0.0, 0.0, 0.0] - props = self.data_adapter.meta - media_type = MediaType.JSON else: - raise + raise e else: stac_catalog = StacCatalog( self.name, diff --git a/hydromt/data_catalog/sources/geodataset.py b/hydromt/data_catalog/sources/geodataset.py index 6aa45a41f..309eeb3a9 100644 --- a/hydromt/data_catalog/sources/geodataset.py +++ b/hydromt/data_catalog/sources/geodataset.py @@ -1,6 +1,5 @@ """DataSource class for the GeoDataset type.""" -from datetime import datetime from logging import Logger, getLogger from os.path import basename, splitext from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, cast @@ -18,7 +17,6 @@ from hydromt._typing import ( Bbox, - ErrorHandleMethod, Geom, NoDataStrategy, StrPath, @@ -273,7 +271,7 @@ def detect_time_range( def to_stac_catalog( self, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ) -> Optional[StacCatalog]: """ Convert a geodataset into a STAC Catalog representation. @@ -308,18 +306,12 @@ def to_stac_catalog( f"Unknown extension: {ext} cannot determine media type" ) except (IndexError, KeyError, CRSError) as e: - if on_error == ErrorHandleMethod.SKIP: + if handle_nodata == NoDataStrategy.IGNORE: logger.warning( "Skipping {name} during stac conversion because" "because detecting spacial extent failed." ) return - elif on_error == ErrorHandleMethod.COERCE: - bbox = [0.0, 0.0, 0.0, 0.0] - props = self.metadata - start_dt = datetime(1, 1, 1) - end_dt = datetime(1, 1, 1) - media_type = MediaType.JSON else: raise e diff --git a/hydromt/data_catalog/sources/rasterdataset.py b/hydromt/data_catalog/sources/rasterdataset.py index a54c71416..a9a3bc625 100644 --- a/hydromt/data_catalog/sources/rasterdataset.py +++ b/hydromt/data_catalog/sources/rasterdataset.py @@ -1,6 +1,5 @@ """DataSource class for the RasterDataset type.""" -from datetime import datetime from logging import Logger, getLogger from os.path import basename, splitext from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, cast @@ -18,7 +17,6 @@ from hydromt._typing import ( Bbox, - ErrorHandleMethod, Geom, NoDataStrategy, StrPath, @@ -274,7 +272,7 @@ def detect_time_range( def to_stac_catalog( self, - on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE, + handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE, ) -> Optional[StacCatalog]: """ Convert a rasterdataset into a STAC Catalog representation. @@ -314,18 +312,12 @@ def to_stac_catalog( f"Unknown extension: {ext} cannot determine media type" ) except (IndexError, KeyError, CRSError) as e: - if on_error == ErrorHandleMethod.SKIP: + if handle_nodata == NoDataStrategy.IGNORE: logger.warning( "Skipping {name} during stac conversion because" "because detecting spacial extent failed." ) return - elif on_error == ErrorHandleMethod.COERCE: - bbox = [0.0, 0.0, 0.0, 0.0] - props = self.data_adapter.meta - start_dt = datetime(1, 1, 1) - end_dt = datetime(1, 1, 1) - media_type = MediaType.JSON else: raise e diff --git a/tests/data_catalog/sources/test_dataset_source.py b/tests/data_catalog/sources/test_dataset_source.py index c4f4d0d9a..029645a16 100644 --- a/tests/data_catalog/sources/test_dataset_source.py +++ b/tests/data_catalog/sources/test_dataset_source.py @@ -83,13 +83,3 @@ def test_to_stac_catalog_skip(self, dataset_source_no_timerange: DatasetSource): on_error=ErrorHandleMethod.SKIP ) assert catalog is None - - def test_to_stac_catalog_coerce(self, dataset_source_no_timerange: DatasetSource): - catalog: Optional[StacCatalog] = dataset_source_no_timerange.to_stac_catalog( - on_error=ErrorHandleMethod.COERCE - ) - assert isinstance(catalog, StacCatalog) - stac_item = next(catalog.get_items(dataset_source_no_timerange.name), None) - assert list(stac_item.assets.keys())[0] == "test.nc" - assert stac_item.properties["start_datetime"] == "0001-01-01T00:00:00Z" - assert stac_item.properties["end_datetime"] == "0001-01-01T00:00:00Z" diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index 2d386a020..7e24e75b7 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -41,7 +41,6 @@ _yml_from_uri_or_path, ) from hydromt.data_catalog.sources import ( - DataFrameSource, DataSource, GeoDataFrameSource, GeoDatasetSource, @@ -1500,44 +1499,6 @@ def test_time_variable_slice(self, csv_uri_time: str, data_catalog: DataCatalog) ) assert np.all(dfts.columns == vars_slice) - def test_to_stac(self, df: pd.DataFrame, tmp_dir: Path): - uri_df = str(tmp_dir / "test.csv") - name = "test_dataframe" - df.to_csv(uri_df) - dc = DataCatalog().from_dict( - {name: {"data_type": "DataFrame", "uri": uri_df, "driver": "pandas"}} - ) - - source = cast(DataFrameSource, dc.get_source(name)) - - with pytest.raises( - NotImplementedError, - match="DataFrameSource does not support full stac conversion ", - ): - source.to_stac_catalog(on_error=ErrorHandleMethod.RAISE) - - assert source.to_stac_catalog(on_error=ErrorHandleMethod.SKIP) is None - - stac_catalog = StacCatalog( - name, - description=name, - ) - stac_item = StacItem( - name, - geometry=None, - bbox=[0, 0, 0, 0], - properties=source.metadata.model_dump(exclude_none=True), - datetime=datetime(1, 1, 1), - ) - stac_asset = StacAsset(str(uri_df)) - stac_item.add_asset("hydromt_path", stac_asset) - - stac_catalog.add_item(stac_item) - outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=ErrorHandleMethod.COERCE) - ) - assert stac_catalog.to_dict() == outcome.to_dict() # type: ignore - def test_get_dataframe(df, tmpdir, data_catalog): n = len(data_catalog) From 658dce8ee426f4efaedd69c96725f76bc6a3eb4c Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 10:42:14 +0200 Subject: [PATCH 2/6] remove unecessary tests --- .../sources/test_dataset_source.py | 4 +- .../sources/test_geo_dataframe_source.py | 39 --------------- tests/data_catalog/test_data_catalog.py | 47 +------------------ 3 files changed, 3 insertions(+), 87 deletions(-) diff --git a/tests/data_catalog/sources/test_dataset_source.py b/tests/data_catalog/sources/test_dataset_source.py index 029645a16..077a80d9d 100644 --- a/tests/data_catalog/sources/test_dataset_source.py +++ b/tests/data_catalog/sources/test_dataset_source.py @@ -5,7 +5,7 @@ import xarray as xr from pystac import Catalog as StacCatalog -from hydromt._typing import ErrorHandleMethod +from hydromt._typing.error import NoDataStrategy from hydromt.data_catalog.adapters import DatasetAdapter from hydromt.data_catalog.drivers import DatasetDriver from hydromt.data_catalog.sources import DatasetSource @@ -80,6 +80,6 @@ def _get_time_range(self, *args, **kwargs): def test_to_stac_catalog_skip(self, dataset_source_no_timerange: DatasetSource): catalog: Optional[StacCatalog] = dataset_source_no_timerange.to_stac_catalog( - on_error=ErrorHandleMethod.SKIP + handle_nodata=NoDataStrategy.IGNORE ) assert catalog is None diff --git a/tests/data_catalog/sources/test_geo_dataframe_source.py b/tests/data_catalog/sources/test_geo_dataframe_source.py index 94bfb4b67..8503a8f8e 100644 --- a/tests/data_catalog/sources/test_geo_dataframe_source.py +++ b/tests/data_catalog/sources/test_geo_dataframe_source.py @@ -1,19 +1,12 @@ -from datetime import datetime -from os.path import basename from pathlib import Path -from typing import cast from uuid import uuid4 import geopandas as gpd import numpy as np import pytest from pydantic import ValidationError -from pystac import Asset as StacAsset -from pystac import Catalog as StacCatalog -from pystac import Item as StacItem from hydromt._typing import NoDataException -from hydromt._typing.error import ErrorHandleMethod from hydromt.data_catalog import DataCatalog from hydromt.data_catalog.adapters.geodataframe import GeoDataFrameAdapter from hydromt.data_catalog.drivers import GeoDataFrameDriver, PyogrioDriver @@ -122,35 +115,3 @@ def test_geodataframe_unit_attrs(self, artifact_data: DataCatalog): source.metadata.attrs = {"NAME_0": {"long_name": "Country names"}} gdf = source.read_data() assert gdf["NAME_0"].attrs["long_name"] == "Country names" - - def test_to_stac_geodataframe(self, geodf: gpd.GeoDataFrame, tmp_dir: Path): - gdf_path = str(tmp_dir / "test.geojson") - geodf.to_file(gdf_path, driver="GeoJSON") - data_catalog = DataCatalog() # read artifacts - _ = data_catalog.sources # load artifact data as fallback - - # geodataframe - name = "gadm_level1" - adapter = cast(GeoDataFrameAdapter, data_catalog.get_source(name)) - bbox, _ = adapter.get_bbox() - gdf_stac_catalog = StacCatalog(id=name, description=name) - gds_stac_item = StacItem( - name, - geometry=None, - bbox=list(bbox), - properties=adapter.metadata, - datetime=datetime(1, 1, 1), - ) - gds_stac_asset = StacAsset(str(adapter.uri)) - gds_base_name = basename(adapter.uri) - gds_stac_item.add_asset(gds_base_name, gds_stac_asset) - - gdf_stac_catalog.add_item(gds_stac_item) - outcome = cast( - StacCatalog, adapter.to_stac_catalog(on_error=ErrorHandleMethod.RAISE) - ) - assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore - adapter.metadata.crs = ( - -3.14 - ) # manually create an invalid adapter by deleting the crs - assert adapter.to_stac_catalog(on_error=ErrorHandleMethod.SKIP) is None diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index 7e24e75b7..ccaca3c11 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -27,7 +27,7 @@ from hydromt._compat import HAS_GCSFS, HAS_OPENPYXL, HAS_S3FS from hydromt._io.writers import _write_xy from hydromt._typing import Bbox, TimeRange -from hydromt._typing.error import ErrorHandleMethod, NoDataException, NoDataStrategy +from hydromt._typing.error import NoDataException, NoDataStrategy from hydromt.config import Settings from hydromt.data_catalog.adapters import ( GeoDataFrameAdapter, @@ -116,24 +116,6 @@ def test_parser(): datasource = _parse_data_source_dict("test", source, root=root) assert isinstance(datasource, GeoDataFrameSource) assert datasource.full_uri == abspath(source["uri"]) - # TODO: do we want to allow Path objects? - # # test with Path object - # source.update(uri=Path(source["uri"])) - # datasource = _parse_data_source_dict("test", source, root=root) - # assert datasource.uri == abspath(source["uri"]) - # rel path - # source = { - # "data_adapter": {"name": "GeoDataFrame"}, - # "driver": {"name": "pyogrio"}, - # "data_type": "GeoDataFrame", - # "uri": "path/to/data.gpkg", - # "kwargs": {"fn": "test"}, - # } - # datasource = _parse_data_source_dict("test", source, root=root) - # assert datasource.uri == abspath(join(root, source["uri"])) - # check if path in kwargs is also absolute - # assert datasource.driver_kwargs["fn"] == abspath(join(root, "test")) - # alias dd = { "test": { "driver": {"name": "pyogrio"}, @@ -712,16 +694,6 @@ def test_to_stac(self, data_catalog: DataCatalog): raster_stac_catalog.add_item(raster_stac_item) - outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=ErrorHandleMethod.RAISE) - ) - - assert raster_stac_catalog.to_dict() == outcome.to_dict() # type: ignore - source.metadata.crs = ( - -3.14 - ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(on_error=ErrorHandleMethod.SKIP) is None - @pytest.fixture() def zoom_dict(self, tmp_dir: Path, zoom_level_tif: str) -> Dict[str, Any]: return { @@ -1042,14 +1014,6 @@ def test_to_stac_geodataframe(self, data_catalog: DataCatalog): gds_stac_item.add_asset(gds_base_name, gds_stac_asset) gdf_stac_catalog.add_item(gds_stac_item) - outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=ErrorHandleMethod.RAISE) - ) - assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore - source.metadata.crs = ( - -3.14 - ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(on_error=ErrorHandleMethod.SKIP) is None def test_get_geodataframe_path(data_catalog): @@ -1271,15 +1235,6 @@ def test_to_stac_geodataset(self, data_catalog: DataCatalog): gds_stac_catalog.add_item(gds_stac_item) - outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=ErrorHandleMethod.RAISE) - ) - assert gds_stac_catalog.to_dict() == outcome.to_dict() # type: ignore - source.metadata.crs = ( - -3.14 - ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(ErrorHandleMethod.SKIP) is None - def test_get_geodataset_artifact_data(data_catalog): name = "gtsmv3_eu_era5" From 3f50e3ec344fa132ab6c44cc182b5ec61e379cee Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 14:12:13 +0200 Subject: [PATCH 3/6] revert accidentally deleted tests --- .../sources/test_geo_dataframe_source.py | 40 ++++++++++++++++++- tests/data_catalog/test_data_catalog.py | 28 +++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/data_catalog/sources/test_geo_dataframe_source.py b/tests/data_catalog/sources/test_geo_dataframe_source.py index 8503a8f8e..01b8807d0 100644 --- a/tests/data_catalog/sources/test_geo_dataframe_source.py +++ b/tests/data_catalog/sources/test_geo_dataframe_source.py @@ -1,12 +1,18 @@ +from datetime import datetime +from os.path import basename from pathlib import Path +from typing import cast from uuid import uuid4 import geopandas as gpd import numpy as np import pytest from pydantic import ValidationError +from pystac import Asset as StacAsset +from pystac import Catalog as StacCatalog +from pystac import Item as StacItem -from hydromt._typing import NoDataException +from hydromt._typing import NoDataException, NoDataStrategy from hydromt.data_catalog import DataCatalog from hydromt.data_catalog.adapters.geodataframe import GeoDataFrameAdapter from hydromt.data_catalog.drivers import GeoDataFrameDriver, PyogrioDriver @@ -115,3 +121,35 @@ def test_geodataframe_unit_attrs(self, artifact_data: DataCatalog): source.metadata.attrs = {"NAME_0": {"long_name": "Country names"}} gdf = source.read_data() assert gdf["NAME_0"].attrs["long_name"] == "Country names" + + def test_to_stac_geodataframe(self, geodf: gpd.GeoDataFrame, tmp_dir: Path): + gdf_path = str(tmp_dir / "test.geojson") + geodf.to_file(gdf_path, driver="GeoJSON") + data_catalog = DataCatalog() # read artifacts + _ = data_catalog.sources # load artifact data as fallback + + # geodataframe + name = "gadm_level1" + adapter = cast(GeoDataFrameAdapter, data_catalog.get_source(name)) + bbox, _ = adapter.get_bbox() + gdf_stac_catalog = StacCatalog(id=name, description=name) + gds_stac_item = StacItem( + name, + geometry=None, + bbox=list(bbox), + properties=adapter.metadata, + datetime=datetime(1, 1, 1), + ) + gds_stac_asset = StacAsset(str(adapter.uri)) + gds_base_name = basename(adapter.uri) + gds_stac_item.add_asset(gds_base_name, gds_stac_asset) + + gdf_stac_catalog.add_item(gds_stac_item) + outcome = cast( + StacCatalog, adapter.to_stac_catalog(on_error=NoDataStrategy.RAISE) + ) + assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore + adapter.metadata.crs = ( + -3.14 + ) # manually create an invalid adapter by deleting the crs + assert adapter.to_stac_catalog(on_error=NoDataStrategy.IGNORE) is None diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index ccaca3c11..588978892 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -116,6 +116,7 @@ def test_parser(): datasource = _parse_data_source_dict("test", source, root=root) assert isinstance(datasource, GeoDataFrameSource) assert datasource.full_uri == abspath(source["uri"]) + dd = { "test": { "driver": {"name": "pyogrio"}, @@ -694,6 +695,16 @@ def test_to_stac(self, data_catalog: DataCatalog): raster_stac_catalog.add_item(raster_stac_item) + outcome = cast( + StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + ) + + assert raster_stac_catalog.to_dict() == outcome.to_dict() # type: ignore + source.metadata.crs = ( + -3.14 + ) # manually create an invalid adapter by deleting the crs + assert source.to_stac_catalog(on_error=NoDataStrategy.SKIP) is None + @pytest.fixture() def zoom_dict(self, tmp_dir: Path, zoom_level_tif: str) -> Dict[str, Any]: return { @@ -1014,6 +1025,14 @@ def test_to_stac_geodataframe(self, data_catalog: DataCatalog): gds_stac_item.add_asset(gds_base_name, gds_stac_asset) gdf_stac_catalog.add_item(gds_stac_item) + outcome = cast( + StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + ) + assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore + source.metadata.crs = ( + -3.14 + ) # manually create an invalid adapter by deleting the crs + assert source.to_stac_catalog(on_error=NoDataStrategy.SKIP) is None def test_get_geodataframe_path(data_catalog): @@ -1235,6 +1254,15 @@ def test_to_stac_geodataset(self, data_catalog: DataCatalog): gds_stac_catalog.add_item(gds_stac_item) + outcome = cast( + StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + ) + assert gds_stac_catalog.to_dict() == outcome.to_dict() # type: ignore + source.metadata.crs = ( + -3.14 + ) # manually create an invalid adapter by deleting the crs + assert source.to_stac_catalog(NoDataStrategy.IGNORE) is None + def test_get_geodataset_artifact_data(data_catalog): name = "gtsmv3_eu_era5" From b1f52cea58e74bef5165e7ce38065ca9de579424 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 14:13:43 +0200 Subject: [PATCH 4/6] one more test --- tests/data_catalog/test_data_catalog.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index 588978892..ef5929795 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -46,6 +46,7 @@ GeoDatasetSource, RasterDatasetSource, ) +from hydromt.data_catalog.sources.dataframe import DataFrameSource from hydromt.gis._gis_utils import _to_geographic_bbox CATALOGDIR = join(dirname(abspath(__file__)), "..", "..", "data", "catalogs") @@ -1499,6 +1500,25 @@ def test_get_dataframe_variables(df, data_catalog): assert df.columns == ["city"] +def test_to_stac(self, df: pd.DataFrame, tmp_dir: Path): + uri_df = str(tmp_dir / "test.csv") + name = "test_dataframe" + df.to_csv(uri_df) + dc = DataCatalog().from_dict( + {name: {"data_type": "DataFrame", "uri": uri_df, "driver": "pandas"}} + ) + + source = cast(DataFrameSource, dc.get_source(name)) + + with pytest.raises( + NotImplementedError, + match="DataFrameSource does not support full stac conversion ", + ): + source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + + assert source.to_stac_catalog(on_error=NoDataStrategy.IGNORE) is None + + def test_get_dataframe_custom_data(tmp_dir, df, data_catalog): name = "test.csv" path = Path(tmp_dir, name) From ab88ea6616acc9fa1a7ad19853f176262329fbf4 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 14:30:15 +0200 Subject: [PATCH 5/6] forgot to rename argument --- hydromt/data_catalog/sources/dataframe.py | 2 +- hydromt/data_catalog/sources/dataset.py | 4 ++-- hydromt/data_catalog/sources/geodataframe.py | 7 +++---- hydromt/data_catalog/sources/geodataset.py | 7 +++---- hydromt/data_catalog/sources/rasterdataset.py | 6 +++--- .../sources/test_geo_dataframe_source.py | 4 ++-- tests/data_catalog/test_data_catalog.py | 14 +++++++------- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/hydromt/data_catalog/sources/dataframe.py b/hydromt/data_catalog/sources/dataframe.py index 9b2cd433d..63cfe7f98 100644 --- a/hydromt/data_catalog/sources/dataframe.py +++ b/hydromt/data_catalog/sources/dataframe.py @@ -128,7 +128,7 @@ def to_stac_catalog( Parameters ---------- - - on_error (str, optional): The error handling strategy. + - handle_nodata (str, optional): The error handling strategy. Options are: "raise" to raise an error on failure, "skip" to skip the dataframe on failure, and "coerce" (default) to set default values on failure. diff --git a/hydromt/data_catalog/sources/dataset.py b/hydromt/data_catalog/sources/dataset.py index 82100a553..3414a318f 100644 --- a/hydromt/data_catalog/sources/dataset.py +++ b/hydromt/data_catalog/sources/dataset.py @@ -210,9 +210,9 @@ def to_stac_catalog( Parameters ---------- - - on_error (str, optional): The error handling strategy. + - handle_nodata (str, optional): The error handling strategy. Options are: "raise" to raise an error on failure, "skip" to skip the - dataset on failure, and "coerce" (default) to set default values on failure. + dataset on failure. Returns ------- diff --git a/hydromt/data_catalog/sources/geodataframe.py b/hydromt/data_catalog/sources/geodataframe.py index 0f1da16dd..fc0915800 100644 --- a/hydromt/data_catalog/sources/geodataframe.py +++ b/hydromt/data_catalog/sources/geodataframe.py @@ -217,10 +217,9 @@ def to_stac_catalog( Parameters ---------- - - on_error (str, optional): The error handling strategy. - Options are: "raise" to raise an error on failure, "skip" to skip - the dataset on failure, and "coerce" (default) to set - default values on failure. + - handle_nodata (str, optional): The error handling strategy. + Options are: "raise" to raise an error on failure, "ignore" to skip + the dataset on failure. Returns ------- diff --git a/hydromt/data_catalog/sources/geodataset.py b/hydromt/data_catalog/sources/geodataset.py index 309eeb3a9..75f11f941 100644 --- a/hydromt/data_catalog/sources/geodataset.py +++ b/hydromt/data_catalog/sources/geodataset.py @@ -281,10 +281,9 @@ def to_stac_catalog( Parameters ---------- - - on_error (str, optional): The error handling strategy. - Options are: "raise" to raise an error on failure, "skip" to skip - the dataset on failure, and "coerce" (default) to set default - values on failure. + - handle_nodata (str, optional): The error handling strategy. + Options are: "raise" to raise an error on failure, "IGNORE" to skip + the dataset on failure Returns ------- diff --git a/hydromt/data_catalog/sources/rasterdataset.py b/hydromt/data_catalog/sources/rasterdataset.py index c90a541dc..cf9a64e65 100644 --- a/hydromt/data_catalog/sources/rasterdataset.py +++ b/hydromt/data_catalog/sources/rasterdataset.py @@ -282,9 +282,9 @@ def to_stac_catalog( Parameters ---------- - - on_error (str, optional): The error handling strategy. - Options are: "raise" to raise an error on failure, "skip" to skip the - dataset on failure, and "coerce" (default) to set default values on failure. + - handle_nodata (str, optional): The error handling strategy. + Options are: "raise" to raise an error on failure, "ignore" to skip the + dataset on failure Returns ------- diff --git a/tests/data_catalog/sources/test_geo_dataframe_source.py b/tests/data_catalog/sources/test_geo_dataframe_source.py index 01b8807d0..f2d423050 100644 --- a/tests/data_catalog/sources/test_geo_dataframe_source.py +++ b/tests/data_catalog/sources/test_geo_dataframe_source.py @@ -146,10 +146,10 @@ def test_to_stac_geodataframe(self, geodf: gpd.GeoDataFrame, tmp_dir: Path): gdf_stac_catalog.add_item(gds_stac_item) outcome = cast( - StacCatalog, adapter.to_stac_catalog(on_error=NoDataStrategy.RAISE) + StacCatalog, adapter.to_stac_catalog(handle_nodata=NoDataStrategy.RAISE) ) assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore adapter.metadata.crs = ( -3.14 ) # manually create an invalid adapter by deleting the crs - assert adapter.to_stac_catalog(on_error=NoDataStrategy.IGNORE) is None + assert adapter.to_stac_catalog(handle_nodata=NoDataStrategy.IGNORE) is None diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index ef5929795..f6ecea0d6 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -697,14 +697,14 @@ def test_to_stac(self, data_catalog: DataCatalog): raster_stac_catalog.add_item(raster_stac_item) outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + StacCatalog, source.to_stac_catalog(handle_nodata=NoDataStrategy.RAISE) ) assert raster_stac_catalog.to_dict() == outcome.to_dict() # type: ignore source.metadata.crs = ( -3.14 ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(on_error=NoDataStrategy.SKIP) is None + assert source.to_stac_catalog(handle_nodata=NoDataStrategy.SKIP) is None @pytest.fixture() def zoom_dict(self, tmp_dir: Path, zoom_level_tif: str) -> Dict[str, Any]: @@ -1027,13 +1027,13 @@ def test_to_stac_geodataframe(self, data_catalog: DataCatalog): gdf_stac_catalog.add_item(gds_stac_item) outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + StacCatalog, source.to_stac_catalog(handle_nodata=NoDataStrategy.RAISE) ) assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore source.metadata.crs = ( -3.14 ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(on_error=NoDataStrategy.SKIP) is None + assert source.to_stac_catalog(handle_nodata=NoDataStrategy.SKIP) is None def test_get_geodataframe_path(data_catalog): @@ -1256,7 +1256,7 @@ def test_to_stac_geodataset(self, data_catalog: DataCatalog): gds_stac_catalog.add_item(gds_stac_item) outcome = cast( - StacCatalog, source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + StacCatalog, source.to_stac_catalog(handle_nodata=NoDataStrategy.RAISE) ) assert gds_stac_catalog.to_dict() == outcome.to_dict() # type: ignore source.metadata.crs = ( @@ -1514,9 +1514,9 @@ def test_to_stac(self, df: pd.DataFrame, tmp_dir: Path): NotImplementedError, match="DataFrameSource does not support full stac conversion ", ): - source.to_stac_catalog(on_error=NoDataStrategy.RAISE) + source.to_stac_catalog(handle_nodata=NoDataStrategy.RAISE) - assert source.to_stac_catalog(on_error=NoDataStrategy.IGNORE) is None + assert source.to_stac_catalog(handle_nodata=NoDataStrategy.IGNORE) is None def test_get_dataframe_custom_data(tmp_dir, df, data_catalog): From 23fdc1aada7450f8601e7b7a91dba588b6f158fe Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 23 Sep 2024 14:42:42 +0200 Subject: [PATCH 6/6] fix tests --- hydromt/data_catalog/sources/geodataframe.py | 2 +- tests/data_catalog/test_data_catalog.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hydromt/data_catalog/sources/geodataframe.py b/hydromt/data_catalog/sources/geodataframe.py index fc0915800..a66e484ce 100644 --- a/hydromt/data_catalog/sources/geodataframe.py +++ b/hydromt/data_catalog/sources/geodataframe.py @@ -237,7 +237,7 @@ def to_stac_catalog( raise RuntimeError( f"Unknown extension: {ext} cannot determine media type" ) - except (IndexError, KeyError, CRSError) as e: + except (IndexError, KeyError, CRSError, TypeError) as e: if handle_nodata == NoDataStrategy.IGNORE: logger.warning( "Skipping {name} during stac conversion because" diff --git a/tests/data_catalog/test_data_catalog.py b/tests/data_catalog/test_data_catalog.py index f6ecea0d6..44e472c6b 100644 --- a/tests/data_catalog/test_data_catalog.py +++ b/tests/data_catalog/test_data_catalog.py @@ -702,9 +702,9 @@ def test_to_stac(self, data_catalog: DataCatalog): assert raster_stac_catalog.to_dict() == outcome.to_dict() # type: ignore source.metadata.crs = ( - -3.14 - ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(handle_nodata=NoDataStrategy.SKIP) is None + 234234234 # manually create an invalid adapter by deleting the crs + ) + assert source.to_stac_catalog(handle_nodata=NoDataStrategy.IGNORE) is None @pytest.fixture() def zoom_dict(self, tmp_dir: Path, zoom_level_tif: str) -> Dict[str, Any]: @@ -1033,7 +1033,7 @@ def test_to_stac_geodataframe(self, data_catalog: DataCatalog): source.metadata.crs = ( -3.14 ) # manually create an invalid adapter by deleting the crs - assert source.to_stac_catalog(handle_nodata=NoDataStrategy.SKIP) is None + assert source.to_stac_catalog(handle_nodata=NoDataStrategy.IGNORE) is None def test_get_geodataframe_path(data_catalog): @@ -1500,7 +1500,7 @@ def test_get_dataframe_variables(df, data_catalog): assert df.columns == ["city"] -def test_to_stac(self, df: pd.DataFrame, tmp_dir: Path): +def test_to_stac(df: pd.DataFrame, tmp_dir: Path): uri_df = str(tmp_dir / "test.csv") name = "test_dataframe" df.to_csv(uri_df)