Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove ErrorHandleMethod in favour of NoDataStrategy #1055

Merged
merged 7 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions hydromt/_typing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from .crs import CRS
from .error import (
ErrorHandleMethod,
NoDataException,
NoDataStrategy,
exec_nodata_strat,
Expand Down Expand Up @@ -55,7 +54,6 @@
"TotalBounds",
"XArrayDict",
"ModelMode",
"ErrorHandleMethod",
"NoDataStrategy",
"NoDataException",
"exec_nodata_strat",
Expand Down
8 changes: 0 additions & 8 deletions hydromt/_typing/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,3 @@ def exec_nodata_strat(msg: str, strategy: NoDataStrategy) -> None:
logger.warning(msg)
elif strategy == NoDataStrategy.IGNORE:
pass


class ErrorHandleMethod(Enum):
"""Strategies for error handling within hydromt."""

RAISE = 1
SKIP = 2
COERCE = 3
12 changes: 6 additions & 6 deletions hydromt/data_catalog/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from hydromt import __version__
from hydromt._io.readers import _yml_from_uri_or_path
from hydromt._typing import Bbox, ErrorHandleMethod, SourceSpecDict, StrPath, TimeRange
from hydromt._typing import Bbox, SourceSpecDict, StrPath, TimeRange
from hydromt._typing.error import NoDataException, NoDataStrategy, exec_nodata_strat
from hydromt._utils import (
_deep_merge,
Expand Down Expand Up @@ -157,7 +157,7 @@ def to_stac_catalog(
description: str = "The stac catalog of hydromt",
used_only: bool = False,
catalog_type: CatalogType = CatalogType.RELATIVE_PUBLISHED,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
):
"""Write data catalog to STAC format.

Expand All @@ -181,7 +181,7 @@ def to_stac_catalog(
meta = meta or {}
stac_catalog = StacCatalog(id=catalog_name, description=description)
for _name, source in self.list_sources(used_only):
stac_child_catalog = source.to_stac_catalog(on_error)
stac_child_catalog = source.to_stac_catalog(handle_nodata)
if stac_child_catalog:
stac_catalog.add_child(stac_child_catalog)

Expand All @@ -191,16 +191,16 @@ def to_stac_catalog(
def from_stac_catalog(
self,
stac_like: Union[str, Path, StacCatalog, dict],
on_error: ErrorHandleMethod = ErrorHandleMethod.SKIP,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
):
"""Read data catalog from STAC format.

Parameters
----------
stac_like: str, Path, StacCatalog, dict
STAC catalog, or a path to a STAC catalog file.
on_error: ErrorHandleMethod
What to do on error when converting from STAC
handle_nodata: NoDataStrategy
What to do when required data is not available when converting from STAC
"""
if isinstance(stac_like, (str, Path)):
stac_catalog = StacCatalog.from_file(stac_like)
Expand Down
25 changes: 2 additions & 23 deletions hydromt/data_catalog/sources/dataframe.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
"""DataSource class for the DataFrame type."""

from datetime import datetime
from logging import Logger, getLogger
from typing import Any, ClassVar, Dict, List, Literal, Optional

import pandas as pd
from fsspec import filesystem
from pydantic import Field
from pystac import Asset as StacAsset
from pystac import Catalog as StacCatalog
from pystac import Item as StacItem

from hydromt._typing import (
ErrorHandleMethod,
NoDataStrategy,
StrPath,
TimeRange,
Expand Down Expand Up @@ -122,7 +118,7 @@ def to_file(

def to_stac_catalog(
self,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
) -> Optional[StacCatalog]:
"""
Convert a dataframe into a STAC Catalog representation.
Expand All @@ -141,29 +137,12 @@ def to_stac_catalog(
- Optional[StacCatalog]: The STAC Catalog representation of the dataframe, or
None if the dataset was skipped.
"""
if on_error == ErrorHandleMethod.SKIP:
if handle_nodata == NoDataStrategy.IGNORE:
logger.warning(
f"Skipping {self.name} during stac conversion because"
"because detecting temporal extent failed."
)
return
elif on_error == ErrorHandleMethod.COERCE:
stac_catalog = StacCatalog(
self.name,
description=self.name,
)
stac_item = StacItem(
self.name,
geometry=None,
bbox=[0, 0, 0, 0],
properties=self.metadata.model_dump(),
datetime=datetime(1, 1, 1),
)
stac_asset = StacAsset(self.full_uri)
stac_item.add_asset("hydromt_path", stac_asset)

stac_catalog.add_item(stac_item)
return stac_catalog
else:
raise NotImplementedError(
"DataFrameSource does not support full stac conversion as it lacks"
Expand Down
11 changes: 2 additions & 9 deletions hydromt/data_catalog/sources/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""DataSource class for the Dataset type."""

from datetime import datetime
from logging import Logger, getLogger
from os.path import basename, splitext
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
Expand All @@ -16,7 +15,6 @@
from pystac import MediaType

from hydromt._typing import (
ErrorHandleMethod,
NoDataStrategy,
StrPath,
TimeRange,
Expand Down Expand Up @@ -202,7 +200,7 @@ def detect_time_range(

def to_stac_catalog(
self,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
) -> Optional[StacCatalog]:
"""
Convert a dataset into a STAC Catalog representation.
Expand Down Expand Up @@ -236,17 +234,12 @@ def to_stac_catalog(
f"Unknown extension: {ext} cannot determine media type"
)
except (IndexError, KeyError, CRSError) as e:
if on_error == ErrorHandleMethod.SKIP:
if handle_nodata == NoDataStrategy.IGNORE:
logger.warning(
"Skipping {name} during stac conversion because"
"because detecting spacial extent failed."
)
return
elif on_error == ErrorHandleMethod.COERCE:
props = self.metadata.model_dump(exclude_none=True, exclude_unset=True)
start_dt = datetime(1, 1, 1)
end_dt = datetime(1, 1, 1)
media_type = MediaType.JSON
else:
raise e

Expand Down
13 changes: 4 additions & 9 deletions hydromt/data_catalog/sources/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from hydromt._typing import (
Bbox,
ErrorHandleMethod,
Geom,
NoDataStrategy,
StrPath,
Expand Down Expand Up @@ -206,7 +205,7 @@ def detect_bbox(

def to_stac_catalog(
self,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
) -> Optional[StacCatalog]:
"""
Convert a geodataframe into a STAC Catalog representation.
Expand Down Expand Up @@ -239,19 +238,15 @@ def to_stac_catalog(
raise RuntimeError(
f"Unknown extension: {ext} cannot determine media type"
)
except (IndexError, KeyError, CRSError):
if on_error == ErrorHandleMethod.SKIP:
except (IndexError, KeyError, CRSError) as e:
if handle_nodata == NoDataStrategy.IGNORE:
logger.warning(
"Skipping {name} during stac conversion because"
"because detecting spacial extent failed."
)
return
elif on_error == ErrorHandleMethod.COERCE:
bbox = [0.0, 0.0, 0.0, 0.0]
props = self.data_adapter.meta
media_type = MediaType.JSON
else:
raise
raise e
else:
stac_catalog = StacCatalog(
self.name,
Expand Down
12 changes: 2 additions & 10 deletions hydromt/data_catalog/sources/geodataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""DataSource class for the GeoDataset type."""

from datetime import datetime
from logging import Logger, getLogger
from os.path import basename, splitext
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, cast
Expand All @@ -18,7 +17,6 @@

from hydromt._typing import (
Bbox,
ErrorHandleMethod,
Geom,
NoDataStrategy,
StrPath,
Expand Down Expand Up @@ -273,7 +271,7 @@ def detect_time_range(

def to_stac_catalog(
self,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
) -> Optional[StacCatalog]:
"""
Convert a geodataset into a STAC Catalog representation.
Expand Down Expand Up @@ -308,18 +306,12 @@ def to_stac_catalog(
f"Unknown extension: {ext} cannot determine media type"
)
except (IndexError, KeyError, CRSError) as e:
if on_error == ErrorHandleMethod.SKIP:
if handle_nodata == NoDataStrategy.IGNORE:
logger.warning(
"Skipping {name} during stac conversion because"
"because detecting spacial extent failed."
)
return
elif on_error == ErrorHandleMethod.COERCE:
bbox = [0.0, 0.0, 0.0, 0.0]
props = self.metadata
start_dt = datetime(1, 1, 1)
end_dt = datetime(1, 1, 1)
media_type = MediaType.JSON
else:
raise e

Expand Down
12 changes: 2 additions & 10 deletions hydromt/data_catalog/sources/rasterdataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""DataSource class for the RasterDataset type."""

from datetime import datetime
from logging import Logger, getLogger
from os.path import basename, splitext
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, cast
Expand All @@ -18,7 +17,6 @@

from hydromt._typing import (
Bbox,
ErrorHandleMethod,
Geom,
NoDataStrategy,
StrPath,
Expand Down Expand Up @@ -274,7 +272,7 @@ def detect_time_range(

def to_stac_catalog(
self,
on_error: ErrorHandleMethod = ErrorHandleMethod.COERCE,
handle_nodata: NoDataStrategy = NoDataStrategy.IGNORE,
) -> Optional[StacCatalog]:
"""
Convert a rasterdataset into a STAC Catalog representation.
Expand Down Expand Up @@ -314,18 +312,12 @@ def to_stac_catalog(
f"Unknown extension: {ext} cannot determine media type"
)
except (IndexError, KeyError, CRSError) as e:
if on_error == ErrorHandleMethod.SKIP:
if handle_nodata == NoDataStrategy.IGNORE:
logger.warning(
"Skipping {name} during stac conversion because"
"because detecting spacial extent failed."
)
return
elif on_error == ErrorHandleMethod.COERCE:
bbox = [0.0, 0.0, 0.0, 0.0]
props = self.data_adapter.meta
start_dt = datetime(1, 1, 1)
end_dt = datetime(1, 1, 1)
media_type = MediaType.JSON
else:
raise e

Expand Down
14 changes: 2 additions & 12 deletions tests/data_catalog/sources/test_dataset_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import xarray as xr
from pystac import Catalog as StacCatalog

from hydromt._typing import ErrorHandleMethod
from hydromt._typing.error import NoDataStrategy
from hydromt.data_catalog.adapters import DatasetAdapter
from hydromt.data_catalog.drivers import DatasetDriver
from hydromt.data_catalog.sources import DatasetSource
Expand Down Expand Up @@ -80,16 +80,6 @@ def _get_time_range(self, *args, **kwargs):

def test_to_stac_catalog_skip(self, dataset_source_no_timerange: DatasetSource):
catalog: Optional[StacCatalog] = dataset_source_no_timerange.to_stac_catalog(
on_error=ErrorHandleMethod.SKIP
handle_nodata=NoDataStrategy.IGNORE
)
assert catalog is None

def test_to_stac_catalog_coerce(self, dataset_source_no_timerange: DatasetSource):
catalog: Optional[StacCatalog] = dataset_source_no_timerange.to_stac_catalog(
on_error=ErrorHandleMethod.COERCE
)
assert isinstance(catalog, StacCatalog)
stac_item = next(catalog.get_items(dataset_source_no_timerange.name), None)
assert list(stac_item.assets.keys())[0] == "test.nc"
assert stac_item.properties["start_datetime"] == "0001-01-01T00:00:00Z"
assert stac_item.properties["end_datetime"] == "0001-01-01T00:00:00Z"
39 changes: 0 additions & 39 deletions tests/data_catalog/sources/test_geo_dataframe_source.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
from datetime import datetime
from os.path import basename
from pathlib import Path
from typing import cast
from uuid import uuid4

import geopandas as gpd
import numpy as np
import pytest
from pydantic import ValidationError
from pystac import Asset as StacAsset
from pystac import Catalog as StacCatalog
from pystac import Item as StacItem

from hydromt._typing import NoDataException
from hydromt._typing.error import ErrorHandleMethod
from hydromt.data_catalog import DataCatalog
from hydromt.data_catalog.adapters.geodataframe import GeoDataFrameAdapter
from hydromt.data_catalog.drivers import GeoDataFrameDriver, PyogrioDriver
Expand Down Expand Up @@ -122,35 +115,3 @@ def test_geodataframe_unit_attrs(self, artifact_data: DataCatalog):
source.metadata.attrs = {"NAME_0": {"long_name": "Country names"}}
gdf = source.read_data()
assert gdf["NAME_0"].attrs["long_name"] == "Country names"

def test_to_stac_geodataframe(self, geodf: gpd.GeoDataFrame, tmp_dir: Path):
gdf_path = str(tmp_dir / "test.geojson")
geodf.to_file(gdf_path, driver="GeoJSON")
data_catalog = DataCatalog() # read artifacts
_ = data_catalog.sources # load artifact data as fallback

# geodataframe
name = "gadm_level1"
adapter = cast(GeoDataFrameAdapter, data_catalog.get_source(name))
bbox, _ = adapter.get_bbox()
gdf_stac_catalog = StacCatalog(id=name, description=name)
gds_stac_item = StacItem(
name,
geometry=None,
bbox=list(bbox),
properties=adapter.metadata,
datetime=datetime(1, 1, 1),
)
gds_stac_asset = StacAsset(str(adapter.uri))
gds_base_name = basename(adapter.uri)
gds_stac_item.add_asset(gds_base_name, gds_stac_asset)

gdf_stac_catalog.add_item(gds_stac_item)
outcome = cast(
StacCatalog, adapter.to_stac_catalog(on_error=ErrorHandleMethod.RAISE)
)
assert gdf_stac_catalog.to_dict() == outcome.to_dict() # type: ignore
adapter.metadata.crs = (
-3.14
) # manually create an invalid adapter by deleting the crs
assert adapter.to_stac_catalog(on_error=ErrorHandleMethod.SKIP) is None
Loading
Loading