Docs/export data example (#1057)
Jaapel authored Sep 24, 2024
1 parent bb088c9 commit 48bcc34
Showing 22 changed files with 134 additions and 77 deletions.
5 changes: 2 additions & 3 deletions docs/guides/advanced_user/model_clip.rst
@@ -3,8 +3,8 @@
Clipping a model
================

The ``clip`` method allows to clip a subregion from an existing model, including all static maps,
static geometries and forcing data.
The ``clip`` method allows to clip a subregion from an existing model, including all
static maps, static geometries and forcing data.

**Steps in brief:**

@@ -17,7 +17,6 @@ static geometries and forcing data.
This method is not yet implemented for all plugins. Please check the documentation of the respective
:ref:`plugin<plugins>` for more information on whether the clip method is available.


.. _cli_clip:

From CLI
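For orientation, the Python-API counterpart of the clip workflow described in this page might look roughly like the sketch below. It is only a sketch: the plugin class name (`SomeModel`), its import path, and whether `clip` modifies the model in place are assumptions, since the method is plugin-specific and, as the note above says, not yet implemented everywhere.

# Sketch of clipping a model from Python (plugin class and clip semantics assumed)
from hydromt_some_plugin import SomeModel  # hypothetical plugin providing a clip method

mod = SomeModel(root="path/to/full_model", mode="r")
mod.read()  # load static maps, static geometries and forcing

# Region syntax follows the usual HydroMT region conventions; bbox is west, south, east, north
mod.clip(region={"bbox": [12.0, 46.0, 13.0, 46.5]})

mod.set_root("path/to/clipped_model", mode="w")
mod.write()  # write the clipped model to the new root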
22 changes: 8 additions & 14 deletions examples/export_data.ipynb
@@ -25,11 +25,8 @@
"metadata": {},
"outputs": [],
"source": [
"# import hydromt and setup logging\n",
"import hydromt\n",
"from hydromt.utils import setuplog\n",
"\n",
"logger = setuplog(\"export data\", log_level=10)"
"# import hydromt\n",
"import hydromt"
]
},
{
@@ -53,10 +50,7 @@
"outputs": [],
"source": [
"# Download and read artifacts for the Piave basin to `~/.hydromt_data/`.\n",
"data_catalog = hydromt.DataCatalog(\n",
" logger=logger,\n",
" data_libs=[\"artifact_data=v1.0.0\"],\n",
")"
"data_catalog = hydromt.DataCatalog(data_libs=[\"artifact_data=v1.0.0\"])"
]
},
{
@@ -169,7 +163,7 @@
"source": [
"# List of data sources to export\n",
"# NOTE that for ERA5 we only export the precip variable and for merit_hydro we only export the elevtn variable\n",
"source_list = [\"merit_hydro[elevtn,flwdir]\", \"era5[precip]\", \"vito\"]\n",
"source_list = [\"merit_hydro[elevtn,flwdir]\", \"era5[precip]\", \"vito_2015\"]\n",
"# Geographic extent\n",
"bbox = [12.0, 46.0, 13.0, 46.5]\n",
"# Time extent\n",
@@ -194,11 +188,11 @@
"source": [
"folder_name = \"tmp_data_export\"\n",
"data_catalog.export_data(\n",
" data_root=folder_name,\n",
" new_root=folder_name,\n",
" bbox=bbox,\n",
" time_range=time_range,\n",
" source_names=source_list,\n",
" meta={\"version\": \"1\"},\n",
" metadata={\"version\": \"1\"},\n",
")"
]
},
@@ -322,7 +316,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.5 ('hydromt-dev')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -336,7 +330,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.11.9"
},
"vscode": {
"interpreter": {
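Put together, the updated notebook cells amount to roughly the following standalone script. This is a sketch assembled from the diff above; the time range actually used in the notebook is not visible in these hunks, so the dates below are placeholders.

import hydromt

# Read the pre-defined artifact data catalog (Piave basin sample data)
data_catalog = hydromt.DataCatalog(data_libs=["artifact_data=v1.0.0"])

# Sources to export; the bracket syntax limits the export to specific variables
source_list = ["merit_hydro[elevtn,flwdir]", "era5[precip]", "vito_2015"]

# Geographic extent (west, south, east, north) and time extent
bbox = [12.0, 46.0, 13.0, 46.5]
time_range = ("2010-02-01", "2010-02-10")  # placeholder dates, not taken from the diff

# Export the selected sources to a local folder, using the renamed arguments
data_catalog.export_data(
    new_root="tmp_data_export",
    bbox=bbox,
    time_range=time_range,
    source_names=source_list,
    metadata={"version": "1"},
)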
72 changes: 49 additions & 23 deletions hydromt/data_catalog/data_catalog.py
@@ -9,7 +9,7 @@
import os
from datetime import datetime
from os.path import abspath, basename, dirname, exists, isfile, join, splitext
from pathlib import Path, PurePath
from pathlib import Path
from typing import (
Any,
Dict,
@@ -23,7 +23,6 @@
cast,
)

import dateutil
import dateutil.parser
import geopandas as gpd
import numpy as np
@@ -42,7 +41,6 @@
from hydromt._typing.error import NoDataException, NoDataStrategy, exec_nodata_strat
from hydromt._utils import (
_deep_merge,
_is_valid_url,
_partition_dictionaries,
_single_var_as_array,
)
@@ -1053,12 +1051,8 @@ def export_data(
new_root = new_root.absolute()
new_root.mkdir(exist_ok=True)

if not time_range:
time_range: Tuple[Union[datetime, str], Union[datetime, str]] = tuple()

# convert strings to timerange
if any(map(lambda t: not isinstance(t, datetime), time_range)):
time_range = tuple(map(lambda t: dateutil.parser.parse(t), time_range))
if time_range:
time_range: TimeRange = _parse_time_range(time_range)

# create copy of data with selected source names
source_vars = {}
@@ -1115,20 +1109,6 @@
source.data_adapter.unit_mult = {}
source.data_adapter.unit_add = {}
try:
if (
_is_valid_url(source.uri)
or PurePath(source.uri).is_absolute()
):
new_uri: PurePath = PurePath(source.uri).name
else:
new_uri: PurePath = source.uri
p = cast(Path, Path(new_root) / new_uri)
if not force_overwrite and isfile(p):
logger.warning(
f"File {p} already exists and not in forced overwrite mode. skipping..."
)
continue

# get keyword only params
kw_only_params: Set[inspect.Parameter] = set(
map(
@@ -1155,6 +1135,27 @@
if k in kw_only_params
}

bbox: Optional[Bbox] = query_kwargs.get("bbox")
if bbox is not None:
mask = _parse_geom_bbox_buffer(bbox=bbox)
else:
mask = None

source_kwargs: Dict[str, Any] = copy.deepcopy(query_kwargs)
source_kwargs.pop("bbox", None)
source_kwargs["mask"] = mask

basename: str = source._get_uri_basename(
handle_nodata, **source_kwargs
)

p = cast(Path, Path(new_root) / basename)
if not force_overwrite and isfile(p):
logger.warning(
f"File {p} already exists and not in forced overwrite mode. skipping..."
)
continue

new_source: DataSource = source.to_file(
file_path=p,
handle_nodata=NoDataStrategy.RAISE,
@@ -1267,6 +1268,9 @@ def get_rasterdataset(
if isinstance(variables, str):
variables = [variables]

if time_range:
time_range = _parse_time_range(time_range)

if isinstance(data_like, dict):
data_like, provider, version = _parse_data_like_dict(
data_like, provider, version
@@ -1523,10 +1527,15 @@ def get_geodataset(
mask = _parse_geom_bbox_buffer(geom=geom, bbox=bbox, buffer=buffer)
else:
mask = None

if time_range:
time_range = _parse_time_range(time_range)

if isinstance(data_like, dict):
data_like, provider, version = _parse_data_like_dict(
data_like, provider, version
)

if isinstance(data_like, (str, Path)):
if isinstance(data_like, str) and data_like in self.sources:
name = data_like
@@ -1636,6 +1645,10 @@ def get_dataset(
data_like, provider, version = _parse_data_like_dict(
data_like, provider, version
)

if time_range:
time_range = _parse_time_range(time_range)

if isinstance(data_like, (str, Path)):
if isinstance(data_like, str) and data_like in self.sources:
name = data_like
@@ -1719,6 +1732,10 @@ def get_dataframe(
data_like, provider, version = _parse_data_like_dict(
data_like, provider, version
)

if time_range:
time_range = _parse_time_range(time_range)

if isinstance(data_like, (str, Path)):
if isinstance(data_like, str) and data_like in self.sources:
name = data_like
@@ -1867,3 +1884,12 @@ def _denormalise_data_dict(data_dict) -> List[Tuple[str, Dict]]:
data_list.extend(_denormalise_data_dict(item))

return data_list


def _parse_time_range(
time_range: Tuple[Union[str, datetime], Union[str, datetime]],
) -> TimeRange:
"""Parse timerange with strings to datetime."""
if any(map(lambda t: not isinstance(t, datetime), time_range)):
time_range = tuple(map(lambda t: dateutil.parser.parse(t), time_range))
return cast(TimeRange, time_range)
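A small usage sketch of the new helper, inferred only from the function body above: strings are parsed with `dateutil`, while tuples that are already datetimes pass through unchanged (a mixed str/datetime tuple would also be sent through `dateutil.parser.parse`).

from datetime import datetime

# assumes _parse_time_range from the diff above is in scope

# both entries are strings -> parsed to datetime objects
tr = _parse_time_range(("2010-02-01", "2010-02-10"))
assert all(isinstance(t, datetime) for t in tr)

# already datetimes -> returned as-is
tr = _parse_time_range((datetime(2010, 2, 1), datetime(2010, 2, 10)))
assert tr == (datetime(2010, 2, 1), datetime(2010, 2, 10))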
2 changes: 1 addition & 1 deletion hydromt/data_catalog/drivers/dataframe/dataframe_driver.py
@@ -41,7 +41,7 @@ def write(
path: StrPath,
df: pd.DataFrame,
**kwargs,
) -> None:
) -> str:
"""
Write out a DataFrame to file.
4 changes: 3 additions & 1 deletion hydromt/data_catalog/drivers/dataframe/pandas_driver.py
@@ -91,7 +91,7 @@ def write(
path: StrPath,
df: pd.DataFrame,
**kwargs,
) -> None:
) -> str:
"""
Write out a DataFrame to file.
@@ -119,6 +119,8 @@ def write(
else:
raise ValueError(f"DataFrame: file extension {extension} is unknown.")

return str(path)

def _unify_variables_and_pandas_kwargs(
self,
uri: str,
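To illustrate the changed return type, a direct driver call might look like the sketch below. The import path and the zero-argument construction of `PandasDriver` are assumptions; in normal use the driver is invoked through the data catalog rather than directly.

import pandas as pd
from hydromt.data_catalog.drivers.dataframe.pandas_driver import PandasDriver  # import path assumed

driver = PandasDriver()  # default construction assumed
df = pd.DataFrame({"station": ["A", "B"], "value": [1.0, 2.0]})

# write() now returns the written uri as a string instead of None
written_uri = driver.write("tmp_stations.csv", df)
print(written_uri)  # "tmp_stations.csv"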
2 changes: 1 addition & 1 deletion hydromt/data_catalog/drivers/dataset/dataset_driver.py
@@ -43,7 +43,7 @@ def write(
path: StrPath,
ds: xr.Dataset,
**kwargs,
) -> None:
) -> str:
"""
Write out a Dataset to file.
4 changes: 3 additions & 1 deletion hydromt/data_catalog/drivers/dataset/xarray_driver.py
@@ -96,7 +96,7 @@ def read(
)
return ds

def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> str:
"""
Write the Dataset to a local file using zarr.
@@ -109,3 +109,5 @@ def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
ds.to_netcdf(path, **kwargs)
else:
raise ValueError(f"Unknown extension for DatasetXarrayDriver: {ext} ")

return str(path)
@@ -35,7 +35,7 @@ def write(
path: StrPath,
gdf: gpd.GeoDataFrame,
**kwargs,
) -> None:
) -> str:
"""
Write out a GeoDataFrame to file.
4 changes: 3 additions & 1 deletion hydromt/data_catalog/drivers/geodataframe/pyogrio_driver.py
@@ -73,7 +73,7 @@ def write(
path: StrPath,
gdf: gpd.GeoDataFrame,
**kwargs,
) -> None:
) -> str:
"""
Write out a GeoDataFrame to file using pyogrio.
@@ -89,6 +89,8 @@

write_dataframe(gdf, path, **kwargs)

return str(path)


def _bbox_from_file_and_mask(
uri: str,
@@ -42,7 +42,7 @@ def write(
path: StrPath,
ds: xr.Dataset,
**kwargs,
) -> None:
) -> str:
"""
Write out a GeoDataset to file.
2 changes: 1 addition & 1 deletion hydromt/data_catalog/drivers/geodataset/vector_driver.py
@@ -93,6 +93,6 @@ def write(
path: StrPath,
ds: xr.Dataset,
**kwargs,
) -> xr.Dataset:
) -> str:
"""Not implemented."""
raise NotImplementedError("GeodatasetVectorDriver does not support writing. ")
4 changes: 3 additions & 1 deletion hydromt/data_catalog/drivers/geodataset/xarray_driver.py
@@ -103,7 +103,7 @@ def read(
)
return ds

def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> str:
"""
Write the GeoDataset to a local file using zarr.
@@ -116,3 +116,5 @@ def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
ds.vector.to_netcdf(path, **kwargs)
else:
raise ValueError(f"Unknown extension for GeoDatasetXarrayDriver: {ext} ")

return path
@@ -50,7 +50,7 @@ def write(
path: StrPath,
ds: xr.Dataset,
**kwargs,
) -> None:
) -> str:
"""
Write out a RasterDataset to file.
6 changes: 5 additions & 1 deletion hydromt/data_catalog/drivers/raster/raster_xarray_driver.py
@@ -112,11 +112,13 @@ def read(
)
return ds

def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> str:
"""
Write the RasterDataset to a local file using zarr.
args:
returns: str with written uri
"""
no_ext, ext = splitext(path)
# set filepath if incompat
@@ -131,3 +133,5 @@ def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
ds.to_zarr(path, mode="w", **kwargs)
else:
ds.to_netcdf(path, **kwargs)

return str(path)
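The dispatch above means one write call covers both formats: a `.zarr` path goes through `ds.to_zarr`, everything else through `ds.to_netcdf`, and the written uri is returned. A sketch, with the driver class name, its import path, and its default construction all assumed:

import numpy as np
import xarray as xr
from hydromt.data_catalog.drivers.raster.raster_xarray_driver import RasterDatasetXarrayDriver  # name and path assumed

driver = RasterDatasetXarrayDriver()  # default construction assumed
ds = xr.Dataset({"precip": (("time", "y", "x"), np.zeros((3, 2, 2)))})

print(driver.write("out.zarr", ds))  # zarr store, returns "out.zarr"
print(driver.write("out.nc", ds))    # netCDF file, returns "out.nc"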
2 changes: 1 addition & 1 deletion hydromt/data_catalog/drivers/raster/rasterio_driver.py
@@ -142,7 +142,7 @@ def _open() -> Union[xr.DataArray, xr.Dataset]:
)
return ds

def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> None:
def write(self, path: StrPath, ds: xr.Dataset, **kwargs) -> str:
"""Write out a RasterDataset using rasterio."""
raise NotImplementedError()
