diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2b727677..70d16a80 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,7 @@ Internal changes ^^^^^^^^^^^^^^^^ * Include domain in `weight_location` in ``regrid_dataset``. (:pull:`414`). * Added pins to `xarray`, `xclim`, `h5py`, and `netcdf4`. (:pull:`414`). +* Add ``.zip`` and ``.zarr.zip`` as possible file extensions for Zarr datasets. (:pull:`426`). v0.9.1 (2024-06-04) ------------------- diff --git a/src/xscen/catutils.py b/src/xscen/catutils.py index 46fc4145..9be103fd 100644 --- a/src/xscen/catutils.py +++ b/src/xscen/catutils.py @@ -39,6 +39,18 @@ # ## File finding and path parsing ## # +SUFFIX_TO_FORMAT = { + ".nc": "nc", + ".nc4": "nc", + ".zip": "zarr", + ".zarr.zip": "zarr", + ".zarr": "zarr", +} +"""Mapping from file suffix to format. + +This is used to populate the "format" esm catalog column from the parsed path. +""" + EXTRA_PARSE_TYPES = {} """Extra parse types to add to parse's default. @@ -223,7 +235,7 @@ def _name_parser( return None d["path"] = abs_path - d["format"] = path.suffix[1:] + d["format"] = SUFFIX_TO_FORMAT.get(path.suffix, path.suffix[1:]) if "DATES" in d: d["date_start"], d["date_end"] = d.pop("DATES") diff --git a/src/xscen/io.py b/src/xscen/io.py index 11357cc8..5d9a7763 100644 --- a/src/xscen/io.py +++ b/src/xscen/io.py @@ -47,7 +47,10 @@ def get_engine(file: Union[str, os.PathLike]) -> str: - """Use functionality of h5py to determine if a NetCDF file is compatible with h5netcdf. + """Determine which Xarray engine should be used to open the given file. + + Files with a .zarr, .zarr.zip or .zip extension are recognized as Zarr datasets; + anything else is treated as netCDF. If the file is HDF5, the h5netcdf engine is used. 
Parameters ---------- @@ -60,7 +63,7 @@ def get_engine(file: Union[str, os.PathLike]) -> str: Engine to use with xarray """ # find the ideal engine for xr.open_mfdataset - if Path(file).suffix == ".zarr": + if Path(file).suffix in [".zarr", ".zip", ".zarr.zip"]: engine = "zarr" elif h5py.is_hdf5(file): engine = "h5netcdf"