diff --git a/CHANGES.rst b/CHANGES.rst index 79728a71..21b89ae7 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -31,6 +31,7 @@ Bug fixes * Fixed a bug to accept `group = False` in `adjust` function. (:pull:`366`). * `creep_weights` now correctly handles the case where the grid is small, `n` is large, and `mode=wrap`. (:issue:`367`). * Fixed a bug in ``tasmin_from_dtr`` and ``tasmax_from_dtr``, when `dtr` units differed from tasmin/max. (:pull:`372`). +* Fixed a bug where the requested chunking would be ignored when saving a dataset (:pull:`379`). v0.8.3 (2024-02-28) ------------------- diff --git a/environment-dev.yml b/environment-dev.yml index 9ff631b4..cd20dde0 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -35,7 +35,7 @@ dependencies: - zarr # Opt - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 + - pyarrow >=10.0.1 # Dev - babel - black ==24.2.0 diff --git a/environment.yml b/environment.yml index e827ccc1..b54bd875 100644 --- a/environment.yml +++ b/environment.yml @@ -37,5 +37,5 @@ dependencies: - babel # Opt - nc-time-axis >=1.3.1 - - pyarrow >=1.0.0 + - pyarrow >=10.0.1 - pip diff --git a/pyproject.toml b/pyproject.toml index dcfd0131..1f6d00f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "pandas >=2.2", "parse", # Used when opening catalogs. - "pyarrow", + "pyarrow>=10.0.1", "pyyaml", "rechunker", "scipy", diff --git a/xscen/io.py b/xscen/io.py index 572764eb..11357cc8 100644 --- a/xscen/io.py +++ b/xscen/io.py @@ -401,6 +401,8 @@ def save_to_netcdf( for var in list(ds.data_vars.keys()): if keepbits := _get_keepbits(bitround, var, ds[var].dtype): ds = ds.assign({var: round_bits(ds[var], keepbits)}) + # Remove original_shape from encoding, since it can cause issues with some engines. + ds[var].encoding.pop("original_shape", None) _coerce_attrs(ds.attrs) for var in ds.variables.values(): @@ -519,6 +521,8 @@ def _skip(var): encoding.pop(var) if keepbits := _get_keepbits(bitround, var, ds[var].dtype): ds = ds.assign({var: round_bits(ds[var], keepbits)}) + # Remove original_shape from encoding, since it can cause issues with some engines. + ds[var].encoding.pop("original_shape", None) if len(ds.data_vars) == 0: return None @@ -904,8 +908,12 @@ def rechunk_for_saving(ds: xr.Dataset, rechunk: dict): ds[rechunk_var] = ds[rechunk_var].chunk( {d: chnks for d, chnks in rechunk_dims.items() if d in ds[rechunk_var].dims} ) - ds[rechunk_var].encoding.pop("chunksizes", None) + ds[rechunk_var].encoding["chunksizes"] = tuple( + rechunk_dims[d] if d in rechunk_dims else ds[d].shape[0] + for d in ds[rechunk_var].dims + ) ds[rechunk_var].encoding.pop("chunks", None) + ds[rechunk_var].encoding.pop("preferred_chunks", None) return ds