Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility with Zarr v3b2 #9795

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ def extract_zarr_variable_encoding(
shape = shape if shape else variable.shape
encoding = variable.encoding.copy()

safe_to_drop = {"source", "original_shape"}
safe_to_drop = {"source", "original_shape", "preferred_chunks"}
valid_encodings = {
"codecs",
"chunks",
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def _importorskip(
has_pint, requires_pint = _importorskip("pint")
has_numexpr, requires_numexpr = _importorskip("numexpr")
has_flox, requires_flox = _importorskip("flox")
has_netcdf, requires_netcdf = _importorskip("netcdf")
has_pandas_ge_2_2, requires_pandas_ge_2_2 = _importorskip("pandas", "2.2")
has_pandas_3, requires_pandas_3 = _importorskip("pandas", "3.0.0.dev0")

Expand Down
71 changes: 34 additions & 37 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
requires_h5netcdf_1_4_0_or_above,
requires_h5netcdf_ros3,
requires_iris,
requires_netcdf,
requires_netCDF4,
requires_netCDF4_1_6_2_or_above,
requires_netCDF4_1_7_0_or_above,
Expand Down Expand Up @@ -634,6 +635,7 @@ def test_roundtrip_float64_data(self) -> None:
with self.roundtrip(expected) as actual:
assert_identical(expected, actual)

@requires_netcdf
def test_roundtrip_example_1_netcdf(self) -> None:
with open_example_dataset("example_1.nc") as expected:
with self.roundtrip(expected) as actual:
Expand Down Expand Up @@ -1126,13 +1128,11 @@ def _create_cf_dataset():

def test_grid_mapping_and_bounds_are_not_coordinates_in_file(self) -> None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some of these CF convention handling tests really only need to be run with one backend.

cc @kmuehlbauer in case that interests you. Similarly with the "coordinates" roundtripping

original = self._create_cf_dataset()
with create_tmp_file() as tmp_file:
original.to_netcdf(tmp_file)
with open_dataset(tmp_file, decode_coords=False) as ds:
assert ds.coords["latitude"].attrs["bounds"] == "latitude_bnds"
assert ds.coords["longitude"].attrs["bounds"] == "longitude_bnds"
assert "coordinates" not in ds["variable"].attrs
assert "coordinates" not in ds.attrs
with self.roundtrip(original, open_kwargs={"decode_coords": False}) as ds:
assert ds.coords["latitude"].attrs["bounds"] == "latitude_bnds"
assert ds.coords["longitude"].attrs["bounds"] == "longitude_bnds"
assert "coordinates" not in ds["variable"].attrs
assert "coordinates" not in ds.attrs

def test_coordinate_variables_after_dataset_roundtrip(self) -> None:
original = self._create_cf_dataset()
Expand Down Expand Up @@ -1194,36 +1194,30 @@ def equals_latlon(obj):
)
with self.roundtrip(original) as actual:
assert_identical(actual, original)
with create_tmp_file() as tmp_file:
original.to_netcdf(tmp_file)
with open_dataset(tmp_file, decode_coords=False) as ds:
assert equals_latlon(ds["temp"].attrs["coordinates"])
assert equals_latlon(ds["precip"].attrs["coordinates"])
assert "coordinates" not in ds.attrs
assert "coordinates" not in ds["lat"].attrs
assert "coordinates" not in ds["lon"].attrs
with self.roundtrip(original, open_kwargs=dict(decode_coords=False)) as ds:
assert equals_latlon(ds["temp"].attrs["coordinates"])
assert equals_latlon(ds["precip"].attrs["coordinates"])
assert "coordinates" not in ds.attrs
assert "coordinates" not in ds["lat"].attrs
assert "coordinates" not in ds["lon"].attrs

modified = original.drop_vars(["temp", "precip"])
with self.roundtrip(modified) as actual:
assert_identical(actual, modified)
with create_tmp_file() as tmp_file:
modified.to_netcdf(tmp_file)
with open_dataset(tmp_file, decode_coords=False) as ds:
assert equals_latlon(ds.attrs["coordinates"])
assert "coordinates" not in ds["lat"].attrs
assert "coordinates" not in ds["lon"].attrs
with self.roundtrip(modified, open_kwargs=dict(decode_coords=False)) as ds:
assert equals_latlon(ds.attrs["coordinates"])
assert "coordinates" not in ds["lat"].attrs
assert "coordinates" not in ds["lon"].attrs

original["temp"].encoding["coordinates"] = "lat"
with self.roundtrip(original) as actual:
assert_identical(actual, original)
original["precip"].encoding["coordinates"] = "lat"
with create_tmp_file() as tmp_file:
original.to_netcdf(tmp_file)
with open_dataset(tmp_file, decode_coords=True) as ds:
assert "lon" not in ds["temp"].encoding["coordinates"]
assert "lon" not in ds["precip"].encoding["coordinates"]
assert "coordinates" not in ds["lat"].encoding
assert "coordinates" not in ds["lon"].encoding
with self.roundtrip(original, open_kwargs=dict(decode_coords=True)) as ds:
assert "lon" not in ds["temp"].encoding["coordinates"]
assert "lon" not in ds["precip"].encoding["coordinates"]
assert "coordinates" not in ds["lat"].encoding
assert "coordinates" not in ds["lon"].encoding

def test_roundtrip_endian(self) -> None:
skip_if_zarr_format_3("zarr v3 has not implemented endian support yet")
Expand Down Expand Up @@ -2322,7 +2316,9 @@ def test_read_non_consolidated_warning(self) -> None:
assert_identical(ds, expected)

def test_non_existent_store(self) -> None:
with pytest.raises(FileNotFoundError, match="No such file or directory"):
with pytest.raises(
FileNotFoundError, match="(No such file or directory|Unable to find group)"
):
xr.open_zarr(f"{uuid.uuid4()}")

@pytest.mark.skipif(has_zarr_v3, reason="chunk_store not implemented in zarr v3")
Expand Down Expand Up @@ -2552,6 +2548,7 @@ def test_chunk_encoding_with_dask(self) -> None:
# don't actually check equality because the data could be corrupted
pass

@requires_netcdf
def test_drop_encoding(self):
with open_example_dataset("example_1.nc") as ds:
encodings = {v: {**ds[v].encoding} for v in ds.data_vars}
Expand Down Expand Up @@ -3274,7 +3271,7 @@ def create_zarr_target(self):
pytest.skip("Instrumented tests only work on latest Zarr.")

if has_zarr_v3:
kwargs = {"mode": "a"}
kwargs = {"read_only": False}
else:
kwargs = {} # type: ignore[arg-type,unused-ignore]

Expand Down Expand Up @@ -3316,10 +3313,10 @@ def test_append(self) -> None:
if has_zarr_v3:
# TODO: verify these
expected = {
"set": 17,
"get": 12,
"set": 5,
"get": 7,
"list_dir": 3,
"list_prefix": 0,
"list_prefix": 1,
}
else:
expected = {
Expand All @@ -3341,10 +3338,10 @@ def test_append(self) -> None:
# 6057128b: {'iter': 5, 'contains': 2, 'setitem': 5, 'getitem': 10, "listdir": 5, "list_prefix": 0}
if has_zarr_v3:
expected = {
"set": 10,
"set": 4,
"get": 16, # TODO: fixme upstream (should be 8)
"list_dir": 3, # TODO: fixme upstream (should be 2)
"list_prefix": 0,
"list_prefix": 1,
}
else:
expected = {
Expand All @@ -3367,7 +3364,7 @@ def test_append(self) -> None:
"set": 10,
"get": 16, # TODO: fixme upstream (should be 8)
"list_dir": 3, # TODO: fixme upstream (should be 2)
"list_prefix": 0,
"list_prefix": 2,
}
else:
expected = {
Expand Down Expand Up @@ -3491,7 +3488,7 @@ class TestZarrDictStore(ZarrBase):
@contextlib.contextmanager
def create_zarr_target(self):
if has_zarr_v3:
yield zarr.storage.MemoryStore({}, mode="a")
yield zarr.storage.MemoryStore({}, read_only=False)
else:
yield {}

Expand Down
Loading