Manifest arrays use arrayv3metadata #429

Open

abarciauskas-bgse wants to merge 57 commits into base: zarr-python-3.0

Changes from 16 commits

Commits
2a01bfa
Added zarray_to_v3metadata and test
abarciauskas-bgse Feb 4, 2025
17fd547
Working on manifest array tests
abarciauskas-bgse Feb 5, 2025
e5666ab
Fix test_manifests/test_array#TestConcat tests
abarciauskas-bgse Feb 5, 2025
5a8cc4c
Passing TestStack tests and add fixture
abarciauskas-bgse Feb 5, 2025
4c0b616
All test_manifests/test_array tests passing
abarciauskas-bgse Feb 6, 2025
ac2f787
Compressors should be list
abarciauskas-bgse Feb 6, 2025
5503c60
Passing dmrpp tests
abarciauskas-bgse Feb 6, 2025
1272051
Merge branch 'main' into manifest-arrays-use-arrayv3metadata
abarciauskas-bgse Feb 6, 2025
1f36755
Passing test_hdf.py tests
abarciauskas-bgse Feb 6, 2025
7098803
Start to work on kerchunk tests
abarciauskas-bgse Feb 6, 2025
ce2284c
Add method to convert array v3 metadata to v2 metadata for kerchunk (…
abarciauskas-bgse Feb 7, 2025
c9853d5
Fix fixtures and mark xfail netcdf3
abarciauskas-bgse Feb 7, 2025
209dae3
Test for convert_v3_to_v2_metadata
abarciauskas-bgse Feb 7, 2025
e7205ef
Deduplicate fixture for array v3 metadata
abarciauskas-bgse Feb 7, 2025
d65e457
Parse filters and compressors from v3 metdata for v2 metadata
abarciauskas-bgse Feb 7, 2025
190c20f
Rewrite extract_codecs
abarciauskas-bgse Feb 7, 2025
47f5ddd
Refactor convert_to_codec_pipeline
abarciauskas-bgse Feb 8, 2025
5d15608
Fix hdf integration tests
abarciauskas-bgse Feb 8, 2025
908bc52
Test for convert_to_codec_pipeline
abarciauskas-bgse Feb 8, 2025
4a8bfdd
Refactor get_codecs and its tests
abarciauskas-bgse Feb 8, 2025
d05cec3
Fix most integration tests and writer tests
abarciauskas-bgse Feb 9, 2025
ff23eeb
Fix xarray tests
abarciauskas-bgse Feb 9, 2025
8560f2d
Working on integration tests
abarciauskas-bgse Feb 9, 2025
97d0a71
Add expected type
abarciauskas-bgse Feb 10, 2025
669ce52
Mark datetime tests xfail
abarciauskas-bgse Feb 10, 2025
b794dab
Upgrade xarray for tests
abarciauskas-bgse Feb 10, 2025
825142d
xfail some unsupported zarr-python 3 data types
abarciauskas-bgse Feb 10, 2025
6684125
Require zarr
abarciauskas-bgse Feb 10, 2025
5e82de4
Remove zarr dep
abarciauskas-bgse Feb 10, 2025
f57b48d
import zarr, explicit dependency
abarciauskas-bgse Feb 10, 2025
b811959
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 10, 2025
8c5139b
Add zarr as a dependency
abarciauskas-bgse Feb 11, 2025
eb2a86c
Merge branch 'manifest-arrays-use-arrayv3metadata' of github.com:zarr…
abarciauskas-bgse Feb 11, 2025
15ac7a7
Merge branch 'main' into manifest-arrays-use-arrayv3metadata
abarciauskas-bgse Feb 11, 2025
5359762
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 11, 2025
c808351
Min numcodecs version
abarciauskas-bgse Feb 12, 2025
bd50167
numcodecs>=0.15.1 in environment and upstream.yml conda env files
abarciauskas-bgse Feb 12, 2025
ed97704
Working on mypy errors
abarciauskas-bgse Feb 12, 2025
a3c190e
Fix mypy errors and tests
abarciauskas-bgse Feb 12, 2025
95886b9
Remove ZArray class
abarciauskas-bgse Feb 12, 2025
a0f72b2
Just return metadata's shape
abarciauskas-bgse Feb 12, 2025
aad511f
Create update metadata function
abarciauskas-bgse Feb 12, 2025
b357b04
Fix typing for update_metadata
abarciauskas-bgse Feb 12, 2025
08e877a
Check for regular chunk grid in manifest instantiation
abarciauskas-bgse Feb 12, 2025
f040459
Remove obsolete codecs code
abarciauskas-bgse Feb 12, 2025
495d660
Fix chunks function and add docstring
abarciauskas-bgse Feb 12, 2025
a262f0b
Remove custom zattrs type
abarciauskas-bgse Feb 12, 2025
bcd68a0
Move some imports and make update_metadata a private method
abarciauskas-bgse Feb 12, 2025
f0ce778
Remove zarr.py
abarciauskas-bgse Feb 12, 2025
0518488
Add zarr to other ci env files
abarciauskas-bgse Feb 13, 2025
0712979
Fixture array_v3_metadata uses array_v3_metadata_dict
abarciauskas-bgse Feb 13, 2025
c40915d
No need for union type for CodecPipeline
abarciauskas-bgse Feb 13, 2025
cdaca53
Use type alias
abarciauskas-bgse Feb 13, 2025
2415e07
Add comment
abarciauskas-bgse Feb 13, 2025
9366d69
Update virtualizarr/manifests/array_api.py
abarciauskas-bgse Feb 13, 2025
d590cfc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 13, 2025
6394207
Revised copy_and_replace_metadata to be in utils and called correctly
abarciauskas-bgse Feb 13, 2025
58 changes: 58 additions & 0 deletions conftest.py
@@ -6,6 +6,9 @@
import pytest
import xarray as xr
from xarray.core.variable import Variable
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.zarr import convert_to_codec_pipeline


def pytest_addoption(parser):
@@ -150,3 +153,58 @@ def simple_netcdf4(tmp_path: Path) -> str:
ds.to_netcdf(filepath)

return str(filepath)


@pytest.fixture
def array_v3_metadata():

Review comment (Member):
Could we reimplement this fixture to internally just call the array_v3_metadata_dict fixture below?


Reply (Collaborator, Author):
I did this in 0712979. I do like having just one method to create array v3 metadata for tests; however, it does mean tests are a bit more verbose, since every codecs argument must include an ArrayBytesCodec (which is always {"name": "bytes", "configuration": {"endian": "little"}}).

But I'll think on ways to streamline this more...


Review comment:
Have a look at the signature of this function, which has a lot of sane defaults and works for both v2 and v3 metadata: https://github.com/zarr-developers/zarr-python/blob/99621ecf0b81400e323828111363fe21cf0c7592/src/zarr/core/array.py#L4008-L4030. I think we could consider adding an ArrayV3Metadata.build method with a signature like this, which should make creating metadata documents a lot easier.

def _create_metadata(
shape: tuple,
chunks: tuple,
compressors: list[dict] = [{"id": "zlib", "level": 1}],
filters: list[dict] | None = None,
):
return ArrayV3Metadata(
shape=shape,
data_type="int32",
chunk_grid={"name": "regular", "configuration": {"chunk_shape": chunks}},
chunk_key_encoding={"name": "default"},
fill_value=0,
codecs=convert_to_codec_pipeline(
compressors=compressors,
filters=filters,
dtype=np.dtype("int32"),
),
attributes={},
dimension_names=None,
storage_transformers=None,
)

return _create_metadata


@pytest.fixture
def array_v3_metadata_dict():
def _create_metadata_dict(
shape: tuple,
chunks: tuple,
codecs: list[dict] = [
{"configuration": {"endian": "little"}, "name": "bytes"},
{
"name": "numcodecs.zlib",
"configuration": {"level": 1},
},
],
):
return {
"shape": shape,
"data_type": "int32",
"chunk_grid": {"name": "regular", "configuration": {"chunk_shape": chunks}},
"chunk_key_encoding": {"name": "default"},
"fill_value": 0,
"codecs": codecs,
"attributes": {},
"dimension_names": None,
"storage_transformers": None,
}

return _create_metadata_dict
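
As a usage illustration of the two fixtures above, a test might combine array_v3_metadata with the new metadata-based ManifestArray constructor roughly as follows. This is only a sketch: the test name and the chunk manifest entry are hypothetical and not taken from this PR.

    import numpy as np

    from virtualizarr.manifests import ChunkManifest, ManifestArray


    def test_manifest_array_from_v3_metadata(array_v3_metadata):
        # a single 2x2 chunk covering the whole 2x2 array
        metadata = array_v3_metadata(shape=(2, 2), chunks=(2, 2))
        manifest = ChunkManifest(
            entries={"0.0": {"path": "s3://bucket/data.nc", "offset": 100, "length": 50}}
        )
        marr = ManifestArray(metadata=metadata, chunkmanifest=manifest)

        assert marr.shape == (2, 2)
        assert marr.chunks == (2, 2)
        assert marr.dtype == np.dtype("int32")  # the fixture's default dtype
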
6 changes: 2 additions & 4 deletions virtualizarr/codecs.py
@@ -54,10 +54,8 @@ def _get_manifestarray_codecs(
normalize_to_zarr_v3: bool = False,
) -> Union[Codec, tuple["ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec", ...]]:
"""Get codecs for a ManifestArray based on its zarr_format."""
if normalize_to_zarr_v3 or array.zarray.zarr_format == 3:
return (array.zarray.serializer(),) + array.zarray._v3_codec_pipeline()
elif array.zarray.zarr_format == 2:
return array.zarray.codec
if normalize_to_zarr_v3 or array.metadata.zarr_format == 3:
return array.metadata.codecs
else:
raise ValueError("Unsupported zarr_format for ManifestArray.")

45 changes: 28 additions & 17 deletions virtualizarr/manifests/array.py
@@ -2,13 +2,13 @@
from typing import Any, Callable, Union

import numpy as np
from zarr.core.metadata.v3 import ArrayV3Metadata, RegularChunkGrid

from virtualizarr.manifests.array_api import (
MANIFESTARRAY_HANDLED_ARRAY_FUNCTIONS,
_isnan,
)
from virtualizarr.manifests.manifest import ChunkManifest
from virtualizarr.zarr import ZArray


class ManifestArray:
@@ -24,27 +24,27 @@ class ManifestArray:
"""

_manifest: ChunkManifest
_zarray: ZArray
_metadata: ArrayV3Metadata

def __init__(
self,
zarray: ZArray | dict,
metadata: ArrayV3Metadata | dict,
chunkmanifest: dict | ChunkManifest,
) -> None:
"""
Create a ManifestArray directly from the .zarray information of a zarr array and the manifest of chunks.
Parameters
----------
zarray : dict or ZArray
metadata : dict or ArrayV3Metadata
chunkmanifest : dict or ChunkManifest
"""

if isinstance(zarray, ZArray):
_zarray = zarray
if isinstance(metadata, ArrayV3Metadata):
_metadata = metadata
else:
# try unpacking the dict
_zarray = ZArray(**zarray)
_metadata = ArrayV3Metadata(**metadata)

if isinstance(chunkmanifest, ChunkManifest):
_chunkmanifest = chunkmanifest
@@ -55,32 +55,43 @@ def __init__(
f"chunkmanifest arg must be of type ChunkManifest or dict, but got type {type(chunkmanifest)}"
)

# TODO check that the zarray shape and chunkmanifest shape are consistent with one another
# TODO check that the metadata shape and chunkmanifest shape are consistent with one another
# TODO also cover the special case of scalar arrays

self._zarray = _zarray
self._metadata = _metadata
self._manifest = _chunkmanifest

@property
def manifest(self) -> ChunkManifest:
return self._manifest

@property
def zarray(self) -> ZArray:
return self._zarray
def metadata(self) -> ArrayV3Metadata:
return self._metadata

@property
def chunks(self) -> tuple[int, ...]:
return tuple(self.zarray.chunks)
"""
Individual chunk size by number of elements.
"""
if isinstance(self._metadata.chunk_grid, RegularChunkGrid):
return self._metadata.chunk_grid.chunk_shape
else:
raise NotImplementedError(
"Only RegularChunkGrid is currently supported for chunk size"
)

@property
def dtype(self) -> np.dtype:
dtype_str = self.zarray.dtype
return np.dtype(dtype_str)
dtype_str = self.metadata.data_type
return dtype_str.to_numpy()

@property
def shape(self) -> tuple[int, ...]:
return tuple(int(length) for length in list(self.zarray.shape))
"""
Array shape by number of elements along each dimension.
"""
return tuple(int(length) for length in list(self.metadata.shape))

@property
def ndim(self) -> int:
@@ -155,7 +166,7 @@ def __eq__(  # type: ignore[override]
if self.shape != other.shape:
raise NotImplementedError("Unsure how to handle broadcasting like this")

if self.zarray != other.zarray:
if self.metadata != other.metadata:
return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool))
else:
if self.manifest == other.manifest:
@@ -263,7 +274,7 @@ def rename_paths(
ChunkManifest.rename_paths
"""
renamed_manifest = self.manifest.rename_paths(new)
return ManifestArray(zarray=self.zarray, chunkmanifest=renamed_manifest)
return ManifestArray(metadata=self.metadata, chunkmanifest=renamed_manifest)


def _possibly_expand_trailing_ellipsis(key, ndim: int):
37 changes: 22 additions & 15 deletions virtualizarr/manifests/array_api.py
@@ -53,6 +53,8 @@ def concatenate(
The signature of this function is array API compliant, so that it can be called by `xarray.concat`.
"""
from zarr.core.metadata.v3 import ArrayV3Metadata

from .array import ManifestArray

if axis is None:
@@ -100,12 +102,12 @@ def concatenate(
lengths=concatenated_lengths,
)

# chunk shape has not changed, there are just now more chunks along the concatenation axis
new_zarray = first_arr.zarray.replace(
shape=tuple(new_shape),
)
metadata_copy = first_arr.metadata.to_dict().copy()
metadata_copy["shape"] = tuple(new_shape)
# ArrayV3Metadata.from_dict removes extra keys zarr_format and node_type
new_metadata = ArrayV3Metadata.from_dict(metadata_copy)

return ManifestArray(chunkmanifest=concatenated_manifest, zarray=new_zarray)
return ManifestArray(chunkmanifest=concatenated_manifest, metadata=new_metadata)


@implements(np.stack)
@@ -120,6 +122,8 @@ def stack(
The signature of this function is array API compliant, so that it can be called by `xarray.stack`.
"""
from zarr.core.metadata.v3 import ArrayV3Metadata

from .array import ManifestArray

if not isinstance(axis, int):
@@ -170,12 +174,13 @@ def stack(
new_chunks = list(old_chunks)
new_chunks.insert(axis, 1)

new_zarray = first_arr.zarray.replace(
chunks=tuple(new_chunks),
shape=tuple(new_shape),
)
metadata_copy = first_arr.metadata.to_dict().copy()
metadata_copy["shape"] = tuple(new_shape)
metadata_copy["chunk_grid"]["configuration"]["chunk_shape"] = tuple(new_chunks)
# ArrayV3Metadata.from_dict removes extra keys zarr_format and node_type
new_metadata = ArrayV3Metadata.from_dict(metadata_copy)

return ManifestArray(chunkmanifest=stacked_manifest, zarray=new_zarray)
return ManifestArray(chunkmanifest=stacked_manifest, metadata=new_metadata)


@implements(np.expand_dims)
@@ -190,6 +195,7 @@ def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
"""
Broadcasts a ManifestArray to a specified shape, by either adjusting chunk keys or copying chunk manifest entries.
"""
from zarr.core.metadata.v3 import ArrayV3Metadata

from .array import ManifestArray

@@ -236,12 +242,13 @@ def broadcast_to(x: "ManifestArray", /, shape: tuple[int, ...]) -> "ManifestArray":
lengths=broadcasted_lengths,
)

new_zarray = x.zarray.replace(
chunks=new_chunk_shape,
shape=new_shape,
)
metadata_copy = x.metadata.to_dict().copy()
metadata_copy["shape"] = tuple(new_shape)
metadata_copy["chunk_grid"]["configuration"]["chunk_shape"] = tuple(new_chunk_shape)
# ArrayV3Metadata.from_dict removes extra keys zarr_format and node_type
new_metadata = ArrayV3Metadata.from_dict(metadata_copy)

return ManifestArray(chunkmanifest=broadcasted_manifest, zarray=new_zarray)
return ManifestArray(chunkmanifest=broadcasted_manifest, metadata=new_metadata)


def _prepend_singleton_dimensions(shape: tuple[int, ...], ndim: int) -> tuple[int, ...]:
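
The same to_dict/from_dict round trip appears in concatenate, stack, and broadcast_to above, and a later commit in this PR ("Revised copy_and_replace_metadata to be in utils and called correctly") moves it into a shared helper. A minimal sketch of such a helper, assuming this signature (the actual one in the PR may differ):

    from zarr.core.metadata.v3 import ArrayV3Metadata


    def copy_and_replace_metadata(
        old_metadata: ArrayV3Metadata,
        new_shape: tuple[int, ...],
        new_chunks: tuple[int, ...] | None = None,
    ) -> ArrayV3Metadata:
        """Return a copy of old_metadata with an updated shape and, optionally, chunk shape."""
        metadata_dict = old_metadata.to_dict().copy()
        metadata_dict["shape"] = tuple(new_shape)
        if new_chunks is not None:
            metadata_dict["chunk_grid"]["configuration"]["chunk_shape"] = tuple(new_chunks)
        # from_dict drops the extra zarr_format and node_type keys that to_dict adds
        return ArrayV3Metadata.from_dict(metadata_dict)

With this in place, concatenate, stack, and broadcast_to could each call copy_and_replace_metadata instead of repeating the dict manipulation.
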
32 changes: 23 additions & 9 deletions virtualizarr/readers/dmrpp.py
@@ -12,7 +12,6 @@
from virtualizarr.readers.common import VirtualBackend
from virtualizarr.types import ChunkKey
from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions
from virtualizarr.zarr import ZArray


class DMRPPVirtualBackend(VirtualBackend):
@@ -378,6 +377,10 @@ def _parse_variable(self, var_tag: ET.Element) -> Variable:
-------
xr.Variable
"""
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.zarr import convert_to_codec_pipeline

# Dimension info
dims: dict[str, int] = {}
dimension_tags = self._find_dimension_tags(var_tag)
@@ -414,16 +417,27 @@ def _parse_variable(self, var_tag: ET.Element) -> Variable:
# Fill value is placed in zarr array's fill_value and variable encoding and removed from attributes
encoding = {k: attrs.get(k) for k in self._ENCODING_KEYS if k in attrs}
fill_value = attrs.pop("_FillValue", None)
# create ManifestArray and ZArray
zarray = ZArray(
chunks=chunks_shape,
dtype=dtype,
fill_value=fill_value,
filters=filters,
order="C",
# create ManifestArray
metadata = ArrayV3Metadata(

Review comment on lines +419 to +420 (Member):
This call seems to have quite a lot of boilerplate that authors of virtualizarr readers will never need; since it's generally useful, perhaps we should provide our own constructor function in .utils?

shape=shape,
data_type=dtype,
chunk_grid={
"name": "regular",
"configuration": {"chunk_shape": chunks_shape},
},
chunk_key_encoding={"name": "default"},
fill_value=fill_value,
codecs=convert_to_codec_pipeline(
compressors=filters,
dtype=dtype,
filters=None,
serializer="auto",
),
attributes=attrs,
dimension_names=None,

Review comment on lines +433 to +434 (Member):
Do we want to specify attrs here? I feel like we want neither dimension_names nor attrs, because in virtualizarr those both are stored in the xarray objects instead.

storage_transformers=None,
)
marr = ManifestArray(zarray=zarray, chunkmanifest=chunkmanifest)
marr = ManifestArray(metadata=metadata, chunkmanifest=chunkmanifest)
return Variable(dims=dims.keys(), data=marr, attrs=attrs, encoding=encoding)

def _parse_attribute(self, attr_tag: ET.Element) -> dict[str, Any]:
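
Following the review comment above about boilerplate, one option is a small constructor helper in virtualizarr (for example in .utils) that readers such as the DMR++ and HDF backends could share. The function name and keyword defaults below are assumptions for illustration, not code from this PR.

    import numpy as np
    from zarr.core.metadata.v3 import ArrayV3Metadata

    from virtualizarr.zarr import convert_to_codec_pipeline


    def create_v3_array_metadata(
        shape: tuple[int, ...],
        chunk_shape: tuple[int, ...],
        data_type: np.dtype,
        fill_value=None,
        compressors: list[dict] | None = None,
        filters: list[dict] | None = None,
        attributes: dict | None = None,
    ) -> ArrayV3Metadata:
        """Build ArrayV3Metadata, defaulting the fields that readers never vary."""
        return ArrayV3Metadata(
            shape=shape,
            data_type=data_type,
            chunk_grid={"name": "regular", "configuration": {"chunk_shape": chunk_shape}},
            chunk_key_encoding={"name": "default"},
            fill_value=fill_value,
            codecs=convert_to_codec_pipeline(
                compressors=compressors,
                filters=filters,
                dtype=data_type,
            ),
            attributes=attributes or {},
            dimension_names=None,
            storage_transformers=None,
        )

A reader like _parse_variable above could then pass only its shape, chunk shape, dtype, fill value, and codec list, leaving the remaining fields to the helper's defaults.
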
29 changes: 19 additions & 10 deletions virtualizarr/readers/hdf/hdf.py
@@ -28,7 +28,6 @@
from virtualizarr.readers.hdf.filters import cfcodec_from_dataset, codecs_from_dataset
from virtualizarr.types import ChunkKey
from virtualizarr.utils import _FsspecFSFromFilepath, check_for_collisions, soft_import
from virtualizarr.zarr import ZArray

h5py = soft_import("h5py", "For reading hdf files", strict=False)

@@ -285,6 +284,9 @@ def _dataset_to_variable(
"""
# This chunk determination logic mirrors zarr-python's create
# https://github.com/zarr-developers/zarr-python/blob/main/zarr/creation.py#L62-L66
from zarr.core.metadata.v3 import ArrayV3Metadata

from virtualizarr.zarr import convert_to_codec_pipeline

chunks = dataset.chunks if dataset.chunks else dataset.shape
codecs = codecs_from_dataset(dataset)
@@ -306,20 +308,27 @@ def _dataset_to_variable(
if isinstance(fill_value, np.generic):
fill_value = fill_value.item()
filters = [codec.get_config() for codec in codecs]
zarray = ZArray(
chunks=chunks, # type: ignore
compressor=None,
dtype=dtype,
fill_value=fill_value,
filters=filters,
order="C",

metadata = ArrayV3Metadata(
shape=dataset.shape,
zarr_format=2,
data_type=dtype,
chunk_grid={"name": "regular", "configuration": {"chunk_shape": chunks}},
chunk_key_encoding={"name": "default"},
fill_value=fill_value,
codecs=convert_to_codec_pipeline(
compressors=None,
dtype=dtype,
filters=filters,
serializer="auto",
),
attributes=attrs,
dimension_names=None,
storage_transformers=None,
)
dims = HDFVirtualBackend._dataset_dims(dataset, group=group)
manifest = HDFVirtualBackend._dataset_chunk_manifest(path, dataset)
if manifest:
marray = ManifestArray(zarray=zarray, chunkmanifest=manifest)
marray = ManifestArray(metadata=metadata, chunkmanifest=manifest)
variable = xr.Variable(data=marray, dims=dims, attrs=attrs)
else:
variable = xr.Variable(data=np.empty(dataset.shape), dims=dims, attrs=attrs)