From 1fa42d9038dd96f979a3a770c06f4fc009000a1d Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 6 Nov 2024 16:15:45 +0100
Subject: [PATCH 01/35] add default compressor to config

---
 src/zarr/core/config.py      |  4 ++++
 src/zarr/core/metadata/v2.py | 21 ++++++++++++++++++---
 tests/test_config.py         |  4 ++++
 tests/test_v2.py             | 22 ++++++++++++++++++++--
 4 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 29f5e139fe..9445e2a789 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -64,6 +64,10 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
+            "v2_dtype_kind_to_default_compressor": {
+                "biufcmM": "zstd",
+                "OSUV": "vlen-bytes",
+            },
         }
     ],
 )
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index f18f2e4e8d..7d3eecb330 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -71,6 +71,8 @@ def __init__(
         shape_parsed = parse_shapelike(shape)
         dtype_parsed = parse_dtype(dtype)
         chunks_parsed = parse_shapelike(chunks)
+        if compressor is None:
+            compressor = _default_compressor(dtype_parsed)
         compressor_parsed = parse_compressor(compressor)
         order_parsed = parse_indexing_order(order)
         dimension_separator_parsed = parse_separator(dimension_separator)
@@ -238,15 +240,15 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
     raise TypeError(msg)
 
 
-def parse_compressor(data: object) -> numcodecs.abc.Codec | None:
+def parse_compressor(data: object) -> numcodecs.abc.Codec:
     """
     Parse a potential compressor.
     """
-    if data is None or isinstance(data, numcodecs.abc.Codec):
+    if isinstance(data, numcodecs.abc.Codec):
         return data
     if isinstance(data, dict):
         return numcodecs.get_codec(data)
-    msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
+    msg = f"Invalid compressor. Expected a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
     raise ValueError(msg)
 
 
@@ -326,3 +328,16 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
         return ""
     else:
         return dtype.type(0)
+
+
+def _default_compressor(dtype: np.dtype[Any]) -> numcodecs.abc.Codec:
+    """Get the default compressor for a type.
+
+    The config contains a mapping from numpy dtype kind to the default compressor.
+    https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
+    """
+    dtype_kind_to_default_compressor = config.get("v2_dtype_kind_to_default_compressor")
+    for dtype_kinds, compressor in dtype_kind_to_default_compressor.items():
+        if dtype.kind in dtype_kinds:
+            return numcodecs.get_codec({"id": compressor})
+    raise ValueError(f"No default compressor found for dtype {dtype} of kind {dtype.kind}")
diff --git a/tests/test_config.py b/tests/test_config.py
index ddabffb467..e1a15a5f8c 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -63,6 +63,10 @@ def test_config_defaults_set() -> None:
                 "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
                 "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
             },
+            "v2_dtype_kind_to_default_compressor": {
+                "biufcmM": "zstd",
+                "OSUV": "vlen-bytes",
+            },
         }
     ]
     assert config.get("array.order") == "C"
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 3dd17848fb..777d96511b 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -10,7 +10,7 @@
 
 import zarr
 import zarr.storage
-from zarr import Array
+from zarr import Array, config
 from zarr.storage import MemoryStore, StorePath
 
 
@@ -96,7 +96,6 @@ async def test_v2_encode_decode(dtype):
     serialized = json.loads(result.to_bytes())
     expected = {
         "chunks": [3],
-        "compressor": None,
         "dtype": f"{dtype}0",
         "fill_value": "WA==",
         "filters": None,
@@ -105,6 +104,7 @@ async def test_v2_encode_decode(dtype):
         "zarr_format": 2,
         "dimension_separator": ".",
     }
+    del serialized["compressor"]
     assert serialized == expected
 
     data = zarr.open_array(store=store, path="foo")[:]
@@ -130,3 +130,21 @@ def test_v2_filters_codecs(filters: Any) -> None:
     arr[:] = array_fixture
     result = arr[:]
     np.testing.assert_array_equal(result, array_fixture)
+
+
+@pytest.mark.parametrize(
+    "dtype_compressor",
+    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-bytes"]],
+)
+def test_default_compressors(dtype_compressor: Any) -> None:
+    with config.set(
+        {
+            "v2_dtype_kind_to_default_compressor": {
+                "biufcmM": "zstd",
+                "OSUV": "vlen-bytes",
+            },
+        }
+    ):
+        dtype, expected_compressor = dtype_compressor
+        arr = zarr.create(shape=(10,), path="foo", store={}, zarr_format=2, dtype=dtype)
+        assert arr.metadata.compressor.codec_id == expected_compressor

From 02053e9bf7f52de83e703d12273a35c4f5cf8276 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 6 Nov 2024 19:48:12 +0100
Subject: [PATCH 02/35] modify _default_compressor to
 _default_filters_and_compressor

---
 src/zarr/core/array.py       |  8 ----
 src/zarr/core/config.py      |  6 +--
 src/zarr/core/metadata/v2.py | 34 +++++++++------
 tests/test_config.py         |  6 +--
 tests/test_v2.py             | 82 ++++++++++++++++++++----------------
 5 files changed, 73 insertions(+), 63 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 1646959cb5..933e9e2c85 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -492,14 +492,6 @@ async def create(
                 order=order,
             )
         elif zarr_format == 2:
-            if dtype is str or dtype == "str":
-                # another special case: zarr v2 added the vlen-utf8 codec
-                vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"}
-                if filters and not any(x["id"] == "vlen-utf8" for x in filters):
-                    filters = list(filters) + [vlen_codec]
-                else:
-                    filters = [vlen_codec]
-
             if codecs is not None:
                 raise ValueError(
                     "codecs cannot be used for arrays with version 2. Use filters and compressor instead."
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 9445e2a789..3373d08958 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -64,9 +64,9 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
-            "v2_dtype_kind_to_default_compressor": {
-                "biufcmM": "zstd",
-                "OSUV": "vlen-bytes",
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "biufcmM": ["zstd"],
+                "OSUV": ["vlen-utf8"],
             },
         }
     ],
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 7d3eecb330..bcd23e24bc 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -4,7 +4,7 @@
 from collections.abc import Iterable
 from enum import Enum
 from functools import cached_property
-from typing import TYPE_CHECKING, TypedDict, cast
+from typing import TYPE_CHECKING, Any, TypedDict, cast
 
 from zarr.abc.metadata import Metadata
 
@@ -71,8 +71,14 @@ def __init__(
         shape_parsed = parse_shapelike(shape)
         dtype_parsed = parse_dtype(dtype)
         chunks_parsed = parse_shapelike(chunks)
-        if compressor is None:
-            compressor = _default_compressor(dtype_parsed)
+        if not filters and not compressor:
+            filters, compressor = _default_filters_and_compressor(dtype_parsed)
+        if dtype is str or dtype == "str":
+            vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"}
+            if filters and not any(x["id"] == "vlen-utf8" for x in filters):
+                filters = list(filters) + [vlen_codec]
+            else:
+                filters = [vlen_codec]
         compressor_parsed = parse_compressor(compressor)
         order_parsed = parse_indexing_order(order)
         dimension_separator_parsed = parse_separator(dimension_separator)
@@ -240,15 +246,15 @@ def parse_filters(data: object) -> tuple[numcodecs.abc.Codec, ...] | None:
     raise TypeError(msg)
 
 
-def parse_compressor(data: object) -> numcodecs.abc.Codec:
+def parse_compressor(data: object) -> numcodecs.abc.Codec | None:
     """
     Parse a potential compressor.
     """
-    if isinstance(data, numcodecs.abc.Codec):
+    if data is None or isinstance(data, numcodecs.abc.Codec):
         return data
     if isinstance(data, dict):
         return numcodecs.get_codec(data)
-    msg = f"Invalid compressor. Expected a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
+    msg = f"Invalid compressor. Expected None, a numcodecs.abc.Codec, or a dict representation of a numcodecs.abc.Codec. Got {type(data)} instead."
     raise ValueError(msg)
 
 
@@ -330,14 +336,18 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
         return dtype.type(0)
 
 
-def _default_compressor(dtype: np.dtype[Any]) -> numcodecs.abc.Codec:
-    """Get the default compressor for a type.
+def _default_filters_and_compressor(
+    dtype: np.dtype[Any],
+) -> tuple[list[dict[str, str]], dict[str, str] | None]:
+    """Get the default filters and compressor for a dtype.
 
     The config contains a mapping from numpy dtype kind to the default compressor.
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
-    dtype_kind_to_default_compressor = config.get("v2_dtype_kind_to_default_compressor")
-    for dtype_kinds, compressor in dtype_kind_to_default_compressor.items():
+    dtype_kind_to_default_compressor = config.get("v2_dtype_kind_to_default_filters_and_compressor")
+    for dtype_kinds, filters_and_compressor in dtype_kind_to_default_compressor.items():
         if dtype.kind in dtype_kinds:
-            return numcodecs.get_codec({"id": compressor})
-    raise ValueError(f"No default compressor found for dtype {dtype} of kind {dtype.kind}")
+            filters = [{"id": f} for f in filters_and_compressor]
+            compressor = None
+            return filters, compressor
+    return [], None
diff --git a/tests/test_config.py b/tests/test_config.py
index e1a15a5f8c..2d158ebd9f 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -63,9 +63,9 @@ def test_config_defaults_set() -> None:
                 "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
                 "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
             },
-            "v2_dtype_kind_to_default_compressor": {
-                "biufcmM": "zstd",
-                "OSUV": "vlen-bytes",
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "biufcmM": ["zstd"],
+                "OSUV": ["vlen-utf8"],
             },
         }
     ]
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 777d96511b..86d54492a7 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -80,36 +80,43 @@ def test_codec_pipeline() -> None:
 
 @pytest.mark.parametrize("dtype", ["|S", "|V"])
 async def test_v2_encode_decode(dtype):
-    store = zarr.storage.MemoryStore(mode="w")
-    g = zarr.group(store=store, zarr_format=2)
-    g.create_array(
-        name="foo",
-        shape=(3,),
-        chunks=(3,),
-        dtype=dtype,
-        fill_value=b"X",
-    )
-
-    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
-    assert result is not None
-
-    serialized = json.loads(result.to_bytes())
-    expected = {
-        "chunks": [3],
-        "dtype": f"{dtype}0",
-        "fill_value": "WA==",
-        "filters": None,
-        "order": "C",
-        "shape": [3],
-        "zarr_format": 2,
-        "dimension_separator": ".",
-    }
-    del serialized["compressor"]
-    assert serialized == expected
+    with config.set(
+        {
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "OSUV": ["vlen-bytes"],
+            },
+        }
+    ):
+        store = zarr.storage.MemoryStore(mode="w")
+        g = zarr.group(store=store, zarr_format=2)
+        g.create_array(
+            name="foo",
+            shape=(3,),
+            chunks=(3,),
+            dtype=dtype,
+            fill_value=b"X",
+        )
+
+        result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
+        assert result is not None
+
+        serialized = json.loads(result.to_bytes())
+        expected = {
+            "chunks": [3],
+            "compressor": None,
+            "dtype": f"{dtype}0",
+            "fill_value": "WA==",
+            "filters": [{"id": "vlen-bytes"}],
+            "order": "C",
+            "shape": [3],
+            "zarr_format": 2,
+            "dimension_separator": ".",
+        }
+        assert serialized == expected
 
-    data = zarr.open_array(store=store, path="foo")[:]
-    expected = np.full((3,), b"X", dtype=dtype)
-    np.testing.assert_equal(data, expected)
+        data = zarr.open_array(store=store, path="foo")[:]
+        expected = np.full((3,), b"X", dtype=dtype)
+        np.testing.assert_equal(data, expected)
 
 
 @pytest.mark.parametrize("dtype", [str, "str"])
@@ -133,18 +140,19 @@ def test_v2_filters_codecs(filters: Any) -> None:
 
 
 @pytest.mark.parametrize(
-    "dtype_compressor",
-    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-bytes"]],
+    "dtype_expected",
+    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
 )
-def test_default_compressors(dtype_compressor: Any) -> None:
+def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
-            "v2_dtype_kind_to_default_compressor": {
-                "biufcmM": "zstd",
-                "OSUV": "vlen-bytes",
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "biufcmM": ["zstd"],
+                "OSUV": ["vlen-utf8"],
             },
         }
     ):
-        dtype, expected_compressor = dtype_compressor
+        dtype, expected = dtype_expected
         arr = zarr.create(shape=(10,), path="foo", store={}, zarr_format=2, dtype=dtype)
-        assert arr.metadata.compressor.codec_id == expected_compressor
+        assert arr.metadata.filters[0].codec_id == expected
+        print(arr.metadata)

From 6ac38eadd97a1e879fec697a3ca22b78c9865c6d Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 6 Nov 2024 20:01:39 +0100
Subject: [PATCH 03/35] fix test_metadata_to_dict

---
 tests/test_metadata/test_v2.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
index 089d5c98e1..8801bed4f6 100644
--- a/tests/test_metadata/test_v2.py
+++ b/tests/test_metadata/test_v2.py
@@ -11,7 +11,7 @@
 from zarr.core.buffer import cpu
 from zarr.core.group import ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayV2Metadata
-from zarr.core.metadata.v2 import parse_zarr_format
+from zarr.core.metadata.v2 import _default_filters_and_compressor, parse_zarr_format
 
 if TYPE_CHECKING:
     from typing import Any
@@ -77,6 +77,15 @@ def test_metadata_to_dict(
         assert observed["dimension_separator"] == expected_dimension_sep
         observed.pop("dimension_separator")
 
+    if not filters and not compressor:
+        assert observed["filters"], observed["compressor"] == _default_filters_and_compressor(
+            np.dtype(data_type)
+        )
+        observed.pop("filters")
+        observed.pop("compressor")
+        expected.pop("filters")
+        expected.pop("compressor")
+
     assert observed == expected
 
 

From 9507e1912c8b9e64fca0cf4a6bcf945c925ed4f4 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 6 Nov 2024 22:06:46 +0100
Subject: [PATCH 04/35] wip debugging

---
 src/zarr/codecs/_v2.py   | 3 +++
 src/zarr/core/config.py  | 3 ++-
 tests/test_properties.py | 2 +-
 tests/test_v2.py         | 9 +++++----
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index 30504ad204..7f3c1ff8ec 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -46,7 +46,10 @@ async def _decode_single(
         # special case object dtype, because incorrect handling can lead to
         # segfaults and other bad things happening
         if chunk_spec.dtype != object:
+            print(chunk_spec.dtype, chunk.dtype)
             chunk = chunk.view(chunk_spec.dtype)
+            print("worked")
+
         elif chunk.dtype != object:
             # If we end up here, someone must have hacked around with the filters.
             # We cannot deal with object arrays unless there is an object
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 3373d08958..0391c714bc 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -66,7 +66,8 @@ def reset(self) -> None:
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
             "v2_dtype_kind_to_default_filters_and_compressor": {
                 "biufcmM": ["zstd"],
-                "OSUV": ["vlen-utf8"],
+                "SV": ["vlen-bytes"],
+                "OU": ["vlen-utf8"],
             },
         }
     ],
diff --git a/tests/test_properties.py b/tests/test_properties.py
index f70753ceb5..8100181fef 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -6,7 +6,7 @@
 
 import hypothesis.extra.numpy as npst  # noqa: E402
 import hypothesis.strategies as st  # noqa: E402
-from hypothesis import assume, given  # noqa: E402
+from hypothesis import assume, given, reproduce_failure  # noqa: E402
 
 from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats  # noqa: E402
 
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 86d54492a7..309a6ae9fe 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -141,18 +141,19 @@ def test_v2_filters_codecs(filters: Any) -> None:
 
 @pytest.mark.parametrize(
     "dtype_expected",
-    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
+   # [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
+    [["|S1", "vlen-bytes"]],
 )
 def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
             "v2_dtype_kind_to_default_filters_and_compressor": {
                 "biufcmM": ["zstd"],
-                "OSUV": ["vlen-utf8"],
+                "OSUV": ["vlen-bytes"],
             },
         }
     ):
         dtype, expected = dtype_expected
-        arr = zarr.create(shape=(10,), path="foo", store={}, zarr_format=2, dtype=dtype)
+        arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype)
         assert arr.metadata.filters[0].codec_id == expected
-        print(arr.metadata)
+        arr[:] = np.array(["a", "bb", "ccc"], dtype=dtype)

From f93ced262040f5b119529a6b7e527673ac33433c Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 13 Nov 2024 12:34:56 +0100
Subject: [PATCH 05/35] format

---
 tests/test_properties.py | 2 +-
 tests/test_v2.py         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_properties.py b/tests/test_properties.py
index 8100181fef..f70753ceb5 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -6,7 +6,7 @@
 
 import hypothesis.extra.numpy as npst  # noqa: E402
 import hypothesis.strategies as st  # noqa: E402
-from hypothesis import assume, given, reproduce_failure  # noqa: E402
+from hypothesis import assume, given  # noqa: E402
 
 from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats  # noqa: E402
 
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 3c783831f5..c99fd1742f 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -141,7 +141,7 @@ def test_v2_filters_codecs(filters: Any) -> None:
 
 @pytest.mark.parametrize(
     "dtype_expected",
-   # [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
+    # [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
     [["|S1", "vlen-bytes"]],
 )
 def test_default_filters_and_compressor(dtype_expected: Any) -> None:

From 07590ca12d11a17944e8fa948c373f4335dcd663 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 13 Nov 2024 14:49:49 +0100
Subject: [PATCH 06/35] fix v2 decode string dtype

---
 src/zarr/codecs/_v2.py  | 10 +++++++---
 src/zarr/core/config.py |  4 ++--
 tests/test_v2.py        | 37 +++++++++++++++++++++++++++++--------
 3 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index 7f3c1ff8ec..6ed64739e2 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -5,6 +5,7 @@
 from typing import TYPE_CHECKING
 
 import numcodecs
+import numpy as np
 from numcodecs.compat import ensure_ndarray_like
 
 from zarr.abc.codec import ArrayBytesCodec
@@ -43,12 +44,15 @@ async def _decode_single(
 
         # view as numpy array with correct dtype
         chunk = ensure_ndarray_like(chunk)
+        print(chunk)
+        print(chunk.dtype)
         # special case object dtype, because incorrect handling can lead to
         # segfaults and other bad things happening
         if chunk_spec.dtype != object:
-            print(chunk_spec.dtype, chunk.dtype)
-            chunk = chunk.view(chunk_spec.dtype)
-            print("worked")
+            try:
+                chunk = chunk.view(chunk_spec.dtype)
+            except TypeError:
+                chunk = np.array(chunk).astype(chunk_spec.dtype)
 
         elif chunk.dtype != object:
             # If we end up here, someone must have hacked around with the filters.
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 0391c714bc..fa28258ba6 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -66,8 +66,8 @@ def reset(self) -> None:
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
             "v2_dtype_kind_to_default_filters_and_compressor": {
                 "biufcmM": ["zstd"],
-                "SV": ["vlen-bytes"],
-                "OU": ["vlen-utf8"],
+                "U": ["vlen-utf8"],
+                "OSV": ["vlen-bytes"],
             },
         }
     ],
diff --git a/tests/test_v2.py b/tests/test_v2.py
index c99fd1742f..0da668ad89 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -83,7 +83,7 @@ async def test_v2_encode_decode(dtype):
     with config.set(
         {
             "v2_dtype_kind_to_default_filters_and_compressor": {
-                "OSUV": ["vlen-bytes"],
+                "SV": ["vlen-bytes"],
             },
         }
     ):
@@ -119,15 +119,37 @@ async def test_v2_encode_decode(dtype):
         np.testing.assert_equal(data, expected)
 
 
+@pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]])
+def test_v2_encode_decode_with_data(dtype_value):
+    dtype, value = dtype_value
+    with config.set(
+        {
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "U": ["vlen-utf8"],
+                "OSV": ["vlen-bytes"],
+            },
+        }
+    ):
+        expected = np.full((3,), value, dtype=dtype)
+        a = zarr.create(
+            shape=(3,),
+            zarr_format=2,
+            dtype=dtype,
+        )
+        a[:] = expected
+        data = a[:]
+        np.testing.assert_equal(data, expected)
+
+
 @pytest.mark.parametrize("dtype", [str, "str"])
 async def test_create_dtype_str(dtype: Any) -> None:
     arr = zarr.create(shape=3, dtype=dtype, zarr_format=2)
     assert arr.dtype.kind == "O"
     assert arr.metadata.to_dict()["dtype"] == "|O"
-    assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),)
-    arr[:] = ["a", "bb", "ccc"]
+    assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),)
+    arr[:] = [b"a", b"bb", b"ccc"]
     result = arr[:]
-    np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object"))
+    np.testing.assert_array_equal(result, np.array([b"a", b"bb", b"ccc"], dtype="object"))
 
 
 @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]])
@@ -141,19 +163,18 @@ def test_v2_filters_codecs(filters: Any) -> None:
 
 @pytest.mark.parametrize(
     "dtype_expected",
-    # [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-utf8"], ["|U1", "vlen-utf8"]],
-    [["|S1", "vlen-bytes"]],
+    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-utf8"]],
 )
 def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
             "v2_dtype_kind_to_default_filters_and_compressor": {
                 "biufcmM": ["zstd"],
-                "OSUV": ["vlen-bytes"],
+                "U": ["vlen-utf8"],
+                "OSV": ["vlen-bytes"],
             },
         }
     ):
         dtype, expected = dtype_expected
         arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype)
         assert arr.metadata.filters[0].codec_id == expected
-        arr[:] = np.array(["a", "bb", "ccc"], dtype=dtype)

From 4e2a3bc5bb830759297d697914a687bdf29dad41 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 13 Nov 2024 16:06:43 +0100
Subject: [PATCH 07/35] fix config default tests

---
 tests/test_array.py  | 2 ++
 tests/test_config.py | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_array.py b/tests/test_array.py
index 3948896186..4452c018da 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -4,6 +4,7 @@
 from itertools import accumulate
 from typing import Any, Literal
 
+import numcodecs
 import numpy as np
 import pytest
 
@@ -431,6 +432,7 @@ def test_info_v2(self) -> None:
             _read_only=False,
             _store_type="MemoryStore",
             _count_bytes=128,
+            _filters= (numcodecs.Zstd(),)
         )
         assert result == expected
 
diff --git a/tests/test_config.py b/tests/test_config.py
index da1ebfa5f3..7bfede2c43 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -65,7 +65,8 @@ def test_config_defaults_set() -> None:
             },
             "v2_dtype_kind_to_default_filters_and_compressor": {
                 "biufcmM": ["zstd"],
-                "OSUV": ["vlen-utf8"],
+                "U": ["vlen-utf8"],
+                "OSV": ["vlen-bytes"],
             },
         }
     ]

From 0fc7b2396ae3599cdce74157749285e8bb66b7e8 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 13 Nov 2024 16:08:30 +0100
Subject: [PATCH 08/35] format

---
 tests/test_array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_array.py b/tests/test_array.py
index 4452c018da..b0873f8469 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -432,7 +432,7 @@ def test_info_v2(self) -> None:
             _read_only=False,
             _store_type="MemoryStore",
             _count_bytes=128,
-            _filters= (numcodecs.Zstd(),)
+            _filters=(numcodecs.Zstd(),),
         )
         assert result == expected
 

From 8ec16e8c15ced582853333f6bf80e8599ba5a120 Mon Sep 17 00:00:00 2001
From: Norman Rzepka <code@normanrz.com>
Date: Fri, 6 Dec 2024 17:33:22 +0100
Subject: [PATCH 09/35] Update src/zarr/codecs/_v2.py

---
 src/zarr/codecs/_v2.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index 6ed64739e2..a20f4aea8d 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -44,8 +44,6 @@ async def _decode_single(
 
         # view as numpy array with correct dtype
         chunk = ensure_ndarray_like(chunk)
-        print(chunk)
-        print(chunk.dtype)
         # special case object dtype, because incorrect handling can lead to
         # segfaults and other bad things happening
         if chunk_spec.dtype != object:

From d6dc14676a6b6e6b94ab5fa64cac41f882d43515 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 14:04:50 +0100
Subject: [PATCH 10/35] rename v2_dtype_kind_to_default_filters_and_compressor
 to v2_default_compressors

---
 src/zarr/core/config.py      |  8 ++++----
 src/zarr/core/metadata/v2.py | 19 +++++++++++--------
 tests/test_config.py         |  8 ++++----
 tests/test_v2.py             | 16 ++++++++--------
 4 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index fa28258ba6..e5ab29b6c9 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -64,10 +64,10 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
-            "v2_dtype_kind_to_default_filters_and_compressor": {
-                "biufcmM": ["zstd"],
-                "U": ["vlen-utf8"],
-                "OSV": ["vlen-bytes"],
+            "v2_default_compressors": {
+                "numeric": ["zstd"],
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
             },
         }
     ],
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index bcd23e24bc..763aefbf7a 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -341,13 +341,16 @@ def _default_filters_and_compressor(
 ) -> tuple[list[dict[str, str]], dict[str, str] | None]:
     """Get the default filters and compressor for a dtype.
 
-    The config contains a mapping from numpy dtype kind to the default compressor.
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
-    dtype_kind_to_default_compressor = config.get("v2_dtype_kind_to_default_filters_and_compressor")
-    for dtype_kinds, filters_and_compressor in dtype_kind_to_default_compressor.items():
-        if dtype.kind in dtype_kinds:
-            filters = [{"id": f} for f in filters_and_compressor]
-            compressor = None
-            return filters, compressor
-    return [], None
+    default_compressors = config.get("v2_default_compressors")
+    if dtype.kind in "biufcmM":
+        dtype_key = "numeric"
+    elif dtype.kind in "U":
+        dtype_key = "unicode"
+    elif dtype.kind in "OSV":
+        dtype_key = "bytes"
+    else:
+        raise ValueError(f"Unsupported dtype kind {dtype.kind}")
+
+    return [{"id": f} for f in default_compressors[dtype_key]], None
diff --git a/tests/test_config.py b/tests/test_config.py
index 7bfede2c43..c46b456302 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -63,10 +63,10 @@ def test_config_defaults_set() -> None:
                 "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
                 "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
             },
-            "v2_dtype_kind_to_default_filters_and_compressor": {
-                "biufcmM": ["zstd"],
-                "U": ["vlen-utf8"],
-                "OSV": ["vlen-bytes"],
+            "v2_default_compressors": {
+                "numeric": ["zstd"],
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
             },
         }
     ]
diff --git a/tests/test_v2.py b/tests/test_v2.py
index bb8ef624ff..1a6a179142 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -84,8 +84,8 @@ def test_codec_pipeline() -> None:
 async def test_v2_encode_decode(dtype):
     with config.set(
         {
-            "v2_dtype_kind_to_default_filters_and_compressor": {
-                "SV": ["vlen-bytes"],
+            "v2_default_compressors": {
+                "bytes": ["vlen-bytes"],
             },
         }
     ):
@@ -126,9 +126,9 @@ def test_v2_encode_decode_with_data(dtype_value):
     dtype, value = dtype_value
     with config.set(
         {
-            "v2_dtype_kind_to_default_filters_and_compressor": {
-                "U": ["vlen-utf8"],
-                "OSV": ["vlen-bytes"],
+            "v2_default_compressors": {
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
             },
         }
     ):
@@ -171,9 +171,9 @@ def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
             "v2_dtype_kind_to_default_filters_and_compressor": {
-                "biufcmM": ["zstd"],
-                "U": ["vlen-utf8"],
-                "OSV": ["vlen-bytes"],
+                "numeric": ["zstd"],
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
             },
         }
     ):

From 15577ae9ee71adfc0a6f6d58143aeb64226bf7fe Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 16:33:57 +0100
Subject: [PATCH 11/35] recover test_v2.py

---
 tests/test_v2.py | 179 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)

diff --git a/tests/test_v2.py b/tests/test_v2.py
index e69de29bb2..68c07e2024 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -0,0 +1,179 @@
+import json
+from collections.abc import Iterator
+from typing import Any, Literal
+
+import numcodecs.vlen
+import numpy as np
+import pytest
+from numcodecs import Delta
+from numcodecs.blosc import Blosc
+
+import zarr
+import zarr.core.buffer
+import zarr.storage
+from zarr import Array
+from zarr.storage import MemoryStore, StorePath
+
+
+@pytest.fixture
+async def store() -> Iterator[StorePath]:
+    return StorePath(await MemoryStore.open())
+
+
+def test_simple(store: StorePath) -> None:
+    data = np.arange(0, 256, dtype="uint16").reshape((16, 16))
+
+    a = Array.create(
+        store / "simple_v2",
+        zarr_format=2,
+        shape=data.shape,
+        chunks=(16, 16),
+        dtype=data.dtype,
+        fill_value=0,
+    )
+
+    a[:, :] = data
+    assert np.array_equal(data, a[:, :])
+
+
+@pytest.mark.parametrize("store", ["memory"], indirect=True)
+@pytest.mark.parametrize(
+    ("dtype", "fill_value"),
+    [
+        ("bool", False),
+        ("int64", 0),
+        ("float64", 0.0),
+        ("|S1", b""),
+        ("|U1", ""),
+        ("object", ""),
+        (str, ""),
+    ],
+)
+def test_implicit_fill_value(store: MemoryStore, dtype: str, fill_value: Any) -> None:
+    arr = zarr.create(store=store, shape=(4,), fill_value=None, zarr_format=2, dtype=dtype)
+    assert arr.metadata.fill_value is None
+    assert arr.metadata.to_dict()["fill_value"] is None
+    result = arr[:]
+    if dtype is str:
+        # special case
+        numpy_dtype = np.dtype(object)
+    else:
+        numpy_dtype = np.dtype(dtype)
+    expected = np.full(arr.shape, fill_value, dtype=numpy_dtype)
+    np.testing.assert_array_equal(result, expected)
+
+
+def test_codec_pipeline() -> None:
+    # https://github.com/zarr-developers/zarr-python/issues/2243
+    store = MemoryStore()
+    array = zarr.create(
+        store=store,
+        shape=(1,),
+        dtype="i4",
+        zarr_format=2,
+        filters=[Delta(dtype="i4").get_config()],
+        compressor=Blosc().get_config(),
+    )
+    array[:] = 1
+    result = array[:]
+    expected = np.ones(1)
+    np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", ["|S", "|V"])
+async def test_v2_encode_decode(dtype):
+    store = zarr.storage.MemoryStore()
+    g = zarr.group(store=store, zarr_format=2)
+    g.create_array(
+        name="foo",
+        shape=(3,),
+        chunks=(3,),
+        dtype=dtype,
+        fill_value=b"X",
+    )
+
+    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
+    assert result is not None
+
+    serialized = json.loads(result.to_bytes())
+    expected = {
+        "chunks": [3],
+        "compressor": None,
+        "dtype": f"{dtype}0",
+        "fill_value": "WA==",
+        "filters": None,
+        "order": "C",
+        "shape": [3],
+        "zarr_format": 2,
+        "dimension_separator": ".",
+    }
+    assert serialized == expected
+
+    data = zarr.open_array(store=store, path="foo")[:]
+    expected = np.full((3,), b"X", dtype=dtype)
+    np.testing.assert_equal(data, expected)
+
+
+@pytest.mark.parametrize("dtype", [str, "str"])
+async def test_create_dtype_str(dtype: Any) -> None:
+    arr = zarr.create(shape=3, dtype=dtype, zarr_format=2)
+    assert arr.dtype.kind == "O"
+    assert arr.metadata.to_dict()["dtype"] == "|O"
+    assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),)
+    arr[:] = ["a", "bb", "ccc"]
+    result = arr[:]
+    np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object"))
+
+
+@pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]])
+@pytest.mark.parametrize("order", ["C", "F"])
+def test_v2_filters_codecs(filters: Any, order: Literal["C", "F"]) -> None:
+    array_fixture = [42]
+    arr = zarr.create(shape=1, dtype="<i4", zarr_format=2, filters=filters, order=order)
+    arr[:] = array_fixture
+    result = arr[:]
+    np.testing.assert_array_equal(result, array_fixture)
+
+
+@pytest.mark.parametrize("array_order", ["C", "F"])
+@pytest.mark.parametrize("data_order", ["C", "F"])
+def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["C", "F"]) -> None:
+    arr = zarr.Array.create(
+        MemoryStore({}),
+        shape=(10, 8),
+        chunks=(3, 3),
+        fill_value=np.nan,
+        dtype="float64",
+        zarr_format=2,
+        exists_ok=True,
+        order=array_order,
+    )
+
+    # Non-contiguous write
+    a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=data_order)
+    arr[slice(6, 9, None), slice(3, 6, None)] = a[
+        slice(6, 9, None), slice(3, 6, None)
+    ]  # The slice on the RHS is important
+    np.testing.assert_array_equal(
+        arr[slice(6, 9, None), slice(3, 6, None)], a[slice(6, 9, None), slice(3, 6, None)]
+    )
+
+    arr = zarr.Array.create(
+        MemoryStore({}),
+        shape=(10, 8),
+        chunks=(3, 3),
+        fill_value=np.nan,
+        dtype="float64",
+        zarr_format=2,
+        exists_ok=True,
+        order=array_order,
+    )
+
+    # Contiguous write
+    a = np.arange(9).reshape((3, 3), order=data_order)
+    if data_order == "F":
+        assert a.flags.f_contiguous
+    else:
+        assert a.flags.c_contiguous
+    arr[slice(6, 9, None), slice(3, 6, None)] = a
+    np.testing.assert_array_equal(arr[slice(6, 9, None), slice(3, 6, None)], a)

From 67010ce2ab526f733d0d678cff40b7ae7cc171bb Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 18:39:02 +0100
Subject: [PATCH 12/35] incorporate feedback

---
 src/zarr/api/asynchronous.py             |  15 ++-
 src/zarr/codecs/_v2.py                   |   6 ++
 src/zarr/core/array.py                   |   9 ++
 src/zarr/core/metadata/v2.py             |  10 +-
 tests/test_array.py                      |   2 +
 tests/test_group.py                      |   2 +
 tests/test_metadata/test_consolidated.py |   2 +
 tests/test_metadata/test_v2.py           |  11 +--
 tests/test_v2.py                         | 116 ++++++++++++++++-------
 9 files changed, 119 insertions(+), 54 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 26822f725b..6e8ef9ce8d 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -17,10 +17,12 @@
     ChunkCoords,
     MemoryOrder,
     ZarrFormat,
+    parse_dtype,
 )
 from zarr.core.config import config
 from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
+from zarr.core.metadata.v2 import _default_filters_and_compressor
 from zarr.errors import NodeTypeValidationError
 from zarr.storage import (
     StoreLike,
@@ -885,8 +887,17 @@ async def create(
         or _default_zarr_version()
     )
 
-    if zarr_format == 2 and chunks is None:
-        chunks = shape
+    if zarr_format == 2:
+        if chunks is None:
+            chunks = shape
+        dtype = parse_dtype(dtype, zarr_format)
+        if not filters and not compressor:
+            filters, compressor = _default_filters_and_compressor(dtype)
+        if np.issubdtype(dtype, np.str_):
+            filters = filters or []
+            if not any(x["id"] == "vlen-utf8" for x in filters):
+                filters = list(filters) + [{"id": "vlen-utf8"}]
+
     elif zarr_format == 3 and chunk_shape is None:
         if chunks is not None:
             chunk_shape = chunks
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index 2d186b8878..53edc1f4a1 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -50,6 +50,12 @@ async def _decode_single(
             try:
                 chunk = chunk.view(chunk_spec.dtype)
             except TypeError:
+                # this will happen if the dtype of the chunk
+                # does not match the dtype of the array spec, e.g. if
+                # the dtype of the chunk_spec is a string dtype, but the chunk
+                # is an object array. In this case, we need to convert the object
+                # array to the correct dtype.
+
                 chunk = np.array(chunk).astype(chunk_spec.dtype)
 
         elif chunk.dtype != object:
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 56e30e2715..c7cc49a07c 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -77,6 +77,7 @@
     ArrayV3MetadataDict,
     T_ArrayMetadata,
 )
+from zarr.core.metadata.v2 import _default_filters_and_compressor
 from zarr.core.metadata.v3 import parse_node_type_array
 from zarr.core.sync import sync
 from zarr.errors import MetadataValidationError
@@ -617,6 +618,14 @@ async def _create_v2(
         if dimension_separator is None:
             dimension_separator = "."
 
+        dtype = parse_dtype(dtype, 2)
+        if not filters and not compressor:
+            filters, compressor = _default_filters_and_compressor(dtype)
+        if np.issubdtype(dtype, np.str_):
+            filters = filters or []
+            if not any(x["id"] == "vlen-utf8" for x in filters):
+                filters = list(filters) + [{"id": "vlen-utf8"}]
+
         metadata = ArrayV2Metadata(
             shape=shape,
             dtype=np.dtype(dtype),
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 763aefbf7a..1cbc4d79a9 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -71,14 +71,7 @@ def __init__(
         shape_parsed = parse_shapelike(shape)
         dtype_parsed = parse_dtype(dtype)
         chunks_parsed = parse_shapelike(chunks)
-        if not filters and not compressor:
-            filters, compressor = _default_filters_and_compressor(dtype_parsed)
-        if dtype is str or dtype == "str":
-            vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"}
-            if filters and not any(x["id"] == "vlen-utf8" for x in filters):
-                filters = list(filters) + [vlen_codec]
-            else:
-                filters = [vlen_codec]
+
         compressor_parsed = parse_compressor(compressor)
         order_parsed = parse_indexing_order(order)
         dimension_separator_parsed = parse_separator(dimension_separator)
@@ -343,6 +336,7 @@ def _default_filters_and_compressor(
 
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
+    dtype = np.dtype(dtype)
     default_compressors = config.get("v2_default_compressors")
     if dtype.kind in "biufcmM":
         dtype_key = "numeric"
diff --git a/tests/test_array.py b/tests/test_array.py
index 58bc823068..eb138aa8cd 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -8,6 +8,7 @@
 import numcodecs
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr.api.asynchronous
 from zarr import Array, AsyncArray, Group
@@ -513,6 +514,7 @@ async def test_info_v2_async(self) -> None:
             _order="C",
             _read_only=False,
             _store_type="MemoryStore",
+            _filters=(Zstd(level=0),),
             _count_bytes=128,
         )
         assert result == expected
diff --git a/tests/test_group.py b/tests/test_group.py
index afa290207d..3b7acc9b15 100644
--- a/tests/test_group.py
+++ b/tests/test_group.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr
 import zarr.api.asynchronous
@@ -496,6 +497,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                     "shape": (1,),
                     "chunks": (1,),
                     "order": "C",
+                    "filters": (Zstd(level=0),),
                     "zarr_format": zarr_format,
                 },
                 "subgroup": {
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
index 8ae9cc81fd..26e9904608 100644
--- a/tests/test_metadata/test_consolidated.py
+++ b/tests/test_metadata/test_consolidated.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr.api.asynchronous
 import zarr.api.synchronous
@@ -486,6 +487,7 @@ async def test_consolidated_metadata_v2(self):
                         attributes={"key": "a"},
                         chunks=(1,),
                         fill_value=None,
+                        filters=(Zstd(level=0),),
                         order="C",
                     ),
                     "g1": GroupMetadata(
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
index 8c82eac20a..003aef331f 100644
--- a/tests/test_metadata/test_v2.py
+++ b/tests/test_metadata/test_v2.py
@@ -11,7 +11,7 @@
 from zarr.core.buffer import cpu
 from zarr.core.group import ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayV2Metadata
-from zarr.core.metadata.v2 import _default_filters_and_compressor, parse_zarr_format
+from zarr.core.metadata.v2 import parse_zarr_format
 
 if TYPE_CHECKING:
     from typing import Any
@@ -77,15 +77,6 @@ def test_metadata_to_dict(
         assert observed["dimension_separator"] == expected_dimension_sep
         observed.pop("dimension_separator")
 
-    if not filters and not compressor:
-        assert observed["filters"], observed["compressor"] == _default_filters_and_compressor(
-            np.dtype(data_type)
-        )
-        observed.pop("filters")
-        observed.pop("compressor")
-        expected.pop("filters")
-        expected.pop("compressor")
-
     assert observed == expected
 
 
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 68c07e2024..ba8544ff0f 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -11,7 +11,7 @@
 import zarr
 import zarr.core.buffer
 import zarr.storage
-from zarr import Array
+from zarr import Array, config
 from zarr.storage import MemoryStore, StorePath
 
 
@@ -82,36 +82,65 @@ def test_codec_pipeline() -> None:
 
 @pytest.mark.parametrize("dtype", ["|S", "|V"])
 async def test_v2_encode_decode(dtype):
-    store = zarr.storage.MemoryStore()
-    g = zarr.group(store=store, zarr_format=2)
-    g.create_array(
-        name="foo",
-        shape=(3,),
-        chunks=(3,),
-        dtype=dtype,
-        fill_value=b"X",
-    )
-
-    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
-    assert result is not None
-
-    serialized = json.loads(result.to_bytes())
-    expected = {
-        "chunks": [3],
-        "compressor": None,
-        "dtype": f"{dtype}0",
-        "fill_value": "WA==",
-        "filters": None,
-        "order": "C",
-        "shape": [3],
-        "zarr_format": 2,
-        "dimension_separator": ".",
-    }
-    assert serialized == expected
-
-    data = zarr.open_array(store=store, path="foo")[:]
-    expected = np.full((3,), b"X", dtype=dtype)
-    np.testing.assert_equal(data, expected)
+    with config.set(
+        {
+            "v2_default_compressors": {
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        store = zarr.storage.MemoryStore()
+        g = zarr.group(store=store, zarr_format=2)
+        g.create_array(
+            name="foo",
+            shape=(3,),
+            chunks=(3,),
+            dtype=dtype,
+            fill_value=b"X",
+        )
+
+        result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
+        assert result is not None
+
+        serialized = json.loads(result.to_bytes())
+        expected = {
+            "chunks": [3],
+            "compressor": None,
+            "dtype": f"{dtype}0",
+            "fill_value": "WA==",
+            "filters": [{"id": "vlen-bytes"}],
+            "order": "C",
+            "shape": [3],
+            "zarr_format": 2,
+            "dimension_separator": ".",
+        }
+        assert serialized == expected
+
+        data = zarr.open_array(store=store, path="foo")[:]
+        expected = np.full((3,), b"X", dtype=dtype)
+        np.testing.assert_equal(data, expected)
+
+
+@pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]])
+def test_v2_encode_decode_with_data(dtype_value):
+    dtype, value = dtype_value
+    with config.set(
+        {
+            "v2_default_compressors": {
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        expected = np.full((3,), value, dtype=dtype)
+        a = zarr.create(
+            shape=(3,),
+            zarr_format=2,
+            dtype=dtype,
+        )
+        a[:] = expected
+        data = a[:]
+        np.testing.assert_equal(data, expected)
 
 
 @pytest.mark.parametrize("dtype", [str, "str"])
@@ -119,10 +148,10 @@ async def test_create_dtype_str(dtype: Any) -> None:
     arr = zarr.create(shape=3, dtype=dtype, zarr_format=2)
     assert arr.dtype.kind == "O"
     assert arr.metadata.to_dict()["dtype"] == "|O"
-    assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),)
-    arr[:] = ["a", "bb", "ccc"]
+    assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),)
+    arr[:] = [b"a", b"bb", b"ccc"]
     result = arr[:]
-    np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object"))
+    np.testing.assert_array_equal(result, np.array([b"a", b"bb", b"ccc"], dtype="object"))
 
 
 @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]])
@@ -177,3 +206,22 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
         assert a.flags.c_contiguous
     arr[slice(6, 9, None), slice(3, 6, None)] = a
     np.testing.assert_array_equal(arr[slice(6, 9, None), slice(3, 6, None)], a)
+
+
+@pytest.mark.parametrize(
+    "dtype_expected",
+    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-utf8"]],
+)
+def test_default_filters_and_compressor(dtype_expected: Any) -> None:
+    with config.set(
+        {
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "numeric": ["zstd"],
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        dtype, expected = dtype_expected
+        arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype)
+        assert arr.metadata.filters[0].codec_id == expected

From f6b98c3d1f8b06803a2ba7898223fa5ddbea9790 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 18:39:02 +0100
Subject: [PATCH 13/35] incorporate feedback

---
 src/zarr/api/asynchronous.py             |  18 +++-
 src/zarr/codecs/_v2.py                   |   6 ++
 src/zarr/core/array.py                   |   9 ++
 src/zarr/core/metadata/v2.py             |  10 +-
 tests/test_array.py                      |   2 +
 tests/test_group.py                      |   2 +
 tests/test_metadata/test_consolidated.py |   2 +
 tests/test_metadata/test_v2.py           |  11 +--
 tests/test_v2.py                         | 116 ++++++++++++++++-------
 9 files changed, 121 insertions(+), 55 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 26822f725b..dc199f28ff 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -17,10 +17,12 @@
     ChunkCoords,
     MemoryOrder,
     ZarrFormat,
+    parse_dtype,
 )
 from zarr.core.config import config
 from zarr.core.group import AsyncGroup, ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayMetadataDict, ArrayV2Metadata, ArrayV3Metadata
+from zarr.core.metadata.v2 import _default_filters_and_compressor
 from zarr.errors import NodeTypeValidationError
 from zarr.storage import (
     StoreLike,
@@ -815,7 +817,12 @@ async def create(
     dtype : str or dtype, optional
         NumPy dtype.
     compressor : Codec, optional
-        Primary compressor.
+        Primary compressor for `zarr_format=2`.
+        If neither `compressor` nor `filters` are provided, a default compressor will be used:
+        - For numeric arrays, the default is `ZstdCodec`.
+        - For Unicode strings, the default is `VLenUTF8Codec`.
+        - For bytes or objects, the default is `VLenBytesCodec`.
+        These defaults can be changed using the `v2_default_compressors` variable in the Zarr config.
     fill_value : object
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
@@ -885,8 +892,13 @@ async def create(
         or _default_zarr_version()
     )
 
-    if zarr_format == 2 and chunks is None:
-        chunks = shape
+    if zarr_format == 2:
+        if chunks is None:
+            chunks = shape
+        dtype = parse_dtype(dtype, zarr_format)
+        if not filters and not compressor:
+            filters, compressor = _default_filters_and_compressor(dtype)
+
     elif zarr_format == 3 and chunk_shape is None:
         if chunks is not None:
             chunk_shape = chunks
diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py
index 2d186b8878..53edc1f4a1 100644
--- a/src/zarr/codecs/_v2.py
+++ b/src/zarr/codecs/_v2.py
@@ -50,6 +50,12 @@ async def _decode_single(
             try:
                 chunk = chunk.view(chunk_spec.dtype)
             except TypeError:
+                # this will happen if the dtype of the chunk
+                # does not match the dtype of the array spec, e.g. if
+                # the dtype of the chunk_spec is a string dtype, but the chunk
+                # is an object array. In this case, we need to convert the object
+                # array to the correct dtype.
+
                 chunk = np.array(chunk).astype(chunk_spec.dtype)
 
         elif chunk.dtype != object:
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 56e30e2715..c7cc49a07c 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -77,6 +77,7 @@
     ArrayV3MetadataDict,
     T_ArrayMetadata,
 )
+from zarr.core.metadata.v2 import _default_filters_and_compressor
 from zarr.core.metadata.v3 import parse_node_type_array
 from zarr.core.sync import sync
 from zarr.errors import MetadataValidationError
@@ -617,6 +618,14 @@ async def _create_v2(
         if dimension_separator is None:
             dimension_separator = "."
 
+        dtype = parse_dtype(dtype, 2)
+        if not filters and not compressor:
+            filters, compressor = _default_filters_and_compressor(dtype)
+        if np.issubdtype(dtype, np.str_):
+            filters = filters or []
+            if not any(x["id"] == "vlen-utf8" for x in filters):
+                filters = list(filters) + [{"id": "vlen-utf8"}]
+
         metadata = ArrayV2Metadata(
             shape=shape,
             dtype=np.dtype(dtype),
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 763aefbf7a..1cbc4d79a9 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -71,14 +71,7 @@ def __init__(
         shape_parsed = parse_shapelike(shape)
         dtype_parsed = parse_dtype(dtype)
         chunks_parsed = parse_shapelike(chunks)
-        if not filters and not compressor:
-            filters, compressor = _default_filters_and_compressor(dtype_parsed)
-        if dtype is str or dtype == "str":
-            vlen_codec: dict[str, JSON] = {"id": "vlen-utf8"}
-            if filters and not any(x["id"] == "vlen-utf8" for x in filters):
-                filters = list(filters) + [vlen_codec]
-            else:
-                filters = [vlen_codec]
+
         compressor_parsed = parse_compressor(compressor)
         order_parsed = parse_indexing_order(order)
         dimension_separator_parsed = parse_separator(dimension_separator)
@@ -343,6 +336,7 @@ def _default_filters_and_compressor(
 
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
+    dtype = np.dtype(dtype)
     default_compressors = config.get("v2_default_compressors")
     if dtype.kind in "biufcmM":
         dtype_key = "numeric"
diff --git a/tests/test_array.py b/tests/test_array.py
index 58bc823068..eb138aa8cd 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -8,6 +8,7 @@
 import numcodecs
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr.api.asynchronous
 from zarr import Array, AsyncArray, Group
@@ -513,6 +514,7 @@ async def test_info_v2_async(self) -> None:
             _order="C",
             _read_only=False,
             _store_type="MemoryStore",
+            _filters=(Zstd(level=0),),
             _count_bytes=128,
         )
         assert result == expected
diff --git a/tests/test_group.py b/tests/test_group.py
index afa290207d..3b7acc9b15 100644
--- a/tests/test_group.py
+++ b/tests/test_group.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr
 import zarr.api.asynchronous
@@ -496,6 +497,7 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                     "shape": (1,),
                     "chunks": (1,),
                     "order": "C",
+                    "filters": (Zstd(level=0),),
                     "zarr_format": zarr_format,
                 },
                 "subgroup": {
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
index 8ae9cc81fd..26e9904608 100644
--- a/tests/test_metadata/test_consolidated.py
+++ b/tests/test_metadata/test_consolidated.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import pytest
+from numcodecs import Zstd
 
 import zarr.api.asynchronous
 import zarr.api.synchronous
@@ -486,6 +487,7 @@ async def test_consolidated_metadata_v2(self):
                         attributes={"key": "a"},
                         chunks=(1,),
                         fill_value=None,
+                        filters=(Zstd(level=0),),
                         order="C",
                     ),
                     "g1": GroupMetadata(
diff --git a/tests/test_metadata/test_v2.py b/tests/test_metadata/test_v2.py
index 8c82eac20a..003aef331f 100644
--- a/tests/test_metadata/test_v2.py
+++ b/tests/test_metadata/test_v2.py
@@ -11,7 +11,7 @@
 from zarr.core.buffer import cpu
 from zarr.core.group import ConsolidatedMetadata, GroupMetadata
 from zarr.core.metadata import ArrayV2Metadata
-from zarr.core.metadata.v2 import _default_filters_and_compressor, parse_zarr_format
+from zarr.core.metadata.v2 import parse_zarr_format
 
 if TYPE_CHECKING:
     from typing import Any
@@ -77,15 +77,6 @@ def test_metadata_to_dict(
         assert observed["dimension_separator"] == expected_dimension_sep
         observed.pop("dimension_separator")
 
-    if not filters and not compressor:
-        assert observed["filters"], observed["compressor"] == _default_filters_and_compressor(
-            np.dtype(data_type)
-        )
-        observed.pop("filters")
-        observed.pop("compressor")
-        expected.pop("filters")
-        expected.pop("compressor")
-
     assert observed == expected
 
 
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 68c07e2024..ba8544ff0f 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -11,7 +11,7 @@
 import zarr
 import zarr.core.buffer
 import zarr.storage
-from zarr import Array
+from zarr import Array, config
 from zarr.storage import MemoryStore, StorePath
 
 
@@ -82,36 +82,65 @@ def test_codec_pipeline() -> None:
 
 @pytest.mark.parametrize("dtype", ["|S", "|V"])
 async def test_v2_encode_decode(dtype):
-    store = zarr.storage.MemoryStore()
-    g = zarr.group(store=store, zarr_format=2)
-    g.create_array(
-        name="foo",
-        shape=(3,),
-        chunks=(3,),
-        dtype=dtype,
-        fill_value=b"X",
-    )
-
-    result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
-    assert result is not None
-
-    serialized = json.loads(result.to_bytes())
-    expected = {
-        "chunks": [3],
-        "compressor": None,
-        "dtype": f"{dtype}0",
-        "fill_value": "WA==",
-        "filters": None,
-        "order": "C",
-        "shape": [3],
-        "zarr_format": 2,
-        "dimension_separator": ".",
-    }
-    assert serialized == expected
-
-    data = zarr.open_array(store=store, path="foo")[:]
-    expected = np.full((3,), b"X", dtype=dtype)
-    np.testing.assert_equal(data, expected)
+    with config.set(
+        {
+            "v2_default_compressors": {
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        store = zarr.storage.MemoryStore()
+        g = zarr.group(store=store, zarr_format=2)
+        g.create_array(
+            name="foo",
+            shape=(3,),
+            chunks=(3,),
+            dtype=dtype,
+            fill_value=b"X",
+        )
+
+        result = await store.get("foo/.zarray", zarr.core.buffer.default_buffer_prototype())
+        assert result is not None
+
+        serialized = json.loads(result.to_bytes())
+        expected = {
+            "chunks": [3],
+            "compressor": None,
+            "dtype": f"{dtype}0",
+            "fill_value": "WA==",
+            "filters": [{"id": "vlen-bytes"}],
+            "order": "C",
+            "shape": [3],
+            "zarr_format": 2,
+            "dimension_separator": ".",
+        }
+        assert serialized == expected
+
+        data = zarr.open_array(store=store, path="foo")[:]
+        expected = np.full((3,), b"X", dtype=dtype)
+        np.testing.assert_equal(data, expected)
+
+
+@pytest.mark.parametrize("dtype_value", [["|S", b"Y"], ["|U", "Y"], ["O", b"Y"]])
+def test_v2_encode_decode_with_data(dtype_value):
+    dtype, value = dtype_value
+    with config.set(
+        {
+            "v2_default_compressors": {
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        expected = np.full((3,), value, dtype=dtype)
+        a = zarr.create(
+            shape=(3,),
+            zarr_format=2,
+            dtype=dtype,
+        )
+        a[:] = expected
+        data = a[:]
+        np.testing.assert_equal(data, expected)
 
 
 @pytest.mark.parametrize("dtype", [str, "str"])
@@ -119,10 +148,10 @@ async def test_create_dtype_str(dtype: Any) -> None:
     arr = zarr.create(shape=3, dtype=dtype, zarr_format=2)
     assert arr.dtype.kind == "O"
     assert arr.metadata.to_dict()["dtype"] == "|O"
-    assert arr.metadata.filters == (numcodecs.vlen.VLenUTF8(),)
-    arr[:] = ["a", "bb", "ccc"]
+    assert arr.metadata.filters == (numcodecs.vlen.VLenBytes(),)
+    arr[:] = [b"a", b"bb", b"ccc"]
     result = arr[:]
-    np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object"))
+    np.testing.assert_array_equal(result, np.array([b"a", b"bb", b"ccc"], dtype="object"))
 
 
 @pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]])
@@ -177,3 +206,22 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
         assert a.flags.c_contiguous
     arr[slice(6, 9, None), slice(3, 6, None)] = a
     np.testing.assert_array_equal(arr[slice(6, 9, None), slice(3, 6, None)], a)
+
+
+@pytest.mark.parametrize(
+    "dtype_expected",
+    [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-utf8"]],
+)
+def test_default_filters_and_compressor(dtype_expected: Any) -> None:
+    with config.set(
+        {
+            "v2_dtype_kind_to_default_filters_and_compressor": {
+                "numeric": ["zstd"],
+                "unicode": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
+            },
+        }
+    ):
+        dtype, expected = dtype_expected
+        arr = zarr.create(shape=(3,), path="foo", store={}, zarr_format=2, dtype=dtype)
+        assert arr.metadata.filters[0].codec_id == expected

From fcbae8bdeda8aba34145f83bf65fb07608734804 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 19:27:34 +0100
Subject: [PATCH 14/35] fix mypy

---
 src/zarr/api/asynchronous.py | 3 +--
 src/zarr/core/metadata/v2.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index dc199f28ff..1cd7125c55 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -898,8 +898,7 @@ async def create(
         dtype = parse_dtype(dtype, zarr_format)
         if not filters and not compressor:
             filters, compressor = _default_filters_and_compressor(dtype)
-
-    elif zarr_format == 3 and chunk_shape is None:
+    elif zarr_format == 3 and chunk_shape is None: #type: ignore[redundant-expr]
         if chunks is not None:
             chunk_shape = chunks
             chunks = None
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 1cbc4d79a9..abbadfe00d 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -331,7 +331,7 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
 
 def _default_filters_and_compressor(
     dtype: np.dtype[Any],
-) -> tuple[list[dict[str, str]], dict[str, str] | None]:
+) -> tuple[list[dict[str, JSON]], dict[str, JSON] | None]:
     """Get the default filters and compressor for a dtype.
 
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html

From a77fb0d7b122b4222746f175403005e4775b7362 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 11 Dec 2024 19:37:44 +0100
Subject: [PATCH 15/35] allow only one default compressor

---
 src/zarr/api/asynchronous.py |  4 ++--
 src/zarr/core/config.py      |  8 ++++----
 src/zarr/core/metadata/v2.py |  6 +++---
 tests/test_config.py         |  8 ++++----
 tests/test_v2.py             | 18 +++++++++---------
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 1cd7125c55..7aabe416e1 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -822,7 +822,7 @@ async def create(
         - For numeric arrays, the default is `ZstdCodec`.
         - For Unicode strings, the default is `VLenUTF8Codec`.
         - For bytes or objects, the default is `VLenBytesCodec`.
-        These defaults can be changed using the `v2_default_compressors` variable in the Zarr config.
+        These defaults can be changed using the `v2_default_compressor` variable in the Zarr config.
     fill_value : object
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
@@ -898,7 +898,7 @@ async def create(
         dtype = parse_dtype(dtype, zarr_format)
         if not filters and not compressor:
             filters, compressor = _default_filters_and_compressor(dtype)
-    elif zarr_format == 3 and chunk_shape is None: #type: ignore[redundant-expr]
+    elif zarr_format == 3 and chunk_shape is None:  # type: ignore[redundant-expr]
         if chunks is not None:
             chunk_shape = chunks
             chunks = None
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index e5ab29b6c9..1d64ae2056 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -64,10 +64,10 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
-            "v2_default_compressors": {
-                "numeric": ["zstd"],
-                "unicode": ["vlen-utf8"],
-                "bytes": ["vlen-bytes"],
+            "v2_default_compressor": {
+                "numeric": "zstd",
+                "string": "vlen-utf8",
+                "bytes": "vlen-bytes",
             },
         }
     ],
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index abbadfe00d..04dee87ca4 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -337,14 +337,14 @@ def _default_filters_and_compressor(
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
     dtype = np.dtype(dtype)
-    default_compressors = config.get("v2_default_compressors")
+    default_compressor = config.get("v2_default_compressor")
     if dtype.kind in "biufcmM":
         dtype_key = "numeric"
     elif dtype.kind in "U":
-        dtype_key = "unicode"
+        dtype_key = "string"
     elif dtype.kind in "OSV":
         dtype_key = "bytes"
     else:
         raise ValueError(f"Unsupported dtype kind {dtype.kind}")
 
-    return [{"id": f} for f in default_compressors[dtype_key]], None
+    return [{"id": default_compressor[dtype_key]}], None
diff --git a/tests/test_config.py b/tests/test_config.py
index c46b456302..d24bd23333 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -63,10 +63,10 @@ def test_config_defaults_set() -> None:
                 "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
                 "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
             },
-            "v2_default_compressors": {
-                "numeric": ["zstd"],
-                "unicode": ["vlen-utf8"],
-                "bytes": ["vlen-bytes"],
+            "v2_default_compressor": {
+                "numeric": "zstd",
+                "string": "vlen-utf8",
+                "bytes": "vlen-bytes",
             },
         }
     ]
diff --git a/tests/test_v2.py b/tests/test_v2.py
index ba8544ff0f..9811a576d1 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -84,8 +84,8 @@ def test_codec_pipeline() -> None:
 async def test_v2_encode_decode(dtype):
     with config.set(
         {
-            "v2_default_compressors": {
-                "bytes": ["vlen-bytes"],
+            "v2_default_compressor": {
+                "bytes": "vlen-bytes",
             },
         }
     ):
@@ -126,9 +126,9 @@ def test_v2_encode_decode_with_data(dtype_value):
     dtype, value = dtype_value
     with config.set(
         {
-            "v2_default_compressors": {
-                "unicode": ["vlen-utf8"],
-                "bytes": ["vlen-bytes"],
+            "v2_default_compressor": {
+                "string": "vlen-utf8",
+                "bytes": "vlen-bytes",
             },
         }
     ):
@@ -215,10 +215,10 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
 def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
-            "v2_dtype_kind_to_default_filters_and_compressor": {
-                "numeric": ["zstd"],
-                "unicode": ["vlen-utf8"],
-                "bytes": ["vlen-bytes"],
+            "v2_default_compressor": {
+                "numeric": "zstd",
+                "string": "vlen-utf8",
+                "bytes": "vlen-bytes",
             },
         }
     ):

From 876e67dbee73a2ca3c78e9ad9349e0390b0b6245 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 17:01:59 +0100
Subject: [PATCH 16/35] put `v2_default_compressor` under `array`

---
 src/zarr/core/config.py      | 15 +++++++++------
 src/zarr/core/metadata/v2.py |  2 +-
 tests/test_config.py         | 14 ++++++++------
 tests/test_v2.py             |  8 +++-----
 4 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 1d64ae2056..163f122f8b 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -42,7 +42,14 @@ def reset(self) -> None:
     defaults=[
         {
             "default_zarr_version": 3,
-            "array": {"order": "C"},
+            "array": {
+                "order": "C",
+                "v2_default_compressor": {
+                    "numeric": "zstd",
+                    "string": "vlen-utf8",
+                    "bytes": "vlen-bytes",
+                },
+            },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
             "json_indent": 2,
@@ -64,11 +71,7 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
-            "v2_default_compressor": {
-                "numeric": "zstd",
-                "string": "vlen-utf8",
-                "bytes": "vlen-bytes",
-            },
+
         }
     ],
 )
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index d1546d16b2..b66e41bf0f 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -337,7 +337,7 @@ def _default_filters_and_compressor(
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
     dtype = np.dtype(dtype)
-    default_compressor = config.get("v2_default_compressor")
+    default_compressor = config.get("array.v2_default_compressor")
     if dtype.kind in "biufcmM":
         dtype_key = "numeric"
     elif dtype.kind in "U":
diff --git a/tests/test_config.py b/tests/test_config.py
index d24bd23333..437b2a56b8 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -41,7 +41,14 @@ def test_config_defaults_set() -> None:
     assert config.defaults == [
         {
             "default_zarr_version": 3,
-            "array": {"order": "C"},
+            "array": {
+                "order": "C",
+                "v2_default_compressor": {
+                    "numeric": "zstd",
+                    "string": "vlen-utf8",
+                    "bytes": "vlen-bytes",
+                },
+            },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
             "json_indent": 2,
@@ -63,11 +70,6 @@ def test_config_defaults_set() -> None:
                 "vlen-utf8": "zarr.codecs.vlen_utf8.VLenUTF8Codec",
                 "vlen-bytes": "zarr.codecs.vlen_utf8.VLenBytesCodec",
             },
-            "v2_default_compressor": {
-                "numeric": "zstd",
-                "string": "vlen-utf8",
-                "bytes": "vlen-bytes",
-            },
         }
     ]
     assert config.get("array.order") == "C"
diff --git a/tests/test_v2.py b/tests/test_v2.py
index b95b491a41..defd86a685 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -84,9 +84,7 @@ def test_codec_pipeline() -> None:
 async def test_v2_encode_decode(dtype):
     with config.set(
         {
-            "v2_default_compressor": {
-                "bytes": "vlen-bytes",
-            },
+            "array.v2_default_compressor.bytes": "vlen-bytes",
         }
     ):
         store = zarr.storage.MemoryStore()
@@ -126,7 +124,7 @@ def test_v2_encode_decode_with_data(dtype_value):
     dtype, value = dtype_value
     with config.set(
         {
-            "v2_default_compressor": {
+            "array.v2_default_compressor": {
                 "string": "vlen-utf8",
                 "bytes": "vlen-bytes",
             },
@@ -215,7 +213,7 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
 def test_default_filters_and_compressor(dtype_expected: Any) -> None:
     with config.set(
         {
-            "v2_default_compressor": {
+            "array.v2_default_compressor": {
                 "numeric": "zstd",
                 "string": "vlen-utf8",
                 "bytes": "vlen-bytes",

From 12dfaf43195ccbbaf611f87759110e2b0fe07a14 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 17:38:31 +0100
Subject: [PATCH 17/35] deprecate zarr.storage.default_compressor

---
 src/zarr/core/config.py      |  7 +++----
 src/zarr/storage/__init__.py | 22 ++++++++++++++++++++++
 tests/test_v2.py             | 11 ++++++-----
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 163f122f8b..9b3b20e5a6 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -31,10 +31,10 @@ def reset(self) -> None:
 # The config module is responsible for managing the configuration of zarr and  is based on the Donfig python library.
 # For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
 # in the registry and then select them in the config.
-# e.g. an implementation of the bytes codec in a class "NewBytesCodec", requires the value of codecs.bytes.name to be
-# "NewBytesCodec".
+# e.g. an implementation of the bytes codec in a class "your.module.NewBytesCodec", requires the value of codecs.bytes
+# to be "your.module.NewBytesCodec".
 # Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
-# e.g. export ZARR_CODECS__BYTES__NAME="NewBytesCodec"
+# e.g. export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
 # (for more information see github.com/pytroll/donfig)
 # Default values below point to the standard implementations of zarr-python
 config = Config(
@@ -71,7 +71,6 @@ def reset(self) -> None:
             },
             "buffer": "zarr.core.buffer.cpu.Buffer",
             "ndbuffer": "zarr.core.buffer.cpu.NDBuffer",
-
         }
     ],
 )
diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py
index 17b11f54a6..282f8ed9c3 100644
--- a/src/zarr/storage/__init__.py
+++ b/src/zarr/storage/__init__.py
@@ -1,3 +1,8 @@
+import sys
+import warnings
+from types import ModuleType
+from typing import Any
+
 from zarr.storage.common import StoreLike, StorePath, make_store_path
 from zarr.storage.local import LocalStore
 from zarr.storage.logging import LoggingStore
@@ -17,3 +22,20 @@
     "ZipStore",
     "make_store_path",
 ]
+
+
+class VerboseModule(ModuleType):  # module type that warns on deprecated attribute writes
+    def __setattr__(self, attr: str, value: Any) -> None:  # intercepts `module.attr = value`
+        if attr == "default_compressor":  # deprecated knob: warn and drop the assignment
+            warnings.warn(
+                "setting zarr.storage.default_compressor is deprecated, use "
+                "zarr.config to configure array.v2_default_compressor "
+                "e.g. config.set({'codecs.zstd':'your.module.Zstd', 'array.v2_default_compressor.numeric': 'zstd'})",
+                DeprecationWarning,
+                stacklevel=2,  # attribute the warning to the caller's assignment statement
+            )
+        else:
+            super().__setattr__(attr, value)  # every other attribute is stored normally
+
+
+sys.modules[__name__].__class__ = VerboseModule
diff --git a/tests/test_v2.py b/tests/test_v2.py
index defd86a685..205b0fdf52 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -82,11 +82,7 @@ def test_codec_pipeline() -> None:
 
 @pytest.mark.parametrize("dtype", ["|S", "|V"])
 async def test_v2_encode_decode(dtype):
-    with config.set(
-        {
-            "array.v2_default_compressor.bytes": "vlen-bytes",
-        }
-    ):
+    with config.set({"array.v2_default_compressor.bytes": "vlen-bytes"}):
         store = zarr.storage.MemoryStore()
         g = zarr.group(store=store, zarr_format=2)
         g.create_array(
@@ -206,6 +202,11 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
     np.testing.assert_array_equal(arr[slice(6, 9, None), slice(3, 6, None)], a)
 
 
+def test_default_compressor_deprecation_warning():
+    with pytest.warns(DeprecationWarning):
+        zarr.storage.default_compressor = "zarr.codecs.zstd.ZstdCodec()"
+
+
 @pytest.mark.parametrize(
     "dtype_expected",
     [["b", "zstd"], ["i", "zstd"], ["f", "zstd"], ["|S1", "vlen-bytes"], ["|U1", "vlen-utf8"]],

From 6954b601124859c34729e494e01029c8c60b8e6e Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 18:01:00 +0100
Subject: [PATCH 18/35] test v3_default_codecs

---
 src/zarr/core/config.py      |  5 +++++
 src/zarr/core/metadata/v2.py |  1 -
 tests/test_config.py         | 18 +++++++++++++++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 9b3b20e5a6..809d23d86c 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -49,6 +49,11 @@ def reset(self) -> None:
                     "string": "vlen-utf8",
                     "bytes": "vlen-bytes",
                 },
+                "v3_default_codecs": {
+                    "numeric": ["bytes", "zstd"],
+                    "string": ["vlen-utf8"],
+                    "bytes": ["vlen-bytes"],
+                },
             },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index b66e41bf0f..bd0fbecf4a 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -336,7 +336,6 @@ def _default_filters_and_compressor(
 
     https://numpy.org/doc/2.1/reference/generated/numpy.dtype.kind.html
     """
-    dtype = np.dtype(dtype)
     default_compressor = config.get("array.v2_default_compressor")
     if dtype.kind in "biufcmM":
         dtype_key = "numeric"
diff --git a/tests/test_config.py b/tests/test_config.py
index 437b2a56b8..56b20d4b41 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -11,7 +11,8 @@
 from zarr import Array, zeros
 from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline
 from zarr.abc.store import ByteSetter, Store
-from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec
+from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec, TransposeCodec, GzipCodec, VLenBytesCodec, \
+    VLenUTF8Codec
 from zarr.core.array_spec import ArraySpec
 from zarr.core.buffer import NDBuffer
 from zarr.core.codec_pipeline import BatchedCodecPipeline
@@ -239,3 +240,18 @@ def test_config_buffer_implementation() -> None:
     )
     arr_Crc32c[:] = data2d
     assert np.array_equal(arr_Crc32c[:], data2d)
+
+@pytest.mark.parametrize("dtype", ["int", "bytes", "str"])
+def test_default_codecs(dtype:str) -> None:
+    with config.set({"array.v3_default_codecs": {
+        "numeric": ["bytes", "gzip"], # test setting non-standard codecs
+        "string": ["vlen-utf8"],
+        "bytes": ["vlen-bytes"],
+    }}):
+        arr = zeros(shape=(100), store=StoreExpectingTestBuffer(), dtype=dtype)
+        if dtype == "int":
+            assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
+        elif dtype == "bytes":
+            assert arr.metadata.codecs == [VLenBytesCodec()]
+        elif dtype == "str":
+            assert arr.metadata.codecs == [VLenUTF8Codec()]

From 80dfc40059f4c86b5a72be2949b5d4b216ef58cb Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 18:21:53 +0100
Subject: [PATCH 19/35] use v3_default_codecs

---
 src/zarr/codecs/__init__.py | 18 ---------
 src/zarr/core/array.py      | 24 ++++++++----
 tests/test_config.py        | 76 +++++++++++++++++++++----------------
 3 files changed, 61 insertions(+), 57 deletions(-)

diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py
index e407d94892..165dbe476d 100644
--- a/src/zarr/codecs/__init__.py
+++ b/src/zarr/codecs/__init__.py
@@ -1,10 +1,5 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    import numpy as np
-
 from zarr.codecs.blosc import BloscCname, BloscCodec, BloscShuffle
 from zarr.codecs.bytes import BytesCodec, Endian
 from zarr.codecs.crc32c_ import Crc32cCodec
@@ -13,7 +8,6 @@
 from zarr.codecs.transpose import TransposeCodec
 from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec
 from zarr.codecs.zstd import ZstdCodec
-from zarr.core.metadata.v3 import DataType
 
 __all__ = [
     "BloscCname",
@@ -30,15 +24,3 @@
     "VLenUTF8Codec",
     "ZstdCodec",
 ]
-
-
-def _get_default_array_bytes_codec(
-    np_dtype: np.dtype[Any],
-) -> BytesCodec | VLenUTF8Codec | VLenBytesCodec:
-    dtype = DataType.from_numpy(np_dtype)
-    if dtype == DataType.string:
-        return VLenUTF8Codec()
-    elif dtype == DataType.bytes:
-        return VLenBytesCodec()
-    else:
-        return BytesCodec()
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 32d30562fa..5b255cd13f 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -12,7 +12,6 @@
 
 from zarr._compat import _deprecate_positional_args
 from zarr.abc.store import Store, set_or_delete
-from zarr.codecs import _get_default_array_bytes_codec
 from zarr.codecs._v2 import V2Codec
 from zarr.core._info import ArrayInfo
 from zarr.core.attributes import Attributes
@@ -78,7 +77,7 @@
     T_ArrayMetadata,
 )
 from zarr.core.metadata.v2 import _default_filters_and_compressor
-from zarr.core.metadata.v3 import parse_node_type_array
+from zarr.core.metadata.v3 import DataType, parse_node_type_array
 from zarr.core.sync import sync
 from zarr.errors import MetadataValidationError
 from zarr.registry import get_pipeline_class
@@ -556,11 +555,7 @@ async def _create_v3(
             await ensure_no_existing_node(store_path, zarr_format=3)
 
         shape = parse_shapelike(shape)
-        codecs = (
-            list(codecs)
-            if codecs is not None
-            else [_get_default_array_bytes_codec(np.dtype(dtype))]
-        )
+        codecs = list(codecs) if codecs is not None else _get_default_codecs(np.dtype(dtype))
 
         if chunk_key_encoding is None:
             chunk_key_encoding = ("default", "/")
@@ -3318,3 +3313,18 @@ def _build_parents(
         )
 
     return parents
+
+
+def _get_default_codecs(
+    np_dtype: np.dtype[Any],
+) -> list[dict[str, JSON]]:
+    default_codecs = config.get("array.v3_default_codecs")
+    dtype = DataType.from_numpy(np_dtype)
+    if dtype == DataType.string:
+        dtype_key = "string"
+    elif dtype == DataType.bytes:
+        dtype_key = "bytes"
+    else:
+        dtype_key = "numeric"
+
+    return [{"name": codec_id, "configuration": {}} for codec_id in default_codecs[dtype_key]]
diff --git a/tests/test_config.py b/tests/test_config.py
index 56b20d4b41..eeeedec3fd 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -11,8 +11,15 @@
 from zarr import Array, zeros
 from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline
 from zarr.abc.store import ByteSetter, Store
-from zarr.codecs import BloscCodec, BytesCodec, Crc32cCodec, ShardingCodec, TransposeCodec, GzipCodec, VLenBytesCodec, \
-    VLenUTF8Codec
+from zarr.codecs import (
+    BloscCodec,
+    BytesCodec,
+    Crc32cCodec,
+    GzipCodec,
+    ShardingCodec,
+    VLenBytesCodec,
+    VLenUTF8Codec,
+)
 from zarr.core.array_spec import ArraySpec
 from zarr.core.buffer import NDBuffer
 from zarr.core.codec_pipeline import BatchedCodecPipeline
@@ -216,39 +223,44 @@ def test_config_buffer_implementation() -> None:
         arr[:] = np.arange(100)
 
     register_buffer(TestBuffer)
-    config.set({"buffer": fully_qualified_name(TestBuffer)})
-    assert get_buffer_class() == TestBuffer
-
-    # no error using TestBuffer
-    data = np.arange(100)
-    arr[:] = np.arange(100)
-    assert np.array_equal(arr[:], data)
-
-    data2d = np.arange(1000).reshape(100, 10)
-    arr_sharding = zeros(
-        shape=(100, 10),
-        store=StoreExpectingTestBuffer(),
-        codecs=[ShardingCodec(chunk_shape=(10, 10))],
-    )
-    arr_sharding[:] = data2d
-    assert np.array_equal(arr_sharding[:], data2d)
+    with config.set({"buffer": fully_qualified_name(TestBuffer)}):
+        assert get_buffer_class() == TestBuffer
+
+        # no error using TestBuffer
+        data = np.arange(100)
+        arr[:] = np.arange(100)
+        assert np.array_equal(arr[:], data)
+
+        data2d = np.arange(1000).reshape(100, 10)
+        arr_sharding = zeros(
+            shape=(100, 10),
+            store=StoreExpectingTestBuffer(),
+            codecs=[ShardingCodec(chunk_shape=(10, 10))],
+        )
+        arr_sharding[:] = data2d
+        assert np.array_equal(arr_sharding[:], data2d)
+
+        arr_Crc32c = zeros(
+            shape=(100, 10),
+            store=StoreExpectingTestBuffer(),
+            codecs=[BytesCodec(), Crc32cCodec()],
+        )
+        arr_Crc32c[:] = data2d
+        assert np.array_equal(arr_Crc32c[:], data2d)
 
-    arr_Crc32c = zeros(
-        shape=(100, 10),
-        store=StoreExpectingTestBuffer(),
-        codecs=[BytesCodec(), Crc32cCodec()],
-    )
-    arr_Crc32c[:] = data2d
-    assert np.array_equal(arr_Crc32c[:], data2d)
 
 @pytest.mark.parametrize("dtype", ["int", "bytes", "str"])
-def test_default_codecs(dtype:str) -> None:
-    with config.set({"array.v3_default_codecs": {
-        "numeric": ["bytes", "gzip"], # test setting non-standard codecs
-        "string": ["vlen-utf8"],
-        "bytes": ["vlen-bytes"],
-    }}):
-        arr = zeros(shape=(100), store=StoreExpectingTestBuffer(), dtype=dtype)
+def test_default_codecs(dtype: str) -> None:
+    with config.set(
+        {
+            "array.v3_default_codecs": {
+                "numeric": ["bytes", "gzip"],  # test setting non-standard codecs
+                "string": ["vlen-utf8"],
+                "bytes": ["vlen-bytes"],
+            }
+        }
+    ):
+        arr = zeros(shape=(100), dtype=dtype)
         if dtype == "int":
             assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
         elif dtype == "bytes":

From 6001e93789cd74317d0b76b474357609ba0f3c11 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 19:09:14 +0100
Subject: [PATCH 20/35] fix tests that expected codecs==["bytes"]

---
 tests/test_array.py                      | 18 +++++++++++-------
 tests/test_config.py                     |  5 +++++
 tests/test_group.py                      |  5 ++++-
 tests/test_metadata/test_consolidated.py | 10 ++++++++--
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/tests/test_array.py b/tests/test_array.py
index b6d82a95ac..feebbc687b 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -12,7 +12,7 @@
 
 import zarr.api.asynchronous
 from zarr import Array, AsyncArray, Group
-from zarr.codecs import BytesCodec, VLenBytesCodec
+from zarr.codecs import BytesCodec, VLenBytesCodec, ZstdCodec
 from zarr.core._info import ArrayInfo
 from zarr.core.array import chunks_initialized
 from zarr.core.buffer import default_buffer_prototype
@@ -376,7 +376,7 @@ async def test_chunks_initialized() -> None:
 
 
 def test_nbytes_stored() -> None:
-    arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4")
+    arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()])
     result = arr.nbytes_stored()
     assert result == 366  # the size of the metadata document. This is a fragile test.
     arr[:50] = 1
@@ -388,7 +388,9 @@ def test_nbytes_stored() -> None:
 
 
 async def test_nbytes_stored_async() -> None:
-    arr = await zarr.api.asynchronous.create(shape=(100,), chunks=(10,), dtype="i4")
+    arr = await zarr.api.asynchronous.create(
+        shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()]
+    )
     result = await arr.nbytes_stored()
     assert result == 366  # the size of the metadata document. This is a fragile test.
     await arr.setitem(slice(50), 1)
@@ -473,13 +475,13 @@ def test_info_v3(self) -> None:
             _order="C",
             _read_only=False,
             _store_type="MemoryStore",
-            _codecs=[BytesCodec()],
+            _codecs=[BytesCodec(), ZstdCodec()],
             _count_bytes=128,
         )
         assert result == expected
 
     def test_info_complete(self) -> None:
-        arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
+        arr = zarr.create(shape=(4, 4), chunks=(2, 2), zarr_format=3, codecs=[BytesCodec()])
         result = arr.info_complete()
         expected = ArrayInfo(
             _zarr_format=3,
@@ -530,13 +532,15 @@ async def test_info_v3_async(self) -> None:
             _order="C",
             _read_only=False,
             _store_type="MemoryStore",
-            _codecs=[BytesCodec()],
+            _codecs=[BytesCodec(), ZstdCodec()],
             _count_bytes=128,
         )
         assert result == expected
 
     async def test_info_complete_async(self) -> None:
-        arr = await zarr.api.asynchronous.create(shape=(4, 4), chunks=(2, 2), zarr_format=3)
+        arr = await zarr.api.asynchronous.create(
+            shape=(4, 4), chunks=(2, 2), zarr_format=3, codecs=[BytesCodec()]
+        )
         result = await arr.info_complete()
         expected = ArrayInfo(
             _zarr_format=3,
diff --git a/tests/test_config.py b/tests/test_config.py
index eeeedec3fd..6860427908 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -56,6 +56,11 @@ def test_config_defaults_set() -> None:
                     "string": "vlen-utf8",
                     "bytes": "vlen-bytes",
                 },
+                "v3_default_codecs": {
+                    "bytes": ["vlen-bytes"],
+                    "numeric": ["bytes", "zstd"],
+                    "string": ["vlen-utf8"],
+                },
             },
             "async": {"concurrency": 10, "timeout": None},
             "threading": {"max_workers": None},
diff --git a/tests/test_group.py b/tests/test_group.py
index ef5196067b..e0bc304b9b 100644
--- a/tests/test_group.py
+++ b/tests/test_group.py
@@ -523,7 +523,10 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat
                         "configuration": {"separator": "/"},
                         "name": "default",
                     },
-                    "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},),
+                    "codecs": (
+                        {"configuration": {"endian": "little"}, "name": "bytes"},
+                        {"configuration": {}, "name": "zstd"},
+                    ),
                     "data_type": "float64",
                     "fill_value": fill_value,
                     "node_type": "array",
diff --git a/tests/test_metadata/test_consolidated.py b/tests/test_metadata/test_consolidated.py
index 26e9904608..7f0c49338e 100644
--- a/tests/test_metadata/test_consolidated.py
+++ b/tests/test_metadata/test_consolidated.py
@@ -72,7 +72,10 @@ async def test_consolidated(self, memory_store_with_hierarchy: Store) -> None:
                 "configuration": {"separator": "/"},
                 "name": "default",
             },
-            "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},),
+            "codecs": (
+                {"configuration": {"endian": "little"}, "name": "bytes"},
+                {"configuration": {}, "name": "zstd"},
+            ),
             "data_type": "float64",
             "fill_value": np.float64(0.0),
             "node_type": "array",
@@ -216,7 +219,10 @@ def test_consolidated_sync(self, memory_store):
                 "configuration": {"separator": "/"},
                 "name": "default",
             },
-            "codecs": ({"configuration": {"endian": "little"}, "name": "bytes"},),
+            "codecs": (
+                {"configuration": {"endian": "little"}, "name": "bytes"},
+                {"configuration": {}, "name": "zstd"},
+            ),
             "data_type": "float64",
             "fill_value": np.float64(0.0),
             "node_type": "array",

From ff766179df6b02b9fb222e1de88970bbf2317721 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 19:33:45 +0100
Subject: [PATCH 21/35] fix test_default_codecs

---
 tests/test_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_config.py b/tests/test_config.py
index 6860427908..6952c9c7fe 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -254,7 +254,7 @@ def test_config_buffer_implementation() -> None:
         assert np.array_equal(arr_Crc32c[:], data2d)
 
 
-@pytest.mark.parametrize("dtype", ["int", "bytes", "str"])
+@pytest.mark.parametrize("dtype", ["int", "bytes", str])
 def test_default_codecs(dtype: str) -> None:
     with config.set(
         {
@@ -265,7 +265,7 @@ def test_default_codecs(dtype: str) -> None:
             }
         }
     ):
-        arr = zeros(shape=(100), dtype=dtype)
+        arr = zeros(shape=(100), dtype=np.dtype(dtype), zarr_format=3)
         if dtype == "int":
             assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
         elif dtype == "bytes":

From f04e0e6c52a00cd3b65699d89a56e046f9f2fe3a Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 19:48:38 +0100
Subject: [PATCH 22/35] fail-fast: false

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 1c25dcb1f4..770241c59c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -19,6 +19,7 @@ jobs:
     name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
 
     strategy:
+      fail-fast: false
       matrix:
         python-version: ['3.11', '3.12', '3.13']
         numpy-version: ['1.25', '2.1']

From f63bb671d6530e279a9d1aea488bdc25436b05d7 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 20:25:17 +0100
Subject: [PATCH 23/35] fix string codecs for np1.25

---
 .github/workflows/test.yml   |  1 -
 src/zarr/core/metadata/v3.py |  6 +++++-
 tests/test_config.py         | 25 ++++++++++++++-----------
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 770241c59c..1c25dcb1f4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -19,7 +19,6 @@ jobs:
     name: os=${{ matrix.os }}, py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }}
 
     strategy:
-      fail-fast: false
       matrix:
         python-version: ['3.11', '3.12', '3.13']
         numpy-version: ['1.25', '2.1']
diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index b800ae4d73..eeaab217c3 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -37,7 +37,7 @@
 )
 from zarr.core.config import config
 from zarr.core.metadata.common import parse_attributes
-from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE
+from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE, _NUMPY_SUPPORTS_VLEN_STRING
 from zarr.errors import MetadataValidationError, NodeTypeValidationError
 from zarr.registry import get_codec_class
 
@@ -606,6 +606,10 @@ def from_numpy(cls, dtype: np.dtype[Any]) -> DataType:
             return DataType.string
         elif dtype.kind == "S":
             return DataType.bytes
+        elif not _NUMPY_SUPPORTS_VLEN_STRING and dtype.kind == "O":
+            # numpy < 2.0 does not support vlen string dtype
+            # so we fall back on object array of strings
+            return DataType.string
         dtype_to_data_type = {
             "|b1": "bool",
             "bool": "bool",
diff --git a/tests/test_config.py b/tests/test_config.py
index 6952c9c7fe..93c2acd37d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -8,8 +8,8 @@
 import pytest
 
 import zarr
-from zarr import Array, zeros
-from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline
+from zarr import Array, zeros, AsyncArray
+from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline, Codec
 from zarr.abc.store import ByteSetter, Store
 from zarr.codecs import (
     BloscCodec,
@@ -25,6 +25,7 @@
 from zarr.core.codec_pipeline import BatchedCodecPipeline
 from zarr.core.config import BadConfigError, config
 from zarr.core.indexing import SelectorTuple
+from zarr.core.strings import _STRING_DTYPE
 from zarr.registry import (
     fully_qualified_name,
     get_buffer_class,
@@ -36,6 +37,7 @@
     register_ndbuffer,
     register_pipeline,
 )
+from zarr.storage import MemoryStore
 from zarr.testing.buffer import (
     NDBufferUsingTestNDArrayLike,
     StoreExpectingTestBuffer,
@@ -254,8 +256,14 @@ def test_config_buffer_implementation() -> None:
         assert np.array_equal(arr_Crc32c[:], data2d)
 
 
-@pytest.mark.parametrize("dtype", ["int", "bytes", str])
-def test_default_codecs(dtype: str) -> None:
+@pytest.mark.parametrize(("dtype", "expected_codecs"),
+    [
+        ("int", [BytesCodec(), GzipCodec()]),
+        ("bytes", [VLenBytesCodec()]),
+        ("str", [VLenUTF8Codec()]),
+    ]
+                         )
+async def test_default_codecs(dtype: str, expected_codecs: list[Codec]) -> None:
     with config.set(
         {
             "array.v3_default_codecs": {
@@ -265,10 +273,5 @@ def test_default_codecs(dtype: str) -> None:
             }
         }
     ):
-        arr = zeros(shape=(100), dtype=np.dtype(dtype), zarr_format=3)
-        if dtype == "int":
-            assert arr.metadata.codecs == [BytesCodec(), GzipCodec()]
-        elif dtype == "bytes":
-            assert arr.metadata.codecs == [VLenBytesCodec()]
-        elif dtype == "str":
-            assert arr.metadata.codecs == [VLenUTF8Codec()]
+        arr = await AsyncArray.create(shape=(100,), chunk_shape=(100,),dtype=np.dtype(dtype), zarr_format=3, store=MemoryStore())
+        assert arr.metadata.codecs == expected_codecs

From 00e241ead3ee1a42f58bbe34c3502e0f413dfe4c Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Sat, 14 Dec 2024 20:37:29 +0100
Subject: [PATCH 24/35] format

---
 src/zarr/core/metadata/v3.py |  3 ++-
 tests/test_config.py         | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py
index eeaab217c3..3f8c5def64 100644
--- a/src/zarr/core/metadata/v3.py
+++ b/src/zarr/core/metadata/v3.py
@@ -37,7 +37,8 @@
 )
 from zarr.core.config import config
 from zarr.core.metadata.common import parse_attributes
-from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE, _NUMPY_SUPPORTS_VLEN_STRING
+from zarr.core.strings import _NUMPY_SUPPORTS_VLEN_STRING
+from zarr.core.strings import _STRING_DTYPE as STRING_NP_DTYPE
 from zarr.errors import MetadataValidationError, NodeTypeValidationError
 from zarr.registry import get_codec_class
 
diff --git a/tests/test_config.py b/tests/test_config.py
index 93c2acd37d..d7b04eb21c 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -8,8 +8,8 @@
 import pytest
 
 import zarr
-from zarr import Array, zeros, AsyncArray
-from zarr.abc.codec import CodecInput, CodecOutput, CodecPipeline, Codec
+from zarr import Array, AsyncArray, zeros
+from zarr.abc.codec import Codec, CodecInput, CodecOutput, CodecPipeline
 from zarr.abc.store import ByteSetter, Store
 from zarr.codecs import (
     BloscCodec,
@@ -25,7 +25,6 @@
 from zarr.core.codec_pipeline import BatchedCodecPipeline
 from zarr.core.config import BadConfigError, config
 from zarr.core.indexing import SelectorTuple
-from zarr.core.strings import _STRING_DTYPE
 from zarr.registry import (
     fully_qualified_name,
     get_buffer_class,
@@ -256,13 +255,14 @@ def test_config_buffer_implementation() -> None:
         assert np.array_equal(arr_Crc32c[:], data2d)
 
 
-@pytest.mark.parametrize(("dtype", "expected_codecs"),
+@pytest.mark.parametrize(
+    ("dtype", "expected_codecs"),
     [
         ("int", [BytesCodec(), GzipCodec()]),
         ("bytes", [VLenBytesCodec()]),
         ("str", [VLenUTF8Codec()]),
-    ]
-                         )
+    ],
+)
 async def test_default_codecs(dtype: str, expected_codecs: list[Codec]) -> None:
     with config.set(
         {
@@ -273,5 +273,11 @@ async def test_default_codecs(dtype: str, expected_codecs: list[Codec]) -> None:
             }
         }
     ):
-        arr = await AsyncArray.create(shape=(100,), chunk_shape=(100,),dtype=np.dtype(dtype), zarr_format=3, store=MemoryStore())
+        arr = await AsyncArray.create(
+            shape=(100,),
+            chunk_shape=(100,),
+            dtype=np.dtype(dtype),
+            zarr_format=3,
+            store=MemoryStore(),
+        )
         assert arr.metadata.codecs == expected_codecs

From 58406c813813f6860fd61d8b4e10a355e9b4094e Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 12:58:02 +0100
Subject: [PATCH 25/35] add docstrings to create in asynchronous.py and
 array.py

---
 src/zarr/api/asynchronous.py | 38 ++++++++++++----
 src/zarr/core/array.py       | 86 +++++++++++++++++++++++++++---------
 2 files changed, 94 insertions(+), 30 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index ff89b6e4a5..a19f56e06d 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -394,7 +394,7 @@ async def save_array(
     arr : ndarray
         NumPy array with data to save.
     zarr_format : {2, 3, None}, optional
-        The zarr format to use when saving.
+        The zarr format to use when saving (default is 3).
     path : str or None, optional
         The path within the store where the array will be saved.
     storage_options : dict
@@ -810,24 +810,40 @@ async def create(
     shape : int or tuple of ints
         Array shape.
     chunks : int or tuple of ints, optional
-        Chunk shape. If True, will be guessed from `shape` and `dtype`. If
-        False, will be set to `shape`, i.e., single chunk for the whole array.
-        If an int, the chunk size in each dimension will be given by the value
-        of `chunks`. Default is True.
+        The shape of the array's chunks.
+        V2 only. V3 arrays should use `chunk_shape` instead.
+        Default values are guessed based on the shape and dtype.
     dtype : str or dtype, optional
         NumPy dtype.
+    chunk_shape : int or tuple of ints, optional
+        The shape of the Array's chunks (default is None).
+        V3 only. V2 arrays should use `chunks` instead.
+    chunk_key_encoding : ChunkKeyEncoding, optional
+        A specification of how the chunk keys are represented in storage.
+        V3 only. V2 arrays should use `dimension_separator` instead.
+        Default is ("default", "/").
+    codecs : Sequence of Codecs or dicts, optional
+        An iterable of Codec or dict serializations thereof. The elements of
+        this collection specify the transformation from array values to stored bytes.
+        V3 only. V2 arrays should use `filters` and `compressor` instead.
+        If no codecs are provided, default codecs will be used:
+        - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+        - For Unicode strings, the default is `VLenUTF8Codec`.
+        - For bytes or objects, the default is `VLenBytesCodec`.
+        These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
     compressor : Codec, optional
-        Primary compressor for `zarr_format=2`.
+        Primary compressor to compress chunk data.
+        V2 only. V3 arrays should use `codecs` instead.
         If neither `compressor` nor `filters` are provided, a default compressor will be used:
         - For numeric arrays, the default is `ZstdCodec`.
         - For Unicode strings, the default is `VLenUTF8Codec`.
         - For bytes or objects, the default is `VLenBytesCodec`.
-        These defaults can be changed using the `v2_default_compressor` variable in the Zarr config.
+        These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
     fill_value : object
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
         Memory layout to be used within each chunk.
-        Default is set in Zarr's config (`array.order`).
+        Default is specified in the Zarr config `array.order`.
     store : Store or str
         Store or path to directory in file system or name of zip file.
     synchronizer : object, optional
@@ -842,6 +858,8 @@ async def create(
         for storage of both chunks and metadata.
     filters : sequence of Codecs, optional
         Sequence of filters to use to encode chunk data prior to compression.
+        V2 only. If neither `compressor` nor `filters` are provided, a default
+        compressor will be used. (see `compressor` for details)
     cache_metadata : bool, optional
         If True, array configuration metadata will be cached for the
         lifetime of the object. If False, array metadata will be reloaded
@@ -857,7 +875,8 @@ async def create(
         A codec to encode object arrays, only needed if dtype=object.
     dimension_separator : {'.', '/'}, optional
         Separator placed between the dimensions of a chunk.
-
+        V2 only. V3 arrays should use `chunk_key_encoding` instead.
+        Default is ".".
         .. versionadded:: 2.8
 
     write_empty_chunks : bool, optional
@@ -873,6 +892,7 @@ async def create(
 
     zarr_format : {2, 3, None}, optional
         The zarr format to use when saving.
+        Default is 3.
     meta_array : array-like, optional
         An array instance to use for determining arrays to create and return
         to users. Use `numpy.empty(())` by default.
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 5b255cd13f..990d7b0936 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -408,27 +408,47 @@ async def create(
         attributes : dict[str, JSON], optional
             The attributes of the array (default is None).
         chunk_shape : ChunkCoords, optional
-            The shape of the array's chunks (default is None).
+            The shape of the array's chunks.
+            V3 only. V2 arrays should use `chunks` instead.
+            Default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding, optional
-            The chunk key encoding (default is None).
-        codecs : Iterable[Codec | dict[str, JSON]], optional
-            The codecs used to encode the data (default is None).
+            A specification of how the chunk keys are represented in storage.
+            V3 only. V2 arrays should use `dimension_separator` instead.
+            Default is ("default", "/").
+        codecs : Sequence of Codecs or dicts, optional
+            An iterable of Codec or dict serializations thereof. The elements of
+            this collection specify the transformation from array values to stored bytes.
+            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            If no codecs are provided, default codecs will be used:
+            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
+            V3 only. V2 arrays should not use this parameter.
         chunks : ShapeLike, optional
-            The shape of the array's chunks (default is None).
-            V2 only. V3 arrays should not have 'chunks' parameter.
+            The shape of the array's chunks.
+            V2 only. V3 arrays should use `chunk_shape` instead.
+            Default values are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
-            The dimension separator (default is None).
-            V2 only. V3 arrays cannot have a dimension separator.
+            The dimension separator (default is ".").
+            V2 only. V3 arrays should use `chunk_key_encoding` instead.
         order : Literal["C", "F"], optional
-            The order of the array (default is None).
+            The order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]], optional
-            The filters used to compress the data (default is None).
-            V2 only. V3 arrays should not have 'filters' parameter.
+            Sequence of filters to use to encode chunk data prior to compression.
+            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
+            nor `filters` are provided, a default compressor will be used. (see
+            `compressor` for details)
         compressor : dict[str, JSON], optional
             The compressor used to compress the data (default is None).
-            V2 only. V3 arrays should not have 'compressor' parameter.
+            V2 only. V3 arrays should use `codecs` instead.
+            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            - For numeric arrays, the default is `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
         data : npt.ArrayLike, optional
@@ -1472,23 +1492,47 @@ def create(
         dtype : npt.DTypeLike
             The data type of the array.
         chunk_shape : ChunkCoords, optional
-            The shape of the Array's chunks (default is None).
+            The shape of the Array's chunks.
+            V3 only. V2 arrays should use `chunks` instead.
+            Default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding, optional
-            The chunk key encoding (default is None).
-        codecs : Iterable[Codec | dict[str, JSON]], optional
-            The codecs used to encode the data (default is None).
+            A specification of how the chunk keys are represented in storage.
+            V3 only. V2 arrays should use `dimension_separator` instead.
+            Default is ("default", "/").
+        codecs : Sequence of Codecs or dicts, optional
+            An iterable of Codec or dict serializations thereof. The elements of
+            this collection specify the transformation from array values to stored bytes.
+            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            If no codecs are provided, default codecs will be used:
+            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
+            V3 only. V2 arrays should not use this parameter.
         chunks : ChunkCoords, optional
-            The shape of the Array's chunks (default is None).
+            The shape of the array's chunks.
+            V2 only. V3 arrays should use `chunk_shape` instead.
+            Default values are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
-            The dimension separator (default is None).
+            The dimension separator (default is ".").
+            V2 only. V3 arrays should use `chunk_key_encoding` instead.
         order : Literal["C", "F"], optional
-            The order of the array (default is None).
+            The order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]], optional
-            The filters used to compress the data (default is None).
+            Sequence of filters to use to encode chunk data prior to compression.
+            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
+            nor `filters` are provided, a default compressor will be used. (see
+            `compressor` for details)
         compressor : dict[str, JSON], optional
-            The compressor used to compress the data (default is None).
+            Primary compressor to compress chunk data.
+            V2 only. V3 arrays should use `codecs` instead.
+            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            - For numeric arrays, the default is `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
 

From fc0998923ccd68652b4ea25f9beacf6789b621c4 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 13:19:30 +0100
Subject: [PATCH 26/35] add docstrings to creation in group.py

---
 src/zarr/core/group.py | 109 +++++++++++++++++++++++++++++++++--------
 1 file changed, 88 insertions(+), 21 deletions(-)

diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index f46c5126b2..8adce41c64 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1034,24 +1034,46 @@ async def create_array(
         dtype : np.DtypeLike = float64
             The data type of the array.
         chunk_shape : tuple[int, ...] | None = None
-            The shape of the chunks of the array. V3 only.
+            The shape of the chunks of the array.
+            V3 only. V2 arrays should use `chunks` instead.
+            Default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
+            V3 only. V2 arrays should use `dimension_separator` instead.
+            Default is ("default", "/").
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
             An iterable of Codec or dict serializations thereof. The elements of
             this collection specify the transformation from array values to stored bytes.
+            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            If no codecs are provided, default codecs will be used:
+            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
-            The shape of the chunks of the array. V2 only.
+            The shape of the chunks of the array.
+            V2 only. V3 arrays should use `chunk_shape` instead.
+            Default values are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
-            The delimiter used for the chunk keys.
+            The delimiter used for the chunk keys. (default: ".")
+            V2 only. V3 arrays should use `chunk_key_encoding` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array.
+            The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
-            Filters for the array.
+            Sequence of filters to use to encode chunk data prior to compression.
+            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
+            nor `filters` are provided, a default compressor will be used. (see
+            `compressor` for details)
         compressor : dict[str, JSON] | None = None
-            The compressor for the array.
+            Primary compressor to compress chunk data.
+            V2 only. V3 arrays should use `codecs` instead.
+            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            - For numeric arrays, the default is `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2222,7 +2244,7 @@ def create_array(
     ) -> Array:
         """Create a zarr array within this AsyncGroup.
 
-        This method lightly wraps AsyncArray.create.
+        This method lightly wraps `AsyncArray.create`.
 
         Parameters
         ----------
@@ -2233,24 +2255,46 @@ def create_array(
         dtype : np.DtypeLike = float64
             The data type of the array.
         chunk_shape : tuple[int, ...] | None = None
-            The shape of the chunks of the array. V3 only.
+            The shape of the chunks of the array.
+            V3 only. V2 arrays should use `chunks` instead.
+            Default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
+            V3 only. V2 arrays should use `dimension_separator` instead.
+            Default is ("default", "/").
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
-            An iterable of Codec or dict serializations thereof. The elements of this collection
-            specify the transformation from array values to stored bytes.
+            An iterable of Codec or dict serializations thereof. The elements of
+            this collection specify the transformation from array values to stored bytes.
+            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            If no codecs are provided, default codecs will be used:
+            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
-            The shape of the chunks of the array. V2 only.
+            The shape of the chunks of the array.
+            V2 only. V3 arrays should use `chunk_shape` instead.
+            Default values are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
-            The delimiter used for the chunk keys.
+            The delimiter used for the chunk keys. (default: ".")
+            V2 only. V3 arrays should use `chunk_key_encoding` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array.
+            The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
-            Filters for the array.
+            Sequence of filters to use to encode chunk data prior to compression.
+            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
+            nor `filters` are provided, a default compressor will be used. (see
+            `compressor` for details)
         compressor : dict[str, JSON] | None = None
-            The compressor for the array.
+            Primary compressor to compress chunk data.
+            V2 only. V3 arrays should use `codecs` instead.
+            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            - For numeric arrays, the default is `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2260,6 +2304,7 @@ def create_array(
 
         Returns
         -------
+
         Array
 
         """
@@ -2574,24 +2619,46 @@ def array(
         dtype : np.DtypeLike = float64
             The data type of the array.
         chunk_shape : tuple[int, ...] | None = None
-            The shape of the chunks of the array. V3 only.
+            The shape of the chunks of the array.
+            V3 only. V2 arrays should use `chunks` instead.
+            Default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
+            V3 only. V2 arrays should use `dimension_separator` instead.
+            Default is ("default", "/").
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
             An iterable of Codec or dict serializations thereof. The elements of
             this collection specify the transformation from array values to stored bytes.
+            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            If no codecs are provided, default codecs will be used:
+            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
-            The shape of the chunks of the array. V2 only.
+            The shape of the chunks of the array.
+            V2 only. V3 arrays should use `chunk_shape` instead.
+            Default values are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
-            The delimiter used for the chunk keys.
+            The delimiter used for the chunk keys. (default: ".")
+            V2 only. V3 arrays should use `chunk_key_encoding` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array.
+            The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
-            Filters for the array.
+            Sequence of filters to use to encode chunk data prior to compression.
+            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
+            nor `filters` are provided, a default compressor will be used. (see
+            `compressor` for details)
         compressor : dict[str, JSON] | None = None
-            The compressor for the array.
+            The compressor used to compress the data (default is None).
+            V2 only. V3 arrays should use `codecs` instead.
+            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            - For numeric arrays, the default is `ZstdCodec`.
+            - For Unicode strings, the default is `VLenUTF8Codec`.
+            - For bytes or objects, the default is `VLenBytesCodec`.
+            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is

From c62aff53ac226fe825028aa56b0e26a8a1f38cac Mon Sep 17 00:00:00 2001
From: Hannes Spitz <44113112+brokkoli71@users.noreply.github.com>
Date: Wed, 18 Dec 2024 15:12:49 +0100
Subject: [PATCH 27/35] Apply suggestions from code review

Co-authored-by: David Stansby <dstansby@gmail.com>
---
 src/zarr/api/asynchronous.py | 20 ++++++++++----------
 src/zarr/core/array.py       |  2 +-
 src/zarr/storage/__init__.py |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index b47bc07f22..ba4acf5cbc 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -403,7 +403,7 @@ async def save_array(
     arr : ndarray
         NumPy array with data to save.
     zarr_format : {2, 3, None}, optional
-        The zarr format to use when saving (default is 3).
+        The zarr format to use when saving (default is 3 if not specified).
     path : str or None, optional
         The path within the store where the array will be saved.
     storage_options : dict
@@ -821,7 +821,7 @@ async def create(
     chunks : int or tuple of ints, optional
         The shape of the array's chunks.
         V2 only. V3 arrays should use `chunk_shape` instead.
-        Default values are guessed based on the shape and dtype.
+        If not specified, default values are guessed based on the shape and dtype.
     dtype : str or dtype, optional
         NumPy dtype.
     chunk_shape : int or tuple of ints, optional
@@ -830,9 +830,9 @@ async def create(
     chunk_key_encoding : ChunkKeyEncoding, optional
         A specification of how the chunk keys are represented in storage.
         V3 only. V2 arrays should use `dimension_separator` instead.
-        Default is ("default", "/").
+        Default is ``("default", "/")``.
     codecs : Sequence of Codecs or dicts, optional
-        An iterable of Codec or dict serializations thereof. The elements of
+        An iterable of Codec or dict serializations of Codecs. The elements of
         this collection specify the transformation from array values to stored bytes.
         V3 only. V2 arrays should use `filters` and `compressor` instead.
         If no codecs are provided, default codecs will be used:
@@ -842,8 +842,8 @@ async def create(
         These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
     compressor : Codec, optional
         Primary compressor to compress chunk data.
-        V2 only. V3 arrays should use `codecs` instead.
-        If neither `compressor` nor `filters` are provided, a default compressor will be used:
+        V2 only. V3 arrays should use ``codecs`` instead.
+        If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
         - For numeric arrays, the default is `ZstdCodec`.
         - For Unicode strings, the default is `VLenUTF8Codec`.
         - For bytes or objects, the default is `VLenBytesCodec`.
@@ -852,7 +852,7 @@ async def create(
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
         Memory layout to be used within each chunk.
-        Default is specified in the Zarr config `array.order`.
+        If not specified, default is taken from the Zarr config ``array.order``.
     store : Store or str
         Store or path to directory in file system or name of zip file.
     synchronizer : object, optional
@@ -867,8 +867,8 @@ async def create(
         for storage of both chunks and metadata.
     filters : sequence of Codecs, optional
         Sequence of filters to use to encode chunk data prior to compression.
-        V2 only. If neither `compressor` nor `filters` are provided, a default
-        compressor will be used. (see `compressor` for details)
+        V2 only. If neither ``compressor`` nor ``filters`` are provided, a default
+        compressor will be used. (see ``compressor`` for details).
     cache_metadata : bool, optional
         If True, array configuration metadata will be cached for the
         lifetime of the object. If False, array metadata will be reloaded
@@ -884,7 +884,7 @@ async def create(
         A codec to encode object arrays, only needed if dtype=object.
     dimension_separator : {'.', '/'}, optional
         Separator placed between the dimensions of a chunk.
-        V2 only. V3 arrays should use `chunk_key_encoding` instead.
+        V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         Default is ".".
         .. versionadded:: 2.8
 
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 0372d67026..4001ff6e2d 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -642,7 +642,7 @@ async def _create_v2(
         if dimension_separator is None:
             dimension_separator = "."
 
-        dtype = parse_dtype(dtype, 2)
+        dtype = parse_dtype(dtype, zarr_format=2)
         if not filters and not compressor:
             filters, compressor = _default_filters_and_compressor(dtype)
         if np.issubdtype(dtype, np.str_):
diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py
index 9172f8c9ce..514361bd6b 100644
--- a/src/zarr/storage/__init__.py
+++ b/src/zarr/storage/__init__.py
@@ -30,7 +30,7 @@ def __setattr__(self, attr: str, value: Any) -> None:
             warnings.warn(
                 "setting zarr.storage.default_compressor is deprecated, use "
                 "zarr.config to configure array.v2_default_compressor "
-                "e.g. config.set({'codecs.zstd':'your.module.Zstd', 'array.v2_default_compressor.numeric': 'zstd'})",
+                "e.g. config.set({'codecs.zstd':'numcodecs.Zstd', 'array.v2_default_compressor.numeric': 'zstd'})",
                 DeprecationWarning,
                 stacklevel=1,
             )

From 48c74485e5c8bdd3abe0111e9071842a87065e6d Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 15:17:29 +0100
Subject: [PATCH 28/35] apply suggestions from review

---
 src/zarr/api/asynchronous.py |  2 +-
 src/zarr/core/array.py       | 44 ++++++++++++------------
 src/zarr/core/group.py       | 66 ++++++++++++++++++------------------
 tests/test_v2.py             |  2 +-
 4 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index ba4acf5cbc..726149e351 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -834,7 +834,7 @@ async def create(
     codecs : Sequence of Codecs or dicts, optional
         An iterable of Codec or dict serializations of Codecs. The elements of
         this collection specify the transformation from array values to stored bytes.
-        V3 only. V2 arrays should use `filters` and `compressor` instead.
+        V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
         If no codecs are provided, default codecs will be used:
         - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
         - For Unicode strings, the default is `VLenUTF8Codec`.
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 4001ff6e2d..195d676326 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -411,15 +411,15 @@ async def create(
         chunk_shape : ChunkCoords, optional
             The shape of the array's chunks
             V3 only. V2 arrays should use `chunks` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding, optional
             A specification of how the chunk keys are represented in storage.
             V3 only. V2 arrays should use `dimension_separator` instead.
-            Default is ("default", "/").
+            Default is ``("default", "/")``.
         codecs : Sequence of Codecs or dicts, optional
-            An iterable of Codec or dict serializations thereof. The elements of
+            An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
-            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
             - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
@@ -431,21 +431,21 @@ async def create(
         chunks : ShapeLike, optional
             The shape of the array's chunks.
             V2 only. V3 arrays should use `chunk_shape` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
-            V2 only. V3 arrays should use `chunk_key_encoding` instead.
+            V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"], optional
             The order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]], optional
             Sequence of filters to use to encode chunk data prior to compression.
-            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
-            nor `filters` are provided, a default compressor will be used. (see
-            `compressor` for details)
+            V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
+            nor ``filters`` are provided, a default compressor will be used. (see
+            ``compressor`` for details)
         compressor : dict[str, JSON], optional
             The compressor used to compress the data (default is None).
-            V2 only. V3 arrays should use `codecs` instead.
-            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            V2 only. V3 arrays should use ``codecs`` instead.
+            If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
             - For numeric arrays, the default is `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
             - For bytes or objects, the default is `VLenBytesCodec`.
@@ -1503,15 +1503,15 @@ def create(
         chunk_shape : ChunkCoords, optional
             The shape of the Array's chunks.
             V3 only. V2 arrays should use `chunks` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding, optional
             A specification of how the chunk keys are represented in storage.
             V3 only. V2 arrays should use `dimension_separator` instead.
-            Default is ("default", "/").
+            Default is ``("default", "/")``.
         codecs : Sequence of Codecs or dicts, optional
-            An iterable of Codec or dict serializations thereof. The elements of
+            An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
-            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
             - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
@@ -1523,21 +1523,21 @@ def create(
         chunks : ChunkCoords, optional
             The shape of the array's chunks.
             V2 only. V3 arrays should use `chunk_shape` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
-            V2 only. V3 arrays should use `chunk_key_encoding` instead.
+            V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"], optional
             The order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]], optional
             Sequence of filters to use to encode chunk data prior to compression.
-            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
-            nor `filters` are provided, a default compressor will be used. (see
-            `compressor` for details)
+            V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
+            nor ``filters`` are provided, a default compressor will be used. (see
+            ``compressor`` for details)
         compressor : dict[str, JSON], optional
             Primary compressor to compress chunk data.
-            V2 only. V3 arrays should use `codecs` instead.
-            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            V2 only. V3 arrays should use ``codecs`` instead.
+            If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
             - For numeric arrays, the default is `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
             - For bytes or objects, the default is `VLenBytesCodec`.
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 8adce41c64..1bf5bd3e58 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1036,15 +1036,15 @@ async def create_array(
         chunk_shape : tuple[int, ...] | None = None
             The shape of the chunks of the array.
             V3 only. V2 arrays should use `chunks` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
             V3 only. V2 arrays should use `dimension_separator` instead.
-            Default is ("default", "/").
+            Default is ``("default", "/")``.
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
-            An iterable of Codec or dict serializations thereof. The elements of
+            An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
-            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
             - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
@@ -1055,21 +1055,21 @@ async def create_array(
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
             V2 only. V3 arrays should use `chunk_shape` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
-            V2 only. V3 arrays should use `chunk_key_encoding` instead.
+            V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
             The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
-            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
-            nor `filters` are provided, a default compressor will be used. (see
-            `compressor` for details)
+            V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
+            nor ``filters`` are provided, a default compressor will be used. (see
+            ``compressor`` for details)
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
-            V2 only. V3 arrays should use `codecs` instead.
-            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            V2 only. V3 arrays should use ``codecs`` instead.
+            If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
             - For numeric arrays, the default is `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
             - For bytes or objects, the default is `VLenBytesCodec`.
@@ -2257,15 +2257,15 @@ def create_array(
         chunk_shape : tuple[int, ...] | None = None
             The shape of the chunks of the array.
             V3 only. V2 arrays should use `chunks` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
             V3 only. V2 arrays should use `dimension_separator` instead.
-            Default is ("default", "/").
+            Default is ``("default", "/")``.
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
-            An iterable of Codec or dict serializations thereof. The elements of
+            An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
-            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
             - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
@@ -2276,21 +2276,21 @@ def create_array(
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
             V2 only. V3 arrays should use `chunk_shape` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
-            V2 only. V3 arrays should use `chunk_key_encoding` instead.
+            V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
             The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
-            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
-            nor `filters` are provided, a default compressor will be used. (see
-            `compressor` for details)
+            V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
+            nor ``filters`` are provided, a default compressor will be used. (see
+            ``compressor`` for details)
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
-            V2 only. V3 arrays should use `codecs` instead.
-            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            V2 only. V3 arrays should use ``codecs`` instead.
+            If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
             - For numeric arrays, the default is `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
             - For bytes or objects, the default is `VLenBytesCodec`.
@@ -2621,15 +2621,15 @@ def array(
         chunk_shape : tuple[int, ...] | None = None
             The shape of the chunks of the array.
             V3 only. V2 arrays should use `chunks` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default values are guessed based on the shape and dtype.
         chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None
             A specification of how the chunk keys are represented in storage.
             V3 only. V2 arrays should use `dimension_separator` instead.
-            Default is ("default", "/").
+            Default is ``("default", "/")``.
         codecs : Iterable[Codec | dict[str, JSON]] | None = None
-            An iterable of Codec or dict serializations thereof. The elements of
+            An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
-            V3 only. V2 arrays should use `filters` and `compressor` instead.
+            V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
             - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
@@ -2640,21 +2640,21 @@ def array(
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
             V2 only. V3 arrays should use `chunk_shape` instead.
-            Default values are guessed based on the shape and dtype.
+            If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
-            V2 only. V3 arrays should use `chunk_key_encoding` instead.
+            V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
             The memory order of the array (default is specified in the Zarr config `array.order`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
-            V2 only. V3 arrays should use `codecs` instead. If neither `compressor`
-            nor `filters` are provided, a default compressor will be used. (see
-            `compressor` for details)
+            V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
+            nor ``filters`` are provided, a default compressor will be used. (see
+            ``compressor`` for details)
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
-            V2 only. V3 arrays should use `codecs` instead.
-            If neither `compressor` nor `filters` are provided, a default compressor will be used:
+            V2 only. V3 arrays should use ``codecs`` instead.
+            If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
             - For numeric arrays, the default is `ZstdCodec`.
             - For Unicode strings, the default is `VLenUTF8Codec`.
             - For bytes or objects, the default is `VLenBytesCodec`.
diff --git a/tests/test_v2.py b/tests/test_v2.py
index 205b0fdf52..ef06c13e26 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -203,7 +203,7 @@ def test_v2_non_contiguous(array_order: Literal["C", "F"], data_order: Literal["
 
 
 def test_default_compressor_deprecation_warning():
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(DeprecationWarning, match="default_compressor is deprecated"):
         zarr.storage.default_compressor = "zarr.codecs.zstd.ZstdCodec()"
 
 

From 083c4cbd4f364e7b7e4773f891e6dc534b19a06d Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 15:32:01 +0100
Subject: [PATCH 29/35] correct code double backticks

---
 src/zarr/api/asynchronous.py | 12 +++++------
 src/zarr/core/array.py       | 28 ++++++++++++------------
 src/zarr/core/group.py       | 42 ++++++++++++++++++------------------
 3 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 726149e351..3e14fea4f0 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -836,17 +836,17 @@ async def create(
         this collection specify the transformation from array values to stored bytes.
         V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
         If no codecs are provided, default codecs will be used:
-        - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-        - For Unicode strings, the default is `VLenUTF8Codec`.
-        - For bytes or objects, the default is `VLenBytesCodec`.
+        - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+        - For Unicode strings, the default is ``VLenUTF8Codec``.
+        - For bytes or objects, the default is ``VLenBytesCodec``.
         These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
     compressor : Codec, optional
         Primary compressor to compress chunk data.
         V2 only. V3 arrays should use ``codecs`` instead.
         If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-        - For numeric arrays, the default is `ZstdCodec`.
-        - For Unicode strings, the default is `VLenUTF8Codec`.
-        - For bytes or objects, the default is `VLenBytesCodec`.
+        - For numeric arrays, the default is ``ZstdCodec``.
+        - For Unicode strings, the default is ``VLenUTF8Codec``.
+        - For bytes or objects, the default is ``VLenBytesCodec``.
         These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
     fill_value : object
         Default value to use for uninitialized portions of the array.
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 195d676326..c487eb3f1a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -421,16 +421,16 @@ async def create(
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
-            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
         chunks : ShapeLike, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use `chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
@@ -446,9 +446,9 @@ async def create(
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-            - For numeric arrays, the default is `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
@@ -1513,16 +1513,16 @@ def create(
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
-            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
         chunks : ChunkCoords, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use `chunk_shape` instead.
+            V2 only. V3 arrays should use :func:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
@@ -1538,9 +1538,9 @@ def create(
             Primary compressor to compress chunk data.
             V2 only. V3 arrays should use ``codecs`` instead.
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-            - For numeric arrays, the default is `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 1bf5bd3e58..eb8cf70f93 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1046,15 +1046,15 @@ async def create_array(
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
-            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use `chunk_shape` instead.
+            V2 only. V3 arrays should use :func:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
@@ -1070,9 +1070,9 @@ async def create_array(
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-            - For numeric arrays, the default is `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
@@ -2267,15 +2267,15 @@ def create_array(
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
-            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use `chunk_shape` instead.
+            V2 only. V3 arrays should use :func:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
@@ -2291,9 +2291,9 @@ def create_array(
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-            - For numeric arrays, the default is `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
@@ -2631,15 +2631,15 @@ def array(
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
             If no codecs are provided, default codecs will be used:
-            - For numeric arrays, the default is `BytesCodec` and `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use `chunk_shape` instead.
+            V2 only. V3 arrays should use :func:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
@@ -2655,9 +2655,9 @@ def array(
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
-            - For numeric arrays, the default is `ZstdCodec`.
-            - For Unicode strings, the default is `VLenUTF8Codec`.
-            - For bytes or objects, the default is `VLenBytesCodec`.
+            - For numeric arrays, the default is ``ZstdCodec``.
+            - For Unicode strings, the default is ``VLenUTF8Codec``.
+            - For bytes or objects, the default is ``VLenBytesCodec``.
             These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will

From 500bc7b469b295658b5ffe7becda096c344dc63a Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 15:47:22 +0100
Subject: [PATCH 30/35] correct attribute links in docstring

---
 src/zarr/core/array.py | 4 ++--
 src/zarr/core/group.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index c487eb3f1a..7214a062fb 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -430,7 +430,7 @@ async def create(
             V3 only. V2 arrays should not use this parameter.
         chunks : ShapeLike, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use :func:`chunk_shape` instead.
+            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
@@ -1522,7 +1522,7 @@ def create(
             V3 only. V2 arrays should not use this parameter.
         chunks : ChunkCoords, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use :func:`chunk_shape` instead.
+            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index eb8cf70f93..8affa75a8f 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1054,7 +1054,7 @@ async def create_array(
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :func:`chunk_shape` instead.
+            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
@@ -2275,7 +2275,7 @@ def create_array(
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :func:`chunk_shape` instead.
+            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
@@ -2639,7 +2639,7 @@ def array(
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :func:`chunk_shape` instead.
+            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")

From cdf55429a0be97c3b93cba4076a9e62f9da9489a Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 16:09:24 +0100
Subject: [PATCH 31/35] link zarr.core.config in docstrings

---
 src/zarr/api/asynchronous.py |  6 +++---
 src/zarr/core/array.py       | 16 ++++++++--------
 src/zarr/core/config.py      | 23 +++++++++++++----------
 src/zarr/core/group.py       | 24 ++++++++++++------------
 4 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 3e14fea4f0..80a854ead8 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -839,7 +839,7 @@ async def create(
         - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
         - For Unicode strings, the default is ``VLenUTF8Codec``.
         - For bytes or objects, the default is ``VLenBytesCodec``.
-        These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+        These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
     compressor : Codec, optional
         Primary compressor to compress chunk data.
         V2 only. V3 arrays should use ``codecs`` instead.
@@ -847,12 +847,12 @@ async def create(
         - For numeric arrays, the default is ``ZstdCodec``.
         - For Unicode strings, the default is ``VLenUTF8Codec``.
         - For bytes or objects, the default is ``VLenBytesCodec``.
-        These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+        These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
     fill_value : object
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
         Memory layout to be used within each chunk.
-        If not specified, default is taken from the Zarr config ``array.order``.
+        If not specified, default is taken from ``array.order`` in :mod:`zarr.core.config`.
     store : Store or str
         Store or path to directory in file system or name of zip file.
     synchronizer : object, optional
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index 7214a062fb..afc46866c6 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -424,19 +424,19 @@ async def create(
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
         chunks : ShapeLike, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
             V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"], optional
-            The order of the array (default is specified in the Zarr config `array.order`).
+            The order of the array (default is specified by ``array.order`` in :mod:`zarr.core.config`).
         filters : list[dict[str, JSON]], optional
             Sequence of filters to use to encode chunk data prior to compression.
             V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
@@ -449,7 +449,7 @@ async def create(
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
         data : npt.ArrayLike, optional
@@ -1516,19 +1516,19 @@ def create(
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
         chunks : ChunkCoords, optional
             The shape of the array's chunks.
-            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"], optional
             The dimension separator (default is ".").
             V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"], optional
-            The order of the array (default is specified in the Zarr config `array.order`).
+            The order of the array (default is specified by ``array.order`` in :mod:`zarr.core.config`).
         filters : list[dict[str, JSON]], optional
             Sequence of filters to use to encode chunk data prior to compression.
             V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
@@ -1541,7 +1541,7 @@ def create(
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
 
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 809d23d86c..b3ff8c6ceb 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -1,3 +1,15 @@
+"""
+The config module is responsible for managing the configuration of zarr and  is based on the Donfig python library.
+For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
+in the registry and then select them in the config.
+e.g. an implementation of the bytes codec in a class "your.module.NewBytesCodec", requires the value of codecs.bytes
+to be "your.module.NewBytesCodec".
+Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
+e.g. export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
+(for more information see github.com/pytroll/donfig)
+Default values below point to the standard implementations of zarr-python
+"""
+
 from __future__ import annotations
 
 from typing import Any, Literal, cast
@@ -10,7 +22,7 @@ class BadConfigError(ValueError):
 
 
 class Config(DConfig):  # type: ignore[misc]
-    """Will collect configuration from config files and environment variables
+    """The Config will collect configuration from config files and environment variables
 
     Example environment variables:
     Grabs environment variables of the form "ZARR_FOO__BAR_BAZ=123" and
@@ -28,15 +40,6 @@ def reset(self) -> None:
         self.refresh()
 
 
-# The config module is responsible for managing the configuration of zarr and  is based on the Donfig python library.
-# For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
-# in the registry and then select them in the config.
-# e.g. an implementation of the bytes codec in a class "your.module.NewBytesCodec", requires the value of codecs.bytes
-# to be "your.module.NewBytesCodec".
-# Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
-# e.g. export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
-# (for more information see github.com/pytroll/donfig)
-# Default values below point to the standard implementations of zarr-python
 config = Config(
     "zarr",
     defaults=[
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 8affa75a8f..ceb9c9a77b 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1049,18 +1049,18 @@ async def create_array(
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
             V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array (default is specified in the Zarr config `array.order`).
+            The memory order of the array (default is specified by ``array.order`` in :mod:`zarr.core.config`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
             V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
@@ -1073,7 +1073,7 @@ async def create_array(
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2270,18 +2270,18 @@ def create_array(
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
             V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array (default is specified in the Zarr config `array.order`).
+            The memory order of the array (default is specified by ``array.order`` in :mod:`zarr.core.config`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
             V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
@@ -2294,7 +2294,7 @@ def create_array(
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2634,18 +2634,18 @@ def array(
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v3_default_codecs` variable in the Zarr config.
+            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
             The shape of the chunks of the array.
-            V2 only. V3 arrays should use :attr:`chunk_shape` instead.
+            V2 only. V3 arrays should use ``chunk_shape`` instead.
             If not specified, default are guessed based on the shape and dtype.
         dimension_separator : Literal[".", "/"] | None = None
             The delimiter used for the chunk keys. (default: ".")
             V2 only. V3 arrays should use ``chunk_key_encoding`` instead.
         order : Literal["C", "F"] | None = None
-            The memory order of the array (default is specified in the Zarr config `array.order`).
+            The memory order of the array (default is specified by ``array.order`` in :mod:`zarr.core.config`).
         filters : list[dict[str, JSON]] | None = None
             Sequence of filters to use to encode chunk data prior to compression.
             V2 only. V3 arrays should use ``codecs`` instead. If neither ``compressor``
@@ -2658,7 +2658,7 @@ def array(
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the `array.v2_default_compressor` variable in the Zarr config.
+            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is

From 390c4354eb49ec00c9b7745de195ac2e6e381153 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 16:29:06 +0100
Subject: [PATCH 32/35] improve docstring readability

---
 src/zarr/api/asynchronous.py | 11 ++++++++---
 src/zarr/core/array.py       | 20 ++++++++++++++++----
 src/zarr/core/config.py      | 29 ++++++++++++++++++++++-------
 src/zarr/core/group.py       | 30 ++++++++++++++++++++++++------
 4 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 80a854ead8..cccbc27e0e 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -835,20 +835,26 @@ async def create(
         An iterable of Codec or dict serializations of Codecs. The elements of
         this collection specify the transformation from array values to stored bytes.
         V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
         If no codecs are provided, default codecs will be used:
+
         - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
         - For Unicode strings, the default is ``VLenUTF8Codec``.
         - For bytes or objects, the default is ``VLenBytesCodec``.
-        These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+        These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
     compressor : Codec, optional
         Primary compressor to compress chunk data.
         V2 only. V3 arrays should use ``codecs`` instead.
+
         If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
         - For numeric arrays, the default is ``ZstdCodec``.
         - For Unicode strings, the default is ``VLenUTF8Codec``.
         - For bytes or objects, the default is ``VLenBytesCodec``.
-        These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
-    fill_value : object
+
+        These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
+    fill_value : object
         Default value to use for uninitialized portions of the array.
     order : {'C', 'F'}, optional
         Memory layout to be used within each chunk.
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index afc46866c6..3dc186846c 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -420,11 +420,14 @@ async def create(
             An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
             If no codecs are provided, default codecs will be used:
+
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
@@ -445,11 +448,14 @@ async def create(
         compressor : dict[str, JSON], optional
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
+
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
         data : npt.ArrayLike, optional
@@ -1512,11 +1518,14 @@ def create(
             An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
             If no codecs are provided, default codecs will be used:
+
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
         dimension_names : Iterable[str], optional
             The names of the dimensions (default is None).
             V3 only. V2 arrays should not use this parameter.
@@ -1537,11 +1546,14 @@ def create(
         compressor : dict[str, JSON], optional
             Primary compressor to compress chunk data.
             V2 only. V3 arrays should use ``codecs`` instead.
+
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
         overwrite : bool, optional
             Whether to raise an error if the store already exists (default is False).
 
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index b3ff8c6ceb..f9db5ab90f 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -1,13 +1,28 @@
 """
-The config module is responsible for managing the configuration of zarr and  is based on the Donfig python library.
+The config module is responsible for managing the configuration of zarr and is based on the Donfig python library.
 For selecting custom implementations of codecs, pipelines, buffers and ndbuffers, first register the implementations
 in the registry and then select them in the config.
-e.g. an implementation of the bytes codec in a class "your.module.NewBytesCodec", requires the value of codecs.bytes
-to be "your.module.NewBytesCodec".
-Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations
-e.g. export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
-(for more information see github.com/pytroll/donfig)
-Default values below point to the standard implementations of zarr-python
+
+Example:
+    An implementation of the bytes codec in a class `your.module.NewBytesCodec` requires the value of `codecs.bytes`
+    to be `your.module.NewBytesCodec`.
+
+    ```python
+    from your.module import NewBytesCodec
+    from zarr.core.config import register_codec, config
+
+    register_codec("bytes", NewBytesCodec)
+    config.set({"codecs.bytes": "your.module.NewBytesCodec"})
+    ```
+
+Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations.
+For example, to set the bytes codec via an environment variable:
+
+    export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
+
+For more information, see the Donfig documentation at https://github.com/pytroll/donfig.
+
+Default values below point to the standard implementations of zarr-python.
 """
 
 from __future__ import annotations
diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index ceb9c9a77b..2d7a21911a 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -1045,11 +1045,14 @@ async def create_array(
             An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
             If no codecs are provided, default codecs will be used:
+
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
@@ -1069,11 +1072,14 @@ async def create_array(
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
+
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2266,11 +2272,14 @@ def create_array(
             An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
             If no codecs are provided, default codecs will be used:
+
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
@@ -2290,11 +2299,14 @@ def create_array(
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
+
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is
@@ -2630,11 +2642,14 @@ def array(
             An iterable of Codec or dict serializations of Codecs. The elements of
             this collection specify the transformation from array values to stored bytes.
             V3 only. V2 arrays should use ``filters`` and ``compressor`` instead.
+
             If no codecs are provided, default codecs will be used:
+
             - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v3_default_codecs`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v3_default_codecs`` in :mod:`zarr.core.config`.
         dimension_names : Iterable[str] | None = None
             The names of the dimensions of the array. V3 only.
         chunks : ChunkCoords | None = None
@@ -2654,11 +2669,14 @@ def array(
         compressor : dict[str, JSON] | None = None
             The compressor used to compress the data (default is None).
             V2 only. V3 arrays should use ``codecs`` instead.
+
             If neither ``compressor`` nor ``filters`` are provided, a default compressor will be used:
+
             - For numeric arrays, the default is ``ZstdCodec``.
             - For Unicode strings, the default is ``VLenUTF8Codec``.
             - For bytes or objects, the default is ``VLenBytesCodec``.
-            These defaults can be changed using the ``array.v2_default_compressor`` variable in :mod:`zarr.core.config`.
+
+            These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
         overwrite : bool = False
             If True, a pre-existing array or group at the path of this array will
             be overwritten. If False, the presence of a pre-existing array or group is

From 35e35c4868533bc36053d76961a957dc5b1a30ef Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 16:43:46 +0100
Subject: [PATCH 33/35] correct config docstring

---
 src/zarr/core/config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index f9db5ab90f..a57fdfc077 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -4,20 +4,20 @@
 in the registry and then select them in the config.
 
 Example:
-    An implementation of the bytes codec in a class `your.module.NewBytesCodec` requires the value of `codecs.bytes`
-    to be `your.module.NewBytesCodec`.
+    An implementation of the bytes codec in a class ``your.module.NewBytesCodec`` requires the value of ``codecs.bytes``
+    to be ``your.module.NewBytesCodec``.
 
-    ```python
+.. code-block:: python
     from your.module import NewBytesCodec
     from zarr.core.config import register_codec, config
 
     register_codec("bytes", NewBytesCodec)
     config.set({"codecs.bytes": "your.module.NewBytesCodec"})
-    ```
 
 Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations.
 For example, to set the bytes codec via an environment variable:
 
+.. code-block:: bash
     export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
 
 For more information, see the Donfig documentation at https://github.com/pytroll/donfig.

From 92de85caf3b6cc8295eecf068c86d64e8767b586 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 16:46:46 +0100
Subject: [PATCH 34/35] correct config docstring

---
 src/zarr/core/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index a57fdfc077..9fd0490862 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -8,6 +8,7 @@
     to be ``your.module.NewBytesCodec``.
 
 .. code-block:: python
+
     from your.module import NewBytesCodec
     from zarr.core.config import register_codec, config
 
@@ -18,6 +19,7 @@
 For example, to set the bytes codec via an environment variable:
 
 .. code-block:: bash
+
     export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
 
 For more information, see the Donfig documentation at https://github.com/pytroll/donfig.

From 6fd3f25c108f5cd39b43b5f0837b73901d16cf67 Mon Sep 17 00:00:00 2001
From: brokkoli71 <brokkoli71b@gmail.com>
Date: Wed, 18 Dec 2024 17:16:22 +0100
Subject: [PATCH 35/35] improve config docstring

---
 src/zarr/core/config.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 9fd0490862..1feb4a6c2f 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -5,26 +5,26 @@
 
 Example:
     An implementation of the bytes codec in a class ``your.module.NewBytesCodec`` requires the value of ``codecs.bytes``
-    to be ``your.module.NewBytesCodec``.
+    to be ``your.module.NewBytesCodec``. Donfig can be configured programmatically, by environment variables, or from
+    YAML files in standard locations.
 
-.. code-block:: python
+    .. code-block:: python
 
-    from your.module import NewBytesCodec
-    from zarr.core.config import register_codec, config
+        from your.module import NewBytesCodec
+        from zarr.core.config import register_codec, config
 
-    register_codec("bytes", NewBytesCodec)
-    config.set({"codecs.bytes": "your.module.NewBytesCodec"})
+        register_codec("bytes", NewBytesCodec)
+        config.set({"codecs.bytes": "your.module.NewBytesCodec"})
 
-Donfig can be configured programmatically, by environment variables, or from YAML files in standard locations.
-For example, to set the bytes codec via an environment variable:
+    Instead of setting the value programmatically with ``config.set``, you can also set the value with an environment
+    variable. The environment variable ``ZARR_CODECS__BYTES`` can be set to ``your.module.NewBytesCodec``. The double
+    underscore ``__`` is used to indicate nested access.
 
-.. code-block:: bash
+    .. code-block:: bash
 
-    export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
+        export ZARR_CODECS__BYTES="your.module.NewBytesCodec"
 
 For more information, see the Donfig documentation at https://github.com/pytroll/donfig.
-
-Default values below point to the standard implementations of zarr-python.
 """
 
 from __future__ import annotations
@@ -57,6 +57,7 @@ def reset(self) -> None:
         self.refresh()
 
 
+# The default configuration for zarr
 config = Config(
     "zarr",
     defaults=[