Skip to content

Commit

Permalink
Don't automatically fall back to using zarr
Browse files Browse the repository at this point in the history
  • Loading branch information
oliverwm1 committed Jan 20, 2025
1 parent c10784f commit 57f76ff
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 54 deletions.
5 changes: 3 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ Can show information for a particular variable/array:
longname: surface temperature
units: K

Diagnostic information will also be printed for zarr arrays or zarr groups which do not represent xarray datasets:
Diagnostic information can also be printed for zarr arrays or zarr groups which do not
represent xarray datasets by using the ``--zarr`` flag:
::

$ zarrdump group.zarr
$ zarrdump --zarr group.zarr
Name : /
Type : zarr.hierarchy.Group
Read-only : False
Expand Down
55 changes: 30 additions & 25 deletions tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import zarrdump
from zarrdump.core import dump, _open_with_xarray_or_zarr
from zarrdump.core import dump

from click.testing import CliRunner
import fsspec
Expand Down Expand Up @@ -40,55 +40,60 @@ def write_group_to_zarr(consolidated=False):
return write_group_to_zarr


@pytest.mark.parametrize("consolidated", [True, False])
def test__open_with_xarray_or_zarr_on_zarr_group(tmp_zarr_group, consolidated):
    """Check the opener's result for a store written as a plain zarr group.

    The opened object's ``var1`` must match the written group's ``var1`` and
    the returned flag must report that the result is not an xarray dataset.
    """
    written_group, store_path = tmp_zarr_group(consolidated=consolidated)
    mapper = fsspec.get_mapper(store_path)
    opened = _open_with_xarray_or_zarr(mapper, consolidated)
    opened_group, is_xarray_dataset = opened
    np.testing.assert_allclose(written_group["var1"], opened_group["var1"])
    assert not is_xarray_dataset


@pytest.mark.parametrize("consolidated", [True, False])
def test__open_with_xarray_or_zarr_on_xarray_ds(tmp_xarray_ds, consolidated):
    """Check the opener's result for a store written from an xarray dataset.

    The opened object's ``var1`` must match the written dataset's ``var1`` and
    the returned flag must report that the result is an xarray dataset.
    """
    written_ds, store_path = tmp_xarray_ds(consolidated=consolidated)
    mapper = fsspec.get_mapper(store_path)
    opened = _open_with_xarray_or_zarr(mapper, consolidated)
    opened_ds, is_xarray_dataset = opened
    np.testing.assert_allclose(written_ds["var1"], opened_ds["var1"])
    assert is_xarray_dataset


def test_dump_non_existent_url():
    """Invoking ``dump`` on a missing path exits with code 1 and an error message."""
    cli = CliRunner()
    outcome = cli.invoke(dump, ["non/existent/path"])
    expected_output = "Error: No file or directory at non/existent/path\n"
    assert outcome.exit_code == 1
    assert outcome.output == expected_output


@pytest.mark.parametrize("options", [[], ["-v", "var1"]])
def test_dump_executes_on_zarr_group(tmp_zarr_group, options):
@pytest.mark.parametrize("consolidated", [True, False])
@pytest.mark.parametrize("options", [["--zarr"], ["--zarr", "-v", "var1"]])
def test_dump_executes_on_zarr_group(tmp_zarr_group, consolidated, options):
runner = CliRunner()
_, path = tmp_zarr_group()
_, path = tmp_zarr_group(consolidated=consolidated)
result = runner.invoke(dump, [path] + options)
assert result.exit_code == 0
if "-v" in options:
assert "Array" in result.output
else:
assert "Group" in result.output


@pytest.mark.parametrize("consolidated", [True, False])
@pytest.mark.parametrize("options", [[], ["-v", "var1"], ["--info"]])
def test_dump_executes_on_xarray_dataset(tmp_xarray_ds, options):
def test_dump_executes_on_xarray_dataset(tmp_xarray_ds, consolidated, options):
runner = CliRunner()
_, path = tmp_xarray_ds()
_, path = tmp_xarray_ds(consolidated=consolidated)
result = runner.invoke(dump, [path] + options)
assert result.exit_code == 0

if "-v" in options:
expected_content = "<xarray.DataArray"
elif "--info" in options:
expected_content = "xarray.Dataset"
else:
expected_content = "<xarray.Dataset>"

assert expected_content in result.output

def test_dump_disallowed_options(tmp_xarray_ds):

def test_dump_disallowed_options_variable_info(tmp_xarray_ds):
    """Passing ``-v`` together with ``-i`` exits with code 1 and an error message."""
    _, store_path = tmp_xarray_ds()
    cli_args = [store_path, "-v", "var1", "-i"]
    outcome = CliRunner().invoke(dump, cli_args)
    expected_output = "Error: Cannot use both '-v' and '-i' options\n"
    assert outcome.exit_code == 1
    assert outcome.output == expected_output


def test_dump_disallowed_options_zarr_info(tmp_zarr_group):
    """Passing ``-z`` together with ``-i`` exits with code 1 and an error message."""
    _, store_path = tmp_zarr_group()
    cli_args = [store_path, "-z", "-i"]
    outcome = CliRunner().invoke(dump, cli_args)
    expected_output = "Error: Cannot use both '-z' and '-i' options\n"
    assert outcome.exit_code == 1
    assert outcome.output == expected_output


def test_dump_max_rows_default(tmp_xarray_ds):
runner = CliRunner()
_, path = tmp_xarray_ds(consolidated=True, n_vars=30)
Expand Down
65 changes: 38 additions & 27 deletions zarrdump/core.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,73 @@
from typing import Tuple, Union
from typing import Union

import click
import fsspec
import xarray as xr
import zarr
import zarr as zarr_pkg


@click.command()
@click.argument("url")
@click.option("-v", "--variable", type=str, help="Dump variable's info")
@click.option("-m", "--max-rows", default=999, help="Maximum number of rows to display")
@click.option("-i", "--info", is_flag=True, help="Use ncdump style")
def dump(url: str, variable: str, max_rows: int, info: bool):
@click.option(
"-z",
"--zarr",
is_flag=True,
help="Open the given URL using zarr-python instead of xarray.",
)
def dump(url: str, variable: str, max_rows: int, info: bool, zarr: bool):
fs, _, _ = fsspec.get_fs_token_paths(url)
if not fs.exists(url):
raise click.ClickException(f"No file or directory at {url}")

if info and zarr:
raise click.ClickException("Cannot use both '-z' and '-i' options")

if variable is not None and info:
raise click.ClickException("Cannot use both '-v' and '-i' options")

m = fs.get_mapper(url)
consolidated = _metadata_is_consolidated(m)
object_, object_is_xarray = _open_with_xarray_or_zarr(m, consolidated)

if zarr:
object_ = _open_with_zarr(m, consolidated)
else:
object_ = _open_with_xarray(m, consolidated)

if variable is not None:
if info:
raise click.ClickException("Cannot use both '-v' and '-i' options")
object_ = object_[variable]

if not object_is_xarray:
object_ = object_.info

if object_is_xarray and info:
object_.info()
if zarr:
print(object_.info)
else:
try:
with xr.set_options(display_max_rows=max_rows):
if info:
object_.info()
else:
if xr.__version__ >= "0.18.0":
with xr.set_options(display_max_rows=max_rows):
print(object_)
else:
# xarray<v0.18.0 does not have display_max_rows option
print(object_)
except ValueError:
# xarray<v0.18.0 does not have display_max_rows option
print(object_)


def _metadata_is_consolidated(m: fsspec.FSMap) -> bool:
try:
zarr.open_consolidated(m)
zarr_pkg.open_consolidated(m)
consolidated = True
except KeyError:
# group with un-consolidated metadata, or array
consolidated = False
return consolidated


def _open_with_xarray_or_zarr(
def _open_with_xarray(m: fsspec.FSMap, consolidated: bool) -> xr.Dataset:
    """Open the store behind mapper ``m`` with ``xr.open_zarr``.

    ``consolidated`` is forwarded unchanged as the ``consolidated`` keyword
    of ``xr.open_zarr``; whatever that call returns is returned as-is.
    """
    dataset = xr.open_zarr(m, consolidated=consolidated)
    return dataset


def _open_with_zarr(
m: fsspec.FSMap, consolidated: bool
) -> Tuple[Union[xr.Dataset, zarr.hierarchy.Group, zarr.core.Array], bool]:
try:
result = xr.open_zarr(m, consolidated=consolidated)
is_xarray_dataset = True
except (KeyError, TypeError):
# xarray requires _ARRAY_DIMENSIONS attribute, assuming missing if KeyError
result = zarr.open_consolidated(m) if consolidated else zarr.open(m)
is_xarray_dataset = False
return result, is_xarray_dataset
) -> Union[zarr_pkg.hierarchy.Group, zarr_pkg.core.Array]:
return zarr_pkg.open_consolidated(m) if consolidated else zarr_pkg.open(m)

0 comments on commit 57f76ff

Please sign in to comment.