Skip to content

Commit

Permalink
Fix combine from zarr with parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
martindurant committed Jan 17, 2024
1 parent 6fc1049 commit 66b9deb
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
5 changes: 4 additions & 1 deletion kerchunk/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,10 @@ def second_pass(self):
# a coordinate is any array appearing in its own or other array's _ARRAY_DIMENSIONS
skip.add(v)
for k in fs.ls(v, detail=False):
self.out[k] = fs.references[k]
if k.rsplit("/", 1)[-1].startswith(".z"):
self.out[k] = fs.cat(k)
else:
self.out[k] = fs.references[k]
continue

dont_skip.add(v) # don't check for coord or identical again
Expand Down
16 changes: 16 additions & 0 deletions kerchunk/tests/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import pandas as pd
import pytest
import xarray as xr
import fsspec.implementations.reference as reffs

import kerchunk.combine
import kerchunk.zarr
import kerchunk.utils

Expand Down Expand Up @@ -68,3 +70,17 @@ def test_zarr_in_zip(zarr_in_zip, ds):
"reference", fo=out, remote_protocol="zip", remote_options={"fo": zarr_in_zip}
)
assert isinstance(fs.references["temp/.zarray"], (str, bytes))


def test_zarr_combine(tmpdir, ds):
    """Combine a single zarr-derived reference set into a lazy ``out.parq`` store.

    The dataset fixture is written to a zarr store, translated to kerchunk
    references, passed through ``MultiZarrToZarr`` with a
    ``LazyReferenceMapper`` as the output target, and finally re-opened via
    the "kerchunk" xarray engine. The re-opened dataset must equal the
    original.
    """
    zarr_path = f"{tmpdir}/test1.zarr"
    ds.to_zarr(zarr_path)

    # Build the reference set for the zarr store that was just written.
    single_refs = kerchunk.zarr.ZarrToZarr(zarr_path, inline_threshold=0).translate()

    # The combined references go into a LazyReferenceMapper rather than a dict.
    parquet_path = f"{tmpdir}/out.parq"
    lazy_out = reffs.LazyReferenceMapper.create(parquet_path)
    combiner = kerchunk.combine.MultiZarrToZarr(
        [single_refs],
        concat_dims=["time"],
        out=lazy_out,
    )
    combiner.translate()

    # Round trip: open the written references and compare to the source data.
    reopened = xr.open_dataset(parquet_path, engine="kerchunk")
    assert ds.equals(reopened)

0 comments on commit 66b9deb

Please sign in to comment.