From 66b9deb963cde9513bed920e944cc4f76af1afdc Mon Sep 17 00:00:00 2001
From: Martin Durant
Date: Tue, 16 Jan 2024 21:19:47 -0500
Subject: [PATCH] Fix combine from zarr with parquet

---

Notes:
    kerchunk/combine.py (second_pass): inside the loop that copies every
    key returned by fs.ls(v, detail=False) into self.out, keys whose last
    path component starts with ".z" are now stored as fs.cat(k); all other
    keys are still copied unchanged from fs.references[k].
    Presumably this is needed because a raw reference entry for a ".z*"
    metadata key is not usable when self.out is a parquet-backed mapper --
    TODO confirm against fsspec's LazyReferenceMapper.

    kerchunk/tests/test_zarr.py (test_zarr_combine): writes the `ds`
    fixture to a zarr store under tmpdir, builds references with
    ZarrToZarr(inline_threshold=0), combines that single input with
    MultiZarrToZarr(concat_dims=["time"]) into an output created by
    LazyReferenceMapper.create(".../out.parq"), reopens the result with
    xr.open_dataset(engine="kerchunk") and asserts it equals `ds`.

    NOTE(review): leading whitespace and blank context lines in the hunks
    below were reconstructed from the hunk line counts and Python syntax;
    verify them against the target files before applying.

 kerchunk/combine.py         |  5 ++++-
 kerchunk/tests/test_zarr.py | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/kerchunk/combine.py b/kerchunk/combine.py
index 898d0ab3..08a0ea0d 100644
--- a/kerchunk/combine.py
+++ b/kerchunk/combine.py
@@ -459,7 +459,10 @@ def second_pass(self):
                     # a coordinate is any array appearing in its own or other array's _ARRAY_DIMENSIONS
                     skip.add(v)
                     for k in fs.ls(v, detail=False):
-                        self.out[k] = fs.references[k]
+                        if k.rsplit("/", 1)[-1].startswith(".z"):
+                            self.out[k] = fs.cat(k)
+                        else:
+                            self.out[k] = fs.references[k]
                     continue
 
                 dont_skip.add(v)  # don't check for coord or identical again
diff --git a/kerchunk/tests/test_zarr.py b/kerchunk/tests/test_zarr.py
index bc7becd2..1280993a 100644
--- a/kerchunk/tests/test_zarr.py
+++ b/kerchunk/tests/test_zarr.py
@@ -3,7 +3,9 @@
 import pandas as pd
 import pytest
 import xarray as xr
+import fsspec.implementations.reference as reffs
 
+import kerchunk.combine
 import kerchunk.zarr
 import kerchunk.utils
 
@@ -68,3 +70,17 @@ def test_zarr_in_zip(zarr_in_zip, ds):
         "reference", fo=out, remote_protocol="zip", remote_options={"fo": zarr_in_zip}
     )
     assert isinstance(fs.references["temp/.zarray"], (str, bytes))
+
+
+def test_zarr_combine(tmpdir, ds):
+    fn1 = f"{tmpdir}/test1.zarr"
+    ds.to_zarr(fn1)
+
+    one = kerchunk.zarr.ZarrToZarr(fn1, inline_threshold=0).translate()
+    fn = f"{tmpdir}/out.parq"
+    out = reffs.LazyReferenceMapper.create(fn)
+    mzz = kerchunk.combine.MultiZarrToZarr([one], concat_dims=["time"], out=out)
+    mzz.translate()
+
+    ds2 = xr.open_dataset(fn, engine="kerchunk")
+    assert ds.equals(ds2)