diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 80c3286c..dd83bea5 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [38, 39, 310]
+        python-version: [39, 310, 311]
     steps:
       - uses: actions/checkout@v4
 
@@ -16,6 +16,9 @@ jobs:
         uses: mamba-org/setup-micromamba@v1
         with:
           environment-file: ci/environment-py${{matrix.python-version}}.yml
+          cache-downloads: false
+          cache-environment: true
+          generate-run-shell: false
       - name: Install kerchunk
         shell: bash -l {0}
         run: |
diff --git a/ci/environment-py310.yml b/ci/environment-py310.yml
index 6c760a37..021b150f 100644
--- a/ci/environment-py310.yml
+++ b/ci/environment-py310.yml
@@ -1,7 +1,7 @@
 name: test_env
 channels:
   - conda-forge
-  - defaults
+  - nodefaults
 dependencies:
   - python=3.10
   - dask
diff --git a/ci/environment-py38.yml b/ci/environment-py311.yml
similarity index 93%
rename from ci/environment-py38.yml
rename to ci/environment-py311.yml
index 3d276036..d680ae71 100644
--- a/ci/environment-py38.yml
+++ b/ci/environment-py311.yml
@@ -1,9 +1,9 @@
 name: test_env
 channels:
   - conda-forge
-  - defaults
+  - nodefaults
 dependencies:
-  - python=3.8
+  - python=3.11
   - dask
   - zarr
   - xarray
diff --git a/ci/environment-py39.yml b/ci/environment-py39.yml
index f5f8f90d..e4ca09ad 100644
--- a/ci/environment-py39.yml
+++ b/ci/environment-py39.yml
@@ -1,7 +1,7 @@
 name: test_env
 channels:
   - conda-forge
-  - defaults
+  - nodefaults
 dependencies:
   - python=3.9
   - dask
diff --git a/kerchunk/combine.py b/kerchunk/combine.py
index 0aa4c68c..69ec3cc6 100644
--- a/kerchunk/combine.py
+++ b/kerchunk/combine.py
@@ -201,6 +201,7 @@ def append(
         ds = xr.open_dataset(
             fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False}
         )
+        z = zarr.open(fs.get_mapper())
         mzz = MultiZarrToZarr(
             path,
             out=fs.references,  # dict or parquet/lazy
@@ -235,7 +236,7 @@ def append(
                     mzz.coos[var].add(value2)
             else:
-                mzz.coos[var] = set(ds[var].values)
+                mzz.coos[var] = set(z[var][:])
         return mzz
 
     @property
@@ -336,6 +337,8 @@ def _get_value(self, index, z, var, fn=None):
                 self.cf_units[var] = dict(units=units, calendar=calendar)
         else:
             o = selector  # must be a non-number constant - error?
+        if var in self.coo_dtypes:
+            o = np.array(o, dtype=self.coo_dtypes[var])
         logger.debug("Decode: %s -> %s", (selector, index, var, fn), o)
         return o
 
diff --git a/kerchunk/tests/test_combine.py b/kerchunk/tests/test_combine.py
index 3e08dc67..64e6e5c5 100644
--- a/kerchunk/tests/test_combine.py
+++ b/kerchunk/tests/test_combine.py
@@ -174,6 +174,19 @@
     b'1970-01-01T00:00:00"}',
 )
 
+tdata1 = xr.DataArray(
+    data=arr,
+    coords={"time": np.array([3])},
+    dims=["time", "x", "y"],
+    name="data",
+)
+xr.Dataset({"data": tdata1}).to_zarr("memory://cfstdtime3.zarr")
+fs.pipe(
+    "cfstdtime3.zarr/time/.zattrs",
+    b'{"_ARRAY_DIMENSIONS": ["time"], "units": "seconds since '
+    b'1970-01-01T00:00:00"}',
+)
+
 # cftime arrays - non standard
 tdata1 = xr.DataArray(
     data=arr,
@@ -345,6 +358,51 @@ def test_single_append(refs):
     assert z.time.values.tolist() == [1, 2, 3]
 
 
+@pytest.mark.parametrize("mapper", [{}, {"time": "cf:time"}])
+@pytest.mark.parametrize("dtype", [{"time": "M8[s]"}, {}])
+def test_single_append_cf(refs, mapper, dtype):
+    mzz = MultiZarrToZarr(
+        [refs["cfstdtime1"], refs["cfstdtime2"]],
+        remote_protocol="memory",
+        concat_dims=["time"],
+        coo_map=mapper,
+        coo_dtypes=dtype,
+    )
+    out = mzz.translate()
+    mzz = MultiZarrToZarr.append(
+        [refs["cfstdtime3"]],
+        out,
+        remote_protocol="memory",
+        concat_dims=["time"],
+        coo_map=mapper,
+        coo_dtypes=dtype,
+    )
+    out = mzz.translate()
+    z = xr.open_dataset(
+        "reference://",
+        backend_kwargs={
+            "storage_options": {"fo": out, "remote_protocol": "memory"},
+            "consolidated": False,
+        },
+        engine="zarr",
+    )
+    assert z.data.shape == (3, 10, 10)
+    assert out["refs"]["data/0.0.0"] == ["memory:///cfstdtime1.zarr/data/0.0.0"]
+    assert out["refs"]["data/1.0.0"] == ["memory:///cfstdtime2.zarr/data/0.0.0"]
+    assert out["refs"]["data/2.0.0"] == ["memory:///cfstdtime3.zarr/data/0.0.0"]
+    np.testing.assert_equal(
+        z.time.values,
+        np.array(
+            [
+                "1970-01-01T00:00:01.000000000",
+                "1970-01-01T00:00:02.000000000",
+                "1970-01-01T00:00:03.000000000",
+            ],
+            dtype="datetime64[ns]",
+        ),
+    )
+
+
 def test_single_append_parquet(refs):
     from fsspec.implementations.reference import LazyReferenceMapper
 
diff --git a/kerchunk/xarray_backend.py b/kerchunk/xarray_backend.py
index badf2907..ca377f6d 100644
--- a/kerchunk/xarray_backend.py
+++ b/kerchunk/xarray_backend.py
@@ -6,21 +6,14 @@
 
 class KerchunkBackend(BackendEntrypoint):
     def open_dataset(
-        self,
-        filename_or_obj,
-        *,
-        drop_variables=None,
-        storage_options=None,
-        open_dataset_options=None
+        self, filename_or_obj, *, storage_options=None, open_dataset_options=None, **kw
     ):
-
+        open_dataset_options = (open_dataset_options or {}) | kw
         ref_ds = open_reference_dataset(
             filename_or_obj,
             storage_options=storage_options,
             open_dataset_options=open_dataset_options,
         )
-        if drop_variables is not None:
-            ref_ds = ref_ds.drop_vars(drop_variables)
         return ref_ds
 
     open_dataset_parameters = [
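For context, below is a minimal usage sketch of what this patch enables: combine two CF-time reference sets with `coo_map`/`coo_dtypes`, append a third with `MultiZarrToZarr.append`, and read the result back through the reference filesystem, mirroring the new `test_single_append_cf`. The store names (`cfstd*.zarr`), the 1×4×4 array shape, and the use of `kerchunk.zarr.single_zarr` to generate the input references are illustrative assumptions, not lifted verbatim from the test suite.

```python
import fsspec
import numpy as np
import xarray as xr

from kerchunk.combine import MultiZarrToZarr
from kerchunk.zarr import single_zarr

fs = fsspec.filesystem("memory")

# Build three tiny zarr stores whose "time" coordinate is CF-encoded
# (integer seconds since the epoch). Names and shapes are hypothetical.
refs = {}
for i in (1, 2, 3):
    name = f"cfstd{i}.zarr"
    da = xr.DataArray(
        np.random.rand(1, 4, 4),
        coords={"time": np.array([i])},
        dims=["time", "x", "y"],
        name="data",
    )
    xr.Dataset({"data": da}).to_zarr(f"memory://{name}")
    # Overwrite the coordinate attrs so "time" looks CF-encoded, as the tests do
    fs.pipe(
        f"{name}/time/.zattrs",
        b'{"_ARRAY_DIMENSIONS": ["time"], "units": "seconds since '
        b'1970-01-01T00:00:00"}',
    )
    refs[i] = single_zarr(f"memory://{name}")

kw = dict(
    remote_protocol="memory",
    concat_dims=["time"],
    coo_map={"time": "cf:time"},   # decode "time" through cftime
    coo_dtypes={"time": "M8[s]"},  # and emit it as datetime64[s]
)

# Combine the first two datasets ...
out = MultiZarrToZarr([refs[1], refs[2]], **kw).translate()

# ... then append the third to the existing reference set.
out = MultiZarrToZarr.append([refs[3]], out, **kw).translate()

ds = xr.open_dataset(
    "reference://",
    engine="zarr",
    backend_kwargs={
        "storage_options": {"fo": out, "remote_protocol": "memory"},
        "consolidated": False,
    },
)
print(ds.time.values)  # three datetime64 timestamps, 1970-01-01T00:00:01..03
```

The `coo_dtypes` cast added to `_get_value` is what turns the `cf:`-decoded seconds into `datetime64` values, and `append` now reads the existing coordinate through zarr (`z[var][:]`) rather than the xarray-decoded view, so already-encoded CF times are merged in their on-disk representation instead of being double-decoded.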