Skip to content

Commit

Permalink
Merge pull request #414 from maxrjones/append-time
Browse files (browse the repository at this point in the history)
Add test for appending with cftime
  • Loading branch information
martindurant authored Feb 2, 2024
2 parents 0636846 + 55496d8 commit 23c1146
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 15 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [38, 39, 310]
python-version: [39, 310, 311]

steps:
- uses: actions/checkout@v4
- name: Setup conda
uses: mamba-org/setup-micromamba@v1
with:
environment-file: ci/environment-py${{matrix.python-version}}.yml
cache-downloads: false
cache-environment: true
generate-run-shell: false
- name: Install kerchunk
shell: bash -l {0}
run: |
Expand Down
2 changes: 1 addition & 1 deletion ci/environment-py310.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: test_env
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python=3.10
- dask
Expand Down
4 changes: 2 additions & 2 deletions ci/environment-py38.yml → ci/environment-py311.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: test_env
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python=3.8
- python=3.11
- dask
- zarr
- xarray
Expand Down
2 changes: 1 addition & 1 deletion ci/environment-py39.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: test_env
channels:
- conda-forge
- defaults
- nodefaults
dependencies:
- python=3.9
- dask
Expand Down
5 changes: 4 additions & 1 deletion kerchunk/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def append(
ds = xr.open_dataset(
fs.get_mapper(), engine="zarr", backend_kwargs={"consolidated": False}
)
z = zarr.open(fs.get_mapper())
mzz = MultiZarrToZarr(
path,
out=fs.references, # dict or parquet/lazy
Expand Down Expand Up @@ -235,7 +236,7 @@ def append(
mzz.coos[var].add(value2)

else:
mzz.coos[var] = set(ds[var].values)
mzz.coos[var] = set(z[var][:])
return mzz

@property
Expand Down Expand Up @@ -336,6 +337,8 @@ def _get_value(self, index, z, var, fn=None):
self.cf_units[var] = dict(units=units, calendar=calendar)
else:
o = selector # must be a non-number constant - error?
if var in self.coo_dtypes:
o = np.array(o, dtype=self.coo_dtypes[var])
logger.debug("Decode: %s -> %s", (selector, index, var, fn), o)
return o

Expand Down
58 changes: 58 additions & 0 deletions kerchunk/tests/test_combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,19 @@
b'1970-01-01T00:00:00"}',
)

# Single-time-step dataset for the standard CF-time case: `arr` as the data
# payload with a lone time coordinate value of 3, written out as
# "cfstdtime3.zarr" under the memory:// protocol.
tdata1 = xr.DataArray(
data=arr,
coords={"time": np.array([3])},
dims=["time", "x", "y"],
name="data",
)
xr.Dataset({"data": tdata1}).to_zarr("memory://cfstdtime3.zarr")
# Replace the time variable's .zattrs so that it carries CF "units"
# ("seconds since 1970-01-01T00:00:00") alongside its dimension name.
# NOTE(review): `fs` is presumably the fsspec in-memory filesystem backing
# the memory:// URL above - confirm against its definition earlier in the file.
fs.pipe(
"cfstdtime3.zarr/time/.zattrs",
b'{"_ARRAY_DIMENSIONS": ["time"], "units": "seconds since '
b'1970-01-01T00:00:00"}',
)

# cftime arrays - non standard
tdata1 = xr.DataArray(
data=arr,
Expand Down Expand Up @@ -345,6 +358,51 @@ def test_single_append(refs):
assert z.time.values.tolist() == [1, 2, 3]


@pytest.mark.parametrize("mapper", [{}, {"time": "cf:time"}])
@pytest.mark.parametrize("dtype", [{"time": "M8[s]"}, {}])
def test_single_append_cf(refs, mapper, dtype):
    """Append one CF-time reference set onto an existing two-input combination.

    The "cfstdtime1" and "cfstdtime2" references are first combined along
    ``time``; "cfstdtime3" is then appended to that output via
    ``MultiZarrToZarr.append``. The final references are opened through the
    ``reference://`` protocol with the zarr engine, and the test checks the
    data shape, the chunk references for each time step, and the decoded
    time values.

    Parametrized over ``coo_map`` (empty, or ``"cf:time"`` for ``time``) and
    ``coo_dtypes`` (``"M8[s]"`` for ``time``, or empty).
    """
    # Stage 1: combine the first two inputs.
    initial = MultiZarrToZarr(
        [refs["cfstdtime1"], refs["cfstdtime2"]],
        remote_protocol="memory",
        concat_dims=["time"],
        coo_map=mapper,
        coo_dtypes=dtype,
    )
    initial_refs = initial.translate()

    # Stage 2: append the third input onto the stage-1 output.
    appended = MultiZarrToZarr.append(
        [refs["cfstdtime3"]],
        initial_refs,
        remote_protocol="memory",
        concat_dims=["time"],
        coo_map=mapper,
        coo_dtypes=dtype,
    )
    out = appended.translate()

    # Open the combined references as a dataset.
    storage_options = {"fo": out, "remote_protocol": "memory"}
    ds = xr.open_dataset(
        "reference://",
        backend_kwargs={
            "storage_options": storage_options,
            "consolidated": False,
        },
        engine="zarr",
    )

    assert ds.data.shape == (3, 10, 10)

    # Each chunk along the time axis must point at the matching source store.
    expected_chunks = {
        "data/0.0.0": ["memory:///cfstdtime1.zarr/data/0.0.0"],
        "data/1.0.0": ["memory:///cfstdtime2.zarr/data/0.0.0"],
        "data/2.0.0": ["memory:///cfstdtime3.zarr/data/0.0.0"],
    }
    for chunk_key, target in expected_chunks.items():
        assert out["refs"][chunk_key] == target

    # Time must decode to 1, 2 and 3 seconds after the epoch.
    expected_times = np.array(
        [
            "1970-01-01T00:00:01.000000000",
            "1970-01-01T00:00:02.000000000",
            "1970-01-01T00:00:03.000000000",
        ],
        dtype="datetime64[ns]",
    )
    np.testing.assert_equal(ds.time.values, expected_times)


def test_single_append_parquet(refs):
from fsspec.implementations.reference import LazyReferenceMapper

Expand Down
11 changes: 2 additions & 9 deletions kerchunk/xarray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,14 @@

class KerchunkBackend(BackendEntrypoint):
def open_dataset(
self,
filename_or_obj,
*,
drop_variables=None,
storage_options=None,
open_dataset_options=None
self, filename_or_obj, *, storage_options=None, open_dataset_options=None, **kw
):

open_dataset_options = (open_dataset_options or {}) | kw
ref_ds = open_reference_dataset(
filename_or_obj,
storage_options=storage_options,
open_dataset_options=open_dataset_options,
)
if drop_variables is not None:
ref_ds = ref_ds.drop_vars(drop_variables)
return ref_ds

open_dataset_parameters = [
Expand Down

0 comments on commit 23c1146

Please sign in to comment.