diff --git a/CHANGES.md b/CHANGES.md index b9f444970..20eef50e8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,11 @@ if it encountered datasets it could not open. New behaviour is to emit a warning and ignore such datasets. (#630) +* Introduced helper function `add_spatial_ref()` + of package `xcube.core.gridmapping.cfconv` that allows + adding a spatial coordinate reference system to an existing + Zarr dataset. (#629) + * Introduced parameter `base_dataset_id` for writing multi-level datasets with the "file", "s3", and "memory" data stores. If given, the base dataset will be linked only with the diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 611562529..4dff9be68 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -2,13 +2,20 @@ import unittest from typing import Set +import fsspec import numpy as np import pyproj import xarray as xr +from test.s3test import MOTO_SERVER_ENDPOINT_URL +from test.s3test import S3Test +from xcube.core.gridmapping import GridMapping from xcube.core.gridmapping.cfconv import GridCoords from xcube.core.gridmapping.cfconv import GridMappingProxy +from xcube.core.gridmapping.cfconv import add_spatial_ref from xcube.core.gridmapping.cfconv import get_dataset_grid_mapping_proxies +from xcube.core.new import new_cube +from xcube.core.store.fs.registry import new_fs_data_store CRS_WGS84 = pyproj.crs.CRS(4326) CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") @@ -192,3 +199,98 @@ def _gen_2d(self): self.assertEqual(('y', 'x'), lon.dims) self.assertEqual(('y', 'x'), lat.dims) return noise, crs, lon, lat + + +class AddSpatialRefTest(S3Test): + + def test_add_spatial_ref(self): + self.assert_add_spatial_ref_ok(None, None) + self.assert_add_spatial_ref_ok(None, ('cx', 'cy')) + self.assert_add_spatial_ref_ok('crs', None) + self.assert_add_spatial_ref_ok('crs', ('cx', 'cy')) + + def assert_add_spatial_ref_ok(self, crs_var_name, xy_dim_names): + + root = 'eurodatacube-test/xcube-eea' + data_id = 'test.zarr' + crs = pyproj.CRS.from_string("EPSG:3035") + + if xy_dim_names: + x_name, y_name = xy_dim_names + else: + x_name, y_name = 'x', 'y' + + cube = new_cube(x_name=x_name, + y_name=y_name, + x_start=0, + y_start=0, + x_res=10, + y_res=10, + x_units='metres', + y_units='metres', + drop_bounds=True, + width=100, + height=100, + variables=dict(A=1.3, B=8.3)) + + storage_options = dict( + anon=False, + client_kwargs=dict( + endpoint_url=MOTO_SERVER_ENDPOINT_URL, + ) + ) + + fs: fsspec.AbstractFileSystem = fsspec.filesystem('s3', + **storage_options) + if fs.isdir(root): + fs.rm(root, recursive=True) + fs.mkdirs(root, exist_ok=True) + + data_store = new_fs_data_store('s3', + root=root, + storage_options=storage_options) + + data_store.write_data(cube, data_id=data_id) + cube = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'time', x_name, y_name}, + set(cube.variables)) + + with self.assertRaises(ValueError) as cm: + GridMapping.from_dataset(cube) + self.assertEqual( + ('cannot find any grid mapping in dataset',), + cm.exception.args + ) + + path = f"{root}/{data_id}" + group_store = fs.get_mapper(path, create=True) + + expected_crs_var_name = crs_var_name or 'spatial_ref' + + self.assertTrue(fs.exists(path)) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}")) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + + kwargs = {} + if crs_var_name is not None: + kwargs.update(crs_var_name=crs_var_name) + if xy_dim_names is not None: + kwargs.update(xy_dim_names=xy_dim_names) + add_spatial_ref(group_store, crs, **kwargs) + + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}")) + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + + cube = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'time', + x_name, y_name, expected_crs_var_name}, + set(cube.variables)) + self.assertEqual(expected_crs_var_name, + cube.A.attrs.get('grid_mapping')) + self.assertEqual(expected_crs_var_name, + cube.B.attrs.get('grid_mapping')) + + gm = GridMapping.from_dataset(cube) + self.assertIn("LAEA Europe", gm.crs.srs) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 52a1e8072..2d5b995f3 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -20,12 +20,17 @@ # SOFTWARE. import warnings +from collections.abc import MutableMapping from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple +import numpy as np import pyproj import xarray as xr +import zarr +import zarr.convenience from xcube.core.schema import get_dataset_chunks +from xcube.util.assertions import assert_instance class GridCoords: @@ -285,3 +290,43 @@ def _find_dataset_tile_size(dataset: xr.Dataset, if tile_width is not None and tile_height is not None: return tile_width, tile_height return None + + +def add_spatial_ref(dataset_store: zarr.convenience.StoreLike, + crs: pyproj.CRS, + crs_var_name: Optional[str] = 'spatial_ref', + xy_dim_names: Optional[Tuple[str, str]] = None): + """ + Helper function that allows adding a spatial reference to an + existing Zarr dataset. + + :param dataset_store: The dataset's existing Zarr store or path. + :param crs: The spatial coordinate reference system. + :param crs_var_name: The name of the variable that will hold the + spatial reference. Defaults to "spatial_ref". + :param xy_dim_names: The names of the x and y dimensions. + Defaults to ("x", "y"). + """ + assert_instance(dataset_store, (MutableMapping, str), name='group_store') + assert_instance(crs_var_name, str, name='crs_var_name') + x_dim_name, y_dim_name = xy_dim_names or ('x', 'y') + + spatial_attrs = crs.to_cf() + spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray + group = zarr.open(dataset_store, mode='r+') + spatial_ref = group.array(crs_var_name, + 0, + shape=(), + dtype=np.uint8, + fill_value=0) + spatial_ref.attrs.update(**spatial_attrs) + + for item_name, item in group.items(): + if item_name != crs_var_name: + dims = item.attrs.get('_ARRAY_DIMENSIONS') + if dims and len(dims) >= 2 \ + and dims[-2] == y_dim_name \ + and dims[-1] == x_dim_name: + item.attrs['grid_mapping'] = crs_var_name + + zarr.convenience.consolidate_metadata(dataset_store)