From 955a2d3000c942e42b55b9950aae6ea0254b1eff Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 28 Feb 2022 17:13:34 +0100 Subject: [PATCH 1/6] New function "add_spatial_ref" --- test/core/gridmapping/test_cfconv.py | 80 ++++++++++++++++++++++++++++ xcube/core/gridmapping/cfconv.py | 38 +++++++++++++ 2 files changed, 118 insertions(+) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index 611562529..c72555ad7 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -2,13 +2,20 @@ import unittest from typing import Set +import fsspec import numpy as np import pyproj import xarray as xr +from test.s3test import MOTO_SERVER_ENDPOINT_URL +from test.s3test import S3Test +from xcube.core.gridmapping import GridMapping from xcube.core.gridmapping.cfconv import GridCoords from xcube.core.gridmapping.cfconv import GridMappingProxy +from xcube.core.gridmapping.cfconv import add_spatial_ref from xcube.core.gridmapping.cfconv import get_dataset_grid_mapping_proxies +from xcube.core.new import new_cube +from xcube.core.store.fs.registry import new_fs_data_store CRS_WGS84 = pyproj.crs.CRS(4326) CRS_CRS84 = pyproj.crs.CRS.from_string("urn:ogc:def:crs:OGC:1.3:CRS84") @@ -192,3 +199,76 @@ def _gen_2d(self): self.assertEqual(('y', 'x'), lon.dims) self.assertEqual(('y', 'x'), lat.dims) return noise, crs, lon, lat + + +class AddSpatialRefTest(S3Test): + + def test_add_spatial_ref(self): + original_dataset = new_cube(x_name='x', + y_name='y', + x_start=0, + y_start=0, + x_res=10, + y_res=10, + x_units='metres', + y_units='metres', + drop_bounds=True, + width=100, + height=100, + variables=dict(A=1.3, B=8.3)) + + root = 'eurodatacube-test/xcube-eea' + data_id = 'test.zarr' + crs_var_name = 'spatial_ref' + crs = pyproj.CRS.from_string("EPSG:3035") + + storage_options = dict( + anon=False, + client_kwargs=dict( + endpoint_url=MOTO_SERVER_ENDPOINT_URL, + ) + ) + + fs: fsspec.AbstractFileSystem = fsspec.filesystem('s3', + **storage_options) + if fs.isdir(root): + fs.rm(root, recursive=True) + fs.mkdirs(root, exist_ok=True) + + data_store = new_fs_data_store('s3', + root=root, + storage_options=storage_options) + + data_store.write_data(original_dataset, data_id=data_id) + opened_dataset = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'x', 'y', 'time'}, + set(opened_dataset.variables)) + + with self.assertRaises(ValueError) as cm: + GridMapping.from_dataset(opened_dataset) + self.assertEqual( + ('cannot find any grid mapping in dataset',), + cm.exception.args + ) + + path = f"{root}/{data_id}" + group_store = fs.get_mapper(path, create=True) + + add_spatial_ref(group_store, + crs, + crs_var_name=crs_var_name) + + self.assertTrue(fs.exists(f"{path}/{crs_var_name}")) + self.assertTrue(fs.exists(f"{path}/{crs_var_name}/.zarray")) + self.assertTrue(fs.exists(f"{path}/{crs_var_name}/.zattrs")) + + opened_dataset = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'x', 'y', 'time', 'spatial_ref'}, + set(opened_dataset.variables)) + self.assertEqual(crs_var_name, + opened_dataset.A.attrs.get('grid_mapping')) + self.assertEqual(crs_var_name, + opened_dataset.B.attrs.get('grid_mapping')) + + gm = GridMapping.from_dataset(opened_dataset) + self.assertIn("LAEA Europe", gm.crs.srs) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 52a1e8072..8da033a1e 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -22,8 +22,11 @@ import warnings from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple +import numpy as np import pyproj import xarray as xr +import zarr +import zarr.convenience from xcube.core.schema import get_dataset_chunks @@ -285,3 +288,38 @@ def _find_dataset_tile_size(dataset: xr.Dataset, if tile_width is not None and tile_height is not None: return tile_width, tile_height return None + + +def add_spatial_ref(group_store: zarr.convenience.StoreLike, + crs: pyproj.CRS, + crs_var_name: Optional[str] = 'spatial_ref', + xy_dim_names: Optional[Tuple[str, str]] = None): + """ + Helper function that allows adding a spatial reference to an + existing Zarr dataset. + + :param group_store: The dataset's existing Zarr store or path. + :param crs: The coordinate reference system. + :param crs_var_name: The name of the variable that will hold the + spatial reference. Defaults to "spatial_ref". + :param xy_dim_names: The names of the x and y dimensions. + Defaults to ("x", "y"). + """ + spatial_attrs = crs.to_cf() + spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray + group = zarr.open(group_store, mode='r+') + spatial_ref = group.array(crs_var_name, + 0, + shape=(), + dtype=np.uint8, + fill_value=0) + spatial_ref.attrs.update(**spatial_attrs) + + yx_dim_names = list(reversed(xy_dim_names or ('x', 'y'))) + for item_name, item in group.items(): + if item_name != crs_var_name: + dims = item.attrs.get('_ARRAY_DIMENSIONS') + if dims and len(dims) >= 2 and dims[-2:] == yx_dim_names: + item.attrs['grid_mapping'] = crs_var_name + + zarr.convenience.consolidate_metadata(group_store) From e6012b14f1ce53558afe4352623fb3379faaf5e6 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 1 Mar 2022 10:22:27 +0100 Subject: [PATCH 2/6] Update --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index ac85154e2..aba2e5223 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,11 @@ ### Enhancements +* Introduced helper function `add_spatial_ref()` + of package `xcube.core.gridmapping` that allows subsequently + adding a missing spatial coordinate reference system to an existing + Zarr dataset. (#629) + * Introduced parameter `base_dataset_id` for writing multi-level datasets with the "file", "s3", and "memory" data stores. If given, the base dataset will be linked only with the From d2c099b2533022cd7febfaffbd0424b20e3b7009 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 1 Mar 2022 10:24:08 +0100 Subject: [PATCH 3/6] Exporting add_spatial_ref() --- xcube/core/gridmapping/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xcube/core/gridmapping/__init__.py b/xcube/core/gridmapping/__init__.py index 6ba7c1246..b0935cdd9 100644 --- a/xcube/core/gridmapping/__init__.py +++ b/xcube/core/gridmapping/__init__.py @@ -23,3 +23,4 @@ from .base import CRS_WGS84 from .base import DEFAULT_TOLERANCE from .base import GridMapping +from .cfconv import add_spatial_ref From 52dddbae60a630a939f433021ac0935d6b1e7340 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 1 Mar 2022 10:59:01 +0100 Subject: [PATCH 4/6] Checking args, added tests --- test/core/gridmapping/test_cfconv.py | 92 +++++++++++++++++----------- xcube/core/gridmapping/cfconv.py | 21 ++++--- 2 files changed, 71 insertions(+), 42 deletions(-) diff --git a/test/core/gridmapping/test_cfconv.py b/test/core/gridmapping/test_cfconv.py index c72555ad7..4dff9be68 100644 --- a/test/core/gridmapping/test_cfconv.py +++ b/test/core/gridmapping/test_cfconv.py @@ -204,24 +204,35 @@ def _gen_2d(self): class AddSpatialRefTest(S3Test): def test_add_spatial_ref(self): - original_dataset = new_cube(x_name='x', - y_name='y', - x_start=0, - y_start=0, - x_res=10, - y_res=10, - x_units='metres', - y_units='metres', - drop_bounds=True, - width=100, - height=100, - variables=dict(A=1.3, B=8.3)) + self.assert_add_spatial_ref_ok(None, None) + self.assert_add_spatial_ref_ok(None, ('cx', 'cy')) + self.assert_add_spatial_ref_ok('crs', None) + self.assert_add_spatial_ref_ok('crs', ('cx', 'cy')) + + def assert_add_spatial_ref_ok(self, crs_var_name, xy_dim_names): root = 'eurodatacube-test/xcube-eea' data_id = 'test.zarr' - crs_var_name = 'spatial_ref' crs = pyproj.CRS.from_string("EPSG:3035") + if xy_dim_names: + x_name, y_name = xy_dim_names + else: + x_name, y_name = 'x', 'y' + + cube = new_cube(x_name=x_name, + y_name=y_name, + x_start=0, + y_start=0, + x_res=10, + y_res=10, + x_units='metres', + y_units='metres', + drop_bounds=True, + width=100, + height=100, + variables=dict(A=1.3, B=8.3)) + storage_options = dict( anon=False, client_kwargs=dict( @@ -239,13 +250,13 @@ def test_add_spatial_ref(self): root=root, storage_options=storage_options) - data_store.write_data(original_dataset, data_id=data_id) - opened_dataset = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'x', 'y', 'time'}, - set(opened_dataset.variables)) + data_store.write_data(cube, data_id=data_id) + cube = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'time', x_name, y_name}, + set(cube.variables)) with self.assertRaises(ValueError) as cm: - GridMapping.from_dataset(opened_dataset) + GridMapping.from_dataset(cube) self.assertEqual( ('cannot find any grid mapping in dataset',), cm.exception.args @@ -254,21 +265,32 @@ def test_add_spatial_ref(self): path = f"{root}/{data_id}" group_store = fs.get_mapper(path, create=True) - add_spatial_ref(group_store, - crs, - crs_var_name=crs_var_name) - - self.assertTrue(fs.exists(f"{path}/{crs_var_name}")) - self.assertTrue(fs.exists(f"{path}/{crs_var_name}/.zarray")) - self.assertTrue(fs.exists(f"{path}/{crs_var_name}/.zattrs")) - - opened_dataset = data_store.open_data(data_id) - self.assertEqual({'A', 'B', 'x', 'y', 'time', 'spatial_ref'}, - set(opened_dataset.variables)) - self.assertEqual(crs_var_name, - opened_dataset.A.attrs.get('grid_mapping')) - self.assertEqual(crs_var_name, - opened_dataset.B.attrs.get('grid_mapping')) - - gm = GridMapping.from_dataset(opened_dataset) + expected_crs_var_name = crs_var_name or 'spatial_ref' + + self.assertTrue(fs.exists(path)) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}")) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) + self.assertFalse(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + + kwargs = {} + if crs_var_name is not None: + kwargs.update(crs_var_name=crs_var_name) + if xy_dim_names is not None: + kwargs.update(xy_dim_names=xy_dim_names) + add_spatial_ref(group_store, crs, **kwargs) + + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}")) + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zarray")) + self.assertTrue(fs.exists(f"{path}/{expected_crs_var_name}/.zattrs")) + + cube = data_store.open_data(data_id) + self.assertEqual({'A', 'B', 'time', + x_name, y_name, expected_crs_var_name}, + set(cube.variables)) + self.assertEqual(expected_crs_var_name, + cube.A.attrs.get('grid_mapping')) + self.assertEqual(expected_crs_var_name, + cube.B.attrs.get('grid_mapping')) + + gm = GridMapping.from_dataset(cube) self.assertIn("LAEA Europe", gm.crs.srs) diff --git a/xcube/core/gridmapping/cfconv.py b/xcube/core/gridmapping/cfconv.py index 8da033a1e..2d5b995f3 100644 --- a/xcube/core/gridmapping/cfconv.py +++ b/xcube/core/gridmapping/cfconv.py @@ -20,6 +20,7 @@ # SOFTWARE. import warnings +from collections.abc import MutableMapping from typing import Optional, Dict, Any, Hashable, Union, Set, List, Tuple import numpy as np @@ -29,6 +30,7 @@ import zarr.convenience from xcube.core.schema import get_dataset_chunks +from xcube.util.assertions import assert_instance class GridCoords: @@ -290,7 +292,7 @@ def _find_dataset_tile_size(dataset: xr.Dataset, return None -def add_spatial_ref(group_store: zarr.convenience.StoreLike, +def add_spatial_ref(dataset_store: zarr.convenience.StoreLike, crs: pyproj.CRS, crs_var_name: Optional[str] = 'spatial_ref', xy_dim_names: Optional[Tuple[str, str]] = None): @@ -298,16 +300,20 @@ def add_spatial_ref(group_store: zarr.convenience.StoreLike, Helper function that allows adding a spatial reference to an existing Zarr dataset. - :param group_store: The dataset's existing Zarr store or path. - :param crs: The coordinate reference system. + :param dataset_store: The dataset's existing Zarr store or path. + :param crs: The spatial coordinate reference system. :param crs_var_name: The name of the variable that will hold the spatial reference. Defaults to "spatial_ref". :param xy_dim_names: The names of the x and y dimensions. Defaults to ("x", "y"). """ + assert_instance(dataset_store, (MutableMapping, str), name='group_store') + assert_instance(crs_var_name, str, name='crs_var_name') + x_dim_name, y_dim_name = xy_dim_names or ('x', 'y') + spatial_attrs = crs.to_cf() spatial_attrs['_ARRAY_DIMENSIONS'] = [] # Required by xarray - group = zarr.open(group_store, mode='r+') + group = zarr.open(dataset_store, mode='r+') spatial_ref = group.array(crs_var_name, 0, shape=(), @@ -315,11 +321,12 @@ def add_spatial_ref(group_store: zarr.convenience.StoreLike, fill_value=0) spatial_ref.attrs.update(**spatial_attrs) - yx_dim_names = list(reversed(xy_dim_names or ('x', 'y'))) for item_name, item in group.items(): if item_name != crs_var_name: dims = item.attrs.get('_ARRAY_DIMENSIONS') - if dims and len(dims) >= 2 and dims[-2:] == yx_dim_names: + if dims and len(dims) >= 2 \ + and dims[-2] == y_dim_name \ + and dims[-1] == x_dim_name: item.attrs['grid_mapping'] = crs_var_name - zarr.convenience.consolidate_metadata(group_store) + zarr.convenience.consolidate_metadata(dataset_store) From 414745e8b76994d4511970a2847bb5c91edaabe8 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 1 Mar 2022 11:19:18 +0100 Subject: [PATCH 5/6] Fixed imports --- CHANGES.md | 2 +- xcube/core/gridmapping/__init__.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index aba2e5223..8a350fc11 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,7 +3,7 @@ ### Enhancements * Introduced helper function `add_spatial_ref()` - of package `xcube.core.gridmapping` that allows subsequently + of package `xcube.core.gridmapping.cfconv` that allows subsequently adding a missing spatial coordinate reference system to an existing Zarr dataset. (#629) diff --git a/xcube/core/gridmapping/__init__.py b/xcube/core/gridmapping/__init__.py index b0935cdd9..6ba7c1246 100644 --- a/xcube/core/gridmapping/__init__.py +++ b/xcube/core/gridmapping/__init__.py @@ -23,4 +23,3 @@ from .base import CRS_WGS84 from .base import DEFAULT_TOLERANCE from .base import GridMapping -from .cfconv import add_spatial_ref From 29d10b20b4834d50c977f01ed5d6be120f693b0b Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Tue, 1 Mar 2022 17:22:32 +0100 Subject: [PATCH 6/6] Update CHANGES.md Co-authored-by: Tonio Fincke --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 8a350fc11..013b9d8b4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,8 +3,8 @@ ### Enhancements * Introduced helper function `add_spatial_ref()` - of package `xcube.core.gridmapping.cfconv` that allows subsequently - adding a missing spatial coordinate reference system to an existing + of package `xcube.core.gridmapping.cfconv` that allows + adding a spatial coordinate reference system to an existing Zarr dataset. (#629) * Introduced parameter `base_dataset_id` for writing multi-level