Skip to content

Commit

Permalink
Collection Metadata (#62)
Browse files Browse the repository at this point in the history
* Initial metadata implementation

* Initial extents with dask arrays

* More accurate to the spec version of the metadata

* Update readme

* More tests

* Suggestions, formatting

* Use projected dataset for extents

* Better coordinate handling in projections

* Adjust typing

* Better parameter filtering

* Extract metadata logic placeholder

* forgot metadata file

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add a whole bunch of pydantic

* Full pydantic typesafe collection metadata

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
mpiannucci and pre-commit-ci[bot] authored Nov 21, 2024
1 parent 221db07 commit 7a75087
Show file tree
Hide file tree
Showing 5 changed files with 569 additions and 24 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ This package attempts to follow [the spec](https://docs.ogc.org/is/19-086r6/19-0

### [collections](https://docs.ogc.org/is/19-086r6/19-086r6.html#_e55ba0f5-8f24-4f1b-a7e3-45775e39ef2e) and Resource Paths Support

`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead the path resource appears as `/{dataset_id}/query`. This is because of the path structure of xpublish.
`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead, the path resource appears as `/{dataset_id}/edr/{query}` because of the path structure of xpublish. In the future, if `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html), it could provide a path to supporting the spec-compliant `collections` resource path.

In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html) it will provide a path to supporting the spec compliant `collections` resource path.
However, although the `collections` resource itself does not exist, this implementation supports [collection metadata](https://docs.ogc.org/is/19-086r6/19-086r6.html#_5d07dde9-231a-4652-a1f3-dd036c337bdc) at the dataset level through the `/{dataset_id}/edr/` resource.

### Supported Queries

Expand Down
90 changes: 82 additions & 8 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,74 @@ def test_cf_area_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_metadata_query(cf_client):
    """Verify the collection metadata returned by ``/datasets/air/edr/``.

    Checks the top-level identification fields, CRS, output formats,
    data queries, temporal/spatial extents, and parameter filtering
    for the ``air`` test dataset.
    """
    response = cf_client.get("/datasets/air/edr/")
    assert response.status_code == 200, "Response did not return successfully"
    data = response.json()

    assert data["id"] == "air", "The id should be air"
    assert data["title"] == "4x daily NMC reanalysis (1948)", "The title is incorrect"
    assert (
        data["description"]
        == "Data is from NMC initialized reanalysis\n(4x/day). These are the 0.9950 sigma level values."
    ), "The description is incorrect"
    assert data["crs"] == ["EPSG:4326"], "The crs is incorrect"
    assert set(data["output_formats"]) == {
        "cf_covjson",
        "nc",
        "netcdf4",
        "nc4",
        "netcdf",
        "csv",
        "geojson",
    }, "The output formats are incorrect"
    assert (
        "position" in data["data_queries"] and "area" in data["data_queries"]
    ), "The data queries are incorrect"

    # BUG FIX: the original expression `"temporal" and "spatial" in data["extent"]`
    # only checked the "spatial" key -- the string literal "temporal" is always
    # truthy, so `and` short-circuits past it. Test each key's membership explicitly.
    assert (
        "temporal" in data["extent"] and "spatial" in data["extent"]
    ), "Temporal and spatial extents should be present in extent"
    assert (
        "vertical" not in data["extent"]
    ), "Vertical extent should not be present in extent"

    assert data["extent"]["temporal"]["interval"] == [
        "2013-01-01T00:00:00",
        "2013-01-01T18:00:00",
    ], "Temporal interval is incorrect"
    assert (
        data["extent"]["temporal"]["values"][0]
        == "2013-01-01T00:00:00/2013-01-01T18:00:00"
    ), "Temporal values are incorrect"

    assert data["extent"]["spatial"]["bbox"] == [
        [200.0, 15.0, 322.5, 75.0],
    ], "Spatial bbox is incorrect"
    assert data["extent"]["spatial"]["crs"] == "EPSG:4326", "Spatial CRS is incorrect"

    assert "air" in data["parameter_names"], "Air parameter should be present"
    assert "lat" not in data["parameter_names"], "lat should not be present"
    assert "lon" not in data["parameter_names"], "lon should not be present"


def test_cf_metadata_query_temp_smoke_test(cf_client):
    """Smoke-test the metadata endpoint for the ``temp`` dataset.

    Only asserts that the response succeeds, carries the expected id,
    and exposes the standard top-level metadata keys.
    """
    response = cf_client.get("/datasets/temp/edr/")
    assert response.status_code == 200, "Response did not return successfully"
    data = response.json()

    assert data["id"] == "temp", "The id should be temp"

    expected_top_level_keys = (
        "title",
        "description",
        "crs",
        "extent",
        "output_formats",
        "data_queries",
    )
    for key in expected_top_level_keys:
        assert key in data, f"Key {key} is not a top level key in the metadata response"


def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):
x = 204
y = 44
Expand Down Expand Up @@ -122,14 +190,20 @@ def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):

axes = data["domain"]["axes"]

npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
), "Did not select nearby x coordinate"
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
), "Did not select a nearby y coordinate"
(
npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
),
"Did not select nearby x coordinate",
)
(
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
),
"Did not select a nearby y coordinate",
)

temp_range = data["ranges"]["temp"]
assert temp_range["type"] == "NdArray", "Response range should be a NdArray"
Expand Down
54 changes: 45 additions & 9 deletions xpublish_edr/geometry/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import itertools
from functools import lru_cache
from typing import Union

import pyproj
import xarray as xr
Expand All @@ -16,6 +17,9 @@
transformer_from_crs = lru_cache(pyproj.Transformer.from_crs)


DEFAULT_CRS = pyproj.CRS.from_epsg(4326)


def coord_is_regular(da: xr.DataArray) -> bool:
"""
Check if the DataArray has a regular grid
Expand All @@ -30,6 +34,20 @@ def is_regular_xy_coords(ds: xr.Dataset) -> bool:
return coord_is_regular(ds.cf["X"]) and coord_is_regular(ds.cf["Y"])


def spatial_bounds(ds: xr.Dataset) -> tuple[float, float, float, float]:
    """
    Get the spatial bounds of the dataset, naively, in whatever CRS it is in

    Returns ``(min_x, min_y, max_x, max_y)`` computed directly from the
    datasets cf-identified X and Y coordinates, with no reprojection.
    """
    x_coord = ds.cf["X"]
    y_coord = ds.cf["Y"]

    x_min, x_max = float(x_coord.min().values), float(x_coord.max().values)
    y_min, y_max = float(y_coord.min().values), float(y_coord.max().values)

    return x_min, y_min, x_max, y_max


def dataset_crs(ds: xr.Dataset) -> pyproj.CRS:
grid_mapping_names = ds.cf.grid_mapping_names
if len(grid_mapping_names) == 0:
Expand Down Expand Up @@ -61,12 +79,15 @@ def project_geometry(ds: xr.Dataset, geometry_crs: str, geometry: Geometry) -> G
return transform(transformer.transform, geometry)


def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
def project_dataset(ds: xr.Dataset, query_crs: Union[str, pyproj.CRS]) -> xr.Dataset:
"""
Project the dataset to the given CRS
"""
data_crs = dataset_crs(ds)
target_crs = pyproj.CRS.from_string(query_crs)
if isinstance(query_crs, pyproj.CRS):
target_crs = query_crs
else:
target_crs = pyproj.CRS.from_string(query_crs)
if data_crs == target_crs:
return ds

Expand All @@ -76,15 +97,23 @@ def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
always_xy=True,
)

# TODO: Handle rotated pole
cf_coords = target_crs.coordinate_system.to_cf()
# Unpack the coordinates
try:
X = ds.cf["X"]
Y = ds.cf["Y"]
except KeyError:
# If the dataset has multiple X axes, we can try to find the right one
source_cf_coords = data_crs.coordinate_system.to_cf()

# Get the new X and Y coordinates
target_y_coord = next(coord for coord in cf_coords if coord["axis"] == "Y")
target_x_coord = next(coord for coord in cf_coords if coord["axis"] == "X")
source_x_coord = next(
coord["standard_name"] for coord in source_cf_coords if coord["axis"] == "X"
)
source_y_coord = next(
coord["standard_name"] for coord in source_cf_coords if coord["axis"] == "Y"
)

X = ds.cf["X"]
Y = ds.cf["Y"]
X = ds.cf[source_x_coord]
Y = ds.cf[source_y_coord]

# Transform the coordinates
# If the data is vectorized, we just transform the points in full
Expand All @@ -104,6 +133,13 @@ def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
c for c in ds.coords if x_dim in ds[c].dims or y_dim in ds[c].dims
]

# TODO: Handle rotated pole
target_cf_coords = target_crs.coordinate_system.to_cf()

# Get the new X and Y coordinates
target_x_coord = next(coord for coord in target_cf_coords if coord["axis"] == "X")
target_y_coord = next(coord for coord in target_cf_coords if coord["axis"] == "Y")

target_x_coord_name = target_x_coord["standard_name"]
target_y_coord_name = target_y_coord["standard_name"]

Expand Down
Loading

0 comments on commit 7a75087

Please sign in to comment.