Skip to content

Commit

Permalink
Collection Metadata (#62)
Browse files Browse the repository at this point in the history
* Initial metadata implementation

* Initial extents with dask arrays

* More accurate to the spec version of the metadata

* Update readme

* More tests

* Suggestions, formatting

* Use projected dataset for extents

* Better coordinate handling in projections

* Adjust typing

* Better parameter filtering

* Extract metadata logic placeholder

* forgot metadata file

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add a whole bunch of pydantic

* Full pydantic typesafe collection metadata

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
mpiannucci and pre-commit-ci[bot] authored Nov 21, 2024
1 parent 221db07 commit 7a75087
Show file tree
Hide file tree
Showing 5 changed files with 569 additions and 24 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ This package attempts to follow [the spec](https://docs.ogc.org/is/19-086r6/19-0

### [collections](https://docs.ogc.org/is/19-086r6/19-086r6.html#_e55ba0f5-8f24-4f1b-a7e3-45775e39ef2e) and Resource Paths Support

`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead the path resource appears as `/{dataset_id}/query`. This is because of the path structure of xpublish.
`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead, the path resource appears as `/{dataset_id}/edr/{query}` because of the path structure of xpublish. In the future, if `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html), it could provide a path to supporting the spec-compliant `collections` resource path.

In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html) it will provide a path to supporting the spec compliant `collections` resource path.
However, although the `collections` resource itself does not exist, this implementation supports [collection metadata](https://docs.ogc.org/is/19-086r6/19-086r6.html#_5d07dde9-231a-4652-a1f3-dd036c337bdc) at the dataset level through the `/{dataset_id}/edr/` resource.

### Supported Queries

Expand Down
90 changes: 82 additions & 8 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,74 @@ def test_cf_area_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_metadata_query(cf_client):
    """Verify the collection metadata returned by ``/datasets/air/edr/``.

    Checks the top-level identification fields, CRS, output formats,
    data queries, temporal/spatial extents, and parameter filtering
    for the ``air`` test dataset.
    """
    response = cf_client.get("/datasets/air/edr/")
    assert response.status_code == 200, "Response did not return successfully"
    data = response.json()

    assert data["id"] == "air", "The id should be air"
    assert data["title"] == "4x daily NMC reanalysis (1948)", "The title is incorrect"
    assert (
        data["description"]
        == "Data is from NMC initialized reanalysis\n(4x/day). These are the 0.9950 sigma level values."
    ), "The description is incorrect"
    assert data["crs"] == ["EPSG:4326"], "The crs is incorrect"
    assert set(data["output_formats"]) == {
        "cf_covjson",
        "nc",
        "netcdf4",
        "nc4",
        "netcdf",
        "csv",
        "geojson",
    }, "The output formats are incorrect"
    assert (
        "position" in data["data_queries"] and "area" in data["data_queries"]
    ), "The data queries are incorrect"

    # BUG FIX: the original expression `"temporal" and "spatial" in data["extent"]`
    # only checked the "spatial" key -- the string literal "temporal" is always
    # truthy, so `and` short-circuits past it. Test each key's membership explicitly.
    assert (
        "temporal" in data["extent"] and "spatial" in data["extent"]
    ), "Temporal and spatial extents should be present in extent"
    assert (
        "vertical" not in data["extent"]
    ), "Vertical extent should not be present in extent"

    assert data["extent"]["temporal"]["interval"] == [
        "2013-01-01T00:00:00",
        "2013-01-01T18:00:00",
    ], "Temporal interval is incorrect"
    assert (
        data["extent"]["temporal"]["values"][0]
        == "2013-01-01T00:00:00/2013-01-01T18:00:00"
    ), "Temporal values are incorrect"

    assert data["extent"]["spatial"]["bbox"] == [
        [200.0, 15.0, 322.5, 75.0],
    ], "Spatial bbox is incorrect"
    assert data["extent"]["spatial"]["crs"] == "EPSG:4326", "Spatial CRS is incorrect"

    assert "air" in data["parameter_names"], "Air parameter should be present"
    assert "lat" not in data["parameter_names"], "lat should not be present"
    assert "lon" not in data["parameter_names"], "lon should not be present"


def test_cf_metadata_query_temp_smoke_test(cf_client):
    """Smoke-test the metadata endpoint for the ``temp`` dataset.

    Only asserts that the response succeeds, carries the expected id,
    and exposes the standard top-level metadata keys.
    """
    response = cf_client.get("/datasets/temp/edr/")
    assert response.status_code == 200, "Response did not return successfully"
    data = response.json()

    assert data["id"] == "temp", "The id should be temp"

    expected_top_level_keys = (
        "title",
        "description",
        "crs",
        "extent",
        "output_formats",
        "data_queries",
    )
    for key in expected_top_level_keys:
        assert key in data, f"Key {key} is not a top level key in the metadata response"


def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):
x = 204
y = 44
Expand Down Expand Up @@ -122,14 +190,20 @@ def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):

axes = data["domain"]["axes"]

npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
), "Did not select nearby x coordinate"
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
), "Did not select a nearby y coordinate"
(
npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
),
"Did not select nearby x coordinate",
)
(
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
),
"Did not select a nearby y coordinate",
)

temp_range = data["ranges"]["temp"]
assert temp_range["type"] == "NdArray", "Response range should be a NdArray"
Expand Down
54 changes: 45 additions & 9 deletions xpublish_edr/geometry/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import itertools
from functools import lru_cache
from typing import Union

import pyproj
import xarray as xr
Expand All @@ -16,6 +17,9 @@
transformer_from_crs = lru_cache(pyproj.Transformer.from_crs)


DEFAULT_CRS = pyproj.CRS.from_epsg(4326)


def coord_is_regular(da: xr.DataArray) -> bool:
"""
Check if the DataArray has a regular grid
Expand All @@ -30,6 +34,20 @@ def is_regular_xy_coords(ds: xr.Dataset) -> bool:
return coord_is_regular(ds.cf["X"]) and coord_is_regular(ds.cf["Y"])


def spatial_bounds(ds: xr.Dataset) -> tuple[float, float, float, float]:
    """
    Get the spatial bounds of the dataset, naively, in whatever CRS it is in

    Returns ``(min_x, min_y, max_x, max_y)`` computed directly from the
    datasets cf-identified X and Y coordinates, with no reprojection.
    """
    x_coord = ds.cf["X"]
    y_coord = ds.cf["Y"]

    x_min, x_max = float(x_coord.min().values), float(x_coord.max().values)
    y_min, y_max = float(y_coord.min().values), float(y_coord.max().values)

    return x_min, y_min, x_max, y_max


def dataset_crs(ds: xr.Dataset) -> pyproj.CRS:
grid_mapping_names = ds.cf.grid_mapping_names
if len(grid_mapping_names) == 0:
Expand Down Expand Up @@ -61,12 +79,15 @@ def project_geometry(ds: xr.Dataset, geometry_crs: str, geometry: Geometry) -> G
return transform(transformer.transform, geometry)


def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
def project_dataset(ds: xr.Dataset, query_crs: Union[str, pyproj.CRS]) -> xr.Dataset:
"""
Project the dataset to the given CRS
"""
data_crs = dataset_crs(ds)
target_crs = pyproj.CRS.from_string(query_crs)
if isinstance(query_crs, pyproj.CRS):
target_crs = query_crs
else:
target_crs = pyproj.CRS.from_string(query_crs)
if data_crs == target_crs:
return ds

Expand All @@ -76,15 +97,23 @@ def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
always_xy=True,
)

# TODO: Handle rotated pole
cf_coords = target_crs.coordinate_system.to_cf()
# Unpack the coordinates
try:
X = ds.cf["X"]
Y = ds.cf["Y"]
except KeyError:
# If the dataset has multiple X axes, we can try to find the right one
source_cf_coords = data_crs.coordinate_system.to_cf()

# Get the new X and Y coordinates
target_y_coord = next(coord for coord in cf_coords if coord["axis"] == "Y")
target_x_coord = next(coord for coord in cf_coords if coord["axis"] == "X")
source_x_coord = next(
coord["standard_name"] for coord in source_cf_coords if coord["axis"] == "X"
)
source_y_coord = next(
coord["standard_name"] for coord in source_cf_coords if coord["axis"] == "Y"
)

X = ds.cf["X"]
Y = ds.cf["Y"]
X = ds.cf[source_x_coord]
Y = ds.cf[source_y_coord]

# Transform the coordinates
# If the data is vectorized, we just transform the points in full
Expand All @@ -104,6 +133,13 @@ def project_dataset(ds: xr.Dataset, query_crs: str) -> xr.Dataset:
c for c in ds.coords if x_dim in ds[c].dims or y_dim in ds[c].dims
]

# TODO: Handle rotated pole
target_cf_coords = target_crs.coordinate_system.to_cf()

# Get the new X and Y coordinates
target_x_coord = next(coord for coord in target_cf_coords if coord["axis"] == "X")
target_y_coord = next(coord for coord in target_cf_coords if coord["axis"] == "Y")

target_x_coord_name = target_x_coord["standard_name"]
target_y_coord_name = target_y_coord["standard_name"]

Expand Down
Loading

0 comments on commit 7a75087

Please sign in to comment.