Skip to content

Commit

Permalink
Add initial support for controlling selection interpolation scheme (#54)
Browse files Browse the repository at this point in the history
* Initial selection method control support

* lint

* Add tests for interp select

* More tests

* More tests

* Update readme to document spec compliance and query parameters

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* lint

* Add more detail

* Update README.md

Co-authored-by: Alex Kerney <[email protected]>

* Notes about path resource

* lint

* More notes on area query

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Alex Kerney <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent 0e2530d commit a791ee2
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 18 deletions.
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,50 @@ rest = xpublish.Rest(
```


## OGC EDR Spec Compliance

This package attempts to follow [the spec](https://docs.ogc.org/is/19-086r6/19-086r6.html) where reasonable, adding functionality where the value is demonstrable.

### [collections](https://docs.ogc.org/is/19-086r6/19-086r6.html#_e55ba0f5-8f24-4f1b-a7e3-45775e39ef2e) and Resource Paths Support

`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead the path resource appears as `/{dataset_id}/query`. This is because of the path structure of xpublish.

In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html) it will provide a path to supporting the spec compliant `collections` resource path.

### Supported Queries

[8.2.1 Position query](https://docs.ogc.org/is/19-086r6/19-086r6.html#_bbda46d4-04c5-426b-bea3-230d592fe1c2)

| Query | Compliant | Comments
| ------------- | ------------- | ------------- |
| `coords` || |
| `z` || |
| `datetime` || |
| `parameter-name` || |
| `crs` || Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `parameter-name` || |
| `f` || |
| `method` || Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |

> Any additional query parameters are assumed to be additional selections to make on the dimensions/coordinates. These queries will use the specified selections `method`.
[8.2.3 Area query](https://docs.ogc.org/is/19-086r6/19-086r6.html#_c92d1888-dc80-454f-8452-e2f070b90dcd)

| Query | Compliant | Comments
| ------------- | ------------- | ------------- |
| `coords` || Only `POLYGON` supported currently |
| `z` || |
| `datetime` || |
| `parameter-name` || |
| `crs` || Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `parameter-name` || |
| `f` || |
| `method` || Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |

> `method` is not applicable for the coordinates of area queries, only for selecting datetime, z, or additional dimensions.
For `POLYGON` coordinates, points that are located within **OR** on the polygons boundary are included in the response.

## Get in touch

Report bugs, suggest features or view the source code on [GitHub](https://github.com/gulfofmaine/xpublish-edr/issues).
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
geopandas
scipy
shapely
xarray
xpublish>=0.3.0,<0.4.0
50 changes: 50 additions & 0 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy.testing as npt
import pytest
import xpublish
from fastapi.testclient import TestClient
Expand Down Expand Up @@ -127,6 +128,55 @@ def test_cf_position_csv(cf_client):
assert key in csv_data[0], f"column {key} should be in the header"


def test_cf_position_csv_interpolate(cf_client):
x = 204
y = 44
response = cf_client.get(
f"/datasets/air/edr/position?coords=POINT({x} {y})&f=csv&method=linear",
)

assert response.status_code == 200, "Response did not return successfully"
assert (
"text/csv" in response.headers["content-type"]
), "The content type should be set as a CSV"
assert (
"attachment" in response.headers["content-disposition"]
), "The response should be set as an attachment to trigger download"
assert (
"position.csv" in response.headers["content-disposition"]
), "The file name should be position.csv"

csv_data = [
line.split(",") for line in response.content.decode("utf-8").splitlines()
]

assert (
len(csv_data) == 5
), "There should be 4 data rows (one for each time step), and one header row"
for key in ("time", "lat", "lon", "air", "cell_area"):
assert key in csv_data[0], f"column {key} should be in the header"

lon_index = csv_data[0].index("lon")
lons = [float(row[lon_index]) for row in csv_data[1:]]
(
npt.assert_array_equal(
lons,
[204.0, 204.0, 204.0, 204.0],
),
"Longitude should be interpolated as 204.0",
)

lat_index = csv_data[0].index("lat")
lats = [float(row[lat_index]) for row in csv_data[1:]]
(
npt.assert_array_almost_equal(
lats,
[44.0, 44.0, 44.0, 44.0],
),
"Latitude should be interpolated as 44.0",
)


def test_cf_position_nc(cf_client):
x = 204
y = 44
Expand Down
76 changes: 72 additions & 4 deletions tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_select_query(regular_xy_dataset):
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00/2013-01-01T12:00:00",
parameters="air,time",
method="nearest",
)

ds = query.select(regular_xy_dataset, query_params)
Expand All @@ -57,6 +58,25 @@ def test_select_query(regular_xy_dataset):
)
assert ds["air"].shape == (2, 25, 53), "Dataset shape is incorrect"

query = EDRQuery(
coords="POINT(203 46)",
datetime="2013-01-01T08:00:00",
parameters="air,time",
method="linear",
)

ds = query.select(regular_xy_dataset, query_params)
(
npt.assert_array_equal(
ds["time"],
np.array(
["2013-01-01T08:00:00"],
dtype="datetime64[ns]",
),
),
"Time is incorrect",
)


def test_select_query_error(regular_xy_dataset):
query = EDRQuery(
Expand Down Expand Up @@ -88,6 +108,15 @@ def test_select_query_error(regular_xy_dataset):
with pytest.raises(KeyError):
query.select(regular_xy_dataset, {})

with pytest.raises(ValueError):
query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00",
parameters="air",
z="100",
method="foo",
)


def test_select_position_regular_xy(regular_xy_dataset):
point = Point((204, 44))
Expand All @@ -105,6 +134,22 @@ def test_select_position_regular_xy(regular_xy_dataset):
npt.assert_approx_equal(ds["air"][-1], 279.19), "Temperature is incorrect"


def test_select_position_regular_xy_interpolate(regular_xy_dataset):
point = Point((204, 44))
ds = select_by_position(regular_xy_dataset, point, method="linear")

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

assert ds["air"].shape == ds["time"].shape, "Dataset shape is incorrect"
npt.assert_array_equal(ds["lat"], 44.0), "Latitude is incorrect"
npt.assert_array_equal(ds["lon"], 204.0), "Longitude is incorrect"
npt.assert_approx_equal(ds["air"][0], 281.376), "Temperature is incorrect"
npt.assert_approx_equal(ds["air"][-1], 279.87), "Temperature is incorrect"


def test_select_position_regular_xy_multi(regular_xy_dataset):
points = MultiPoint([(202, 45), (205, 48)])
ds = select_by_position(regular_xy_dataset, points)
Expand All @@ -116,10 +161,33 @@ def test_select_position_regular_xy_multi(regular_xy_dataset):

npt.assert_array_equal(ds["lat"], [45.0, 47.5]), "Latitude is incorrect"
npt.assert_array_equal(ds["lon"], [202.5, 205.0]), "Longitude is incorrect"
npt.assert_array_equal(
ds["air"].isel(time=2).values,
[279.1, 278.6],
), "Temperature is incorrect"
(
npt.assert_array_equal(
ds["air"].isel(time=2).values,
[279.1, 278.6],
),
"Temperature is incorrect",
)


def test_select_position_regular_xy_multi_interpolate(regular_xy_dataset):
points = MultiPoint([(202, 45), (205, 48)])
ds = select_by_position(regular_xy_dataset, points, method="linear")

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

npt.assert_array_equal(ds["lat"], [45.0, 48.0]), "Latitude is incorrect"
npt.assert_array_equal(ds["lon"], [202.0, 205.0]), "Longitude is incorrect"
(
npt.assert_array_almost_equal(
ds["air"].isel(time=2).values,
[279.0, 278.2],
),
"Temperature is incorrect",
)


def test_select_area_regular_xy(regular_xy_dataset):
Expand Down
19 changes: 15 additions & 4 deletions xpublish_edr/geometry/position.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from __future__ import annotations

from typing import Literal

import numpy as np
import shapely
import xarray as xr
Expand All @@ -14,6 +16,7 @@
def select_by_position(
ds: xr.Dataset,
point: shapely.Point | shapely.MultiPoint,
method: Literal["nearest", "linear"] = "nearest",
) -> xr.Dataset:
"""
Return a dataset with the position nearest to the given coordinates
Expand All @@ -23,9 +26,9 @@ def select_by_position(
raise NotImplementedError("Only 1D coordinates are supported")

if isinstance(point, shapely.Point):
return _select_by_position_regular_xy_grid(ds, point)
return _select_by_position_regular_xy_grid(ds, point, method)
elif isinstance(point, shapely.MultiPoint):
return _select_by_multiple_positions_regular_xy_grid(ds, point)
return _select_by_multiple_positions_regular_xy_grid(ds, point, method)
else:
raise ValueError(
f"Invalid point type {point.geom_type}, must be Point or MultiPoint",
Expand All @@ -35,17 +38,22 @@ def select_by_position(
def _select_by_position_regular_xy_grid(
ds: xr.Dataset,
point: shapely.Point,
method: Literal["nearest", "linear"] = "nearest",
) -> xr.Dataset:
"""
Return a dataset with the position nearest to the given coordinates
"""
# Find the nearest X and Y coordinates to the point
return ds.cf.sel(X=point.x, Y=point.y, method="nearest")
if method == "nearest":
return ds.cf.sel(X=point.x, Y=point.y, method=method)
else:
return ds.cf.interp(X=point.x, Y=point.y, method=method)


def _select_by_multiple_positions_regular_xy_grid(
ds: xr.Dataset,
points: shapely.MultiPoint,
method: Literal["nearest", "linear"] = "nearest",
) -> xr.Dataset:
"""
Return a dataset with the positions nearest to the given coordinates
Expand All @@ -54,4 +62,7 @@ def _select_by_multiple_positions_regular_xy_grid(
x, y = np.array(list(zip(*[(point.x, point.y) for point in points.geoms])))
sel_x = xr.Variable(data=x, dims=VECTORIZED_DIM)
sel_y = xr.Variable(data=y, dims=VECTORIZED_DIM)
return ds.cf.sel(X=sel_x, Y=sel_y, method="nearest")
if method == "nearest":
return ds.cf.sel(X=sel_x, Y=sel_y, method=method)
else:
return ds.cf.interp(X=sel_x, Y=sel_y, method=method)
2 changes: 1 addition & 1 deletion xpublish_edr/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_position(
Extra selecting/slicing parameters can be provided as extra query parameters
"""
try:
ds = select_by_position(dataset, query.geometry)
ds = select_by_position(dataset, query.geometry, query.method)
except KeyError:
raise HTTPException(
status_code=404,
Expand Down
36 changes: 27 additions & 9 deletions xpublish_edr/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
OGC EDR Query param parsing
"""

from typing import Optional
from typing import Literal, Optional

import xarray as xr
from fastapi import Query
Expand All @@ -27,6 +27,7 @@ class EDRQuery(BaseModel):
parameters: Optional[str] = None
crs: Optional[str] = None
format: Optional[str] = None
method: Literal["nearest", "linear"] = "nearest"

@property
def geometry(self):
Expand All @@ -36,13 +37,19 @@ def geometry(self):
def select(self, ds: xr.Dataset, query_params: dict) -> xr.Dataset:
"""Select data from a dataset based on the query"""
if self.z:
ds = ds.cf.sel(Z=self.z, method="nearest")
if self.method == "nearest":
ds = ds.cf.sel(Z=self.z, method=self.method)
else:
ds = ds.cf.interp(Z=self.z, method=self.method)

if self.datetime:
try:
datetimes = self.datetime.split("/")
if len(datetimes) == 1:
ds = ds.cf.sel(T=datetimes[0], method="nearest")
if self.method == "nearest":
ds = ds.cf.sel(T=datetimes[0], method=self.method)
else:
ds = ds.cf.interp(T=datetimes[0], method=self.method)
elif len(datetimes) == 2:
ds = ds.cf.sel(T=slice(datetimes[0], datetimes[1]))
else:
Expand All @@ -64,20 +71,25 @@ def select(self, ds: xr.Dataset, query_params: dict) -> xr.Dataset:
if query_param in edr_query_params:
del query_params[query_param]

# TODO: allow for controlling selection method
method: Optional[str] = "nearest"

for key, value in query_params.items():
split_value = value.split("/")
if len(split_value) == 1:
continue
elif len(split_value) == 2:
query_params[key] = slice(split_value[0], split_value[1])
method = None
else:
raise ValueError(f"Too many values for selecting {key}")

ds = ds.sel(query_params, method=method)
if self.method == "nearest":
ds = ds.sel(query_params, method=self.method)
else:
# Interpolation may not be supported for all possible selection
# parameters, so we provide a fallback to xarray's nearest selection
try:
ds = ds.interp(query_params, method=self.method)
except Exception as e:
logger.warning(f"Interpolation failed: {e}, falling back to selection")
ds = ds.sel(query_params, method="nearest")
return ds


Expand Down Expand Up @@ -118,6 +130,11 @@ def edr_query(
"Get `/formats` to discover what other formats are accessible"
),
),
method: Literal["nearest", "linear"] = Query(
"nearest",
title="Selection method",
description="Method for selecting data from the dataset, options are 'nearest' or 'linear'",
),
):
"""Extract EDR query params from request query strings"""
return EDRQuery(
Expand All @@ -127,7 +144,8 @@ def edr_query(
parameters=parameters,
crs=crs,
format=f,
method=method,
)


edr_query_params = {"coords", "z", "datetime", "parameter-name", "crs", "f"}
edr_query_params = {"coords", "z", "datetime", "parameter-name", "crs", "f", "method"}

0 comments on commit a791ee2

Please sign in to comment.