Add CRS support for queries #58

Merged: 22 commits, Nov 15, 2024
4 changes: 2 additions & 2 deletions README.md
@@ -69,7 +69,7 @@ In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/
| `z` | ✅ | |
| `datetime` | ✅ | |
| `parameter-name` | ✅ | |
| `crs` | ❌ | Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `crs` | ✅ | Requires a CF compliant [grid mapping](https://cf-xarray.readthedocs.io/en/latest/grid_mappings.html) on the target dataset. Default is `EPSG:4326` |
| `parameter-name` | ✅ | |
| `f` | ✅ | |
| `method` | ➕ | Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |
@@ -84,7 +84,7 @@ In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/
| `z` | ✅ | |
| `datetime` | ✅ | |
| `parameter-name` | ✅ | |
| `crs` | ❌ | Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `crs` | ✅ | Requires a CF compliant [grid mapping](https://cf-xarray.readthedocs.io/en/latest/grid_mappings.html) on the target dataset. Default is `EPSG:4326` |
| `parameter-name` | ✅ | |
| `f` | ✅ | |
| `method` | ➕ | Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |
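For context on the new `crs` option documented above, here is a minimal, illustrative sketch (not part of this PR; the rotated-pole parameters and values are made up) of the CF grid mapping metadata it relies on. The data variable points at a grid-mapping variable through its `grid_mapping` attribute, which is what lets query coordinates supplied in another CRS (default `EPSG:4326`) be reprojected onto the dataset's native grid.

```python
# Illustrative only: a rotated-pole dataset carrying a CF grid mapping.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    data_vars={
        "temp": (
            ("rlat", "rlon"),
            np.random.rand(3, 3),
            {"grid_mapping": "rotated_pole"},  # links the variable to its CRS
        ),
        "rotated_pole": (
            (),
            0,
            {
                "grid_mapping_name": "rotated_latitude_longitude",
                "grid_north_pole_latitude": 39.25,
                "grid_north_pole_longitude": -162.0,
            },
        ),
    },
    coords={"rlat": [20.7, 21.7, 22.7], "rlon": [17.0, 18.0, 19.0]},
)

# With that metadata in place, a client can query in geographic coordinates, e.g.
# GET /datasets/temp/edr/position?coords=POINT(64.59 66.66)&crs=EPSG:4326
```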
72 changes: 56 additions & 16 deletions tests/test_cf_router.py
@@ -7,15 +7,25 @@


@pytest.fixture(scope="session")
def cf_dataset():
def cf_air_dataset():
from cf_xarray.datasets import airds

return airds


@pytest.fixture(scope="session")
def cf_xpublish(cf_dataset):
rest = xpublish.Rest({"air": cf_dataset}, plugins={"edr": CfEdrPlugin()})
def cf_temp_dataset():
from cf_xarray.datasets import rotds

return rotds


@pytest.fixture(scope="session")
def cf_xpublish(cf_air_dataset, cf_temp_dataset):
rest = xpublish.Rest(
{"air": cf_air_dataset, "temp": cf_temp_dataset},
plugins={"edr": CfEdrPlugin()},
)

return rest

@@ -52,7 +62,7 @@ def test_cf_area_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_position_query(cf_client, cf_dataset):
def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):
x = 204
y = 44
response = cf_client.get(f"/datasets/air/edr/position?coords=POINT({x} {y})")
@@ -76,17 +86,18 @@ def test_cf_position_query(cf_client, cf_dataset):
air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
air_param["unit"]["label"]["en"] == cf_air_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
air_param["observedProperty"]["id"]
== cf_air_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
== cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
air_param["description"]["en"] == cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]
@@ -99,6 +110,34 @@ def test_cf_position_query(cf_client, cf_dataset):
len(air_range["values"]) == 4
), "There should be 4 values, one for each time step"

# Test with a dataset containing data in a different coordinate system
x = 64.59063409
y = 66.66454929
response = cf_client.get(f"/datasets/temp/edr/position?coords=POINT({x} {y})")
assert response.status_code == 200, "Response did not return successfully"

data = response.json()
for key in ("type", "domain", "parameters", "ranges"):
assert key in data, f"Key {key} is not a top level key in the CovJSON response"

axes = data["domain"]["axes"]

npt.assert_array_almost_equal(
axes["longitude"]["values"],
[[x]],
), "Did not select nearby x coordinate"
npt.assert_array_almost_equal(
axes["latitude"]["values"],
[[y]],
), "Did not select a nearby y coordinate"

temp_range = data["ranges"]["temp"]
assert temp_range["type"] == "NdArray", "Response range should be a NdArray"
assert temp_range["dataType"] == "float", "Air dataType should be floats"
assert temp_range["axisNames"] == ["rlat", "rlon"], "All dimensions should persist"
assert temp_range["shape"] == [1, 1], "The shape of the array should be 1x1"
assert len(temp_range["values"]) == 1, "There should be 1 value selected"


def test_cf_position_csv(cf_client):
x = 204
@@ -346,7 +385,7 @@ def test_cf_multiple_position_csv(cf_client):
assert key in csv_data[0], f"column {key} should be in the header"


def test_cf_area_query(cf_client, cf_dataset):
def test_cf_area_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=cf_covjson")

@@ -372,17 +411,18 @@ def test_cf_area_query(cf_client, cf_dataset):
air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
air_param["unit"]["label"]["en"] == cf_air_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
air_param["observedProperty"]["id"]
== cf_air_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
== cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
air_param["description"]["en"] == cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]
@@ -403,7 +443,7 @@ def test_cf_area_query(cf_client, cf_dataset):
), "There should be 26 values, 9 for each time step"


def test_cf_area_csv_query(cf_client, cf_dataset):
def test_cf_area_csv_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=csv")

@@ -427,7 +467,7 @@ def test_cf_area_csv_query(cf_client, cf_dataset):
assert key in csv_data[0], f"column {key} should be in the header"


def test_cf_area_geojson_query(cf_client, cf_dataset):
def test_cf_area_geojson_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=geojson")

@@ -446,7 +486,7 @@ def test_cf_area_geojson_query(cf_client, cf_dataset):
assert len(features) == 36, "There should be 36 data points"


def test_cf_area_nc_query(cf_client, cf_dataset):
def test_cf_area_nc_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=nc")

100 changes: 100 additions & 0 deletions tests/test_select.py
@@ -4,9 +4,11 @@
import pandas as pd
import pytest
import xarray as xr
import xarray.testing as xrt
from shapely import MultiPoint, Point, from_wkt

from xpublish_edr.geometry.area import select_by_area
from xpublish_edr.geometry.common import project_dataset
from xpublish_edr.geometry.position import select_by_position
from xpublish_edr.query import EDRQuery

@@ -17,6 +19,14 @@ def regular_xy_dataset():
return xr.tutorial.load_dataset("air_temperature")


@pytest.fixture(scope="function")
def projected_xy_dataset():
"""Loads a sample dataset with projected X and Y coordinates"""
from cf_xarray.datasets import rotds

return rotds


def test_select_query(regular_xy_dataset):
query = EDRQuery(
coords="POINT(200 45)",
@@ -134,6 +144,44 @@ def test_select_position_regular_xy(regular_xy_dataset):
npt.assert_approx_equal(ds["air"][-1], 279.19), "Temperature is incorrect"


def test_select_position_projected_xy(projected_xy_dataset):
query = EDRQuery(
coords="POINT(64.59063409 66.66454929)",
crs="EPSG:4326",
)

projected_point = query.project_geometry(projected_xy_dataset)
npt.assert_approx_equal(projected_point.x, 18.045), "Longitude is incorrect"
npt.assert_approx_equal(projected_point.y, 21.725), "Latitude is incorrect"

ds = select_by_position(projected_xy_dataset, projected_point)
xrt.assert_identical(
ds,
projected_xy_dataset.sel(rlon=[18.045], rlat=[21.725], method="nearest"),
)

projected_ds = project_dataset(ds, query.crs)
(
npt.assert_approx_equal(projected_ds.longitude.values, 64.59063409),
"Longitude is incorrect",
)
(
npt.assert_approx_equal(projected_ds.latitude.values, 66.66454929),
"Latitude is incorrect",
)
(
npt.assert_approx_equal(
projected_ds.temp.values,
projected_xy_dataset.sel(
rlon=[18.045],
rlat=[21.725],
method="nearest",
).temp.values,
),
"Temperature is incorrect",
)


def test_select_position_regular_xy_interpolate(regular_xy_dataset):
point = Point((204, 44))
ds = select_by_position(regular_xy_dataset, point, method="linear")
Expand Down Expand Up @@ -170,6 +218,36 @@ def test_select_position_regular_xy_multi(regular_xy_dataset):
)


def test_select_position_projected_xy_multi(projected_xy_dataset):
query = EDRQuery(
coords="MULTIPOINT(64.3 66.6, 64.6 66.5)",
crs="EPSG:4326",
method="linear",
)

projected_points = query.project_geometry(projected_xy_dataset)
ds = select_by_position(projected_xy_dataset, projected_points, method="linear")
projected_ds = project_dataset(ds, query.crs)
assert "temp" in projected_ds, "Dataset does not contain the temp variable"
assert "rlon" not in projected_ds, "Dataset does not contain the rlon variable"
assert "rlat" not in projected_ds, "Dataset does not contain the rlat variable"
(
npt.assert_array_almost_equal(projected_ds.longitude, [64.3, 64.6]),
"Longitude is incorrect",
)
(
npt.assert_array_almost_equal(projected_ds.latitude, [66.6, 66.5]),
"Latitude is incorrect",
)
(
npt.assert_array_almost_equal(
ds.temp,
projected_ds.temp,
),
"Temperature is incorrect",
)


def test_select_position_regular_xy_multi_interpolate(regular_xy_dataset):
points = MultiPoint([(202, 45), (205, 48)])
ds = select_by_position(regular_xy_dataset, points, method="linear")
Expand Down Expand Up @@ -236,6 +314,28 @@ def test_select_area_regular_xy(regular_xy_dataset):
)


def test_select_area_projected_xy(projected_xy_dataset):
query = EDRQuery(
coords="POLYGON((64.3 66.82, 64.5 66.82, 64.5 66.6, 64.3 66.6, 64.3 66.82))",
crs="EPSG:4326",
)

projected_area = query.project_geometry(projected_xy_dataset)
ds = select_by_area(projected_xy_dataset, projected_area)
projected_ds = project_dataset(ds, query.crs)

assert projected_ds is not None, "Dataset was not returned"
assert "temp" in projected_ds, "Dataset does not contain the air variable"
assert "latitude" in projected_ds, "Dataset does not contain the latitude variable"
assert (
"longitude" in projected_ds
), "Dataset does not contain the longitude variable"

assert projected_ds.longitude.shape[0] == 1, "Longitude shape is incorrect"
assert projected_ds.latitude.shape[0] == 1, "Latitude shape is incorrect"
assert projected_ds.temp.shape[0] == 1, "Temperature shape is incorrect"


def test_select_area_regular_xy_boundary(regular_xy_dataset):
polygon = from_wkt("POLYGON((200 40, 200 50, 210 50, 210 40, 200 40))").buffer(
0.0001,
15 changes: 11 additions & 4 deletions xpublish_edr/geometry/area.py
@@ -31,7 +31,16 @@
"""
# To minimize performance impact, we first subset the dataset to the bounding box of the polygon
(minx, miny, maxx, maxy) = polygon.bounds
ds = ds.cf.sel(X=slice(minx, maxx), Y=slice(maxy, miny))
indexes = ds.cf.indexes
if indexes["X"].is_monotonic_increasing:
x_sel = slice(minx, maxx)
else:
x_sel = slice(maxx, minx)

if indexes["Y"].is_monotonic_increasing:
y_sel = slice(miny, maxy)
else:
y_sel = slice(maxy, miny)
ds = ds.cf.sel(X=x_sel, Y=y_sel)

# For a regular grid, we can create a meshgrid of the X and Y coordinates to create a spatial mask
pts = np.meshgrid(ds.cf["X"], ds.cf["Y"])
@@ -45,6 +54,4 @@
y_sel = xr.Variable(data=y_inds, dims=VECTORIZED_DIM)

# Apply the mask and vectorize to a 1d collection of points
ds_sub = ds.cf.isel(X=x_sel, Y=y_sel)

return ds_sub
return ds.cf.isel(X=x_sel, Y=y_sel)
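A note on the monotonicity check added above: xarray label slices follow the stored index order, so a `slice(min, max)` on a descending coordinate silently selects nothing. A minimal sketch with synthetic data (not from this repository) showing the behavior the new branches guard against:

```python
import numpy as np
import xarray as xr

# Latitude stored in descending order, as in many model outputs.
ds = xr.Dataset({"air": ("lat", np.arange(3.0))}, coords={"lat": [50.0, 45.0, 40.0]})

# slice(miny, maxy) comes back empty on a decreasing index...
assert ds.sel(lat=slice(40, 50)).sizes["lat"] == 0
# ...while ordering the bounds to match the index selects all three points.
assert ds.sel(lat=slice(50, 40)).sizes["lat"] == 3
```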