Skip to content

Commit

Permalink
band name defaults to asset key
Browse files Browse the repository at this point in the history
* works if asset key matches the band name

#762

* clean it up

#762

* support test STAC API

#762

* fix tests

The presence of "eo:bands" is still a good indicator of a band asset.

#762

* add test for STAC API with assets that lack eo:bands

#762

* fixup! add test for STAC API with assets that lack eo:bands

#762

* adapt CHANGELOG

#762
  • Loading branch information
bossie authored Aug 26, 2024
1 parent 3ef42b1 commit dd4b2d9
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 29 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ without compromising stable operations.
- The default processing chunk size can now be configured for backends. If not set, the default may be determined automatically. We observe that a lower default, like 128 pixels, allows running jobs with less memory. ([Open-EO/openeo-geotrellis-extensions#311](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/311))
- aggregate_spatial: trying to use the probabilities argument in a _single_ 'quantiles' reducer was throwing an error. ([#821](https://github.com/Open-EO/openeo-geopyspark-driver/issues/821))
- sar_backscatter: when a target resolution is provided via resample_spatial, it is now immediately taken into account for computing backscatter, reducing memory usage.
- the temporary folder which is created for aggregate_spatial now contains a timestamp to aid cleanup scripts.
- the temporary folder which is created for aggregate_spatial now contains a timestamp to aid cleanup scripts.
- apply_neighborhood: support applying UDF on cubes without a time dimension
- Add "separate_asset_per_band" to save_result options. Currently, for TIFF only.
- `load_stac`: `eo:bands` is no longer a hard dependency; band name defaults to asset key ([#762](https://github.com/Open-EO/openeo-geopyspark-driver/issues/762))

## 0.39.0

- Correctly apply the method parameter in resample_spatial and resample_cube_spatial, when downsampling to lower resolution, and the sampling is not applied at load time. ([Open-EO/openeo-geotrellis-extensions#303](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/303))
- Correctly apply the method parameter in resample_spatial and resample_cube_spatial, when downsampling to lower resolution, and the sampling is not applied at load time. ([Open-EO/openeo-geotrellis-extensions#303](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/303))
- Use band names as column name in GeoParquet output ([#723](https://github.com/Open-EO/openeo-geopyspark-driver/issues/723))
- Prevent nightly cleaner from failing a job tracker run ([eu-cdse/openeo-cdse-infra#166](https://github.com/eu-cdse/openeo-cdse-infra/issues/166))
- Sentinelhub collections handle non zero nodata better ([openeo-geotrellis-extensions#300](https://github.com/Open-EO/openeo-geotrellis-extensions/issues/300))
Expand Down
41 changes: 21 additions & 20 deletions openeogeotrellis/load_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def supports_item_search(coll: pystac.Collection) -> bool:
return any(conformance_class.endswith("/item-search") for conformance_class in conforms_to)

def is_band_asset(asset: pystac.Asset) -> bool:
return "eo:bands" in asset.extra_fields
return asset.has_role("data") or "eo:bands" in asset.extra_fields

def get_band_names(itm: pystac.Item, asst: pystac.Asset) -> List[str]:
def get_band_name(eo_band) -> str:
Expand All @@ -97,7 +97,7 @@ def get_band_name(eo_band) -> str:
else itm.get_collection().summaries.to_dict())
return get_band_name(eo_bands_location["eo:bands"][eo_band_index])

return [get_band_name(eo_band) for eo_band in asst.extra_fields["eo:bands"]]
return [get_band_name(eo_band) for eo_band in asst.extra_fields.get("eo:bands", [])]

def get_proj_metadata(itm: pystac.Item, asst: pystac.Asset) -> (Optional[int],
Optional[Tuple[float, float, float, float]],
Expand Down Expand Up @@ -226,7 +226,8 @@ def operator_value(criterion: Dict[str, object]) -> (str, object):
elif (
root_catalog.get_self_href().startswith("https://tamn.snapplanet.io")
or root_catalog.get_self_href().startswith("https://stac.eurac.edu")
or root_catalog.get_self_href().startswith("https://catalogue.dataspace.copernicus.eu/")
or root_catalog.get_self_href().startswith("https://catalogue.dataspace.copernicus.eu/stac")
or root_catalog.get_self_href().startswith("https://pgstac.demo.cloudferro.com")
):
modifier = None
# by default, returns all properties and "none" if fields are specified
Expand Down Expand Up @@ -345,7 +346,7 @@ def intersects_temporally(interval) -> bool:
.withNominalDate(itm.properties.get("datetime") or itm.properties["start_datetime"]))

for asset_id, asset in band_assets.items():
asset_band_names = get_band_names(itm, asset)
asset_band_names = get_band_names(itm, asset) or [asset_id]
for asset_band_name in asset_band_names:
if asset_band_name not in band_names:
band_names.append(asset_band_name)
Expand Down Expand Up @@ -386,11 +387,6 @@ def intersects_temporally(interval) -> bool:
if not items_found:
raise no_data_available_exception

if not band_names:
raise OpenEOApiException(
message=f'No band assets found in items; a band asset requires an "eo:bands" property with a "name".',
status_code=400)

target_bbox = requested_bbox or stac_bbox

if not target_bbox:
Expand Down Expand Up @@ -519,23 +515,28 @@ def get_best_url(asset: pystac.Asset):
"""
Relevant doc: https://github.com/stac-extensions/alternate-assets
"""
alternate = asset.extra_fields.get("alternate")
if alternate:
for key, alternate_local in alternate.items():
if key not in {"local", "s3"}:
continue
href = alternate_local.get("href")
for key, alternate_asset in asset.extra_fields.get("alternate", {}).items():
if key in {"local", "s3"}:
href = alternate_asset["href"]
# Checking if file exists takes around 10ms on /data/MTDA mounted on laptop
# Checking if URL exists takes around 100ms on https://services.terrascope.be
# Checking if URL exists depends also on what Datasource is used in the scala code.
# That would be hacky to predict here.
tmp = urlparse(href)
url = urlparse(href)
# Support paths like "file:///data/MTDA", but also "//data/MTDA" just in case.
if tmp.scheme == "file" or tmp.scheme == "":
if Path(tmp.path).exists():
return href

file_path = None
if url.scheme in ["", "file"]:
file_path = url.path
elif url.scheme == "s3":
file_path = f"/{url.netloc}{url.path}"

if file_path and Path(file_path).exists():
logger.debug(f"Using local alternate file path {file_path}")
return file_path
else:
logger.warning("Only support file paths as local alternate urls, but found: " + href)
logger.warning(f"Only support file paths as local alternate urls, but found {href}")

return asset.get_absolute_href() or asset.href


Expand Down
17 changes: 17 additions & 0 deletions tests/data/stac/issue762-api-no-eo-bands/catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"id": "catalog",
"type": "Catalog",
"description": "description",
"conformsTo": ["https://api.stacspec.org/v1.0.0-rc.1/item-search"],
"stac_version": "1.0.0",
"links": [
{
"rel": "child",
"href": "https://stac.test/collections/collection"
},
{
"rel": "search",
"href": "search"
}
]
}
46 changes: 46 additions & 0 deletions tests/data/stac/issue762-api-no-eo-bands/collection.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"id": "collection",
"type": "Collection",
"description": "description",
"license": "proprietary",
"stac_version": "1.0.0",
"extent": {
"spatial": {
"bbox": [
[
-180,
-90,
180,
90
]
]
},
"temporal": {
"interval": [
[
"1982-08-22T00:00:00.000Z",
null
]
]
}
},
"links": [
{
"rel": "root",
"href": "https://stac.test"
}
],
"summaries": {
"eo:bands": [
{
"name": "band1"
},
{
"name": "band2"
},
{
"name": "band3"
}
]
}
}
82 changes: 82 additions & 0 deletions tests/data/stac/issue762-api-no-eo-bands/item01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"stac_version": "1.0.0",
"stac_extensions": [],
"type": "Feature",
"id": "item01",
"bbox": [
5.0,
50.0,
6.0,
51.0
],
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
5.0,
50.0
],
[
5.0,
51.0
],
[
6.0,
51.0
],
[
6.0,
50.0
],
[
5.0,
50.0
]
]
]
},
"properties": {
"datetime": "2021-02-03T00:00:00Z",
"proj:epsg": 4326,
"proj:bbox": [
5.0,
50.0,
6.0,
51.0
],
"proj:shape": [
10,
10
]
},
"links": [
{
"rel": "collection",
"href": "https://stac.test/collections/collection"
}
],
"assets": {
"asset1": {
"href": "asset01.tiff",
"type": "image/tiff; application=geotiff",
"roles": ["data"],
"eo:bands": [
{
"name": "band1"
},
{
"name": "band2"
},
{
"name": "band3"
}
]
},
"band4": {
"href": "asset02.tiff",
"type": "image/tiff; application=geotiff",
"roles": ["data"]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"href": "https://catalogue.dataspace.copernicus.eu/odata/v1/Products(f8039bf2-dc86-4dc2-a21a-b1a58c42fcdd)/$value",
"title": "Product",
"type": "application/octet-stream",
"roles": ["data", "role was manually added here to be supported."],
"alternate": {
"s3": {
"href": "/eodata/Global-Mosaics/Sentinel-1/S1SAR_L3_IW_MCM/2023/06/01/Sentinel-1_IW_mosaic_2023_M06_31UFS_0_0",
Expand All @@ -81,13 +82,7 @@
"storage:requester_pays": false,
"storage:tier": "Online"
}
},
"eo:bands": [
{
"message": "eo:bands was manually added here to be supported.",
"name": "B02"
}
]
}
}
}
}
Expand Down
80 changes: 80 additions & 0 deletions tests/test_api_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -4302,6 +4302,86 @@ def test_load_stac_from_unsigned_partial_job_results_basic(self, api110, batch_j
assert ("OpenEO batch job results status of own job j-2405078f40904a0b85cf8dc5dd55b07e: finished"
in caplog.messages)

def test_load_stac_loads_assets_without_eo_bands(self, api110, urllib_mock, requests_mock, tmp_path):
"""load_stac from a STAC API with one item and two assets, one of which does not carry eo:bands"""

# Mock handler for the STAC API "search" endpoint (registered on https://stac.test/search below):
# returns a FeatureCollection with the fixture items whose datetime lies within the requested interval.
def feature_collection(request, _) -> dict:
# upper() is needed because requests_mock converts the query string to lowercase; this may change in a future release
# replacing "Z" is needed on Python 3.8; from 3.11 onwards it should no longer be needed
datetime_from, datetime_to = map(
dt.datetime.fromisoformat, request.qs["datetime"][0].upper().replace("Z", "+00:00").split("/")
)

# Load an item fixture and rewrite its placeholder asset hrefs ("asset01.tiff", "asset02.tiff")
# to file:// URLs of the GeoTIFFs in the test data directory.
def item(path) -> dict:
return json.loads(
get_test_data_file(path)
.read_text()
.replace(
"asset01.tiff",
f"file://{get_test_data_file('binary/load_stac/collection01/asset01.tif').absolute()}",
)
.replace(
"asset02.tiff",
f"file://{get_test_data_file('binary/load_stac/collection01/asset02.tif').absolute()}",
)
)

items = [
item(path)
for path in [
"stac/issue762-api-no-eo-bands/item01.json",
]
]

# keep only the items whose "datetime" property falls within the requested interval (bounds inclusive)
intersecting_items = [
item
for item in items
if datetime_from <= dateutil.parser.parse(item["properties"]["datetime"]) <= datetime_to
]

return {
"type": "FeatureCollection",
"features": intersecting_items,
}

# The collection and the root catalog are served from fixture files; the catalog is registered on both
# mocks because pystac and pystac_client fetch it through different HTTP layers (see inline comments).
urllib_mock.get(
"https://stac.test/collections/collection",
data=get_test_data_file("stac/issue762-api-no-eo-bands/collection.json").read_text(),
)
urllib_mock.get(
"https://stac.test", # for pystac
data=get_test_data_file("stac/issue762-api-no-eo-bands/catalog.json").read_text(),
)
requests_mock.get(
"https://stac.test", # for pystac_client
text=get_test_data_file("stac/issue762-api-no-eo-bands/catalog.json").read_text(),
)
requests_mock.get("https://stac.test/search", json=feature_collection)

# load_stac is given only the collection URL (no bands or extents); the result is saved as NetCDF
process_graph = {
"loadstac1": {
"process_id": "load_stac",
"arguments": {"url": "https://stac.test/collections/collection"},
},
"saveresult1": {
"process_id": "save_result",
"arguments": {"data": {"from_node": "loadstac1"}, "format": "NetCDF"},
"result": True,
},
}

res = api110.result(process_graph).assert_status_code(200)
# write the response body to disk so it can be opened with xarray
res_path = tmp_path / "res.nc"
res_path.write_bytes(res.data)
ds = xarray.load_dataset(res_path)
assert ds.dims == {"t": 1, "x": 10, "y": 10}
assert numpy.datetime_as_string(ds.coords["t"].values, unit="D").tolist() == ["2021-02-03"]
# "band1".."band3" come from the eo:bands of asset "asset1"; the asset keyed "band4" in item01.json has
# no eo:bands, so its band name is expected to be its asset key.
# NOTE(review): the first data variable is skipped by [1:] - presumably a CRS variable; confirm.
assert list(ds.data_vars.keys())[1:] == ["band1", "band2", "band3", "band4"]
assert (ds["band1"] == 1).all()
assert (ds["band2"] == 2).all()
assert (ds["band3"] == 3).all()
assert (ds["band4"] == 4).all()


class TestEtlApiReporting:
@pytest.fixture(autouse=True)
Expand Down

0 comments on commit dd4b2d9

Please sign in to comment.