
Implement get_urls(type_="s3") to fetch CSLC S3 urls from `asf_search` (#71)

* implement `get_urls(type_="s3")`

* make `get_urls` public

* fix test reqs file

* add more specific error, don't suppress exception

* new GDAL 3.9 needs separate netcdf/hdf5 packages
scottstanie authored Sep 30, 2024
1 parent b1f956a commit 085f986
Showing 4 changed files with 28 additions and 19 deletions.
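As a quick orientation before the diffs, here is a minimal sketch of how the newly public `get_urls` could be used to collect direct S3 URLs from an `asf_search` query. The search keywords and burst ID shown are illustrative assumptions, not part of this commit.

import asf_search as asf

from opera_utils.download import get_urls

# Hypothetical query: adjust the keywords to whatever CSLC search you need.
results = asf.search(
    operaBurstID=["T042_088905_IW1"],
    processingLevel="CSLC",
)

# HTTPS URLs (the existing default behavior).
https_urls = get_urls(results, type_="https")

# Direct S3 URLs to the .h5 products (added in this commit).
s3_urls = get_urls(results, type_="s3", file_ext=".h5")
print(s3_urls)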
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
@@ -20,6 +20,8 @@ jobs:
          spec: >-
            rasterio
            gdal
+            libgdal-netcdf
+            libgdal-hdf5
            asf_search
        - label: Minimum
          spec: >-
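The two new packages reflect GDAL 3.9 splitting the netCDF and HDF5 drivers into separate conda-forge plugin packages. A small sanity check along these lines (a sketch, not part of the commit) can confirm the drivers are actually registered in the CI environment:

from osgeo import gdal

# Confirm the plugin-based drivers are present; GetDriverByName returns None
# when a driver is not registered.
for name in ("netCDF", "HDF5"):
    driver = gdal.GetDriverByName(name)
    if driver is None:
        raise RuntimeError(f"GDAL driver {name!r} missing; install libgdal-{name.lower()}")
    print(name, driver.LongName)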
14 changes: 7 additions & 7 deletions src/opera_utils/_cslc.py
@@ -439,13 +439,13 @@ def make_nodata_mask(
        test_f = f"NETCDF:{opera_file_list[-1]}:{dataset_name}"
        # convert pixels to degrees lat/lon
        gt = _get_raster_gt(test_f)
-        # TODO: more robust way to get the pixel size... this is a hack
-        # maybe just use pyproj to warp lat/lon to meters and back?
-        dx_meters = gt[1]
-        dx_degrees = dx_meters / 111000
-        buffer_degrees = buffer_pixels * dx_degrees
-    except RuntimeError:
-        raise ValueError(f"Unable to open {test_f}")
+    except RuntimeError as e:
+        raise ValueError(f"Unable to get geotransform from {test_f}") from e
+    # TODO: more robust way to get the pixel size... this is a hack
+    # maybe just use pyproj to warp lat/lon to meters and back?
+    dx_meters = gt[1]
+    dx_degrees = dx_meters / 111000
+    buffer_degrees = buffer_pixels * dx_degrees

    # Get the union of all the polygons and convert to a temp geojson
    union_poly = get_union_polygon(opera_file_list, buffer_degrees=buffer_degrees)
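The TODO kept in this hunk suggests replacing the flat 111,000 m-per-degree factor with a pyproj-based conversion. A minimal sketch of that idea, assuming a representative latitude for the scene is available (the helper name and example latitude are illustrative, not part of the commit):

from pyproj import Geod

def meters_to_degrees_lon(dx_meters: float, center_lat: float) -> float:
    """Convert a pixel size in meters to degrees of longitude at center_lat."""
    geod = Geod(ellps="WGS84")
    # Geodesic length of one degree of longitude at this latitude, in meters.
    _, _, meters_per_degree = geod.inv(0.0, center_lat, 1.0, center_lat)
    return dx_meters / meters_per_degree

# e.g. buffer_degrees = buffer_pixels * meters_to_degrees_lon(gt[1], center_lat=34.0)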
27 changes: 17 additions & 10 deletions src/opera_utils/download.py
@@ -241,7 +241,7 @@ def _download_for_burst_ids(
        raise ValueError(msg)
    logger.info(msg)
    session = _get_auth_session()
-    urls = _get_urls(results)
+    urls = get_urls(results)
    asf.download_urls(
        urls=urls, path=str(output_dir), session=session, processes=max_jobs
    )
@@ -293,23 +293,30 @@ def filter_results_by_date_and_version(results: ASFSearchResults) -> ASFSearchResults:
    return ASFSearchResults(filtered_results)


-def _get_urls(
+def get_urls(
    results: asf.ASFSearchResults,
    type_: Literal["https", "s3"] = "https",
+    file_ext: str = ".h5",
) -> list[str]:
    """Parse the `ASFSearchResults` object for HTTPS or S3 urls."""
    if type_ == "https":
        return [r.properties["url"] for r in results]
    elif type_ == "s3":
-        # TODO: go through .umm, find s3 url
-        raise NotImplementedError()
+        out: list[str] = []
+        for r in results:
+            if "s3Urls" not in r.properties:
+                raise ValueError(f"No S3 URL for {r}")
+
+            for url in r.properties["s3Urls"]:
+                if url.endswith(file_ext):
+                    out.append(url)
+                    break
+            else:
+                raise ValueError(f"Failed to find HDF5 S3 url for {r}")
+        return out
+
    else:
        raise ValueError(f"type_ must be 'https' or 's3'. Got {type_}")
-    # r.umm
-    # 'RelatedUrls': [...
-    #     {'URL': 's3://asf-cumulus-prod-opera-products/OPERA_L2_CSLC
-    #      'Type': 'GET DATA VIA DIRECT ACCESS',
-    #      'Description': 'This link provides direct download access vi
-    #      'Format': 'HDF5'},


def _get_auth_session() -> asf.ASFSession:
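The S3 URLs returned by get_urls(..., type_="s3") point at the ASF-hosted bucket (e.g. asf-cumulus-prod-opera-products, per the reference comment removed above). Below is a minimal sketch of fetching one of them with boto3; it assumes the environment already holds AWS credentials authorized for that bucket (ASF direct S3 access generally requires temporary in-region credentials), and the helper name is illustrative rather than part of the library.

from pathlib import Path
from urllib.parse import urlparse

import boto3

def download_s3_url(s3_url: str, output_dir: str = ".") -> Path:
    """Download one s3:// URL returned by get_urls(..., type_="s3")."""
    parsed = urlparse(s3_url)
    bucket, key = parsed.netloc, parsed.path.lstrip("/")
    out_path = Path(output_dir) / Path(key).name
    boto3.client("s3").download_file(bucket, key, str(out_path))
    return out_path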
4 changes: 2 additions & 2 deletions tests/requirements.txt
@@ -2,7 +2,7 @@ asf_search
pre-commit
pytest
pytest-cov
-pytest-randomly # control random seed
+pytest-randomly
pytest-recording
-pytest-xdist # parallel tests: https://pytest-xdist.readthedocs.io/en/latest/
+pytest-xdist
ruff

