diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4136a78..ad8a43e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,6 +20,8 @@ jobs:
         spec: >-
           rasterio
           gdal
+          libgdal-netcdf
+          libgdal-hdf5
           asf_search
       - label: Minimum
         spec: >-
diff --git a/src/opera_utils/_cslc.py b/src/opera_utils/_cslc.py
index 46cf223..c583fd0 100644
--- a/src/opera_utils/_cslc.py
+++ b/src/opera_utils/_cslc.py
@@ -439,13 +439,13 @@ def make_nodata_mask(
         test_f = f"NETCDF:{opera_file_list[-1]}:{dataset_name}"
         # convert pixels to degrees lat/lon
         gt = _get_raster_gt(test_f)
-        # TODO: more robust way to get the pixel size... this is a hack
-        # maybe just use pyproj to warp lat/lon to meters and back?
-        dx_meters = gt[1]
-        dx_degrees = dx_meters / 111000
-        buffer_degrees = buffer_pixels * dx_degrees
-    except RuntimeError:
-        raise ValueError(f"Unable to open {test_f}")
+    except RuntimeError as e:
+        raise ValueError(f"Unable to get geotransform from {test_f}") from e
+    # TODO: more robust way to get the pixel size... this is a hack
+    # maybe just use pyproj to warp lat/lon to meters and back?
+    dx_meters = gt[1]
+    dx_degrees = dx_meters / 111000
+    buffer_degrees = buffer_pixels * dx_degrees
 
     # Get the union of all the polygons and convert to a temp geojson
     union_poly = get_union_polygon(opera_file_list, buffer_degrees=buffer_degrees)
diff --git a/src/opera_utils/download.py b/src/opera_utils/download.py
index 66d0358..1e3f85b 100644
--- a/src/opera_utils/download.py
+++ b/src/opera_utils/download.py
@@ -241,7 +241,7 @@ def _download_for_burst_ids(
         raise ValueError(msg)
     logger.info(msg)
     session = _get_auth_session()
-    urls = _get_urls(results)
+    urls = get_urls(results)
     asf.download_urls(
         urls=urls, path=str(output_dir), session=session, processes=max_jobs
     )
@@ -293,23 +293,30 @@ def filter_results_by_date_and_version(results: ASFSearchResults) -> ASFSearchRe
     return ASFSearchResults(filtered_results)
 
 
-def _get_urls(
+def get_urls(
     results: asf.ASFSearchResults,
     type_: Literal["https", "s3"] = "https",
+    file_ext: str = ".h5",
 ) -> list[str]:
+    """Parse the `ASFSearchResults` object for HTTPS or S3 urls."""
     if type_ == "https":
         return [r.properties["url"] for r in results]
     elif type_ == "s3":
-        # TODO: go through .umm, find s3 url
-        raise NotImplementedError()
+        out: list[str] = []
+        for r in results:
+            if "s3Urls" not in r.properties:
+                raise ValueError(f"No S3 URL for {r}")
+
+            for url in r.properties["s3Urls"]:
+                if url.endswith(file_ext):
+                    out.append(url)
+                    break
+            else:
+                raise ValueError(f"Failed to find HDF5 S3 url for {r}")
+        return out
+
     else:
         raise ValueError(f"type_ must be 'https' or 's3'. Got {type_}")
-    # r.umm
-    # 'RelatedUrls': [...
-    # {'URL': 's3://asf-cumulus-prod-opera-products/OPERA_L2_CSLC
-    # 'Type': 'GET DATA VIA DIRECT ACCESS',
-    # 'Description': 'This link provides direct download access vi
-    # 'Format': 'HDF5'},
 
 
 def _get_auth_session() -> asf.ASFSession:
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 152a5d5..134923e 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,7 +2,7 @@ asf_search
 pre-commit
 pytest
 pytest-cov
-pytest-randomly # control random seed
+pytest-randomly
 pytest-recording
-pytest-xdist # parallel tests: https://pytest-xdist.readthedocs.io/en/latest/
+pytest-xdist
 ruff
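Usage note: with `_get_urls` renamed to the public `get_urls`, callers can now request either HTTPS or S3 URLs and filter S3 results by file extension. Below is a minimal sketch of how the new signature might be exercised; it assumes `asf_search`'s `operaBurstID`/`processingLevel` search keywords, and the burst ID value is a placeholder, not a real product.

```python
import asf_search as asf

from opera_utils.download import get_urls

# Placeholder query: the burst ID below is illustrative only.
results = asf.search(
    operaBurstID=["T042-088905-IW1"],
    processingLevel="CSLC",
)

# Default behavior: one HTTPS download URL per result.
https_urls = get_urls(results)

# New in this change: S3 URLs, keeping the first URL per result that
# ends with file_ext (".h5" by default), so sidecar files are skipped.
s3_urls = get_urls(results, type_="s3", file_ext=".h5")
```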