-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from scottstanie/add-datasets
Add datasets pulled by `pooch`
- Loading branch information
Showing
13 changed files
with
974 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- pyogrio>=0.5 | ||
- gdal>=3.3 | ||
- geopandas-base>=0.12 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
from __future__ import annotations | ||
|
||
import json | ||
import zipfile | ||
from pathlib import Path | ||
from typing import Optional, Sequence | ||
|
||
from . import datasets | ||
from ._types import Bbox, PathOrStr | ||
|
||
|
||
def read_zipped_json(filename: PathOrStr):
    """Read a JSON file, optionally stored inside a zip archive.

    Parameters
    ----------
    filename : PathOrStr
        Path to a JSON file, or to a ``.zip`` archive containing one.
        For an archive, the member that is read is the archive's own file
        name with the trailing ``.zip`` removed
        (e.g. ``frames.json.zip`` -> ``frames.json``).

    Returns
    -------
    dict
        The parsed JSON contents (whatever `json` decodes the document to;
        a dictionary for a JSON object).

    Raises
    ------
    KeyError
        If the archive has no member with the expected name.
    """
    path = Path(filename)
    if path.suffix != ".zip":
        # JSON text is UTF-8; be explicit so the result does not depend on
        # the platform's default locale encoding.
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    # `Path.stem` drops only the final ".zip". A blanket
    # `str.replace(".zip", "")` would also strip ".zip" occurring in the
    # middle of the name and then look up a member that does not exist.
    with zipfile.ZipFile(path) as zf:
        raw = zf.read(path.stem)
    return json.loads(raw.decode("utf-8"))
|
||
|
||
def get_frame_to_burst_mapping(
    frame_id: int, json_file: Optional[PathOrStr] = None
) -> dict:
    """Look up the frame-to-burst mapping entry for a single frame.

    Parameters
    ----------
    frame_id : int
        The ID of the frame to look up.
    json_file : PathOrStr, optional
        Path to the (optionally zipped) JSON file containing the
        frame-to-burst mapping.
        If `None`, the file returned by
        `datasets.fetch_frame_to_burst_mapping_file()` is used.

    Returns
    -------
    dict
        The entry stored under ``str(frame_id)`` in the file's ``"data"``
        section.
    """
    mapping_path = (
        datasets.fetch_frame_to_burst_mapping_file()
        if json_file is None
        else json_file
    )
    all_frames = read_zipped_json(mapping_path)["data"]
    # Entries are looked up by the string form of the frame ID.
    return all_frames[str(frame_id)]
|
||
|
||
def get_frame_geojson(
    as_geodataframe: bool = False,
    columns: Optional[Sequence[str]] = None,
    frame_ids: Optional[Sequence[str]] = None,
) -> dict:
    """Load the frame geometries.

    Parameters
    ----------
    as_geodataframe : bool
        Forwarded to `_get_geojson`; selects the dataframe reader instead of
        the plain JSON reader.
    columns : Sequence[str], optional
        Forwarded to `_get_geojson` as the columns to read.
    frame_ids : Sequence[str], optional
        If non-empty, a ``frame_id IN (...)`` filter is built from these
        values and forwarded to `_get_geojson` as `where`.

    Returns
    -------
    dict
        Whatever `_get_geojson` returns for the frame geometries file.
    """
    if frame_ids:
        where = _form_where_in_query(frame_ids, "frame_id")
    else:
        # None or an empty sequence: no attribute filter.
        where = None

    geometry_file = datasets.fetch_frame_geometries_simple()
    return _get_geojson(
        geometry_file,
        as_geodataframe=as_geodataframe,
        columns=columns,
        where=where,
    )
|
||
|
||
def get_burst_id_geojson(
    as_geodataframe: bool = False,
    columns: Optional[Sequence[str]] = None,
    burst_ids: Optional[Sequence[str]] = None,
) -> dict:
    """Load the burst ID geometries.

    Parameters
    ----------
    as_geodataframe : bool
        Forwarded to `_get_geojson`; selects the dataframe reader instead of
        the plain JSON reader.
    columns : Sequence[str], optional
        Forwarded to `_get_geojson` as the columns to read.
    burst_ids : Sequence[str], optional
        If non-empty, a ``burst_id_jpl IN (...)`` filter is built from these
        values and forwarded to `_get_geojson` as `where`.

    Returns
    -------
    dict
        Whatever `_get_geojson` returns for the burst ID geometries file.
    """
    if burst_ids:
        where = _form_where_in_query(burst_ids, "burst_id_jpl")
    else:
        # None or an empty sequence: no attribute filter.
        where = None

    geometry_file = datasets.fetch_burst_id_geometries_simple()
    return _get_geojson(
        geometry_file,
        as_geodataframe=as_geodataframe,
        columns=columns,
        where=where,
    )
|
||
|
||
def _form_where_in_query(values: Sequence[str], column_name):
    """Build a ``<column_name> IN ('v1','v2',...)`` SQL filter string.

    Example output:
    "burst_id_jpl IN ('t005_009471_iw2','t007_013706_iw2','t008_015794_iw1')"

    Parameters
    ----------
    values : Sequence[str]
        Values to match. Each one is converted with `str` and emitted as a
        single-quoted SQL string literal.
    column_name
        Name of the column to filter on; inserted verbatim.

    Returns
    -------
    str
        The filter expression.
    """
    # Double any embedded single quote (standard SQL escaping) so a value
    # cannot terminate its own literal early and corrupt the expression.
    quoted_values = ",".join(
        "'{}'".format(str(value).replace("'", "''")) for value in values
    )
    return f"{column_name} IN ({quoted_values})"
|
||
|
||
def _get_geojson(
    f,
    as_geodataframe: bool = False,
    columns: Optional[Sequence[str]] = None,
    where: Optional[str] = None,
) -> dict:
    """Read a geometry file either as parsed JSON or through pyogrio.

    Parameters
    ----------
    f
        Path of the file to read.
    as_geodataframe : bool
        If True, read with `pyogrio.read_dataframe`; otherwise parse the
        file with `read_zipped_json`.
    columns : Sequence[str], optional
        Passed to `pyogrio.read_dataframe`; unused on the JSON path.
    where : str, optional
        Passed to `pyogrio.read_dataframe`; unused on the JSON path.

    Returns
    -------
    dict
        The parsed JSON when `as_geodataframe` is False; otherwise the
        object returned by `pyogrio.read_dataframe`.
    """
    if not as_geodataframe:
        return read_zipped_json(f)

    # Imported here so pyogrio is only required when this path is taken.
    from pyogrio import read_dataframe

    # Filter syntax references:
    # https://gdal.org/user/ogr_sql_dialect.html#where
    # https://pyogrio.readthedocs.io/en/latest/introduction.html#filter-records-by-attribute-value
    return read_dataframe(f, columns=columns, where=where)
|
||
|
||
def get_frame_bbox(
    frame_id: int, json_file: Optional[PathOrStr] = None
) -> tuple[int, Bbox]:
    """Return the EPSG code and bounding box recorded for one frame.

    Parameters
    ----------
    frame_id : int
        The ID of the frame to get the bounding box for.
    json_file : PathOrStr, optional
        The path to the JSON file containing the frame-to-burst mapping.
        Passed through to `get_frame_to_burst_mapping`, which fetches the
        file via `datasets` when this is `None`.

    Returns
    -------
    epsg : int
        EPSG code for the bounds coordinates
    tuple[float, float, float, float]
        bounding box coordinates (xmin, ymin, xmax, ymax)
    """
    entry = get_frame_to_burst_mapping(frame_id=frame_id, json_file=json_file)
    epsg = int(entry["epsg"])
    # Keys are read in (xmin, ymin, xmax, ymax) order to match the return.
    bounds = tuple(float(entry[key]) for key in ("xmin", "ymin", "xmax", "ymax"))
    return epsg, bounds
|
||
|
||
def get_burst_ids_for_frame(
    frame_id: int, json_file: Optional[PathOrStr] = None
) -> list[str]:
    """Return the burst IDs listed for one frame ID.

    Parameters
    ----------
    frame_id : int
        The ID of the frame whose burst IDs are wanted.
    json_file : PathOrStr, optional
        The path to the JSON file containing the frame-to-burst mapping.
        Passed through to `get_frame_to_burst_mapping`, which fetches the
        file via `datasets` when this is `None`.

    Returns
    -------
    list[str]
        The value stored under ``"burst_ids"`` for the given frame ID.
    """
    return get_frame_to_burst_mapping(frame_id, json_file)["burst_ids"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from __future__ import annotations | ||
|
||
import pooch | ||
|
||
# Public API of this module: one fetch function per registered data file.
__all__ = [
    "fetch_frame_geometries_simple",
    "fetch_burst_id_geometries_simple",
    "fetch_burst_to_frame_mapping_file",
    "fetch_frame_to_burst_mapping_file",
]

# See: https://github.com/opera-adt/burst_db/tree/main/src/burst_db/data
# Alternative base URLs that are not in use (kept for reference):
# BASE_URL = "https://github.com/opera-adt/burst_db/raw/v{version}/src/burst_db/data"
# BASE_URL = "https://github.com/opera-adt/burst_db/raw/v0.3.0/src/burst_db/data"
# Active base URL; `{version}` is a placeholder filled in from the `version`
# argument given to `pooch.create` below.
BASE_URL = "https://github.com/opera-adt/burst_db/releases/download/v{version}/"

# Command used to compute SHA256 checksums, with a sample listing:
# $ ls *json.zip | xargs -n1 shasum -a 256
# 8ee9cae079b9adb24e223b9ff9c81c66506a2a1a72a456220133a9f7f5d4d93b burst_id_geometries_simple.geojson.zip
# 86657e4e578cfced18a66984758fff9a1bf94e8591a288be0d1ad391399f2e59 frame_geometries_simple.geojson.zip
# 436cce345378dc31e81ed661497bab2e744217a5d63c0bb92817dc837786cd22 opera-s1-disp-burst-to-frame.json.zip
# 8b7ed8c8d90ef3d3348bc226958a26a2cb8ab302a6466762aa971b8f7333517f opera-s1-disp-frame-to-burst.json.zip
# NOTE(review): the file names above, and all hashes except the
# burst-to-frame one, differ from the registry below -- presumably this
# listing comes from an earlier release; confirm and refresh or remove.

# Version of the burst database files; used both in the download URL and in
# every registry file name below.
BURST_DB_VERSION = "0.3.1"

POOCH = pooch.create(
    # Folder where the data will be stored. For a sensible default, use the
    # default cache folder for your OS.
    path=pooch.os_cache("opera_utils"),
    # Base URL of the remote data store. Will call .format on this string
    # to insert the version (see below).
    base_url=BASE_URL,
    # Pooches are versioned so that you can use multiple versions of a
    # package simultaneously. Use PEP440 compliant version number. The
    # version will be appended to the path.
    version=BURST_DB_VERSION,
    # If a version has a "+XX.XXXXX" suffix, we'll assume that this is a dev
    # version and replace the version with this string.
    version_dev="main",
    # An environment variable that overwrites the path.
    env="OPERA_UTILS_DATA_DIR",
    # The cache file registry. A dictionary with all files managed by this
    # pooch. Keys are the file names (relative to *base_url*) and values
    # are their respective SHA256 hashes. Files will be downloaded
    # automatically when needed.
    registry={
        f"frame-geometries-simple-{BURST_DB_VERSION}.geojson.zip": "f0094f4cdc287d56d7a126a42f1e3075e50309afe8a431f49df1ecd8d8b26c8b",
        f"burst-id-geometries-simple-{BURST_DB_VERSION}.geojson.zip": "d9cfe71ec836facd5a782ea82625c30a824b78f2b2689106c4d6808bbfce0898",
        f"opera-s1-disp-burst-to-frame-{BURST_DB_VERSION}.json.zip": "436cce345378dc31e81ed661497bab2e744217a5d63c0bb92817dc837786cd22",
        f"opera-s1-disp-frame-to-burst-{BURST_DB_VERSION}.json.zip": "a48382afcb89f0ff681982b0fc24476ec9c6c1b8a67ae1a26cf380a450ffadc0",
    },
)
|
||
|
||
def fetch_frame_geometries_simple() -> str:
    """Fetch the simplified frame geometries file for the burst database.

    Returns
    -------
    str
        The result of `POOCH.fetch` for the versioned
        ``frame-geometries-simple`` GeoJSON zip.
    """
    registry_key = f"frame-geometries-simple-{BURST_DB_VERSION}.geojson.zip"
    return POOCH.fetch(registry_key)
|
||
|
||
def fetch_burst_id_geometries_simple() -> str:
    """Fetch the simplified burst ID geometries file for the burst database.

    Returns
    -------
    str
        The result of `POOCH.fetch` for the versioned
        ``burst-id-geometries-simple`` GeoJSON zip.
    """
    registry_key = f"burst-id-geometries-simple-{BURST_DB_VERSION}.geojson.zip"
    return POOCH.fetch(registry_key)
|
||
|
||
def fetch_burst_to_frame_mapping_file() -> str:
    """Fetch the burst-to-frame mapping file for the burst database.

    Returns
    -------
    str
        The result of `POOCH.fetch` for the versioned
        ``opera-s1-disp-burst-to-frame`` JSON zip.
    """
    registry_key = f"opera-s1-disp-burst-to-frame-{BURST_DB_VERSION}.json.zip"
    return POOCH.fetch(registry_key)
|
||
|
||
def fetch_frame_to_burst_mapping_file() -> str:
    """Fetch the frame-to-burst mapping file for the burst database.

    Returns
    -------
    str
        The result of `POOCH.fetch` for the versioned
        ``opera-s1-disp-frame-to-burst`` JSON zip.
    """
    registry_key = f"opera-s1-disp-frame-to-burst-{BURST_DB_VERSION}.json.zip"
    return POOCH.fetch(registry_key)
Oops, something went wrong.