Assessing Quality of Nighttime Lights Data #23

Merged
merged 13 commits on Dec 7, 2023
31 changes: 29 additions & 2 deletions .gitignore
@@ -128,5 +128,32 @@ dmypy.json
# Pyre type checker
.pyre/

# templates
.github/templates/*
# Folder view configuration files
.DS_Store
Desktop.ini

# Thumbnail cache files
._*
Thumbs.db

# Files that might appear on external disks
.Spotlight-V100
.Trashes

# Compiled Python files
*.pyc

# Compiled C++ files
*.out

# Application specific files
venv
node_modules
.sass-cache

# Documentation
_build/

# Data
*.tif
*.h5
2 changes: 1 addition & 1 deletion README.md
@@ -23,7 +23,7 @@ pip install blackmarblepy

## Usage

Before downloading and extracting Black Marble data, define the NASA bearer token, and define a region of interest. For more detailed documentation and examples, please refer to the [documentation](https://worldbank.github.io/blackmarblepy).
Before downloading and extracting Black Marble data, define the NASA bearer token and a region of interest. For more detailed documentation and examples, please refer to the [documentation](https://worldbank.github.io/blackmarblepy/examples/blackmarblepy.html).
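A minimal usage sketch of what the paragraph above describes; the import paths follow the `src/blackmarble` layout in this PR, while the token, file name, and dates are placeholders:

```python
import datetime

import geopandas
from blackmarble.raster import bm_raster
from blackmarble.types import Product

bearer = "<YOUR_NASA_BEARER_TOKEN>"                       # NASA LAADS bearer token
gdf = geopandas.read_file("region_of_interest.geojson")   # any polygon GeoDataFrame

# Monthly (VNP46A3) nighttime-lights composites for Jan-Mar 2023 as an xarray Dataset
ds = bm_raster(
    gdf,
    product_id=Product.VNP46A3,
    date_range=[datetime.date(2023, m, 1) for m in range(1, 4)],
    bearer=bearer,
)
```

The companion `bm_extract` function (see `src/blackmarble/extract.py` below) returns zonal statistics as a pandas DataFrame instead of a raster.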

## Contributing

1 change: 1 addition & 0 deletions docs/_toc.yml
@@ -9,3 +9,4 @@ parts:
- caption: Examples
chapters:
- file: examples/blackmarblepy.ipynb
- file: examples/quality-assessment.ipynb
Binary file modified docs/images/logo.png
Binary file added docs/images/nasa_laads_login.png
2,113 changes: 1,061 additions & 1,052 deletions examples/blackmarblepy.ipynb

Large diffs are not rendered by default.

2,221 changes: 2,221 additions & 0 deletions examples/quality-assessment.ipynb

Large diffs are not rendered by default.

Binary file added src/blackmarble/.DS_Store
Binary file not shown.
17 changes: 8 additions & 9 deletions src/blackmarble/download.py
@@ -14,7 +14,7 @@
from pydantic import BaseModel
from tqdm.auto import tqdm

from .types import ProductId
from .types import Product


@dataclass
@@ -43,14 +43,14 @@ def __init__(self, bearer: str, directory: Path):

def _retrieve_manifest(
self,
product_id: ProductId,
product_id: Product,
date_range: datetime.date | List[datetime.date],
) -> pd.DataFrame:
"""Retrieve NASA Black Marble data manifest. i.d., download links.

Parameters
----------
product_id: ProductId
product_id: Product
NASA Black Marble product suite (VNP46) identifier

date_range: datetime.date | List[datetime.date]
@@ -64,14 +64,14 @@
if isinstance(date_range, datetime.date):
date_range = [date_range]
if isinstance(product_id, str):
product_id = ProductId(product_id)
product_id = Product(product_id)

urlpaths = set()
for date in date_range:
match product_id:
case ProductId.VNP46A3: # if VNP46A3 then first day of the month
case Product.VNP46A3: # if VNP46A3 then first day of the month
tm_yday = date.replace(day=1).timetuple().tm_yday
case ProductId.VNP46A4: # if VNP46A4 then first day of the year
case Product.VNP46A4: # if VNP46A4 then first day of the year
tm_yday = date.replace(month=1, day=1).timetuple().tm_yday
case _:
tm_yday = date.timetuple().tm_yday
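As a quick standard-library illustration of the branching above (the example date is arbitrary): the monthly product snaps to the first day of the month, the annual product to January 1, and the daily products keep the requested date itself.

```python
import datetime

d = datetime.date(2023, 5, 15)

# VNP46A3 (monthly composites): day-of-year of the month's first day
print(d.replace(day=1).timetuple().tm_yday)            # 121
# VNP46A4 (annual composites): day-of-year of January 1
print(d.replace(month=1, day=1).timetuple().tm_yday)   # 1
# VNP46A1 / VNP46A2 (daily): the date's own day-of-year
print(d.timetuple().tm_yday)                           # 135
```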
@@ -150,7 +150,7 @@ def _download(self, names: List[str], n_jobs: int = 16):
def download(
self,
gdf: geopandas.GeoDataFrame,
product_id: ProductId,
product_id: Product,
date_range: datetime.date | List[datetime.date],
skip_if_exists: bool = True,
):
@@ -161,7 +161,7 @@ def download(
gdf: geopandas.GeoDataFrame
Region of Interest

product: ProductId
product: Product
NASA Black Marble product ID (e.g., VNP46A1)

skip_if_exists: bool, default=True
@@ -170,7 +170,6 @@
gdf = geopandas.overlay(
gdf.to_crs("EPSG:4326").dissolve(), self.TILES, how="intersection"
)

bm_files_df = self._retrieve_manifest(product_id, date_range)
bm_files_df = bm_files_df[
bm_files_df["name"].str.contains("|".join(gdf["TileID"]))
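A hedged sketch of driving the downloader directly; the constructor and `download` signatures match the hunks above, while the import path, token, and file name are assumptions based on this repository layout:

```python
import datetime
from pathlib import Path

import geopandas
from blackmarble.download import BlackMarbleDownloader
from blackmarble.types import Product

gdf = geopandas.read_file("region_of_interest.geojson")   # placeholder ROI

downloader = BlackMarbleDownloader("<YOUR_NASA_BEARER_TOKEN>", Path("./h5_cache"))
# Fetch the daily gap-filled product for a single date; files already on disk are
# skipped by default (skip_if_exists=True)
downloader.download(gdf, Product.VNP46A2, datetime.date(2023, 1, 1))
```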
36 changes: 12 additions & 24 deletions src/blackmarble/extract.py
@@ -5,19 +5,18 @@
import geopandas
import numpy as np
import pandas as pd
from rasterio.transform import from_origin
from rasterstats import zonal_stats

from .raster import bm_raster
from .types import ProductId
from .raster import VARIABLE_DEFAULT, bm_raster, transform
from .types import Product


def bm_extract(
roi: geopandas.GeoDataFrame,
product_id: ProductId,
product_id: Product,
date_range: datetime.date | List[datetime.date],
bearer: str,
aggfun: str | List[str] = ["mean"],
aggfunc: str | List[str] = ["mean"],
variable: Optional[str] = None,
quality_flag_rm: List[int] = [255],
check_all_tiles_exist: bool = True,
@@ -90,8 +89,11 @@ def bm_extract(
Returns
-------
pandas.DataFrame
Zonal statistics dataframe
NASA Black Marble zonal statistics dataframe
"""
if variable is None:
variable = VARIABLE_DEFAULT.get(Product(product_id))

ds = bm_raster(
roi,
product_id,
@@ -107,28 +109,14 @@

results = []
for t in ds["time"]:
data = ds.radiance.sel(time=t)

left, bottom, right, top = (
ds["x"].min(),
ds["y"].min(),
ds["x"].max(),
ds["y"].max(),
)
height, width = data.shape
transform = from_origin(
left,
top,
(right - left) / width,
(top - bottom) / height,
)
da = ds[variable].sel(time=t)

zs = zonal_stats(
roi,
data.values,
da.values,
nodata=np.nan,
affine=transform,
stats=aggfun,
affine=transform(da),
stats=aggfunc,
)
zs = pd.DataFrame(zs).add_prefix("ntl_")
zs = pd.concat([roi, zs], axis=1)
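A usage sketch reflecting the renamed `aggfunc` parameter and the default-variable lookup added above; import paths, token, and file name are assumptions based on this repository layout:

```python
import datetime

import geopandas
from blackmarble.extract import bm_extract
from blackmarble.types import Product

gdf = geopandas.read_file("region_of_interest.geojson")   # placeholder ROI

# Annual (VNP46A4) zonal statistics per ROI feature, using the product's default variable
df = bm_extract(
    gdf,
    product_id=Product.VNP46A4,
    date_range=datetime.date(2022, 1, 1),
    bearer="<YOUR_NASA_BEARER_TOKEN>",
    aggfunc=["mean", "median"],
)
```

Per the loop above, the statistics come back in columns prefixed with `ntl_` (e.g. `ntl_mean`, `ntl_median`).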
116 changes: 83 additions & 33 deletions src/blackmarble/raster.py
@@ -18,62 +18,75 @@
from tqdm.auto import tqdm

from .download import BlackMarbleDownloader
from .types import ProductId
from .types import Product


VARIABLE_DEFAULT = {
Product.VNP46A1: "DNB_At_Sensor_Radiance_500m",
Product.VNP46A2: "Gap_Filled_DNB_BRDF-Corrected_NTL",
Product.VNP46A3: "NearNadir_Composite_Snow_Free",
Product.VNP46A4: "NearNadir_Composite_Snow_Free",
}


def h5_to_geotiff(
f: Path,
/,
output_directory: Path = None,
output_prefix: str = None,
variable: str = None,
quality_flag_rm=[255],
output_directory: Path = None,
output_prefix: str = None,
):
"""
Convert HDF5 file to GeoTIFF for a selected (or default) variable from NASA Black Marble data

Parameters
----------
f: Path
HDF5 filename

variable: str, default = None
Variable for which to create the GeoTIFF raster. For further information, please see the `NASA Black Marble User Guide <https://ladsweb.modaps.eosdis.nasa.gov/api/v2/content/archives/Document%20Archive/Science%20Data%20Product%20Documentation/VIIRS_Black_Marble_UG_v1.2_April_2021.pdf>`_: for `VNP46A1`, see Table 3; for `VNP46A2`, see Table 6; for `VNP46A3` and `VNP46A4`, see Table 9. By default, it uses the following variables:

- For ``VNP46A1``, uses ``DNB_At_Sensor_Radiance_500m``
- For ``VNP46A2``, uses ``Gap_Filled_DNB_BRDF-Corrected_NTL``
- For ``VNP46A3``, uses ``NearNadir_Composite_Snow_Free``.
- For ``VNP46A4``, uses ``NearNadir_Composite_Snow_Free``.

Returns
------
output_path: Path
Path to the output GeoTIFF file
"""
variable_default = {
ProductId.VNP46A1: "DNB_At_Sensor_Radiance_500m",
ProductId.VNP46A2: "Gap_Filled_DNB_BRDF-Corrected_NTL",
ProductId.VNP46A3: "NearNadir_Composite_Snow_Free",
ProductId.VNP46A4: "NearNadir_Composite_Snow_Free",
}
output_path = Path(output_directory, f.stem).with_suffix(".tif")
product_id = ProductId(f.stem.split(".")[0])
variable = variable_default.get(product_id)
product_id = Product(f.stem.split(".")[0])

if variable is None:
variable = VARIABLE_DEFAULT.get(product_id)

with h5py.File(f, "r") as h5_data:
attrs = h5_data.attrs

if product_id in [ProductId.VNP46A1, ProductId.VNP46A2]:
if product_id in [Product.VNP46A1, Product.VNP46A2]:
dataset = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][
variable
]
qf = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][
"Mandatory_Quality_Flag"
]

left, bottom, right, top = (
attrs.get("WestBoundingCoord"),
attrs.get("SouthBoundingCoord"),
attrs.get("EastBoundingCoord"),
attrs.get("NorthBoundingCoord"),
)

qf = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][
"Mandatory_Quality_Flag"
]
else:
dataset = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][
variable
]
h5_names = list(
h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"].keys()
)

lat = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"]["lat"]
lon = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"]["lon"]
left, bottom, right, top = min(lon), min(lat), max(lon), max(lat)
@@ -83,13 +96,19 @@ def h5_to_geotiff(
variable_short = re.sub("_Num", "", variable_short)
variable_short = re.sub("_Std", "", variable_short)

qf_name = variable_short + "_Quality"

if qf_name in h5_names:
h5_names = list(
h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"][
"Data Fields"
].keys()
)
if (qf_name := f"{variable_short}_Quality") in h5_names:
qf = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][
variable + "_Quality"
qf_name
]
if variable in h5_names:
qf = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][
variable
]

# Extract data and attributes
data = dataset[:]
qf = qf[:]
@@ -123,6 +142,24 @@
return output_path


def transform(da: xr.DataArray):
"""Return Affice transformation"""
left, bottom, right, top = (
da["x"].min(),
da["y"].min(),
da["x"].max(),
da["y"].max(),
)
height, width = da.shape

return from_origin(
left,
top,
(right - left) / width,
(top - bottom) / height,
)
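For reference, the helper above amounts to the following arithmetic; this standalone sketch mirrors it on a toy grid with made-up coordinates:

```python
import numpy as np
import xarray as xr
from rasterio.transform import from_origin

# Toy 3x4 grid with descending y (north to south), as in a typical raster
da = xr.DataArray(
    np.zeros((3, 4)),
    coords={"y": [30.0, 20.0, 10.0], "x": [100.0, 110.0, 120.0, 130.0]},
    dims=("y", "x"),
)

left, bottom = float(da["x"].min()), float(da["y"].min())
right, top = float(da["x"].max()), float(da["y"].max())
height, width = da.shape

affine = from_origin(left, top, (right - left) / width, (top - bottom) / height)
print(affine)   # origin at (100.0, 30.0), pixel size 7.5 x ~6.67
```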


def _pivot_paths_by_date(paths: List[Path]):
"""Return dictionary of paths by date

@@ -143,8 +180,7 @@ def _pivot_paths_by_date(paths: List[Path]):
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def bm_raster(
gdf: geopandas.GeoDataFrame,
/,
product_id: ProductId,
product_id: Product,
date_range: datetime.date | List[datetime.date],
bearer: str,
variable: Optional[str] = None,
@@ -223,10 +259,13 @@
if not isinstance(date_range, list):
date_range = [date_range]

if variable is None:
variable = VARIABLE_DEFAULT.get(product_id)

match product_id:
case ProductId.VNP46A3:
case Product.VNP46A3:
date_range = sorted(set([d.replace(day=1) for d in date_range]))
case ProductId.VNP46A4:
case Product.VNP46A4:
date_range = sorted(set([d.replace(day=1, month=1) for d in date_range]))

# Download and construct Dataset
@@ -240,7 +279,16 @@
filenames = _pivot_paths_by_date(pathnames).get(date)

# Open each GeoTIFF file as a DataArray and store in a list
da = [rioxarray.open_rasterio(h5_to_geotiff(f, d)) for f in filenames]
da = [
rioxarray.open_rasterio(
h5_to_geotiff(
f,
variable=variable,
output_directory=d,
)
)
for f in filenames
]
ds = merge_arrays(da)
ds = ds.rio.clip(gdf.geometry.apply(mapping), gdf.crs, drop=True)
ds["time"] = pd.to_datetime(date)
@@ -250,14 +298,16 @@
# Stack the individual dates along "time" dimension
ds = (
xr.concat(dx, dim="time", combine_attrs="drop_conflicts")
.to_dataset(name="radiance", promote_attrs=True)
.to_dataset(name=variable, promote_attrs=True)
.sortby("time")
.drop(["band", "spatial_ref"])
.assign_attrs(
)
if variable in VARIABLE_DEFAULT.values():
ds.assign_attrs(
short_name="Radiance",
long_name=variable,
units="Watts per square meter per steradian (W/m²/sr)",
description="Radiance",
)
)
ds.radiance.attrs["units"] = "W/m²/sr"
ds[variable].attrs = {"units": "W/m²/sr"}

return ds
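Because the stacked Dataset now names its data variable after the Black Marble variable rather than a fixed `radiance` key, downstream code selects it by that name. A hedged sketch using the daily gap-filled product and its default variable (token and file name are placeholders):

```python
import datetime

import geopandas
from blackmarble.raster import bm_raster
from blackmarble.types import Product

gdf = geopandas.read_file("region_of_interest.geojson")   # placeholder ROI

ds = bm_raster(
    gdf,
    product_id=Product.VNP46A2,
    date_range=datetime.date(2023, 1, 1),
    bearer="<YOUR_NASA_BEARER_TOKEN>",
)

# The data variable is keyed by the Black Marble variable name, not "radiance"
ntl = ds["Gap_Filled_DNB_BRDF-Corrected_NTL"]
print(ntl.attrs.get("units"))   # "W/m²/sr", set in the attrs above
```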