From 22e9846ef60398793e4cd109f55ba2ea6901d3bc Mon Sep 17 00:00:00 2001 From: Gabriel Stefanini Vicente Date: Wed, 6 Dec 2023 20:20:43 -0500 Subject: [PATCH] Amend raster --- blackmarblepy/.DS_Store | Bin 6148 -> 0 bytes docs/_toc.yml | 1 + src/blackmarble/download.py | 17 +++--- src/blackmarble/extract.py | 31 +++------- src/blackmarble/raster.py | 116 ++++++++++++++++++++++++++---------- src/blackmarble/types.py | 2 +- 6 files changed, 101 insertions(+), 66 deletions(-) delete mode 100644 blackmarblepy/.DS_Store diff --git a/blackmarblepy/.DS_Store b/blackmarblepy/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 pd.DataFrame: """Retrieve NASA Black Marble data manifest. i.d., download links. Parameters ---------- - product_id: ProductId + product_id: Product NASA Black Marble product suite (VNP46) identifier date_range: datetime.date | List[datetime.date] @@ -64,14 +64,14 @@ def _retrieve_manifest( if isinstance(date_range, datetime.date): date_range = [date_range] if isinstance(product_id, str): - product_id = ProductId(product_id) + product_id = Product(product_id) urlpaths = set() for date in date_range: match product_id: - case ProductId.VNP46A3: # if VNP46A3 then first day of the month + case Product.VNP46A3: # if VNP46A3 then first day of the month tm_yday = date.replace(day=1).timetuple().tm_yday - case ProductId.VNP46A4: # if VNP46A4 then first day of the year + case Product.VNP46A4: # if VNP46A4 then first day of the year tm_yday = date.replace(month=1, day=1).timetuple().tm_yday case _: tm_yday = date.timetuple().tm_yday @@ -150,7 +150,7 @@ def _download(self, names: List[str], n_jobs: int = 16): def download( self, gdf: geopandas.GeoDataFrame, - product_id: ProductId, + product_id: Product, date_range: datetime.date | List[datetime.date], skip_if_exists: bool = True, ): @@ -161,7 +161,7 @@ def download( gdf: geopandas.GeoDataFrame Region of Interest - product: ProductId + product: Product Nasa Black Marble Product Id (e.g, VNP46A1) skip_if_exists: bool, default=True @@ -170,7 +170,6 @@ def download( gdf = geopandas.overlay( gdf.to_crs("EPSG:4326").dissolve(), self.TILES, how="intersection" ) - bm_files_df = self._retrieve_manifest(product_id, date_range) bm_files_df = bm_files_df[ bm_files_df["name"].str.contains("|".join(gdf["TileID"])) diff --git a/src/blackmarble/extract.py b/src/blackmarble/extract.py index e0c94ad..cdc6343 100644 --- a/src/blackmarble/extract.py +++ b/src/blackmarble/extract.py @@ -5,19 +5,18 @@ import geopandas import numpy as np import pandas as pd -from rasterio.transform import from_origin from rasterstats import zonal_stats -from .raster import bm_raster -from .types import ProductId +from .raster import bm_raster, transform +from .types import Product def bm_extract( roi: geopandas.GeoDataFrame, - product_id: ProductId, + product_id: Product, date_range: datetime.date | List[datetime.date], bearer: str, - aggfun: str | List[str] = ["mean"], + aggfunc: str | List[str] = ["mean"], variable: Optional[str] = None, quality_flag_rm: List[int] = [255], check_all_tiles_exist: bool = True, @@ -107,28 +106,14 @@ def bm_extract( results = [] for t in ds["time"]: - data = ds.radiance.sel(time=t) - - left, bottom, right, top = ( - ds["x"].min(), - ds["y"].min(), - ds["x"].max(), - ds["y"].max(), - ) - height, width = data.shape - transform = from_origin( - left, - top, - (right - left) / width, - (top - bottom) / height, - ) + da = ds[variable].sel(time=t) zs = zonal_stats( roi, - data.values, + da.values, nodata=np.nan, - affine=transform, - stats=aggfun, + affine=transform(da), + stats=aggfunc, ) zs = pd.DataFrame(zs).add_prefix("ntl_") zs = pd.concat([roi, zs], axis=1) diff --git a/src/blackmarble/raster.py b/src/blackmarble/raster.py index 9527a68..cfd0d84 100644 --- a/src/blackmarble/raster.py +++ b/src/blackmarble/raster.py @@ -18,62 +18,75 @@ from tqdm.auto import tqdm from .download import BlackMarbleDownloader -from .types import ProductId +from .types import Product + + +VARIABLE_DEFAULT = { + Product.VNP46A1: "DNB_At_Sensor_Radiance_500m", + Product.VNP46A2: "Gap_Filled_DNB_BRDF-Corrected_NTL", + Product.VNP46A3: "NearNadir_Composite_Snow_Free", + Product.VNP46A4: "NearNadir_Composite_Snow_Free", +} def h5_to_geotiff( f: Path, /, - output_directory: Path = None, - output_prefix: str = None, variable: str = None, quality_flag_rm=[255], + output_directory: Path = None, + output_prefix: str = None, ): """ Convert HDF5 file to GeoTIFF for a selected (or default) variable from NASA Black Marble data Parameters ---------- + f: Path + H5DF filename + + variable: str, default = None + Variable to create GeoTIFF raster. Further information, pleae see the `NASA Black Marble User Guide `_ for `VNP46A1`, see Table 3; for `VNP46A2` see Table 6; for `VNP46A3` and `VNP46A4`, see Table 9. By default, it uses the following default variables: + - For ``VNP46A1``, uses ``DNB_At_Sensor_Radiance_500m`` + - For ``VNP46A2``, uses ``Gap_Filled_DNB_BRDF-Corrected_NTL`` + - For ``VNP46A3``, uses ``NearNadir_Composite_Snow_Free``. + - For ``VNP46A4``, uses ``NearNadir_Composite_Snow_Free``. Returns ------ output_path: Path Path to which GeoTIFF """ - variable_default = { - ProductId.VNP46A1: "DNB_At_Sensor_Radiance_500m", - ProductId.VNP46A2: "Gap_Filled_DNB_BRDF-Corrected_NTL", - ProductId.VNP46A3: "NearNadir_Composite_Snow_Free", - ProductId.VNP46A4: "NearNadir_Composite_Snow_Free", - } output_path = Path(output_directory, f.stem).with_suffix(".tif") - product_id = ProductId(f.stem.split(".")[0]) - variable = variable_default.get(product_id) + product_id = Product(f.stem.split(".")[0]) + + if variable is None: + variable = VARIABLE_DEFAULT.get(product_id) with h5py.File(f, "r") as h5_data: attrs = h5_data.attrs - if product_id in [ProductId.VNP46A1, ProductId.VNP46A2]: + if product_id in [Product.VNP46A1, Product.VNP46A2]: dataset = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][ variable ] - qf = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][ - "Mandatory_Quality_Flag" - ] + left, bottom, right, top = ( attrs.get("WestBoundingCoord"), attrs.get("SouthBoundingCoord"), attrs.get("EastBoundingCoord"), attrs.get("NorthBoundingCoord"), ) + + qf = h5_data["HDFEOS"]["GRIDS"]["VNP_Grid_DNB"]["Data Fields"][ + "Mandatory_Quality_Flag" + ] else: dataset = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][ variable ] - h5_names = list( - h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"].keys() - ) + lat = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"]["lat"] lon = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"]["lon"] left, bottom, right, top = min(lon), min(lat), max(lon), max(lat) @@ -83,13 +96,19 @@ def h5_to_geotiff( variable_short = re.sub("_Num", "", variable_short) variable_short = re.sub("_Std", "", variable_short) - qf_name = variable_short + "_Quality" - - if qf_name in h5_names: + h5_names = list( + h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"][ + "Data Fields" + ].keys() + ) + if (qf_name := f"{variable_short}_Quality") in h5_names: qf = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][ - variable + "_Quality" + qf_name + ] + if variable in h5_names: + qf = h5_data["HDFEOS"]["GRIDS"]["VIIRS_Grid_DNB_2d"]["Data Fields"][ + variable ] - # Extract data and attributes data = dataset[:] qf = qf[:] @@ -123,6 +142,24 @@ def h5_to_geotiff( return output_path +def transform(da: xr.DataArray): + """Return Affice transformation""" + left, bottom, right, top = ( + da["x"].min(), + da["y"].min(), + da["x"].max(), + da["y"].max(), + ) + height, width = da.shape + + return from_origin( + left, + top, + (right - left) / width, + (top - bottom) / height, + ) + + def _pivot_paths_by_date(paths: List[Path]): """Return dictionary of paths by date @@ -143,8 +180,7 @@ def _pivot_paths_by_date(paths: List[Path]): @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def bm_raster( gdf: geopandas.GeoDataFrame, - /, - product_id: ProductId, + product_id: Product, date_range: datetime.date | List[datetime.date], bearer: str, variable: Optional[str] = None, @@ -223,10 +259,13 @@ def bm_raster( if not isinstance(date_range, list): date_range = [date_range] + if variable is None: + variable = VARIABLE_DEFAULT.get(product_id) + match product_id: - case ProductId.VNP46A3: + case Product.VNP46A3: date_range = sorted(set([d.replace(day=1) for d in date_range])) - case ProductId.VNP46A4: + case Product.VNP46A4: date_range = sorted(set([d.replace(day=1, month=1) for d in date_range])) # Download and construct Dataset @@ -240,7 +279,16 @@ def bm_raster( filenames = _pivot_paths_by_date(pathnames).get(date) # Open each GeoTIFF file as a DataArray and store in a list - da = [rioxarray.open_rasterio(h5_to_geotiff(f, d)) for f in filenames] + da = [ + rioxarray.open_rasterio( + h5_to_geotiff( + f, + variable=variable, + output_directory=d, + ) + ) + for f in filenames + ] ds = merge_arrays(da) ds = ds.rio.clip(gdf.geometry.apply(mapping), gdf.crs, drop=True) ds["time"] = pd.to_datetime(date) @@ -250,14 +298,16 @@ def bm_raster( # Stack the individual dates along "time" dimension ds = ( xr.concat(dx, dim="time", combine_attrs="drop_conflicts") - .to_dataset(name="radiance", promote_attrs=True) + .to_dataset(name=variable, promote_attrs=True) .sortby("time") .drop(["band", "spatial_ref"]) - .assign_attrs( + ) + if variable in VARIABLE_DEFAULT.values(): + ds.assign_attrs( + short_name="Radiance", + long_name=variable, units="Watts per square meter per steradian (W/m²/sr)", - description="Radiance", ) - ) - ds.radiance.attrs["units"] = "W/m²/sr" + ds[variable].attrs = {"units": "W/m²/sr"} return ds diff --git a/src/blackmarble/types.py b/src/blackmarble/types.py index 21a33c5..c847dc4 100644 --- a/src/blackmarble/types.py +++ b/src/blackmarble/types.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, validator -class ProductId(Enum): +class Product(Enum): """NASA Black Marble product suite (VNP46)""" VNP46A1 = "VNP46A1"