From 70e5145589cffa83f40e93922f1c8c883c28e4ed Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:54:25 -0400 Subject: [PATCH 1/5] WIP - add nrcan-gridded-obs_cf_attrs.json --- .../configs/nrcan-gridded-obs_cf_attrs.json | 131 ++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json diff --git a/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json b/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json new file mode 100644 index 00000000..78bc9212 --- /dev/null +++ b/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json @@ -0,0 +1,131 @@ +{ + "Header": { + "Conventions": "CF-1.9", + "_miranda_version": true, + "_missing_values": [ + "-999." + ], + "_variable": true, + "acknowledgement": "This data is provided by Natural Resources Canada (NRCan).", + "author": "Natural Resources Canada", + "citation": "When referring to the ANUSPLIN gridded climate data set, the authors provide this data under the Environment and Climate Change Canada Data Server End Use License () and demand that any references to or derivative works based on this data set be cited as follows: McKenney, D.W., Hutchinson, M., Papadopol, P., Lawrence, K., Pedlar, J.H., Campbell, K.L., Milewska, E., Hopkinson, R.F., Price, D., Owen, T., 2011a. Customized spatial climate models for North America. Bulletin of the American Meteorological Society 92(12), pp. 1611-1622. 
https://doi.org/10.1175/2011BAMS3132.1", + "contact": "dan.mckenney@NRCan-RNCan.gc.ca", + "documentation": "https://www.canada.ca/en/environment-climate-change/services/climate-change/canadian-centre-climate-services/display-download/technical-documentation-adjusted-climate-data.html", + "domain": "CAN", + "frequency": "day", + "institution": "GovCan", + "license": "https://open.canada.ca/en/open-government-licence-canada", + "license_type": "permissive", + "organization": "NRCan", + "processing_level": "raw", + "product": "Natural Resources Canada ANUSPLIN interpolated historical climate model dataset", + "realm": "atmos", + "references": "McKenney, D.W., M.F. Hutchinson, P. Papadopol, K. Lawrence, J. Pedlar, K. Campbell, E. Milewska, R.F. Hopkinson, D. Price, and T. Owen, 2011: Customized Spatial Climate Models for North America. Bull. Amer. Meteor. Soc., 92, 1611–1622, https://doi.org/10.1175/2011BAMS3132.1", + "source": "nrcan", + "title": "NRCan ANUSPLIN 10-Km Gridded Climate dataset", + "table_date": "2024-03-06", + "table_id": "nrcan-gridded-obs", + "type": "reconstruction" + }, + "dimensions": { + "lat": { + "axis": "Y", + "long_name": "Latitude", + "standard_name": "latitude", + "units": "degrees_north" + }, + "long": { + "_cf_dimension_name": "lon", + "axis": "X", + "long_name": "Longitude", + "standard_name": "longitude", + "units": "degrees_east" + }, + "time": { + "axis": "T", + "calendar": "gregorian", + "long_name": "Time", + "standard_name": "time" + } + }, + "variables": { + "dm": { + "_cf_variable_name": "tas", + "add_offset": 273.15, + "cell_methods": "time: mean", + "comments": "Station data converted from Mean Temp (°C)", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Near-Surface Air Temperature", + "original_field": "Mean Temp (°C)", + "standard_name": "air_temperature", + "type": "real", + "units": "K" + }, + "dn": { + "_cf_variable_name": "tasmin", + "add_offset": 273.15, + "cell_methods": "time: minimum", + 
"comments": "Station data converted from Min Temp (°C)", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Daily Minimum Near-Surface Air Temperature", + "original_field": "Min Temp (°C)", + "standard_name": "air_temperature", + "type": "real", + "units": "K" + }, + "dr": { + "_cf_variable_name": "prlp", + "cell_methods": "time: mean", + "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Liquid Precipitation", + "original_field": "Total Rain (mm)", + "scale_factor": 1.1574074074074073e-05, + "standard_name": "rainfall_flux", + "type": "real", + "units": "kg m-2 s-1" + }, + "ds": { + "_cf_variable_name": "prsn", + "cell_methods": "time: mean", + "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Snowfall Flux", + "original_field": "Total Snow (cm)", + "scale_factor": 1.1574074074074073e-05, + "standard_name": "snowfall_flux", + "type": "real", + "units": "kg m-2 s-1" + }, + "dt": { + "_cf_variable_name": "pr", + "cell_methods": "time: mean", + "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Precipitation", + "original_field": "Total Precip (mm)", + "scale_factor": 1.1574074074074073e-05, + "standard_name": "precipitation_flux", + "type": "real", + "units": "kg m-2 s-1" + }, + "dx": { + "_cf_variable_name": "tasmax", + "add_offset": 273.15, + "cell_methods": "time: maximum", + "comments": "station data converted from Max Temp (°C)", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Daily Maximum Near-Surface Air Temperature", + "original_field": "Max Temp (°C)", + "standard_name": "air_temperature", + "type": "real", + "units": "K" + } + } +} From 
1190b75f09579005533e2a4bcfdc18ea9efb4313 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 18 Mar 2024 14:56:24 -0400 Subject: [PATCH 2/5] fix some checking logic --- miranda/convert/corrections.py | 22 +++++++++++----------- miranda/io/_input.py | 29 ++++++++++++++++------------- templates/restructure_datasets.py | 2 +- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/miranda/convert/corrections.py b/miranda/convert/corrections.py index 0fcce7d4..5b74590b 100644 --- a/miranda/convert/corrections.py +++ b/miranda/convert/corrections.py @@ -4,8 +4,8 @@ import datetime import os +import pathlib from functools import partial -from pathlib import Path from typing import Callable, Iterator, Sequence import xarray as xr @@ -27,7 +27,7 @@ ) from miranda.treatments.utils import load_json_data_mappings -CONFIG_FOLDER = Path(__file__).parent / "data" +CONFIG_FOLDER = pathlib.Path(__file__).parent / "data" CONFIG_FILES = { "EMDNA": "emdna_cf_attrs.json", "ESPO-G6-E5L": "espo-g6-e5l_attrs.json", @@ -89,9 +89,9 @@ def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset: def dataset_conversion( input_files: ( str - | os.PathLike - | Sequence[str | os.PathLike] - | Iterator[os.PathLike] + | pathlib.Path + | Sequence[str | pathlib.Path] + | Iterator[pathlib.Path] | xr.Dataset ), project: str, @@ -137,14 +137,14 @@ def dataset_conversion( ds = input_files else: if isinstance(input_files, (str, os.PathLike)): - if Path(input_files).is_dir(): + if pathlib.Path(input_files).is_dir(): files = [] - files.extend([f for f in Path(input_files).glob("*.nc")]) - files.extend([f for f in Path(input_files).glob("*.zarr")]) + files.extend([f for f in pathlib.Path(input_files).glob("*.nc")]) + files.extend([f for f in pathlib.Path(input_files).glob("*.zarr")]) else: - files = [Path(input_files)] + files = [pathlib.Path(input_files)] elif isinstance(input_files, (Sequence, Iterator)): - files = [Path(f) for f in 
input_files] + files = [pathlib.Path(f) for f in input_files] else: files = input_files version_hashes = dict() @@ -175,7 +175,7 @@ def dataset_conversion( if domain: ds = subset_domain(ds, domain) - if isinstance(mask, (str, Path)): + if isinstance(mask, (str, pathlib.Path)): mask = xr.open_dataset(mask) if isinstance(mask, (xr.Dataset, xr.DataArray)): if regrid: diff --git a/miranda/io/_input.py b/miranda/io/_input.py index d9b0141b..9b54f858 100644 --- a/miranda/io/_input.py +++ b/miranda/io/_input.py @@ -1,8 +1,7 @@ from __future__ import annotations import logging.config -import os -from pathlib import Path +import pathlib from types import GeneratorType import netCDF4 as nc # noqa @@ -20,10 +19,10 @@ # FIXME: How are these two functions different? def discover_data( - input_files: str | os.PathLike | list[str | os.PathLike] | GeneratorType, + input_files: str | pathlib.Path | list[str | pathlib.Path] | GeneratorType, suffix: str = "nc", recurse: bool = True, -) -> list[Path] | GeneratorType: +) -> list[pathlib.Path] | GeneratorType: """Discover data. Parameters @@ -43,20 +42,20 @@ def discover_data( -------- Recursion through ".zarr" files is explicitly disabled. Recursive globs and generators will not be expanded/sorted. """ - if isinstance(input_files, (Path, str)): - input_files = Path(input_files) + if isinstance(input_files, (pathlib.Path, str)): + input_files = pathlib.Path(input_files) if input_files.is_dir(): if suffix.endswith("zarr") or not recurse: input_files = sorted(list(input_files.glob(f"*.{suffix}"))) else: input_files = input_files.rglob(f"*.{suffix}") - if input_files.is_file(): + elif input_files.is_file(): logging.warning( "Data discovery yielded a single file. Casting to `list[Path]`." 
) input_files = [input_files] elif isinstance(input_files, list): - input_files = sorted(Path(p) for p in input_files) + input_files = sorted(pathlib.Path(p) for p in input_files) elif isinstance(input_files, GeneratorType): logging.warning( "A Generator was passed to `discover_data`. Passing object along..." @@ -68,11 +67,11 @@ def discover_data( def find_filepaths( - source: str | Path | GeneratorType | list[Path | str], + source: str | pathlib.Path | GeneratorType | list[pathlib.Path | str], recursive: bool = True, file_suffixes: str | list[str] | None = None, **_, -) -> list[Path]: +) -> list[pathlib.Path]: """Find all available filepaths at a given source. Parameters @@ -91,7 +90,7 @@ def find_filepaths( file_suffixes = [file_suffixes] found = list() - if isinstance(source, (Path, str)): + if isinstance(source, (pathlib.Path, str)): source = [source] for location in source: @@ -99,9 +98,13 @@ def find_filepaths( if "*" not in pattern: pattern = f"*{pattern}*" if recursive: - found.extend([f for f in Path(location).expanduser().rglob(pattern)]) + found.extend( + [f for f in pathlib.Path(location).expanduser().rglob(pattern)] + ) elif not recursive: - found.extend([f for f in Path(location).expanduser().glob(pattern)]) + found.extend( + [f for f in pathlib.Path(location).expanduser().glob(pattern)] + ) else: raise ValueError(f"Recursive: {recursive}") diff --git a/templates/restructure_datasets.py b/templates/restructure_datasets.py index d10fa8dc..f0d45ee9 100644 --- a/templates/restructure_datasets.py +++ b/templates/restructure_datasets.py @@ -17,5 +17,5 @@ guess=False, method="copy", make_dirs=True, - filename_pattern="*.zarr", + suffix="zarr", ) From 4bdd9c395601bbe47d3eaedea73f71d37fc7770a Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:20:24 -0400 Subject: [PATCH 3/5] correct variable transform and metadata --- .../configs/nrcan-gridded-obs_cf_attrs.json | 80 +++++-------------- 1 file 
changed, 22 insertions(+), 58 deletions(-) diff --git a/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json b/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json index 78bc9212..84a96fe1 100644 --- a/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json +++ b/miranda/convert/configs/nrcan-gridded-obs_cf_attrs.json @@ -43,89 +43,53 @@ }, "time": { "axis": "T", - "calendar": "gregorian", + "calendar": "standard", "long_name": "Time", "standard_name": "time" } }, "variables": { - "dm": { - "_cf_variable_name": "tas", - "add_offset": 273.15, - "cell_methods": "time: mean", - "comments": "Station data converted from Mean Temp (°C)", + "maxt": { + "_cf_variable_name": "tasmax", + "_invert_sign": false, + "_offset_time": false, + "_transformation": "op + 273.15", + "cell_methods": "time: maximum", "frequency": "day", "grid_mapping": "regular_lon_lat", - "long_name": "Near-Surface Air Temperature", - "original_field": "Mean Temp (°C)", + "long_name": "Daily Maximum Near-Surface Air Temperature", + "original_units": "°C", + "original_field": "Maximum Temperature / températures maximales", "standard_name": "air_temperature", - "type": "real", "units": "K" }, - "dn": { + "mint": { "_cf_variable_name": "tasmin", - "add_offset": 273.15, + "_invert_sign": false, + "_offset_time": false, + "_transformation": "op + 273.15", "cell_methods": "time: minimum", - "comments": "Station data converted from Min Temp (°C)", "frequency": "day", "grid_mapping": "regular_lon_lat", "long_name": "Daily Minimum Near-Surface Air Temperature", - "original_field": "Min Temp (°C)", + "original_units": "°C", + "original_field": "Minimum Temperature / températures minimales", "standard_name": "air_temperature", - "type": "real", "units": "K" }, - "dr": { - "_cf_variable_name": "prlp", - "cell_methods": "time: mean", - "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³", - "frequency": "day", - "grid_mapping": "regular_lon_lat", - "long_name": "Liquid 
Precipitation", - "original_field": "Total Rain (mm)", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "rainfall_flux", - "type": "real", - "units": "kg m-2 s-1" - }, - "ds": { - "_cf_variable_name": "prsn", - "cell_methods": "time: mean", - "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³", - "frequency": "day", - "grid_mapping": "regular_lon_lat", - "long_name": "Snowfall Flux", - "original_field": "Total Snow (cm)", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "snowfall_flux", - "type": "real", - "units": "kg m-2 s-1" - }, - "dt": { + "pcp": { "_cf_variable_name": "pr", + "_invert_sign": false, + "_offset_time": false, + "_transformation": "amount2rate", "cell_methods": "time: mean", - "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³", "frequency": "day", "grid_mapping": "regular_lon_lat", "long_name": "Precipitation", - "original_field": "Total Precip (mm)", - "scale_factor": 1.1574074074074073e-05, + "original_field": "precipitation / précipitations", + "original_units": "mm", "standard_name": "precipitation_flux", - "type": "real", "units": "kg m-2 s-1" - }, - "dx": { - "_cf_variable_name": "tasmax", - "add_offset": 273.15, - "cell_methods": "time: maximum", - "comments": "station data converted from Max Temp (°C)", - "frequency": "day", - "grid_mapping": "regular_lon_lat", - "long_name": "Daily Maximum Near-Surface Air Temperature", - "original_field": "Max Temp (°C)", - "standard_name": "air_temperature", - "type": "real", - "units": "K" } } } From 3a2b4c6c6873a34931423074d97f205502630bf4 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:29:00 -0500 Subject: [PATCH 4/5] ignore excluded --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index b8db2958..df1c0c56 100644 --- a/.gitignore +++ b/.gitignore @@ -108,3 +108,6 @@ ENV/ # IDE settings 
.vscode/ .idea/ + +# Manually excluded folders +excluded/ From 264ba2cbcfec3bfebadf0d1f60eb833dffa02b91 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:29:26 -0500 Subject: [PATCH 5/5] ignore downloaded --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index df1c0c56..57ce0fd4 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,4 @@ ENV/ # Manually excluded folders excluded/ +templates/downloaded/