diff --git a/.cruft.json b/.cruft.json
index 4c8ccbea..d1d98051 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,6 +1,6 @@
 {
   "template": "https://github.com/Ouranosinc/cookiecutter-pypackage",
-  "commit": "1d9ee5f08d3e8e4f78a4aabb75e2ce4eff8750bf",
+  "commit": "63f44fcbfe2e16118a4fa6b09fe847aa44e0715a",
   "checkout": null,
   "context": {
     "cookiecutter": {
diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml
index 1298f336..aa1e1a86 100644
--- a/.github/workflows/bump-version.yml
+++ b/.github/workflows/bump-version.yml
@@ -56,7 +56,7 @@ jobs:
             github.com:443
             pypi.org:443
       - name: Checkout Repository (no persist-credentials)
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false
          fetch-depth: 0
@@ -68,23 +68,20 @@ jobs:
        run: |
          git config --local user.email "bumpversion[bot]@ouranos.ca"
          git config --local user.name "bumpversion[bot]"
-      - name: Current Version
-        run: |
-          CURRENT_VERSION="$(grep -E '__version__' src/miranda/__init__.py | cut -d ' ' -f3)"
-          echo "CURRENT_VERSION=${CURRENT_VERSION}" >> $GITHUB_ENV
      - name: Install CI libraries
        run: |
          python -m pip install --require-hashes -r CI/requirements_ci.txt
      - name: Conditional Bump Version
        run: |
-          if [[ ${{ env.CURRENT_VERSION }} =~ -dev(\.\d+)? ]]; then
+          CURRENT_VERSION=$(bump-my-version show current_version)
+          if [[ ${CURRENT_VERSION} =~ -dev(\.\d+)? ]]; then
            echo "Development version (ends in 'dev(\.\d+)?'), bumping 'build' version"
            bump-my-version bump build
          else
            echo "Version is stable, bumping 'patch' version"
            bump-my-version bump patch
          fi
-          bump-my-version show-bump
+          echo "new_version=$(bump-my-version show current_version)"
      - name: Push Changes
        uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
        with:
diff --git a/.github/workflows/cache-cleaner.yml b/.github/workflows/cache-cleaner.yml
index 825fa33f..3b6d68ec 100644
--- a/.github/workflows/cache-cleaner.yml
+++ b/.github/workflows/cache-cleaner.yml
@@ -26,7 +26,9 @@ jobs:
            objects.githubusercontent.com:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Cleanup
        run: |
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index f8af451a..40b47c3c 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -13,7 +13,8 @@ name: "CodeQL"
 on:
   push:
-    branches: [ "main" ]
+    branches:
+      - main
     paths-ignore:
       - ../../CHANGELOG.rst
       - pyproject.toml
@@ -43,6 +44,8 @@ jobs:
     steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index 32eaf0ca..b5f9acea 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -28,7 +28,9 @@ jobs:
            github.com:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Dependency Review
        uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8d173938..86ddc6b8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -38,7 +38,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
@@ -60,14 +62,21 @@ jobs:
    strategy:
      matrix:
        os: [ 'ubuntu-latest' ]
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ] # "3.13"
+        python-version:
+          - "3.9"
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          # - "3.13"
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
@@ -102,7 +111,12 @@ jobs:
    strategy:
      matrix:
        os: [ 'ubuntu-latest' ]
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        python-version:
+          - "3.9"
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          # - "3.13"
    defaults:
      run:
        shell: bash -l {0}
@@ -112,7 +126,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }}
        uses: mamba-org/setup-micromamba@068f1ab4b37ed9b3d9f73da7db90a0cda0a48d29 # v2.0.3
        with:
@@ -120,7 +136,6 @@ jobs:
          environment-file: environment-dev.yml
          create-args: >-
            python=${{ matrix.python-version }}
-          micromamba-version: 1.5.10-0 # Pin micromamba version because of following issue: https://github.com/mamba-org/setup-micromamba/issues/225
      - name: Install miranda
        run: |
          python -m pip install --no-deps .
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 235d064d..b9eab553 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -28,7 +28,9 @@ jobs:
            pypi.org:443
            upload.pypi.org:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python3
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 2ca0fdb0..2ed6d774 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -16,7 +16,9 @@ on:
    - main
 # Declare default permissions as read only.
-permissions: read-all
+# Read-all permission is not technically needed for this workflow.
+permissions:
+  contents: read
 jobs:
   analysis:
@@ -47,7 +49,7 @@ jobs:
            www.bestpractices.dev:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false
diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml
index 1fd111bb..0bf53932 100644
--- a/.github/workflows/tag-testpypi.yml
+++ b/.github/workflows/tag-testpypi.yml
@@ -21,7 +21,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Create Release
        uses: softprops/action-gh-release@7b4da11513bf3f43f9999e90eabced41ab8bb048 # 2.2.0
        env:
@@ -52,7 +54,9 @@ jobs:
            pypi.org:443
            test.pypi.org:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python3
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 35300cf7..64517d23 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
    rev: v3.19.0
    hooks:
      - id: pyupgrade
-        args: [ '--py38-plus' ]
+        args: [ '--py39-plus' ]
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
@@ -41,7 +41,7 @@ repos:
    hooks:
      - id: isort
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.1
+    rev: v0.8.2
    hooks:
      - id: ruff
        args: [ '--fix' ]
@@ -78,6 +78,11 @@ repos:
    hooks:
      - id: check-github-workflows
      - id: check-readthedocs
+  - repo: https://github.com/woodruffw/zizmor-pre-commit
+    rev: v0.8.0
+    hooks:
+      - id: zizmor
+        args: [ '--config=.zizmor.yml' ]
  - repo: meta
    hooks:
      - id: check-hooks-apply
diff --git a/.zizmor.yml b/.zizmor.yml
new file mode 100644
index 00000000..6ac32154
--- /dev/null
+++ b/.zizmor.yml
@@ -0,0 +1,6 @@
+rules:
+  dangerous-triggers:
+    ignore:
+      - label.yml:9
+      - first-pull-request.yml:3
+      - workflow-warning.yml:3
diff --git a/CI/requirements_ci.in b/CI/requirements_ci.in
index 6c0f500d..291e299f 100644
--- a/CI/requirements_ci.in
+++ b/CI/requirements_ci.in
@@ -1,6 +1,6 @@
-bump-my-version==0.27.0
+bump-my-version==0.28.0
 coveralls==4.0.1
 pip==24.3.1
 flit==3.9.0
 tox==4.23.2
-tox-gh==1.3.2
+tox-gh==1.4.4
diff --git a/CI/requirements_ci.txt b/CI/requirements_ci.txt
index 1ecaf346..9d4c7f7d 100644
--- a/CI/requirements_ci.txt
+++ b/CI/requirements_ci.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.8
+# This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
 #    pip-compile --generate-hashes --output-file=CI/requirements_ci.txt CI/requirements_ci.in
@@ -12,9 +12,9 @@ bracex==2.4 \
     --hash=sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb \
     --hash=sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418
     # via wcmatch
-bump-my-version==0.27.0 \
-    --hash=sha256:483c517af91559644d45036648e5d99f4f8c85f8d01394097d3d3e42c9e6acad \
-    --hash=sha256:911bfaf7d847d4348844c8fd16f7a11322233fb8dc90123f638069a369003642
+bump-my-version==0.28.0 \
+    --hash=sha256:cc84ace477022a4cc8c401ef5c035f2f752df45488be90ccb764a47f7de0e395 \
+    --hash=sha256:ff3cb51bb15509ae8ebb8e8efa3eaa7c02209677f45457c8b007ef2f5bef7179
     # via -r CI/requirements_ci.in
 cachetools==5.5.0 \
     --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \
@@ -399,27 +399,25 @@ tox==4.23.2 \
     # via
     #   -r CI/requirements_ci.in
     #   tox-gh
-tox-gh==1.3.2 \
-    --hash=sha256:beb8d277d5d7c1a1f09c107e4ef80bd7dd2f8f5d020edfaf4c1e3ae8fd45bf6f \
-    --hash=sha256:c2d6e977f66712e7cd5e5d1b655a1bd4c91ebaf3be104befdb53c81587292d7e
+tox-gh==1.4.4 \
+    --hash=sha256:4ea585f66585b90f5826b1677cfc9453747792a0f9ff83d468603bc17556e07b \
+    --hash=sha256:b962e0f8c4619e98d11c2a135939876691e148b843b7dac4cff7de1dc4f7c215
     # via -r CI/requirements_ci.in
 typing-extensions==4.12.2 \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
     # via
-    #   annotated-types
     #   pydantic
     #   pydantic-core
-    #   rich
     #   rich-click
     #   tox
 urllib3==2.2.2 \
     --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
     --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
     # via requests
-virtualenv==20.26.6 \
-    --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \
-    --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2
+virtualenv==20.27.1 \
+    --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \
+    --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4
     # via tox
 wcmatch==8.5.2 \
     --hash=sha256:17d3ad3758f9d0b5b4dedc770b65420d4dac62e680229c287bf24c9db856a478 \
diff --git a/environment-dev.yml b/environment-dev.yml
index f17af588..45b1a76c 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -33,20 +33,20 @@ dependencies:
   - xesmf
   - zarr
   # Dev tools and testing
-  - pip >=24.2.0
-  - bump-my-version >=0.25.1
-  - watchdog >=4.0.0
+  - pip >=24.3.1
+  - black ==24.10.0
+  - blackdoc ==0.3.9
+  - bump-my-version >=0.28.0
+  - coverage >=7.5.0
+  - coveralls >=4.0.1
   - flake8 >=7.1.1
   - flake8-rst-docstrings >=0.3.0
   - flit >=3.9.0,<4.0
-  - tox >=4.17.1
-  - coverage >=7.5.0
-  - coveralls >=4.0.1
-  - pytest >=8.3.2
-  - pytest-cov >=5.0.0
-  - black ==24.8.0
-  - blackdoc ==0.3.9
   - isort ==5.13.2
   - numpydoc >=1.8.0
   - pre-commit >=3.5.0
-  - ruff >=0.5.7
+  - pytest >=8.3.2
+  - pytest-cov >=5.0.0
+  - ruff >=0.8.2
+  - tox >=4.23.2
+  - watchdog >=4.0.0
diff --git a/environment-docs.yml b/environment-docs.yml
index f8442cd8..64ed695a 100644
--- a/environment-docs.yml
+++ b/environment-docs.yml
@@ -3,16 +3,16 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python >=3.12,<3.13
-  - sphinx >=7.0.0
-  - pandoc
-  - furo >=2023.07.26
-  - ipython
+  - python >=3.9,<3.13
   - ipykernel
+  - ipython
   - nbsphinx
+  # Docs
+  - furo >=2023.07.26
+  - pandoc
+  - sphinx >=7.1.0
   - sphinx-autoapi
   - sphinx-codeautolink
   - sphinx-copybutton
   - sphinx-intl
   - sphinx-mdinclude
-  - sphinxcontrib-napoleon
diff --git a/pyproject.toml b/pyproject.toml
index 702825e2..e1cb9d4e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ maintainers = [
   {name = "Pascal Bourgault", email = "bourgault.pascal@ouranos.ca"}
 ]
 readme = {file = "README.rst", content-type = "text/x-rst"}
-requires-python = ">=3.8.0"
+requires-python = ">=3.9.0"
 keywords = ["xarray", "climate", "meteorology", "hydrology", "archiving", "collection", "conversion", "miranda"]
 license = {file = "LICENSE"}
 classifiers = [
@@ -52,39 +52,37 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
   # Dev tools and testing
-  "pip >=24.2.0",
-  "bump-my-version >=0.26.0",
-  "watchdog >=4.0.0",
+  "black ==24.10.0",
+  "blackdoc ==0.3.9",
+  "bump-my-version >=0.28.0",
+  "coverage >=7.5.0",
+  "coveralls >=4.0.1",
   "flake8 >=7.1.1",
   "flake8-rst-docstrings >=0.3.0",
   "flit >=3.9.0,<4.0",
-  "tox >=4.18.0",
-  "coverage >=7.5.0",
-  "coveralls >=4.0.1",
+  "isort ==5.13.2",
   "mypy",
   "numpydoc >=1.8.0",
+  "pip >=24.3.1",
+  "pre-commit >=3.5.0",
   "pytest >=8.3.2",
   "pytest-cov >=5.0.0",
-  "black ==24.8.0",
-  "blackdoc ==0.3.9",
-  "isort ==5.13.2",
-  "ruff >=0.5.7",
-  "pre-commit >=3.5.0"
+  "ruff >=0.8.2",
+  "tox >=4.23.2",
+  "watchdog >=4.0.0"
 ]
 docs = [
   # Documentation and examples
+  "furo >=2023.07.26",
+  "ipykernel",
+  "ipython",
+  "jupyter_client",
+  "nbsphinx",
   "sphinx >=7.0.0",
-  "sphinx_codeautolink",
-  "sphinx_copybutton",
   "sphinx-intl",
   "sphinx-mdinclude",
-  "sphinxcontrib-napoleon",
-  "nbsphinx",
-  "pandoc",
-  "ipython",
-  "ipykernel",
-  "jupyter_client",
-  "furo >=2023.07.26"
+  "sphinx_codeautolink",
+  "sphinx_copybutton"
 ]
 gis = [
   # GIS library support
@@ -234,6 +232,7 @@ exclude = [
   ".pre-commit-config.yaml",
   ".readthedocs.yml",
   ".yamllint.yaml",
+  ".zizmor.yml",
   "docs/_*",
   "docs/apidoc/modules.rst",
   "docs/apidoc/miranda*.rst",
diff --git a/src/miranda/__init__.py b/src/miranda/__init__.py
index 955ef0e8..1a36d24e 100644
--- a/src/miranda/__init__.py
+++ b/src/miranda/__init__.py
@@ -3,7 +3,7 @@
 ###################################################################################
 # Apache Software License 2.0
 #
-# Copyright (c) 2019-2024, Trevor James Smith
+# Copyright (c) 2019-2025, Trevor James Smith
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,10 +31,12 @@
     cv,
     decode,
     io,
+    preprocess,
     scripting,
     structure,
     units,
     utils,
     validators,
+    vocabularies,
 )
 from .storage import FileMeta, StorageState
diff --git a/src/miranda/archive/_groupings.py b/src/miranda/archive/_groupings.py
index 1881427e..d542ced1 100644
--- a/src/miranda/archive/_groupings.py
+++ b/src/miranda/archive/_groupings.py
@@ -7,14 +7,13 @@
 from logging.config import dictConfig
 from pathlib import Path
 from types import GeneratorType
-from typing import Dict, List, Optional
 
 from miranda.scripting import LOGGING_CONFIG
 from miranda.storage import report_file_size
 
 dictConfig(LOGGING_CONFIG)
 
-Nested_List = List[List[Path]]
-PathDict = Dict[str, List[Path]]
+Nested_List = list[list[Path]]
+PathDict = dict[str, list[Path]]
 
 GiB = int(pow(2, 30))
diff --git a/src/miranda/convert/__init__.py b/src/miranda/convert/__init__.py
index 2c427170..fcaed839 100644
--- a/src/miranda/convert/__init__.py
+++ b/src/miranda/convert/__init__.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from . import deh, eccc, ecmwf, hq, melcc, utils
+from . import deh, eccc_canswe, eccc_rdrs, hq, melcc, utils
 from ._aggregation import *
 from ._data_corrections import *
 from ._data_definitions import *
diff --git a/src/miranda/convert/_data_corrections.py b/src/miranda/convert/_data_corrections.py
index 0ef24be9..73999530 100644
--- a/src/miranda/convert/_data_corrections.py
+++ b/src/miranda/convert/_data_corrections.py
@@ -82,7 +82,7 @@ def load_json_data_mappings(project: str) -> dict[str, Any]:
         )
     elif project.startswith("ec"):
         metadata_definition = json.load(
-            data_folder.joinpath("eccc_cf_attrs.json").open("r")
+            data_folder.joinpath("eccc_canswe_cf_attrs.json").open("r")
         )
     elif project in ["NEX-GDDP-CMIP6"]:
         metadata_definition = json.load(
diff --git a/src/miranda/convert/_data_definitions.py b/src/miranda/convert/_data_definitions.py
index b72e1214..7af3f009 100644
--- a/src/miranda/convert/_data_definitions.py
+++ b/src/miranda/convert/_data_definitions.py
@@ -16,13 +16,13 @@
     "era5_variables",
     "gather_agcfsr",
     "gather_agmerra",
+    "gather_eccc_rdrs",
     "gather_ecmwf",
     "gather_emdna",
     "gather_grnch",
     "gather_nex",
     "gather_nrcan_gridded_obs",
     "gather_raw_rdrs_by_years",
-    "gather_rdrs",
     "gather_sc_earth",
     "gather_wfdei_gem_capa",
     "nasa_ag_variables",
@@ -33,7 +33,8 @@
     "xarray_frequencies_to_cmip6like",
 ]
 
-_data_folder = Path(__file__).parent / "data"
+_data_folder = Path(__file__).resolve().parent / "data"
+
 eccc_rdrs_variables = {}
 eccc_rdrs_variables["raw"] = [
@@ -85,6 +86,7 @@
 # Manually map xarray frequencies to CMIP6/CMIP5 controlled vocabulary.
 # see: https://github.com/ES-DOC/pyessv-archive
 xarray_frequencies_to_cmip6like = {
+    "h": "hr",
     "H": "hr",
     "D": "day",
     "W": "sem",
@@ -237,7 +239,7 @@ def gather_sc_earth(path: str | os.PathLike) -> dict[str, list[Path]]:
     )
 
 
-def gather_rdrs(
+def gather_eccc_rdrs(
     name: str, path: str | os.PathLike, suffix: str, key: str
 ) -> dict[str, dict[str, list[Path]]]:
     """Gather RDRS processed source data.
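The new "h" key tracks pandas >= 2.2, which switched to lowercase frequency aliases, while the existing "H" entry keeps older versions working. A minimal sketch of how such a mapping is typically consulted; the xr.infer_freq call is illustrative and not part of this changeset:

import pandas as pd
import xarray as xr

from miranda.convert import xarray_frequencies_to_cmip6like

# Build a small hourly time axis and let xarray infer its frequency string.
times = pd.date_range("2024-01-01", periods=24, freq="h")
ds = xr.Dataset(coords={"time": times})
freq = xr.infer_freq(ds.time)  # "h" on pandas >= 2.2, "H" on older releases
print(xarray_frequencies_to_cmip6like[freq])  # -> "hr" either way

Keeping both spellings in the dictionary means the lookup succeeds regardless of which pandas version produced the inferred frequency.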
diff --git a/src/miranda/convert/_reconstruction.py b/src/miranda/convert/_reconstruction.py
index 9311ec92..bfa72325 100644
--- a/src/miranda/convert/_reconstruction.py
+++ b/src/miranda/convert/_reconstruction.py
@@ -17,8 +17,8 @@
 from miranda.utils import chunk_iterables
 
 from ._aggregation import aggregate as aggregate_func
-from ._data_corrections import dataset_corrections
 from ._data_definitions import project_institutes, xarray_frequencies_to_cmip6like
+from .corrections import dataset_corrections
 
 logging.config.dictConfig(LOGGING_CONFIG)
diff --git a/src/miranda/convert/corrections.py b/src/miranda/convert/corrections.py
new file mode 100644
index 00000000..c2c79d97
--- /dev/null
+++ b/src/miranda/convert/corrections.py
@@ -0,0 +1,201 @@
+"""Dataset corrections submodule."""
+
+from __future__ import annotations
+
+import datetime
+import pathlib
+from collections.abc import Iterator, Sequence
+from functools import partial
+from typing import Callable
+
+import xarray as xr
+
+from miranda.convert.utils import find_version_hash
+from miranda.gis import conservative_regrid, subset_domain, threshold_mask
+from miranda.treatments import (
+    cf_units_conversion,
+    clip_values,
+    correct_unit_names,
+    dimensions_compliance,
+    ensure_correct_time_frequency,
+    invert_value_sign,
+    metadata_conversion,
+    offset_time_dimension,
+    preprocessing_corrections,
+    transform_values,
+    variable_conversion,
+)
+from miranda.treatments.utils import load_json_data_mappings
+
+CONFIG_FOLDER = pathlib.Path(__file__).parent / "data"
+CONFIG_FILES = {
+    "EMDNA": "emdna_cf_attrs.json",
+    "ESPO-G6-E5L": "espo-g6-e5l_attrs.json",
+    "ESPO-G6-R2": "espo-g6-r2_attrs.json",
+    "NEX-GDDP-CMIP6": "nex-gddp-cmip6_attrs.json",
+    "agcfsr": "agcfsr_agmerra2_cf_attrs.json",
+    "agmerra2": "agcfsr_agmerra2_cf_attrs.json",
+    "cmip": "cmip5_cmip6_cordex_ouranos_attrs.json",
+    "cordex": "cmip5_cmip6_cordex_ouranos_attrs.json",
+    "eccc-canswe": "eccc-canswe_cf_attrs.json",
+    "eccc-ahccd": "eccc-ahccd_cf_attrs.json",
+    "eccc-obs": "eccc-obs_cf_attrs.json",
+    "era5-land": "era5_era5-land_cf_attrs.json",
+    "era5-land-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "ets-grnch": "ets-grnch_cf_attrs.json",
+    "melcc": "melcc_cf_attrs.json",
+    "rdrs-v21": "eccc-rdrs_cf_attrs.json",
+    "wfdei-gem-capa": "wfdei-gem-capa_cf_attrs.json",
+}
+for k, v in CONFIG_FILES.items():
+    CONFIG_FILES[k] = CONFIG_FOLDER / v
+
+
+def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset:
+    """
+    Convert variables to CF-compliant format.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Data to be converted.
+    project : str
+        Project name for decoding/handling purposes.
+
+    Returns
+    -------
+    xr.Dataset
+        The corrected dataset.
+ """ + metadata_definition = load_json_data_mappings(project, CONFIG_FILES) + + ds = correct_unit_names(ds, project, metadata_definition) + ds = transform_values(ds, project, metadata_definition) + ds = invert_value_sign(ds, project, metadata_definition) + ds = cf_units_conversion(ds, metadata_definition) + ds = clip_values(ds, project, metadata_definition) + + ds = dimensions_compliance(ds, project, metadata_definition) + ds = ensure_correct_time_frequency(ds, project, metadata_definition) + ds = offset_time_dimension(ds, project, metadata_definition) + + ds = variable_conversion(ds, project, metadata_definition) + + ds = metadata_conversion(ds, project, metadata_definition) + + ds.attrs["history"] = ( + f"{datetime.datetime.now()}: " + f"Variables converted from original files using miranda.convert.{dataset_corrections.__name__}. " + f"{ds.attrs.get('history')}".strip() + ) + + return ds + + +def dataset_conversion( + input_files: ( + str + | pathlib.Path + | Sequence[str | pathlib.Path] + | Iterator[pathlib.Path] + | xr.Dataset + ), + project: str, + domain: str | None = None, + mask: xr.Dataset | xr.DataArray | None = None, + mask_cutoff: float | bool = False, + regrid: bool = False, + add_version_hashes: bool = True, + preprocess: Callable | str | None = "auto", + **xr_kwargs, +) -> xr.Dataset | xr.DataArray: + r""" + Convert an existing Xarray-compatible dataset to another format with variable corrections applied. + + Parameters + ---------- + input_files : str or pathlib.Path or Sequence[str or pathlib.Path] or Iterator[pathlib.Path] or xr.Dataset + Files or objects to be converted. + If sent a list or GeneratorType, will open with :py:func:`xarray.open_mfdataset` and concatenate files. + project : {"cordex", "cmip5", "cmip6", "ets-grnch", "isimip-ft", "pcic-candcs-u6", "converted"} + Project name for decoding/handling purposes. + domain : {"global", "nam", "can", "qc", "mtl"}, optional + Domain to perform subsetting for. Default: None. + mask : Optional[Union[xr.Dataset, xr.DataArray]] + DataArray or single data_variable dataset containing mask. + mask_cutoff : float or bool + If land_sea_mask supplied, the threshold above which to mask with land_sea_mask. Default: False. + regrid : bool + Performing regridding with xesmf. Default: False. + add_version_hashes : bool + If True, version name and sha256sum of source file(s) will be added as a field among the global attributes. + preprocess : callable or str, optional + Preprocessing functions to perform over each Dataset. + Default: "auto" - Run preprocessing fixes based on supplied fields from metadata definition. + Callable - Runs function over Dataset (single) or supplied to `preprocess` (multifile dataset). + \*\*xr_kwargs : Any + Arguments passed directly to xarray. + + Returns + ------- + xr.Dataset or xr.DataArray + The corrected dataset. 
+ """ + if isinstance(input_files, xr.Dataset): + ds = input_files + else: + if isinstance(input_files, (str, pathlib.Path)): + if pathlib.Path(input_files).is_dir(): + files = [] + files.extend([f for f in pathlib.Path(input_files).glob("*.nc")]) + files.extend([f for f in pathlib.Path(input_files).glob("*.zarr")]) + else: + files = [pathlib.Path(input_files)] + elif isinstance(input_files, (Sequence, Iterator)): + files = [pathlib.Path(f) for f in input_files] + else: + files = input_files + version_hashes = dict() + if add_version_hashes: + for file in files: + version_hashes[file.name] = find_version_hash(file) + + preprocess_kwargs = dict() + if preprocess: + if preprocess == "auto": + preprocess_kwargs.update( + preprocess=partial(preprocessing_corrections, project=project) + ) + elif isinstance(preprocess, Callable): + preprocess_kwargs.update(preprocess=preprocess) + + if len(files) == 1: + ds = xr.open_dataset(files[0], **xr_kwargs) + for process in preprocess_kwargs.values(): + ds = process(ds) + else: + ds = xr.open_mfdataset(files, **xr_kwargs, **preprocess_kwargs) + if version_hashes: + ds.attrs.update(dict(original_files=str(version_hashes))) + + ds = dataset_corrections(ds, project) + + if domain: + ds = subset_domain(ds, domain) + + if isinstance(mask, (str, pathlib.Path)): + mask = xr.open_dataset(mask) + if isinstance(mask, (xr.Dataset, xr.DataArray)): + if regrid: + mask = conservative_regrid(ds, mask) + ds = threshold_mask(ds, mask=mask, mask_cutoff=mask_cutoff) + + return ds diff --git a/src/miranda/eccc/eccc_homogenized_cf_attrs.json b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json similarity index 80% rename from src/miranda/eccc/eccc_homogenized_cf_attrs.json rename to src/miranda/convert/data/eccc-ahccd_cf_attrs.json index 92c3b0f1..594de4e2 100644 --- a/src/miranda/eccc/eccc_homogenized_cf_attrs.json +++ b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json @@ -1,29 +1,56 @@ { "Header": { - "Conventions": "CF-1.8", + "Conventions": "CF-1.9", + "_citation": { + "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", + "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728" + }, + "_frequency": true, + "_generation": true, + "_miranda_version": true, + "_missing_values": [ + "-999", + "1e20" + ], "_product": { "gen2": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2", "gen3": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3" }, - "citation": { - "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", - "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. 
diff --git a/src/miranda/eccc/eccc_homogenized_cf_attrs.json b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json
similarity index 80%
rename from src/miranda/eccc/eccc_homogenized_cf_attrs.json
rename to src/miranda/convert/data/eccc-ahccd_cf_attrs.json
index 92c3b0f1..594de4e2 100644
--- a/src/miranda/eccc/eccc_homogenized_cf_attrs.json
+++ b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json
@@ -1,29 +1,56 @@
 {
   "Header": {
-    "Conventions": "CF-1.8",
+    "Conventions": "CF-1.9",
+    "_citation": {
+      "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910",
+      "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728"
+    },
+    "_frequency": true,
+    "_generation": true,
+    "_miranda_version": true,
+    "_missing_values": [
+      "-999",
+      "1e20"
+    ],
     "_product": {
       "gen2": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2",
       "gen3": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3"
     },
-    "citation": {
-      "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910",
-      "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728"
-    },
     "contact": "info.cccs-ccsc@canada.ca",
     "documentation": "https://www.canada.ca/en/environment-climate-change/services/climate-change/canadian-centre-climate-services/display-download/technical-documentation-adjusted-climate-data.html",
-    "float_missing_value": "1e20",
-    "frequency": "day",
     "institution": "GovCan",
-    "int_missing_value": "-999",
     "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
     "license_type": "permissive",
     "organization": "ECCC",
     "realm": "atmos",
+    "source": "AHCCD",
     "table_date": "2023-03-23",
     "table_id": "ECCC"
   },
-  "variable_entry": {
+  "dimensions": {
+    "lat": {
+      "axis": "Y",
+      "long_name": "Latitude",
+      "standard_name": "latitude",
+      "units": "degrees_north"
+    },
+    "long": {
+      "_cf_dimension_name": "lon",
+      "axis": "X",
+      "long_name": "Longitude",
+      "standard_name": "longitude",
+      "units": "degrees_east"
+    },
+    "time": {
+      "axis": "T",
+      "calendar": "gregorian",
+      "long_name": "Time",
+      "standard_name": "time"
+    }
+  },
+  "variables": {
     "dm": {
+      "_cf_variable_name": "tas",
       "add_offset": 273.15,
       "cell_methods": "time: mean",
       "comments": "Station data converted from Mean Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Near-Surface Air Temperature",
       "original_field": "Mean Temp (°C)",
-      "out_name": "tas",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
     },
     "dn": {
+      "_cf_variable_name": "tasmin",
       "add_offset": 273.15,
       "cell_methods": "time: minimum",
       "comments": "Station data converted from Min Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Daily Minimum Near-Surface Air Temperature",
       "original_field": "Min Temp (°C)",
-      "out_name": "tasmin",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
     },
     "dr": {
-      "add_offset": 0,
+      "_cf_variable_name": "prlp",
       "cell_methods": "time: mean",
       "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Liquid Precipitation",
       "original_field": "Total Rain (mm)",
-      "out_name": "prlp",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "rainfall_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "ds": {
-      "add_offset": 0,
+      "_cf_variable_name": "prsn",
       "cell_methods": "time: mean",
       "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Snowfall Flux",
       "original_field": "Total Snow (cm)",
-      "out_name": "prsn",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "snowfall_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "dt": {
-      "add_offset": 0,
+      "_cf_variable_name": "pr",
       "cell_methods": "time: mean",
       "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Precipitation",
       "original_field": "Total Precip (mm)",
-      "out_name": "pr",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "precipitation_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "dx": {
+      "_cf_variable_name": "tasmax",
       "add_offset": 273.15,
       "cell_methods": "time: maximum",
       "comments": "station data converted from Max Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Daily Maximum Near-Surface Air Temperature",
       "original_field": "Max Temp (°C)",
-      "out_name": "tasmax",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
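As a sanity check on the precipitation entries above: 1 mm day-1 of rain is 1 kg m-2 day-1 at a density of 1000 kg/m³, and dividing by 86 400 s day-1 yields exactly the scale factor recorded in the JSON. A small illustrative snippet (variable names are invented):

# Illustrative only: reproduce the "dr" (Total Rain) conversion from the
# mapping above -- mm/day to kg m-2 s-1 assuming a density of 1000 kg/m³.
SECONDS_PER_DAY = 86_400
scale_factor = 1 / SECONDS_PER_DAY  # 1.1574074074074073e-05, as in the JSON
rain_mm_per_day = 12.0
rain_flux = rain_mm_per_day * scale_factor  # kg m-2 s-1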
"air_temperature", "type": "real", "units": "K" diff --git a/src/miranda/eccc/eccc_obs_cf_attrs.json b/src/miranda/convert/data/eccc-obs_cf_attrs.json similarity index 54% rename from src/miranda/eccc/eccc_obs_cf_attrs.json rename to src/miranda/convert/data/eccc-obs_cf_attrs.json index 7c882e31..c504965e 100644 --- a/src/miranda/eccc/eccc_obs_cf_attrs.json +++ b/src/miranda/convert/data/eccc-obs_cf_attrs.json @@ -1,996 +1,1128 @@ { "Header": { - "Conventions": "CF-1.8", + "Conventions": "CF-1.9", + "_frequency": true, + "_miranda_version": true, + "_missing_flags": "M", + "_missing_values": [ + "-999", + "1e20", + "-9999", + "#####" + ], "contact": "climatcentre-climatecentral@ec.gc.ca", "institution": "GovCan", - "int_missing_value": "-999", "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", "license_type": "permissive", - "missing_value": "1e20", "organization": "ECCC", "processing_level": "raw", - "realm": "atmos", - "source": "msc", + "source": "ECCC-OBS", "table_date": "2023-03-23", "type": "station-obs" }, - "variable_entry": { + "dimensions": { + "latitude": { + "_cf_dimension_name": "lat", + "_precision": 4, + "axis": "Y", + "standard_name": "latitude" + }, + "longitude": { + "_cf_dimension_name": "lon", + "_precision": 4, + "axis": "X", + "standard_name": "longitude" + }, + "time": { + "_ensure_correct_time": { + "obs-daily": "1D", + "obs-hourly": "1H" + }, + "_strict_time": false, + "axis": "T", + "long_name": "time", + "standard_name": "time" + } + }, + "variables": { "001": { + "_cf_variable_name": "tasmax", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tasmax", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Maximum Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature_maximum", "units": "K" }, "002": { + "_cf_variable_name": "tasmin", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tasmin", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Minimum Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature_minimum", "units": "K" }, "003": { + "_cf_variable_name": "tas", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tas", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Mean Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature", "units": "K" }, "010": { + "_cf_variable_name": "prlptot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prlptot", + "_transformation": "op / 10 mm day-1", "original_units": "0.1 mm day-1", "original_variable": "Daily Total Rainfall", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "liquid_precipitation_amount", "units": "m" }, "011": { + "_cf_variable_name": "prsntot", + "_corrected_units": "cm", + "_invert_sign": false, + 
"_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prsntot", + "_transformation": "op / 10 cm day-1", "original_units": "0.1 cm day-1", "original_variable": "Daily Total Snowfall", - "raw_units": "cm", - "scale_factor": 0.1, "standard_name": "solid_precipitation_amount", "units": "m" }, "012": { + "_cf_variable_name": "prcptot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prcptot", + "_transformation": "op / 10 mm day-1", "original_units": "0.1 mm day-1", "original_variable": "Daily Total Precipitation", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "m" }, "013": { + "_cf_variable_name": "sndtot", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "sndtot", + "_transformation": false, "original_units": "cm", "original_variable": "Snow on the Ground", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "014": { + "_cf_variable_name": "thunder", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "thunder", + "_transformation": false, "original_variable": "Thunderstorms", - "raw_units": "1", - "scale_factor": 1, "standard_name": "thunderstorm_presence", "units": "1" }, "015": { + "_cf_variable_name": "freezing_rain_drizzle", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "freezing_rain_drizzle", + "_transformation": false, "original_variable": "Freezing rain or drizzle", - "raw_units": "1", - "scale_factor": 1, "standard_name": "freeze_rain_drizzle_presence", "units": "1" }, "016": { + "_cf_variable_name": "hail", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "hail", + "_transformation": false, "original_variable": "Hail", - "raw_units": "1", - "scale_factor": 1, "standard_name": "hail_presence", "units": "1" }, "017": { + "_cf_variable_name": "fog_ice_fog", + "_corrected_units": "1", "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "fog_ice_fog", "original_variable": "Fog or Ice Fog", - "raw_units": "1", - "scale_factor": 1, "standard_name": "fog_ice_fog_presence", "units": "1" }, "018": { + "_cf_variable_name": "smoke_haze", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "smoke_haze", + "_transformation": false, "original_variable": "Smoke or Haze", - "raw_units": "1", - "scale_factor": 1, "standard_name": "smoke_haze_presence", "units": "1" }, "019": { + "_cf_variable_name": "blowing_dust_sand", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "blowing_dust_sand", + "_transformation": false, "original_variable": "Blowing Dust or Sand", - "raw_units": "1", - "scale_factor": 1, "standard_name": "blowing_dust_sand_presence", "units": "1" }, "020": { + "_cf_variable_name": "blow_snow", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - 
"add_offset": 0, - "nc_name": "blow_snow", + "_transformation": false, "original_variable": "Blowing snow", - "raw_units": "1", - "scale_factor": 1, "standard_name": "blowing_snow_presence", "units": "1" }, "021": { + "_cf_variable_name": "wind_gt_28kt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "wind_gt_28kt", + "_transformation": false, "original_variable": "Wind speed >= 28 Knots", - "raw_units": "1", - "scale_factor": 1, "standard_name": "wind_exceeding_28_knots", "units": "1" }, "022": { + "_cf_variable_name": "wind_gt_34kt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "wind_gt_34kt", + "_transformation": false, "original_variable": "Wind speed >= 34 Knots", - "raw_units": "1", - "scale_factor": 1, "standard_name": "wind_exceeding_34_knots", "units": "1" }, "023": { + "_cf_variable_name": "gust_dir_16pts", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_dir_16pts", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Direction of extreme gust (16 pts) to December 1976", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "gust_to_direction", "units": "deg" }, "024": { + "_cf_variable_name": "gust_speed", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_speed", + "_transformation": false, "original_units": "km/h", "original_variable": "Speed of extreme gust", - "raw_units": "km h-1", - "scale_factor": 1, "standard_name": "wind_speed_of_gust", "units": "m s-1" }, "025": { + "_cf_variable_name": "gust_hour", + "_corrected_units": "h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_hour", + "_transformation": false, "original_variable": "UTC hour of extreme gust", - "raw_units": "h", - "scale_factor": 1, "standard_name": "hour_of_extreme_gust", "units": "h" }, "061": { + "_cf_variable_name": "rf1_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf1_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF1 global solar radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "062": { + "_cf_variable_name": "rf2_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf2_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF2 sky (diffuse) radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "063": { + "_cf_variable_name": "rf3_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf3_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF3 reflected solar radiation", - 
"raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "064": { + "_cf_variable_name": "rf4_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf4_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF4 net all wave radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "067": { + "_cf_variable_name": "rf7_radiation", + "_corrected_units": "klx h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf7_radiation", + "_transformation": "op / 100 klx h", "original_units": "0.01 Kilolux_hrs", "original_variable": "RF7 daylight illumination", - "raw_units": "lux h", - "scale_factor": 10, "standard_name": "solar_radiation_flux", - "units": "lux h" + "units": "klx h" }, "068": { + "_cf_variable_name": "rf8_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf8_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF8 direct solar radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "069": { + "_cf_variable_name": "wind_dir_45B", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY15" ], - "add_offset": 0, - "nc_name": "wind_dir_45B", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Direction - 45B anemometer (8 pts)", - "raw_units": "deg", - "scale_factor": 1, "standard_name": "wind_to_direction", "units": "deg" }, "071": { + "_cf_variable_name": "ceiling_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "ceiling_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Ceiling height of lowest layer of clouds", - "raw_units": "m", - "scale_factor": 30, "standard_name": "ceiling_cloud_height", "units": "m" }, "072": { + "_cf_variable_name": "visibility", + "_corrected_units": "km", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "visibility", + "_transformation": "op / 10 km", "original_units": "0.1 km", "original_variable": "Visibility", - "raw_units": "km", - "scale_factor": 0.1, "standard_name": "visibility_in_air", "units": "m" }, "073": { + "_cf_variable_name": "psl", + "_corrected_units": "Pa", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "psl", + "_transformation": "op / 100 kPa", "original_units": "0.01 kPa", "original_variable": "Sea Level Pressure", - "raw_units": "Pa", - "scale_factor": 10, "standard_name": "air_pressure_at_mean_sea_level", "units": "Pa" }, "074": { + "_cf_variable_name": "tds", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tds", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Dew Point Temperature", - 
"raw_units": "degC", - "scale_factor": 0.1, "standard_name": "dew_point_temperature", "units": "K" }, "075": { + "_cf_variable_name": "wind_dir_u2a_16", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "wind_dir_u2a_16", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "wind_direction_u2a", "units": "deg" }, "076": { + "_cf_variable_name": "wind_speed_u2a", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "wind_speed_u2a", + "_transformation": false, "original_units": "km/h", "original_variable": "Wind Speed - U2A (16 pts) to December 1970", - "raw_units": "km h-1", - "scale_factor": 1, "standard_name": "wind_speed_u2a", "units": "m s-1" }, "077": { + "_cf_variable_name": "pressure", + "_corrected_units": "Pa", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "pressure", + "_transformation": "op / 100 kPa", "original_units": "0.01 kPa", "original_variable": "Station Pressure", - "raw_units": "Pa", - "scale_factor": 10, "standard_name": "atmospheric_pressure", "units": "Pa" }, "078": { + "_cf_variable_name": "tas_dry", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tas_dry", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Dry Bulb Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "dry_bulb_temperature", "units": "K" }, "079": { + "_cf_variable_name": "tas_wet", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tas_wet", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Wet Bulb temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "wet_bulb_temperature", "units": "K" }, "080": { + "_cf_variable_name": "hur", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "hur", + "_transformation": false, "original_units": "%", "original_variable": "Relative Humidity", - "raw_units": "1", - "scale_factor": 1, "standard_name": "relative_humidity", "units": "1" }, "081": { + "_cf_variable_name": "clo", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "clo", - "original_units": "%", + "_transformation": "op * 10", + "original_units": "Tenths", "original_variable": "Total Cloud Opacity", - "raw_units": "1", "scale_factor": 10, "standard_name": "cloud_albedo", "units": "1" }, "082": { + "_cf_variable_name": "clt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "clt", - "original_units": "%", + "_transformation": "op * 10", + "original_units": "Tenths", "original_variable": "Total Cloud Amount", - "raw_units": "1", "scale_factor": 10, "standard_name": "cloud_area_fraction", "units": "1" }, "089": { + "_cf_variable_name": "freeze_rain", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - 
"add_offset": 0, - "nc_name": "freeze_rain", + "_transformation": false, "original_variable": "Freezing Rain", - "raw_units": "1", - "scale_factor": 1, "standard_name": "freezing_rain", "units": "1" }, "094": { + "_cf_variable_name": "ice_pellets", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "ice_pellets", + "_transformation": false, "original_variable": "Ice Pellets", - "raw_units": "1", - "scale_factor": 1, "standard_name": "ice_pellet_presence", "units": "1" }, "107": { + "_cf_variable_name": "1low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_opac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "108": { + "_cf_variable_name": "1low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "109": { + "_cf_variable_name": "1low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_type", + "_transformation": false, "original_variable": "Lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "110": { + "_cf_variable_name": "1low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "111": { + "_cf_variable_name": "2low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_opac", + "_transformation": "op * 30 m", "original_units": "Tenths", "original_variable": "Second lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "112": { + "_cf_variable_name": "2low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Second lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "113": { + "_cf_variable_name": "2low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_type", + "_transformation": false, "original_units": "", "original_variable": "Second lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "114": { + "_cf_variable_name": 
"2low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Second lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "115": { + "_cf_variable_name": "3low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_opac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Thirsd lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "116": { + "_cf_variable_name": "3low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Third lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "117": { + "_cf_variable_name": "3low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_type", + "_transformation": false, "original_units": "", "original_variable": "Third lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "118": { + "_cf_variable_name": "3low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Third lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "123": { + "_cf_variable_name": "rainfall", + "_corrected_units": "mm h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "rainfall", + "_transformation": "op / 10 mm h-1", "original_units": "0.1 mm", "original_variable": "Total Rainfall", - "raw_units": "mm h-1", - "scale_factor": 0.1, "standard_name": "rainfall_flux", "units": "kg m2 s-1" }, "133": { + "_cf_variable_name": "sun", + "_corrected_units": "h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY10" ], - "add_offset": 0, - "nc_name": "sun", + "_transformation": "op / 10 h", "original_units": "0.1 hrs", "original_variable": "Sunshine", - "raw_units": "h", - "scale_factor": 0.1, "standard_name": "duration_of_sunshine", "units": "s" }, "156": { + "_cf_variable_name": "wind_dir_u2a_36", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "nc_name": "wind_dir_u2a_36", - "original_units": "10's of degrees", + "_transformation": "op * 10 deg", "original_variable": "Wind Direction - U2A (36 pts) from January 1971", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "wind_direction_u2a", "units": "deg" }, + "209": { + "_cf_variable_name": "wind_character", + "_corrected_units": "", + "_invert_sign": false, + "_offset_time": false, + "_table_name": [ + "HLY01" + ], + "_transformation": false, + "description": 
"Gust (G)=1, Squall (Q)=2", + "long_name": "wind_direction_u2a", + "original_units": "1, 2", + "original_variable": "Wind character at 10 m", + "units": "" + }, + "210": { + "_cf_variable_name": "", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, + "_table_name": [ + "HLY01" + ], + "_transformation": false, + "original_units": "km/h", + "original_variable": "Wind gust speed at 10 m", + "standard_name": "wind_speed_of_gust", + "units": "m s-1" + }, "262": { + "_cf_variable_name": "prtot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 00-60)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "263": { + "_cf_variable_name": "prtot_q1", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q1", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 00-15)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "264": { + "_cf_variable_name": "prtot_q2", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q2", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 15-30)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "265": { + "_cf_variable_name": "prtot_q3", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q3", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 30-45)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "266": { + "_cf_variable_name": "prtot_q4", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q4", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 45-60)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "267": { + "_cf_variable_name": "precipitation_weight_q1", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q1", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 15)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "268": { + "_cf_variable_name": "precipitation_weight_q2", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q2", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 30)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": 
"precipitation_amount", "units": "kg m-2" }, "269": { + "_cf_variable_name": "precipitation_weight_q3", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q3", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 45)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "270": { + "_cf_variable_name": "precipitation_weight_q4", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q4", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 60)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "271": { + "_cf_variable_name": "wind_speed_q1", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q1", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 00-15)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "272": { + "_cf_variable_name": "wind_speed_q2", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q2", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 15-30)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "273": { + "_cf_variable_name": "wind_speed_q3", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q3", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 30-45)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "274": { + "_cf_variable_name": "wind_speed_q4", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q4", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 45-60)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "275": { + "_cf_variable_name": "snd", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q4", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 60)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "276": { + "_cf_variable_name": "snd_q1", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ 
"HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q1", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 15)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "277": { + "_cf_variable_name": "snd_q2", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q2", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 30)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "278": { + "_cf_variable_name": "snd_q3", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q3", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 45)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "279": { + "_cf_variable_name": "wind_dir", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_dir", - "nc_units": "deg", + "_transformation": false, "original_units": "Degrees", "original_variable": "Wind Direction at 2 m (minutes 50-60)", - "raw_units": "deg", - "scale_factor": 1, - "standard_name": "wind_direction" + "standard_name": "wind_direction", + "units": "deg" }, "280": { + "_cf_variable_name": "wind_speed", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 50-60)", - "raw_units": "km h-1", - "scale_factor": 0.1, "standard_name": "wind_speed", "units": "m s-1" } diff --git a/src/miranda/convert/data/eccc_cf_attrs.json b/src/miranda/convert/data/eccc_canswe_cf_attrs.json similarity index 60% rename from src/miranda/convert/data/eccc_cf_attrs.json rename to src/miranda/convert/data/eccc_canswe_cf_attrs.json index 4424ae76..4b48eb98 100644 --- a/src/miranda/convert/data/eccc_cf_attrs.json +++ b/src/miranda/convert/data/eccc_canswe_cf_attrs.json @@ -2,31 +2,32 @@ "Header": { "Conventions": "CF-1.9", "_contact": { - "ec-canswe": "vincent.vionnet@canada.ca" + "eccc-canswe": "vincent.vionnet@canada.ca" }, "_doi": { - "ec-canswe": "10.5281/zenodo.6638382" + "eccc-canswe": "10.5281/zenodo.6638382" }, "_license": { - "ec-canswe": "https://open.canada.ca/en/open-government-licence-canada" + "eccc-canswe": "https://open.canada.ca/en/open-government-licence-canada" }, "_miranda_version": true, "_reference": { - "ec-canswe": "https://zenodo.org/record/6638382" + "eccc-canswe": "https://zenodo.org/record/6638382" }, "_source": { - "ec-canswe": "CanSWE" + "eccc-canswe": "CanSWE" }, "_version": { - "ec-canswe": "v4" + "eccc-canswe": "v4" }, "institution": "GovCan", "license_type": { - "ec-canswe": "permissive" + "eccc-canswe": "permissive" }, "organisation": "ECCC", "processing_level": "raw", "realm": "atmos", + "source": "ECCC-CANSWE", "table_date": "2023-03-23", "table_id": "eccc", "type": "station-obs" diff --git a/src/miranda/convert/data/ecmwf_cf_attrs.json b/src/miranda/convert/data/ecmwf_cf_attrs.json index 1c080ac3..7acb0081 100644 --- a/src/miranda/convert/data/ecmwf_cf_attrs.json +++ b/src/miranda/convert/data/ecmwf_cf_attrs.json @@ 
diff --git a/src/miranda/convert/data/ecmwf_cf_attrs.json b/src/miranda/convert/data/ecmwf_cf_attrs.json
index 1c080ac3..7acb0081 100644
--- a/src/miranda/convert/data/ecmwf_cf_attrs.json
+++ b/src/miranda/convert/data/ecmwf_cf_attrs.json
@@ -45,6 +45,7 @@
         "era5-land-monthly-means": 4
       },
       "axis": "Y",
+      "long_name": "Latitude",
       "standard_name": "latitude"
     },
     "longitude": {
@@ -54,6 +55,7 @@
         "era5-land-monthly-means": 4
       },
       "axis": "X",
+      "long_name": "Longitude",
       "standard_name": "longitude"
     },
     "time": {
@@ -71,7 +73,7 @@
       },
       "_strict_time": false,
       "axis": "T",
-      "long_name": "time",
+      "long_name": "Time",
       "standard_name": "time"
     }
   },
diff --git a/src/miranda/convert/data/espo-g6-e5l_attrs.json b/src/miranda/convert/data/espo-g6-e5l_attrs.json
index e4e76045..71a2c80a 100644
--- a/src/miranda/convert/data/espo-g6-e5l_attrs.json
+++ b/src/miranda/convert/data/espo-g6-e5l_attrs.json
@@ -14,6 +14,7 @@
     "domain": "NAM",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "ESPO-G6-E5L",
     "table_date": "2023-04-24",
     "table_id": "ESPO-G6-E5L",
     "type": "simulation",
diff --git a/src/miranda/convert/data/espo-g6-r2_attrs.json b/src/miranda/convert/data/espo-g6-r2_attrs.json
index ad57313f..c0e73f03 100644
--- a/src/miranda/convert/data/espo-g6-r2_attrs.json
+++ b/src/miranda/convert/data/espo-g6-r2_attrs.json
@@ -14,6 +14,7 @@
     "domain": "NAM",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "ESPO-G6-R2",
     "table_date": "2023-04-24",
     "table_id": "ESPO-G6-R2",
     "type": "simulation",
diff --git a/src/miranda/convert/data/nex-gddp-cmip6_attrs.json b/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
index a58f29de..2e962b6e 100644
--- a/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
+++ b/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
@@ -12,6 +12,7 @@
     "domain": "QC",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "NASA-NEX-GDDP",
     "table_date": "2023-04-11",
     "table_id": "NEX-GDDP-CMIP6",
     "type": "simulation"
diff --git a/src/miranda/convert/deh.py b/src/miranda/convert/deh.py
index 3fa5acd1..72cbbeb7 100644
--- a/src/miranda/convert/deh.py
+++ b/src/miranda/convert/deh.py
@@ -25,7 +25,8 @@
     "variable_entry"
 ]
 
-# TODO: Some potentially useful attributes were skipped, because they would be complicated to include in a dataset since they vary per station
+# TODO: Some potentially useful attributes were skipped
+# because they would be complicated to include in a dataset since they vary per station
 meta_patterns = {
     "Station: ": "name",
     "Bassin versant: ": "bv",
diff --git a/src/miranda/convert/eccc.py b/src/miranda/convert/eccc_canswe.py
similarity index 96%
rename from src/miranda/convert/eccc.py
rename to src/miranda/convert/eccc_canswe.py
index becf0509..e788bc60 100644
--- a/src/miranda/convert/eccc.py
+++ b/src/miranda/convert/eccc_canswe.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import xarray as xr
 
-from ._data_corrections import dataset_corrections
+from .corrections import dataset_corrections
 
 __all__ = ["convert_canswe"]
 
@@ -91,7 +91,7 @@
     ds.snd.attrs["ancillary_variables"] = "data_flag_snd qc_flag_snd"
     ds.snw.attrs["ancillary_variables"] = "data_flag_snw qc_flag_snw"
 
-    ds = dataset_corrections(ds, "ec-canswe")
+    ds = dataset_corrections(ds, "eccc-canswe")
     ds.attrs["frequency"] = "day"
     date = "-".join(ds.indexes["time"][[0, -1]].strftime("%Y%m"))
     for var in ["snd", "snw"]:
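A minimal usage sketch consistent with the two renames above (new import path and new project key); the input file name is hypothetical:

    import xarray as xr

    from miranda.convert.corrections import dataset_corrections

    # Open a raw CanSWE file (path is illustrative) and apply the project
    # corrections under the renamed "eccc-canswe" key; the dataset then
    # carries the eccc-canswe header attributes from the JSON table above.
    ds = xr.open_dataset("CanSWE-CanEEN_1928-2021_v4.nc")
    ds = dataset_corrections(ds, "eccc-canswe")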
diff --git a/src/miranda/convert/eccc_rdrs.py b/src/miranda/convert/eccc_rdrs.py
index 6542b228..c1174bc3 100644
--- a/src/miranda/convert/eccc_rdrs.py
+++ b/src/miranda/convert/eccc_rdrs.py
@@ -4,24 +4,29 @@
 import logging.config
 import os
 
-from pathlib import Path
-from typing import Any
 
 import xarray as xr
-from numpy import unique
 
-from miranda.io import fetch_chunk_config, write_dataset_dict
 from miranda.scripting import LOGGING_CONFIG
-from miranda.units import get_time_frequency
 
-from ._aggregation import aggregate
-from ._data_corrections import dataset_conversion, load_json_data_mappings
-from ._data_definitions import gather_raw_rdrs_by_years, gather_rdrs
+# from pathlib import Path
+# from typing import Any
+
+
+# from numpy import unique
+
+
+# from miranda.treatments import load_json_data_mappings
+# from miranda.units import get_time_frequency
+#
+# from ._aggregation import aggregate
+# from ._data_definitions import gather_eccc_rdrs, gather_raw_rdrs_by_years
+# from .corrections import dataset_conversion
 
 logging.config.dictConfig(LOGGING_CONFIG)
 
-__all__ = ["convert_rdrs", "rdrs_to_daily"]
+# __all__ = ["convert_rdrs", "rdrs_to_daily"]
 
 
 # FIXME: Can we use `name_output_file` instead? We already have a better version of this function.
@@ -47,185 +52,186 @@ def _get_drop_vars(file: str | os.PathLike[str], *, keep_vars: list[str] | set[s
     return list(set(drop_vars) - set(keep_vars))
 
 
-def convert_rdrs(
-    project: str,
-    input_folder: str | os.PathLike[str],
-    output_folder: str | os.PathLike[str],
-    output_format: str = "zarr",
-    working_folder: str | os.PathLike[str] | None = None,
-    overwrite: bool = False,
-    cfvariable_list: list | None = None,
-    **dask_kwargs: dict[str, Any],
-) -> None:
-    r"""
-    Convert RDRS dataset.
-
-    Parameters
-    ----------
-    project : str
-        The project name.
-    input_folder : str or os.PathLike
-        The input folder.
-    output_folder : str or os.PathLike
-        The output folder.
-    output_format : {"netcdf", "zarr"}
-        The output format.
-    working_folder : str or os.PathLike, optional
-        The working folder.
-    overwrite : bool
-        Whether to overwrite existing files. Default: False.
-    cfvariable_list : list, optional
-        The CF variable list.
-    \*\*dask_kwargs : dict
-        Additional keyword arguments passed to the Dask scheduler.
-    """
-    # TODO: This setup configuration is near-universally portable. Should we consider applying it to all conversions?
-    var_attrs = load_json_data_mappings(project=project)["variables"]
-    if cfvariable_list:
-        var_attrs = {
-            v: var_attrs[v]
-            for v in var_attrs
-            if var_attrs[v]["_cf_variable_name"] in cfvariable_list
-        }
-    freq_dict = dict(h="hr", d="day")
-
-    if isinstance(input_folder, str):
-        input_folder = Path(input_folder).expanduser()
-    if isinstance(output_folder, str):
-        output_folder = Path(output_folder).expanduser()
-    if isinstance(working_folder, str):
-        working_folder = Path(working_folder).expanduser()
-
-    # FIXME: Do we want to collect everything? Maybe return a dictionary with years and associated files?
-    out_freq = None
-    gathered = gather_raw_rdrs_by_years(input_folder)
-    for year, ncfiles in gathered[project].items():
-        ds_allvars = None
-        if len(ncfiles) >= 28:
-            for nc in ncfiles:
-                ds1 = xr.open_dataset(nc, chunks="auto")
-                if ds_allvars is None and out_freq is None:
-                    ds_allvars = ds1
-                    out_freq, meaning = get_time_frequency(ds1)
-                    out_freq = (
-                        f"{out_freq[0]}{freq_dict[out_freq[1]]}"
-                        if meaning == "hour"
-                        else freq_dict[out_freq[1]]
-                    )
-                    ds_allvars.attrs["frequency"] = out_freq
-                else:
-                    ds_allvars = xr.concat(
-                        [ds_allvars, ds1], data_vars="minimal", dim="time"
-                    )
-            ds_allvars = ds_allvars.sel(time=f"{year}")
-            # This is the heart of the conversion utility; We could apply this to multiple projects.
-            for month in unique(ds_allvars.time.dt.month):
-                ds_month = ds_allvars.sel(time=f"{year}-{str(month).zfill(2)}")
-                for var_attr in var_attrs.keys():
-                    drop_vars = _get_drop_vars(
-                        ncfiles[0], keep_vars=[var_attr, "rotated_pole"]
-                    )
-                    ds_out = ds_month.drop_vars(drop_vars)
-                    ds_out = ds_out.assign_coords(rotated_pole=ds_out["rotated_pole"])
-                    ds_corr = dataset_conversion(
-                        ds_out,
-                        project=project,
-                        add_version_hashes=False,
-                        overwrite=overwrite,
-                    )
-                    chunks = fetch_chunk_config(
-                        priority="time", freq=out_freq, dims=ds_corr.dims
-                    )
-                    chunks["time"] = len(ds_corr.time)
-                    write_dataset_dict(
-                        {var_attrs[var_attr]["_cf_variable_name"]: ds_corr},
-                        output_folder=output_folder.joinpath(out_freq),
-                        temp_folder=working_folder,
-                        output_format=output_format,
-                        overwrite=overwrite,
-                        chunks=chunks,
-                        **dask_kwargs,
-                    )
+# FIXME: This looks like a utility function. Should it be moved to a utils module?
+# def convert_rdrs(
+#     project: str,
+#     input_folder: str | os.PathLike[str],
+#     output_folder: str | os.PathLike[str],
+#     output_format: str = "zarr",
+#     working_folder: str | os.PathLike[str] | None = None,
+#     overwrite: bool = False,
+#     cfvariable_list: list | None = None,
+#     **dask_kwargs: dict[str, Any],
+# ) -> None:
+#     r"""
+#     Convert RDRS dataset.
+#
+#     Parameters
+#     ----------
+#     project : str
+#         The project name.
+#     input_folder : str or os.PathLike
+#         The input folder.
+#     output_folder : str or os.PathLike
+#         The output folder.
+#     output_format : {"netcdf", "zarr"}
+#         The output format.
+#     working_folder : str or os.PathLike, optional
+#         The working folder.
+#     overwrite : bool
+#         Whether to overwrite existing files. Default: False.
+#     cfvariable_list : list, optional
+#         The CF variable list.
+#     \*\*dask_kwargs : dict
+#         Additional keyword arguments passed to the Dask scheduler.
+#     """
+#     # TODO: This setup configuration is near-universally portable. Should we consider applying it to all conversions?
+#     var_attrs = load_json_data_mappings(project=project)["variables"]
+#     if cfvariable_list:
+#         var_attrs = {
+#             v: var_attrs[v]
+#             for v in var_attrs
+#             if var_attrs[v]["_cf_variable_name"] in cfvariable_list
+#         }
+#     freq_dict = dict(h="hr", d="day")
+#
+#     if isinstance(input_folder, str):
+#         input_folder = Path(input_folder).expanduser()
+#     if isinstance(output_folder, str):
+#         output_folder = Path(output_folder).expanduser()
+#     if isinstance(working_folder, str):
+#         working_folder = Path(working_folder).expanduser()
+#
+#     # FIXME: Do we want to collect everything? Maybe return a dictionary with years and associated files?
+#     out_freq = None
+#     gathered = gather_raw_rdrs_by_years(input_folder)
+#     for year, ncfiles in gathered[project].items():
+#         ds_allvars = None
+#         if len(ncfiles) >= 28:
+#             for nc in ncfiles:
+#                 ds1 = xr.open_dataset(nc, chunks="auto")
+#                 if ds_allvars is None and out_freq is None:
+#                     ds_allvars = ds1
+#                     out_freq, meaning = get_time_frequency(ds1)
+#                     out_freq = (
+#                         f"{out_freq[0]}{freq_dict[out_freq[1]]}"
+#                         if meaning == "hour"
+#                         else freq_dict[out_freq[1]]
+#                     )
+#                     ds_allvars.attrs["frequency"] = out_freq
+#                 else:
+#                     ds_allvars = xr.concat(
+#                         [ds_allvars, ds1], data_vars="minimal", dim="time"
+#                     )
+#             ds_allvars = ds_allvars.sel(time=f"{year}")
+#             # This is the heart of the conversion utility; We could apply this to multiple projects.
+#             for month in unique(ds_allvars.time.dt.month):
+#                 ds_month = ds_allvars.sel(time=f"{year}-{str(month).zfill(2)}")
+#                 for var_attr in var_attrs.keys():
+#                     drop_vars = _get_drop_vars(
+#                         ncfiles[0], keep_vars=[var_attr, "rotated_pole"]
+#                     )
+#                     ds_out = ds_month.drop_vars(drop_vars)
+#                     ds_out = ds_out.assign_coords(rotated_pole=ds_out["rotated_pole"])
+#                     ds_corr = dataset_conversion(
+#                         ds_out,
+#                         project=project,
+#                         add_version_hashes=False,
+#                         overwrite=overwrite,
+#                     )
+#                     chunks = fetch_chunk_config(
+#                         priority="time", freq=out_freq, dims=ds_corr.dims
+#                     )
+#                     chunks["time"] = len(ds_corr.time)
+#                     write_dataset_dict(
+#                         {var_attrs[var_attr]["_cf_variable_name"]: ds_corr},
+#                         output_folder=output_folder.joinpath(out_freq),
+#                         temp_folder=working_folder,
+#                         output_format=output_format,
+#                         overwrite=overwrite,
+#                         chunks=chunks,
+#                         **dask_kwargs,
+#                     )
 
 
 # FIXME: This looks mostly like code to stage writing out files. Should it be moved to an IO module?
-def rdrs_to_daily(
-    project: str,
-    input_folder: str | os.PathLike,
-    output_folder: str | os.PathLike,
-    working_folder: str | os.PathLike | None = None,
-    overwrite: bool = False,
-    output_format: str = "zarr",
-    year_start: int | None = None,
-    year_end: int | None = None,
-    process_variables: list[str] | None = None,
-    **dask_kwargs: dict[str, Any],
-) -> None:
-    r"""
-    Write out RDRS files to daily-timestep files.
-
-    Parameters
-    ----------
-    project : str
-        The project name.
-    input_folder : str or os.PathLike
-        The input folder.
-    output_folder : str or os.PathLike
-        The output folder.
-    working_folder : str or os.PathLike
-        The working folder.
-    overwrite : bool
-        Whether to overwrite existing files. Default: False.
-    output_format : {"netcdf", "zarr"}
-        The output format.
-    year_start : int, optional
-        The start year.
-        If not provided, the minimum year in the dataset will be used.
-    year_end : int, optional
-        The end year.
-        If not provided, the maximum year in the dataset will be used.
-    process_variables : list of str, optional
-        The variables to process.
-        If not provided, all variables will be processed.
-    \*\*dask_kwargs : dict
-        Additional keyword arguments passed to the Dask scheduler.
-    """
-    if isinstance(input_folder, str):
-        input_folder = Path(input_folder).expanduser()
-    if isinstance(output_folder, str):
-        output_folder = Path(output_folder).expanduser()  # noqa
-    if isinstance(working_folder, str):
-        working_folder = Path(working_folder).expanduser()
-
-    # GATHER ALL RDRS FILES
-    gathered = gather_rdrs(project, input_folder, "zarr", "cf")
-    files = gathered["rdrs-v21"]  # noqa
-    if process_variables:
-        for vv in [f for f in files.keys() if f not in process_variables]:
-            files.pop(vv)
-    for vv, zarrs in files.items():
-        zarrs = sorted(zarrs)
-        if not year_start:
-            year_start = xr.open_zarr(zarrs[0]).time.dt.year.min().values
-        if not year_end:
-            year_end = xr.open_zarr(zarrs[-1]).time.dt.year.max().values
-        for year in range(year_start, year_end + 1):
-            infiles = [z for z in zarrs if f"_{year}" in z.name]
-            if len(infiles) != 12:
-                raise ValueError(f"Found {len(infiles)} input files. Expected 12.")
-            #
-            out_variables = aggregate(
-                xr.open_mfdataset(infiles, engine="zarr"), freq="day"
-            )
-            # FIXME: Fetch chunk config has been modified to accept different arguments.
-            chunks = fetch_chunk_config(project=project, freq="day")
-            chunks["time"] = len(out_variables[list(out_variables.keys())[0]].time)
-            write_dataset_dict(
-                out_variables,
-                output_folder=output_folder,
-                temp_folder=working_folder,
-                output_format=output_format,
-                overwrite=overwrite,
-                chunks=chunks,
-                **dask_kwargs,
-            )
+# def rdrs_to_daily(
+#     project: str,
+#     input_folder: str | os.PathLike,
+#     output_folder: str | os.PathLike,
+#     working_folder: str | os.PathLike | None = None,
+#     overwrite: bool = False,
+#     output_format: str = "zarr",
+#     year_start: int | None = None,
+#     year_end: int | None = None,
+#     process_variables: list[str] | None = None,
+#     **dask_kwargs: dict[str, Any],
+# ) -> None:
+#     r"""
+#     Write out RDRS files to daily-timestep files.
+#
+#     Parameters
+#     ----------
+#     project : str
+#         The project name.
+#     input_folder : str or os.PathLike
+#         The input folder.
+#     output_folder : str or os.PathLike
+#         The output folder.
+#     working_folder : str or os.PathLike
+#         The working folder.
+#     overwrite : bool
+#         Whether to overwrite existing files. Default: False.
+#     output_format : {"netcdf", "zarr"}
+#         The output format.
+#     year_start : int, optional
+#         The start year.
+#         If not provided, the minimum year in the dataset will be used.
+#     year_end : int, optional
+#         The end year.
+#         If not provided, the maximum year in the dataset will be used.
+#     process_variables : list of str, optional
+#         The variables to process.
+#         If not provided, all variables will be processed.
+#     \*\*dask_kwargs : dict
+#         Additional keyword arguments passed to the Dask scheduler.
+#     """
+#     if isinstance(input_folder, str):
+#         input_folder = Path(input_folder).expanduser()
+#     if isinstance(output_folder, str):
+#         output_folder = Path(output_folder).expanduser()  # noqa
+#     if isinstance(working_folder, str):
+#         working_folder = Path(working_folder).expanduser()
+#
+#     # GATHER ALL RDRS FILES
+#     gathered = gather_eccc_rdrs(project, input_folder, "zarr", "cf")
+#     files = gathered["rdrs-v21"]  # noqa
+#     if process_variables:
+#         for vv in [f for f in files.keys() if f not in process_variables]:
+#             files.pop(vv)
+#     for vv, zarrs in files.items():
+#         zarrs = sorted(zarrs)
+#         if not year_start:
+#             year_start = xr.open_zarr(zarrs[0]).time.dt.year.min().values
+#         if not year_end:
+#             year_end = xr.open_zarr(zarrs[-1]).time.dt.year.max().values
+#         for year in range(year_start, year_end + 1):
+#             infiles = [z for z in zarrs if f"_{year}" in z.name]
+#             if len(infiles) != 12:
+#                 raise ValueError(f"Found {len(infiles)} input files. Expected 12.")
+#             #
+#             out_variables = aggregate(
+#                 xr.open_mfdataset(infiles, engine="zarr"), freq="day"
+#             )
+#             # FIXME: Fetch chunk config has been modified to accept different arguments.
+#             chunks = fetch_chunk_config(project=project, freq="day")
+#             chunks["time"] = len(out_variables[list(out_variables.keys())[0]].time)
+#             write_dataset_dict(
+#                 out_variables,
+#                 output_folder=output_folder,
+#                 temp_folder=working_folder,
+#                 output_format=output_format,
+#                 overwrite=overwrite,
+#                 chunks=chunks,
+#                 **dask_kwargs,
+#             )
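Only the `_get_drop_vars` helper survives in eccc_rdrs.py after this change. A minimal sketch of the pattern the commented-out converter used, with a hypothetical file name and variable (note that the helper is module-private):

    import xarray as xr

    from miranda.convert.eccc_rdrs import _get_drop_vars

    nc = "rdrs_1980_01.nc"  # hypothetical RDRS source file
    # Keep one data variable plus the rotated-pole grid mapping, drop the rest.
    drop_vars = _get_drop_vars(nc, keep_vars=["RDRS_v2.1_A_PR0_SFC", "rotated_pole"])
    ds = xr.open_dataset(nc, chunks="auto").drop_vars(drop_vars)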
diff --git a/src/miranda/convert/melcc.py b/src/miranda/convert/melcc.py
index 3cadecd4..096748f6 100644
--- a/src/miranda/convert/melcc.py
+++ b/src/miranda/convert/melcc.py
@@ -23,13 +23,10 @@
 from xclim.core.units import convert_units_to, pint_multiply, str2pint
 
 from miranda import __version__
+from miranda.convert.corrections import dataset_corrections
 from miranda.scripting import LOGGING_CONFIG
-
-from ._data_corrections import (
-    dataset_corrections,
-    load_json_data_mappings,
-    metadata_conversion,
-)
+from miranda.treatments import metadata_conversion
+from miranda.treatments.utils import load_json_data_mappings
 
 logging.config.dictConfig(LOGGING_CONFIG)
 logger = logging.getLogger(__name__)
@@ -676,7 +673,7 @@
     )
     ds.attrs.update(frequency="2sem")
 
-    meta = load_json_data_mappings("melcc-snow")
+    meta = load_json_data_mappings("melcc")
     ds = metadata_conversion(ds, "melcc-snow", meta)
     date = "-".join(ds.indexes["time"][[0, -1]].strftime("%Y%m"))
     # Save
diff --git a/src/miranda/cv.py b/src/miranda/cv.py
index 396885bd..2ba4534c 100644
--- a/src/miranda/cv.py
+++ b/src/miranda/cv.py
@@ -1,4 +1,4 @@
-"""Controlled Vocabulary module."""
+"""ESGF Controlled Vocabulary module."""
 
 from __future__ import annotations
 
diff --git a/src/miranda/eccc/__init__.py b/src/miranda/eccc/__init__.py
index 4e05996d..507f571a 100644
--- a/src/miranda/eccc/__init__.py
+++ b/src/miranda/eccc/__init__.py
@@ -1,7 +1,3 @@
 """Environment and Climate Change Canada specialized conversion module."""
 
 from __future__ import annotations
-
-from ._homogenized import *
-from ._raw import *
-from ._summaries import *
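The two deleted modules listed next (_homogenized.py and _raw.py) both parsed ECCC's fixed-width station archives with pandas.read_fwf. A minimal sketch of that mechanism, with a hypothetical column layout and file name (the real colspecs came from the deleted metadata tables):

    import pandas as pd

    # Hypothetical daily layout: 7-char station id, 4-char year, 2-char month,
    # 3-char variable code, then paired 6-char value / 1-char flag columns.
    colspecs = [(0, 7), (7, 11), (11, 13), (13, 16)]
    for i in range(31):
        start = 16 + i * 7
        colspecs += [(start, start + 6), (start + 6, start + 7)]

    df = pd.read_fwf("dly_station_file.txt", colspecs=colspecs, header=None)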
diff --git a/src/miranda/eccc/_homogenized.py b/src/miranda/eccc/_homogenized.py
deleted file mode 100644
index daf28039..00000000
--- a/src/miranda/eccc/_homogenized.py
+++ /dev/null
@@ -1,286 +0,0 @@
-"""Adjusted and Homogenized Canadian Climate Data module."""
-
-from __future__ import annotations
-
-import calendar
-import logging.config
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-from dask.diagnostics import ProgressBar
-
-from miranda.scripting import LOGGING_CONFIG
-
-from ._utils import cf_ahccd_metadata
-
-logging.config.dictConfig(LOGGING_CONFIG)
-logger = logging.Logger("miranda")
-
-__all__ = ["convert_ahccd", "convert_ahccd_fwf_files"]
-
-
-def convert_ahccd(
-    data_source: str | Path,
-    output_dir: str | Path,
-    variable: str,
-    generation: int | None = None,
-) -> None:
-    """Convert Adjusted and Homogenized Canadian Climate Dataset files.
-
-    Parameters
-    ----------
-    data_source: str or Path
-    output_dir: str or Path
-    variable: str
-    generation: int, optional
-
-    Returns
-    -------
-    None
-    """
-    output_dir = Path(output_dir).resolve().joinpath(variable)
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    code = dict(tasmax="dx", tasmin="dn", tas="dm", pr="dt", prsn="ds", prlp="dr").get(
-        variable
-    )
-    var, col_names, col_spaces, header_row, global_attrs = cf_ahccd_metadata(
-        code, generation
-    )
-    gen = {2: "Second", 3: "Third"}.get(generation)
-    if generation == 3 and code in {"dx", "dn", "dm"}:
-        meta = "ahccd_gen3_temperature.csv"
-    elif generation == 2 and code in {"dt", "ds", "dr"}:
-        meta = "ahccd_gen2_precipitation.csv"
-
-    else:
-        raise NotImplementedError(f"Code '{code} for generation {gen}.")
-    metadata_source = Path(__file__).resolve().parent.joinpath("data").joinpath(meta)
-
-    if "tas" in variable:
-        metadata = pd.read_csv(metadata_source, header=2)
-        metadata.columns = col_names.keys()
-        cols_specs = col_spaces
-
-    elif "pr" in variable:
-        metadata = pd.read_csv(metadata_source, header=3)
-        metadata.columns = col_names.keys()
-        cols_specs = col_spaces
-        for index, row in metadata.iterrows():
-            if isinstance(row["stnid"], str):
-                metadata.loc[index, "stnid"] = metadata.loc[index, "stnid"].replace(
-                    " ", ""
-                )
-    else:
-        raise KeyError(f"{variable} does not include 'pr' or 'tas'.")
-
-    # Convert station .txt files to netcdf
-    for ff in Path(data_source).glob("*d*.txt"):
-        outfile = output_dir.joinpath(ff.name.replace(".txt", ".nc"))
-        if not outfile.exists():
-            logger.info(ff.name)
-
-            stid = ff.name.replace(code, "").split(".txt")[0]
-            try:
-                metadata_st = metadata[metadata["stnid"] == int(stid)]
-            except ValueError:
-                metadata_st = metadata[metadata["stnid"] == stid]
-
-            if len(metadata_st) == 1:
-                ds_out = convert_ahccd_fwf_files(
-                    ff, metadata_st, variable, generation, cols_specs, var
-                )
-                ds_out.attrs = global_attrs
-
-                ds_out.to_netcdf(outfile, engine="h5netcdf")
-            else:
-                msg = f"metadata info for station {ff.name} not found : skipping"
-
-                logger.warning(msg)
-
-    # merge individual stations to single .nc file
-    # variable
-    ncfiles = list(output_dir.glob("*.nc"))
-    outfile = output_dir.parent.joinpath(
-        "merged_stations", f"ahccd_gen{generation}_{variable}.nc"
-    )
-
-    if not outfile.exists():
-        logger.info("merging stations :", variable)
-        with ProgressBar():
-            ds_ahccd = xr.open_mfdataset(
-                ncfiles, concat_dim="station", combine="nested"
-            ).load()
-
-            for coord in ds_ahccd.coords:
-                # xarray object datatypes mix string and int (e.g. stnid) convert to string for merged nc files
-                # Do not apply to datetime object
-                if coord != "time" and ds_ahccd[coord].dtype == "O":
-                    ds_ahccd[coord] = ds_ahccd[coord].astype(str)
-
-            for v in ds_ahccd.data_vars:
-                # xarray object datatypes mix string and int (e.g. stnid) convert to string for merged nc files
-                # Do not apply to flag timeseries
-                if ds_ahccd[v].dtype == "O" and "flag" not in v:
-                    logger.info(v)
-                    ds_ahccd[v] = ds_ahccd[v].astype(str)
-
-            ds_ahccd[f"{variable}_flag"].attrs[
-                "long_name"
-            ] = f"{ds_ahccd[f'{variable}'].attrs['long_name']} flag"
-            ds_ahccd.lon.attrs["units"] = "degrees_east"
-            ds_ahccd.lon.attrs["long_name"] = "longitude"
-            ds_ahccd.lat.attrs["units"] = "degrees_north"
-            ds_ahccd.lat.attrs["long_name"] = "latitude"
-
-            for clean_name, orig_name in col_names.items():
-                if clean_name in ["lat", "long"]:
-                    continue
-                ds_ahccd[clean_name].attrs["long_name"] = orig_name
-
-            outfile.parent.mkdir(parents=True, exist_ok=True)
-            ds_ahccd.to_netcdf(
-                outfile, engine="h5netcdf", format="NETCDF4_CLASSIC", mode="w"
-            )
-
-            del ds_ahccd
-    for nc in outfile.parent.glob("*.nc"):
-        logger.info(nc)
-        ds = xr.open_dataset(nc)
-        logger.info(ds)
-
-
-def convert_ahccd_fwf_files(
-    ff: Path | str,
-    metadata: pd.DataFrame,
-    variable: str,
-    generation: int | None = None,
-    cols_specs: list[tuple[int, int]] | None = None,
-    attrs: dict | None = None,
-) -> xr.Dataset:
-    """Convert AHCCD fixed-width files.
-
-    Parameters
-    ----------
-    ff: str or Path
-    metadata: pandas.DataFrame
-    variable: str
-    generation
-    cols_specs
-    attrs
-
-    Returns
-    -------
-    xarray.Dataset
-    """
-    code = dict(tasmax="dx", tasmin="dn", tas="dm", pr="dt", prsn="ds", prlp="dr").get(
-        variable
-    )
-
-    if attrs is None:
-        attrs, _, _, _, _ = cf_ahccd_metadata(code, generation)
-    if cols_specs is None:
-        _, _, cols_specs, _, _ = cf_ahccd_metadata(code, generation)
-    _, _, _, nhead, _ = cf_ahccd_metadata(code, generation)
-
-    df = pd.read_fwf(ff, header=nhead, colspecs=cols_specs)
-    if "pr" in variable:
-        cols = list(df.columns[0:3])
-        cols = cols[0::2]
-        cols.extend(list(df.columns[4::2]))
-        flags = list(df.columns[5::2])
-        dfflags = df[flags]
-    else:
-        cols = [c for c in df.columns if "Unnamed" not in c]
-        flags = [c for c in df.columns if "Unnamed" in c]
-        dfflags = df[flags[2:]]
-
-    df = df[cols]
-    df.replace(attrs["NaN_value"], np.NaN, inplace=True)
-
-    for i, j in enumerate(["Year", "Month"]):
-        df = df.rename(columns={df.columns[i]: j})
-    start_date = f"{df['Year'][0]}-{str(df['Month'][0]).zfill(2)}-01"
-
-    _, ndays = calendar.monthrange(df["Year"].iloc[-1], df["Month"].iloc[-1])
-    end_date = f"{df['Year'].iloc[-1]}-{str(df['Month'].iloc[-1]).zfill(2)}-{str(ndays).zfill(2)}"
-    time1 = pd.date_range(start=start_date, end=end_date)
-
-    index = pd.MultiIndex.from_arrays([df["Year"], df["Month"]])
-    df.index = index
-    dfflags.index = index
-    cols = [c for c in df.columns if "Year" not in c and "Month" not in c]
-    df = df[cols]
-    df.columns = np.arange(1, 32)
-    dfflags.columns = np.arange(1, 32)
-    ds = df.stack().to_frame()
-    ds = ds.rename(columns={0: variable})
-    ds_flag = dfflags.stack().to_frame()
-    ds_flag = ds_flag.rename(columns={0: "flag"})
-    ds.index.names = ["Year", "Month", "Day"]
-    ds_flag.index.names = ["Year", "Month", "Day"]
-    ds[f"{variable}_flag"] = ds_flag["flag"]
-    del ds_flag
-
-    # find invalid dates
-    for y in time1.year.unique():
-        for m in (
-            ds[ds.index.get_level_values("Year") == y]
-            .index.get_level_values("Month")
-            .unique()
-        ):
-            _, exp_ndays = calendar.monthrange(y, m)
-            ndays = (
-                (ds.index.get_level_values("Year") == y)
-                & (ds.index.get_level_values("Month") == m)
-            ).sum()
-            if ndays > np.int(exp_ndays):
-                print(f"year {y}, month {m}, ndays={ndays}, exp_ndays={exp_ndays}")
-                raise RuntimeError("Unknown days present.")
-
-    time_ds = pd.DataFrame(
-        {
-            "year": ds.index.get_level_values("Year"),
-            "month": ds.index.get_level_values("Month"),
-            "day": ds.index.get_level_values("Day"),
-        }
-    )
-
-    ds.index = pd.to_datetime(time_ds)
-
-    ds = ds.to_xarray().rename({"index": "time"})
-
-    ds_out = xr.Dataset(coords={"time": time1})
-    for v in ds.data_vars:
-        ds_out[v] = ds[v]
-
-    ds_out[variable].attrs = attrs
-    # ds_out
-    metadata = metadata.to_xarray().rename({"index": "station"}).drop_vars("station")
-    metadata = metadata.assign_coords(
-        {
-            "stnid": metadata["stnid"].astype(str),
-            "station_name": metadata["station_name"],
-        }
-    )
-    # ds_out = ds_out.assign_coords({'lon': metadata['long'], 'lat': metadata['lat'], 'elevation': metadata['elev']})
-    #
-    ds_out = ds_out.assign_coords(station=metadata.stnid)
-    metadata = metadata.drop_vars(["stnid", "station_name"])
-
-    ds_out["lon"] = metadata["long"]
-    ds_out["lon"].attrs["units"] = "degrees_east"
-    ds_out["lat"] = metadata["lat"]
-    ds_out["lat"].attrs["units"] = "degrees_north"
-    ds_out["elev"] = metadata["elev"]
-    ds_out["elev"].attrs["units"] = "m"
-
-    metadata = metadata.drop_vars(["long", "lat", "elev"])
-    for vv in metadata.data_vars:
-        if metadata[vv].dtype == "O" and (variable not in vv):
-            ds_out[vv] = metadata[vv].astype(str)
-        else:
-            ds_out[vv] = metadata[vv]
-    return ds_out
diff --git a/src/miranda/eccc/_raw.py b/src/miranda/eccc/_raw.py
deleted file mode 100644
index 9b6f1f63..00000000
--- a/src/miranda/eccc/_raw.py
+++ /dev/null
@@ -1,968 +0,0 @@
-######################################################################
-# S.Biner, Ouranos, May 2019
-#
-# methodology
-#
-# 1) the netCDF files of the various ECCC stations are gathered into a single netCDF file.
-#
-# 2) the annual source files are scanned for a given variable, and whatever is found
-# is saved to netCDF files. Flags are also applied
-# and unit conversions are made
-#
-# obtained via http://climate.weather.gc.ca/index_e.html by clicking on 'about the data'
-#######################################################################
-from __future__ import annotations
-
-import contextlib
-import functools
-import logging
-import multiprocessing as mp
-import os
-import re
-import sys
-import tempfile
-import time
-from calendar import monthrange
-from datetime import datetime as dt
-from logging import config
-from pathlib import Path
-from typing import Optional
-from urllib.error import HTTPError
-
-import dask.dataframe as dd
-import numpy as np
-import pandas as pd
-import xarray as xr
-from dask.diagnostics import ProgressBar
-from xclim.core.units import convert_units_to
-
-from miranda.archive import group_by_length
-from miranda.scripting import LOGGING_CONFIG
-from miranda.storage import file_size, report_file_size
-from miranda.units import GiB, MiB
-from miranda.utils import generic_extract_archive
-
-from ._utils import cf_station_metadata
-
-config.dictConfig(LOGGING_CONFIG)
-
-__all__ = [
-    "aggregate_stations",
-    "convert_flat_files",
-    "merge_converted_variables",
-]
-
-TABLE_DATE = dt.now().strftime("%d %B %Y")
-
-
-def load_station_metadata(meta: str | os.PathLike) -> xr.Dataset:
-    if meta:
-        df_inv = pd.read_csv(meta, header=0)
-    else:
-        try:
-            import geopandas as gpd
-
-            station_metadata_url = "https://api.weather.gc.ca/collections/climate-stations/items?f=json&limit=15000000"
-            df_inv = gpd.read_file(station_metadata_url)
-        except HTTPError as err:
-            raise RuntimeError(
-                f"Station metadata table unable to be fetched. Considering downloading directly: {err}"
-            )
-        df_inv["LONGITUDE"] = df_inv.geometry.x
-        df_inv["LATITUDE"] = df_inv.geometry.y
-        df_inv["ELEVATION"] = df_inv.ELEVATION.astype(float)
-        df_inv["CLIMATE_IDENTIFIER"] = df_inv["CLIMATE_IDENTIFIER"].astype(str)
-
-        df_inv = df_inv.drop(["geometry"], axis=1)
-    return df_inv.to_xarray()
-
-
-def _remove_duplicates(ds):
-    if any(ds.get_index("time").duplicated()):
-        msg = (
-            f"Found {ds.get_index('time').duplicated().sum()} duplicated time coordinates "
-            f"for station {ds.station_id.values}. Assuming first value."
-        )
-        logging.info(msg)
-    return ds.sel(time=~ds.get_index("time").duplicated())
-
-
-def _convert_station_file(
-    fichier: Path,
-    output_path: Path,
-    errored_files: list[Path],
-    mode: str,
-    add_offset: float,
-    column_dtypes: list[str],
-    column_names: list[str],
-    long_name: str,
-    missing_flags: set[str],
-    missing_values: set[str],
-    nc_name: str,
-    raw_units: str,
-    units: str,
-    scale_factor: float,
-    standard_name: str,
-    variable_code: str,
-    **kwargs,
-):
-    if mode.lower() in ["h", "hour", "hourly"]:
-        num_observations = 24
-        column_widths = [7, 4, 2, 2, 3] + [6, 1] * num_observations
-    elif mode.lower() in ["d", "day", "daily"]:
-        num_observations = 31
-        column_widths = [7, 4, 2, 3] + [6, 1] * num_observations
-    else:
-        raise NotImplementedError("`mode` must be 'h'/'hourly or 'd'/'daily'.")
-
-    if not missing_values:
-        missing_values = {-9999, "#####"}
-
-    with tempfile.TemporaryDirectory() as temp_folder:
-        if fichier.suffix in [".gz", ".tar", ".zip", ".7z"]:
-            data_files = generic_extract_archive(fichier, output_dir=temp_folder)
-        else:
-            data_files = [fichier]
-        msg = f"Processing file: {fichier}."
-        logging.info(msg)
-
-        size_limit = 1 * GiB
-
-        for data in data_files:
-            if file_size(data) > size_limit and "dask" in sys.modules:
-                msg = f"File exceeds {report_file_size(size_limit)} - Using dask.dataframes."
-
-                logging.info(msg)
-                pandas_reader = dd
-                using_dask_array = True
-                chunks = dict(blocksize=200 * MiB)
-                client = ProgressBar
-            else:
-                msg = f"File below {report_file_size(size_limit)} - Using pandas.dataframes."
-
-                logging.info(msg)
-                pandas_reader = pd
-                chunks = dict()
-                using_dask_array = False
-                client = contextlib.nullcontext
-
-            with client() as c:
-                # Create a dataframe from the files
-                try:
-                    df = pandas_reader.read_fwf(
-                        data,
-                        widths=column_widths,
-                        names=column_names,
-                        dtype={
-                            name: data_type
-                            for name, data_type in zip(column_names, column_dtypes)
-                        },
-                        assume_missing=True,
-                        **chunks,
-                    )
-                    if using_dask_array:
-                        df = c.persist(df)
-
-                except FileNotFoundError:
-                    msg = f"File {data} was not found."
-                    logging.error(msg)
-                    errored_files.append(data)
-                    return
-
-                except UnicodeDecodeError:
-                    msg = (
-                        f"File {data.name} was unable to be read. "
-                        f"This is probably an issue with the file."
-                    )
-                    logging.error(msg)
-                    errored_files.append(data)
-                    return
-
-                # Loop through the station codes
-                station_codes = df["code"].unique()
-                for code in station_codes:
-                    df_code = df[df["code"] == code]
-
-                    # Abort if the variable is not found
-                    if using_dask_array:
-                        has_variable_codes = (
-                            (df_code["code_var"] == variable_code).compute()
-                        ).any()
-                    else:
-                        has_variable_codes = (
-                            df_code["code_var"] == variable_code
-                        ).any()
-                    if not has_variable_codes:
-                        msg = f"Variable `{nc_name}` not found for station code: {code} in file {data}. Continuing..."
-
-                        logging.info(msg)
-                        continue
-
-                    # Perform the data treatment
-                    msg = f"Converting `{nc_name}` for station code: {code}."
-                    logging.info(msg)
-
-                    # Dump the data into a DataFrame
-                    df_var = df_code[df_code["code_var"] == variable_code].copy()
-
-                    # Mask the data according to the missing values flag
-                    df_var = df_var.replace(missing_values, np.nan)
-
-                    # Decode the values and flags
-                    dfd = df_var.loc[
-                        :, [f"D{i:0n}" for i in range(1, num_observations + 1)]
-                    ]
-                    dff = df_var.loc[
-                        :, [f"F{i:0n}" for i in range(1, num_observations + 1)]
-                    ]
-
-                    # Remove the "NaN" flag
-                    dff = dff.fillna("")
-
-                    # Use the flag to mask the values
-                    try:
-                        val = np.asarray(dfd.values, float)
-                    except ValueError as e:
-                        msg = f"Issues with {dfd}. Continuing..."
-                        logging.error(msg)
-                        continue
-                    try:
-                        flag = np.asarray(dff.values, str)
-                    except ValueError:
-                        msg = f"Issues with {dff}. Continuing..."
-                        logging.error(msg)
-                        continue
-                    mask = np.isin(flag, missing_flags)
-                    val[mask] = np.nan
-
-                    # Treat according to units conversions
-                    val = val * scale_factor + add_offset
-
-                    # Create the DataArray
-                    date_summations = dict(time=list())
-                    if mode == "hourly":
-                        for index, row in df_var.iterrows():
-                            period = pd.Period(
-                                year=row.year, month=row.month, day=row.day, freq="D"
-                            )
-                            dates = pd.Series(
-                                pd.date_range(
-                                    start=period.start_time,
-                                    end=period.end_time,
-                                    freq="H",
-                                )
-                            )
-                            date_summations["time"].extend(dates)
-                        written_values = val.flatten()
-                        written_flags = flag.flatten()
-                    elif mode == "daily":
-                        value_days = list()
-                        flag_days = list()
-                        for i, (index, row) in enumerate(df_var.iterrows()):
-                            period = pd.Period(year=row.year, month=row.month, freq="M")
-                            dates = pd.Series(
-                                pd.date_range(
-                                    start=period.start_time,
-                                    end=period.end_time,
-                                    freq="D",
-                                )
-                            )
-                            date_summations["time"].extend(dates)
-
-                            value_days.extend(
-                                val[i][
-                                    range(monthrange(int(row.year), int(row.month))[1])
-                                ]
-                            )
-                            flag_days.extend(
-                                flag[i][
-                                    range(monthrange(int(row.year), int(row.month))[1])
-                                ]
-                            )
-                        written_values = value_days
-                        written_flags = flag_days
-
-                    ds = xr.Dataset()
-                    da_val = xr.DataArray(
-                        written_values, coords=date_summations, dims=["time"]
-                    )
-
-                    if raw_units != units:
-                        da_val.attrs["units"] = raw_units
-                        da_val = convert_units_to(da_val, units)
-                    else:
-                        da_val.attrs["units"] = units
-
-                    da_val = da_val.rename(nc_name)
-                    variable_attributes = dict(
-                        variable_code=variable_code,
-                        standard_name=standard_name,
-                        long_name=long_name,
-                    )
-                    if "original_units" in kwargs:
-                        variable_attributes["original_units"] = kwargs["original_units"]
-                    da_val.attrs.update(variable_attributes)
-
-                    da_flag = xr.DataArray(
-                        written_flags, coords=date_summations, dims=["time"]
-                    )
-                    da_flag = da_flag.rename("flag")
-                    flag_attributes = dict(
-                        long_name="data flag",
-                        note="See ECCC technical documentation for details",
-                    )
-                    da_flag.attrs.update(flag_attributes)
-
-                    ds[nc_name] = da_val
-                    ds["flag"] = da_flag
-
-                    # save the file in NetCDF format
-                    start_year = ds.time.dt.year.values[0]
-                    end_year = ds.time.dt.year.values[-1]
-
-                    station_folder = output_path.joinpath(str(code))
-                    station_folder.mkdir(parents=True, exist_ok=True)
-
-                    f_nc = (
-                        f"{code}_{variable_code}_{nc_name}_"
-                        f"{start_year if start_year == end_year else '_'.join([str(start_year), str(end_year)])}.nc"
-                    )
-
-                    if station_folder.joinpath(f_nc).exists():
-                        msg = f"File `{f_nc}` already exists. Continuing..."
-                        logging.warning(msg)
-
-                    history = (
-                        f"{dt.now().strftime('%Y-%m-%d %X')} converted from flat station file "
-                        f"(`{fichier.name}`) to n-dimensional array."
-                    )
-
-                    # TODO: This info should eventually be sourced from a JSON definition
-                    global_attrs = dict(
-                        Conventions="CF-1.8",
-                        comment="Acquired on demand from data specialists at "
-                        "ECCC Climate Services / Services Climatiques.",
-                        contact="John Richard",
-                        contact_email="climatcentre-climatecentral@ec.gc.ca",
-                        domain="CAN",
-                    )
-                    if mode == "hourly":
-                        global_attrs.update(dict(frequency="1hr"))
-                    elif mode == "daily":
-                        global_attrs.update(dict(frequency="day"))
-                    global_attrs.update(
-                        dict(
-                            history=history,
-                            internal_comment=f"Converted by {os.environ.get('USER', os.environ.get('USERNAME'))}.",
-                            institution="ECCC",
-                            license="https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
-                            member=code,
-                            processing_level="raw",
-                            redistribution="Redistribution permitted.",
-                            references="https://climate.weather.gc.ca/doc/Technical_Documentation.pdf",
-                            source="historical-station-records",
-                            table_date=TABLE_DATE,
-                            title="Environment and Climate Change Canada (ECCC) weather station observations",
-                            type="station-obs",
-                            usage="The original data is owned by the Government of Canada (Environment and Climate "
-                            "Change Canada), and falls under the licence agreement for use of Environment and "
-                            "Climate Change Canada data",
-                            variable=str(nc_name),
-                            version=f"v{dt.now().strftime('%Y.%m.%V')}",  # Year.Month.Week
-                        )
-                    )
-                    ds.attrs.update(global_attrs)
-
-                    msg = f"Exporting to: {station_folder.joinpath(f_nc)}"
-                    logging.info(msg)
-                    ds.to_netcdf(station_folder.joinpath(f_nc))
-                    del ds
-                    del val
-                    del mask
-                    del flag
-                    del da_val
-                    del da_flag
-                    del dfd
-                    del dff
-                    del written_values
-                    del written_flags
-                    del date_summations
-
-                del df
-
-        if os.listdir(temp_folder):
-            for temporary_file in Path(temp_folder).glob("*"):
-                if temporary_file in data_files:
-                    temporary_file.unlink()
-
-
-def convert_flat_files(
-    source_files: str | os.PathLike,
-    output_folder: str | os.PathLike | list[str | int],
-    variables: str | int | list[str | int],
-    mode: str = "hourly",
-    n_workers: int = 4,
-) -> None:
-    """Convert flat formatted files.
-
-    Parameters
-    ----------
-    source_files : str or Path
-    output_folder : str or Path
-    variables : str or List[str]
-    mode : {"hourly", "daily"}
-    n_workers : int
-
-    Returns
-    -------
-    None
-    """
-    func_time = time.time()
-
-    if mode.lower() in ["h", "hour", "hourly"]:
-        num_observations = 24
-        column_names = ["code", "year", "month", "day", "code_var"]
-        column_dtypes = [str, float, float, float, str]
-    elif mode.lower() in ["d", "day", "daily"]:
-        num_observations = 31
-        column_names = ["code", "year", "month", "code_var"]
-        column_dtypes = [str, float, float, str]
-    else:
-        raise NotImplementedError("`mode` must be 'h'/'hourly or 'd'/'daily'.")
-
-    # Preparing the data column headers
-    for i in range(1, num_observations + 1):
-        data_entry, flag_entry = f"D{i:0n}", f"F{i:0n}"
-        column_names.append(data_entry)
-        column_names.append(flag_entry)
-        column_dtypes.extend([str, str])
-
-    if isinstance(variables, (str, int)):
-        variables = [variables]
-
-    for variable_code in variables:
-        variable_code = str(variable_code).zfill(3)
-        metadata = cf_station_metadata(variable_code)
-        nc_name = metadata["nc_name"]
-
-        rep_nc = Path(output_folder).joinpath(nc_name)
-        rep_nc.mkdir(parents=True, exist_ok=True)
-
-        # Loop on the files
-        msg = (
-            f"Collecting files for variable '{metadata['standard_name']}' "
-            f"(filenames containing '{metadata['_table_name']}')."
-        )
-        logging.info(msg)
-        list_files = list()
-        if isinstance(source_files, list) or Path(source_files).is_file():
-            list_files.append(source_files)
-        else:
-            glob_patterns = [g for g in metadata["_table_name"]]
-            for pattern in glob_patterns:
-                list_files.extend(
-                    [f for f in Path(source_files).rglob(f"{pattern}*") if f.is_file()]
-                )
-        manager = mp.Manager()
-        errored_files = manager.list()
-        converter_func = functools.partial(
-            _convert_station_file,
-            output_path=rep_nc,
-            errored_files=errored_files,
-            mode=mode,
-            variable_code=variable_code,
-            column_names=column_names,
-            column_dtypes=column_dtypes,
-            **metadata,
-        )
-        with mp.Pool(processes=n_workers) as pool:
-            pool.map(converter_func, list_files)
-            pool.close()
-            pool.join()
-
-        if errored_files:
-            msg = "Some files failed to be properly parsed:\n", ", ".join(errored_files)
-
-            logging.warning(msg)
-
-    msg = f"Process completed in {time.time() - func_time:.2f} seconds"
-    logging.warning()
-
-
-def aggregate_stations(
-    source_files: str | os.PathLike | None = None,
-    output_folder: str | os.PathLike | None = None,
-    time_step: str | None = None,
-    variables: str | int | list[str | int] | None = None,
-    include_flags: bool = True,
-    groupings: int | None = None,
-    mf_dataset_freq: str | None = None,
-    temp_directory: str | os.PathLike | None = None,
-    n_workers: int = 1,
-) -> None:
-    """Aggregate stations.
-
-    Parameters
-    ----------
-    source_files : str or Path
-        Source files to be aggregated.
-    output_folder : str or Path
-        Output folder for the aggregated files.
-    variables : str or int or list of str or int, optional
-        The variable codes to be aggregated.
-    time_step : {"hourly", "daily"}
-        The time step to be used for aggregation.
-    include_flags : bool
-        Include flags in the output files.
-    groupings : int
-        The number of files in each group used for converting to multi-file Datasets.
-    mf_dataset_freq : str, optional
-        Resampling frequency for creating output multi-file Datasets. E.g. 'YS': 1 year per file, '5YS': 5 years per file.
-    temp_directory : str or Path, optional
-        Use another temporary directory location in case default location is not spacious enough.
-    n_workers : int
-        The number of workers to use.
-
-    Returns
-    -------
-    None
-    """
-    func_time = time.time()
-
-    if isinstance(source_files, str):
-        source_files = Path(source_files)
-
-    if time_step.lower() in ["h", "hour", "hourly"]:
-        mode = "hourly"
-    elif time_step.lower() in ["d", "day", "daily"]:
-        mode = "daily"
-    else:
-        raise ValueError("Time step must be `h` / `hourly` or `d` / `daily`.")
-
-    if isinstance(variables, list):
-        pass
-    elif isinstance(variables, (str, int)):
-        variables = [variables]
-    # TODO: have the variable gathered from a JSON file
-    elif variables is None:
-        if mode == "hourly":
-            variables = [
-                89,
-                94,
-                123,
-            ]
-            variables.extend(range(76, 81))
-            variables.extend(range(262, 281))
-        elif mode == "daily":
-            variables = [1, 2, 3]
-            variables.extend(range(10, 26))
-    else:
-        raise NotImplementedError()
-
-    for variable_code in variables:
-        info = cf_station_metadata(variable_code)
-        variable_name = info["nc_name"]
-        msg = f"Merging `{variable_name}` using `{time_step}` time step."
-        logging.info(msg)
-
-        # Only perform aggregation on available data with corresponding metadata
-        logging.info("Performing glob and sort.")
-        nc_list = [str(nc) for nc in source_files.joinpath(variable_name).rglob("*.nc")]
-
-        if not groupings:
-            groupings = max(n_workers**2, 4)
-
-        if nc_list:
-            nc_lists = group_by_length(nc_list, groupings)
-
-            with tempfile.TemporaryDirectory(
-                prefix="eccc", dir=temp_directory
-            ) as temp_dir:
-                combinations = sorted(
-                    (ii, nc, temp_dir, len(nc_lists)) for ii, nc in enumerate(nc_lists)
-                )
-
-                with mp.Pool(processes=n_workers) as pool:
-                    pool.starmap(_tmp_zarr, combinations)
-                    pool.close()
-                    pool.join()
-
-                zarrs_found = [f for f in Path(temp_dir).glob("*.zarr")]
-                msg = f"Found {len(zarrs_found)} intermediary aggregation files."
-
-                logging.info(msg)
-
-                ds = xr.open_mfdataset(
-                    zarrs_found,
-                    engine="zarr",
-                    combine="nested",
-                    concat_dim={"station"},
-                )
-
-                if ds:
-                    station_file_codes = [Path(x).name.split("_")[0] for x in nc_list]
-                    if not include_flags:
-                        drop_vars = [vv for vv in ds.data_vars if "flag" in vv]
-                        ds = ds.drop_vars(drop_vars)
-                    ds = ds.sortby(ds.station_id, "time")
-
-                    # Rearrange column order to have lon, lat, elev first
-                    # # FIXME: This doesn't work as intended - Assign coordinates instead
-                    # cols = meta.columns.tolist()
-                    # cols1 = [
-                    #     "latitude",
-                    #     "longitude",
-                    #     "elevation",
-                    # ]
-                    # for rr in cols1:
-                    #     cols.remove(rr)
-                    # cols1.extend(cols)
-                    # meta = meta[cols1]
-                    # meta.index.rename("station", inplace=True)
-                    # meta = meta.to_xarray()
-                    # meta.sortby(meta["climate_identifier"])
-                    # meta = meta.assign({"station": ds.station.values})
-
-                    # np.testing.assert_array_equal(
-                    #     sorted(meta["climate_identifier"].values), sorted(ds.station_id.values)
-                    # )
-                    # for vv in meta.data_vars:
-                    #     ds = ds.assign_coords({vv: meta[vv]})
-                    # ds = xr.merge([ds, meta])
-                    # ds.attrs = attrs1
-
-                    # export done within tmddir context otherwise data is erased before final export!!
-                    valid_stations = list(sorted(ds.station_id.values))
-                    valid_stations_count = len(valid_stations)
-
-                    msg = f"Processing stations for variable `{variable_name}`."
-                    logging.info(msg)
-
-                    if len(station_file_codes) == 0:
-                        msg = f"No stations were found containing variable filename `{variable_name}`. Exiting."
-                        logging.error(msg)
-                        return
-
-                    msg = (
-                        f"Files exist for {len(station_file_codes)} ECCC stations. "
-                        f"Metadata found for {valid_stations_count} stations. "
-                    )
-                    logging.info(msg)
-
-                    # FIXME: Is this still needed?
-                    # logging.info("Preparing the NetCDF time period.")
-                    # Create the time period timestamps
-                    # year_start = ds.time.dt.year.min().values
-                    # year_end = ds.time.dt.year.max().values
-
-                    # Calculate the time index dimensions of the output NetCDF
-                    # time_index = pd.date_range(
-                    #     start=f"{year_start}-01-01",
-                    #     end=f"{year_end + 1}-01-01",
-                    #     freq=mode[0].capitalize(),
-                    # )[:-1]
-                    # logging.info(
-                    #     f"Number of ECCC stations: {valid_stations_count}, time steps: {time_index.size}."
-                    # )
-
-                    Path(output_folder).mkdir(parents=True, exist_ok=True)
-                    file_out = Path(output_folder).joinpath(f"{variable_name}_eccc_{mode}")
-
-                    ds = ds.assign_coords(station=range(0, len(ds.station))).sortby("time")
-                    if mf_dataset_freq is not None:
-                        # output mf_dataset using resampling frequency
-                        _, datasets = zip(*ds.resample(time=mf_dataset_freq))
-                    else:
-                        datasets = [ds]
-
-                    paths = [
-                        f"{file_out}_{data.time.dt.year.min().values}-{data.time.dt.year.max().values}.nc"
-                        for data in datasets
-                    ]
-
-                    # FIXME: chunks need to be dealt with
-                    # chunks = [1, len(ds.time)]
-                    # comp = dict(zlib=True, complevel=5)  # , chunk sizes=chunks)
-
-                    with ProgressBar():
-                        # FIXME: looping seems to cause increasing memory over time use a pool of one or 2??
-                        # for dataset, path in zip(datasets, paths):
-                        #     _export_agg_nc(dataset,path)
-                        combs = zip(datasets, paths)
-                        pool = mp.Pool(2)
-                        pool.map(_export_agg_nc, combs)
-                        pool.close()
-                        pool.join()
-                    ds.close()
-                    del ds
-
-        else:
-            msg = f"No files found for variable: `{variable_name}`."
-            logging.info(msg)
-
-    runtime = f"Process completed in {time.time() - func_time:.2f} seconds"
-    logging.warning(runtime)
-
-
-def _export_agg_nc(args):
-    dataset, path = args
-    comp = dict(zlib=True, complevel=5)
-    encoding = {var: comp for var in dataset.data_vars}
-    dataset.load().to_netcdf(
-        path,
-        engine="h5netcdf",
-        format="NETCDF4_CLASSIC",
-        encoding=encoding,
-    )
-    dataset.close()
-    del dataset
-
-
-def _tmp_zarr(
-    iterable: int,
-    nc: list[str | os.PathLike],
-    tempdir: str | os.PathLike,
-    group: int | None = None,
-) -> None:
-    msg = (
-        f"Processing batch of files {iterable + 1}"
-        f"{' of ' + str(group) if group is not None else ''}."
-    )
-    logging.info(msg)
-    station_file_codes = [Path(x).name.split("_")[0] for x in nc]
-
-    try:
-        ds = xr.open_mfdataset(
-            nc, combine="nested", concat_dim={"station"}, preprocess=_remove_duplicates
-        )
-    except ValueError as e:
-        errored_nc_files = ", ".join([Path(f).name for f in nc])
-        msg = f"Issues found with the following files: [{errored_nc_files}]: {e}"
-
-        logging.error(msg)
-        return
-
-    ds = ds.assign_coords(
-        station_id=xr.DataArray(station_file_codes, dims="station").astype(str)
-    )
-    if "flag" in ds.data_vars:
-        ds1 = ds.drop_vars("flag").copy(deep=True)
-        ds1["flag"] = ds.flag.astype(str)
-        ds = ds1
-
-    with ProgressBar():
-        ds.load().to_zarr(
-            Path(tempdir).joinpath(f"{str(iterable).zfill(4)}.zarr"),
-        )
-    del ds
-
-
-def _combine_years(
-    station_folder: str,
-    varia: str,
-    out_folder: str | os.PathLike,
-    meta_file: str | os.PathLike,
-    rejected: list[str],
-    _verbose: bool = False,
-) -> None:
-    nc_files = sorted(list(Path(station_folder).glob("*.nc")))
-    if len(nc_files):
-        msg = (
-            f"Found {len(nc_files)} files for station code {Path(station_folder).name}."
-        )
-
-        logging.info(msg)
-    else:
-        msg = f"No readings found for station code {Path(station_folder).name}. Continuing..."
-
-        logging.warning(msg)
-        return
-
-    # Remove range files if years are all present, otherwise default to range_file.
-    years_found = dict()
-    range_files_found = dict()
-    years_parsed = True
-    for f in nc_files:
-        groups = re.findall(r"_\d{4}", f.stem)
-        if len(groups) == 1:
-            year = int(groups[0].strip("_"))
-            years_found[year] = f
-        elif len(groups) == 2:
-            year_start, year_end = int(groups[0].strip("_")), int(groups[1].strip("_"))
-            range_files_found[f] = set(range(year_start, year_end))
-        else:
-            logging.warning(
-                "Years unable to be effectively parsed from series. Continuing with xarray solver..."
-            )
-            years_parsed = False
-            break
-    if years_parsed:
-        if len(range_files_found) > 0:
-            msg = f"Overlapping single-year and multi-year files found for station code {station_folder}. Removing overlaps."
-            logging.warning(msg)
-            for ranged_file, years in range_files_found.items():
-                if years.issubset(years_found.values()):
-                    nc_files.remove(ranged_file)
-                else:
-                    for y in years:
-                        try:
-                            nc_files.remove(years_found[y])
-                        except (KeyError, ValueError) as err:  # noqa: PERF203
-                            logging.error(err)
-                            continue
-
-        year_range = min(years_found.keys()), max(years_found.keys())
-        msg = f"Year(s) covered: {year_range[0]}{'-' + str(year_range[1]) if year_range[0] != year_range[1] else ''}."
-        logging.info(msg)
-
-    if _verbose:
-        msg = f"Opening: {', '.join([p.name for p in nc_files])}"
-        logging.info(msg)
-    ds = xr.open_mfdataset(nc_files, combine="nested", concat_dim={"time"})
-    outfile = Path(out_folder).joinpath(
-        f'{nc_files[0].name.split(f"_{varia}_")[0]}_{varia}_'
-        f"{ds.time.dt.year.min().values}-{ds.time.dt.year.max().values}.nc"
-    )
-
-    df_inv = xr.open_dataset(meta_file)
-
-    station_id = ds.attrs["member"]
-    meta = df_inv.isel(index=df_inv.CLIMATE_IDENTIFIER == station_id)
-    meta = meta.rename({"index": "station", "CLIMATE_IDENTIFIER": "station_id"})
-    try:
-        meta = meta.assign_coords(station=[0])
-    except ValueError:
-        rejected.append(Path(station_folder).name)
-        msg = f"Something went wrong at the assign_coords step for station {station_folder}. Continuing..."
-        logging.error(msg)
-        return
-    if len(meta.indexes) > 1:
-        raise ValueError("Found more than 1 station.")
-    elif len(meta.indexes) == 0:
-        rejected.append(Path(station_folder).name)
-        msg = f"No metadata found for station code {station_folder}. Continuing..."
-        logging.warning(msg)
-        return
-
-    keep_coords = [
-        "time",
-        "station",
-        "station_id",
-        "latitude",
-        "longitude",
-        "elevation",
-    ]
-    for vv in meta.data_vars:
-        if vv.lower() not in keep_coords:
-            continue
-        ds = ds.assign_coords({vv.lower(): meta[vv]})
-
-    for vv in ds.data_vars:
-        if ds[vv].dtype == "O":
-            ds[vv] = ds[vv].astype(str)
-
-    if not outfile.exists():
-        msg = f"Merging to {outfile.name}"
-        logging.info(msg)
-        comp = dict(zlib=True, complevel=5)
-        encoding = {data_var: comp for data_var in ds.data_vars}
-        encoding["time"] = {"dtype": "single"}
-        with ProgressBar():
-            ds.to_netcdf(
-                outfile,
-                engine="h5netcdf",
-                format="NETCDF4_CLASSIC",
-                encoding=encoding,
-            )
-    else:
-        msg = f"Files exist for {outfile.name}. Continuing..."
-        logging.info(msg)
-
-
-def merge_converted_variables(
-    source_files: str | os.PathLike,
-    output_folder: str | os.PathLike,
-    variables: str | int | list[str | int] | None = None,
-    station_metadata: str | os.PathLike | None = None,
-    overwrite: bool = False,
-    n_workers: int = 1,
-) -> None:
-    """Merge converted variables.
- - Parameters - ---------- - source_files : str, Path - output_folder : str, Path - variables : str or int or list of str or int, optional - station_metadata : str or Path, optional - overwrite : bool - n_workers : int - - Returns - ------- - None - """ - meta = load_station_metadata(station_metadata) - metadata_file = Path(tempfile.NamedTemporaryFile(suffix=".nc", delete=False).name) - meta.to_netcdf(metadata_file) - - if isinstance(source_files, str): - source_files = Path(source_files) - if isinstance(output_folder, str): - output_folder = Path(output_folder) - - selected_variables = list() - if variables is not None: - if not isinstance(variables, list): - variables = [variables] - selected_variables = [cf_station_metadata(var) for var in variables] - - variables_found = [x.name for x in source_files.iterdir() if x.is_dir()] - if selected_variables: - variables_found = [ - x - for x in variables_found - if x in [item["nc_name"] for item in selected_variables] - ] - - for variable in variables_found: - msg = f"Merging files found for variable: `{variable}`." - logging.info(msg) - station_dirs = [ - x for x in source_files.joinpath(variable).iterdir() if x.is_dir() - ] - msg = f"Number of stations found: {len(station_dirs)}." - logging.info(msg) - - output_rep = output_folder.joinpath(variable) - Path(output_rep).mkdir(parents=True, exist_ok=True) - - if ( - len(list(output_rep.iterdir())) >= (len(meta.CLIMATE_IDENTIFIER) * 0.75) - ) and not overwrite: - msg = ( - f"Variable {variable} appears to have already been converted. Will be skipped. " - f"To force conversion of this variable, set `overwrite=True`." - ) - logging.warning(msg) - continue - - manager = mp.Manager() - rejected_stations = manager.list() - - combine_func = functools.partial( - _combine_years, - varia=variable, - out_folder=output_rep, - meta_file=metadata_file, - rejected=rejected_stations, - ) - - with mp.Pool(processes=n_workers) as pool: - pool.map(combine_func, station_dirs) - pool.close() - pool.join() - - if rejected_stations: - msg = f"Rejected station codes are the following: {', '.join(rejected_stations)}." - logging.warning(msg) diff --git a/src/miranda/eccc/_utils.py b/src/miranda/eccc/_utils.py deleted file mode 100644 index 2f0673a8..00000000 --- a/src/miranda/eccc/_utils.py +++ /dev/null @@ -1,1003 +0,0 @@ -from __future__ import annotations - -import logging.config -from collections.abc import Mapping -from datetime import datetime as dt - -from miranda.scripting import LOGGING_CONFIG - -__all__ = ["cf_ahccd_metadata", "cf_station_metadata"] - -logging.config.dictConfig(LOGGING_CONFIG) - - -def cf_station_metadata(variable_code: int | str) -> Mapping[str, int | float | str]: - """CF metadata for hourly station data. 
- - Parameters - ---------- - variable_code: int or str - - Returns - ------- - dict - """ - ec_hourly_variables = { - "001": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Maximum Temperature", - "standard_name": "air_temperature_maximum", - "nc_name": "tasmax", - }, - "002": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Minimum Temperature", - "standard_name": "air_temperature_minimum", - "nc_name": "tasmin", - }, - "003": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Mean Temperature", - "standard_name": "air_temperature", - "nc_name": "tas", - }, - "010": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 mm day-1", - "raw_units": "mm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Rainfall", - "standard_name": "liquid_precipitation_amount", - "nc_name": "prlptot", - }, - "011": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 cm day-1", - "raw_units": "cm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Snowfall", - "standard_name": "solid_precipitation_amount", - "nc_name": "prsntot", - }, - "012": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 mm day-1", - "raw_units": "mm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Precipitation", - "standard_name": "precipitation_amount", - "nc_name": "prcptot", - }, - "013": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow on the Ground", - "standard_name": "surface_snow_thickness", - "nc_name": "sndtot", - }, - "014": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Thunderstorms", - "standard_name": "thunderstorm_presence", - "nc_name": "thunder", - }, - "015": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Freezing rain or drizzle", - "standard_name": "freeze_rain_drizzle_presence", - "nc_name": "freezing_rain_drizzle", - }, - "016": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Hail", - "standard_name": "hail_presence", - "nc_name": "hail", - }, - "017": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Fog or Ice Fog", - "standard_name": "fog_ice_fog_presence", - "nc_name": "fog_ice_fog", - }, - "018": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Smoke or Haze", - "standard_name": "smoke_haze_presence", - "nc_name": "smoke_haze", - }, - "019": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Blowing Dust or Sand", - "standard_name": "blowing_dust_sand_presence", - "nc_name": "blowing_dust_sand", - }, - 
"020": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Blowing snow", - "standard_name": "blowing_snow_presence", - "nc_name": "blow_snow", - }, - "021": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind speed >= 28 Knots", - "standard_name": "wind_exceeding_28_knots", - "nc_name": "wind_gt_28kt", - }, - "022": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind speed >= 34 Knots", - "standard_name": "wind_exceeding_34_knots", - "nc_name": "wind_gt_34kt", - }, - "023": { - "_table_name": {"DLY02", "DLY04"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Direction of extreme gust (16 pts) to December 1976", - "standard_name": "gust_to_direction", - "nc_name": "gust_dir_16pts", - }, - "024": { - "_table_name": {"DLY02", "DLY04"}, - "original_units": "km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Speed of extreme gust", - "standard_name": "wind_speed_of_gust", - "nc_name": "gust_speed", - }, - "025": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "h", - "units": "h", - "scale_factor": 1, - "add_offset": 0, - "long_name": "UTC hour of extreme gust", - "standard_name": "hour_of_extreme_gust", - "nc_name": "gust_hour", - }, - "061": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF1 global solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf1_radiation", - }, - "062": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF2 sky (diffuse) radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf2_radiation", - }, - "063": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF3 reflected solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf3_radiation", - }, - "064": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF4 net all wave radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf4_radiation", - }, - "067": { - "_table_name": {"HLY11"}, - "original_units": "0.01 Kilolux_hrs", - "raw_units": "lux h", - "units": "lux h", - "scale_factor": 10, - "add_offset": 0, - "long_name": "RF7 daylight illumination", - "standard_name": "solar_radiation_flux", - "nc_name": "rf7_radiation", - }, - "068": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF8 direct solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf8_radiation", - }, - "069": { - "_table_name": {"HLY15"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Direction - 45B anemometer (8 
pts)", - "standard_name": "wind_to_direction", - "nc_name": "wind_dir_45B", - }, - "071": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Ceiling height of lowest layer of clouds", - "standard_name": "ceiling_cloud_height", - "nc_name": "ceiling_hgt", - }, - "072": { - "_table_name": {"HLY01"}, - "original_units": "0.1 km", - "raw_units": "km", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Visibility", - "standard_name": "visibility_in_air", - "nc_name": "visibility", - }, - "073": { - "_table_name": {"HLY01"}, - "original_units": "0.01 kPa", - "raw_units": "Pa", - "units": "Pa", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Sea Level Pressure", - "standard_name": "air_pressure_at_mean_sea_level", - "nc_name": "psl", - }, - "074": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Dew Point Temperature", - "standard_name": "dew_point_temperature", - "nc_name": "tds", - }, - "075": { - "_table_name": {"HLY01"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", - "standard_name": "wind_direction_u2a", - "nc_name": "wind_dir_u2a_16", - }, - "076": { - "_table_name": {"HLY01"}, - "original_units": "km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind Speed - U2A (16 pts) to December 1970", - "standard_name": "wind_speed_u2a", - "nc_name": "wind_speed_u2a", - }, - "077": { - "_table_name": {"HLY01"}, - "original_units": "0.01 kPa", - "raw_units": "Pa", - "units": "Pa", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Station Pressure", - "standard_name": "atmospheric_pressure", - "nc_name": "pressure", - }, - "078": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Dry Bulb Temperature", - "standard_name": "dry_bulb_temperature", - "nc_name": "tas_dry", - }, - "079": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wet Bulb temperature", - "standard_name": "wet_bulb_temperature", - "nc_name": "tas_wet", - }, - "080": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Relative Humidity", - "standard_name": "relative_humidity", - "nc_name": "hur", - }, - "081": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Total Cloud Opacity", - "standard_name": "cloud_albedo", - "nc_name": "clo", - }, - "082": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Total Cloud Amount", - "standard_name": "cloud_area_fraction", - "nc_name": "clt", - }, - "089": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Freezing Rain", - "standard_name": "freezing_rain", - "nc_name": "freeze_rain", - }, - "094": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - 
"add_offset": 0, - "long_name": "Ice Pellets", - "standard_name": "ice_pellet_presence", - "nc_name": "ice_pellets", - }, - "107": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "1low_cloud_opac", - }, - "108": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "1low_cloud_frac", - }, - "109": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "1low_cloud_type", - }, - "110": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "1low_cloud_hgt", - }, - "111": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Second lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "2low_cloud_opac", - }, - "112": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Second lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "2low_cloud_frac", - }, - "113": { - "_table_name": {"HLY01"}, - "original_units": "", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Second lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "2low_cloud_type", - }, - "114": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Second lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "2low_cloud_hgt", - }, - "115": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Thirsd lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "3low_cloud_opac", - }, - "116": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Third lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "3low_cloud_frac", - }, - "117": { - "_table_name": {"HLY01"}, - "original_units": "", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Third lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "3low_cloud_type", - }, - "118": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Third lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "3low_cloud_hgt", - }, - "123": { - "_table_name": {"HLY01"}, - "original_units": "0.1 mm", - 
"raw_units": "mm h-1", - "units": "kg m2 s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Rainfall", - "standard_name": "rainfall_flux", - "nc_name": "rainfall", - }, - "133": { - "_table_name": {"HLY10"}, - "original_units": "0.1 hrs", - "raw_units": "h", - "units": "s", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Sunshine", - "standard_name": "duration_of_sunshine", - "nc_name": "sun", - }, - "156": { - "_table_name": {"HLY01"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "long_name": "Wind Direction - U2A (36 pts) from January 1971", - "standard_name": "wind_direction_u2a", - "nc_name": "wind_dir_u2a_36", - }, - "262": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 00-60)", - "standard_name": "precipitation_amount", - "nc_name": "prtot", - }, - "263": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 00-15)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q1", - }, - "264": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 15-30)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q2", - }, - "265": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 30-45)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q3", - }, - "266": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 45-60)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q4", - }, - "267": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 15)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q1", - }, - "268": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 30)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q2", - }, - "269": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 45)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q3", - }, - "270": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 60)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q4", - }, - "271": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m 
s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 00-15)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q1", - }, - "272": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 15-30)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q2", - }, - "273": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 30-45)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q3", - }, - "274": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 45-60)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q4", - }, - "275": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 60)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q4", - }, - "276": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 15)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q1", - }, - "277": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 30)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q2", - }, - "278": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 45)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q3", - }, - "279": { - "_table_name": {"HLY01_RCS"}, - "original_units": "Degrees", - "raw_units": "deg", - "nc_units": "deg", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind Direction at 2 m (minutes 50-60)", - "standard_name": "wind_direction", - "nc_name": "wind_dir", - }, - "280": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 50-60)", - "standard_name": "wind_speed", - "nc_name": "wind_speed", - }, - } - code = str(variable_code).zfill(3) - if code in ["061"]: - raise NotImplementedError() - try: - variable = ec_hourly_variables[code] - variable["missing_flags"] = "M" - variable["missing_values"] = {-9999, "#####"} - variable["least_significant_digit"] = "" - except KeyError: - msg = f"Hourly variable `{code}` not supported." - logging.error(msg) - raise - return variable - - -def cf_ahccd_metadata( - code: str, gen: int -) -> (dict[str, int | float | str], dict, list[tuple[int, int]], int): - """CF compliant metadata for ECCC Adjusted and Homogenized Climate Data (AHCCD). 
- - Parameters - ---------- - code: {"dx", "dn", "dm", "dt", "ds", "dr"} - gen: {1, 2, 3} - - Returns - ------- - dict[str, int or str or float], dict, list[tuple[int, int]], int - """ - generation = {1: "First", 2: "Second", 3: "Third"}.get(gen) - - ec_ahccd_attrs = dict( - dx=dict( - variable="tasmax", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Maximum Daily Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dn=dict( - variable="tasmin", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Minimum Daily Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dm=dict( - variable="tas", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Daily Mean Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dt=dict( - variable="pr", - units="mm d-1", - standard_name="precipitation_flux", - long_name="Daily Total Precipitation", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - ds=dict( - variable="prsn", - units="mm d-1", - standard_name="snowfall_flux", - long_name="Daily Snowfall", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - dr=dict( - variable="prlp", - units="mm d-1", - standard_name="rainfall_flux", - long_name="Daily Rainfall", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - ) - try: - variable = ec_ahccd_attrs[code] - variable["missing_flags"] = "M" - if variable["variable"].startswith("tas"): - variable["NaN_value"] = -9999.9 - column_names = [ - "No", - "StnId", - "Station name", - "Prov", - "FromYear", - "FromMonth", - "ToYear", - "ToMonth", - "%Miss", - "Lat(deg)", - "Long(deg)", - "Elev(m)", - "Joined", - "RCS", - ] - column_spaces = [(0, 5), (5, 6), (6, 8), (8, 9)] - ii = 9 - for i in range(1, 32): - column_spaces.append((ii, ii + 7)) - ii += 7 - column_spaces.append((ii, ii + 1)) - ii += 1 - header_row = 3 - - elif variable["variable"].startswith("pr"): - variable["NaN_value"] = -9999.99 - column_names = [ - "Prov", - "Station name", - "stnid", - "beg yr", - "beg mon", - "end yr", - "end mon", - "lat (deg)", - "long (deg)", - "elev (m)", - "stns joined", - ] - column_spaces = [(0, 4), (4, 5), (5, 7), (7, 8)] - ii = 8 - for i in range(1, 32): - column_spaces.append((ii, ii + 8)) - ii += 8 - column_spaces.append((ii, ii + 1)) - ii += 1 - header_row = 0 - - else: - raise KeyError - - column_names = { - col.lower() - .split("(")[0] - .replace("%", "pct_") - .strip() - .replace(" ", "_"): col - for col in list(column_names) - } - - if gen == 3: - _citation = ( - "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized " - "Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. " - "Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728" - ) - elif gen == 2: - _citation = ( - "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily " - "precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), " - "163-177 doi:10.1080/07055900.2011.583910" - ) - else: - msg = f"Generation '{gen}' not supported." 
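# A minimal sketch, assuming the generation-3 temperature layout described
# above: 9 leading identifier characters, then 31 pairs of a 7-character daily
# value field and a 1-character flag field, read with pandas.read_fwf. The
# file name below is hypothetical; the span arithmetic mirrors the column
# definitions being removed here (now supplied by homogenized_column_definitions).
import pandas as pd

colspecs = [(0, 5), (5, 6), (6, 8), (8, 9)]  # leading identifier fields
pos = 9
for _day in range(1, 32):
    colspecs.append((pos, pos + 7))  # daily value field
    pos += 7
    colspecs.append((pos, pos + 1))  # single-character quality flag
    pos += 1

df = pd.read_fwf("dx1234567.txt", colspecs=colspecs, header=3)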
-            raise NotImplementedError(msg)
-
-        global_attrs = dict(
-            title=f"{generation} Generation of Homogenized Daily {variable['variable']} "
-            "for Canada (Updated to December 2019)",
-            history=f"{dt.today().strftime('%Y-%m-%d')}: Convert from original format to NetCDF",
-            type="station_obs",
-            institute="Environment and Climate Change Canada",
-            institute_id="ECCC",
-            dataset_id=f"AHCCD_gen{gen}_day_{variable['variable']}",
-            frequency="day",
-            license_type="permissive",
-            license="https:/open.canada.ca/en/open-government-licence-canada",
-            citation=_citation,
-        )
-
-    except KeyError as e:
-        msg = f"AHCCD variable '{code}' or generation '{gen}' not supported."
-        logging.error(msg)
-        raise NotImplementedError(msg) from e
-
-    return variable, column_names, column_spaces, header_row, global_attrs
diff --git a/src/miranda/eccc/geomet.py b/src/miranda/eccc/geomet.py
new file mode 100644
index 00000000..1b73f9d3
--- /dev/null
+++ b/src/miranda/eccc/geomet.py
@@ -0,0 +1,44 @@
+"""ECCC Geomet Module."""
+
+from __future__ import annotations
+
+import os
+from urllib.error import HTTPError
+
+import pandas as pd
+import xarray as xr
+
+
+def load_station_metadata(meta: str | os.PathLike | None) -> xr.Dataset:
+    """
+    Load station metadata from a file or URL.
+
+    Parameters
+    ----------
+    meta : str or os.PathLike or None
+        The path to the file or URL.
+
+    Returns
+    -------
+    xr.Dataset
+        The station metadata.
+    """
+    if meta:
+        df_inv = pd.read_csv(meta, header=0)
+    else:
+        try:
+            import geopandas as gpd
+
+            station_metadata_url = "https://api.weather.gc.ca/collections/climate-stations/items?f=json&limit=15000000"
+            df_inv = gpd.read_file(station_metadata_url)
+        except HTTPError as err:
+            raise RuntimeError(
+                f"Station metadata table could not be fetched. Consider downloading it directly: {err}"
+            ) from err
+        df_inv["LONGITUDE"] = df_inv.geometry.x
+        df_inv["LATITUDE"] = df_inv.geometry.y
+        df_inv["ELEVATION"] = df_inv.ELEVATION.astype(float)
+        df_inv["CLIMATE_IDENTIFIER"] = df_inv["CLIMATE_IDENTIFIER"].astype(str)
+
+        df_inv = df_inv.drop(["geometry"], axis=1)
+    return df_inv.to_xarray()
diff --git a/src/miranda/gis/__init__.py b/src/miranda/gis/__init__.py
index d3b5e40a..844c8daf 100644
--- a/src/miranda/gis/__init__.py
+++ b/src/miranda/gis/__init__.py
@@ -3,3 +3,4 @@
 from __future__ import annotations
 
 from ._domains import *
+from .utils import *
diff --git a/src/miranda/gis/utils.py b/src/miranda/gis/utils.py
new file mode 100644
index 00000000..54c5dd42
--- /dev/null
+++ b/src/miranda/gis/utils.py
@@ -0,0 +1,180 @@
+"""Utility functions for GIS operations."""
+
+from __future__ import annotations
+
+import datetime
+import logging
+import warnings
+
+import numpy as np
+import xarray as xr
+
+__all__ = [
+    "conservative_regrid",
+    "threshold_mask",
+]
+
+
+def _simple_fix_dims(d: xr.Dataset | xr.DataArray) -> xr.Dataset | xr.DataArray:
+    """
+    Adjust dimensions found in a file so that it can be used for regridding purposes.
+
+    Parameters
+    ----------
+    d : xr.Dataset or xr.DataArray
+        The dataset to adjust.
+
+    Returns
+    -------
+    xr.Dataset or xr.DataArray
+        The adjusted dataset.
+ """ + if "lon" not in d.dims or "lat" not in d.dims: + dim_rename = dict() + for dim in d.dims: + if str(dim).lower().startswith("lon"): + dim_rename[str(dim)] = "lon" + if str(dim).lower().startswith("lat"): + dim_rename[str(dim)] = "lat" + d = d.rename(dim_rename) + if np.any(d.lon > 180): + lon_wrapped = d.lon.where(d.lon <= 180.0, d.lon - 360.0) + d["lon"] = lon_wrapped + d = d.sortby(["lon"]) + + if "time" in d.dims: + d = d.isel(time=0, drop=True) + + return d + + +def conservative_regrid( + ds: xr.DataArray | xr.Dataset, ref_grid: xr.DataArray | xr.Dataset +) -> xr.DataArray | xr.Dataset: + """ + Perform a conservative_normed regridding. + + Parameters + ---------- + ds : xr.DataArray or xr.Dataset + The dataset to regrid. + ref_grid : xr.DataArray or xr.Dataset + The reference grid. + + Returns + ------- + xr.DataArray or xr.Dataset + The regridded dataset. + """ + try: + import xesmf as xe # noqa + except ModuleNotFoundError: + raise ModuleNotFoundError( + "This function requires the `xesmf` library which is not installed. " + "Regridding step will be skipped." + ) + + ref_grid = _simple_fix_dims(ref_grid) + method = "conservative_normed" + + msg = f"Performing regridding and masking with `xesmf` using method: {method}." + logging.info(msg) + + regridder = xe.Regridder(ds, ref_grid, method, periodic=False) + ds = regridder(ds) + + ds.attrs["history"] = ( + f"{datetime.datetime.now()}:" + f"Regridded dataset using xesmf with method: {method}. " + f"{ds.attrs.get('history')}".strip() + ) + return ds + + +def threshold_mask( + ds: xr.Dataset | xr.DataArray, + *, + mask: xr.Dataset | xr.DataArray, + mask_cutoff: float | bool = False, +) -> xr.Dataset | xr.DataArray: + """ + Land-Sea mask operations. + + Parameters + ---------- + ds : xr.Dataset or str or os.PathLike + The dataset to be masked. + mask : xr.Dataset or xr.DataArray + The land-sea mask. + mask_cutoff : float or bool + The mask cutoff value. + + Returns + ------- + xr.Dataset or xr.DataArray + The masked dataset. + """ + mask = _simple_fix_dims(mask) + + if isinstance(mask, xr.Dataset): + if len(mask.data_vars) == 1: + mask_variable = list(mask.data_vars)[0] + mask = mask[mask_variable] + else: + raise ValueError( + "More than one data variable found in land-sea mask. Supply a DataArray instead." + ) + else: + mask_variable = mask.name + + try: + from clisops.core import subset_bbox # noqa + + log_msg = f"Masking dataset with {mask_variable}." + if mask_cutoff: + log_msg = f"{log_msg.strip('.')} at `{mask_cutoff}` cutoff value." + logging.info(log_msg) + + lon_bounds = np.array([ds.lon.min(), ds.lon.max()]) + lat_bounds = np.array([ds.lat.min(), ds.lat.max()]) + + mask_subset = subset_bbox( + mask, + lon_bnds=lon_bounds, + lat_bnds=lat_bounds, + ).load() + except ModuleNotFoundError: + log_msg = ( + "This function requires the `clisops` library which is not installed. " + "subsetting step will be skipped." + ) + warnings.warn(log_msg) + mask_subset = mask.load() + + if mask_subset.dtype == bool: + if mask_cutoff: + logging.warning("Mask value cutoff set for boolean mask. 
Ignoring.") + mask_subset = mask_subset.where(mask) + else: + mask_subset = mask_subset.where(mask >= mask_cutoff) + ds = ds.where(mask_subset.notnull()) + + if mask_subset.min() >= 0: + if mask_subset.max() <= 1.00000001: + cutoff_info = f"{mask_cutoff * 100} %" + elif mask_subset.max() <= 100.00000001: + cutoff_info = f"{mask_cutoff} %" + else: + cutoff_info = f"{mask_cutoff}" + else: + cutoff_info = f"{mask_cutoff}" + ds.attrs["mask_cutoff"] = cutoff_info + + prev_history = ds.attrs.get("history", "") + history_msg = f"Mask calculated using `{mask_variable}`." + if mask_cutoff: + history_msg = f"{history_msg.strip('.')} with cutoff value `{cutoff_info}`." + history = f"{history_msg} {prev_history}".strip() + ds.attrs.update(dict(history=history)) + + return ds diff --git a/src/miranda/io/_input.py b/src/miranda/io/_input.py index d9b0141b..e91992a5 100644 --- a/src/miranda/io/_input.py +++ b/src/miranda/io/_input.py @@ -50,7 +50,7 @@ def discover_data( input_files = sorted(list(input_files.glob(f"*.{suffix}"))) else: input_files = input_files.rglob(f"*.{suffix}") - if input_files.is_file(): + elif input_files.is_file(): logging.warning( "Data discovery yielded a single file. Casting to `list[Path]`." ) diff --git a/src/miranda/io/_output.py b/src/miranda/io/_output.py index be360144..6ad791ab 100644 --- a/src/miranda/io/_output.py +++ b/src/miranda/io/_output.py @@ -35,6 +35,7 @@ def write_dataset( ds: xr.DataArray | xr.Dataset, output_path: str | os.PathLike, output_format: str, + output_name: str | None = None, chunks: dict | None = None, overwrite: bool = False, compute: bool = True, @@ -49,6 +50,8 @@ def write_dataset( Output folder path. output_format: {"netcdf", "zarr"} Output data container type. + output_name: str, optional + Output file name. chunks : dict, optional Chunking layout to be written to new files. If None, chunking will be left to the relevant backend engine. overwrite : bool @@ -65,8 +68,12 @@ def write_dataset( if isinstance(output_path, str): output_path = Path(output_path) - outfile = name_output_file(ds, output_format) - outfile_path = output_path.joinpath(outfile) + if not output_name: + output_name = name_output_file(ds, output_format) + else: + output_name = str(output_name) + + outfile_path = output_path.joinpath(output_name) if overwrite and outfile_path.exists(): msg = f"Removing existing {output_format} files for {outfile}." @@ -77,8 +84,15 @@ def write_dataset( outfile_path.unlink() if chunks is None and "frequency" in ds.attrs: - freq = ds.attrs["frequency"] # TOD0: check that this is really there - chunks = fetch_chunk_config(priority="time", freq=freq, dims=ds.dims) + freq = ds.attrs.get("frequency") + if not freq: + raise ValueError( + "If 'chunks' are not provided, the 'frequency' attribute must be set." + ) + if "lat" in ds.dims and "lon" in ds.dims: + chunks = fetch_chunk_config(priority="time", freq=freq, dims=ds.dims) + elif "lat" not in ds.dims and "lon" not in ds.dims: + chunks = fetch_chunk_config(priority="stations", freq=freq, dims=ds.dims) msg = f"Writing {outfile}." 
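# A minimal usage sketch of the new `output_name` override in write_dataset,
# assuming a hypothetical dataset and output path. When `chunks` is omitted,
# the chunk layout is now derived from the dataset's "frequency" attribute,
# with the station-oriented configuration chosen when no lat/lon dimensions
# are present.
import xarray as xr

from miranda.io import write_dataset

ds = xr.open_dataset("tasmax_eccc_hourly.nc")  # hypothetical input file
ds.attrs["frequency"] = "1hr"
write_dataset(
    ds,
    output_path="converted/tasmax",
    output_format="zarr",
    output_name="tasmax_eccc_hourly.zarr",  # overrides name_output_file()
    overwrite=True,
)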
logging.info(msg) diff --git a/src/miranda/io/data/ouranos_chunk_config.json b/src/miranda/io/data/ouranos_chunk_config.json index 2ac759b7..0f18928d 100644 --- a/src/miranda/io/data/ouranos_chunk_config.json +++ b/src/miranda/io/data/ouranos_chunk_config.json @@ -37,6 +37,20 @@ } } }, + "stations": { + "1hr": { + "default": { + "station": 50, + "time": "5 years" + } + }, + "day": { + "default": { + "station": 200, + "time": "10 years" + } + } + }, "time": { "1hr": { "default": { diff --git a/src/miranda/io/utils.py b/src/miranda/io/utils.py index 00264ba4..f192b1ca 100644 --- a/src/miranda/io/utils.py +++ b/src/miranda/io/utils.py @@ -37,7 +37,9 @@ def name_output_file( - ds_or_dict: xr.Dataset | dict[str, str], output_format: str + ds_or_dict: xr.Dataset | dict[str, str], + output_format: str, + data_vars: str | None = None, ) -> str: """ Name an output file based on facets within a Dataset or a dictionary. @@ -48,6 +50,8 @@ def name_output_file( A miranda-converted Dataset or a dictionary containing the appropriate facets. output_format : {"netcdf", "zarr"} Output filetype to be used for generating filename suffix. + data_vars : str, optional + If using a Dataset, the name of the data variable to be used for naming the file. Returns ------- @@ -68,7 +72,9 @@ def name_output_file( facets["suffix"] = suffix if isinstance(ds_or_dict, xr.Dataset): - if len(ds_or_dict.data_vars) == 1: + if data_vars is not None: + facets["variable"] = data_vars + elif len(ds_or_dict.data_vars) == 1: facets["variable"] = list(ds_or_dict.data_vars.keys())[0] elif ( len(ds_or_dict.data_vars) == 2 @@ -79,7 +85,7 @@ def name_output_file( ][0] else: raise NotImplementedError( - f"Too many `data_vars` in Dataset: {' ,'.join(ds_or_dict.data_vars.keys())}." + f"Too many `data_vars` in Dataset: {', '.join(ds_or_dict.data_vars.keys())}." ) for f in [ "bias_adjust_project", diff --git a/src/miranda/preprocess/__init__.py b/src/miranda/preprocess/__init__.py new file mode 100644 index 00000000..84c999af --- /dev/null +++ b/src/miranda/preprocess/__init__.py @@ -0,0 +1,7 @@ +"""Preprocessing tools for Miranda.""" + +from __future__ import annotations + +from ._eccc_ahccd import * +from ._eccc_obs import * +from ._eccc_summaries import * diff --git a/src/miranda/preprocess/_eccc_ahccd.py b/src/miranda/preprocess/_eccc_ahccd.py new file mode 100644 index 00000000..0bc553e4 --- /dev/null +++ b/src/miranda/preprocess/_eccc_ahccd.py @@ -0,0 +1,326 @@ +"""Adjusted and Homogenized Canadian Clime Data module.""" + +from __future__ import annotations + +import calendar +import logging.config +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xr + +from miranda.io import write_dataset +from miranda.io.utils import name_output_file +from miranda.preprocess._metadata import ( + eccc_variable_metadata, + homogenized_column_definitions, +) +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments import find_project_variable_codes, load_json_data_mappings + +logging.config.dictConfig(LOGGING_CONFIG) +logger = logging.Logger("miranda") + +__all__ = ["convert_ahccd", "convert_ahccd_fwf_file", "merge_ahccd"] + + +def convert_ahccd_fwf_file( + ff: Path | str, + metadata: pd.DataFrame, + variable: str, + *, + generation: int, +) -> xr.Dataset: + """Convert AHCCD fixed-width files. 
+
+    Parameters
+    ----------
+    ff: str or Path
+    metadata: pandas.DataFrame
+    variable: str
+    generation: int
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    configuration = load_json_data_mappings("eccc-ahccd")
+    code = find_project_variable_codes(variable, configuration)
+
+    variable_meta, global_attrs = eccc_variable_metadata(
+        code, "eccc-ahccd", generation, configuration
+    )
+    column_names, column_spaces, column_dtypes, header = homogenized_column_definitions(
+        code
+    )
+
+    df = pd.read_fwf(ff, header=header, colspecs=column_spaces, dtype=column_dtypes)
+
+    # Handle different variable types
+    if "pr" in variable:
+        cols = list(df.columns[0:3])
+        cols = cols[0::2]
+        cols.extend(list(df.columns[4::2]))
+        flags = list(df.columns[5::2])
+        dfflags = df[flags]
+    elif "tas" in variable:
+        cols = [c for c in df.columns if "Unnamed" not in c]
+        flags = [c for c in df.columns if "Unnamed" in c]
+        dfflags = df[flags[2:]]
+    else:
+        raise NotImplementedError(f"Variable `{variable}` not supported.")
+
+    # Extract relevant columns
+    df = df[cols]
+    df.replace(variable_meta[variable]["NaN_value"], np.nan, inplace=True)
+
+    for i, j in enumerate(["Year", "Month"]):
+        df = df.rename(columns={df.columns[i]: j})
+    start_date = f"{df['Year'][0]}-{str(df['Month'][0]).zfill(2)}-01"
+
+    _, ndays = calendar.monthrange(df["Year"].iloc[-1], df["Month"].iloc[-1])
+    end_date = f"{df['Year'].iloc[-1]}-{str(df['Month'].iloc[-1]).zfill(2)}-{str(ndays).zfill(2)}"
+    time1 = pd.date_range(start=start_date, end=end_date)
+
+    index = pd.MultiIndex.from_arrays([df["Year"], df["Month"]])
+    df.index = index
+    cols = [c for c in df.columns if "Year" not in c and "Month" not in c]
+    df = df[cols]
+    df.columns = np.arange(1, 32)
+    ds = df.stack().to_frame()
+    ds = ds.rename(columns={0: variable})
+    ds.index.names = ["Year", "Month", "Day"]
+
+    dfflags.index = index
+    dfflags.columns = np.arange(1, 32)
+    ds_flag = dfflags.stack().to_frame()
+    ds_flag = ds_flag.rename(columns={0: "flag"})
+    ds_flag.index.names = ["Year", "Month", "Day"]
+
+    ds[f"{variable}_flag"] = ds_flag["flag"]
+    del ds_flag
+
+    # find invalid dates
+    for y in time1.year.unique():
+        for m in (
+            ds[ds.index.get_level_values("Year") == y]
+            .index.get_level_values("Month")
+            .unique()
+        ):
+            _, exp_ndays = calendar.monthrange(y, m)
+            ndays = (
+                (ds.index.get_level_values("Year") == y)
+                & (ds.index.get_level_values("Month") == m)
+            ).sum()
+            if ndays > int(exp_ndays):
+                logger.error(f"year {y}, month {m}, ndays={ndays}, exp_ndays={exp_ndays}")
+                raise RuntimeError("Unknown days present.")
+
+    time_ds = pd.DataFrame(
+        {
+            "year": ds.index.get_level_values("Year"),
+            "month": ds.index.get_level_values("Month"),
+            "day": ds.index.get_level_values("Day"),
+        }
+    )
+
+    ds.index = pd.to_datetime(time_ds)  # noqa
+    ds = ds.to_xarray().rename({"index": "time"})
+    ds_out = xr.Dataset(coords={"time": time1})
+    for v in ds.data_vars:
+        ds_out[v] = ds[v]
+
+    ds_out[variable].attrs = variable_meta[variable]
+    metadata = metadata.to_xarray().rename({"index": "station"}).drop_vars("station")
+    metadata = metadata.assign_coords(dict(station_name=metadata["station_name"]))
+    ds_out = ds_out.assign_coords(station=metadata.stnid.astype(str))
+    metadata = metadata.drop_vars(["stnid", "station_name"])
+
+    ds_out[f"{variable}_flag"].attrs["long_name"] = variable_meta[variable]["long_name"]
+
+    ds_out["lon"] = metadata["long"]
+    ds_out.lon.attrs["units"] = "degrees_east"
+    ds_out.lon.attrs["axis"] = "X"
+    ds_out["lat"] = metadata["lat"]
+    ds_out.lat.attrs["units"] = "degrees_north"
+    ds_out.lat.attrs["axis"] = "Y"
+    ds_out["elev"] = metadata["elev"]
+    ds_out.elev.attrs["units"] = "meters"
+    ds_out.elev.attrs["positive"] = "up"
+    ds_out.elev.attrs["axis"] = "Z"
+    metadata = metadata.drop_vars(["long", "lat", "elev"])
+    for vv in metadata.data_vars:
+        if metadata[vv].dtype == "O" and (variable not in vv):
+            ds_out[vv] = metadata[vv].astype(str)
+        else:
+            ds_out[vv] = metadata[vv]
+    return ds_out
+
+
+def convert_ahccd(
+    data_source: str | Path,
+    output_dir: str | Path,
+    variable: str,
+    *,
+    generation: int,
+    merge: bool = False,
+    overwrite: bool = False,
+) -> None:
+    """Convert Adjusted and Homogenized Canadian Climate Dataset files.
+
+    Parameters
+    ----------
+    data_source: str or Path
+    output_dir: str or Path
+    variable: str
+    generation: int
+    merge: bool
+    overwrite: bool
+
+    Returns
+    -------
+    None
+    """
+    configuration = load_json_data_mappings("eccc-ahccd")
+
+    output_dir = Path(output_dir).resolve().joinpath(variable)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    code = find_project_variable_codes(variable, configuration)
+    variable_meta, global_attrs = eccc_variable_metadata(
+        code, "eccc-ahccd", generation, configuration
+    )
+    (
+        column_names,
+        column_spaces,
+        column_dtypes,
+        header_row,
+    ) = homogenized_column_definitions(code)
+
+    gen = {2: "Second", 3: "Third"}.get(generation)
+    if generation == 3 and code in {"dx", "dn", "dm"}:
+        station_meta = "ahccd_gen3_temperature.csv"
+    elif generation == 2 and code in {"dt", "ds", "dr"}:
+        station_meta = "ahccd_gen2_precipitation.csv"
+    else:
+        raise NotImplementedError(f"Code '{code}' is not supported for generation {gen}.")
+    metadata_source = (
+        Path(__file__).resolve().parent.joinpath("configs").joinpath(station_meta)
+    )
+
+    if "tas" in variable:
+        metadata = pd.read_csv(metadata_source, header=2)
+        metadata.columns = column_names.keys()
+
+    elif "pr" in variable:
+        metadata = pd.read_csv(metadata_source, header=3)
+        metadata.columns = column_names.keys()
+        for index, row in metadata.iterrows():
+            if isinstance(row["stnid"], str):
+                metadata.loc[index, "stnid"] = metadata.loc[index, "stnid"].replace(
+                    " ", ""
+                )
+    else:
+        raise KeyError(f"{variable} does not include 'pr' or 'tas'.")
+
+    # Convert station .txt files to NetCDF
+    for ff in Path(data_source).glob(f"{code}*.txt"):
+        output_name = ff.name.replace(".txt", ".nc")
+        if not output_dir.joinpath(output_name).exists() or overwrite:
+            logger.info(ff.name)
+
+            station_id = ff.stem[2:]
+            metadata_st = metadata[metadata["stnid"] == station_id]
+
+            if len(metadata_st) == 1:
+                ds_out = convert_ahccd_fwf_file(
+                    ff, metadata_st, variable, generation=generation
+                )
+                ds_out.attrs = global_attrs
+
+                write_dataset(
+                    ds_out,
+                    output_dir,
+                    output_format="netcdf",
+                    output_name=output_name,
+                    overwrite=overwrite,
+                    compute=True,
+                )
+            else:
+                msg = f"Metadata info for station {ff.name} not found: Skipping..."
+                logger.warning(msg)
+        else:
+            msg = f"{output_name} already exists: Skipping..."
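# A minimal end-to-end sketch of the conversion entry point above; the folder
# names are hypothetical. Generation-3 temperature codes are dx/dn/dm and
# generation-2 precipitation codes are dt/ds/dr, as encoded in the branch above.
from miranda.preprocess import convert_ahccd

convert_ahccd(
    "/data/ahccd/raw",  # folder of {code}*.txt station files
    "/data/ahccd/netcdf",  # a per-variable subfolder is created here
    "tasmax",  # resolved to code "dx" via the eccc-ahccd mappings
    generation=3,
    merge=True,  # also build a single merged, multi-station file
    overwrite=False,
)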
+ logger.info(msg) + if merge: + merge_ahccd(data_source, output_dir, variable) + return + + +def merge_ahccd( + data_source: str | Path, + output_dir: str | Path | None = None, + variable: str | None = None, + overwrite: bool = False, +) -> None: + """Merge Adjusted and Homogenized Canadian Climate Dataset files.""" + configuration = load_json_data_mappings("eccc-ahccd") + + if variable: + code = find_project_variable_codes(variable, configuration) + glob_pattern = f"{code}*.nc" + output_dir = Path(output_dir).resolve().joinpath(variable) + else: + glob_pattern = "*.nc" + output_dir = Path(output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + # Merge individual stations to single .nc file + ds_ahccd = xr.open_mfdataset( + list(data_source.glob(glob_pattern)), concat_dim="station", combine="nested" + ) + + for coord in ds_ahccd.coords: + # xarray object datatypes mix string and int (e.g. station) convert to string for merged nc files + # Do not apply to datetime object + if coord != "time" and ds_ahccd[coord].dtype == "O": + ds_ahccd[coord] = ds_ahccd[coord].astype(str) + + variables_found = set() + for v in ds_ahccd.data_vars: + # xarray object datatypes mix string and int (e.g. station) convert to string for merged nc files + # Do not apply to flag timeseries + if ds_ahccd[v].dtype == "O" and "flag" not in v: + ds_ahccd[v] = ds_ahccd[v].astype(str) + try: + variables_found.add(find_project_variable_codes(str(v), configuration)) + except NotImplementedError: + msg = f"Variable {v} not found in metadata." + logging.info(msg) + pass + + # Name output file + ds_ahccd.attrs["variable"] = ", ".join(variables_found) + if len(variables_found) > 1: + variables = "-".join(variables_found) + msg = f"Many variables found. Merging station and variables files in {data_source}." + logger.info(msg) + else: + variables = variables_found.pop() + output_name = name_output_file(ds_ahccd, "netcdf", variables) + + try: + msg = f"Writing merged file to: {output_dir}." + logger.info(msg) + write_dataset( + ds_ahccd, + output_dir, + output_format="netcdf", + output_name=output_name, + overwrite=overwrite, + compute=True, + ) + del ds_ahccd + except FileExistsError: + logger.info("Merged file already exists. 
Use overwrite=`True` to overwrite.") diff --git a/src/miranda/preprocess/_eccc_obs.py b/src/miranda/preprocess/_eccc_obs.py new file mode 100644 index 00000000..b60eb023 --- /dev/null +++ b/src/miranda/preprocess/_eccc_obs.py @@ -0,0 +1,836 @@ +"""Specialized conversion tools for Environment and Climate Change Canada / Meteorological Service of Canada data.""" + +from __future__ import annotations + +import functools +import logging +import multiprocessing as mp +import os +import re +import tempfile +import time + +# from calendar import monthrange +from datetime import datetime as dt +from logging import config +from pathlib import Path +from typing import Any + +import dask.dataframe as dd + +# import numpy as np +import pandas as pd +import xarray as xr +from dask.diagnostics import ProgressBar + +from miranda.archive import group_by_length +from miranda.preprocess._metadata import eccc_variable_metadata, obs_column_definitions +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments import find_project_variable_codes, load_json_data_mappings +from miranda.vocabularies.eccc import obs_vocabularies + +# from xclim.core.units import convert_units_to + + +config.dictConfig(LOGGING_CONFIG) + +__all__ = [ + "convert_station", + "merge_converted_variables", + "merge_stations", +] +TABLE_DATE = dt.now().strftime("%d %B %Y") + + +def _remove_duplicates(ds): + if any(ds.get_index("time").duplicated()): + msg = ( + f"Found {ds.get_index('time').duplicated().sum()} duplicated time coordinates " + f"for station {ds.station_id.values}. Assuming first value." + ) + logging.info(msg) + return ds.sel(time=~ds.get_index("time").duplicated()) + + +def convert_observation( + data_source: str | Path | list[str | Path], + output_dir: str | Path, + variable: str, + *, + generation: int | None = None, + merge: bool = False, + overwrite: bool = False, +): + """Convert a single station's data from the fixed-width format to a netCDF file.""" + output_dir = Path(output_dir).resolve().joinpath(variable) + output_dir.mkdir(parents=True, exist_ok=True) + + code = find_project_variable_codes(variable, "eccc-obs") + var_meta, global_attrs = eccc_variable_metadata(code, "eccc-obs", generation) + ( + column_names, + column_spaces, + column_dtypes, + header_row, + ) = obs_column_definitions(code) + + archives = list() + if isinstance(data_source, list) or Path(data_source).is_file(): + archives.append(data_source) + else: + tables = [ + str(repository.keys()) + for repository in obs_vocabularies + if code in repository.values() + ] + msg = ( + f"Collecting files for variable '{variable}'. " + f"Filename patterns containing variable code '{code}: {', '.join(tables)}'." 
+        )
+        logging.info(msg)
+        for table in tables:
+            archives.extend([f for f in Path(data_source).rglob(f"{table}*.gz")])
+
+    # Create the output directory
+    output_variable_dir = Path(output_dir).joinpath(variable)
+    output_variable_dir.mkdir(parents=True, exist_ok=True)
+
+    # Loop on the files
+    errored_files = []
+    for file in archives:
+        # FIXME: convert the file using the appropriate function
+        pass
+
+    if errored_files:
+        msg = "Some files failed to be properly parsed:\n" + ", ".join(errored_files)
+        logging.warning(msg)
+
+
+def convert_station(
+    data: str | os.PathLike,
+    variable: str,
+    mode: str,
+    # missing_flags: set[str],
+    # missing_values: set[str],
+    using_dask_array: bool = False,
+    *,
+    client: Any,
+    **kwargs,
+):
+    """Convert a single station's data from the fixed-width format to a netCDF file."""
+    data = Path(data)
+    variable_code = find_project_variable_codes(variable, "eccc-obs")
+    column_names, column_widths, column_dtypes, header = obs_column_definitions(mode)
+
+    # if not missing_values:
+    #     missing_values = {-9999, "#####"}
+
+    if using_dask_array:
+        pandas_reader = dd
+        # set the block size to 200 MB and let dask assume missing values
+        chunks = dict(blocksize=200 * 2**20, assume_missing=True)
+    else:
+        pandas_reader = pd
+        chunks = dict()
+        using_dask_array = False
+
+    # Create a dataframe from the files
+    try:
+        df = pandas_reader.read_fwf(
+            data,
+            widths=column_widths,
+            names=column_names,
+            dtype={
+                name: data_type for name, data_type in zip(column_names, column_dtypes)
+            },
+            **chunks,
+        )
+        if using_dask_array:
+            df = client.persist(df)
+
+    except FileNotFoundError as e:
+        msg = f"File {data} was not found: {e}"
+        logging.error(msg)
+        raise FileNotFoundError(msg) from e
+
+    except UnicodeDecodeError as e:
+        msg = f"File {data.name} was unable to be read. This is probably an issue with the file: {e}"
+        logging.error(msg)
+        raise
+
+    # Loop through the station codes
+    station_codes = df["code"].unique()
+    for code in station_codes:
+        df_code = df[df["code"] == code]
+
+        # Abort if the variable is not found
+        if using_dask_array:
+            has_variable_codes = (
+                (df_code["code_var"] == variable_code).compute()
+            ).any()
+        else:
+            has_variable_codes = (df_code["code_var"] == variable_code).any()
+        if not has_variable_codes:
+            msg = f"Variable `{variable}` not found for station code: {code} in file {data}. Continuing..."
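# A usage sketch for large archives, assuming `dask.distributed` is installed
# and that "tas_dry" resolves to an hourly variable code; the archive name is
# hypothetical. With `using_dask_array=True`, the fixed-width file is read
# through dask.dataframe in ~200 MB blocks and persisted on the cluster before
# the per-station loop below runs.
from dask.distributed import Client

from miranda.preprocess import convert_station

client = Client(n_workers=2, threads_per_worker=2)
convert_station(
    "HLY01_ARCHIVE.gz",  # hypothetical ECCC fixed-width archive
    variable="tas_dry",
    mode="hourly",
    using_dask_array=True,
    client=client,
)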
+ logging.info(msg) + continue + + # # Perform the data treatment + # logging.info(f"Converting `{variable}` for station code: {code}") + # + # # Dump the data into a DataFrame + # df_var = df_code[df_code["code_var"] == variable_code].copy() + # + # # Mask the data according to the missing values flag + # df_var = df_var.replace(missing_values, np.nan) + # + # # Decode the values and flags + # dfd = df_var.loc[:, [f"D{i:0n}" for i in range(1, num_observations + 1)]] + # dff = df_var.loc[:, [f"F{i:0n}" for i in range(1, num_observations + 1)]] + # + # # Remove the "NaN" flag + # dff = dff.fillna("") + # + # # Use the flag to mask the values + # try: + # val = np.asarray(dfd.values, float) + # except ValueError as e: + # logging.error(f"{e} raised from {dfd}, continuing...") + # continue + # try: + # flag = np.asarray(dff.values, str) + # except ValueError as e: + # logging.error(f"{e} raised from {dff}, continuing...") + # continue + # mask = np.isin(flag, missing_flags) + # val[mask] = np.nan + # + # # Treat according to units conversions + # val = val * scale_factor + add_offset + + # Create the DataArray + # date_summations = dict(time=list()) + # if mode == "hourly": + # for index, row in df_var.iterrows(): + # period = pd.Period( + # year=row.year, month=row.month, day=row.day, freq="D" + # ) + # dates = pd.Series( + # pd.date_range( + # start=period.start_time, + # end=period.end_time, + # freq="H", + # ) + # ) + # date_summations["time"].extend(dates) + # written_values = val.flatten() + # written_flags = flag.flatten() + # elif mode == "daily": + # value_days = list() + # flag_days = list() + # for i, (index, row) in enumerate(df_var.iterrows()): + # period = pd.Period(year=row.year, month=row.month, freq="M") + # dates = pd.Series( + # pd.date_range( + # start=period.start_time, + # end=period.end_time, + # freq="D", + # ) + # ) + # date_summations["time"].extend(dates) + # + # value_days.extend( + # val[i][range(monthrange(int(row.year), int(row.month))[1])] + # ) + # flag_days.extend( + # flag[i][range(monthrange(int(row.year), int(row.month))[1])] + # ) + # written_values = value_days + # written_flags = flag_days + # + # ds = xr.Dataset() + # da_val = xr.DataArray(written_values, coords=date_summations, dims=["time"]) + # + # if raw_units != units: + # da_val.attrs["units"] = raw_units + # da_val = convert_units_to(da_val, units) + # else: + # da_val.attrs["units"] = units + # + # da_val = da_val.rename(nc_name) + # variable_attributes = dict( + # variable_code=variable_code, + # standard_name=standard_name, + # long_name=long_name, + # ) + # if "original_units" in kwargs: + # variable_attributes["original_units"] = kwargs["original_units"] + # da_val.attrs.update(variable_attributes) + # + # da_flag = xr.DataArray(written_flags, coords=date_summations, dims=["time"]) + # da_flag = da_flag.rename("flag") + # flag_attributes = dict( + # long_name="data flag", + # note="See ECCC technical documentation for details", + # ) + # da_flag.attrs.update(flag_attributes) + # + # ds[nc_name] = da_val + # ds["flag"] = da_flag + # + # # save the file in NetCDF format + # start_year = ds.time.dt.year.values[0] + # end_year = ds.time.dt.year.values[-1] + # + # station_folder = output_path.joinpath(str(code)) + # station_folder.mkdir(parents=True, exist_ok=True) + # + # f_nc = ( + # f"{code}_{variable_code}_{nc_name}_" + # f"{start_year if start_year == end_year else '_'.join([str(start_year), str(end_year)])}.nc" + # ) + # + # if station_folder.joinpath(f_nc).exists(): + # 
logging.warning(f"File `{f_nc}` already exists. Continuing...") + # + # history = ( + # f"{dt.now().strftime('%Y-%m-%d %X')} converted from flat station file " + # f"(`{file.name}`) to n-dimensional array." + # ) + # + # # TODO: This info should eventually be sourced from a JSON definition + # global_attrs = dict( + # Conventions="CF-1.8", + # comment="Acquired on demand from data specialists at " + # "ECCC Climate Services / Services Climatiques.", + # contact="John Richard", + # contact_email="climatcentre-climatecentral@ec.gc.ca", + # domain="CAN", + # ) + # if mode == "hourly": + # global_attrs.update(dict(frequency="1hr")) + # elif mode == "daily": + # global_attrs.update(dict(frequency="day")) + # global_attrs.update( + # dict( + # history=history, + # internal_comment=f"Converted by {os.environ.get('USER', os.environ.get('USERNAME'))}.", + # institution="ECCC", + # license="https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + # member=code, + # processing_level="raw", + # redistribution="Redistribution permitted.", + # references="https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + # source="historical-station-records", + # table_date=TABLE_DATE, + # title="Environment and Climate Change Canada (ECCC) weather station observations", + # type="station-obs", + # usage="The original data is owned by the Government of Canada (Environment and Climate " + # "Change Canada), and falls under the licence agreement for use of Environment and " + # "Climate Change Canada data", + # variable=str(nc_name), + # version=f"v{dt.now().strftime('%Y.%m.%V')}", # Year.Month.Week + # ) + # ) + # ds.attrs.update(global_attrs) + # + # logging.info(f"Exporting to: {station_folder.joinpath(f_nc)}") + # ds.to_netcdf(station_folder.joinpath(f_nc)) + # del ds + # del val + # del mask + # del flag + # del da_val + # del da_flag + # del dfd + # del dff + # del written_values + # del written_flags + # del date_summations + # + # del df + + +def merge_stations( + source_files: str | os.PathLike | None = None, + output_folder: str | os.PathLike | None = None, + *, + time_step: str, + variables: str | int | list[str | int] | None = None, + include_flags: bool = True, + groupings: int | None = None, + mf_dataset_freq: str | None = None, + temp_directory: str | os.PathLike | None = None, + n_workers: int = 1, +) -> None: + """Merge stations. + + Parameters + ---------- + source_files : str or Path + Source files to be aggregated. + output_folder : str or Path + Output folder for the aggregated files. + variables : str or int or list of str or int, optional + The variable codes to be aggregated. + time_step : {"hourly", "daily"} + The time step to be used for aggregation. + include_flags : bool + Include flags in the output files. + groupings : int + The number of files in each group used for converting to multi-file Datasets. + mf_dataset_freq : str, optional + Resampling frequency for creating output multi-file Datasets. E.g. 'YS': 1 year per file, '5YS': 5 years per file. + temp_directory : str or Path, optional + Use another temporary directory location in case default location is not spacious enough. + n_workers : int + The number of workers to use. 
+
+    Returns
+    -------
+    None
+    """
+    func_time = time.time()
+
+    if isinstance(source_files, str):
+        source_files = Path(source_files)
+
+    if time_step.lower() in ["h", "hour", "hourly"]:
+        mode = "hourly"
+    elif time_step.lower() in ["d", "day", "daily"]:
+        mode = "daily"
+    else:
+        raise ValueError("Time step must be `h` / `hourly` or `d` / `daily`.")
+
+    if isinstance(variables, list):
+        pass
+    elif isinstance(variables, (str, int)):
+        variables = [variables]
+
+    # TODO: have the variable gathered from a JSON file
+    elif variables is None:
+        if mode == "hourly":
+            variables = [
+                89,
+                94,
+                123,
+            ]
+            variables.extend(range(76, 81))
+            variables.extend(range(262, 281))
+        elif mode == "daily":
+            variables = [1, 2, 3]
+            variables.extend(range(10, 26))
+    else:
+        raise NotImplementedError(f"Unsupported type for `variables`: {type(variables)}.")
+
+    for variable_code in variables:
+        info = load_json_data_mappings("eccc-obs")["variables"][variable_code]
+        variable_name = info["cf_variable_name"]
+        msg = f"Merging `{variable_name}` using `{time_step}` time step."
+        logging.info(msg)
+
+        # Only perform aggregation on available data with corresponding metadata
+        logging.info("Performing glob and sort.")
+        nc_list = [str(nc) for nc in source_files.joinpath(variable_name).rglob("*.nc")]
+
+        if not groupings:
+            groupings = max(n_workers**2, 4)
+
+        if nc_list:
+            nc_lists = group_by_length(nc_list, groupings)
+
+            with tempfile.TemporaryDirectory(
+                prefix="eccc", dir=temp_directory
+            ) as temp_dir:
+                combinations = sorted(
+                    (ii, nc, temp_dir, len(nc_lists)) for ii, nc in enumerate(nc_lists)
+                )
+
+                with mp.Pool(processes=n_workers) as pool:
+                    pool.starmap(_tmp_zarr, combinations)
+                    pool.close()
+                    pool.join()
+
+                zarrs_found = sorted(Path(temp_dir).glob("*.zarr"))
+                msg = f"Found {len(zarrs_found)} intermediary aggregation files."
+                logging.info(msg)
+
+                ds = xr.open_mfdataset(
+                    zarrs_found,
+                    engine="zarr",
+                    combine="nested",
+                    concat_dim="station",
+                )
+
+                if ds:
+                    station_file_codes = [Path(x).name.split("_")[0] for x in nc_list]
+                    if not include_flags:
+                        drop_vars = [vv for vv in ds.data_vars if "flag" in vv]
+                        ds = ds.drop_vars(drop_vars)
+                    ds = ds.sortby([ds.station_id, "time"])
+
+                    # Rearrange column order to have lon, lat, elev first
+                    # # FIXME: This doesn't work as intended - Assign coordinates instead
+                    # cols = meta.columns.tolist()
+                    # cols1 = [
+                    #     "latitude",
+                    #     "longitude",
+                    #     "elevation",
+                    # ]
+                    # for rr in cols1:
+                    #     cols.remove(rr)
+                    # cols1.extend(cols)
+                    # meta = meta[cols1]
+                    # meta.index.rename("station", inplace=True)
+                    # meta = meta.to_xarray()
+                    # meta.sortby(meta["climate_identifier"])
+                    # meta = meta.assign({"station": ds.station.values})
+
+                    # np.testing.assert_array_equal(
+                    #     sorted(meta["climate_identifier"].values), sorted(ds.station_id.values)
+                    # )
+                    # for vv in meta.data_vars:
+                    #     ds = ds.assign_coords({vv: meta[vv]})
+                    # ds = xr.merge([ds, meta])
+                    # ds.attrs = attrs1
+
+                    # The export must happen within the tempdir context, otherwise the data is erased before the final export!
+                    valid_stations = list(sorted(ds.station_id.values))
+                    valid_stations_count = len(valid_stations)
+
+                    msg = f"Processing stations for variable `{variable_name}`."
+                    logging.info(msg)
+
+                    if len(station_file_codes) == 0:
+                        msg = f"No stations were found containing variable filename `{variable_name}`. Exiting."
+                        logging.error(msg)
+                        return
+
+                    msg = (
+                        f"Files exist for {len(station_file_codes)} ECCC stations. "
+                        f"Metadata found for {valid_stations_count} stations. "
+                    )
+                    logging.info(msg)
+
+                    # FIXME: Is this still needed?
+                    # logging.info("Preparing the NetCDF time period.")
+                    # Create the time period timestamps
+                    # year_start = ds.time.dt.year.min().values
+                    # year_end = ds.time.dt.year.max().values
+
+                    # Calculate the time index dimensions of the output NetCDF
+                    # time_index = pd.date_range(
+                    #     start=f"{year_start}-01-01",
+                    #     end=f"{year_end + 1}-01-01",
+                    #     freq=mode[0].capitalize(),
+                    # )[:-1]
+                    # logging.info(
+                    #     f"Number of ECCC stations: {valid_stations_count}, time steps: {time_index.size}."
+                    # )
+
+                    Path(output_folder).mkdir(parents=True, exist_ok=True)
+                    file_out = Path(output_folder).joinpath(f"{variable_name}_eccc_{mode}")
+
+                    ds = ds.assign_coords(station=range(len(ds.station))).sortby("time")
+                    if mf_dataset_freq is not None:
+                        # Output the multi-file dataset using the resampling frequency
+                        _, datasets = zip(*ds.resample(time=mf_dataset_freq))
+                    else:
+                        datasets = [ds]
+
+                    paths = [
+                        f"{file_out}_{data.time.dt.year.min().values}-{data.time.dt.year.max().values}.nc"
+                        for data in datasets
+                    ]
+
+                    # FIXME: chunks need to be dealt with
+                    # chunks = [1, len(ds.time)]
+                    # comp = dict(zlib=True, complevel=5)  # , chunksizes=chunks)
+
+                    with ProgressBar():
+                        # FIXME: Looping seems to cause memory use to grow over time; use a pool of one or two workers instead?
+                        # for dataset, path in zip(datasets, paths):
+                        #     _export_agg_nc(dataset, path)
+                        combs = zip(datasets, paths)
+                        with mp.Pool(processes=2) as pool:
+                            pool.map(_export_agg_nc, combs)
+                            pool.close()
+                            pool.join()
+                    ds.close()
+                    del ds
+
+        else:
+            msg = f"No files found for variable: `{variable_name}`."
+            logging.info(msg)
+
+    runtime = f"Process completed in {time.time() - func_time:.2f} seconds."
+    logging.warning(runtime)
+
+
+def _export_agg_nc(args):
+    dataset, path = args
+    comp = dict(zlib=True, complevel=5)
+    encoding = {var: comp for var in dataset.data_vars}
+    dataset.load().to_netcdf(
+        path,
+        engine="h5netcdf",
+        format="NETCDF4_CLASSIC",
+        encoding=encoding,
+    )
+    dataset.close()
+    del dataset
+
+
+def _tmp_zarr(
+    iterable: int,
+    nc: list[str | os.PathLike],
+    tempdir: str | os.PathLike,
+    group: int | None = None,
+) -> None:
+    msg = (
+        f"Processing batch of files {iterable + 1}"
+        f"{' of ' + str(group) if group is not None else ''}."
+    )
+    logging.info(msg)
+    station_file_codes = [Path(x).name.split("_")[0] for x in nc]
+
+    try:
+        ds = xr.open_mfdataset(
+            nc, combine="nested", concat_dim="station", preprocess=_remove_duplicates
+        )
+    except ValueError as e:
+        errored_nc_files = ", ".join([Path(f).name for f in nc])
+        msg = f"Issues found with the following files: [{errored_nc_files}]: {e}"
+        logging.error(msg)
+        return
+
+    ds = ds.assign_coords(
+        station_id=xr.DataArray(station_file_codes, dims="station").astype(str)
+    )
+    if "flag" in ds.data_vars:
+        ds1 = ds.drop_vars("flag").copy(deep=True)
+        ds1["flag"] = ds.flag.astype(str)
+        ds = ds1
+
+    with ProgressBar():
+        ds.load().to_zarr(
+            Path(tempdir).joinpath(f"{str(iterable).zfill(4)}.zarr"),
+        )
+    del ds
+
+
+def _combine_years(
+    station_folder: str,
+    varia: str,
+    out_folder: str | os.PathLike,
+    meta_file: str | os.PathLike,
+    rejected: list[str],
+    _verbose: bool = False,
+) -> None:
+    nc_files = sorted(list(Path(station_folder).glob("*.nc")))
+    if len(nc_files):
+        msg = (
+            f"Found {len(nc_files)} files for station code {Path(station_folder).name}."
+        )
+        logging.info(msg)
+    else:
+        msg = f"No readings found for station code {Path(station_folder).name}. Continuing..."
+        logging.warning(msg)
+        return
+
+    # Remove a multi-year file when all of its years are also present as single-year files;
+    # otherwise, keep the multi-year file and drop the overlapping single-year files.
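+    # For example (hypothetical filenames): given `X_1990.nc`, `X_1991.nc`, and
+    # `X_1990_1991.nc`, the multi-year file is dropped because both of its years
+    # exist as single-year files; if `X_1991.nc` were absent, the single-year
+    # file would be dropped instead and the multi-year file kept.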
+    years_found = dict()
+    range_files_found = dict()
+    years_parsed = True
+    for f in nc_files:
+        groups = re.findall(r"_\d{4}", f.stem)
+        if len(groups) == 1:
+            year = int(groups[0].strip("_"))
+            years_found[year] = f
+        elif len(groups) == 2:
+            year_start, year_end = int(groups[0].strip("_")), int(groups[1].strip("_"))
+            range_files_found[f] = set(range(year_start, year_end + 1))
+        else:
+            msg = "Unable to parse years from the series. Continuing with the xarray solver..."
+            logging.warning(msg)
+            years_parsed = False
+            break
+    if years_parsed:
+        if len(range_files_found) > 0:
+            msg = (
+                f"Overlapping single-year and multi-year files found for station code {Path(station_folder).name}. "
+                "Removing overlaps."
+            )
+            logging.warning(msg)
+            for ranged_file, years in range_files_found.items():
+                if years.issubset(years_found.keys()):
+                    nc_files.remove(ranged_file)
+                else:
+                    missing_years = []
+                    for y in years:
+                        try:
+                            nc_files.remove(years_found[y])
+                        except (KeyError, ValueError):  # noqa: PERF203
+                            missing_years.append(str(y))
+                            continue
+                    if missing_years:
+                        msg = f"Missing years {', '.join(missing_years)} from multi-year file {ranged_file}."
+                        logging.warning(msg)
+
+        if years_found:
+            year_range = min(years_found.keys()), max(years_found.keys())
+            msg = (
+                "Year(s) covered: "
+                f"{year_range[0]}{'-' + str(year_range[1]) if year_range[0] != year_range[1] else ''}."
+            )
+            logging.info(msg)
+
+    if _verbose:
+        msg = f"Opening: {', '.join([p.name for p in nc_files])}"
+        logging.info(msg)
+    ds = xr.open_mfdataset(nc_files, combine="nested", concat_dim="time")
+    outfile = Path(out_folder).joinpath(
+        f'{nc_files[0].name.split(f"_{varia}_")[0]}_{varia}_'
+        f"{ds.time.dt.year.min().values}-{ds.time.dt.year.max().values}.nc"
+    )
+
+    df_inv = xr.open_dataset(meta_file)
+
+    station_id = ds.attrs["member"]
+    meta = df_inv.isel(index=df_inv.CLIMATE_IDENTIFIER == station_id)
+    meta = meta.rename({"index": "station", "CLIMATE_IDENTIFIER": "station_id"})
+    try:
+        meta = meta.assign_coords(station=[0])
+    except ValueError:
+        rejected.append(Path(station_folder).name)
+        msg = f"Something went wrong at the assign_coords step for station {Path(station_folder).name}. Continuing..."
+        logging.error(msg)
+        return
+    if len(meta.indexes) > 1:
+        raise ValueError("Found more than 1 station.")
+    elif len(meta.indexes) == 0:
+        rejected.append(Path(station_folder).name)
+        msg = f"No metadata found for station code {Path(station_folder).name}. Continuing..."
+        logging.warning(msg)
+        return
+
+    keep_coords = [
+        "time",
+        "station",
+        "station_id",
+        "latitude",
+        "longitude",
+        "elevation",
+    ]
+    for vv in meta.data_vars:
+        if str(vv).lower() not in keep_coords:
+            continue
+        ds = ds.assign_coords({str(vv).lower(): meta[vv]})
+
+    for vv in ds.data_vars:
+        if ds[vv].dtype == "O":
+            ds[vv] = ds[vv].astype(str)
+
+    if not outfile.exists():
+        msg = f"Merging to {outfile.name}."
+        logging.info(msg)
+        comp = dict(zlib=True, complevel=5)
+        encoding = {data_var: comp for data_var in ds.data_vars}
+        encoding["time"] = {"dtype": "single"}
+        with ProgressBar():
+            ds.to_netcdf(
+                outfile,
+                engine="h5netcdf",
+                format="NETCDF4_CLASSIC",
+                encoding=encoding,
+            )
+    else:
+        msg = f"Files exist for {outfile.name}. Continuing..."
+        logging.info(msg)
+
+
+def merge_converted_variables(
+    source_files: str | os.PathLike,
+    output_folder: str | os.PathLike,
+    variables: str | int | list[str | int] | None = None,
+    overwrite: bool = False,
+    n_workers: int = 1,
+) -> None:
+    """Merge converted variables into a single file per variable.
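+
+    Each variable's station folders are processed by a pool of workers;
+    station codes that cannot be merged are collected and reported at the end.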
+
+    Parameters
+    ----------
+    source_files : str or Path
+        Source folder containing the converted station files, one subfolder per variable.
+    output_folder : str or Path
+        Output folder for the merged files.
+    variables : str or int or list of str or int, optional
+        The variable codes to be merged.
+    overwrite : bool
+        Whether to overwrite variables that appear to have already been converted.
+    n_workers : int
+        The number of workers to use.
+
+    Returns
+    -------
+    None
+    """
+    meta = load_json_data_mappings("eccc-obs")
+    metadata_file = Path(tempfile.NamedTemporaryFile(suffix=".nc", delete=False).name)
+    meta.to_netcdf(metadata_file)
+
+    if isinstance(source_files, str):
+        source_files = Path(source_files)
+    if isinstance(output_folder, str):
+        output_folder = Path(output_folder)
+
+    selected_variables = list()
+    if variables is not None:
+        if not isinstance(variables, list):
+            variables = [variables]
+        selected_variables.extend(meta[var] for var in variables)
+
+    variables_found = [x.name for x in source_files.iterdir() if x.is_dir()]
+    if selected_variables:
+        variables_found = [
+            x
+            for x in variables_found
+            if x in [item["nc_name"] for item in selected_variables]
+        ]
+
+    for variable in variables_found:
+        msg = f"Merging files found for variable: `{variable}`."
+        logging.info(msg)
+        station_dirs = [
+            x for x in source_files.joinpath(variable).iterdir() if x.is_dir()
+        ]
+        msg = f"Number of stations found: {len(station_dirs)}."
+        logging.info(msg)
+
+        output_rep = output_folder.joinpath(variable)
+        Path(output_rep).mkdir(parents=True, exist_ok=True)
+
+        if (
+            len(list(output_rep.iterdir())) >= (len(meta.CLIMATE_IDENTIFIER) * 0.75)
+        ) and not overwrite:
+            msg = (
+                f"Variable `{variable}` appears to have already been converted; it will be skipped. "
+                f"To force conversion of this variable, set `overwrite=True`."
+            )
+            logging.warning(msg)
+            continue
+
+        manager = mp.Manager()
+        rejected_stations = manager.list()
+
+        combine_func = functools.partial(
+            _combine_years,
+            varia=variable,
+            out_folder=output_rep,
+            meta_file=metadata_file,
+            rejected=rejected_stations,
+        )
+
+        with mp.Pool(processes=n_workers) as pool:
+            pool.map(combine_func, station_dirs)
+            pool.close()
+            pool.join()
+
+        if rejected_stations:
+            msg = f"Rejected station codes are the following: {', '.join(rejected_stations)}."
+            logging.warning(msg)
diff --git a/src/miranda/eccc/_summaries.py b/src/miranda/preprocess/_eccc_summaries.py
similarity index 99%
rename from src/miranda/eccc/_summaries.py
rename to src/miranda/preprocess/_eccc_summaries.py
index f77d3649..6c8ce6f2 100755
--- a/src/miranda/eccc/_summaries.py
+++ b/src/miranda/preprocess/_eccc_summaries.py
@@ -32,8 +32,10 @@
 eccc_metadata = json.load(
     Path(__file__)
-    .parent.joinpath("eccc_obs_summary_cf_attrs.json")
-    .open("r", encoding="utf-8")
+    .resolve()
+    .parent.joinpath("configs")
+    .joinpath("eccc-obs-summary_attrs.json")
+    .open(encoding="utf-8")
 )["variable_entry"]
diff --git a/src/miranda/preprocess/_metadata.py b/src/miranda/preprocess/_metadata.py
new file mode 100644
index 00000000..3b65d2b9
--- /dev/null
+++ b/src/miranda/preprocess/_metadata.py
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from miranda import __version__ as __miranda_version__
+from miranda.treatments.utils import load_json_data_mappings
+
+__all__ = [
+    "eccc_variable_metadata",
+    "homogenized_column_definitions",
+    "obs_column_definitions",
+]
+
+
+def eccc_variable_metadata(
+    variable_code: str | int,
+    project: str,
+    generation: int | None = None,
+    metadata: dict | None = None,
+) -> dict[str, Any]:
+    """Return the metadata for a given variable code and project.
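+
+    Fields prefixed with an underscore in the JSON configuration (e.g.
+    `_miranda_version`, `_variable`) are treated as directives and are
+    resolved into concrete global attributes.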
+
+    Parameters
+    ----------
+    variable_code : str or int
+        The variable code to look up.
+    project : {"eccc-ahccd", "eccc-obs", "eccc-obs-summary"}
+        The project name.
+    generation : {1, 2, 3}, optional
+        The AHCCD generation (required for "eccc-ahccd").
+    metadata : dict, optional
+        Pre-loaded metadata mappings. If not provided, they are loaded from the project JSON.
+
+    Returns
+    -------
+    dict
+        A dictionary with `metadata` (variable attributes) and `header` (dataset attributes) entries.
+    """
+    if project == "eccc-ahccd":
+        generations = {1: "First", 2: "Second", 3: "Third"}
+        if generation not in generations:
+            raise NotImplementedError(f"Generation '{generation}' not supported.")
+        generation = generations[generation]
+    else:
+        generation = None
+
+    if not metadata:
+        metadata = load_json_data_mappings(project)
+
+    if isinstance(variable_code, int):
+        variable_code = str(variable_code).zfill(3)
+
+    # code = find_project_variable_codes(variable_code, metadata)
+
+    # Variable metadata
+    variable_meta = metadata["variables"].get(variable_code)
+    if variable_meta is None:
+        raise ValueError(f"No metadata found for variable code: {variable_code}")
+
+    variable_name = ""
+    variable_name_fields = ["_variable_name", "_cf_variable_name"]
+    if set(variable_name_fields).issubset(variable_meta.keys()):
+        for variable_field in variable_name_fields:
+            variable_name = variable_meta.get(variable_field)
+            if variable_name:
+                variable_meta["original_variable_code"] = variable_code
+                del variable_meta[variable_field]
+        variable_meta = {variable_name: variable_meta}
+    else:
+        variable_meta = {variable_code: variable_meta}
+    if not variable_name:
+        variable_name = variable_code
+
+    # Dataset metadata
+    header = metadata.get("Header")
+    # Static handling of version global attributes
+    miranda_version = header.get("_miranda_version")
+    if miranda_version:
+        if isinstance(miranda_version, bool):
+            header["miranda_version"] = __miranda_version__
+        elif isinstance(miranda_version, dict):
+            if project in miranda_version.keys():
+                header["miranda_version"] = __miranda_version__
+            else:
+                msg = f"`_miranda_version` not properly configured for project `{project}`. Not appending."
+                logging.warning(msg)
+    if "_miranda_version" in header:
+        del header["_miranda_version"]
+
+    to_delete = []
+    # Conditional handling of global attributes based on fields
+    for field in [f for f in header if f.startswith("_")]:
+        if isinstance(header[field], bool):
+            if header[field] and field == "_variable":
+                header[field[1:]] = variable_name
+        elif isinstance(header[field], dict) and generation:
+            attr_treatment = header[field]["generation"]
+            if field in ["_citation", "_product"]:
+                for attribute, value in attr_treatment.items():
+                    if attribute == generation:
+                        header[field[1:]] = value
+        else:
+            raise AttributeError(
+                f"Attribute treatment for field `{field}` is not properly configured. Verify the JSON."
+            )
+        to_delete.append(field)
+
+    for field in to_delete:
+        del header[field]
+
+    return dict(metadata=variable_meta, header=header)
+
+
+def homogenized_column_definitions(
+    variable_code: str,
+) -> tuple[dict, list[tuple[int, int]], dict[str, type[str | int | float] | Any], int]:
+    """Return the column names, widths, and data types for the AHCCD fixed-width format data.
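+
+    Temperature (`tas*`) and precipitation (`pr*`) series use different
+    fixed-width layouts and header rows; both are defined here.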
+
+    Parameters
+    ----------
+    variable_code : str
+        The AHCCD variable code.
+
+    Returns
+    -------
+    tuple[dict, list[tuple[int, int]], dict[str, type[str | int | float] | Any], int]
+        The column names, column spaces, column data types, and the header row index.
+    """
+    metadata = load_json_data_mappings("eccc-homogenized")
+
+    variable = metadata["variables"][variable_code]["_variable_name"]
+    if variable.startswith("tas"):
+        column_dtypes = {
+            "No": str,
+            "StnId": str,
+            "Station name": str,
+            "Prov": str,
+            "FromYear": int,
+            "FromMonth": int,
+            "ToYear": int,
+            "ToMonth": int,
+            "%Miss": float,
+            "Lat(deg)": float,
+            "Long(deg)": float,
+            "Elev(m)": int,
+            "Joined": str,
+            "RCS": str,
+        }
+        column_spaces = [(0, 5), (5, 6), (6, 8), (8, 9)]
+        ii = 9
+        # 31 days in a month
+        for i in range(1, 32):
+            column_spaces.append((ii, ii + 7))
+            ii += 7
+            column_spaces.append((ii, ii + 1))
+            ii += 1
+        header_row = 3
+
+    elif variable.startswith("pr"):
+        column_dtypes = {
+            "Prov": str,
+            "Station name": str,
+            "stnid": str,
+            "beg yr": int,
+            "beg mon": int,
+            "end yr": int,
+            "end mon": int,
+            "lat (deg)": float,
+            "long (deg)": float,
+            "elev (m)": int,
+            "stns joined": str,
+        }
+        column_spaces = [(0, 4), (4, 5), (5, 7), (7, 8)]
+        ii = 8
+        # 31 days in a month
+        for i in range(1, 32):
+            column_spaces.append((ii, ii + 8))
+            ii += 8
+            column_spaces.append((ii, ii + 1))
+            ii += 1
+        header_row = 0
+
+    else:
+        raise KeyError(f"Variable `{variable}` is not supported.")
+
+    column_names = {
+        col.lower().split("(")[0].replace("%", "pct_").strip().replace(" ", "_"): col
+        for col in list(column_dtypes.keys())
+    }
+
+    return column_names, column_spaces, column_dtypes, header_row
+
+
+def obs_column_definitions(
+    time_frequency: str,
+) -> tuple[list[str], list[int], list[type[str | int]], int]:
+    """Return the column names, widths, and data types for the fixed-width format."""
+    if time_frequency.lower() in ["h", "hour", "hourly"]:
+        num_observations = 24
+        column_names = ["code", "year", "month", "day", "code_var"]
+        column_widths = [7, 4, 2, 2, 3]
+        column_dtypes = [str, int, int, int, str]
+    elif time_frequency.lower() in ["d", "day", "daily"]:
+        num_observations = 31
+        column_names = ["code", "year", "month", "code_var"]
+        column_widths = [7, 4, 2, 3]
+        column_dtypes = [str, int, int, str]
+    else:
+        raise NotImplementedError(
+            "`time_frequency` must be 'h'/'hourly' or 'd'/'daily'."
+        )
+
+    header = 0
+
+    # Add the data and flag columns (one pair per observation)
+    for i in range(1, num_observations + 1):
+        data_entry, flag_entry = f"D{i:0n}", f"F{i:0n}"
+        column_names.append(data_entry)
+        column_names.append(flag_entry)
+        column_widths.extend([6, 1])
+        column_dtypes.extend([str, str])
+
+    return column_names, column_widths, column_dtypes, header
diff --git a/src/miranda/eccc/data/ahccd_gen2_precipitation.csv b/src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
similarity index 66%
rename from src/miranda/eccc/data/ahccd_gen2_precipitation.csv
rename to src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
index ce59df01..6f0c0f3a 100644
--- a/src/miranda/eccc/data/ahccd_gen2_precipitation.csv
+++ b/src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
@@ -8,23 +8,23 @@ BC,ARMSTRONG HULLCAR,1160483,1912,1,1998,12,50.5,-119.216666666667,505,Yes
BC,ATLIN,1200560,1906,1,2017,12,59.5666666666667,-133.7,674,No
BC,BARKERVILLE,1090660,1888,1,2015,3,53.0691666666667,-121.514722222222,1283,No
BC,BEAVERDELL NORTH,1130771,1926,1,2006,9,49.4783333333333,-119.047,838,Yes
-BC,BELLA COOLA ,1060841,1899,1,2017,11,52.3875,-126.595833333333,36,Yes
+BC,BELLA COOLA,1060841,1899,1,2017,11,52.3875,-126.595833333333,36,Yes
BC,BIG 
CREEK,1080870,1904,1,1998,11,51.6672236111111,-123.073056944444,1175,No -BC,BLUE RIVER ,1160899,1929,1,2017,12,52.1290277777778,-119.289527777778,683,Yes +BC,BLUE RIVER,1160899,1929,1,2017,12,52.1290277777778,-119.289527777778,683,Yes BC,BRISCO,1171020,1924,1,2004,3,50.8205555555556,-116.258055555556,823,No BC,BRITANNIA BEACH FURRY CREEK,1041050,1914,1,2000,4,49.5838888888889,-123.223611111111,9,Yes BC,BURQUITLAM VANCOUVER GOLF COURSE,1101200,1926,1,2005,12,49.2516666666667,-122.876944444444,122,Yes BC,CAPE SCOTT,1031353,1921,1,2016,6,50.7822333333333,-128.427227777778,72,Yes BC,CAPE ST JAMES,1051350,1926,1,1992,8,51.9333333333333,-131.016666666667,89,No BC,CASSIAR,1191440,1954,1,1996,8,59.2833333333333,-129.833333333333,1078,No -BC,CELISTA,116146F ,1924,1,2004,7,50.9555555555556,-119.379444444444,515,Yes +BC,CELISTA,116146F,1924,1,2004,7,50.9555555555556,-119.379444444444,515,Yes BC,CHATHAM POINT,1021480,1932,1,2016,2,50.3331944444444,-125.445555555556,23,Yes -BC,COMOX ,1021830,1936,1,2017,12,49.7166666666667,-124.9,26,Yes +BC,COMOX,1021830,1936,1,2017,12,49.7166666666667,-124.9,26,Yes BC,CORTES ISLAND TIBER BAY,1021960,1919,1,2017,12,50.0713888888889,-124.949444444444,15,Yes -BC,CRANBROOK ,1152102,1909,1,2012,11,49.6122222222222,-115.781944444444,939,Yes +BC,CRANBROOK,1152102,1909,1,2012,11,49.6122222222222,-115.781944444444,939,Yes BC,CRESTON,1142160,1912,1,2015,6,49.0970555555556,-116.517833333333,597,No BC,DARFIELD,1162265,1914,1,2017,11,51.2973333333333,-120.182666666667,412,Yes -BC,DAWSON CREEK ,1182285,1952,1,2007,2,55.7416666666667,-120.181944444444,655,Yes +BC,DAWSON CREEK,1182285,1952,1,2007,2,55.7416666666667,-120.181944444444,655,Yes BC,DEASE LAKE,1192340,1945,1,2008,7,58.428335,-130.010556666667,807,No BC,DEER PARK,1142400,1924,1,1995,9,49.4166666666667,-118.05,485,No BC,DRYAD POINT,1062544,1933,1,2017,12,52.1850005555556,-128.112224444444,4,Yes @@ -33,12 +33,12 @@ BC,ESTEVAN POINT,1032730,1924,1,2017,12,49.3835,-126.550833333333,7,No BC,FALLS RIVER,1062790,1932,1,1992,10,53.9833333333333,-129.733333333333,18,No BC,FAUQUIER,1142820,1913,1,2015,6,49.8719444444444,-118.0675,490,No BC,FERNIE,1152850,1914,1,2017,12,49.4888888888889,-115.072222222222,1001,No -BC,FORT NELSON ,1192940,1938,1,2012,11,58.8363888888889,-122.597222222222,382,No +BC,FORT NELSON,1192940,1938,1,2012,11,58.8363888888889,-122.597222222222,382,No BC,FORT ST JAMES,1092970,1895,1,2017,12,54.4552802777778,-124.285556111111,686,No -BC,FORT ST JOHN ,1183000,1931,1,2012,12,56.2380555555556,-120.740277777778,695,Yes +BC,FORT ST JOHN,1183000,1931,1,2012,12,56.2380555555556,-120.740277777778,695,Yes BC,GERMANSEN LANDING,1183090,1952,1,2013,11,55.7855277777778,-124.701444444444,766,No BC,GLACIER NP ROGERS PASS,1173191,1909,1,2014,7,51.3009166666667,-117.516388888889,1323,Yes -BC,GOLDEN ,1173210,1908,1,2017,12,51.2983333333333,-116.981666666667,785,No +BC,GOLDEN,1173210,1908,1,2017,12,51.2983333333333,-116.981666666667,785,No BC,GRAND FORKS,1133270,1910,1,2008,3,49.0261666666667,-118.465666666667,532,Yes BC,GRASMERE,1153282,1896,1,1993,11,49.0833333333333,-115.066666666667,869,Yes BC,HAZELTON TEMLEHAN,1073347,1915,1,1997,4,55.2,-127.733333333333,122,Yes @@ -58,7 +58,7 @@ BC,MASSET AIRPORT,1054920,1900,1,2008,6,54.0226111111111,-132.117472222222,7,Yes BC,MCINNES ISLAND,1065010,1954,1,2017,12,52.2616666666667,-128.719444444444,26,No BC,MERRITT STP,1125079,1919,1,2017,12,50.1141677777778,-120.800834722222,609,Yes BC,MICA DAM,1175122,1962,1,2017,12,52.0530555555556,-118.585277777778,579,No -BC,NANAIMO CITY YARD,10253G0 
,1913,1,2017,12,49.1988888888889,-123.987777777778,114,Yes +BC,NANAIMO CITY YARD,10253G0,1913,1,2017,12,49.1988888888889,-123.987777777778,114,Yes BC,NASS CAMP,1075384,1924,1,2015,2,55.2375,-129.029444444444,290,Yes BC,NELSON NE,1145442,1904,1,2017,12,49.5861111111111,-117.206388888889,570,Yes BC,NEW DENVER,1145460,1924,1,2017,12,49.995835,-117.370285,570,No @@ -68,186 +68,186 @@ BC,OOTSA L SKINS L SPILLWAY,1085835,1926,1,2017,7,53.7721666666667,-125.99655555 BC,OSOYOOS WEST,1125865,1954,1,2009,9,49.0319444444444,-119.442777777778,297,Yes BC,PACHENA POINT,1035940,1925,1,2017,12,48.7227777777778,-125.097222222222,37,No BC,PEMBERTON AIRPORT,1086082,1913,1,1991,6,50.3056461111111,-122.734088888889,204,Yes -BC,PENTICTON ,1126150,1907,1,2012,5,49.4630555555556,-119.602222222222,344,Yes +BC,PENTICTON,1126150,1907,1,2012,5,49.4630555555556,-119.602222222222,344,Yes BC,PORT ALICE,1036240,1924,1,2016,4,50.3858361111111,-127.455286111111,21,No -BC,PORT HARDY ,1026270,1944,1,2013,6,50.6802777777778,-127.366111111111,22,No +BC,PORT HARDY,1026270,1944,1,2013,6,50.6802777777778,-127.366111111111,22,No BC,POWELL RIVER,1046390,1924,1,2007,7,49.8761111111111,-124.554166666667,52,No -BC,PRINCE GEORGE ,1096450,1913,1,2009,10,53.8908333333333,-122.678888888889,691,Yes -BC,PRINCE RUPERT ,1066481,1909,1,2006,3,54.2925,-130.444722222222,35,Yes -BC,PRINCETON ,1126510,1901,1,2017,12,49.4677777777778,-120.5125,700,Yes +BC,PRINCE GEORGE,1096450,1913,1,2009,10,53.8908333333333,-122.678888888889,691,Yes +BC,PRINCE RUPERT,1066481,1909,1,2006,3,54.2925,-130.444722222222,35,Yes +BC,PRINCETON,1126510,1901,1,2017,12,49.4677777777778,-120.5125,700,Yes BC,QUATSINO,1036570,1895,1,2017,12,50.5336138888889,-127.653335833333,8,No -BC,QUESNEL ,1096630,1900,1,2007,3,53.0261111111111,-122.51,545,Yes +BC,QUESNEL,1096630,1900,1,2007,3,53.0261111111111,-122.51,545,Yes BC,QUINSAM RIVER HATCHERY,1026639,1936,1,2017,12,50.0161111111111,-125.303888888889,46,Yes BC,REVELSTOKE,1176751,1898,1,1999,8,50.9533333333333,-118.166388888889,450,Yes BC,SAANICHTON,1016940,1914,1,2017,12,48.6216666666667,-123.418888888889,61,No -BC,SALMON RM ,1166R45 ,1911,1,2013,2,50.6855577777778,-119.233613611111,527,Yes -BC,SANDSPIT ,1057050,1949,1,2017,3,53.2538888888889,-131.813055555556,6,No +BC,SALMON RM,1166R45,1911,1,2013,2,50.6855577777778,-119.233613611111,527,Yes +BC,SANDSPIT,1057050,1949,1,2017,3,53.2538888888889,-131.813055555556,6,No BC,SEYMOUR FALLS,1107200,1928,1,2003,9,49.4402777777778,-122.971111111111,244,No BC,SHALALTH,1117215,1935,1,2004,4,50.7283333333333,-122.240555555556,244,Yes BC,SHAWNIGAN LAKE,1017230,1911,1,2017,12,48.6469472222222,-123.626408333333,138,No -BC,SMITHERS ,1077500,1922,1,2017,12,54.8247222222222,-127.182777777778,522,Yes +BC,SMITHERS,1077500,1922,1,2017,12,54.8247222222222,-127.182777777778,522,Yes BC,STAVE FALLS,1107680,1910,1,2004,8,49.2333333333333,-122.366666666667,110,No -BC,STEWART ,1067742,1911,1,2016,6,55.9361111111111,-129.985,7,Yes +BC,STEWART,1067742,1911,1,2016,6,55.9361111111111,-129.985,7,Yes BC,STILLWATER POWER HOUSE,1047770,1931,1,2007,7,49.7666666666667,-124.316666666667,7,No BC,TATLAYOKO LAKE,1088010,1928,1,2005,4,51.6747222222222,-124.405,870,No -BC,TERRACE ,1068130,1913,1,2013,1,54.4663888888889,-128.5775,217,Yes +BC,TERRACE,1068130,1913,1,2013,1,54.4663888888889,-128.5775,217,Yes BC,TLELL,1058190,1950,1,1999,1,53.5,-131.95,5,No -BC,TOFINO ,1038205,1942,1,2017,12,49.0822222222222,-125.772505555556,24,No +BC,TOFINO,1038205,1942,1,2017,12,49.0822222222222,-125.772505555556,24,No BC,UCLUELET KENNEDY 
CAMP,1038332,1958,1,2017,12,48.9452833333333,-125.527236111111,30,Yes -BC,VANCOUVER ,1108447,1896,1,2013,6,49.195,-123.181944444444,4,Yes +BC,VANCOUVER,1108447,1896,1,2013,6,49.195,-123.181944444444,4,Yes BC,VAVENBY,1168520,1913,1,2017,12,51.5761111111111,-119.778055555556,445,No BC,VERNON BELLA VISTA,1128553,1900,1,2015,6,50.2643611111111,-119.308861111111,427,Yes -BC,VICTORIA ,1018620,1899,1,2013,7,48.647225,-123.425833333333,19,Yes +BC,VICTORIA,1018620,1899,1,2013,7,48.647225,-123.425833333333,19,Yes BC,WARFIELD,1148700,1928,1,2002,12,49.1,-117.75,606,No BC,WASA,1158730,1924,1,2017,12,49.8239722222222,-115.630777777778,930,No BC,WESTWOLD,1168880,1921,1,2013,5,50.4688911111111,-119.750556388889,609,No -BC,WILLIAMS LAKE ,1098940,1936,1,2012,12,52.1830555555556,-122.054166666667,940,Yes +BC,WILLIAMS LAKE,1098940,1936,1,2012,12,52.1830555555556,-122.054166666667,940,Yes NU,ALERT,2400306,1950,1,2017,12,82.5,-62.3333333333333,65,Yes -NU,BAKER LAKE ,2300500,1949,1,2013,11,64.2988888888889,-96.0777777777778,18,No -YK,BURWASH ,2100182,1967,1,2015,2,61.3666666666667,-139.05,807,No -NU,BYRON BAY ,2400595,1957,1,1993,6,68.75,-109.066666666667,92,No -NU,CAMBRIDGE BAY ,2400600,1940,1,2015,2,69.1080555555556,-105.138333333333,27,No -NU,CAPE DORSET ,2400635,1932,1,2014,11,64.2302777777778,-76.525,50,Yes -NU,CAPE DYER ,2400654,1960,1,1993,3,66.5833333333333,-61.6166666666667,393,No +NU,BAKER LAKE,2300500,1949,1,2013,11,64.2988888888889,-96.0777777777778,18,No +YK,BURWASH,2100182,1967,1,2015,2,61.3666666666667,-139.05,807,No +NU,BYRON BAY,2400595,1957,1,1993,6,68.75,-109.066666666667,92,No +NU,CAMBRIDGE BAY,2400600,1940,1,2015,2,69.1080555555556,-105.138333333333,27,No +NU,CAPE DORSET,2400635,1932,1,2014,11,64.2302777777778,-76.525,50,Yes +NU,CAPE DYER,2400654,1960,1,1993,3,66.5833333333333,-61.6166666666667,393,No NU,CAPE HOOPER,2400660,1958,1,2007,9,68.4725,-66.8152777777778,390,No -NT,CAPE PARRY ,2200675,1960,1,1993,3,70.1666666666667,-124.716666666667,87,No +NT,CAPE PARRY,2200675,1960,1,1993,3,70.1666666666667,-124.716666666667,87,No YK,CARMACKS,2100300,1964,1,2008,2,62.1,-136.3,525,No -NU,CHESTERFIELD INLET ,2300707,1931,1,2014,11,63.3469444444444,-90.7311111111111,10,Yes +NU,CHESTERFIELD INLET,2300707,1931,1,2014,11,63.3469444444444,-90.7311111111111,10,Yes NU,CLINTON POINT,2300750,1957,1,1993,6,69.5833333333333,-120.8,101,No -NU,CLYDE ,2400800,1946,1,2002,6,70.4861111111111,-68.5166666666667,27,No -NU,CORAL HARBOUR ,2301000,1945,1,2015,5,64.1933333333333,-83.3594444444445,64,No -YK,DAWSON ,2100402,1901,1,2015,2,64.0430555555556,-139.127777777778,370,Yes +NU,CLYDE,2400800,1946,1,2002,6,70.4861111111111,-68.5166666666667,27,No +NU,CORAL HARBOUR,2301000,1945,1,2015,5,64.1933333333333,-83.3594444444445,64,No +YK,DAWSON,2100402,1901,1,2015,2,64.0430555555556,-139.127777777778,370,Yes NU,DEWAR LAKES,2401030,1958,1,1993,3,68.65,-71.1666666666667,527,No YK,DRURY CREEK,2100460,1970,1,2009,4,62.2019444444444,-134.39,609,No NU,EUREKA,2401200,1948,1,2016,2,79.9833333333333,-85.9333333333333,10,No -NT,FORT GOOD HOPE ,2201400,1945,1,2014,11,66.2408333333333,-128.650833333333,82,No -NT,FORT MCPHERSON ,2201601,1932,1,2014,11,67.4077777777778,-134.860277777778,35,Yes +NT,FORT GOOD HOPE,2201400,1945,1,2014,11,66.2408333333333,-128.650833333333,82,No +NT,FORT MCPHERSON,2201601,1932,1,2014,11,67.4077777777778,-134.860277777778,35,Yes NT,FORT RELIANCE,2201903,1949,1,2007,8,62.7113888888889,-109.168333333333,168,Yes -NT,FORT RESOLUTION ,2202000,1931,1,2014,11,61.1808333333333,-113.689722222222,160,No -NT,FORT SIMPSON 
,2202101,1898,1,2014,10,61.7602777777778,-121.236666666667,169,Yes -NT,FORT SMITH ,2202200,1915,1,2014,11,60.0202777777778,-111.961944444444,205,Yes +NT,FORT RESOLUTION,2202000,1931,1,2014,11,61.1808333333333,-113.689722222222,160,No +NT,FORT SIMPSON,2202101,1898,1,2014,10,61.7602777777778,-121.236666666667,169,Yes +NT,FORT SMITH,2202200,1915,1,2014,11,60.0202777777778,-111.961944444444,205,Yes NU,FOX FIVE,2400570,1959,1,2007,9,67.5355555555556,-63.7888888888889,584,No -NU,GLADMAN POINT ,2402340,1957,1,1992,7,68.6666666666667,-97.8,14,No +NU,GLADMAN POINT,2402340,1957,1,1992,7,68.6666666666667,-97.8,14,No YK,HAINES JUNCTION,2100631,1945,1,2008,9,60.7495444444445,-137.50525,596,Yes -NU,HALL BEACH ,2402350,1957,1,2014,12,68.7758333333333,-81.2425,8,No -NT,HAY RIVER ,2202400,1909,1,2014,9,60.8397222222222,-115.782777777778,166,Yes +NU,HALL BEACH,2402350,1957,1,2014,12,68.7758333333333,-81.2425,8,No +NT,HAY RIVER,2202400,1909,1,2014,9,60.8397222222222,-115.782777777778,166,Yes NT,INUVIK,2202578,1957,1,2007,11,68.3166666666667,-133.516666666667,103,Yes NU,IQALUIT,2402592,1946,1,2007,11,63.7472222222222,-68.5444444444445,34,Yes -NU,JENNY LIND ISLAND ,2302650,1958,1,1992,7,68.65,-101.733333333333,18,No -YK,KOMAKUK BEACH ,2100685,1959,1,1993,6,69.5833333333333,-140.183333333333,7,No -NU,KUGAARUK ,2303092,1957,1,2012,8,68.5405555555556,-89.7972222222222,17,Yes -NU,KUGLUKTUK ,2300902,1931,1,2014,12,67.8166666666667,-115.143888888889,23,Yes -NU,LADY FRANKLIN POINT ,2302680,1958,1,1993,3,68.5,-113.216666666667,16,No +NU,JENNY LIND ISLAND,2302650,1958,1,1992,7,68.65,-101.733333333333,18,No +YK,KOMAKUK BEACH,2100685,1959,1,1993,6,69.5833333333333,-140.183333333333,7,No +NU,KUGAARUK,2303092,1957,1,2012,8,68.5405555555556,-89.7972222222222,17,Yes +NU,KUGLUKTUK,2300902,1931,1,2014,12,67.8166666666667,-115.143888888889,23,Yes +NU,LADY FRANKLIN POINT,2302680,1958,1,1993,3,68.5,-113.216666666667,16,No NU,LONGSTAFF BLUFF,2402684,1958,1,1991,6,68.8986111111111,-75.1408333333333,161,No -NU,LUPIN,230N002 ,1959,1,2007,7,65.7552916666667,-111.245841666667,488,Yes +NU,LUPIN,230N002,1959,1,2007,7,65.7552916666667,-111.245841666667,488,Yes NU,MACKAR INLET,2402686,1958,1,1992,5,68.3,-85.6666666666667,395,No -YK,MAYO ,2100700,1925,1,2013,11,63.6166666666667,-135.866666666667,504,No -NT,MOULD BAY,250M001 ,1948,1,2007,11,76.2375166666667,-119.347233333333,2,Yes -NU,NANISIVIK ,2402730,1938,1,2010,12,72.9833333333333,-84.6166666666667,642,Yes +YK,MAYO,2100700,1925,1,2013,11,63.6166666666667,-135.866666666667,504,No +NT,MOULD BAY,250M001,1948,1,2007,11,76.2375166666667,-119.347233333333,2,Yes +NU,NANISIVIK,2402730,1938,1,2010,12,72.9833333333333,-84.6166666666667,642,Yes NT,NICHOLSON PENINSULA,2202750,1958,1,1993,6,69.9333333333333,-128.966666666667,89,No -NT,NORMAN WELLS ,2202800,1943,1,2012,10,65.2825,-126.800277777778,73,No -YK,OLD CROW ,2100800,1952,1,2015,2,67.5705555555556,-139.839166666667,251,No +NT,NORMAN WELLS,2202800,1943,1,2012,10,65.2825,-126.800277777778,73,No +YK,OLD CROW,2100800,1952,1,2015,2,67.5705555555556,-139.839166666667,251,No YK,PELLY RANCH,2100880,1952,1,2015,3,62.8166666666667,-137.366666666667,454,No NU,RESOLUTE CARS,2403500,1948,1,2014,11,74.7169444444445,-94.9694444444445,67,No YK,ROSS RIVER YTG,2100941,1967,1,2008,2,61.9833333333333,-132.45,698,Yes -NT,SACHS HARBOUR ,2503650,1956,1,2013,2,72,-125.266666666667,86,No -NU,SHEPHERD BAY ,2303685,1957,1,1993,3,68.8166666666667,-93.4333333333333,43,No -YK,SHINGLE POINT ,2100950,1957,1,1993,3,68.95,-137.216666666667,49,No +NT,SACHS 
HARBOUR,2503650,1956,1,2013,2,72,-125.266666666667,86,No +NU,SHEPHERD BAY,2303685,1957,1,1993,3,68.8166666666667,-93.4333333333333,43,No +YK,SHINGLE POINT,2100950,1957,1,1993,3,68.95,-137.216666666667,49,No YK,SWIFT RIVER,2101081,1967,1,2008,2,60,-131.183333333333,891,No -YK,TESLIN ,2101100,1944,1,2013,12,60.1741388888889,-132.735888888889,705,No +YK,TESLIN,2101100,1944,1,2013,12,60.1741388888889,-132.735888888889,705,No YK,TUCHITUA,2101135,1967,1,2014,9,60.9333333333333,-129.216666666667,724,No NT,TUKTOYAKTUK,2203910,1957,1,1993,6,69.45,-133,18,No -NT,TULITA ,2201700,1904,1,2014,12,64.9086111111111,-125.568333333333,101,No -NT,ULUKHAKTOK ,2502501,1941,1,2010,6,70.7627777777778,-117.806111111111,36,Yes -YK,WATSON LAKE ,2101200,1939,1,2014,12,60.1165,-128.822333333333,687,No -YK,WHITEHORSE ,2101300,1942,1,2012,12,60.7095,-135.068833333333,706,No +NT,TULITA,2201700,1904,1,2014,12,64.9086111111111,-125.568333333333,101,No +NT,ULUKHAKTOK,2502501,1941,1,2010,6,70.7627777777778,-117.806111111111,36,Yes +YK,WATSON LAKE,2101200,1939,1,2014,12,60.1165,-128.822333333333,687,No +YK,WHITEHORSE,2101300,1942,1,2012,12,60.7095,-135.068833333333,706,No NT,WRIGLEY ,2204000,1944,1,2014,10,63.2094444444445,-123.436666666667,149,No NT,YELLOWKNIFE ,2204100,1943,1,2013,1,62.4627777777778,-114.440277777778,206,No NT,YOHIN,2204300,1957,1,2007,9,61.2419444444444,-123.741666666667,204,No -AB,ATHABASCA,3060L20 ,1918,1,2017,12,54.7222230555556,-113.2880575,515,Yes +AB,ATHABASCA,3060L20,1918,1,2017,12,54.7222230555556,-113.2880575,515,Yes AB,BANFF,3050519,1894,1,2007,11,51.1933583333333,-115.552236111111,1397,Yes AB,BEAVER MINES,3050600,1913,1,2012,3,49.4672277777778,-114.176955555556,1257,No AB,BEAVERLODGE,3070600,1916,1,2007,11,55.1966672222222,-119.396413888889,745,Yes -AB,CALGARY ,3031093,1885,1,2012,7,51.1138888888889,-114.020277777778,1084,No +AB,CALGARY,3031093,1885,1,2012,7,51.1138888888889,-114.020277777778,1084,No AB,CALMAR,3011120,1915,1,2016,12,53.2897241666667,-113.863057777778,720,No AB,CAMPSIE,3061200,1910,1,2013,10,54.1322227777778,-114.677778888889,671,No AB,CAMROSE,3011240,1946,1,2007,11,53.0347222222222,-112.814166666667,739,No AB,CARWAY,3031400,1915,1,2011,11,48.999725,-113.376111111111,1354,No -AB,CLARESHOLM MEADOW CREEK,3031F5F ,1913,1,2005,3,49.9375222222222,-113.737519444444,1035,No -AB,COLD LAKE ,3081680,1926,1,2017,12,54.4166666666667,-110.283333333333,541,Yes +AB,CLARESHOLM MEADOW CREEK,3031F5F,1913,1,2005,3,49.9375222222222,-113.737519444444,1035,No +AB,COLD LAKE,3081680,1926,1,2017,12,54.4166666666667,-110.283333333333,541,Yes AB,CORONATION,3011887,1928,1,2007,11,52.0741666666667,-111.449444444444,791,Yes -AB,CROWSNEST,3051R4R ,1913,1,2007,11,49.627525,-114.48195,1303,Yes +AB,CROWSNEST,3051R4R,1913,1,2007,11,49.627525,-114.48195,1303,Yes AB,DRUMHELLER ANDREW,3022136,1954,1,2008,3,51.4666666666667,-112.866666666667,719,No -AB,EDMONTON ,3012205,1883,1,2012,4,53.3166666666667,-113.583333333333,723,Yes +AB,EDMONTON,3012205,1883,1,2012,4,53.3166666666667,-113.583333333333,723,Yes AB,EDSON,3062246,1920,1,2007,11,53.5802797222222,-116.453335277778,927,Yes AB,ELK POINT,3012280,1913,1,1997,6,53.8833333333333,-111.066666666667,605,No AB,ENILDA-BERG,3062427,1932,1,2005,4,55.4166666666667,-116.3,591,Yes AB,FAIRVIEW THREE FOX FARM,3072539,1932,1,1999,12,56.0833333333333,-118.533333333333,604,Yes -AB,FORT CHIPEWYAN ,3072658,1884,1,2007,8,58.7666666666667,-111.116666666667,232,Yes -AB,FORT MCMURRAY ,3062693,1920,1,2007,11,56.65,-111.216666666667,369,Yes +AB,FORT 
CHIPEWYAN,3072658,1884,1,2007,8,58.7666666666667,-111.116666666667,232,Yes +AB,FORT MCMURRAY,3062693,1920,1,2007,11,56.65,-111.216666666667,369,Yes AB,FORT VERMILION,3072723,1909,1,2007,11,58.3823055555556,-116.040166666667,289,Yes AB,GLEICHEN,3032800,1903,1,2006,3,50.8833333333333,-113.05,905,No -AB,GRANDE PRAIRIE ,3072920,1931,1,2013,9,55.1797222222222,-118.885,669,Yes +AB,GRANDE PRAIRIE,3072920,1931,1,2013,9,55.1797222222222,-118.885,669,Yes AB,HIGHWOOD AU,3053250,1903,1,2011,9,50.5511111111111,-114.370555555556,1580,Yes -AB,HINTON VALLEY,306A009 ,1917,1,2017,12,53.40381,-117.537620277778,1011,Yes +AB,HINTON VALLEY,306A009,1917,1,2017,12,53.40381,-117.537620277778,1011,Yes AB,JASPER WARDEN,3053536,1936,1,2007,11,52.9263888888889,-118.029722222222,1020,Yes AB,JENNER,3023560,1916,1,2008,1,50.7222277777778,-111.195852777778,755,No AB,KEG RIVER,3073641,1936,1,2009,1,57.75,-117.616666666667,405,Yes AB,LACOMBE,3023722,1908,1,2007,11,52.4488905555556,-113.755834722222,860,Yes -AB,LETHBRIDGE ,3033880,1902,1,2007,8,49.6302777777778,-112.799722222222,929,Yes -AB,MEDICINE HAT ,3034480,1886,1,2006,5,50.0188888888889,-110.720833333333,717,No +AB,LETHBRIDGE,3033880,1902,1,2007,8,49.6302777777778,-112.799722222222,929,Yes +AB,MEDICINE HAT,3034480,1886,1,2006,5,50.0188888888889,-110.720833333333,717,No AB,MOUNTAIN VIEW,3034720,1913,1,2006,3,49.1269555555556,-113.630016666667,1339,No AB,OLDS,3024920,1914,1,2015,6,51.7833333333333,-114.1,1040,No AB,ONEFOUR,3044923,1928,1,2007,10,49.1166666666667,-110.466666666667,935,Yes -AB,PEACE RIVER ,3075040,1908,1,2014,5,56.2269444444444,-117.447222222222,571,Yes +AB,PEACE RIVER,3075040,1908,1,2014,5,56.2269444444444,-117.447222222222,571,Yes AB,PINCHER CREEK,3035206,1915,1,2007,11,49.5205555555556,-113.997222222222,1190,Yes AB,RANFURLY 2NW,3015405,1905,1,2014,11,53.4166666666667,-111.733333333333,673,Yes AB,ROCKY MTN HOUSE,3015523,1917,1,2007,11,52.4213905555556,-114.912223055556,988,Yes AB,SCOTFIELD,3025770,1913,1,2007,10,51.5833555555556,-111.363611666667,762,Yes AB,SION,3015960,1906,1,2004,12,53.8833333333333,-114.116666666667,701,No -AB,SLAVE LAKE ,3065999,1925,1,2007,8,55.2833333333333,-114.783333333333,583,Yes +AB,SLAVE LAKE,3065999,1925,1,2007,8,55.2833333333333,-114.783333333333,583,Yes AB,STETTLER NORTH,3016119,1919,1,2001,8,52.3333333333333,-112.716666666667,821,Yes AB,VAUXHALL,3036682,1914,1,2007,11,50.05,-112.133333333333,779,Yes AB,WABASCA,3076908,1915,1,2009,1,55.9666666666667,-113.833333333333,545,Yes -AB,WHITECOURT ,3067372,1943,1,2009,5,54.1438888888889,-115.786666666667,782,Yes +AB,WHITECOURT,3067372,1943,1,2009,5,54.1438888888889,-115.786666666667,782,Yes SK,ANEROID,4020160,1922,1,2005,4,49.7166666666667,-107.3,754,No SK,BANGOR,4010400,1951,1,2005,2,50.9,-102.283333333333,526,No -SK,BUFFALO NARROWS ,4060982,1962,1,2012,11,55.8333333333333,-108.433333333333,440,Yes +SK,BUFFALO NARROWS,4060982,1962,1,2012,11,55.8333333333333,-108.433333333333,440,Yes SK,CEYLON,4011441,1922,1,2002,12,49.3833333333333,-104.65,753,Yes SK,CHAPLIN,4021520,1904,1,1995,9,50.4666666666667,-106.65,672,No SK,COLLINS BAY CAMECO,4061632,1965,1,2017,12,58.1833333333333,-103.7,490,Yes SK,COTE,4011846,1913,1,2006,3,51.5166666666667,-101.783333333333,450,Yes SK,CREE LAKE,4061861,1962,1,1993,8,57.35,-107.133333333333,495,Yes SK,DAVIDSON,4012120,1922,1,2005,10,51.2666666666667,-105.983333333333,619,No -SK,ESTEVAN ,4012400,1902,1,2015,2,49.2166666666667,-102.966666666667,581,Yes +SK,ESTEVAN,4012400,1902,1,2015,2,49.2166666666667,-102.966666666667,581,Yes SK,HIGH 
POINT,4023240,1929,1,2017,7,50.9786127777778,-107.935278611111,645,No SK,HUDSON BAY,4083323,1943,1,2013,12,52.8833333333333,-102.583333333333,422,Yes SK,INDIAN HEAD,4013480,1895,1,2007,11,50.55,-103.65,579,No SK,ISLAND FALLS,4063560,1931,1,2004,9,55.5333333333333,-102.35,299,No SK,KELLIHER,4013660,1908,1,2017,12,51.2574166666667,-103.753027777778,676,Yes SK,KEY LAKE,4063755,1977,1,2017,12,57.25,-105.616666666667,509,No -SK,KINDERSLEY ,4043900,1942,1,2013,11,51.5166666666667,-109.183333333333,694,Yes +SK,KINDERSLEY,4043900,1942,1,2013,11,51.5166666666667,-109.183333333333,694,Yes SK,KLINTONEL,4024080,1911,1,1994,1,49.6833333333333,-108.916666666667,1074,No -SK,LA RONGE ,4064150,1923,1,2013,10,55.15,-105.266666666667,379,Yes -SK,LEADER AIRPORT,402DAF0 ,1923,1,2007,11,50.9094638888889,-109.501391666667,676,Yes +SK,LA RONGE,4064150,1923,1,2013,10,55.15,-105.266666666667,379,Yes +SK,LEADER AIRPORT,402DAF0,1923,1,2007,11,50.9094638888889,-109.501391666667,676,Yes SK,LOON LAKE EPF,4064600,1930,1,2005,10,54.05,-109.1,543,Yes SK,MANOR,4014913,1922,1,2004,7,49.6166666666667,-102.1,633,Yes SK,MELFORT,4055079,1910,1,2007,11,52.8166666666667,-104.6,490,Yes SK,MOOSE JAW,4015322,1895,1,2007,11,50.3316805555556,-105.537508333333,577,Yes SK,MOOSOMIN,4015360,1900,1,2000,9,50.1333333333333,-101.666666666667,576,No -SK,NIPAWIN ,4075518,1911,1,2005,9,53.3333333333333,-104,372,Yes +SK,NIPAWIN,4075518,1911,1,2005,9,53.3333333333333,-104,372,Yes SK,NORTH BATTLEFORD,4045605,1894,1,2007,11,52.7666666666667,-108.25,548,Yes SK,OUTLOOK,4055736,1915,1,2007,11,51.4833333333333,-107.05,541,Yes SK,PASWEGIN,4015960,1951,1,2003,9,51.9833333333333,-103.916666666667,533,No SK,PELLY,4086000,1952,1,2016,3,52.0833333333333,-101.866666666667,509,No SK,PILGER,4056120,1913,1,2011,9,52.4166666666667,-105.15,552,No -SK,PRINCE LBERT ,4056240,1889,1,2013,11,53.2166666666667,-105.666666666667,428,Yes -SK,REGINA ,4016560,1898,1,2007,11,50.4333333333333,-104.666666666667,577,No +SK,PRINCE LBERT,4056240,1889,1,2013,11,53.2166666666667,-105.666666666667,428,Yes +SK,REGINA,4016560,1898,1,2007,11,50.4333333333333,-104.666666666667,577,No SK,SASKATOON DIEFENBAKER ,4057120,1900,1,2007,11,52.1666666666667,-106.716666666667,504,No SK,SCOTT,4047241,1911,1,2007,11,52.35974,-108.834723333333,660,Yes SK,SWIFT CURRENT,4028060,1886,1,2007,11,50.2666666666667,-107.733333333333,825,Yes SK,TONKIN,4019082,1941,1,2016,1,51.2,-102.233333333333,527,Yes -SK,URANIUM CITY,406QLD0 ,1953,1,2007,10,59.5666666666667,-108.483333333333,318,Yes +SK,URANIUM CITY,406QLD0,1953,1,2007,10,59.5666666666667,-108.483333333333,318,Yes SK,VAL-MARIE,4038400,1937,1,2010,5,49.3700138888889,-107.847525,808,No SK,WASECA,4048520,1908,1,2014,12,53.1308555555556,-109.403902777778,638,No SK,WASKESIU LAKE,4068559,1966,1,2007,11,53.9166666666667,-106.066666666667,569,Yes @@ -258,13 +258,13 @@ MB,ARBORG,5030080,1951,1,2016,6,50.9333333333333,-97.0833333333333,224,No MB,BERENS RIVER,5030203,1905,1,2013,11,52.3597366666667,-97.0219533333333,222,Yes MB,BIRTLE,5010240,1917,1,2000,11,50.4333333333333,-101.05,522,No MB,BISSETT,5030282,1933,1,1997,6,51.0333333333333,-95.7,259,Yes -MB,BRANDON ,5010480,1890,1,2012,12,49.91,-99.9519444444445,409,Yes +MB,BRANDON,5010480,1890,1,2012,12,49.91,-99.9519444444445,409,Yes MB,CHURCHILL,5060606,1932,1,2015,12,58.7333333333333,-94.0666666666667,29,Yes MB,CYPRESS RIVER,5010640,1948,1,2012,3,49.55,-99.0833333333333,374,No MB,DAUPHIN,5040681,1911,1,2007,10,51.1003888888889,-100.056888888889,305,Yes MB,EMERSON,5020882,1942,1,2003,1,49,-97.2375,242,Yes MB,FLIN 
FLON,5050920,1927,1,2017,12,54.7666666666667,-101.883333333333,320,No -MB,GILLAM ,5061001,1943,1,2014,10,56.3575,-94.7105555555556,145,Yes +MB,GILLAM,5061001,1943,1,2014,10,56.3575,-94.7105555555556,145,Yes MB,GIMLI,5031039,1944,1,2008,3,50.6333333333333,-97.0166666666667,223,Yes MB,GRAND RAPIDS HYDRO,5031111,1962,1,2017,12,53.1580558333333,-99.2833444444444,223,Yes MB,GREAT FALLS,5031200,1923,1,2002,12,50.4666666666667,-96,249,No @@ -273,17 +273,17 @@ MB,LANGRUTH WEST,5041535,1958,1,2005,2,50.4138888888889,-98.8027777777778,264,Ye MB,LYNN LAKE,5061648,1952,1,2007,11,56.8638888888889,-101.076111111111,357,Yes MB,MORDEN,5021849,1888,1,2007,11,49.1876388888889,-98.0839444444444,298,Yes MB,NEEPAWA MURRAY 6 SOUTHWEST,5042004,1881,1,2008,11,50.15,-99.5666666666667,412,Yes -MB,NINETTE,50220M0 ,1916,1,1996,5,49.4166666666667,-99.65,419,Yes +MB,NINETTE,50220M0,1916,1,1996,5,49.4166666666667,-99.65,419,Yes MB,NORWAY HOUSE,5062045,1896,1,2007,11,53.9666666666667,-97.85,224,Yes MB,PIERSON,5012080,1933,1,2007,3,49.1833333333333,-101.266666666667,469,No MB,PINAWA WNRE,5032162,1915,1,2017,3,50.1805555555556,-96.0583333333333,267,Yes MB,PORTAGE LA PRAIRIE,5012321,1942,1,2017,12,49.95,-98.2666666666667,259,Yes MB,SPRAGUE,5022759,1916,1,2007,11,49.0236111111111,-95.5983358333333,329,Yes MB,STEINBACH,5022780,1956,1,2005,3,49.5333333333333,-96.7666666666667,254,No -MB,SWAN RIVER,504K80K ,1960,1,2007,10,52.1149722222222,-101.232916666667,335,Yes -MB,THE PAS ,5052880,1910,1,2014,11,53.9666666666667,-101.1,270,Yes +MB,SWAN RIVER,504K80K,1960,1,2007,10,52.1149722222222,-101.232916666667,335,Yes +MB,THE PAS,5052880,1910,1,2014,11,53.9666666666667,-101.1,270,Yes MB,THOMPSON ,5062922,1967,1,2014,11,55.8033333333333,-97.8625,222,No -MB,WINNIPEG RICHARDSON ,5023222,1872,1,2007,11,49.9166666666667,-97.2333333333333,239,Yes +MB,WINNIPEG RICHARDSON,5023222,1872,1,2007,11,49.9166666666667,-97.2333333333333,239,Yes ON,AMHERSTBURG,6130257,1917,1,2017,12,42.1033583333333,-83.0944633333333,182,Yes ON,ARMSTRONG JELLIEN,6040330,1939,1,1992,10,50.25,-89.1,341,Yes ON,ATIKOKAN MARMION,6020384,1919,1,2007,7,48.8,-91.5833333333333,442,Yes @@ -293,20 +293,20 @@ ON,BIG TROUT LAKE,6010738,1939,1,1992,10,53.8333333333333,-89.8666666666667,224, ON,BISCOTASING,6060773,1914,1,2000,10,47.3,-82.1,407,No ON,BROCKVILLE PCC,6100971,1915,1,2017,12,44.6,-75.6666666666667,96,Yes ON,CAMERON FALLS,6041109,1924,1,1998,8,49.15,-88.35,229,No -ON,CHAPLEAU ,6061361,1914,1,2015,3,47.82,-83.3466666666667,447,Yes +ON,CHAPLEAU,6061361,1914,1,2015,3,47.82,-83.3466666666667,447,Yes ON,CORNWALL,6101874,1951,1,2017,12,45.0155783333333,-74.7489,64,No -ON,DRYDEN ,6032119,1914,1,2005,1,49.8333333333333,-92.75,413,Yes -ON,EARLTON ,6072225,1939,1,2005,1,47.7,-79.85,243,No -ON,FORT FRANCES ,6022476,1912,1,2011,5,48.65,-93.4333333333333,342,Yes -ON,GERALDTON ,6042716,1950,1,2015,2,49.7828027777778,-86.9305694444445,349,Yes +ON,DRYDEN,6032119,1914,1,2005,1,49.8333333333333,-92.75,413,Yes +ON,EARLTON,6072225,1939,1,2005,1,47.7,-79.85,243,No +ON,FORT FRANCES,6022476,1912,1,2011,5,48.65,-93.4333333333333,342,Yes +ON,GERALDTON,6042716,1950,1,2015,2,49.7828027777778,-86.9305694444445,349,Yes ON,GODFREY,6102857,1924,1,2003,5,44.5666666666667,-76.6333333333333,160,Yes -ON,GORE BAY ,6092925,1916,1,1994,1,45.8833333333333,-82.5666666666667,194,Yes +ON,GORE BAY,6092925,1916,1,1994,1,45.8833333333333,-82.5666666666667,194,Yes ON,HALIBURTON,6163171,1883,1,2017,12,45.0322483333333,-78.531115,330,Yes -ON,HAMILTON ,6153194,1866,1,2011,12,43.1716866666667,-79.9341766666667,238,Yes 
-ON,HORNEPAYNE ,6053575,1917,1,1995,7,49.2,-84.7666666666667,335,Yes +ON,HAMILTON,6153194,1866,1,2011,12,43.1716866666667,-79.9341766666667,238,Yes +ON,HORNEPAYNE,6053575,1917,1,1995,7,49.2,-84.7666666666667,335,Yes ON,IROQUOIS FALLS,6073810,1913,1,1998,12,48.75,-80.6666666666667,259,No -ON,KAPUSKASING ,6073975,1918,1,2014,9,49.4138888888889,-82.4675,227,Yes -ON,KENORA ,6034075,1900,1,2013,2,49.7902791666667,-94.3652786111111,406,Yes +ON,KAPUSKASING,6073975,1918,1,2014,9,49.4138888888889,-82.4675,227,Yes +ON,KENORA,6034075,1900,1,2013,2,49.7902791666667,-94.3652786111111,406,Yes ON,KINGSTON PUMPING STATION,6104175,1872,1,2007,12,44.2439033333333,-76.4805666666667,77,Yes ON,LANSDOWNE HOUSE,6014350,1941,1,1989,6,52.2333333333333,-87.8833333333333,255,No ON,LONDON AIRPORT,6144475,1883,1,2017,4,43.0330555555556,-81.1511111111111,278,Yes @@ -315,68 +315,68 @@ ON,MADAWASKA,6084770,1916,1,2000,11,45.5,-77.9833333333333,316,No ON,MINE CENTRE SOUTHWEST,6025205,1914,1,2017,12,48.7597388888889,-92.6227777777778,361,Yes ON,MOOSONEE,6075425,1892,1,2017,12,51.2666666666667,-80.65,10,Yes ON,MORRISBURG,6105460,1913,1,2008,12,44.9236183333333,-75.1883433333333,82,No -ON,NORTH BAY ,6085700,1915,1,2013,1,46.3636111111111,-79.4227777777778,370,Yes +ON,NORTH BAY,6085700,1915,1,2013,1,46.3636111111111,-79.4227777777778,370,Yes ON,ORANGEVILLE MOE,6155790,1887,1,2015,12,43.9183516666667,-80.0864066666667,412,Yes ON,ORILLIA BRAIN,6115811,1871,1,2017,12,44.6027777777778,-79.4388888888889,250,Yes ON,OTTAWA,6105976,1890,1,2017,12,45.3833333333333,-75.7166666666667,79,No ON,OWEN SOUND MOE,6116132,1879,1,2007,12,44.5833333333333,-80.9333333333333,179,Yes -ON,PELEE ISLAND ,6136336,1888,1,1994,9,41.7833333333333,-82.6833333333333,174,Yes -ON,PETERBOROUGH ,6166418,1866,1,2007,5,44.2333333333333,-78.3666666666667,191,Yes -ON,PICKLE LAKE ,6016527,1933,1,2012,7,51.4463888888889,-90.2141666666667,386,Yes -ON,RED LAKE ,6016975,1939,1,2012,5,51.0669444444445,-93.7930555555556,386,No +ON,PELEE ISLAND,6136336,1888,1,1994,9,41.7833333333333,-82.6833333333333,174,Yes +ON,PETERBOROUGH,6166418,1866,1,2007,5,44.2333333333333,-78.3666666666667,191,Yes +ON,PICKLE LAKE,6016527,1933,1,2012,7,51.4463888888889,-90.2141666666667,386,Yes +ON,RED LAKE,6016975,1939,1,2012,5,51.0669444444445,-93.7930555555556,386,No ON,RIDGETOWN,6137149,1883,1,1997,4,42.45,-81.8833333333333,206,Yes -ON,SAULT STE MARIE ,6057592,1945,1,2012,3,46.4833333333333,-84.5094444444444,192,Yes -ON,SIOUX LOOKOUT ,6037775,1914,1,2013,2,50.1166666666667,-91.9,383,Yes +ON,SAULT STE MARIE,6057592,1945,1,2012,3,46.4833333333333,-84.5094444444444,192,Yes +ON,SIOUX LOOKOUT,6037775,1914,1,2013,2,50.1166666666667,-91.9,383,Yes ON,SMOKY FALLS,6077845,1934,1,1997,4,50.0666666666667,-82.1666666666667,183,No -ON,SUDBURY ,6068150,1921,1,2013,3,46.6255555555556,-80.7977777777778,348,Yes -ON,TERRACE BAY ,6048231,1910,1,2007,9,48.8166666666667,-87.1,290,Yes -ON,TIMMINS VICTOR POWER ,6078285,1955,1,2011,2,48.5697222222222,-81.3766666666667,295,No +ON,SUDBURY,6068150,1921,1,2013,3,46.6255555555556,-80.7977777777778,348,Yes +ON,TERRACE BAY,6048231,1910,1,2007,9,48.8166666666667,-87.1,290,Yes +ON,TIMMINS VICTOR POWER,6078285,1955,1,2011,2,48.5697222222222,-81.3766666666667,295,No ON,TOBERMORY CYPRUS LAKE,6128323,1915,1,1994,12,45.2333333333333,-81.5333333333333,190,Yes ON,TORONTO,6158350,1840,1,2017,4,43.6666666666667,-79.4,113,No -ON,TORONTO LESTER B. PEARSON ,6158733,1938,1,2013,6,43.6772222222222,-79.6305555555556,173,No +ON,TORONTO LESTER B. 
PEARSON,6158733,1938,1,2013,6,43.6772222222222,-79.6305555555556,173,No ON,TRANQUILLO RIDGE,6048864,1877,1,2007,12,48.2333333333333,-89.5166666666667,317,Yes ON,VINELAND,6139141,1919,1,2013,12,43.15,-79.4166666666667,110,Yes ON,WALLACEBURG,6139265,1906,1,1997,4,42.5833333333333,-82.4,177,No -ON,WAWA ,6059D09 ,1940,1,2014,9,47.9666666666667,-84.7833333333333,287,Yes +ON,WAWA ,6059D09,1940,1,2014,9,47.9666666666667,-84.7833333333333,287,Yes ON,WELLAND,6139445,1873,1,2014,8,42.9925266666667,-79.2611383333333,175,No -ON,WIARTON ,6119500,1948,1,2014,11,44.7458333333333,-81.1072222222222,222,No -ON,WINDSOR ,6139525,1866,1,2014,10,42.2755555555556,-82.9555555555556,190,Yes +ON,WIARTON,6119500,1948,1,2014,11,44.7458333333333,-81.1072222222222,222,No +ON,WINDSOR,6139525,1866,1,2014,10,42.2755555555556,-82.9555555555556,190,Yes ON,WOODSTOCK,6149625,1870,1,2017,12,43.1361233333333,-80.7705666666667,282,No QC,ARMAGH,7050240,1916,1,1994,5,46.75,-70.5333333333333,358,Yes QC,ARUNDEL,7030310,1914,1,2017,5,45.95,-74.6166666666667,191,Yes -QC,BAGOTVILLE ,7060400,1876,1,2017,12,48.3333333333333,-71,159,Yes +QC,BAGOTVILLE,7060400,1876,1,2017,12,48.3333333333333,-71,159,Yes QC,BARRAGE ANGLIERS,7080452,1911,1,1996,5,47.5519444444444,-79.2358333333333,267,No QC,BARRAGE TEMISCAMINGUE,7080468,1910,1,1995,10,46.7097222222222,-79.1011111111111,181,No QC,BELLETERRE,7080600,1952,1,2004,4,47.3833333333333,-78.7,322,No QC,BROME,7020840,1877,1,2014,7,45.1833333333333,-72.5666666666667,206,No QC,CAUSAPSCAL,7051200,1921,1,2017,8,48.3666666666667,-67.2333333333333,168,No -QC,CHIBOUGAMAU CHAPAIS ,7091404,1937,1,2016,11,49.7666666666667,-74.5333333333333,387,Yes +QC,CHIBOUGAMAU CHAPAIS,7091404,1937,1,2016,11,49.7666666666667,-74.5333333333333,387,Yes QC,CHELSEA,7031360,1928,1,2017,8,45.5166666666667,-75.7833333333333,113,No QC,DONNACONA,7012071,1919,1,2008,11,46.6833333333333,-71.7333333333333,46,Yes QC,DRUMMONDVILLE,7022160,1914,1,2017,8,45.8833333333333,-72.4833333333333,82,No QC,GASPE ,7052605,1916,1,2013,3,48.7769444444445,-64.4780555555556,33,Yes QC,GRANDE VALLEE,7052865,1883,1,2004,4,49.2,-65.15,8,Yes -QC,ILES DE LA MADELEINE ,705C2G9 ,1934,1,2002,11,47.4166666666667,-61.7833333333333,11,Yes +QC,ILES DE LA MADELEINE,705C2G9,1934,1,2002,11,47.4166666666667,-61.7833333333333,11,Yes QC,INUKJUAK,7103282,1938,1,1994,2,58.4666666666667,-78.0833333333333,24,No QC,JOLIETTE VILLE,7013362,1914,1,2011,4,46.0166666666667,-73.4333333333333,56,Yes -QC,KUUJJUAQ ,7113534,1947,1,2014,3,58.1,-68.4166666666667,39,No -QC,KUUJJUARAPIK ,7103536,1934,1,2014,4,55.2833333333333,-77.75,10,No +QC,KUUJJUAQ,7113534,1947,1,2014,3,58.1,-68.4166666666667,39,No +QC,KUUJJUARAPIK,7103536,1934,1,2014,4,55.2833333333333,-77.75,10,No QC,LA MALBAIE,7043960,1914,1,2004,4,47.6666666666667,-70.15,23,No QC,LA POCATIERE,7054095,1913,1,1996,3,47.35,-70.0333333333333,31,No QC,LA SARRE,7094120,1952,1,2004,4,48.7833333333333,-79.2166666666667,244,No QC,LA TUQUE,7074240,1912,1,2004,4,47.4,-72.7833333333333,152,No QC,LABRIEVILLE,7043540,1955,1,1994,12,49.3,-69.55,152,No -QC,LAC BERRY,709CEE9 ,1914,1,2017,8,48.8,-78.2833333333333,305,Yes +QC,LAC BERRY,709CEE9,1914,1,2017,8,48.8,-78.2833333333333,305,Yes QC,LAUZON,7024254,1872,1,2017,8,46.8166666666667,-71.1,69,Yes QC,LEBEL SUR QUEVILLON,7094275,1967,1,2004,4,49.05,-76.9666666666667,305,No QC,LENNOXVILLE,7024280,1915,1,1995,10,45.3688888888889,-71.8236111111111,181,No QC,LES BUISSONS,7044288,1947,1,2017,8,49.1166666666667,-68.3833333333333,15,Yes QC,LES CEDRES,7014290,1913,1,2017,8,45.3,-74.05,47,No -QC,MATAGAMI 
,7094639,1964,1,1991,6,49.7666666666667,-77.8166666666667,281,Yes +QC,MATAGAMI,7094639,1964,1,1991,6,49.7666666666667,-77.8166666666667,281,Yes QC,MONT LAURIER,7035160,1920,1,2014,6,46.5666666666667,-75.55,244,Yes -QC,MONT-JOLI ,7055120,1943,1,2013,3,48.6,-68.2166666666667,52,No -QC,MONTREAL/PIERRE ELLIOTT TRUDEAU ,7025250,1872,1,2016,9,45.4666666666667,-73.75,36,Yes -QC,NATASHQUAN ,7045400,1915,1,2003,3,50.1833333333333,-61.8166666666667,11,No +QC,MONT-JOLI,7055120,1943,1,2013,3,48.6,-68.2166666666667,52,No +QC,MONTREAL/PIERRE ELLIOTT TRUDEAU,7025250,1872,1,2016,9,45.4666666666667,-73.75,36,Yes +QC,NATASHQUAN,7045400,1915,1,2003,3,50.1833333333333,-61.8166666666667,11,No QC,NICOLET,7025440,1914,1,2017,8,46.2,-72.6166666666667,30,No QC,NOMININGUE,7035520,1914,1,2013,11,46.4,-75.0833333333333,274,No QC,NORMANDIN,7065640,1936,1,1992,8,48.85,-72.5333333333333,137,No @@ -384,8 +384,8 @@ QC,PARENT S,7075799,1943,1,2004,4,47.9166666666667,-74.6166666666667,410,Yes QC,POINTE AU CHENE,7036063,1919,1,2009,6,45.65,-74.8,51,Yes QC,QUAQTAQ,7116270,1930,1,1988,5,61.05,-69.6333333333333,30,Yes QC,RIMOUSKI,7056480,1877,1,2017,8,48.45,-68.5166666666667,36,Yes -QC,ROBERVAL ,7066685,1914,1,2014,3,48.5166666666667,-72.2666666666667,179,Yes -QC,SCHEFFERVILLE ,7117825,1949,1,1993,9,54.8,-66.8166666666667,522,No +QC,ROBERVAL,7066685,1914,1,2014,3,48.5166666666667,-72.2666666666667,179,Yes +QC,SCHEFFERVILLE,7117825,1949,1,1993,9,54.8,-66.8166666666667,522,No QC,SENNETERRE,7097900,1940,1,1994,5,48.3333333333333,-77.2666666666667,310,Yes QC,SEPT-ILES,7047912,1945,1,2017,5,50.2166666666667,-66.25,53,Yes QC,SHAWINIGAN,7018000,1902,1,2004,4,46.5666666666667,-72.75,122,No @@ -402,43 +402,43 @@ QC,TADOUSSAC,7048320,1914,1,2004,4,48.15,-69.7,70,No QC,TETE A LA BALEINE,7048421,1912,1,1995,3,50.7,-59.3166666666667,9,Yes QC,THETFORD MINES,7028441,1922,1,2016,7,46.1,-71.35,381,Yes QC,TRINITE DES MONTS,7058520,1951,1,2004,4,48.1333333333333,-68.4833333333333,262,No -QC,VAL-D'OR ,7098600,1952,1,2017,12,48.0563888888889,-77.7866666666667,337,No +QC,VAL-D'OR,7098600,1952,1,2017,12,48.0563888888889,-77.7866666666667,337,No QC,VILLE MARIE,7088760,1914,1,2004,4,47.35,-79.4333333333333,213,No QC,WRIGHT,7038975,1914,1,2017,8,46.0666666666667,-76.05,142,Yes NS,ANNAPOLIS ROYAL,8200100,1915,1,2007,12,44.75,-65.5166666666667,8,No NB,AROOSTOOK,8100300,1920,1,2017,12,46.7122222222222,-67.7155555555556,91,Yes -NB,BATHURST ,8100503,1884,1,2013,10,47.6291805555556,-65.7483388888889,59,Yes +NB,BATHURST,8100503,1884,1,2013,10,47.6291805555556,-65.7483388888889,59,Yes NL,BAY D'ESPOIR,8400413,1968,1,2017,12,47.9833333333333,-55.8,23,No NS,BEAR RIVER,8200500,1915,1,2006,2,44.5666666666667,-65.6333333333333,8,Yes NL,BURGEO,8400798,1939,1,1995,7,47.6166666666667,-57.6166666666667,11,Yes NL,CARTWRIGHT,8501100,1936,1,2015,3,53.7083333333333,-57.035,14,No -NB,CHARLO ,8100880,1934,1,2002,10,47.9833333333333,-66.3333333333333,40,Yes +NB,CHARLO,8100880,1934,1,2002,10,47.9833333333333,-66.3333333333333,40,Yes PE,CHARLOTTETOWN ,8300300,1872,1,2012,9,46.2886166666667,-63.1286305555556,49,Yes -NL,CHURCHILL FALLS,850A131 ,1969,1,1998,4,53.5333333333333,-63.9666666666667,489,Yes +NL,CHURCHILL FALLS,850A131,1969,1,1998,4,53.5333333333333,-63.9666666666667,489,Yes NS,COLLEGEVILLE,8201000,1916,1,2014,6,45.4833333333333,-62.0166666666667,76,No NL,CORNER BROOK,8401300,1933,1,2017,12,48.95,-57.95,5,No NL,DANIELS HARBOUR,8401400,1947,1,1998,1,50.2363888888889,-57.5811111111111,19,No NL,DEER LAKE ,8401501,1933,1,2012,3,49.2166666666667,-57.4,22,Yes 
NS,DEMING,8201410,1884,1,2011,12,45.2163908333333,-61.1778027777778,16,Yes NB,DOAKTOWN,8101200,1944,1,2009,6,46.5525138888889,-66.1402916666667,38,No -NB,EDMUNDSTON,810AL00 ,1916,1,2009,7,47.3463888888889,-68.1877777777778,163,Yes +NB,EDMUNDSTON,810AL00,1916,1,2009,7,47.3463888888889,-68.1877777777778,163,Yes NL,EXPLOITS DAM,8401550,1956,1,2009,2,48.7666666666667,-56.6,154,No NB,FREDERICTON ,8101500,1874,1,2010,4,45.8721305555556,-66.5278916666667,21,Yes -NL,GANDER ,8401700,1937,1,2012,3,48.9463888888889,-54.5769444444444,151,No -NL,GOOSE ,8501900,1942,1,2017,12,53.3166666666667,-60.4166666666667,49,No +NL,GANDER,8401700,1937,1,2012,3,48.9463888888889,-54.5769444444444,151,No +NL,GOOSE,8501900,1942,1,2017,12,53.3166666666667,-60.4166666666667,49,No NL,GRAND FALLS,8402050,1937,1,2009,1,48.9333333333333,-55.6666666666667,60,No -NS,GREENWOOD ,8202000,1943,1,2017,12,44.9833333333333,-64.9166666666667,28,No -NS,HALIFAX STANFIELD ,8202250,1872,1,2012,9,44.8800166666667,-63.5000138888889,145,Yes +NS,GREENWOOD,8202000,1943,1,2017,12,44.9833333333333,-64.9166666666667,28,No +NS,HALIFAX STANFIELD,8202250,1872,1,2012,9,44.8800166666667,-63.5000138888889,145,Yes NL,ISLE UX MORTS,8402450,1909,1,2004,10,47.5833333333333,-58.9666666666667,5,Yes NB,KEDGWICK,8102300,1932,1,1994,9,47.65,-67.35,274,No NS,LIVERPOOL BIG FALLS,8203100,1940,1,2012,10,44.1333333333333,-64.9333333333333,50,No -NL,MAKKOVIK ,8502NHR ,1942,1,2014,11,55.0822222222222,-59.1886111111111,71,Yes -NL,MARY'S HARBOUR ,8502591,1881,1,1998,1,52.3036111111111,-55.8336111111111,12,Yes -NB,MIRAMICHI ,8101000,1873,1,2005,8,47.0094694444444,-65.4677888888889,33,Yes -NB,MONCTON ,8103200,1898,1,2012,6,46.1053055555556,-64.6838055555556,71,Yes +NL,MAKKOVIK,8502NHR,1942,1,2014,11,55.0822222222222,-59.1886111111111,71,Yes +NL,MARY'S HARBOUR,8502591,1881,1,1998,1,52.3036111111111,-55.8336111111111,12,Yes +NB,MIRAMICHI,8101000,1873,1,2005,8,47.0094694444444,-65.4677888888889,33,Yes +NB,MONCTON,8103200,1898,1,2012,6,46.1053055555556,-64.6838055555556,71,Yes PE,MONTICELLO,8300447,1960,1,2003,12,46.4666666666667,-62.4666666666667,32,No NS,MOUNT UNIACKE,8203600,1920,1,2003,7,44.9,-63.8333333333333,159,No -NL,NAIN ,8502800,1939,1,2013,3,56.55,-61.6833333333333,7,No +NL,NAIN,8502800,1939,1,2013,3,56.55,-61.6833333333333,7,No NS,NAPPAN,8203700,1913,1,2003,7,45.7666666666667,-64.25,20,No NB,NEPISIGUIT FALLS,8103500,1922,1,2006,2,47.4,-65.7833333333333,106,No NL,NORTH HARBOUR,8402874,1939,1,2007,11,47.1333333333333,-53.6666666666667,11,Yes @@ -446,22 +446,22 @@ NS,PARRSBORO,8204400,1897,1,2002,9,45.4,-64.3333333333333,24,No NL,PLUM POINT,8402958,1972,1,2016,6,51.0666666666667,-56.8833333333333,6,No NB,REXTON,8104400,1923,1,2009,12,46.6666666666667,-64.8666666666667,5,No NS,SABLE ISLAND,8204700,1891,1,2001,12,43.9322222222222,-60.0094444444444,5,No -NB,SAINT JOHN ,8104900,1871,1,2012,6,45.3180555555556,-65.8855694444444,109,Yes +NB,SAINT JOHN,8104900,1871,1,2012,6,45.3180555555556,-65.8855694444444,109,Yes NL,SPRINGDALE,8403700,1956,1,1993,6,49.5,-56.0833333333333,23,No NS,SPRINGFIELD,8205200,1920,1,2003,8,44.6666666666667,-64.85,167,No -NL,ST ANTHONY ,840C401 ,1883,1,2008,1,51.3833333333333,-56.1,33,Yes -NL,ST JOHN'S ,8403506,1874,1,2012,3,47.6222222222222,-52.7427777777778,141,Yes +NL,ST ANTHONY,840C401,1883,1,2008,1,51.3833333333333,-56.1,33,Yes +NL,ST JOHN'S,8403506,1874,1,2012,3,47.6222222222222,-52.7427777777778,141,Yes NS,ST MARGARET'S BAY,8204800,1922,1,2017,12,44.7,-63.9,17,No -NL,STEPHENVILLE ,8403800,1935,1,2014,10,48.5333333333333,-58.55,26,Yes 
-PE,SUMMERSIDE ,8300700,1936,1,2002,6,46.4388888888889,-63.8316666666667,20,Yes +NL,STEPHENVILLE,8403800,1935,1,2014,10,48.5333333333333,-58.55,26,Yes +PE,SUMMERSIDE,8300700,1936,1,2002,6,46.4388888888889,-63.8316666666667,20,Yes NB,SUSSEX,8105200,1898,1,2009,5,45.7166666666667,-65.5333333333333,21,No -NS,SYDNEY ,8205700,1870,1,2014,8,46.1666666666667,-60.0481388888889,62,Yes +NS,SYDNEY,8205700,1870,1,2014,8,46.1666666666667,-60.0481388888889,62,Yes NS,TRURO,8205990,1910,1,2002,10,45.3666666666667,-63.2666666666667,40,Yes NS,UPPER STEWIACKE,8206200,1916,1,2008,4,45.2166666666667,-63,23,No -NL,WABUSH LAKE ,8504175,1961,1,2013,2,52.9272222222222,-66.8741666666667,551,No +NL,WABUSH LAKE,8504175,1961,1,2013,2,52.9272222222222,-66.8741666666667,551,No NL,WESTBROOK ST LAWRENCE,8404201,1957,1,1995,7,46.95,-55.3833333333333,31,No NS,WESTPORT,8206260,1937,1,1993,6,44.25,-66.3666666666667,18,Yes NS,WHITE ROCK,8206316,1913,1,2017,6,45.05,-64.3833333333333,38,Yes NB,WOODSTOCK,8105600,1914,1,2017,12,46.1702777777778,-67.5536111111111,153,No NS,WRECK COVE BROOK,8206450,1951,1,2012,12,46.5333333333333,-60.45,76,Yes -NS,YARMOUTH ,8206500,1880,1,2012,4,43.8308333333333,-66.0886111111111,43,Yes +NS,YARMOUTH,8206500,1880,1,2012,4,43.8308333333333,-66.0886111111111,43,Yes diff --git a/src/miranda/eccc/data/ahccd_gen3_temperature.csv b/src/miranda/preprocess/configs/ahccd_gen3_temperature.csv similarity index 99% rename from src/miranda/eccc/data/ahccd_gen3_temperature.csv rename to src/miranda/preprocess/configs/ahccd_gen3_temperature.csv index 8c56a6b5..4a65dc15 100644 --- a/src/miranda/eccc/data/ahccd_gen3_temperature.csv +++ b/src/miranda/preprocess/configs/ahccd_gen3_temperature.csv @@ -24,7 +24,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 21,1161663,CLINTON_AUT,BC,1993,1,2019,12,4.6,51.1,-121.5,105,y,y 22,1021830,COMOX,BC,1935,11,2019,12,1.2,49.7,-124.9,2,y,n 23,1021960,CORTES_ISLAND,BC,1947,3,2019,2,9.9,50,-124.9,1,y,n -24,1012010,COWICHAN_BAY_CHERRY_,BC,1913,10,1984,3,7.7,48.7,-123.5,0,n,n +24,1012010,COWICHAN_BAY_CHERRY,BC,1913,10,1984,3,7.7,48.7,-123.5,0,n,n 25,1152106,CRANBROOK,BC,1901,1,2019,12,6.6,49.6,-115.7,92,y,y 26,114B1F0,CRESTON,BC,1912,6,2019,12,0.5,49,-116.5,64,y,y 27,1022250,CUMBERLAND,BC,1922,5,1977,6,4.7,49.6,-125,15,n,n @@ -102,7 +102,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 99,1176755,REVELSTOKE,BC,1898,5,2019,12,7.3,50.9,-118.1,44,y,y 100,1016940,SAANICHTON_CDA,BC,1914,3,2019,7,0.6,48.6,-123.4,6,n,n 101,1167337,SALMON_ARM,BC,1911,7,2019,12,1.1,50.5,-119.3,41,y,n -102,1016995,SALTSPRING_,BC,1909,11,2019,12,1,48.8,-123.5,4,y,n +102,1016995,SALTSPRING,BC,1909,11,2019,12,1,48.8,-123.5,4,y,n 103,1057051,SANDSPIT,BC,1945,9,2019,12,4.2,53.2,-131.8,0,y,y 104,1017099,SATURNA_CAPMON,BC,1989,6,2019,12,3,48.7,-123.1,17,y,y 105,1017230,SHAWNIGAN_LAKE,BC,1913,4,2019,12,0.6,48.6,-123.6,15,n,n @@ -620,7 +620,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 617,7055122,MONT_JOLI,QUE,1875,10,2019,12,0.6,48.6,-68.2,5,y,y 618,7035160,MONT_LAURIER,QUE,1920,7,2014,6,7.2,46.5,-75.5,24,y,n 619,7024745,MONTREAL_TAVISH,QUE,1871,7,2019,12,2.8,45.5,-73.5,7,y,n -620,702S006,MONTREAL__TRUDEAU_IN,QUE,1953,1,2019,12,0.5,45.4,-73.7,3,y,y +620,702S006,MONTREAL_TRUDEAU_INTERNATIONAL,QUE,1953,1,2019,12,0.5,45.4,-73.7,3,y,y 621,7045401,NATASHQUAN,QUE,1914,10,2019,12,4.1,50.1,-61.8,1,y,y 622,7055422,NEW_CARLISLE,QUE,1963,1,2019,12,17.8,48,-65.3,4,y,n 
623,7025442,NICOLET,QUE,1913,11,2019,12,2.9,46.2,-72.6,0,y,n @@ -657,8 +657,8 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 654,7016800,ST_ALBAN,QUE,1949,9,2019,10,2.3,46.7,-72,7,n,n 655,7066820,ST_AMBROISE,QUE,1954,9,2019,10,4.5,48.5,-71.3,12,n,n 656,702FQLF,ST_ANICET,QUE,1960,11,2019,12,2,45.1,-74.2,4,y,y -657,7056930,ST_CAMILLE_,QUE,1963,7,2019,10,2,46.4,-70.2,39,n,n -658,7016960,ST_CHARLES_DE_MANDE_,QUE,1976,6,2019,10,21.4,46.3,-73.3,16,n,n +657,7056930,ST_CAMILLE,QUE,1963,7,2019,10,2,46.4,-70.2,39,n,n +658,7016960,ST_CHARLES_DE_MANDE,QUE,1976,6,2019,10,21.4,46.3,-73.3,16,n,n 659,7017080,ST_COME,QUE,1950,12,2018,11,4.6,46.2,-73.7,24,n,n 660,7027083,ST_COME_DE_LINIERE,QUE,1965,9,2019,10,3.7,46,-70.5,24,n,n 661,7027200,ST_EPHREM,QUE,1929,2,2019,10,18.1,46,-70.9,31,n,n @@ -666,7 +666,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 663,7027259,ST_FLAVIEN,QUE,1963,1,2016,8,2.1,46.4,-71.5,13,n,n 664,7027302,ST_GUILLAUME,QUE,1963,1,2015,10,7.6,45.8,-72.7,4,n,n 665,7037310,ST_HIPPOLYTE,QUE,1961,2,2019,10,4.9,45.9,-74,36,n,n -666,7027329,ST_HUBERT_MONT_,QUE,1953,1,2019,12,0.8,45.5,-73.4,2,y,n +666,7027329,ST_HUBERT_MONT,QUE,1953,1,2019,12,0.8,45.5,-73.4,2,y,n 667,7027361,ST_HYACINTHE,QUE,1935,1,2019,10,8.4,45.5,-72.9,3,y,n 668,7037400,ST_JEROME,QUE,1932,5,2019,10,4.3,45.8,-74,17,n,n 669,7027516,ST_LUDGER,QUE,1964,10,2019,10,3.1,45.7,-70.6,33,n,n @@ -778,6 +778,6 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 775,8403603,ST_JOHN_WEST,NFLD,1950,11,2019,12,6.6,47.5,-52.7,11,y,y 776,8403619,ST_LAWRENCE,NFLD,1989,11,2019,12,14.6,46.9,-55.3,4,y,y 777,8403820,STEPHENVILLE,NFLD,1895,6,2019,12,6.6,48.5,-58.5,5,y,y -778,8403851,TERRA_NOVA_NAT_PARK_,NFLD,1962,3,2019,12,7.1,48.5,-53.9,10,y,y +778,8403851,TERRA_NOVA_NAT_PARK,NFLD,1962,3,2019,12,7.1,48.5,-53.9,10,y,y 779,8504177,WABUSH_LAKE,NFLD,1960,11,2019,12,0.8,52.9,-66.8,55,y,y 780,8404343,WRECKHOUSE,NFLD,1981,6,2019,12,1.5,47.7,-59.3,3,y,y diff --git a/src/miranda/preprocess/configs/eccc-ahccd_attrs.json b/src/miranda/preprocess/configs/eccc-ahccd_attrs.json new file mode 100644 index 00000000..3de37b07 --- /dev/null +++ b/src/miranda/preprocess/configs/eccc-ahccd_attrs.json @@ -0,0 +1,131 @@ +{ + "Header": { + "_citation": { + "generation": { + "Second": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", + "Third": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. 
https://doi.org/10.1080/07055900.2020.1765728"
+      }
+    },
+    "_miranda_version": true,
+    "_product": {
+      "generation": {
+        "Second": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2",
+        "Third": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3"
+      }
+    },
+    "_variable": true,
+    "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).",
+    "author": "Environment and Climate Change Canada (ECCC)",
+    "contact": "info.cccs-ccsc@canada.ca",
+    "dataset_id": "d6813de6-b20a-46cc-8990-01862ae15c5f",
+    "documentation": "https://www.canada.ca/en/environment-climate-change/services/climate-change/canadian-centre-climate-services/display-download/technical-documentation-adjusted-climate-data.html",
+    "domain": "CAN",
+    "frequency": "day",
+    "institution": "GovCan",
+    "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
+    "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.",
+    "license_type": "permissive",
+    "organization": "ECCC",
+    "processing_level": "adjusted",
+    "project": "AHCCD",
+    "realm": "atmos",
+    "source": "msc",
+    "table_date": "2023-08-03",
+    "table_id": "ECCC",
+    "type": "station-obs"
+  },
+  "dimensions": {
+    "lat": {
+      "axis": "Y",
+      "long_name": "Latitude",
+      "standard_name": "latitude",
+      "units": "degrees_north"
+    },
+    "long": {
+      "axis": "X",
+      "long_name": "Longitude",
+      "standard_name": "longitude",
+      "units": "degrees_east"
+    },
+    "time": {
+      "axis": "T",
+      "long_name": "Time",
+      "standard_name": "time"
+    }
+  },
+  "variables": {
+    "dm": {
+      "NaN_value": -9999.9,
+      "_variable_name": "tas",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Mean Temp (°C)",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Near-Surface Air Temperature",
+      "missing_flags": "M",
+      "original_field": "Mean Temp (°C)",
+      "units": "degC"
+    },
+    "dn": {
+      "NaN_value": -9999.9,
+      "_variable_name": "tasmin",
+      "cell_methods": "time: minimum",
+      "comments": "Station data converted from Min Temp (°C)",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Daily Minimum Near-Surface Air Temperature",
+      "missing_flags": "M",
+      "original_field": "Min Temp (°C)",
+      "units": "degC"
+    },
+    "dr": {
+      "NaN_value": -9999.99,
+      "_variable_name": "prlp",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Liquid Precipitation",
+      "missing_flags": "M",
+      "original_field": "Total Rain (mm)",
+      "units": "mm"
+    },
+    "ds": {
+      "NaN_value": -9999.99,
+      "_variable_name": "prsn",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Snow (cm) using a density of 100 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Snowfall Flux",
+      "missing_flags": "M",
+      "original_field": "Total Snow (cm)",
+      "units": "mm"
+    },
+    "dt": {
+      "NaN_value": -9999.99,
+      "_variable_name": "pr",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Precipitation",
+      "missing_flags": "M",
+      "original_field": "Total Precip (mm)",
+      "units": "mm"
+    },
+    "dx": {
+      "NaN_value": -9999.9,
+
"_variable_name": "tasmax", + "cell_methods": "time: maximum", + "comments": "station data converted from Max Temp (°C)", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Daily Maximum Near-Surface Air Temperature", + "missing_flags": "M", + "original_field": "Max Temp (°C)", + "standard_name": "air_temperature", + "units": "degC" + } + } +} diff --git a/src/miranda/eccc/eccc_obs_summary_cf_attrs.json b/src/miranda/preprocess/configs/eccc-obs-summary_attrs.json similarity index 54% rename from src/miranda/eccc/eccc_obs_summary_cf_attrs.json rename to src/miranda/preprocess/configs/eccc-obs-summary_attrs.json index b21f224e..11b3dc51 100644 --- a/src/miranda/eccc/eccc_obs_summary_cf_attrs.json +++ b/src/miranda/preprocess/configs/eccc-obs-summary_attrs.json @@ -1,173 +1,160 @@ { "Header": { - "Conventions": "CF-1.8", + "_miranda_version": true, + "_variable": true, + "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).", + "author": "Environment and Climate Change Canada (ECCC)", "contact": "info.cccs-ccsc@canada.ca", + "dataset_id": "b24efb37-11b6-5d03-ab19-5759f83db546", + "documentation": "https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + "domain": "CAN", + "frequency": "mon", "institution": "GovCan", - "int_missing_value": "-999", "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", "license_type": "permissive", - "missing_value": "1e20", "organization": "ECCC", "processing_level": "raw", + "product": "A cross-country summary of the averages and extremes for the month, including precipitation totals, max-min temperatures, and degree days.", + "project": "ECCC-SUMMARIES", "realm": "atmos", "source": "msc", - "table_date": "2023-03-23", + "table_date": "2023-08-07", + "table_id": "ECCC", "type": "station-obs" }, "variable_entry": { "cdd": { - "add_offset": 0, + "_variable_name": "cdd", "cell_methods": "time: sum", "comments": "Station data converted from Cool Deg Days (°C)", "frequency": "day", "grid_mapping": "regular_lon_lat", "long_name": "Number of Degrees Celsius Over a Mean Temperature of 18 °C", - "original_variable": "Cool Deg Days (°C)", - "out_name": "cdd", - "scale_factor": 1, - "standard_name": "cooling_degree_days", + "original_field": "Cool Deg Days (°C)", "type": "real", - "units": "C" + "units": "degC" }, "hdd": { - "add_offset": 0, + "_variable_name": "hdd", "cell_methods": "time: sum", "comments": "Station data converted from Heat Deg Days (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Number of Degrees Celsius Under a Mean Temperature of 18 °C", - "original_variable": "Heat Deg Days (°C)", - "out_name": "hdd", - "scale_factor": 1, - "standard_name": "heating_degree_days", + "original_field": "Heat Deg Days (°C)", "type": "real", - "units": "C" + "units": "degC" }, "pr": { - "add_offset": 0, + "_variable_name": "pr", "cell_methods": "time: mean", "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Precipitation", - "original_variable": "Total Precip (mm)", - "out_name": "pr", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "precipitation_flux", + "original_field": "Total 
Precip (mm)", "type": "real", - "units": "kg m-2 s-1" + "units": "mm" }, "prlp": { - "add_offset": 0, + "_variable_name": "prlp", "cell_methods": "time: mean", "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Liquid Precipitation", - "original_variable": "Total Rain (mm)", - "out_name": "prlp", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "rainfall_flux", + "original_field": "Total Rain (mm)", "type": "real", - "units": "kg m-2 s-1" + "units": "mm" }, "prsn": { - "add_offset": 0, + "_variable_name": "prsn", "cell_methods": "time: mean", "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Snowfall Flux", - "original_variable": "Total Snow (cm)", - "out_name": "prsn", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "snowfall_flux", + "original_field": "Total Snow (cm)", "type": "real", - "units": "kg m-2 s-1" + "units": "cm" }, "sfcWindAz": { - "add_offset": 0, + "_variable_name": "sfcWindAz", "cell_methods": "time: mean", "comments": "Station data converted from Dir of Max Gust (10s deg)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Direction from which the Daily Maximum Near-Surface Gust Wind Speed maximum Blows", - "original_variable": "Dir of Max Gust (10s deg)", - "out_name": "sfcWindAz", - "scale_factor": 1, - "standard_name": "wind_direction", + "original_field": "Dir of Max Gust (10s deg)", "type": "real", "units": "degree" }, "sfcWindMax": { - "add_offset": 0, + "_variable_name": "sfcWindMax", "cell_methods": "time: max", "comments": "Station data converted from Spd of Max Gust (km/h)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Maximum Near-Surface Gust Wind Speed maximum", - "original_variable": "Spd of Max Gust (km/h)", - "out_name": "sfcWindMax", - "scale_factor": 0.2777777777777778, - "standard_name": "wind_speed_of_gust maximum", + "original_field": "Spd of Max Gust (km/h)", "type": "real", - "units": "m s-1" + "units": "km h-1" }, "snd": { - "add_offset": 0, + "_variable_name": "snd", "cell_methods": "time: mean", "comments": "Station data converted from Snow on Grnd (cm)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Snow Depth", - "original_variable": "Snow on Grnd (cm)", - "out_name": "snd", - "scale_factor": 0.01, - "standard_name": "surface_snow_thickness", + "original_field": "Snow on Grnd (cm)", "type": "real", - "units": "m" + "units": "cm" }, "tas": { - "add_offset": 273.15, + "_variable_name": "tas", "cell_methods": "time: mean", - "comments": "Station data converted from Mean Temp (°C)", - "frequency": "day", + "comments": "Station data converted from Mean Temperature (°C)", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Near-Surface Air Temperature", - "original_variable": "Mean Temp (°C)", - "out_name": "tas", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Mean Temperature", + "type": "real", + "units": "degC" + }, + "tas_days": { + "_variable_name": "tas_days", + "cell_methods": "time: count", + "comments": "Station data converted from Days With Valid Mean Temperature", + "frequency": "mon", + "grid_mapping": "regular_lon_lat", + "long_name": "Number of Days With Valid 
Near-Surface Air Temperature", + "original_field": "Days With Valid Mean Temp", "type": "real", - "units": "K" + "units": "1" }, "tasmax": { - "add_offset": 273.15, + "_variable_name": "tasmax", "cell_methods": "time: maximum", "comments": "station data converted from Max Temp (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Maximum Near-Surface Air Temperature", - "original_variable": "Max Temp (°C)", - "out_name": "tasmax", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Max Temp (°C)", "type": "real", - "units": "K" + "units": "degC" }, "tasmin": { - "add_offset": 273.15, + "_variable_name": "tasmin", "cell_methods": "time: minimum", "comments": "Station data converted from Min Temp (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Minimum Near-Surface Air Temperature", - "original_variable": "Min Temp (°C)", - "out_name": "tasmin", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Min Temp (°C)", "type": "real", - "units": "K" + "units": "degC" } } } diff --git a/src/miranda/preprocess/configs/eccc-obs_attrs.json b/src/miranda/preprocess/configs/eccc-obs_attrs.json new file mode 100644 index 00000000..8f438b16 --- /dev/null +++ b/src/miranda/preprocess/configs/eccc-obs_attrs.json @@ -0,0 +1,745 @@ +{ + "Header": { + "_frequency": true, + "_miranda_version": true, + "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).", + "author": "Environment and Climate Change Canada (ECCC)", + "contact": "ccsc-cccs@ec.gc.ca", + "documentation": "https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + "institution": "GovCan", + "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", + "license_type": "permissive", + "organization": "ECCC", + "processing_level": "raw", + "source": "msc", + "table_date": "2023-08-02", + "title": "Environment and Climate Change Canada (ECCC) weather station observations", + "type": "station-obs", + "usage": "The original data is owned by the Government of Canada (Environment and Climate Change Canada), and falls under the licence agreement for use of Environment and Climate Change Canada data" + }, + "variables": { + "001": { + "_variable_name": "tasmax", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Maximum Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature_maximum", + "units": "degC" + }, + "002": { + "_variable_name": "tasmin", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Minimum Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature_minimum", + "units": "degC" + }, + "003": { + "_variable_name": "tas", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Mean Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature", + "units": "degC" + }, + "010": { + "_variable_name": "prlptot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Rainfall", + "original_units": "0.1 mm day-1", + "scale_factor": 0.1, + "standard_name": "liquid_precipitation_amount", + "units": "mmn day-1" + 
}, + "011": { + "_variable_name": "prsntot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Snowfall", + "original_units": "0.1 cm day-1", + "scale_factor": 0.1, + "standard_name": "solid_precipitation_amount", + "units": "cm day-1" + }, + "012": { + "_variable_name": "prcptot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Precipitation", + "original_units": "0.1 mm day-1", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm day-1" + }, + "013": { + "_variable_name": "sndtot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Snow on the Ground", + "original_units": "cm", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "014": { + "_variable_name": "thunder", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Thunderstorms", + "scale_factor": 1, + "standard_name": "thunderstorm_presence", + "units": "1" + }, + "015": { + "_variable_name": "freezing_rain_drizzle", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Freezing rain or drizzle", + "scale_factor": 1, + "standard_name": "freeze_rain_drizzle_presence", + "units": "1" + }, + "016": { + "_variable_name": "hail", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Hail", + "scale_factor": 1, + "standard_name": "hail_presence", + "units": "1" + }, + "017": { + "_variable_name": "fog_ice_fog", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Fog or Ice Fog", + "scale_factor": 1, + "standard_name": "fog_ice_fog_presence", + "units": "1" + }, + "018": { + "_variable_name": "smoke_haze", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Smoke or Haze", + "scale_factor": 1, + "standard_name": "smoke_haze_presence", + "units": "1" + }, + "019": { + "_variable_name": "blowing_dust_sand", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Blowing Dust or Sand", + "scale_factor": 1, + "standard_name": "blowing_dust_sand_presence", + "units": "1" + }, + "020": { + "_variable_name": "blow_snow", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Blowing snow", + "scale_factor": 1, + "standard_name": "blowing_snow_presence", + "units": "1" + }, + "021": { + "_variable_name": "wind_gt_28kt", + "long_name": "Wind speed >= 28 Knots", + "scale_factor": 1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_exceeding_28_knots", + "units": "1" + }, + "022": { + "_variable_name": "wind_gt_34kt", + "long_name": "Wind speed >= 34 Knots", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "wind_exceeding_34_knots", + "units": "1" + }, + "023": { + "_variable_name": "gust_dir_16pts", + "long_name": "Direction of extreme gust (16 pts) to December 1976", + "original_units": "10's of degrees", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "gust_to_direction", + "units": "deg" + }, + "024": { + "_variable_name": "gust_speed", + "long_name": "Speed of extreme gust", + "original_units": "km/h", + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed_of_gust", + "units": "km h-1" + }, + "025": { + "_variable_name": "gust_hour", + "long_name": "UTC hour of extreme gust", + "standard_name": "hour_of_extreme_gust", + "missing_flags": "M", + "missing_values": "-99999", + "units": "h" + }, + "061": { + "_variable_name": 
"rf1_radiation", + "long_name": "RF1 global solar radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 0.001, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "062": { + "_variable_name": "rf2_radiation", + "long_name": "RF2 sky (diffuse) radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 277.77777777777777, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "063": { + "_variable_name": "rf3_radiation", + "long_name": "RF3 reflected solar radiation", + "original_units": "0.001 MJ/m", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "064": { + "_variable_name": "rf4_radiation", + "long_name": "RF4 net all wave radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "missing_flags": "M", + "missing_values": "-99999", + "units": "MJ m-1" + }, + "067": { + "_variable_name": "rf7_radiation", + "long_name": "RF7 daylight illumination", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.01 Kilolux_hrs", + "scale_factor": 0.01, + "standard_name": "solar_radiation_flux", + "units": "klux h" + }, + "068": { + "_variable_name": "rf8_radiation", + "long_name": "RF8 direct solar radiation", + "original_units": "0.001 MJ/m", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "units": "W m-2 h-1" + }, + "069": { + "_variable_name": "wind_dir_45B", + "long_name": "Direction - 45B anemometer (8 pts)", + "original_units": "10's of degrees", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "wind_to_direction", + "units": "deg" + }, + "071": { + "_variable_name": "ceiling_hgt", + "long_name": "Ceiling height of lowest layer of clouds", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "30's of meters", + "scale_factor": 30, + "standard_name": "ceiling_cloud_height", + "units": "m" + }, + "072": { + "_variable_name": "visibility", + "long_name": "Visibility", + "original_units": "0.1 km", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "visibility_in_air", + "units": "km" + }, + "073": { + "_variable_name": "psl", + "long_name": "Sea Level Pressure", + "original_units": "0.01 kPa", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.01, + "standard_name": "air_pressure_at_mean_sea_level", + "units": "kPa" + }, + "074": { + "_variable_name": "tds", + "long_name": "Dew Point Temperature", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "dew_point_temperature", + "units": "degC" + }, + "075": { + "_variable_name": "wind_dir_u2a_16", + "long_name": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "10's of degrees", + "scale_factor": 10, + "standard_name": "wind_direction_u2a", + "units": "deg" + }, + "076": { + "_variable_name": "wind_speed_u2a", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Wind Speed - U2A (16 pts) to December 1970", + "original_units": "km/h", + "scale_factor": 1, + "standard_name": "wind_speed_u2a", + "units": "km h-1" + 
}, + "077": { + "_variable_name": "pressure", + "long_name": "Station Pressure", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.01 kPa", + "scale_factor": 0.01, + "standard_name": "atmospheric_pressure", + "units": "kPa" + }, + "078": { + "_variable_name": "tas_dry", + "long_name": "Dry Bulb Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "dry_bulb_temperature", + "units": "degC" + }, + "079": { + "_variable_name": "tas_wet", + "long_name": "Wet Bulb temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wet_bulb_temperature", + "units": "degC" + }, + "080": { + "_variable_name": "hur", + "long_name": "Relative Humidity", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "relative_humidity", + "units": "1" + }, + "081": { + "_variable_name": "clo", + "long_name": "Total Cloud Opacity", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "cloud_albedo", + "units": "1" + }, + "082": { + "_variable_name": "clt", + "long_name": "Total Cloud Amount", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "cloud_area_fraction", + "units": "1" + }, + "089": { + "_variable_name": "freeze_rain", + "long_name": "Freezing Rain", + "scale_factor": 1, + "standard_name": "freezing_rain", + "units": "1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "094": { + "_variable_name": "ice_pellets", + "long_name": "Ice Pellets", + "scale_factor": 1, + "standard_name": "ice_pellet_presence", + "units": "1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "107": { + "_variable_name": "1low_cloud_opac", + "long_name": "Lowest cloud layer opacity", + "original_units": "Tenths", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "108": { + "_variable_name": "1low_cloud_frac", + "long_name": "Lowest cloud layer amount or condition", + "original_units": "Tenths", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "109": { + "_variable_name": "1low_cloud_type", + "long_name": "Lowest cloud layer type", + "standard_name": "low_type_cloud_type", + "missing_flags": "M", + "missing_values": "-99999", + "units": "1" + }, + "110": { + "_variable_name": "1low_cloud_hgt", + "long_name": "Lowest cloud layer height", + "original_units": "30's of meters", + "scale_factor": 30, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "111": { + "_variable_name": "2low_cloud_opac", + "long_name": "Second lowest cloud layer opacity", + "original_units": "Tenths", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "112": { + "_variable_name": "2low_cloud_frac", + "long_name": "Second lowest cloud layer amount or condition", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "Tenths", + "scale_factor": 10, + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "113": { + "_variable_name": "2low_cloud_type", + 
"long_name": "Second lowest cloud layer type", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "", + "scale_factor": 1, + "standard_name": "low_type_cloud_type", + "units": "1" + }, + "114": { + "_variable_name": "2low_cloud_hgt", + "long_name": "Second lowest cloud layer height", + "original_units": "30's of meters", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 30, + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "115": { + "_variable_name": "3low_cloud_opac", + "long_name": "Thirsd lowest cloud layer opacity", + "original_units": "Tenths", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "116": { + "_variable_name": "3low_cloud_frac", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Third lowest cloud layer amount or condition", + "original_units": "Tenths", + "scale_factor": 10, + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "117": { + "_variable_name": "3low_cloud_type", + "long_name": "Third lowest cloud layer type", + "original_units": "", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "low_type_cloud_type", + "units": "1" + }, + "118": { + "_variable_name": "3low_cloud_hgt", + "long_name": "Third lowest cloud layer height", + "original_units": "30's of meters", + "scale_factor": 30, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "123": { + "_variable_name": "rainfall", + "long_name": "Total Rainfall", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "rainfall_flux", + "units": "mm h-1" + }, + "133": { + "_variable_name": "sun", + "long_name": "Sunshine", + "original_units": "0.1 hrs", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "duration_of_sunshine", + "units": "h" + }, + "156": { + "_variable_name": "wind_dir_u2a_36", + "long_name": "Wind Direction - U2A (36 pts) from January 1971", + "original_units": "10's of degrees", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "wind_direction_u2a", + "units": "deg" + }, + "262": { + "_variable_name": "prtot", + "long_name": "Total Precipitation (minutes 00-60)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "precipitation_amount", + "units": "mm" + }, + "263": { + "_variable_name": "prtot_q1", + "long_name": "Total Precipitation (minutes 00-15)", + "original_units": "0.1 mm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm" + }, + "264": { + "_variable_name": "prtot_q2", + "long_name": "Total Precipitation (minutes 15-30)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "mm" + }, + "265": { + "_variable_name": "prtot_q3", + "long_name": "Total Precipitation (minutes 30-45)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "precipitation_amount", + "units": "mm" + }, + "266": { + "_variable_name": "prtot_q4", + "long_name": "Total Precipitation (minutes 45-60)", + "original_units": 
"0.1 mm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm" + }, + "267": { + "_variable_name": "precipitation_weight_q1", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 15)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "kg m-2" + }, + "268": { + "_variable_name": "precipitation_weight_q2", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 30)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "kg m-2" + }, + "269": { + "_variable_name": "precipitation_weight_q3", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 45)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "kg m-2", + "missing_flags": "M", + "missing_values": "-99999" + }, + "270": { + "_variable_name": "precipitation_weight_q4", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 60)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "kg m-2" + }, + "271": { + "_variable_name": "wind_speed_q1", + "long_name": "Wind Speed at 2 m (minutes 00-15)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1" + }, + "272": { + "_variable_name": "wind_speed_q2", + "long_name": "Wind Speed at 2 m (minutes 15-30)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed", + "units": "km h-1" + }, + "273": { + "_variable_name": "wind_speed_q3", + "long_name": "Wind Speed at 2 m (minutes 30-45)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "274": { + "_variable_name": "wind_speed_q4", + "long_name": "Wind Speed at 2 m (minutes 45-60)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1" + }, + "275": { + "_variable_name": "snd_q4", + "long_name": "Snow Depth (at minute 60)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "276": { + "_variable_name": "snd_q1", + "long_name": "Snow Depth (at minute 15)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "277": { + "_variable_name": "snd_q2", + "long_name": "Snow Depth (at minute 30)", + "original_units": "cm", + "scale_factor": 1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "278": { + "_variable_name": "snd_q3", + "long_name": "Snow Depth (at minute 45)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "279": { + "_variable_name": "wind_dir", + "long_name": "Wind Direction at 2 m (minutes 50-60)", + 
"missing_flags": "M", + "missing_values": "-99999", + "nc_units": "deg", + "original_units": "Degrees", + "standard_name": "wind_direction" + }, + "280": { + "_variable_name": "wind_speed", + "long_name": "Wind Speed at 2 m (minutes 50-60)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed", + "units": "km h-1" + } + } +} diff --git a/src/miranda/preprocess/eccc.py b/src/miranda/preprocess/eccc.py new file mode 100644 index 00000000..583a1bef --- /dev/null +++ b/src/miranda/preprocess/eccc.py @@ -0,0 +1,159 @@ +"""Specialized conversion tools for Environment and Climate Change Canada / Meteorological Service of Canada data.""" + +from __future__ import annotations + +import contextlib +import logging.config + +# import os +import tempfile +from pathlib import Path +from typing import Callable + +from dask.diagnostics import ProgressBar + +from miranda.scripting import LOGGING_CONFIG +from miranda.storage import file_size, report_file_size +from miranda.utils import generic_extract_archive + +logging.config.dictConfig(LOGGING_CONFIG) + + +_data_folder = Path(__file__).parent / "configs" + + +def _run_func_on_archive_with_optional_dask( + file: Path, + function: Callable, + errored_files: list[Path], + **dask_kwargs, +) -> None: + r""" + Run a function on a file archive, extracting it if necessary. + + Parameters + ---------- + file : Path + File archive to process. + function : Callable + Function to run on the file. + errored_files : list[Path] + List of files that errored during processing. + \*\*dask_kwargs : Any + Keyword arguments to pass to dask.distributed.Client. + + Notes + ----- + If the file is larger than 1 GiB or dask_kwargs are passed, dask.dataframes will be used. + Partial function requires the function to accept the following parameters: + - file: Path + - using_dask: bool + - client: dask.distributed.Client + """ + with tempfile.TemporaryDirectory() as temp_folder: + if file.suffix in [".gz", ".tar", ".zip", ".7z"]: + data_files = generic_extract_archive(file, output_dir=temp_folder) + else: + data_files = [file] + msg = f"Processing file: {file}." + logging.info(msg) + + # 1 GiB + size_limit = 2**30 + + for data in data_files: + size = file_size(data) + if size > size_limit or dask_kwargs: + if dask_kwargs: + logging.info("`dask_kwargs` provided - Using dask.dataframes.") + elif size > size_limit: + msg = f"File exceeds {report_file_size(size_limit)} - Using dask.dataframes." + logging.info(msg) + client = ProgressBar + using_dask = True + else: + msg = f"File below {report_file_size(size_limit)} - Using pandas.dataframes." 
+ logging.info(msg) + client = contextlib.nullcontext + using_dask = False + + with client(**dask_kwargs) as c: + try: + function(data, using_dask=using_dask, client=c) + except FileNotFoundError: + errored_files.append(data) + + if Path(temp_folder).iterdir(): + for temporary_file in Path(temp_folder).glob("*"): + if temporary_file in data_files: + temporary_file.unlink() + + +# def convert_flat_files( +# source_files: str | os.PathLike, +# output_folder: str | os.PathLike | list[str | int], +# variables: str | int | list[str | int], +# project: str = "eccc-obs", +# mode: str = "hourly", +# **dask_kwargs, +# ) -> None: +# """ +# +# Parameters +# ---------- +# source_files: str or Path +# output_folder: str or Path +# variables: str or List[str] +# project: {"eccc-obs", "eccc-obs-summary", "eccc-homogenized"} +# mode: {"hourly", "daily"} +# +# Returns +# ------- +# None +# """ +# +# if isinstance(variables, (str, int)): +# variables = [variables] +# +# for variable_code in variables: +# variable_code = str(variable_code).zfill(3) +# metadata = load_json_data_mappings("eccc-obs").get(variable_code) +# +# +# +# # Loop on the files +# logging.info( +# f"Collecting files for variable '{metadata['standard_name']}' " +# f"(filenames containing '{metadata['_table_name']}')." +# ) +# list_files = list() +# if isinstance(source_files, list) or Path(source_files).is_file(): +# list_files.append(source_files) +# else: +# glob_patterns = [g for g in metadata["_table_name"]] +# for pattern in glob_patterns: +# list_files.extend( +# [f for f in Path(source_files).rglob(f"{pattern}*") if f.is_file()] +# ) +# +# +# +# +# manager = mp.Manager() +# errored_files = manager.list() +# converter_func = partial( +# _convert_station_file, +# output_path=rep_nc, +# errored_files=errored_files, +# mode=mode, +# variable_code=variable_code, +# column_names=column_names, +# column_dtypes=column_dtypes, +# **metadata, +# ) +# with mp.Pool(processes=n_workers) as pool: +# pool.map(converter_func, list_files) +# pool.close() +# pool.join() +# +# diff --git a/src/miranda/convert/ecmwf.py b/src/miranda/preprocess/ecmwf_tigge.py similarity index 100% rename from src/miranda/convert/ecmwf.py rename to src/miranda/preprocess/ecmwf_tigge.py diff --git a/src/miranda/structure/_structure.py b/src/miranda/structure/_structure.py index ab4a8302..d335e438 100644 --- a/src/miranda/structure/_structure.py +++ b/src/miranda/structure/_structure.py @@ -305,7 +305,9 @@ def build_path_from_schema( Path or None """ if schema is None: - schema = Path(__file__).parent.joinpath("data").joinpath("ouranos_schema.yml") + schema = ( + Path(__file__).parent.joinpath("configs").joinpath("ouranos_schema.yml") + ) tree = parse_schema(facets, schema, top_folder) branch = tree[0] diff --git a/src/miranda/treatments/__init__.py b/src/miranda/treatments/__init__.py new file mode 100644 index 00000000..4b57c5d4 --- /dev/null +++ b/src/miranda/treatments/__init__.py @@ -0,0 +1,115 @@ +"""Treatments module.""" + +from __future__ import annotations + +import datetime +import logging.config + +import xarray + +from miranda import __version__ as __miranda_version__ +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments._dimensions import * +from miranda.treatments._preprocessing import * +from miranda.treatments._variables import * +from miranda.treatments.utils import * +from miranda.units import get_time_frequency + +logging.config.dictConfig(LOGGING_CONFIG) +VERSION = datetime.datetime.now().strftime("%Y.%m.%d") + + +def 
metadata_conversion(d: xarray.Dataset, p: str, m: dict) -> xarray.Dataset: + """Update xarray dataset and data_vars with project-specific metadata fields. + + Parameters + ---------- + d : xarray.Dataset + Dataset with metadata to be updated. + p : str + Dataset project name. + m : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + logging.info("Converting metadata to CF-like conventions.") + + header = m["Header"] + + # Static handling of version global attributes + miranda_version = header.get("_miranda_version") + if miranda_version: + if isinstance(miranda_version, bool): + header["miranda_version"] = __miranda_version__ + elif isinstance(miranda_version, dict): + if p in miranda_version.keys(): + header["miranda_version"] = __miranda_version__ + else: + msg = f"`_miranda_version` not set for project `{p}`. Not appending." + logging.warning(msg) + if "_miranda_version" in header: + del header["_miranda_version"] + + frequency = m["Header"].get("_frequency") + if frequency: + if isinstance(frequency, bool): + _, m["Header"]["frequency"] = get_time_frequency(d) + elif isinstance(frequency, dict): + if p in frequency.keys(): + m["Header"]["frequency"] = get_time_frequency(d) + else: + logging.warning("`frequency` not set for project. Not appending.") + if "_frequency" in m["Header"]: + del m["Header"]["_frequency"] + + # Conditional handling of global attributes based on project name + for field in [f for f in header.keys() if f.startswith("_")]: + if isinstance(header[field], list): + if p in header[field]: + attr_treatments = header[field][p] + else: + msg = f"Attribute handling (`{field}`) not set for project `{p}`. Continuing..." + logging.warning(msg) + continue + elif isinstance(header[field], dict): + attr_treatments = header[field] + else: + msg = f"Attribute treatment configuration for field `{field}` is not properly configured. Verify JSON." + raise AttributeError(msg) + + if field[1:] in d.attrs: + msg = f"Overwriting `{field[1:]}` based on JSON configuration." 
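+            # The dataset already carries this attribute; the value resolved
+            # from the JSON configuration takes precedence and replaces it.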
+ logging.warning(msg) + if field == "_map_attrs": + for attribute, mapping in attr_treatments.items(): + header[mapping] = d.attrs[attribute] + del d.attrs[attribute] + elif field == "_remove_attrs": + for ff in attr_treatments: + del d.attrs[ff] + elif field.startswith("_") and p in attr_treatments: + header[field[1:]] = attr_treatments[p] + else: + header[field[1:]] = attr_treatments + del header[field] + + # Add global attributes + d.attrs.update(header) + d.attrs.update(dict(project=p)) + + # Date-based versioning + if not d.attrs.get("version"): + d.attrs.update(dict(version=f"v{VERSION}")) + + prev_history = d.attrs.get("history", "") + history = ( + f"[{datetime.datetime.now()}] " + "Converted variables and modified metadata for CF-like compliance: " + f"{prev_history}".strip() + ) + d.attrs.update(dict(history=history)) + + return d diff --git a/src/miranda/treatments/_dimensions.py b/src/miranda/treatments/_dimensions.py new file mode 100644 index 00000000..e5185507 --- /dev/null +++ b/src/miranda/treatments/_dimensions.py @@ -0,0 +1,249 @@ +from __future__ import annotations + +import logging +import warnings +from typing import Any + +import numpy as np +import xarray as xr +from xclim.core.calendar import parse_offset + +from miranda.treatments.utils import _get_section_entry_key, _iter_entry_key # noqa +from miranda.units import get_time_frequency + + +def find_project_variable_codes(code: str, configuration: dict[str, Any]) -> str: + """Find the variable code for a given variable name and project. + + Parameters + ---------- + code : str + Variable name. + configuration : dict + Configuration dictionary. + + Returns + ------- + str + """ + variable_codes = {} + + if "variables" not in configuration: + raise ValueError("No `variables` section found in configuration. Check JSON.") + + for variable_code in configuration["variables"]: + variable_name = configuration["variables"][variable_code].get("_variable_name") + if variable_name: + variable_codes[variable_name] = variable_code + else: + warnings.warn( + f"Variable `{variable_code}` does not have accompanying `variable_name`. " + f"Verify JSON. Continuing with `{variable_code}` as `variable_name`." + ) + variable_codes[variable_code] = variable_code + + if code in variable_codes.values(): + variable = code + else: + variable = variable_codes.get(code) + if not variable: + raise NotImplementedError(f"Variable `{code}` not supported.") + + return variable + + +def dimensions_compliance(ds: xr.Dataset, project: str, metadata: dict) -> xr.Dataset: + """Rename dimensions to CF to their equivalents and reorder them if needed. + + Parameters + ---------- + ds : xarray.Dataset + Dataset with dimensions to be updated. + project : str + Dataset project name. + metadata : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + rename_dims = dict() + for dim in ds.dims: + if dim in metadata["dimensions"].keys(): + cf_name = _get_section_entry_key( + metadata, "dimensions", dim, "_cf_dimension_name", project + ) + if cf_name: + rename_dims[dim] = cf_name + + # Rename dimensions + _rename_dims = [str(d) for d in rename_dims.keys()] + msg = f"Renaming dimensions: {', '.join(_rename_dims)}." 
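+    # Only dimensions with a `_cf_dimension_name` entry are renamed; dimensions
+    # already carrying their CF name pass through unchanged.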
+ logging.info(msg) + ds = ds.rename(rename_dims) + for new in ["lon", "lat"]: + if new == "lon" and "lon" in ds.coords: + if np.any(ds.lon > 180): + lon1 = ds.lon.where(ds.lon <= 180.0, ds.lon - 360.0) + ds[new] = lon1 + + coord_precision = _get_section_entry_key( + metadata, "dimensions", new, "_precision", project + ) + if coord_precision is not None: + ds[new] = ds[new].round(coord_precision) + + # Ensure that lon and lat are written in proper order for plotting purposes + logging.info("Reordering dimensions.") + transpose_order = [] + if "lat" in ds.dims and "lon" in ds.dims: + transpose_order = ["lat", "lon"] + elif "rlat" in ds.dims and "rlon" in ds.dims: + transpose_order = ["rlat", "rlon"] + if "time" in ds.dims and transpose_order: + transpose_order.insert(0, "time") + transpose_order.extend(list(set(ds.dims) - set(transpose_order))) + ds = ds.transpose(*transpose_order) + ds = ds.sortby(transpose_order) + + # Add dimension original name and update attrs + logging.info("Updating dimension attributes.") + dim_descriptions = metadata["dimensions"] + for dim in metadata["dimensions"].keys(): + cf_name = dim_descriptions[dim].get("_cf_dimension_name") + if cf_name is not None and cf_name in ds.dims: + ds[cf_name].attrs.update(dict(original_variable=dim)) + else: + # variable name already follows CF standards + cf_name = dim + for field in dim_descriptions[dim].keys(): + if not field.startswith("_"): + ds[cf_name].attrs.update({field: dim_descriptions[dim][field]}) + + prev_history = ds.attrs.get("history", "") + history = f"Transposed and renamed dimensions. {prev_history}" + ds.attrs.update(dict(history=history)) + + return ds + + +def ensure_correct_time_frequency(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: + """Ensure that time frequency is consistent with expected frequency for project.""" + key = "_ensure_correct_time" + strict_time = "_strict_time" + + if "time" not in m["dimensions"].keys(): + msg = f"No time corrections listed for project `{p}`. Continuing..." + warnings.warn(msg) + return d + + if "time" not in list(d.variables.keys()): + msg = ( + "No time dimension among data variables: " + f"{' ,'.join([str(v) for v in d.variables.keys()])}. " + "Continuing..." + ) + logging.info(msg) + return d + + if key in m["dimensions"]["time"].keys(): + freq_found = xr.infer_freq(d.time) + if strict_time in m["dimensions"]["time"].keys(): + if not freq_found: + msg = ( + "Time frequency could not be found. There may be missing timesteps." + ) + if m["dimensions"]["time"].get(strict_time): + raise ValueError(msg) + else: + warnings.warn(f"{msg} Continuing...") + return d + + correct_time_entry = m["dimensions"]["time"][key] + if isinstance(correct_time_entry, str): + correct_times = [parse_offset(correct_time_entry)[1]] + elif isinstance(correct_time_entry, dict): + correct_times = correct_time_entry.get(p) + if isinstance(correct_times, list): + correct_times = [parse_offset(t)[1] for t in correct_times] + if correct_times is None: + warnings.warn(f"No expected times set for specified project `{p}`.") + elif isinstance(correct_time_entry, list): + correct_times = correct_time_entry + else: + warnings.warn("No expected times set for family of projects.") + return d + + if freq_found not in correct_times: + error_msg = ( + f"Time frequency {freq_found} not among allowed frequencies: " + f"{', '.join(correct_times) if isinstance(correct_times, list) else correct_times}" + ) + if isinstance(correct_time_entry, dict): + error_msg = f"{error_msg} for project `{p}`." 
+            else:
+                error_msg = f"{error_msg}."
+            raise ValueError(error_msg)
+
+        msg = f"Resampling dataset with time frequency: {freq_found}."
+        logging.info(msg)
+        with xr.set_options(keep_attrs=True):
+            d_out = d.assign_coords(
+                time=d.time.resample(time=freq_found).mean(dim="time").time
+            )
+            d_out.time.attrs.update(d.time.attrs)
+
+        prev_history = d.attrs.get("history", "")
+        history = f"Resampled time with `freq={freq_found}`. {prev_history}"
+        d_out.attrs.update(dict(history=history))
+        return d_out
+
+    return d
+
+
+def offset_time_dimension(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Offset the time dimension using the listed frequency."""
+    key = "_offset_time"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    offset, offset_meaning = None, None
+
+    time_freq = dict()
+    expected_period = _get_section_entry_key(
+        m, "dimensions", "time", "_ensure_correct_time", p
+    )
+    if isinstance(expected_period, str):
+        time_freq["expected_period"] = expected_period
+
+    for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
+        if offs:
+            # Offset time by the value of one time-step
+            if offset is None and offset_meaning is None:
+                try:
+                    offset, offset_meaning = get_time_frequency(d, **time_freq)
+                except TypeError:
+                    msg = "Unable to parse the time frequency. Verify data integrity before retrying."
+                    logging.error(msg)
+                    raise
+
+            msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."
+            logging.info(msg)
+            with xr.set_options(keep_attrs=True):
+                out = d[vv]
+                out["time"] = out.time - np.timedelta64(offset[0], offset[1])
+                d_out[vv] = out
+            converted.append(vv)
+
+            prev_history = d.attrs.get("history", "")
+            history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s)`. {prev_history}"
+            d_out.attrs.update(dict(history=history))
+        elif offs is False:
+            msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
diff --git a/src/miranda/treatments/_preprocessing.py b/src/miranda/treatments/_preprocessing.py
new file mode 100644
index 00000000..b09cceb6
--- /dev/null
+++ b/src/miranda/treatments/_preprocessing.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from functools import partial
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import xarray as xr
+
+from miranda.convert.utils import date_parser
+
+
+def correct_time_entries(
+    ds: xr.Dataset,
+    split: str = "_",
+    location: int = -1,
+    field: str = "time",
+) -> xr.Dataset:
+    """Correct time entries in a dataset.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset whose time coordinate is to be rebuilt.
+    split : str
+        Delimiter used to split the source filename.
+    location : int
+        Index of the date token within the split filename.
+    field : str
+        Name of the time coordinate to rebuild.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    filename = ds.encoding["source"]
+    date = date_parser(Path(filename).stem.split(split)[location])
+    vals = np.arange(len(ds[field]))
+    days_since = f"days since {date}"
+    time = xr.coding.times.decode_cf_datetime(
+        vals, units=days_since, calendar="standard"
+    )
+    ds = ds.assign_coords({field: time})
+
+    prev_history = ds.attrs.get("history", "")
+    history = (
+        f"Time index recalculated in preprocessing step ({days_since}). {prev_history}"
+    )
+    ds.attrs.update(dict(history=history))
+
+    return ds
+
+
+def correct_var_names(
+    ds: xr.Dataset, split: str = "_", location: int = 0
+) -> xr.Dataset:
+    """Correct variable names in a dataset.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset whose first data variable is to be renamed.
+    split : str
+        Delimiter used to split the source filename.
+    location : int
+        Index of the variable-name token within the split filename.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    filename = ds.encoding["source"]
+    new_name = Path(filename).stem.split(split)[location]
+    old_name = list(ds.data_vars.keys())[0]
+
+    prev_history = ds.attrs.get("history", "")
+    history = f"Variable renamed in preprocessing step ({old_name}: {new_name}). {prev_history}"
+    ds.attrs.update(dict(history=history))
+
+    return ds.rename({old_name: new_name})
+
+
+def preprocessing_corrections(
+    ds: xr.Dataset, configuration: dict[str, Any]
+) -> xr.Dataset:
+    """Corrections function dispatcher to ensure minimal dataset validity on open.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset to be corrected.
+    configuration : dict
+        Project configuration containing an optional `_preprocess` section.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+
+    def _preprocess_correct(d: xr.Dataset, *, ops: list[partial]) -> xr.Dataset:
+        for correction in ops:
+            d = correction(d)
+        return d
+
+    correction_fields = configuration.get("_preprocess")
+    if correction_fields:
+        preprocess_ops = []
+        for field in correction_fields:
+            if field == "_variable_name":
+                preprocess_ops.append(
+                    partial(correct_var_names, **correction_fields[field])
+                )
+            if field == "_time":
+                preprocess_ops.append(
+                    partial(correct_time_entries, **correction_fields[field])
+                )
+        if preprocess_ops:
+            corrector = partial(_preprocess_correct, ops=preprocess_ops)
+            return corrector(ds)
+    return ds
diff --git a/src/miranda/treatments/_variables.py b/src/miranda/treatments/_variables.py
new file mode 100644
index 00000000..f5695570
--- /dev/null
+++ b/src/miranda/treatments/_variables.py
@@ -0,0 +1,271 @@
+from __future__ import annotations
+
+import logging.config
+
+import xarray as xr
+import xclim.core.units
+from xclim.core import units
+
+from miranda.treatments.utils import _get_section_entry_key  # noqa
+from miranda.treatments.utils import _iter_entry_key  # noqa
+from miranda.units import get_time_frequency
+
+__all__ = [
+    "cf_units_conversion",
+    "clip_values",
+    "correct_unit_names",
+    "invert_value_sign",
+    "transform_values",
+    "variable_conversion",
+]
+
+
+def correct_unit_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Correct unit names."""
+    key = "_corrected_units"
+    for var, val in _iter_entry_key(d, m, "variables", key, p):
+        if val:
+            d[var].attrs["units"] = val
+            prev_history = d.attrs.get("history", "")
+            history = (
+                f"Corrected unit name for variable `{var}` to `{val}`. {prev_history}"
+            )
+            d.attrs.update(dict(history=history))
+
+    return d
+
+
+# For de-accumulation or conversion to flux
+def transform_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Transform dataset values according to the operation listed."""
+    key = "_transformation"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    offset, offset_meaning = None, None
+
+    time_freq = dict()
+    expected_period = _get_section_entry_key(
+        m, "dimensions", "time", "_ensure_correct_time", p
+    )
+    if isinstance(expected_period, str):
+        time_freq["expected_period"] = expected_period
+
+    for vv, trans in _iter_entry_key(d, m, "variables", key, p):
+        if trans:
+            if trans == "deaccumulate":
+                # Time-step accumulated total to time-based flux (de-accumulation)
+                if offset is None and offset_meaning is None:
+                    try:
+                        offset, offset_meaning = get_time_frequency(d, **time_freq)
+                    except TypeError:
+                        logging.error(
+                            "Unable to parse the time frequency. Verify data integrity before retrying."
+                        )
+                        raise
+
+                msg = f"De-accumulating units for variable `{vv}`."
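+                # De-accumulation recovers per-step amounts from running totals:
+                # diff() takes step differences, the first step of each accumulation
+                # period keeps its original value via where(), and amount2rate()
+                # then converts the per-step amount to a flux.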
+                logging.info(msg)
+                with xr.set_options(keep_attrs=True):
+                    out = d[vv].diff(dim="time")
+                    out = d[vv].where(
+                        getattr(d[vv].time.dt, offset_meaning) == offset[0],
+                        out.broadcast_like(d[vv]),
+                    )
+                    out = units.amount2rate(out, out_units=m["variables"][vv]["units"])
+                d_out[vv] = out
+                converted.append(vv)
+            elif trans == "amount2rate":
+                # NOTE: This treatment is no longer needed in xclim v0.43.0+ but is kept for backwards compatibility.
+                # Frequency-based totals to time-based flux.
+                msg = f"Performing amount-to-rate units conversion for variable `{vv}`."
+                logging.info(msg)
+                with xr.set_options(keep_attrs=True):
+                    out = units.amount2rate(
+                        d[vv],
+                        out_units=m["variables"][vv]["units"],
+                    )
+                d_out[vv] = out
+                converted.append(vv)
+            elif isinstance(trans, str):
+                if trans.startswith("op "):
+                    op = trans[3]
+                    value = trans[4:].strip()
+                    if value.startswith("attrs"):
+                        value = units.str2pint(d[vv].attrs[value[6:]])
+                    else:
+                        value = units.str2pint(value)
+                    with xr.set_options(keep_attrs=True):
+                        if op == "+":
+                            value = units.convert_units_to(value, d[vv])
+                            d_out[vv] = d[vv] + value
+                        elif op == "-":
+                            value = units.convert_units_to(value, d[vv])
+                            d_out[vv] = d[vv] - value
+                        elif op == "*":
+                            d_out[vv] = units.pint_multiply(d[vv], value)
+                        elif op == "/":
+                            d_out[vv] = units.pint_multiply(d[vv], 1 / value)
+                        else:
+                            raise NotImplementedError(
+                                f"Op transform doesn't implement the «{op}» operator."
+                            )
+                    converted.append(vv)
+                else:
+                    raise NotImplementedError(f"Unknown transformation: {trans}")
+
+            prev_history = d.attrs.get("history", "")
+            history = (
+                f"Transformed variable `{vv}` values using method `{trans}`. {prev_history}"
+            )
+            d_out.attrs.update(dict(history=history))
+        elif trans is False:
+            msg = f"No transformations needed for `{vv}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
+
+
+def invert_value_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Flip the sign of DataArray values."""
+    key = "_invert_sign"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    for vv, inv_sign in _iter_entry_key(d, m, "variables", key, p):
+        if inv_sign:
+            msg = f"Inverting sign for `{vv}` (switching direction of values)."
+            logging.info(msg)
+            with xr.set_options(keep_attrs=True):
+                out = d[vv]
+                d_out[out.name] = -out
+            converted.append(vv)
+
+            prev_history = d.attrs.get("history", "")
+            history = f"Inverted sign for variable `{vv}` (switched direction of values). {prev_history}"
+            d_out.attrs.update(dict(history=history))
+        elif inv_sign is False:
+            msg = f"No sign inversion needed for `{vv}` in `{p}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
+
+
+# For converting variable units to standard workflow units
+def cf_units_conversion(d: xr.Dataset, m: dict) -> xr.Dataset:
+    """Perform pint-based units conversion."""
+    if "time" in m["dimensions"].keys():
+        if m["dimensions"]["time"].get("units"):
+            d["time"].attrs["units"] = m["dimensions"]["time"]["units"]
+
+    for vv, unit in _iter_entry_key(d, m, "variables", "units", None):
+        if unit:
+            with xr.set_options(keep_attrs=True):
+                d[vv] = units.convert_units_to(d[vv], unit, context="hydro")
+            prev_history = d.attrs.get("history", "")
+            history = f"Converted variable `{vv}` to CF-compliant units (`{unit}`). {prev_history}"
{prev_history}" + d.attrs.update(dict(history=history)) + + return d + + +# For clipping variable values to an established maximum/minimum +def clip_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: + """Clip values to an appropriate range,.""" + key = "_clip_values" + d_out = xr.Dataset(coords=d.coords, attrs=d.attrs) + converted = [] + for vv in d.data_vars: + if vv in m["variables"].keys(): + clip_vals = _get_section_entry_key(m, "variables", vv, key, p) + if clip_values: + min_value, max_value = None, None + # Gather unit conversion context, if applicable + context = clip_vals.get("context", None) + for op, value in clip_vals.items(): + if op == "min": + min_value = xclim.core.units.convert_units_to( + value, d[vv], context + ) + if op == "max": + max_value = xclim.core.units.convert_units_to( + value, d[vv], context + ) + msg = f"Clipping min/max values for `{vv}` ({min_value}/{max_value})." + logging.info(msg) + with xr.set_options(keep_attrs=True): + out = d[vv] + d_out[out.name] = out.clip(min_value, max_value) + converted.append(vv) + elif clip_values is False: + msg = f"No clipping of values needed for `{vv}` in `{p}` (Explicitly set to False)." + logging.info(msg) + continue + else: + msg = f"Unknown clipping values for `{vv}` in `{p}`." + logging.info(msg) + continue + + prev_history = d.attrs.get("history", "") + history = f"Clipped variable `{vv}` with `min={min_value}` and `max={max_value}`. {prev_history}" + d_out.attrs.update(dict(history=history)) + + # Copy unconverted variables + for vv in d.data_vars: + if vv not in converted: + d_out[vv] = d[vv] + + return d_out + + +# For renaming and reordering lat and lon dims + + +def variable_conversion(d: xr.Dataset, p: str | None, m: dict) -> xr.Dataset: + """Add variable metadata and remove nonstandard entries. + + Parameters + ---------- + d : xarray.Dataset + Dataset with variable(s) to be updated. + p : str + Dataset project name. + m : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + var_descriptions = m["variables"] + var_correction_fields = [ + "_clip_values", + "_corrected_units", + "_invert_sign", + "_offset_time", + "_transformation", + ] + for var in d.variables: + if var in var_descriptions.keys(): + for field in var_correction_fields: + if field in var_descriptions[var].keys(): + del var_descriptions[var][field] + d[var].attrs.update(var_descriptions[var]) + + # Rename data variables + for orig_var_name, cf_name in _iter_entry_key( + d, m, "variables", "_cf_variable_name", p + ): + if cf_name is not None: + d = d.rename({orig_var_name: cf_name}) + d[cf_name].attrs.update(dict(original_variable=orig_var_name)) + del d[cf_name].attrs["_cf_variable_name"] + + return d diff --git a/src/miranda/treatments/utils.py b/src/miranda/treatments/utils.py new file mode 100644 index 00000000..4ee19470 --- /dev/null +++ b/src/miranda/treatments/utils.py @@ -0,0 +1,114 @@ +"""Utility functions for GIS operations.""" + +from __future__ import annotations + +import inspect +import json +from pathlib import Path +from typing import Any + +__all__ = [ + "load_json_data_mappings", +] + + +def _get_section_entry_key( + meta: dict, entry: str, var: str, key: str, project: str +) -> Any: + """ + Get a specific key from a section of the metadata. + + Parameters + ---------- + meta : dict + The metadata dictionary. + entry : str + The entry to look for. + var : str + The variable to look for. + key : str + The key to look for. + project : str + The project name. 
+
+    Returns
+    -------
+    Any
+        The value of the key.
+    """
+    var_meta = meta[entry].get(var, {})
+    if key in var_meta:
+        if isinstance(var_meta[key], dict):
+            config = var_meta[key].get(project)
+            if config is None and "all" in var_meta[key].keys():
+                config = var_meta[key].get("all")
+            return config
+        return var_meta[key]
+    return None
+
+
+def _iter_entry_key(ds, meta, entry, key, project) -> Iterator[tuple[str, Any]]:
+    """
+    Iterate through entry keys.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The dataset.
+    meta : dict
+        The metadata dictionary.
+    entry : str
+        The entry to look for.
+    key : str
+        The key to look for.
+    project : str
+        The project name.
+
+    Yields
+    ------
+    tuple[str, Any]
+        The variable and value.
+    """
+    for vv in set(ds.data_vars).intersection(meta[entry]):
+        val = _get_section_entry_key(meta, entry, vv, key, project)
+        yield vv, val
+
+
+def load_json_data_mappings(
+    project: str, configurations: dict[str, Path] | None = None
+) -> dict[str, Any]:
+    """
+    Load JSON mappings for supported dataset conversions.
+
+    Parameters
+    ----------
+    project : str
+        The project name.
+    configurations : dict, optional
+        Configuration files for the project.
+        If not provided, the function will try to find the configuration files in the `configs` folder.
+
+    Returns
+    -------
+    dict[str, Any]
+        The metadata definition.
+    """
+    if configurations is None:
+        # Assume that a `configs` folder sits next to the calling module.
+        calling_frame = inspect.currentframe().f_back
+        calling_file_path = calling_frame.f_globals["__file__"]
+        config_folder = Path(calling_file_path).parent / "configs"
+
+        configurations = {}
+        for configuration in config_folder.glob("*attrs.json"):
+            project_config = str(configuration.stem).split("_")[0]
+            # Config stems may name several projects separated by `|`; map each to the same file.
+            if "|" in project_config:
+                for p in project_config.split("|"):
+                    configurations[p] = configuration
+            configurations[project_config] = configuration
+
+    if project in configurations.keys():
+        config_file = configurations[project]
+        with config_file.open() as f:
+            metadata_definition = json.load(f)
+        return metadata_definition
+    else:
+        raise NotImplementedError(f"Project not supported: {project}")
diff --git a/src/miranda/validators.py b/src/miranda/validators.py
index f14d222f..d01e78fe 100644
--- a/src/miranda/validators.py
+++ b/src/miranda/validators.py
@@ -9,12 +9,12 @@
 from pandas._libs.tslibs import NaTType  # noqa
 from schema import Literal, Optional, Or, Regex, Schema
 
-from .cv import VALIDATION_ENABLED
+from miranda.cv import VALIDATION_ENABLED
 
 __all__ = ["url_validate"]
 
 if VALIDATION_ENABLED:
-    from .cv import (
+    from miranda.cv import (
         ACTIVITIES,
         BIAS_ADJUST_INSTITUTIONS,
         DRIVING_MODELS,
diff --git a/src/miranda/vocabularies/__init__.py b/src/miranda/vocabularies/__init__.py
new file mode 100644
index 00000000..8d108418
--- /dev/null
+++ b/src/miranda/vocabularies/__init__.py
@@ -0,0 +1,5 @@
+"""Controlled Vocabulary module."""
+
+from __future__ import annotations
+
+from .
import eccc diff --git a/src/miranda/vocabularies/eccc.py b/src/miranda/vocabularies/eccc.py new file mode 100644 index 00000000..f668ec63 --- /dev/null +++ b/src/miranda/vocabularies/eccc.py @@ -0,0 +1,95 @@ +"""Definition lists of variables from ECCC for each type of archive.""" + +# For more information see the ECCC Technical Documentation + +__all__ = [ + "obs_groupings", + "obs_vocabularies", +] + +obs_vocabularies = dict() + +# Hourly Data + +obs_vocabularies["HLY01"] = [] +obs_vocabularies["HLY01"].extend(list(range(71, 123))) # Hourly variables +obs_vocabularies["HLY01"].extend([209, 210]) # Wind character and gust speed +obs_vocabularies["HLY01"].extend(list(range(219, 231))) # Cloud layers +obs_vocabularies["HLY01"].append(244) # Precipitation type +obs_vocabularies["HLY01"].append(260) # Freezing fog + +obs_vocabularies["HLY01_RCS"] = obs_vocabularies["HLY01"].copy() +obs_vocabularies["HLY01_RCS"].extend( + list(range(262, 281)) +) # Reference Climate Surface (RCS) weather stations + +obs_vocabularies["HLY03"] = [] +obs_vocabularies["HLY03"].extend(list(range(123, 133))) # Hourly rainfall +obs_vocabularies["HLY03"].extend([160, 161]) + +obs_vocabularies["HLY10"] = [] +obs_vocabularies["HLY10"].extend(list(range(61, 69))) # Sunshine +obs_vocabularies["HLY10"].extend([133, 169, 170, 171, 172]) # Solar radiation + +obs_vocabularies["HLY15"] = [69, 70, 76, 156] # Wind + +obs_vocabularies["HLY21"] = [123] # Fischer/Porter precipitation + +# Daily Data + +obs_vocabularies["DLY02"] = [] +obs_vocabularies["DLY02"].extend(list(range(1, 26))) # Daily variables +obs_vocabularies["DLY02"].append(157) # Direction of extreme gust +obs_vocabularies["DLY02"].append(179) # Daily bright sunshine + +obs_vocabularies["DLY03"] = [] +obs_vocabularies["DLY03"].extend(list(range(124, 133))) +obs_vocabularies["DLY03"].extend([160, 161]) + +obs_vocabularies["DLY04"] = obs_vocabularies["DLY02"].copy() + +obs_vocabularies["DLY12"] = [] +obs_vocabularies["DLY12"].extend(list(range(134, 151))) # Soil temperatures + +obs_vocabularies["DLY13"] = list(range(151, 156)) # Pan evaporation + +obs_vocabularies["DLY21"] = [12] # Precipitation +obs_vocabularies["DLY21"].extend(list(range(127, 133))) # Precipitation over time +obs_vocabularies["DLY21"].append(161) # Most precipitation in 25 hours + +obs_vocabularies["DLY44"] = [] +obs_vocabularies["DLY44"].extend([1, 2, 3]) # Temperature +obs_vocabularies["DLY44"].extend(list(range(10, 18))) # Precipitation + +# Monthly data + +obs_vocabularies["MLY04"] = [] +obs_vocabularies["MLY04"].extend(list(range(26, 39))) # Days with variables +obs_vocabularies["MLY04"].extend(list(range(39, 61))) # Means of variables +obs_vocabularies["MLY04"].append(158) # Direction of extreme gust + +# Groupings + +obs_groupings = dict() +obs_groupings["HLY"] = list( + set( + obs_vocabularies["HLY01"] + + obs_vocabularies["HLY01_RCS"] + + obs_vocabularies["HLY03"] + + obs_vocabularies["HLY10"] + + obs_vocabularies["HLY15"] + + obs_vocabularies["HLY21"] + ) +) +obs_groupings["DLY"] = list( + set( + obs_vocabularies["DLY02"] + + obs_vocabularies["DLY03"] + + obs_vocabularies["DLY04"] + + obs_vocabularies["DLY12"] + + obs_vocabularies["DLY13"] + + obs_vocabularies["DLY21"] + + obs_vocabularies["DLY44"] + ) +) +obs_groupings["MLY"] = list(set(obs_vocabularies["MLY04"])) diff --git a/templates/eccc-ahccd_preprocess.py b/templates/eccc-ahccd_preprocess.py new file mode 100644 index 00000000..27a88072 --- /dev/null +++ b/templates/eccc-ahccd_preprocess.py @@ -0,0 +1,10 @@ +from pathlib import Path + 
+from miranda.preprocess import convert_ahccd, merge_ahccd + +in_files = Path("~/Desktop/ec_data/ahccd").expanduser() +output = Path().cwd().parent / "test" +variable = "tas" + +convert_ahccd(in_files, output, variable, generation=3) +merge_ahccd(output.joinpath("tas"), output.joinpath("merged"), variable, overwrite=True) diff --git a/templates/eccc-obs_preprocess.py b/templates/eccc-obs_preprocess.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/eccc_ahccd_conversion.py b/templates/eccc_ahccd_conversion.py deleted file mode 100644 index e29dd643..00000000 --- a/templates/eccc_ahccd_conversion.py +++ /dev/null @@ -1,28 +0,0 @@ -from os import getenv -from pathlib import Path - -from miranda.eccc import convert_ahccd - -if __name__ == "__main__": - in_files = getenv("in") - out_files = getenv("out") - - source_files = Path(in_files) - output_path = Path(out_files) - - source_var_gens = { - "Generation3/Homog_daily_mean_temp_v2019/": ("tas", 3), - "Generation3/Homog_daily_max_temp_v2019/": ("tasmax", 3), - "Generation3/Homog_daily_min_temp_v2019/": ("tasmin", 3), - "Generation2/Adj_Daily_Total_v2017/": ("pr", 2), - "Generation2/Adj_Daily_Snow_v2017/": ("prsn", 2), - "Generation2/Adj_Daily_Rain_v2017/": ("prlp", 2), - } - - for folder, (variable, generation) in source_var_gens.items(): - convert_ahccd( - source_files.expanduser().joinpath(folder), - output_path, - variable, - generation, - ) diff --git a/templates/eccc_raw_daily_conversion.py b/templates/eccc_raw_daily_conversion.py index 4fb64de7..583c8d10 100644 --- a/templates/eccc_raw_daily_conversion.py +++ b/templates/eccc_raw_daily_conversion.py @@ -11,25 +11,25 @@ time_step = "daily" n_workers = 3 var_codes = [ - 1, - 2, - 3, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, + # 1, + # 2, + # 3, + # 10, + # 11, + # 12, + # 13, + # 14, + # 15, + # 16, + # 17, + # 18, + # 19, + # 20, + # 21, + # 22, 23, 24, - 25, + # 25, ] in_files = getenv("in") diff --git a/templates/eccc_raw_hourly_conversion.py b/templates/eccc_raw_hourly_conversion.py index 68a24405..303d3d10 100644 --- a/templates/eccc_raw_hourly_conversion.py +++ b/templates/eccc_raw_hourly_conversion.py @@ -1,8 +1,8 @@ from os import getenv from pathlib import Path -from miranda.eccc import ( - aggregate_stations, +from miranda.preprocess._eccc_obs import ( + merge_stations, convert_flat_files, merge_converted_variables, ) @@ -11,39 +11,41 @@ time_step = "hourly" n_workers = 3 var_codes = [ - 76, - 77, - 78, - 79, - 80, - 89, - 94, - 107, - 108, - 109, - 110, - 123, - 133, - 156, - 262, - 263, - 264, - 265, - 266, - 267, - 268, - 269, - 270, - 271, - 272, - 273, - 274, - 275, - 276, - 277, - 278, - 279, - 280, + 209, + 210, + # 76, + # 77, + # 78, + # 79, + # 80, + # 89, + # 94, + # 107, + # 108, + # 109, + # 110, + # 123, + # 133, + # 156, + # 262, + # 263, + # 264, + # 265, + # 266, + # 267, + # 268, + # 269, + # 270, + # 271, + # 272, + # 273, + # 274, + # 275, + # 276, + # 277, + # 278, + # 279, + # 280, ] in_files = getenv("in") @@ -74,7 +76,7 @@ n_workers=n_workers, ) - aggregate_stations( + merge_stations( source_files=merged, output_folder=final, time_step=time_step, diff --git a/templates/eccc_rdrs_processing.py b/templates/eccc_rdrs_processing.py index 1d68da11..deecc494 100644 --- a/templates/eccc_rdrs_processing.py +++ b/templates/eccc_rdrs_processing.py @@ -1,7 +1,7 @@ import logging from pathlib import Path -from miranda.convert.eccc_rdrs import convert_rdrs, rdrs_to_daily +from miranda.preprocess.eccc_rdrs import 
convert_rdrs, rdrs_to_daily from miranda.io import concat_rechunk_zarr diff --git a/templates/emdna_processing.py b/templates/emdna_processing.py index 194526fc..3ee9eb07 100644 --- a/templates/emdna_processing.py +++ b/templates/emdna_processing.py @@ -4,6 +4,7 @@ from dask.diagnostics import ProgressBar +import miranda.convert.corrections from miranda import convert, io, structure @@ -23,7 +24,7 @@ def main(): files_by_member = convert.gather_emdna(path) for member, files in files_by_member.items(): if member == "OI": - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( files, project="EMDNA", preprocess=preprocess_dna ) diff --git a/templates/era5-land_reanalysis_processing.py b/templates/era5-land_reanalysis_processing.py index c58aa430..3fc27945 100644 --- a/templates/era5-land_reanalysis_processing.py +++ b/templates/era5-land_reanalysis_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import miranda.convert.corrections from miranda import convert, io @@ -7,7 +8,7 @@ def main(): path_era5_land_out = Path("~/Desktop").expanduser() era5_land_files = convert.gather_ecmwf("era5-land", path_era5_land_out) - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( era5_land_files, project="era5-land-monthly-means", ) diff --git a/templates/espo-g6.py b/templates/espo-g6.py index ac43168f..ea9a3e4d 100644 --- a/templates/espo-g6.py +++ b/templates/espo-g6.py @@ -4,6 +4,7 @@ from dask.diagnostics import ProgressBar +import miranda.convert.corrections from miranda import convert, io, structure from miranda.decode import Decoder @@ -42,7 +43,7 @@ def main(): ) if not os.path.exists(new_path): # and path not in skip: # open as dataset - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( [f], add_version_hashes=False, project=project, diff --git a/templates/nasa_nex-gddp-cmip6_processing.py b/templates/nasa_nex-gddp-cmip6_processing.py index 78f51687..3fb7572b 100644 --- a/templates/nasa_nex-gddp-cmip6_processing.py +++ b/templates/nasa_nex-gddp-cmip6_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import miranda.convert.corrections from miranda import convert, io @@ -10,7 +11,7 @@ def main(): for path, list_files in nex_files.items(): # open as dataset - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( list_files, add_version_hashes=False, project="NEX-GDDP-CMIP6", diff --git a/templates/restructure_datasets.py b/templates/restructure_datasets.py index d10fa8dc..f0d45ee9 100644 --- a/templates/restructure_datasets.py +++ b/templates/restructure_datasets.py @@ -17,5 +17,5 @@ guess=False, method="copy", make_dirs=True, - filename_pattern="*.zarr", + suffix="zarr", ) diff --git a/tests/test_utils.py b/tests/test_utils.py index c525f821..c6a529bb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,9 +4,9 @@ from datetime import date from pathlib import Path -import pytest # noqa +import pytest -import miranda.eccc._utils as eccc_utils # noqa +import miranda.preprocess._metadata as metadata import miranda.utils @@ -28,12 +28,13 @@ def test_hourly_cf_dictionaries(self): codes = list() variables = dict() for key in keys: - variables[key] = eccc_utils.cf_station_metadata(key) - codes.append(variables[key]["standard_name"]) - if variables[key]["standard_name"] == "dry_bulb_temperature": - assert variables[key]["raw_units"] == "degC" - assert variables[key]["units"] == "K" - assert variables[key]["missing_flags"] == "M" + 
variables[key] = metadata.eccc_variable_metadata(key, "eccc-obs") + var_name = next(iter(variables[key]["metadata"])) + var_metadata = variables[key]["metadata"][var_name] + codes.append(var_metadata["standard_name"]) + if var_metadata["standard_name"] == "dry_bulb_temperature": + assert var_metadata["units"] == "degC" + assert var_metadata["missing_flags"] == "M" assert set(codes) == { "wind_speed_u2a", @@ -57,15 +58,17 @@ def test_daily_cf_dictionaries(self): codes = list() variables = dict() for key in keys: - variables[key] = eccc_utils.cf_station_metadata(key) - codes.append(variables[key]["standard_name"]) - if variables[key]["standard_name"].startswith("air_temperature"): - assert variables[key]["raw_units"] == "degC" - assert variables[key]["units"] == "K" - elif variables[key]["standard_name"].endswith("precipitation_amount"): - assert variables[key]["raw_units"] in ["cm", "mm"] - assert variables[key]["units"] == "m" - assert variables[key]["missing_flags"] == "M" + variables[key] = metadata.eccc_variable_metadata(key, "eccc-obs") + + var_name = next(iter(variables[key]["metadata"])) + var_metadata = variables[key]["metadata"][var_name] + codes.append(var_metadata["standard_name"]) + + if var_name.startswith("air_temperature"): + assert var_metadata["units"] == "degC" + elif var_name.endswith("precipitation_amount"): + assert var_metadata["units"] in ["cm", "mm"] + assert var_metadata["missing_flags"] == "M" assert set(codes) == { "air_temperature", diff --git a/tox.ini b/tox.ini index 67a06e39..6bac4077 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,12 @@ [tox] -min_version = 4.18.0 +min_version = 4.23.2 envlist = lint py{39,310,311,312,313} docs requires = flit >= 3.9.0,<4.0 - pip >= 24.2.0 + pip >= 24.3.1 opts = --verbose @@ -21,12 +21,12 @@ python = [testenv:lint] skip_install = True deps = - black ==24.8.0 + black ==24.10.0 blackdoc ==0.3.9 isort ==5.13.2 flake8 >=7.1.1 flake8-rst-docstrings >=0.3.0 - ruff >=0.5.7 + ruff >=0.8.2 numpydoc >=1.8.0 commands_pre = pip list @@ -41,7 +41,8 @@ extras = commands = mkdir {envtmpdir}/.esdoc git clone https://github.com/ES-DOC/pyessv-archive.git {envtmpdir}/.esdoc/pyessv-archive - make docs + make autodoc + make --directory=docs clean html allowlist_externals = git make
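
A minimal usage sketch of the treatments API added in this patch. The call order and paths
are assumptions inferred from the function signatures in `src/miranda/treatments`; the
project key, the JSON path, and `example.nc` are hypothetical placeholders:

    from pathlib import Path

    import xarray as xr

    from miranda.treatments._dimensions import dimensions_compliance
    from miranda.treatments._variables import cf_units_conversion, variable_conversion
    from miranda.treatments.utils import load_json_data_mappings

    project = "eccc-obs"  # hypothetical project key
    # Passing `configurations` explicitly avoids the caller-relative `configs` folder lookup.
    metadata = load_json_data_mappings(
        project, configurations={project: Path("configs/eccc-obs_attrs.json")}
    )

    ds = xr.open_dataset("example.nc")  # hypothetical input file
    ds = dimensions_compliance(ds, project, metadata)  # CF dimension names and ordering
    ds = variable_conversion(ds, project, metadata)    # variable metadata and CF names
    ds = cf_units_conversion(ds, metadata)             # pint-based unit conversion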