diff --git a/.cruft.json b/.cruft.json
index 4c8ccbea..d1d98051 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,6 +1,6 @@
 {
   "template": "https://github.com/Ouranosinc/cookiecutter-pypackage",
-  "commit": "1d9ee5f08d3e8e4f78a4aabb75e2ce4eff8750bf",
+  "commit": "63f44fcbfe2e16118a4fa6b09fe847aa44e0715a",
   "checkout": null,
   "context": {
     "cookiecutter": {
diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml
index 1298f336..aa1e1a86 100644
--- a/.github/workflows/bump-version.yml
+++ b/.github/workflows/bump-version.yml
@@ -56,7 +56,7 @@ jobs:
             github.com:443
             pypi.org:443
       - name: Checkout Repository (no persist-credentials)
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false
          fetch-depth: 0
@@ -68,23 +68,20 @@ jobs:
        run: |
          git config --local user.email "bumpversion[bot]@ouranos.ca"
          git config --local user.name "bumpversion[bot]"
-      - name: Current Version
-        run: |
-          CURRENT_VERSION="$(grep -E '__version__' src/miranda/__init__.py | cut -d ' ' -f3)"
-          echo "CURRENT_VERSION=${CURRENT_VERSION}" >> $GITHUB_ENV
      - name: Install CI libraries
        run: |
          python -m pip install --require-hashes -r CI/requirements_ci.txt
      - name: Conditional Bump Version
        run: |
-          if [[ ${{ env.CURRENT_VERSION }} =~ -dev(\.\d+)? ]]; then
+          CURRENT_VERSION=$(bump-my-version show current_version)
+          if [[ ${CURRENT_VERSION} =~ -dev(\.\d+)? ]]; then
            echo "Development version (ends in 'dev(\.\d+)?'), bumping 'build' version"
            bump-my-version bump build
          else
            echo "Version is stable, bumping 'patch' version"
            bump-my-version bump patch
          fi
-          bump-my-version show-bump
+          echo "new_version=$(bump-my-version show current_version)"
      - name: Push Changes
        uses: ad-m/github-push-action@d91a481090679876dfc4178fef17f286781251df # v0.8.0
        with:
diff --git a/.github/workflows/cache-cleaner.yml b/.github/workflows/cache-cleaner.yml
index 825fa33f..3b6d68ec 100644
--- a/.github/workflows/cache-cleaner.yml
+++ b/.github/workflows/cache-cleaner.yml
@@ -26,7 +26,9 @@ jobs:
            objects.githubusercontent.com:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Cleanup
        run: |
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index f8af451a..40b47c3c 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -13,7 +13,8 @@ name: "CodeQL"
 on:
   push:
-    branches: [ "main" ]
+    branches:
+      - main
     paths-ignore:
       - ../../CHANGELOG.rst
       - pyproject.toml
@@ -43,6 +44,8 @@ jobs:
     steps:
      - name: Checkout repository
        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml
index 32eaf0ca..b5f9acea 100644
--- a/.github/workflows/dependency-review.yml
+++ b/.github/workflows/dependency-review.yml
@@ -28,7 +28,9 @@ jobs:
            github.com:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Dependency Review
        uses: actions/dependency-review-action@3b139cfc5fae8b618d3eae3675e383bb1769c019 # v4.5.0
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8d173938..86ddc6b8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -38,7 +38,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
@@ -60,14 +62,21 @@ jobs:
    strategy:
      matrix:
        os: [ 'ubuntu-latest' ]
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ] # "3.13"
+        python-version:
+          - "3.9"
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          # - "3.13"
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
@@ -102,7 +111,12 @@ jobs:
    strategy:
      matrix:
        os: [ 'ubuntu-latest' ]
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
+        python-version:
+          - "3.9"
+          - "3.10"
+          - "3.11"
+          - "3.12"
+          # - "3.13"
    defaults:
      run:
        shell: bash -l {0}
@@ -112,7 +126,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Setup Conda (Micromamba) with Python${{ matrix.python-version }}
        uses: mamba-org/setup-micromamba@068f1ab4b37ed9b3d9f73da7db90a0cda0a48d29 # v2.0.3
        with:
@@ -120,7 +136,6 @@ jobs:
          environment-file: environment-dev.yml
          create-args: >-
            python=${{ matrix.python-version }}
-          micromamba-version: 1.5.10-0 # Pin micromamba version because of following issue: https://github.com/mamba-org/setup-micromamba/issues/225
      - name: Install miranda
        run: |
          python -m pip install --no-deps .
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
index 235d064d..b9eab553 100644
--- a/.github/workflows/publish-pypi.yml
+++ b/.github/workflows/publish-pypi.yml
@@ -28,7 +28,9 @@ jobs:
            pypi.org:443
            upload.pypi.org:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python3
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 2ca0fdb0..2ed6d774 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -16,7 +16,9 @@ on:
    - main
 # Declare default permissions as read only.
-permissions: read-all
+# Read-all permission is not technically needed for this workflow.
+permissions:
+  contents: read
 jobs:
   analysis:
@@ -47,7 +49,7 @@ jobs:
            www.bestpractices.dev:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false
diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml
index 1fd111bb..0bf53932 100644
--- a/.github/workflows/tag-testpypi.yml
+++ b/.github/workflows/tag-testpypi.yml
@@ -21,7 +21,9 @@ jobs:
        with:
          egress-policy: audit
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Create Release
        uses: softprops/action-gh-release@7b4da11513bf3f43f9999e90eabced41ab8bb048 # 2.2.0
        env:
@@ -52,7 +54,9 @@ jobs:
            pypi.org:443
            test.pypi.org:443
      - name: Checkout Repository
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
      - name: Set up Python3
        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
        with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 35300cf7..64517d23 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
    rev: v3.19.0
    hooks:
      - id: pyupgrade
-        args: [ '--py38-plus' ]
+        args: [ '--py39-plus' ]
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
@@ -41,7 +41,7 @@ repos:
    hooks:
      - id: isort
  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.1
+    rev: v0.8.2
    hooks:
      - id: ruff
        args: [ '--fix' ]
@@ -78,6 +78,11 @@ repos:
    hooks:
      - id: check-github-workflows
      - id: check-readthedocs
+  - repo: https://github.com/woodruffw/zizmor-pre-commit
+    rev: v0.8.0
+    hooks:
+      - id: zizmor
+        args: [ '--config=.zizmor.yml' ]
  - repo: meta
    hooks:
      - id: check-hooks-apply
diff --git a/.zizmor.yml b/.zizmor.yml
new file mode 100644
index 00000000..6ac32154
--- /dev/null
+++ b/.zizmor.yml
@@ -0,0 +1,6 @@
+rules:
+  dangerous-triggers:
+    ignore:
+      - label.yml:9
+      - first-pull-request.yml:3
+      - workflow-warning.yml:3
diff --git a/CI/requirements_ci.in b/CI/requirements_ci.in
index 6c0f500d..291e299f 100644
--- a/CI/requirements_ci.in
+++ b/CI/requirements_ci.in
@@ -1,6 +1,6 @@
-bump-my-version==0.27.0
+bump-my-version==0.28.0
 coveralls==4.0.1
 pip==24.3.1
 flit==3.9.0
 tox==4.23.2
-tox-gh==1.3.2
+tox-gh==1.4.4
diff --git a/CI/requirements_ci.txt b/CI/requirements_ci.txt
index 1ecaf346..9d4c7f7d 100644
--- a/CI/requirements_ci.txt
+++ b/CI/requirements_ci.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.8
+# This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
 #    pip-compile --generate-hashes --output-file=CI/requirements_ci.txt CI/requirements_ci.in
@@ -12,9 +12,9 @@ bracex==2.4 \
     --hash=sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb \
     --hash=sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418
     # via wcmatch
-bump-my-version==0.27.0 \
-    --hash=sha256:483c517af91559644d45036648e5d99f4f8c85f8d01394097d3d3e42c9e6acad \
-    --hash=sha256:911bfaf7d847d4348844c8fd16f7a11322233fb8dc90123f638069a369003642
+bump-my-version==0.28.0 \
+    --hash=sha256:cc84ace477022a4cc8c401ef5c035f2f752df45488be90ccb764a47f7de0e395 \
+    --hash=sha256:ff3cb51bb15509ae8ebb8e8efa3eaa7c02209677f45457c8b007ef2f5bef7179
     # via -r CI/requirements_ci.in
 cachetools==5.5.0 \
     --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \
@@ -399,27 +399,25 @@ tox==4.23.2 \
     # via
     #   -r CI/requirements_ci.in
     #   tox-gh
-tox-gh==1.3.2 \
-    --hash=sha256:beb8d277d5d7c1a1f09c107e4ef80bd7dd2f8f5d020edfaf4c1e3ae8fd45bf6f \
-    --hash=sha256:c2d6e977f66712e7cd5e5d1b655a1bd4c91ebaf3be104befdb53c81587292d7e
+tox-gh==1.4.4 \
+    --hash=sha256:4ea585f66585b90f5826b1677cfc9453747792a0f9ff83d468603bc17556e07b \
+    --hash=sha256:b962e0f8c4619e98d11c2a135939876691e148b843b7dac4cff7de1dc4f7c215
     # via -r CI/requirements_ci.in
 typing-extensions==4.12.2 \
     --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \
     --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8
     # via
-    #   annotated-types
     #   pydantic
     #   pydantic-core
-    #   rich
     #   rich-click
     #   tox
 urllib3==2.2.2 \
     --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \
     --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168
     # via requests
-virtualenv==20.26.6 \
-    --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \
-    --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2
+virtualenv==20.27.1 \
+    --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \
+    --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4
     # via tox
 wcmatch==8.5.2 \
     --hash=sha256:17d3ad3758f9d0b5b4dedc770b65420d4dac62e680229c287bf24c9db856a478 \
diff --git a/environment-dev.yml b/environment-dev.yml
index f17af588..45b1a76c 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -33,20 +33,20 @@ dependencies:
   - xesmf
   - zarr
   # Dev tools and testing
-  - pip >=24.2.0
-  - bump-my-version >=0.25.1
-  - watchdog >=4.0.0
+  - pip >=24.3.1
+  - black ==24.10.0
+  - blackdoc ==0.3.9
+  - bump-my-version >=0.28.0
+  - coverage >=7.5.0
+  - coveralls >=4.0.1
   - flake8 >=7.1.1
   - flake8-rst-docstrings >=0.3.0
   - flit >=3.9.0,<4.0
-  - tox >=4.17.1
-  - coverage >=7.5.0
-  - coveralls >=4.0.1
-  - pytest >=8.3.2
-  - pytest-cov >=5.0.0
-  - black ==24.8.0
-  - blackdoc ==0.3.9
   - isort ==5.13.2
   - numpydoc >=1.8.0
   - pre-commit >=3.5.0
-  - ruff >=0.5.7
+  - pytest >=8.3.2
+  - pytest-cov >=5.0.0
+  - ruff >=0.8.2
+  - tox >=4.23.2
+  - watchdog >=4.0.0
diff --git a/environment-docs.yml b/environment-docs.yml
index f8442cd8..64ed695a 100644
--- a/environment-docs.yml
+++ b/environment-docs.yml
@@ -3,16 +3,16 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python >=3.12,<3.13
-  - sphinx >=7.0.0
-  - pandoc
-  - furo >=2023.07.26
-  - ipython
+  - python >=3.9,<3.13
   - ipykernel
+  - ipython
   - nbsphinx
+  # Docs
+  - furo >=2023.07.26
+  - pandoc
+  - sphinx >=7.1.0
   - sphinx-autoapi
   - sphinx-codeautolink
   - sphinx-copybutton
   - sphinx-intl
   - sphinx-mdinclude
-  - sphinxcontrib-napoleon
diff --git a/pyproject.toml b/pyproject.toml
index 702825e2..e1cb9d4e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ maintainers = [
   {name = "Pascal Bourgault", email = "bourgault.pascal@ouranos.ca"}
 ]
 readme = {file = "README.rst", content-type = "text/x-rst"}
-requires-python = ">=3.8.0"
+requires-python = ">=3.9.0"
 keywords = ["xarray", "climate", "meteorology", "hydrology", "archiving", "collection", "conversion", "miranda"]
 license = {file = "LICENSE"}
 classifiers = [
@@ -52,39 +52,37 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
   # Dev tools and testing
-  "pip >=24.2.0",
-  "bump-my-version >=0.26.0",
-  "watchdog >=4.0.0",
+  "black ==24.10.0",
+  "blackdoc ==0.3.9",
+  "bump-my-version >=0.28.0",
+  "coverage >=7.5.0",
+  "coveralls >=4.0.1",
   "flake8 >=7.1.1",
   "flake8-rst-docstrings >=0.3.0",
   "flit >=3.9.0,<4.0",
-  "tox >=4.18.0",
-  "coverage >=7.5.0",
-  "coveralls >=4.0.1",
+  "isort ==5.13.2",
   "mypy",
   "numpydoc >=1.8.0",
+  "pip >=24.3.1",
+  "pre-commit >=3.5.0",
   "pytest >=8.3.2",
   "pytest-cov >=5.0.0",
-  "black ==24.8.0",
-  "blackdoc ==0.3.9",
-  "isort ==5.13.2",
-  "ruff >=0.5.7",
-  "pre-commit >=3.5.0"
+  "ruff >=0.8.2",
+  "tox >=4.23.2",
+  "watchdog >=4.0.0"
 ]
 docs = [
   # Documentation and examples
+  "furo >=2023.07.26",
+  "ipykernel",
+  "ipython",
+  "jupyter_client",
+  "nbsphinx",
   "sphinx >=7.0.0",
-  "sphinx_codeautolink",
-  "sphinx_copybutton",
   "sphinx-intl",
   "sphinx-mdinclude",
-  "sphinxcontrib-napoleon",
-  "nbsphinx",
-  "pandoc",
-  "ipython",
-  "ipykernel",
-  "jupyter_client",
-  "furo >=2023.07.26"
+  "sphinx_codeautolink",
+  "sphinx_copybutton"
 ]
 gis = [
   # GIS library support
@@ -234,6 +232,7 @@ exclude = [
   ".pre-commit-config.yaml",
   ".readthedocs.yml",
   ".yamllint.yaml",
+  ".zizmor.yml",
   "docs/_*",
   "docs/apidoc/modules.rst",
   "docs/apidoc/miranda*.rst",
diff --git a/src/miranda/__init__.py b/src/miranda/__init__.py
index 955ef0e8..1a36d24e 100644
--- a/src/miranda/__init__.py
+++ b/src/miranda/__init__.py
@@ -3,7 +3,7 @@
 ###################################################################################
 # Apache Software License 2.0
 #
-# Copyright (c) 2019-2024, Trevor James Smith
+# Copyright (c) 2019-2025, Trevor James Smith
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,10 +31,12 @@
     cv,
     decode,
     io,
+    preprocess,
     scripting,
     structure,
     units,
     utils,
     validators,
+    vocabularies,
 )
 from .storage import FileMeta, StorageState
diff --git a/src/miranda/archive/_groupings.py b/src/miranda/archive/_groupings.py
index 1881427e..d542ced1 100644
--- a/src/miranda/archive/_groupings.py
+++ b/src/miranda/archive/_groupings.py
@@ -7,14 +7,13 @@
 from logging.config import dictConfig
 from pathlib import Path
 from types import GeneratorType
-from typing import Dict, List, Optional
 
 from miranda.scripting import LOGGING_CONFIG
 from miranda.storage import report_file_size
 
 dictConfig(LOGGING_CONFIG)
 
-Nested_List = List[List[Path]]
-PathDict = Dict[str, List[Path]]
+Nested_List = list[list[Path]]
+PathDict = dict[str, list[Path]]
 
 GiB = int(pow(2, 30))
diff --git a/src/miranda/convert/__init__.py b/src/miranda/convert/__init__.py
index 2c427170..fcaed839 100644
--- a/src/miranda/convert/__init__.py
+++ b/src/miranda/convert/__init__.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from . import deh, eccc, ecmwf, hq, melcc, utils
+from . import deh, eccc_canswe, eccc_rdrs, hq, melcc, utils
 from ._aggregation import *
 from ._data_corrections import *
 from ._data_definitions import *
diff --git a/src/miranda/convert/_data_corrections.py b/src/miranda/convert/_data_corrections.py
index 0ef24be9..73999530 100644
--- a/src/miranda/convert/_data_corrections.py
+++ b/src/miranda/convert/_data_corrections.py
@@ -82,7 +82,7 @@ def load_json_data_mappings(project: str) -> dict[str, Any]:
         )
     elif project.startswith("ec"):
         metadata_definition = json.load(
-            data_folder.joinpath("eccc_cf_attrs.json").open("r")
+            data_folder.joinpath("eccc_canswe_cf_attrs.json").open("r")
         )
     elif project in ["NEX-GDDP-CMIP6"]:
         metadata_definition = json.load(
diff --git a/src/miranda/convert/_data_definitions.py b/src/miranda/convert/_data_definitions.py
index b72e1214..7af3f009 100644
--- a/src/miranda/convert/_data_definitions.py
+++ b/src/miranda/convert/_data_definitions.py
@@ -16,13 +16,13 @@
     "era5_variables",
     "gather_agcfsr",
     "gather_agmerra",
+    "gather_eccc_rdrs",
     "gather_ecmwf",
     "gather_emdna",
     "gather_grnch",
     "gather_nex",
     "gather_nrcan_gridded_obs",
     "gather_raw_rdrs_by_years",
-    "gather_rdrs",
     "gather_sc_earth",
     "gather_wfdei_gem_capa",
     "nasa_ag_variables",
@@ -33,7 +33,8 @@
     "xarray_frequencies_to_cmip6like",
 ]
 
-_data_folder = Path(__file__).parent / "data"
+_data_folder = Path(__file__).resolve().parent / "data"
+
 eccc_rdrs_variables = {}
 eccc_rdrs_variables["raw"] = [
@@ -85,6 +86,7 @@
 # Manually map xarray frequencies to CMIP6/CMIP5 controlled vocabulary.
 # see: https://github.com/ES-DOC/pyessv-archive
 xarray_frequencies_to_cmip6like = {
+    "h": "hr",
     "H": "hr",
     "D": "day",
     "W": "sem",
@@ -237,7 +239,7 @@ def gather_sc_earth(path: str | os.PathLike) -> dict[str, list[Path]]:
     )
 
 
-def gather_rdrs(
+def gather_eccc_rdrs(
     name: str, path: str | os.PathLike, suffix: str, key: str
 ) -> dict[str, dict[str, list[Path]]]:
     """Gather RDRS processed source data.
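The new "h" key tracks pandas >= 2.2, which switched to lowercase frequency aliases, while the existing "H" entry keeps older versions working. A minimal sketch of how such a mapping is typically consulted; the xr.infer_freq call is illustrative and not part of this changeset:

import pandas as pd
import xarray as xr

from miranda.convert import xarray_frequencies_to_cmip6like

# Build a small hourly time axis and let xarray infer its frequency string.
times = pd.date_range("2024-01-01", periods=24, freq="h")
ds = xr.Dataset(coords={"time": times})
freq = xr.infer_freq(ds.time)  # "h" on pandas >= 2.2, "H" on older releases
print(xarray_frequencies_to_cmip6like[freq])  # -> "hr" either way

Keeping both spellings in the dictionary means the lookup succeeds regardless of which pandas version produced the inferred frequency.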
diff --git a/src/miranda/convert/_reconstruction.py b/src/miranda/convert/_reconstruction.py
index 9311ec92..bfa72325 100644
--- a/src/miranda/convert/_reconstruction.py
+++ b/src/miranda/convert/_reconstruction.py
@@ -17,8 +17,8 @@
 from miranda.utils import chunk_iterables
 
 from ._aggregation import aggregate as aggregate_func
-from ._data_corrections import dataset_corrections
 from ._data_definitions import project_institutes, xarray_frequencies_to_cmip6like
+from .corrections import dataset_corrections
 
 logging.config.dictConfig(LOGGING_CONFIG)
diff --git a/src/miranda/convert/corrections.py b/src/miranda/convert/corrections.py
new file mode 100644
index 00000000..c2c79d97
--- /dev/null
+++ b/src/miranda/convert/corrections.py
@@ -0,0 +1,201 @@
+"""Dataset corrections submodule."""
+
+from __future__ import annotations
+
+import datetime
+import pathlib
+from collections.abc import Iterator, Sequence
+from functools import partial
+from typing import Callable
+
+import xarray as xr
+
+from miranda.convert.utils import find_version_hash
+from miranda.gis import conservative_regrid, subset_domain, threshold_mask
+from miranda.treatments import (
+    cf_units_conversion,
+    clip_values,
+    correct_unit_names,
+    dimensions_compliance,
+    ensure_correct_time_frequency,
+    invert_value_sign,
+    metadata_conversion,
+    offset_time_dimension,
+    preprocessing_corrections,
+    transform_values,
+    variable_conversion,
+)
+from miranda.treatments.utils import load_json_data_mappings
+
+CONFIG_FOLDER = pathlib.Path(__file__).parent / "data"
+CONFIG_FILES = {
+    "EMDNA": "emdna_cf_attrs.json",
+    "ESPO-G6-E5L": "espo-g6-e5l_attrs.json",
+    "ESPO-G6-R2": "espo-g6-r2_attrs.json",
+    "NEX-GDDP-CMIP6": "nex-gddp-cmip6_attrs.json",
+    "agcfsr": "agcfsr_agmerra2_cf_attrs.json",
+    "agmerra2": "agcfsr_agmerra2_cf_attrs.json",
+    "cmip": "cmip5_cmip6_cordex_ouranos_attrs.json",
+    "cordex": "cmip5_cmip6_cordex_ouranos_attrs.json",
+    "eccc-canswe": "eccc-canswe_cf_attrs.json",
+    "eccc-ahccd": "eccc-ahccd_cf_attrs.json",
+    "eccc-obs": "eccc-obs_cf_attrs.json",
+    "era5-land": "era5_era5-land_cf_attrs.json",
+    "era5-land-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-pressure-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-monthly-means": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-monthly-means-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "era5-single-levels-preliminary-back-extension": "era5_era5-land_cf_attrs.json",
+    "ets-grnch": "ets-grnch_cf_attrs.json",
+    "melcc": "melcc_cf_attrs.json",
+    "rdrs-v21": "eccc-rdrs_cf_attrs.json",
+    "wfdei-gem-capa": "wfdei-gem-capa_cf_attrs.json",
+}
+for k, v in CONFIG_FILES.items():
+    CONFIG_FILES[k] = CONFIG_FOLDER / v
+
+
+def dataset_corrections(ds: xr.Dataset, project: str) -> xr.Dataset:
+    """
+    Convert variables to CF-compliant format.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Data to be converted.
+    project : str
+        Project name for decoding/handling purposes.
+
+    Returns
+    -------
+    xr.Dataset
+        The corrected dataset.
+ """ + metadata_definition = load_json_data_mappings(project, CONFIG_FILES) + + ds = correct_unit_names(ds, project, metadata_definition) + ds = transform_values(ds, project, metadata_definition) + ds = invert_value_sign(ds, project, metadata_definition) + ds = cf_units_conversion(ds, metadata_definition) + ds = clip_values(ds, project, metadata_definition) + + ds = dimensions_compliance(ds, project, metadata_definition) + ds = ensure_correct_time_frequency(ds, project, metadata_definition) + ds = offset_time_dimension(ds, project, metadata_definition) + + ds = variable_conversion(ds, project, metadata_definition) + + ds = metadata_conversion(ds, project, metadata_definition) + + ds.attrs["history"] = ( + f"{datetime.datetime.now()}: " + f"Variables converted from original files using miranda.convert.{dataset_corrections.__name__}. " + f"{ds.attrs.get('history')}".strip() + ) + + return ds + + +def dataset_conversion( + input_files: ( + str + | pathlib.Path + | Sequence[str | pathlib.Path] + | Iterator[pathlib.Path] + | xr.Dataset + ), + project: str, + domain: str | None = None, + mask: xr.Dataset | xr.DataArray | None = None, + mask_cutoff: float | bool = False, + regrid: bool = False, + add_version_hashes: bool = True, + preprocess: Callable | str | None = "auto", + **xr_kwargs, +) -> xr.Dataset | xr.DataArray: + r""" + Convert an existing Xarray-compatible dataset to another format with variable corrections applied. + + Parameters + ---------- + input_files : str or pathlib.Path or Sequence[str or pathlib.Path] or Iterator[pathlib.Path] or xr.Dataset + Files or objects to be converted. + If sent a list or GeneratorType, will open with :py:func:`xarray.open_mfdataset` and concatenate files. + project : {"cordex", "cmip5", "cmip6", "ets-grnch", "isimip-ft", "pcic-candcs-u6", "converted"} + Project name for decoding/handling purposes. + domain : {"global", "nam", "can", "qc", "mtl"}, optional + Domain to perform subsetting for. Default: None. + mask : Optional[Union[xr.Dataset, xr.DataArray]] + DataArray or single data_variable dataset containing mask. + mask_cutoff : float or bool + If land_sea_mask supplied, the threshold above which to mask with land_sea_mask. Default: False. + regrid : bool + Performing regridding with xesmf. Default: False. + add_version_hashes : bool + If True, version name and sha256sum of source file(s) will be added as a field among the global attributes. + preprocess : callable or str, optional + Preprocessing functions to perform over each Dataset. + Default: "auto" - Run preprocessing fixes based on supplied fields from metadata definition. + Callable - Runs function over Dataset (single) or supplied to `preprocess` (multifile dataset). + \*\*xr_kwargs : Any + Arguments passed directly to xarray. + + Returns + ------- + xr.Dataset or xr.DataArray + The corrected dataset. 
+ """ + if isinstance(input_files, xr.Dataset): + ds = input_files + else: + if isinstance(input_files, (str, pathlib.Path)): + if pathlib.Path(input_files).is_dir(): + files = [] + files.extend([f for f in pathlib.Path(input_files).glob("*.nc")]) + files.extend([f for f in pathlib.Path(input_files).glob("*.zarr")]) + else: + files = [pathlib.Path(input_files)] + elif isinstance(input_files, (Sequence, Iterator)): + files = [pathlib.Path(f) for f in input_files] + else: + files = input_files + version_hashes = dict() + if add_version_hashes: + for file in files: + version_hashes[file.name] = find_version_hash(file) + + preprocess_kwargs = dict() + if preprocess: + if preprocess == "auto": + preprocess_kwargs.update( + preprocess=partial(preprocessing_corrections, project=project) + ) + elif isinstance(preprocess, Callable): + preprocess_kwargs.update(preprocess=preprocess) + + if len(files) == 1: + ds = xr.open_dataset(files[0], **xr_kwargs) + for process in preprocess_kwargs.values(): + ds = process(ds) + else: + ds = xr.open_mfdataset(files, **xr_kwargs, **preprocess_kwargs) + if version_hashes: + ds.attrs.update(dict(original_files=str(version_hashes))) + + ds = dataset_corrections(ds, project) + + if domain: + ds = subset_domain(ds, domain) + + if isinstance(mask, (str, pathlib.Path)): + mask = xr.open_dataset(mask) + if isinstance(mask, (xr.Dataset, xr.DataArray)): + if regrid: + mask = conservative_regrid(ds, mask) + ds = threshold_mask(ds, mask=mask, mask_cutoff=mask_cutoff) + + return ds diff --git a/src/miranda/eccc/eccc_homogenized_cf_attrs.json b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json similarity index 80% rename from src/miranda/eccc/eccc_homogenized_cf_attrs.json rename to src/miranda/convert/data/eccc-ahccd_cf_attrs.json index 92c3b0f1..594de4e2 100644 --- a/src/miranda/eccc/eccc_homogenized_cf_attrs.json +++ b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json @@ -1,29 +1,56 @@ { "Header": { - "Conventions": "CF-1.8", + "Conventions": "CF-1.9", + "_citation": { + "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", + "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728" + }, + "_frequency": true, + "_generation": true, + "_miranda_version": true, + "_missing_values": [ + "-999", + "1e20" + ], "_product": { "gen2": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2", "gen3": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3" }, - "citation": { - "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", - "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. 
diff --git a/src/miranda/eccc/eccc_homogenized_cf_attrs.json b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json
similarity index 80%
rename from src/miranda/eccc/eccc_homogenized_cf_attrs.json
rename to src/miranda/convert/data/eccc-ahccd_cf_attrs.json
index 92c3b0f1..594de4e2 100644
--- a/src/miranda/eccc/eccc_homogenized_cf_attrs.json
+++ b/src/miranda/convert/data/eccc-ahccd_cf_attrs.json
@@ -1,29 +1,56 @@
 {
   "Header": {
-    "Conventions": "CF-1.8",
+    "Conventions": "CF-1.9",
+    "_citation": {
+      "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910",
+      "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728"
+    },
+    "_frequency": true,
+    "_generation": true,
+    "_miranda_version": true,
+    "_missing_values": [
+      "-999",
+      "1e20"
+    ],
     "_product": {
       "gen2": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2",
       "gen3": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3"
     },
-    "citation": {
-      "gen2": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910",
-      "gen3": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728"
-    },
     "contact": "info.cccs-ccsc@canada.ca",
     "documentation": "https://www.canada.ca/en/environment-climate-change/services/climate-change/canadian-centre-climate-services/display-download/technical-documentation-adjusted-climate-data.html",
-    "float_missing_value": "1e20",
-    "frequency": "day",
     "institution": "GovCan",
-    "int_missing_value": "-999",
     "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
     "license_type": "permissive",
     "organization": "ECCC",
     "realm": "atmos",
+    "source": "AHCCD",
     "table_date": "2023-03-23",
     "table_id": "ECCC"
   },
-  "variable_entry": {
+  "dimensions": {
+    "lat": {
+      "axis": "Y",
+      "long_name": "Latitude",
+      "standard_name": "latitude",
+      "units": "degrees_north"
+    },
+    "long": {
+      "_cf_dimension_name": "lon",
+      "axis": "X",
+      "long_name": "Longitude",
+      "standard_name": "longitude",
+      "units": "degrees_east"
+    },
+    "time": {
+      "axis": "T",
+      "calendar": "gregorian",
+      "long_name": "Time",
+      "standard_name": "time"
+    }
+  },
+  "variables": {
     "dm": {
+      "_cf_variable_name": "tas",
       "add_offset": 273.15,
       "cell_methods": "time: mean",
       "comments": "Station data converted from Mean Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Near-Surface Air Temperature",
       "original_field": "Mean Temp (°C)",
-      "out_name": "tas",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
     },
     "dn": {
+      "_cf_variable_name": "tasmin",
       "add_offset": 273.15,
       "cell_methods": "time: minimum",
       "comments": "Station data converted from Min Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Daily Minimum Near-Surface Air Temperature",
       "original_field": "Min Temp (°C)",
-      "out_name": "tasmin",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
     },
     "dr": {
-      "add_offset": 0,
+      "_cf_variable_name": "prlp",
       "cell_methods": "time: mean",
       "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Liquid Precipitation",
       "original_field": "Total Rain (mm)",
-      "out_name": "prlp",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "rainfall_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "ds": {
-      "add_offset": 0,
+      "_cf_variable_name": "prsn",
       "cell_methods": "time: mean",
       "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Snowfall Flux",
       "original_field": "Total Snow (cm)",
-      "out_name": "prsn",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "snowfall_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "dt": {
-      "add_offset": 0,
+      "_cf_variable_name": "pr",
       "cell_methods": "time: mean",
       "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Precipitation",
       "original_field": "Total Precip (mm)",
-      "out_name": "pr",
       "scale_factor": 1.1574074074074073e-05,
       "standard_name": "precipitation_flux",
       "type": "real",
       "units": "kg m-2 s-1"
     },
     "dx": {
+      "_cf_variable_name": "tasmax",
       "add_offset": 273.15,
       "cell_methods": "time: maximum",
       "comments": "station data converted from Max Temp (°C)",
       "frequency": "day",
       "grid_mapping": "regular_lon_lat",
       "long_name": "Daily Maximum Near-Surface Air Temperature",
       "original_field": "Max Temp (°C)",
-      "out_name": "tasmax",
-      "scale_factor": 1,
       "standard_name": "air_temperature",
       "type": "real",
       "units": "K"
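As a sanity check on the precipitation entries above: 1 mm day-1 of rain is 1 kg m-2 day-1 at a density of 1000 kg/m³, and dividing by 86 400 s day-1 yields exactly the scale factor recorded in the JSON. A small illustrative snippet (variable names are invented):

# Illustrative only: reproduce the "dr" (Total Rain) conversion from the
# mapping above -- mm/day to kg m-2 s-1 assuming a density of 1000 kg/m³.
SECONDS_PER_DAY = 86_400
scale_factor = 1 / SECONDS_PER_DAY  # 1.1574074074074073e-05, as in the JSON
rain_mm_per_day = 12.0
rain_flux = rain_mm_per_day * scale_factor  # kg m-2 s-1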
"air_temperature", "type": "real", "units": "K" diff --git a/src/miranda/eccc/eccc_obs_cf_attrs.json b/src/miranda/convert/data/eccc-obs_cf_attrs.json similarity index 54% rename from src/miranda/eccc/eccc_obs_cf_attrs.json rename to src/miranda/convert/data/eccc-obs_cf_attrs.json index 7c882e31..c504965e 100644 --- a/src/miranda/eccc/eccc_obs_cf_attrs.json +++ b/src/miranda/convert/data/eccc-obs_cf_attrs.json @@ -1,996 +1,1128 @@ { "Header": { - "Conventions": "CF-1.8", + "Conventions": "CF-1.9", + "_frequency": true, + "_miranda_version": true, + "_missing_flags": "M", + "_missing_values": [ + "-999", + "1e20", + "-9999", + "#####" + ], "contact": "climatcentre-climatecentral@ec.gc.ca", "institution": "GovCan", - "int_missing_value": "-999", "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", "license_type": "permissive", - "missing_value": "1e20", "organization": "ECCC", "processing_level": "raw", - "realm": "atmos", - "source": "msc", + "source": "ECCC-OBS", "table_date": "2023-03-23", "type": "station-obs" }, - "variable_entry": { + "dimensions": { + "latitude": { + "_cf_dimension_name": "lat", + "_precision": 4, + "axis": "Y", + "standard_name": "latitude" + }, + "longitude": { + "_cf_dimension_name": "lon", + "_precision": 4, + "axis": "X", + "standard_name": "longitude" + }, + "time": { + "_ensure_correct_time": { + "obs-daily": "1D", + "obs-hourly": "1H" + }, + "_strict_time": false, + "axis": "T", + "long_name": "time", + "standard_name": "time" + } + }, + "variables": { "001": { + "_cf_variable_name": "tasmax", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tasmax", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Maximum Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature_maximum", "units": "K" }, "002": { + "_cf_variable_name": "tasmin", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tasmin", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Minimum Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature_minimum", "units": "K" }, "003": { + "_cf_variable_name": "tas", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "tas", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Daily Mean Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "air_temperature", "units": "K" }, "010": { + "_cf_variable_name": "prlptot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prlptot", + "_transformation": "op / 10 mm day-1", "original_units": "0.1 mm day-1", "original_variable": "Daily Total Rainfall", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "liquid_precipitation_amount", "units": "m" }, "011": { + "_cf_variable_name": "prsntot", + "_corrected_units": "cm", + "_invert_sign": false, + 
"_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prsntot", + "_transformation": "op / 10 cm day-1", "original_units": "0.1 cm day-1", "original_variable": "Daily Total Snowfall", - "raw_units": "cm", - "scale_factor": 0.1, "standard_name": "solid_precipitation_amount", "units": "m" }, "012": { + "_cf_variable_name": "prcptot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "prcptot", + "_transformation": "op / 10 mm day-1", "original_units": "0.1 mm day-1", "original_variable": "Daily Total Precipitation", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "m" }, "013": { + "_cf_variable_name": "sndtot", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "sndtot", + "_transformation": false, "original_units": "cm", "original_variable": "Snow on the Ground", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "014": { + "_cf_variable_name": "thunder", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "thunder", + "_transformation": false, "original_variable": "Thunderstorms", - "raw_units": "1", - "scale_factor": 1, "standard_name": "thunderstorm_presence", "units": "1" }, "015": { + "_cf_variable_name": "freezing_rain_drizzle", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "freezing_rain_drizzle", + "_transformation": false, "original_variable": "Freezing rain or drizzle", - "raw_units": "1", - "scale_factor": 1, "standard_name": "freeze_rain_drizzle_presence", "units": "1" }, "016": { + "_cf_variable_name": "hail", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "hail", + "_transformation": false, "original_variable": "Hail", - "raw_units": "1", - "scale_factor": 1, "standard_name": "hail_presence", "units": "1" }, "017": { + "_cf_variable_name": "fog_ice_fog", + "_corrected_units": "1", "_table_name": [ "DLY02", "DLY04", "DLY44" ], - "add_offset": 0, - "nc_name": "fog_ice_fog", "original_variable": "Fog or Ice Fog", - "raw_units": "1", - "scale_factor": 1, "standard_name": "fog_ice_fog_presence", "units": "1" }, "018": { + "_cf_variable_name": "smoke_haze", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "smoke_haze", + "_transformation": false, "original_variable": "Smoke or Haze", - "raw_units": "1", - "scale_factor": 1, "standard_name": "smoke_haze_presence", "units": "1" }, "019": { + "_cf_variable_name": "blowing_dust_sand", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "blowing_dust_sand", + "_transformation": false, "original_variable": "Blowing Dust or Sand", - "raw_units": "1", - "scale_factor": 1, "standard_name": "blowing_dust_sand_presence", "units": "1" }, "020": { + "_cf_variable_name": "blow_snow", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - 
"add_offset": 0, - "nc_name": "blow_snow", + "_transformation": false, "original_variable": "Blowing snow", - "raw_units": "1", - "scale_factor": 1, "standard_name": "blowing_snow_presence", "units": "1" }, "021": { + "_cf_variable_name": "wind_gt_28kt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "wind_gt_28kt", + "_transformation": false, "original_variable": "Wind speed >= 28 Knots", - "raw_units": "1", - "scale_factor": 1, "standard_name": "wind_exceeding_28_knots", "units": "1" }, "022": { + "_cf_variable_name": "wind_gt_34kt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "wind_gt_34kt", + "_transformation": false, "original_variable": "Wind speed >= 34 Knots", - "raw_units": "1", - "scale_factor": 1, "standard_name": "wind_exceeding_34_knots", "units": "1" }, "023": { + "_cf_variable_name": "gust_dir_16pts", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_dir_16pts", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Direction of extreme gust (16 pts) to December 1976", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "gust_to_direction", "units": "deg" }, "024": { + "_cf_variable_name": "gust_speed", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_speed", + "_transformation": false, "original_units": "km/h", "original_variable": "Speed of extreme gust", - "raw_units": "km h-1", - "scale_factor": 1, "standard_name": "wind_speed_of_gust", "units": "m s-1" }, "025": { + "_cf_variable_name": "gust_hour", + "_corrected_units": "h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "DLY02", "DLY04" ], - "add_offset": 0, - "nc_name": "gust_hour", + "_transformation": false, "original_variable": "UTC hour of extreme gust", - "raw_units": "h", - "scale_factor": 1, "standard_name": "hour_of_extreme_gust", "units": "h" }, "061": { + "_cf_variable_name": "rf1_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf1_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF1 global solar radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "062": { + "_cf_variable_name": "rf2_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf2_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF2 sky (diffuse) radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "063": { + "_cf_variable_name": "rf3_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf3_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF3 reflected solar radiation", - 
"raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "064": { + "_cf_variable_name": "rf4_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf4_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF4 net all wave radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "067": { + "_cf_variable_name": "rf7_radiation", + "_corrected_units": "klx h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf7_radiation", + "_transformation": "op / 100 klx h", "original_units": "0.01 Kilolux_hrs", "original_variable": "RF7 daylight illumination", - "raw_units": "lux h", - "scale_factor": 10, "standard_name": "solar_radiation_flux", - "units": "lux h" + "units": "klx h" }, "068": { + "_cf_variable_name": "rf8_radiation", + "_corrected_units": "MJ m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY11" ], - "add_offset": 0, - "nc_name": "rf8_radiation", + "_transformation": "op / 1000 MJ m-2", "original_units": "0.001 MJ/m", "original_variable": "RF8 direct solar radiation", - "raw_units": "W m-2 h-1", - "scale_factor": 277.77777777777777, "standard_name": "solar_radiation_flux", - "units": "W m-2 h-1" + "units": "W h m-2" }, "069": { + "_cf_variable_name": "wind_dir_45B", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY15" ], - "add_offset": 0, - "nc_name": "wind_dir_45B", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Direction - 45B anemometer (8 pts)", - "raw_units": "deg", - "scale_factor": 1, "standard_name": "wind_to_direction", "units": "deg" }, "071": { + "_cf_variable_name": "ceiling_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "ceiling_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Ceiling height of lowest layer of clouds", - "raw_units": "m", - "scale_factor": 30, "standard_name": "ceiling_cloud_height", "units": "m" }, "072": { + "_cf_variable_name": "visibility", + "_corrected_units": "km", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "visibility", + "_transformation": "op / 10 km", "original_units": "0.1 km", "original_variable": "Visibility", - "raw_units": "km", - "scale_factor": 0.1, "standard_name": "visibility_in_air", "units": "m" }, "073": { + "_cf_variable_name": "psl", + "_corrected_units": "Pa", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "psl", + "_transformation": "op / 100 kPa", "original_units": "0.01 kPa", "original_variable": "Sea Level Pressure", - "raw_units": "Pa", - "scale_factor": 10, "standard_name": "air_pressure_at_mean_sea_level", "units": "Pa" }, "074": { + "_cf_variable_name": "tds", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tds", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Dew Point Temperature", - 
"raw_units": "degC", - "scale_factor": 0.1, "standard_name": "dew_point_temperature", "units": "K" }, "075": { + "_cf_variable_name": "wind_dir_u2a_16", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "wind_dir_u2a_16", + "_transformation": "op * 10 deg", "original_units": "10's of degrees", "original_variable": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "wind_direction_u2a", "units": "deg" }, "076": { + "_cf_variable_name": "wind_speed_u2a", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "wind_speed_u2a", + "_transformation": false, "original_units": "km/h", "original_variable": "Wind Speed - U2A (16 pts) to December 1970", - "raw_units": "km h-1", - "scale_factor": 1, "standard_name": "wind_speed_u2a", "units": "m s-1" }, "077": { + "_cf_variable_name": "pressure", + "_corrected_units": "Pa", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "pressure", + "_transformation": "op / 100 kPa", "original_units": "0.01 kPa", "original_variable": "Station Pressure", - "raw_units": "Pa", - "scale_factor": 10, "standard_name": "atmospheric_pressure", "units": "Pa" }, "078": { + "_cf_variable_name": "tas_dry", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tas_dry", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Dry Bulb Temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "dry_bulb_temperature", "units": "K" }, "079": { + "_cf_variable_name": "tas_wet", + "_corrected_units": "degC", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "tas_wet", + "_transformation": "op / 10 degC", "original_units": "0.1 °C", "original_variable": "Wet Bulb temperature", - "raw_units": "degC", - "scale_factor": 0.1, "standard_name": "wet_bulb_temperature", "units": "K" }, "080": { + "_cf_variable_name": "hur", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "hur", + "_transformation": false, "original_units": "%", "original_variable": "Relative Humidity", - "raw_units": "1", - "scale_factor": 1, "standard_name": "relative_humidity", "units": "1" }, "081": { + "_cf_variable_name": "clo", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "clo", - "original_units": "%", + "_transformation": "op * 10", + "original_units": "Tenths", "original_variable": "Total Cloud Opacity", - "raw_units": "1", "scale_factor": 10, "standard_name": "cloud_albedo", "units": "1" }, "082": { + "_cf_variable_name": "clt", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "clt", - "original_units": "%", + "_transformation": "op * 10", + "original_units": "Tenths", "original_variable": "Total Cloud Amount", - "raw_units": "1", "scale_factor": 10, "standard_name": "cloud_area_fraction", "units": "1" }, "089": { + "_cf_variable_name": "freeze_rain", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - 
"add_offset": 0, - "nc_name": "freeze_rain", + "_transformation": false, "original_variable": "Freezing Rain", - "raw_units": "1", - "scale_factor": 1, "standard_name": "freezing_rain", "units": "1" }, "094": { + "_cf_variable_name": "ice_pellets", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "ice_pellets", + "_transformation": false, "original_variable": "Ice Pellets", - "raw_units": "1", - "scale_factor": 1, "standard_name": "ice_pellet_presence", "units": "1" }, "107": { + "_cf_variable_name": "1low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_opac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "108": { + "_cf_variable_name": "1low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "109": { + "_cf_variable_name": "1low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_type", + "_transformation": false, "original_variable": "Lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "110": { + "_cf_variable_name": "1low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "1low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "111": { + "_cf_variable_name": "2low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_opac", + "_transformation": "op * 30 m", "original_units": "Tenths", "original_variable": "Second lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "112": { + "_cf_variable_name": "2low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Second lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "113": { + "_cf_variable_name": "2low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_type", + "_transformation": false, "original_units": "", "original_variable": "Second lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "114": { + "_cf_variable_name": 
"2low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "2low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Second lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "115": { + "_cf_variable_name": "3low_cloud_opac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_opac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Thirsd lowest cloud layer opacity", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_opacity_fraction", "units": "1" }, "116": { + "_cf_variable_name": "3low_cloud_frac", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_frac", + "_transformation": "op * 10", "original_units": "Tenths", "original_variable": "Third lowest cloud layer amount or condition", - "raw_units": "1", - "scale_factor": 10, "standard_name": "low_type_cloud_area_fraction", "units": "1" }, "117": { + "_cf_variable_name": "3low_cloud_type", + "_corrected_units": "1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_type", + "_transformation": false, "original_units": "", "original_variable": "Third lowest cloud layer type", - "raw_units": "1", - "scale_factor": 1, "standard_name": "low_type_cloud_type", "units": "1" }, "118": { + "_cf_variable_name": "3low_cloud_hgt", + "_corrected_units": "m", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "3low_cloud_hgt", + "_transformation": "op * 30 m", "original_units": "30's of meters", "original_variable": "Third lowest cloud layer height", - "raw_units": "m", - "scale_factor": 30, "standard_name": "low_type_cloud_height", "units": "m" }, "123": { + "_cf_variable_name": "rainfall", + "_corrected_units": "mm h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "add_offset": 0, - "nc_name": "rainfall", + "_transformation": "op / 10 mm h-1", "original_units": "0.1 mm", "original_variable": "Total Rainfall", - "raw_units": "mm h-1", - "scale_factor": 0.1, "standard_name": "rainfall_flux", "units": "kg m2 s-1" }, "133": { + "_cf_variable_name": "sun", + "_corrected_units": "h", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY10" ], - "add_offset": 0, - "nc_name": "sun", + "_transformation": "op / 10 h", "original_units": "0.1 hrs", "original_variable": "Sunshine", - "raw_units": "h", - "scale_factor": 0.1, "standard_name": "duration_of_sunshine", "units": "s" }, "156": { + "_cf_variable_name": "wind_dir_u2a_36", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01" ], - "nc_name": "wind_dir_u2a_36", - "original_units": "10's of degrees", + "_transformation": "op * 10 deg", "original_variable": "Wind Direction - U2A (36 pts) from January 1971", - "raw_units": "deg", - "scale_factor": 10, "standard_name": "wind_direction_u2a", "units": "deg" }, + "209": { + "_cf_variable_name": "wind_character", + "_corrected_units": "", + "_invert_sign": false, + "_offset_time": false, + "_table_name": [ + "HLY01" + ], + "_transformation": false, + "description": 
"Gust (G)=1, Squall (Q)=2", + "long_name": "wind_direction_u2a", + "original_units": "1, 2", + "original_variable": "Wind character at 10 m", + "units": "" + }, + "210": { + "_cf_variable_name": "", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, + "_table_name": [ + "HLY01" + ], + "_transformation": false, + "original_units": "km/h", + "original_variable": "Wind gust speed at 10 m", + "standard_name": "wind_speed_of_gust", + "units": "m s-1" + }, "262": { + "_cf_variable_name": "prtot", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 00-60)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "263": { + "_cf_variable_name": "prtot_q1", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q1", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 00-15)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "264": { + "_cf_variable_name": "prtot_q2", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q2", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 15-30)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "265": { + "_cf_variable_name": "prtot_q3", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q3", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 30-45)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "266": { + "_cf_variable_name": "prtot_q4", + "_corrected_units": "mm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "prtot_q4", + "_transformation": "op / 10 mm", "original_units": "0.1 mm", "original_variable": "Total Precipitation (minutes 45-60)", - "raw_units": "mm", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "267": { + "_cf_variable_name": "precipitation_weight_q1", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q1", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 15)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "268": { + "_cf_variable_name": "precipitation_weight_q2", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q2", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 30)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": 
"precipitation_amount", "units": "kg m-2" }, "269": { + "_cf_variable_name": "precipitation_weight_q3", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q3", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 45)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "270": { + "_cf_variable_name": "precipitation_weight_q4", + "_corrected_units": "kg m-2", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "precipitation_weight_q4", + "_transformation": "op / 10 kg m-2", "original_units": "0.1 kg/m²", "original_variable": "Precipitation Gauge Weight per Unit Area (at minute 60)", - "raw_units": "kg m-2", - "scale_factor": 0.1, "standard_name": "precipitation_amount", "units": "kg m-2" }, "271": { + "_cf_variable_name": "wind_speed_q1", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q1", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 00-15)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "272": { + "_cf_variable_name": "wind_speed_q2", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q2", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 15-30)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "273": { + "_cf_variable_name": "wind_speed_q3", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q3", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 30-45)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "274": { + "_cf_variable_name": "wind_speed_q4", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed_q4", - "nc_units": "m s-1", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 45-60)", - "raw_units": "km h-1", - "scale_factor": 0.1, - "standard_name": "wind_speed" + "standard_name": "wind_speed", + "units": "m s-1" }, "275": { + "_cf_variable_name": "snd", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q4", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 60)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "276": { + "_cf_variable_name": "snd_q1", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ 
"HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q1", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 15)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "277": { + "_cf_variable_name": "snd_q2", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q2", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 30)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "278": { + "_cf_variable_name": "snd_q3", + "_corrected_units": "cm", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "snd_q3", + "_transformation": false, "original_units": "cm", "original_variable": "Snow Depth (at minute 45)", - "raw_units": "cm", - "scale_factor": 1, "standard_name": "surface_snow_thickness", "units": "m" }, "279": { + "_cf_variable_name": "wind_dir", + "_corrected_units": "deg", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_dir", - "nc_units": "deg", + "_transformation": false, "original_units": "Degrees", "original_variable": "Wind Direction at 2 m (minutes 50-60)", - "raw_units": "deg", - "scale_factor": 1, - "standard_name": "wind_direction" + "standard_name": "wind_direction", + "units": "deg" }, "280": { + "_cf_variable_name": "wind_speed", + "_corrected_units": "km h-1", + "_invert_sign": false, + "_offset_time": false, "_table_name": [ "HLY01_RCS" ], - "add_offset": 0, - "nc_name": "wind_speed", + "_transformation": "op / 10 km h-1", "original_units": "0.1 km/h", "original_variable": "Wind Speed at 2 m (minutes 50-60)", - "raw_units": "km h-1", - "scale_factor": 0.1, "standard_name": "wind_speed", "units": "m s-1" } diff --git a/src/miranda/convert/data/eccc_cf_attrs.json b/src/miranda/convert/data/eccc_canswe_cf_attrs.json similarity index 60% rename from src/miranda/convert/data/eccc_cf_attrs.json rename to src/miranda/convert/data/eccc_canswe_cf_attrs.json index 4424ae76..4b48eb98 100644 --- a/src/miranda/convert/data/eccc_cf_attrs.json +++ b/src/miranda/convert/data/eccc_canswe_cf_attrs.json @@ -2,31 +2,32 @@ "Header": { "Conventions": "CF-1.9", "_contact": { - "ec-canswe": "vincent.vionnet@canada.ca" + "eccc-canswe": "vincent.vionnet@canada.ca" }, "_doi": { - "ec-canswe": "10.5281/zenodo.6638382" + "eccc-canswe": "10.5281/zenodo.6638382" }, "_license": { - "ec-canswe": "https://open.canada.ca/en/open-government-licence-canada" + "eccc-canswe": "https://open.canada.ca/en/open-government-licence-canada" }, "_miranda_version": true, "_reference": { - "ec-canswe": "https://zenodo.org/record/6638382" + "eccc-canswe": "https://zenodo.org/record/6638382" }, "_source": { - "ec-canswe": "CanSWE" + "eccc-canswe": "CanSWE" }, "_version": { - "ec-canswe": "v4" + "eccc-canswe": "v4" }, "institution": "GovCan", "license_type": { - "ec-canswe": "permissive" + "eccc-canswe": "permissive" }, "organisation": "ECCC", "processing_level": "raw", "realm": "atmos", + "source": "ECCC-CANSWE", "table_date": "2023-03-23", "table_id": "eccc", "type": "station-obs" diff --git a/src/miranda/convert/data/ecmwf_cf_attrs.json b/src/miranda/convert/data/ecmwf_cf_attrs.json index 1c080ac3..7acb0081 100644 --- a/src/miranda/convert/data/ecmwf_cf_attrs.json +++ b/src/miranda/convert/data/ecmwf_cf_attrs.json @@ 
diff --git a/src/miranda/convert/data/ecmwf_cf_attrs.json b/src/miranda/convert/data/ecmwf_cf_attrs.json
index 1c080ac3..7acb0081 100644
--- a/src/miranda/convert/data/ecmwf_cf_attrs.json
+++ b/src/miranda/convert/data/ecmwf_cf_attrs.json
@@ -45,6 +45,7 @@
         "era5-land-monthly-means": 4
       },
       "axis": "Y",
+      "long_name": "Latitude",
       "standard_name": "latitude"
     },
     "longitude": {
@@ -54,6 +55,7 @@
         "era5-land-monthly-means": 4
       },
       "axis": "X",
+      "long_name": "Longitude",
       "standard_name": "longitude"
     },
     "time": {
@@ -71,7 +73,7 @@
       },
       "_strict_time": false,
       "axis": "T",
-      "long_name": "time",
+      "long_name": "Time",
       "standard_name": "time"
     }
   },
diff --git a/src/miranda/convert/data/espo-g6-e5l_attrs.json b/src/miranda/convert/data/espo-g6-e5l_attrs.json
index e4e76045..71a2c80a 100644
--- a/src/miranda/convert/data/espo-g6-e5l_attrs.json
+++ b/src/miranda/convert/data/espo-g6-e5l_attrs.json
@@ -14,6 +14,7 @@
     "domain": "NAM",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "ESPO-G6-E5L",
     "table_date": "2023-04-24",
     "table_id": "ESPO-G6-E5L",
     "type": "simulation",
diff --git a/src/miranda/convert/data/espo-g6-r2_attrs.json b/src/miranda/convert/data/espo-g6-r2_attrs.json
index ad57313f..c0e73f03 100644
--- a/src/miranda/convert/data/espo-g6-r2_attrs.json
+++ b/src/miranda/convert/data/espo-g6-r2_attrs.json
@@ -14,6 +14,7 @@
     "domain": "NAM",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "ESPO-G6-R2",
     "table_date": "2023-04-24",
     "table_id": "ESPO-G6-R2",
     "type": "simulation",
diff --git a/src/miranda/convert/data/nex-gddp-cmip6_attrs.json b/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
index a58f29de..2e962b6e 100644
--- a/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
+++ b/src/miranda/convert/data/nex-gddp-cmip6_attrs.json
@@ -12,6 +12,7 @@
     "domain": "QC",
     "mip_era": "CMIP6",
     "processing_level": "biasadjusted",
+    "source": "NASA-NEX-GDDP",
     "table_date": "2023-04-11",
     "table_id": "NEX-GDDP-CMIP6",
     "type": "simulation"
diff --git a/src/miranda/convert/deh.py b/src/miranda/convert/deh.py
index 3fa5acd1..72cbbeb7 100644
--- a/src/miranda/convert/deh.py
+++ b/src/miranda/convert/deh.py
@@ -25,7 +25,8 @@
     "variable_entry"
 ]
 
-# TODO: Some potentially useful attributes were skipped, because they would be complicated to include in a dataset since they vary per station
+# TODO: Some potentially useful attributes were skipped
+# because they would be complicated to include in a dataset since they vary per station
 meta_patterns = {
     "Station: ": "name",
     "Bassin versant: ": "bv",
diff --git a/src/miranda/convert/eccc.py b/src/miranda/convert/eccc_canswe.py
similarity index 96%
rename from src/miranda/convert/eccc.py
rename to src/miranda/convert/eccc_canswe.py
index becf0509..e788bc60 100644
--- a/src/miranda/convert/eccc.py
+++ b/src/miranda/convert/eccc_canswe.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import xarray as xr
 
-from ._data_corrections import dataset_corrections
+from .corrections import dataset_corrections
 
 __all__ = ["convert_canswe"]
 
@@ -91,7 +91,7 @@
     ds.snd.attrs["ancillary_variables"] = "data_flag_snd qc_flag_snd"
     ds.snw.attrs["ancillary_variables"] = "data_flag_snw qc_flag_snw"
 
-    ds = dataset_corrections(ds, "ec-canswe")
+    ds = dataset_corrections(ds, "eccc-canswe")
     ds.attrs["frequency"] = "day"
     date = "-".join(ds.indexes["time"][[0, -1]].strftime("%Y%m"))
     for var in ["snd", "snw"]:
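A minimal usage sketch consistent with the two renames above (new import path and new project key); the input file name is hypothetical:

    import xarray as xr

    from miranda.convert.corrections import dataset_corrections

    # Open a raw CanSWE file (path is illustrative) and apply the project
    # corrections under the renamed "eccc-canswe" key; the dataset then
    # carries the eccc-canswe header attributes from the JSON table above.
    ds = xr.open_dataset("CanSWE-CanEEN_1928-2021_v4.nc")
    ds = dataset_corrections(ds, "eccc-canswe")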
diff --git a/src/miranda/convert/eccc_rdrs.py b/src/miranda/convert/eccc_rdrs.py
index 6542b228..c1174bc3 100644
--- a/src/miranda/convert/eccc_rdrs.py
+++ b/src/miranda/convert/eccc_rdrs.py
@@ -4,24 +4,29 @@
 import logging.config
 import os
 
-from pathlib import Path
-from typing import Any
 
 import xarray as xr
-from numpy import unique
 
-from miranda.io import fetch_chunk_config, write_dataset_dict
 from miranda.scripting import LOGGING_CONFIG
-from miranda.units import get_time_frequency
 
-from ._aggregation import aggregate
-from ._data_corrections import dataset_conversion, load_json_data_mappings
-from ._data_definitions import gather_raw_rdrs_by_years, gather_rdrs
+# from pathlib import Path
+# from typing import Any
+
+
+# from numpy import unique
+
+
+# from miranda.treatments import load_json_data_mappings
+# from miranda.units import get_time_frequency
+#
+# from ._aggregation import aggregate
+# from ._data_definitions import gather_eccc_rdrs, gather_raw_rdrs_by_years
+# from .corrections import dataset_conversion
 
 logging.config.dictConfig(LOGGING_CONFIG)
 
-__all__ = ["convert_rdrs", "rdrs_to_daily"]
+# __all__ = ["convert_rdrs", "rdrs_to_daily"]
 
 
 # FIXME: Can we use `name_output_file` instead? We already have a better version of this function.
@@ -47,185 +52,186 @@ def _get_drop_vars(file: str | os.PathLike[str], *, keep_vars: list[str] | set[s
     return list(set(drop_vars) - set(keep_vars))
 
 
-def convert_rdrs(
-    project: str,
-    input_folder: str | os.PathLike[str],
-    output_folder: str | os.PathLike[str],
-    output_format: str = "zarr",
-    working_folder: str | os.PathLike[str] | None = None,
-    overwrite: bool = False,
-    cfvariable_list: list | None = None,
-    **dask_kwargs: dict[str, Any],
-) -> None:
-    r"""
-    Convert RDRS dataset.
-
-    Parameters
-    ----------
-    project : str
-        The project name.
-    input_folder : str or os.PathLike
-        The input folder.
-    output_folder : str or os.PathLike
-        The output folder.
-    output_format : {"netcdf", "zarr"}
-        The output format.
-    working_folder : str or os.PathLike, optional
-        The working folder.
-    overwrite : bool
-        Whether to overwrite existing files. Default: False.
-    cfvariable_list : list, optional
-        The CF variable list.
-    \*\*dask_kwargs : dict
-        Additional keyword arguments passed to the Dask scheduler.
-    """
-    # TODO: This setup configuration is near-universally portable. Should we consider applying it to all conversions?
-    var_attrs = load_json_data_mappings(project=project)["variables"]
-    if cfvariable_list:
-        var_attrs = {
-            v: var_attrs[v]
-            for v in var_attrs
-            if var_attrs[v]["_cf_variable_name"] in cfvariable_list
-        }
-    freq_dict = dict(h="hr", d="day")
-
-    if isinstance(input_folder, str):
-        input_folder = Path(input_folder).expanduser()
-    if isinstance(output_folder, str):
-        output_folder = Path(output_folder).expanduser()
-    if isinstance(working_folder, str):
-        working_folder = Path(working_folder).expanduser()
-
-    # FIXME: Do we want to collect everything? Maybe return a dictionary with years and associated files?
-    out_freq = None
-    gathered = gather_raw_rdrs_by_years(input_folder)
-    for year, ncfiles in gathered[project].items():
-        ds_allvars = None
-        if len(ncfiles) >= 28:
-            for nc in ncfiles:
-                ds1 = xr.open_dataset(nc, chunks="auto")
-                if ds_allvars is None and out_freq is None:
-                    ds_allvars = ds1
-                    out_freq, meaning = get_time_frequency(ds1)
-                    out_freq = (
-                        f"{out_freq[0]}{freq_dict[out_freq[1]]}"
-                        if meaning == "hour"
-                        else freq_dict[out_freq[1]]
-                    )
-                    ds_allvars.attrs["frequency"] = out_freq
-                else:
-                    ds_allvars = xr.concat(
-                        [ds_allvars, ds1], data_vars="minimal", dim="time"
-                    )
-            ds_allvars = ds_allvars.sel(time=f"{year}")
-            # This is the heart of the conversion utility; We could apply this to multiple projects.
-            for month in unique(ds_allvars.time.dt.month):
-                ds_month = ds_allvars.sel(time=f"{year}-{str(month).zfill(2)}")
-                for var_attr in var_attrs.keys():
-                    drop_vars = _get_drop_vars(
-                        ncfiles[0], keep_vars=[var_attr, "rotated_pole"]
-                    )
-                    ds_out = ds_month.drop_vars(drop_vars)
-                    ds_out = ds_out.assign_coords(rotated_pole=ds_out["rotated_pole"])
-                    ds_corr = dataset_conversion(
-                        ds_out,
-                        project=project,
-                        add_version_hashes=False,
-                        overwrite=overwrite,
-                    )
-                    chunks = fetch_chunk_config(
-                        priority="time", freq=out_freq, dims=ds_corr.dims
-                    )
-                    chunks["time"] = len(ds_corr.time)
-                    write_dataset_dict(
-                        {var_attrs[var_attr]["_cf_variable_name"]: ds_corr},
-                        output_folder=output_folder.joinpath(out_freq),
-                        temp_folder=working_folder,
-                        output_format=output_format,
-                        overwrite=overwrite,
-                        chunks=chunks,
-                        **dask_kwargs,
-                    )
+# FIXME: This looks like a utility function. Should it be moved to a utils module?
+# def convert_rdrs(
+#     project: str,
+#     input_folder: str | os.PathLike[str],
+#     output_folder: str | os.PathLike[str],
+#     output_format: str = "zarr",
+#     working_folder: str | os.PathLike[str] | None = None,
+#     overwrite: bool = False,
+#     cfvariable_list: list | None = None,
+#     **dask_kwargs: dict[str, Any],
+# ) -> None:
+#     r"""
+#     Convert RDRS dataset.
+#
+#     Parameters
+#     ----------
+#     project : str
+#         The project name.
+#     input_folder : str or os.PathLike
+#         The input folder.
+#     output_folder : str or os.PathLike
+#         The output folder.
+#     output_format : {"netcdf", "zarr"}
+#         The output format.
+#     working_folder : str or os.PathLike, optional
+#         The working folder.
+#     overwrite : bool
+#         Whether to overwrite existing files. Default: False.
+#     cfvariable_list : list, optional
+#         The CF variable list.
+#     \*\*dask_kwargs : dict
+#         Additional keyword arguments passed to the Dask scheduler.
+#     """
+#     # TODO: This setup configuration is near-universally portable. Should we consider applying it to all conversions?
+#     var_attrs = load_json_data_mappings(project=project)["variables"]
+#     if cfvariable_list:
+#         var_attrs = {
+#             v: var_attrs[v]
+#             for v in var_attrs
+#             if var_attrs[v]["_cf_variable_name"] in cfvariable_list
+#         }
+#     freq_dict = dict(h="hr", d="day")
+#
+#     if isinstance(input_folder, str):
+#         input_folder = Path(input_folder).expanduser()
+#     if isinstance(output_folder, str):
+#         output_folder = Path(output_folder).expanduser()
+#     if isinstance(working_folder, str):
+#         working_folder = Path(working_folder).expanduser()
+#
+#     # FIXME: Do we want to collect everything? Maybe return a dictionary with years and associated files?
+#     out_freq = None
+#     gathered = gather_raw_rdrs_by_years(input_folder)
+#     for year, ncfiles in gathered[project].items():
+#         ds_allvars = None
+#         if len(ncfiles) >= 28:
+#             for nc in ncfiles:
+#                 ds1 = xr.open_dataset(nc, chunks="auto")
+#                 if ds_allvars is None and out_freq is None:
+#                     ds_allvars = ds1
+#                     out_freq, meaning = get_time_frequency(ds1)
+#                     out_freq = (
+#                         f"{out_freq[0]}{freq_dict[out_freq[1]]}"
+#                         if meaning == "hour"
+#                         else freq_dict[out_freq[1]]
+#                     )
+#                     ds_allvars.attrs["frequency"] = out_freq
+#                 else:
+#                     ds_allvars = xr.concat(
+#                         [ds_allvars, ds1], data_vars="minimal", dim="time"
+#                     )
+#             ds_allvars = ds_allvars.sel(time=f"{year}")
+#             # This is the heart of the conversion utility; We could apply this to multiple projects.
+#             for month in unique(ds_allvars.time.dt.month):
+#                 ds_month = ds_allvars.sel(time=f"{year}-{str(month).zfill(2)}")
+#                 for var_attr in var_attrs.keys():
+#                     drop_vars = _get_drop_vars(
+#                         ncfiles[0], keep_vars=[var_attr, "rotated_pole"]
+#                     )
+#                     ds_out = ds_month.drop_vars(drop_vars)
+#                     ds_out = ds_out.assign_coords(rotated_pole=ds_out["rotated_pole"])
+#                     ds_corr = dataset_conversion(
+#                         ds_out,
+#                         project=project,
+#                         add_version_hashes=False,
+#                         overwrite=overwrite,
+#                     )
+#                     chunks = fetch_chunk_config(
+#                         priority="time", freq=out_freq, dims=ds_corr.dims
+#                     )
+#                     chunks["time"] = len(ds_corr.time)
+#                     write_dataset_dict(
+#                         {var_attrs[var_attr]["_cf_variable_name"]: ds_corr},
+#                         output_folder=output_folder.joinpath(out_freq),
+#                         temp_folder=working_folder,
+#                         output_format=output_format,
+#                         overwrite=overwrite,
+#                         chunks=chunks,
+#                         **dask_kwargs,
+#                     )
 
 
 # FIXME: This looks mostly like code to stage writing out files. Should it be moved to an IO module?
-def rdrs_to_daily(
-    project: str,
-    input_folder: str | os.PathLike,
-    output_folder: str | os.PathLike,
-    working_folder: str | os.PathLike | None = None,
-    overwrite: bool = False,
-    output_format: str = "zarr",
-    year_start: int | None = None,
-    year_end: int | None = None,
-    process_variables: list[str] | None = None,
-    **dask_kwargs: dict[str, Any],
-) -> None:
-    r"""
-    Write out RDRS files to daily-timestep files.
-
-    Parameters
-    ----------
-    project : str
-        The project name.
-    input_folder : str or os.PathLike
-        The input folder.
-    output_folder : str or os.PathLike
-        The output folder.
-    working_folder : str or os.PathLike
-        The working folder.
-    overwrite : bool
-        Whether to overwrite existing files. Default: False.
-    output_format : {"netcdf", "zarr"}
-        The output format.
-    year_start : int, optional
-        The start year.
-        If not provided, the minimum year in the dataset will be used.
-    year_end : int, optional
-        The end year.
-        If not provided, the maximum year in the dataset will be used.
-    process_variables : list of str, optional
-        The variables to process.
-        If not provided, all variables will be processed.
-    \*\*dask_kwargs : dict
-        Additional keyword arguments passed to the Dask scheduler.
-    """
-    if isinstance(input_folder, str):
-        input_folder = Path(input_folder).expanduser()
-    if isinstance(output_folder, str):
-        output_folder = Path(output_folder).expanduser()  # noqa
-    if isinstance(working_folder, str):
-        working_folder = Path(working_folder).expanduser()
-
-    # GATHER ALL RDRS FILES
-    gathered = gather_rdrs(project, input_folder, "zarr", "cf")
-    files = gathered["rdrs-v21"]  # noqa
-    if process_variables:
-        for vv in [f for f in files.keys() if f not in process_variables]:
-            files.pop(vv)
-    for vv, zarrs in files.items():
-        zarrs = sorted(zarrs)
-        if not year_start:
-            year_start = xr.open_zarr(zarrs[0]).time.dt.year.min().values
-        if not year_end:
-            year_end = xr.open_zarr(zarrs[-1]).time.dt.year.max().values
-        for year in range(year_start, year_end + 1):
-            infiles = [z for z in zarrs if f"_{year}" in z.name]
-            if len(infiles) != 12:
-                raise ValueError(f"Found {len(infiles)} input files. Expected 12.")
-            #
-            out_variables = aggregate(
-                xr.open_mfdataset(infiles, engine="zarr"), freq="day"
-            )
-            # FIXME: Fetch chunk config has been modified to accept different arguments.
-            chunks = fetch_chunk_config(project=project, freq="day")
-            chunks["time"] = len(out_variables[list(out_variables.keys())[0]].time)
-            write_dataset_dict(
-                out_variables,
-                output_folder=output_folder,
-                temp_folder=working_folder,
-                output_format=output_format,
-                overwrite=overwrite,
-                chunks=chunks,
-                **dask_kwargs,
-            )
+# def rdrs_to_daily(
+#     project: str,
+#     input_folder: str | os.PathLike,
+#     output_folder: str | os.PathLike,
+#     working_folder: str | os.PathLike | None = None,
+#     overwrite: bool = False,
+#     output_format: str = "zarr",
+#     year_start: int | None = None,
+#     year_end: int | None = None,
+#     process_variables: list[str] | None = None,
+#     **dask_kwargs: dict[str, Any],
+# ) -> None:
+#     r"""
+#     Write out RDRS files to daily-timestep files.
+#
+#     Parameters
+#     ----------
+#     project : str
+#         The project name.
+#     input_folder : str or os.PathLike
+#         The input folder.
+#     output_folder : str or os.PathLike
+#         The output folder.
+#     working_folder : str or os.PathLike
+#         The working folder.
+#     overwrite : bool
+#         Whether to overwrite existing files. Default: False.
+#     output_format : {"netcdf", "zarr"}
+#         The output format.
+#     year_start : int, optional
+#         The start year.
+#         If not provided, the minimum year in the dataset will be used.
+#     year_end : int, optional
+#         The end year.
+#         If not provided, the maximum year in the dataset will be used.
+#     process_variables : list of str, optional
+#         The variables to process.
+#         If not provided, all variables will be processed.
+#     \*\*dask_kwargs : dict
+#         Additional keyword arguments passed to the Dask scheduler.
+#     """
+#     if isinstance(input_folder, str):
+#         input_folder = Path(input_folder).expanduser()
+#     if isinstance(output_folder, str):
+#         output_folder = Path(output_folder).expanduser()  # noqa
+#     if isinstance(working_folder, str):
+#         working_folder = Path(working_folder).expanduser()
+#
+#     # GATHER ALL RDRS FILES
+#     gathered = gather_eccc_rdrs(project, input_folder, "zarr", "cf")
+#     files = gathered["rdrs-v21"]  # noqa
+#     if process_variables:
+#         for vv in [f for f in files.keys() if f not in process_variables]:
+#             files.pop(vv)
+#     for vv, zarrs in files.items():
+#         zarrs = sorted(zarrs)
+#         if not year_start:
+#             year_start = xr.open_zarr(zarrs[0]).time.dt.year.min().values
+#         if not year_end:
+#             year_end = xr.open_zarr(zarrs[-1]).time.dt.year.max().values
+#         for year in range(year_start, year_end + 1):
+#             infiles = [z for z in zarrs if f"_{year}" in z.name]
+#             if len(infiles) != 12:
+#                 raise ValueError(f"Found {len(infiles)} input files. Expected 12.")
+#             #
+#             out_variables = aggregate(
+#                 xr.open_mfdataset(infiles, engine="zarr"), freq="day"
+#             )
+#             # FIXME: Fetch chunk config has been modified to accept different arguments.
+#             chunks = fetch_chunk_config(project=project, freq="day")
+#             chunks["time"] = len(out_variables[list(out_variables.keys())[0]].time)
+#             write_dataset_dict(
+#                 out_variables,
+#                 output_folder=output_folder,
+#                 temp_folder=working_folder,
+#                 output_format=output_format,
+#                 overwrite=overwrite,
+#                 chunks=chunks,
+#                 **dask_kwargs,
+#             )
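Only the `_get_drop_vars` helper survives in eccc_rdrs.py after this change. A minimal sketch of the pattern the commented-out converter used, with a hypothetical file name and variable (note that the helper is module-private):

    import xarray as xr

    from miranda.convert.eccc_rdrs import _get_drop_vars

    nc = "rdrs_1980_01.nc"  # hypothetical RDRS source file
    # Keep one data variable plus the rotated-pole grid mapping, drop the rest.
    drop_vars = _get_drop_vars(nc, keep_vars=["RDRS_v2.1_A_PR0_SFC", "rotated_pole"])
    ds = xr.open_dataset(nc, chunks="auto").drop_vars(drop_vars)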
diff --git a/src/miranda/convert/melcc.py b/src/miranda/convert/melcc.py
index 3cadecd4..096748f6 100644
--- a/src/miranda/convert/melcc.py
+++ b/src/miranda/convert/melcc.py
@@ -23,13 +23,10 @@
 from xclim.core.units import convert_units_to, pint_multiply, str2pint
 
 from miranda import __version__
+from miranda.convert.corrections import dataset_corrections
 from miranda.scripting import LOGGING_CONFIG
-
-from ._data_corrections import (
-    dataset_corrections,
-    load_json_data_mappings,
-    metadata_conversion,
-)
+from miranda.treatments import metadata_conversion
+from miranda.treatments.utils import load_json_data_mappings
 
 logging.config.dictConfig(LOGGING_CONFIG)
 logger = logging.getLogger(__name__)
@@ -676,7 +673,7 @@
     )
     ds.attrs.update(frequency="2sem")
 
-    meta = load_json_data_mappings("melcc-snow")
+    meta = load_json_data_mappings("melcc")
     ds = metadata_conversion(ds, "melcc-snow", meta)
     date = "-".join(ds.indexes["time"][[0, -1]].strftime("%Y%m"))
     # Save
diff --git a/src/miranda/cv.py b/src/miranda/cv.py
index 396885bd..2ba4534c 100644
--- a/src/miranda/cv.py
+++ b/src/miranda/cv.py
@@ -1,4 +1,4 @@
-"""Controlled Vocabulary module."""
+"""ESGF Controlled Vocabulary module."""
 
 from __future__ import annotations
 
diff --git a/src/miranda/eccc/__init__.py b/src/miranda/eccc/__init__.py
index 4e05996d..507f571a 100644
--- a/src/miranda/eccc/__init__.py
+++ b/src/miranda/eccc/__init__.py
@@ -1,7 +1,3 @@
 """Environment and Climate Change Canada specialized conversion module."""
 
 from __future__ import annotations
-
-from ._homogenized import *
-from ._raw import *
-from ._summaries import *
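The two deleted modules listed next (_homogenized.py and _raw.py) both parsed ECCC's fixed-width station archives with pandas.read_fwf. A minimal sketch of that mechanism, with a hypothetical column layout and file name (the real colspecs came from the deleted metadata tables):

    import pandas as pd

    # Hypothetical daily layout: 7-char station id, 4-char year, 2-char month,
    # 3-char variable code, then paired 6-char value / 1-char flag columns.
    colspecs = [(0, 7), (7, 11), (11, 13), (13, 16)]
    for i in range(31):
        start = 16 + i * 7
        colspecs += [(start, start + 6), (start + 6, start + 7)]

    df = pd.read_fwf("dly_station_file.txt", colspecs=colspecs, header=None)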
diff --git a/src/miranda/eccc/_homogenized.py b/src/miranda/eccc/_homogenized.py
deleted file mode 100644
index daf28039..00000000
--- a/src/miranda/eccc/_homogenized.py
+++ /dev/null
@@ -1,286 +0,0 @@
-"""Adjusted and Homogenized Canadian Climate Data module."""
-
-from __future__ import annotations
-
-import calendar
-import logging.config
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import xarray as xr
-from dask.diagnostics import ProgressBar
-
-from miranda.scripting import LOGGING_CONFIG
-
-from ._utils import cf_ahccd_metadata
-
-logging.config.dictConfig(LOGGING_CONFIG)
-logger = logging.Logger("miranda")
-
-__all__ = ["convert_ahccd", "convert_ahccd_fwf_files"]
-
-
-def convert_ahccd(
-    data_source: str | Path,
-    output_dir: str | Path,
-    variable: str,
-    generation: int | None = None,
-) -> None:
-    """Convert Adjusted and Homogenized Canadian Climate Dataset files.
-
-    Parameters
-    ----------
-    data_source: str or Path
-    output_dir: str or Path
-    variable: str
-    generation: int, optional
-
-    Returns
-    -------
-    None
-    """
-    output_dir = Path(output_dir).resolve().joinpath(variable)
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    code = dict(tasmax="dx", tasmin="dn", tas="dm", pr="dt", prsn="ds", prlp="dr").get(
-        variable
-    )
-    var, col_names, col_spaces, header_row, global_attrs = cf_ahccd_metadata(
-        code, generation
-    )
-    gen = {2: "Second", 3: "Third"}.get(generation)
-    if generation == 3 and code in {"dx", "dn", "dm"}:
-        meta = "ahccd_gen3_temperature.csv"
-    elif generation == 2 and code in {"dt", "ds", "dr"}:
-        meta = "ahccd_gen2_precipitation.csv"
-
-    else:
-        raise NotImplementedError(f"Code '{code} for generation {gen}.")
-    metadata_source = Path(__file__).resolve().parent.joinpath("data").joinpath(meta)
-
-    if "tas" in variable:
-        metadata = pd.read_csv(metadata_source, header=2)
-        metadata.columns = col_names.keys()
-        cols_specs = col_spaces
-
-    elif "pr" in variable:
-        metadata = pd.read_csv(metadata_source, header=3)
-        metadata.columns = col_names.keys()
-        cols_specs = col_spaces
-        for index, row in metadata.iterrows():
-            if isinstance(row["stnid"], str):
-                metadata.loc[index, "stnid"] = metadata.loc[index, "stnid"].replace(
-                    " ", ""
-                )
-    else:
-        raise KeyError(f"{variable} does not include 'pr' or 'tas'.")
-
-    # Convert station .txt files to netcdf
-    for ff in Path(data_source).glob("*d*.txt"):
-        outfile = output_dir.joinpath(ff.name.replace(".txt", ".nc"))
-        if not outfile.exists():
-            logger.info(ff.name)
-
-            stid = ff.name.replace(code, "").split(".txt")[0]
-            try:
-                metadata_st = metadata[metadata["stnid"] == int(stid)]
-            except ValueError:
-                metadata_st = metadata[metadata["stnid"] == stid]
-
-            if len(metadata_st) == 1:
-                ds_out = convert_ahccd_fwf_files(
-                    ff, metadata_st, variable, generation, cols_specs, var
-                )
-                ds_out.attrs = global_attrs
-
-                ds_out.to_netcdf(outfile, engine="h5netcdf")
-            else:
-                msg = f"metadata info for station {ff.name} not found : skipping"
-
-                logger.warning(msg)
-
-    # merge individual stations to single .nc file
-    # variable
-    ncfiles = list(output_dir.glob("*.nc"))
-    outfile = output_dir.parent.joinpath(
-        "merged_stations", f"ahccd_gen{generation}_{variable}.nc"
-    )
-
-    if not outfile.exists():
-        logger.info("merging stations :", variable)
-        with ProgressBar():
-            ds_ahccd = xr.open_mfdataset(
-                ncfiles, concat_dim="station", combine="nested"
-            ).load()
-
-            for coord in ds_ahccd.coords:
-                # xarray object datatypes mix string and int (e.g. stnid) convert to string for merged nc files
-                # Do not apply to datetime object
-                if coord != "time" and ds_ahccd[coord].dtype == "O":
-                    ds_ahccd[coord] = ds_ahccd[coord].astype(str)
-
-            for v in ds_ahccd.data_vars:
-                # xarray object datatypes mix string and int (e.g. stnid) convert to string for merged nc files
-                # Do not apply to flag timeseries
-                if ds_ahccd[v].dtype == "O" and "flag" not in v:
-                    logger.info(v)
-                    ds_ahccd[v] = ds_ahccd[v].astype(str)
-
-            ds_ahccd[f"{variable}_flag"].attrs[
-                "long_name"
-            ] = f"{ds_ahccd[f'{variable}'].attrs['long_name']} flag"
-            ds_ahccd.lon.attrs["units"] = "degrees_east"
-            ds_ahccd.lon.attrs["long_name"] = "longitude"
-            ds_ahccd.lat.attrs["units"] = "degrees_north"
-            ds_ahccd.lat.attrs["long_name"] = "latitude"
-
-            for clean_name, orig_name in col_names.items():
-                if clean_name in ["lat", "long"]:
-                    continue
-                ds_ahccd[clean_name].attrs["long_name"] = orig_name
-
-            outfile.parent.mkdir(parents=True, exist_ok=True)
-            ds_ahccd.to_netcdf(
-                outfile, engine="h5netcdf", format="NETCDF4_CLASSIC", mode="w"
-            )
-
-            del ds_ahccd
-    for nc in outfile.parent.glob("*.nc"):
-        logger.info(nc)
-        ds = xr.open_dataset(nc)
-        logger.info(ds)
-
-
-def convert_ahccd_fwf_files(
-    ff: Path | str,
-    metadata: pd.DataFrame,
-    variable: str,
-    generation: int | None = None,
-    cols_specs: list[tuple[int, int]] | None = None,
-    attrs: dict | None = None,
-) -> xr.Dataset:
-    """Convert AHCCD fixed-width files.
-
-    Parameters
-    ----------
-    ff: str or Path
-    metadata: pandas.DataFrame
-    variable: str
-    generation
-    cols_specs
-    attrs
-
-    Returns
-    -------
-    xarray.Dataset
-    """
-    code = dict(tasmax="dx", tasmin="dn", tas="dm", pr="dt", prsn="ds", prlp="dr").get(
-        variable
-    )
-
-    if attrs is None:
-        attrs, _, _, _, _ = cf_ahccd_metadata(code, generation)
-    if cols_specs is None:
-        _, _, cols_specs, _, _ = cf_ahccd_metadata(code, generation)
-    _, _, _, nhead, _ = cf_ahccd_metadata(code, generation)
-
-    df = pd.read_fwf(ff, header=nhead, colspecs=cols_specs)
-    if "pr" in variable:
-        cols = list(df.columns[0:3])
-        cols = cols[0::2]
-        cols.extend(list(df.columns[4::2]))
-        flags = list(df.columns[5::2])
-        dfflags = df[flags]
-    else:
-        cols = [c for c in df.columns if "Unnamed" not in c]
-        flags = [c for c in df.columns if "Unnamed" in c]
-        dfflags = df[flags[2:]]
-
-    df = df[cols]
-    df.replace(attrs["NaN_value"], np.NaN, inplace=True)
-
-    for i, j in enumerate(["Year", "Month"]):
-        df = df.rename(columns={df.columns[i]: j})
-    start_date = f"{df['Year'][0]}-{str(df['Month'][0]).zfill(2)}-01"
-
-    _, ndays = calendar.monthrange(df["Year"].iloc[-1], df["Month"].iloc[-1])
-    end_date = f"{df['Year'].iloc[-1]}-{str(df['Month'].iloc[-1]).zfill(2)}-{str(ndays).zfill(2)}"
-    time1 = pd.date_range(start=start_date, end=end_date)
-
-    index = pd.MultiIndex.from_arrays([df["Year"], df["Month"]])
-    df.index = index
-    dfflags.index = index
-    cols = [c for c in df.columns if "Year" not in c and "Month" not in c]
-    df = df[cols]
-    df.columns = np.arange(1, 32)
-    dfflags.columns = np.arange(1, 32)
-    ds = df.stack().to_frame()
-    ds = ds.rename(columns={0: variable})
-    ds_flag = dfflags.stack().to_frame()
-    ds_flag = ds_flag.rename(columns={0: "flag"})
-    ds.index.names = ["Year", "Month", "Day"]
-    ds_flag.index.names = ["Year", "Month", "Day"]
-    ds[f"{variable}_flag"] = ds_flag["flag"]
-    del ds_flag
-
-    # find invalid dates
-    for y in time1.year.unique():
-        for m in (
-            ds[ds.index.get_level_values("Year") == y]
-            .index.get_level_values("Month")
-            .unique()
-        ):
-            _, exp_ndays = calendar.monthrange(y, m)
-            ndays = (
-                (ds.index.get_level_values("Year") == y)
-                & (ds.index.get_level_values("Month") == m)
-            ).sum()
-            if ndays > np.int(exp_ndays):
-                print(f"year {y}, month {m}, ndays={ndays}, exp_ndays={exp_ndays}")
-                raise RuntimeError("Unknown days present.")
-
-    time_ds = pd.DataFrame(
-        {
-            "year": ds.index.get_level_values("Year"),
-            "month": ds.index.get_level_values("Month"),
-            "day": ds.index.get_level_values("Day"),
-        }
-    )
-
-    ds.index = pd.to_datetime(time_ds)
-
-    ds = ds.to_xarray().rename({"index": "time"})
-
-    ds_out = xr.Dataset(coords={"time": time1})
-    for v in ds.data_vars:
-        ds_out[v] = ds[v]
-
-    ds_out[variable].attrs = attrs
-    # ds_out
-    metadata = metadata.to_xarray().rename({"index": "station"}).drop_vars("station")
-    metadata = metadata.assign_coords(
-        {
-            "stnid": metadata["stnid"].astype(str),
-            "station_name": metadata["station_name"],
-        }
-    )
-    # ds_out = ds_out.assign_coords({'lon': metadata['long'], 'lat': metadata['lat'], 'elevation': metadata['elev']})
-    #
-    ds_out = ds_out.assign_coords(station=metadata.stnid)
-    metadata = metadata.drop_vars(["stnid", "station_name"])
-
-    ds_out["lon"] = metadata["long"]
-    ds_out["lon"].attrs["units"] = "degrees_east"
-    ds_out["lat"] = metadata["lat"]
-    ds_out["lat"].attrs["units"] = "degrees_north"
-    ds_out["elev"] = metadata["elev"]
-    ds_out["elev"].attrs["units"] = "m"
-
-    metadata = metadata.drop_vars(["long", "lat", "elev"])
-    for vv in metadata.data_vars:
-        if metadata[vv].dtype == "O" and (variable not in vv):
-            ds_out[vv] = metadata[vv].astype(str)
-        else:
-            ds_out[vv] = metadata[vv]
-    return ds_out
diff --git a/src/miranda/eccc/_raw.py b/src/miranda/eccc/_raw.py
deleted file mode 100644
index 9b6f1f63..00000000
--- a/src/miranda/eccc/_raw.py
+++ /dev/null
@@ -1,968 +0,0 @@
-######################################################################
-# S.Biner, Ouranos, May 2019
-#
-# methodology
-#
-# 1) the netCDF files of the various ECCC stations are gathered into a single netCDF file.
-#
-# 2) the annual source files are scanned for a given variable, and whatever is found
-# is saved to netCDF files. Flags are also applied
-# and unit conversions are made
-#
-# obtained via http://climate.weather.gc.ca/index_e.html by clicking on 'about the data'
-#######################################################################
-from __future__ import annotations
-
-import contextlib
-import functools
-import logging
-import multiprocessing as mp
-import os
-import re
-import sys
-import tempfile
-import time
-from calendar import monthrange
-from datetime import datetime as dt
-from logging import config
-from pathlib import Path
-from typing import Optional
-from urllib.error import HTTPError
-
-import dask.dataframe as dd
-import numpy as np
-import pandas as pd
-import xarray as xr
-from dask.diagnostics import ProgressBar
-from xclim.core.units import convert_units_to
-
-from miranda.archive import group_by_length
-from miranda.scripting import LOGGING_CONFIG
-from miranda.storage import file_size, report_file_size
-from miranda.units import GiB, MiB
-from miranda.utils import generic_extract_archive
-
-from ._utils import cf_station_metadata
-
-config.dictConfig(LOGGING_CONFIG)
-
-__all__ = [
-    "aggregate_stations",
-    "convert_flat_files",
-    "merge_converted_variables",
-]
-
-TABLE_DATE = dt.now().strftime("%d %B %Y")
-
-
-def load_station_metadata(meta: str | os.PathLike) -> xr.Dataset:
-    if meta:
-        df_inv = pd.read_csv(meta, header=0)
-    else:
-        try:
-            import geopandas as gpd
-
-            station_metadata_url = "https://api.weather.gc.ca/collections/climate-stations/items?f=json&limit=15000000"
-            df_inv = gpd.read_file(station_metadata_url)
-        except HTTPError as err:
-            raise RuntimeError(
-                f"Station metadata table unable to be fetched. Considering downloading directly: {err}"
-            )
-        df_inv["LONGITUDE"] = df_inv.geometry.x
-        df_inv["LATITUDE"] = df_inv.geometry.y
-        df_inv["ELEVATION"] = df_inv.ELEVATION.astype(float)
-        df_inv["CLIMATE_IDENTIFIER"] = df_inv["CLIMATE_IDENTIFIER"].astype(str)
-
-        df_inv = df_inv.drop(["geometry"], axis=1)
-    return df_inv.to_xarray()
-
-
-def _remove_duplicates(ds):
-    if any(ds.get_index("time").duplicated()):
-        msg = (
-            f"Found {ds.get_index('time').duplicated().sum()} duplicated time coordinates "
-            f"for station {ds.station_id.values}. Assuming first value."
-        )
-        logging.info(msg)
-    return ds.sel(time=~ds.get_index("time").duplicated())
-
-
-def _convert_station_file(
-    fichier: Path,
-    output_path: Path,
-    errored_files: list[Path],
-    mode: str,
-    add_offset: float,
-    column_dtypes: list[str],
-    column_names: list[str],
-    long_name: str,
-    missing_flags: set[str],
-    missing_values: set[str],
-    nc_name: str,
-    raw_units: str,
-    units: str,
-    scale_factor: float,
-    standard_name: str,
-    variable_code: str,
-    **kwargs,
-):
-    if mode.lower() in ["h", "hour", "hourly"]:
-        num_observations = 24
-        column_widths = [7, 4, 2, 2, 3] + [6, 1] * num_observations
-    elif mode.lower() in ["d", "day", "daily"]:
-        num_observations = 31
-        column_widths = [7, 4, 2, 3] + [6, 1] * num_observations
-    else:
-        raise NotImplementedError("`mode` must be 'h'/'hourly or 'd'/'daily'.")
-
-    if not missing_values:
-        missing_values = {-9999, "#####"}
-
-    with tempfile.TemporaryDirectory() as temp_folder:
-        if fichier.suffix in [".gz", ".tar", ".zip", ".7z"]:
-            data_files = generic_extract_archive(fichier, output_dir=temp_folder)
-        else:
-            data_files = [fichier]
-        msg = f"Processing file: {fichier}."
-        logging.info(msg)
-
-        size_limit = 1 * GiB
-
-        for data in data_files:
-            if file_size(data) > size_limit and "dask" in sys.modules:
-                msg = f"File exceeds {report_file_size(size_limit)} - Using dask.dataframes."
-
-                logging.info(msg)
-                pandas_reader = dd
-                using_dask_array = True
-                chunks = dict(blocksize=200 * MiB)
-                client = ProgressBar
-            else:
-                msg = f"File below {report_file_size(size_limit)} - Using pandas.dataframes."
-
-                logging.info(msg)
-                pandas_reader = pd
-                chunks = dict()
-                using_dask_array = False
-                client = contextlib.nullcontext
-
-            with client() as c:
-                # Create a dataframe from the files
-                try:
-                    df = pandas_reader.read_fwf(
-                        data,
-                        widths=column_widths,
-                        names=column_names,
-                        dtype={
-                            name: data_type
-                            for name, data_type in zip(column_names, column_dtypes)
-                        },
-                        assume_missing=True,
-                        **chunks,
-                    )
-                    if using_dask_array:
-                        df = c.persist(df)
-
-                except FileNotFoundError:
-                    msg = f"File {data} was not found."
-                    logging.error(msg)
-                    errored_files.append(data)
-                    return
-
-                except UnicodeDecodeError:
-                    msg = (
-                        f"File {data.name} was unable to be read. "
-                        f"This is probably an issue with the file."
-                    )
-                    logging.error(msg)
-                    errored_files.append(data)
-                    return
-
-                # Loop through the station codes
-                station_codes = df["code"].unique()
-                for code in station_codes:
-                    df_code = df[df["code"] == code]
-
-                    # Abort if the variable is not found
-                    if using_dask_array:
-                        has_variable_codes = (
-                            (df_code["code_var"] == variable_code).compute()
-                        ).any()
-                    else:
-                        has_variable_codes = (
-                            df_code["code_var"] == variable_code
-                        ).any()
-                    if not has_variable_codes:
-                        msg = f"Variable `{nc_name}` not found for station code: {code} in file {data}. Continuing..."
-
-                        logging.info(msg)
-                        continue
-
-                    # Perform the data treatment
-                    msg = f"Converting `{nc_name}` for station code: {code}."
-                    logging.info(msg)
-
-                    # Dump the data into a DataFrame
-                    df_var = df_code[df_code["code_var"] == variable_code].copy()
-
-                    # Mask the data according to the missing values flag
-                    df_var = df_var.replace(missing_values, np.nan)
-
-                    # Decode the values and flags
-                    dfd = df_var.loc[
-                        :, [f"D{i:0n}" for i in range(1, num_observations + 1)]
-                    ]
-                    dff = df_var.loc[
-                        :, [f"F{i:0n}" for i in range(1, num_observations + 1)]
-                    ]
-
-                    # Remove the "NaN" flag
-                    dff = dff.fillna("")
-
-                    # Use the flag to mask the values
-                    try:
-                        val = np.asarray(dfd.values, float)
-                    except ValueError as e:
-                        msg = f"Issues with {dfd}. Continuing..."
-                        logging.error(msg)
-                        continue
-                    try:
-                        flag = np.asarray(dff.values, str)
-                    except ValueError:
-                        msg = f"Issues with {dff}. Continuing..."
-                        logging.error(msg)
-                        continue
-                    mask = np.isin(flag, missing_flags)
-                    val[mask] = np.nan
-
-                    # Treat according to units conversions
-                    val = val * scale_factor + add_offset
-
-                    # Create the DataArray
-                    date_summations = dict(time=list())
-                    if mode == "hourly":
-                        for index, row in df_var.iterrows():
-                            period = pd.Period(
-                                year=row.year, month=row.month, day=row.day, freq="D"
-                            )
-                            dates = pd.Series(
-                                pd.date_range(
-                                    start=period.start_time,
-                                    end=period.end_time,
-                                    freq="H",
-                                )
-                            )
-                            date_summations["time"].extend(dates)
-                        written_values = val.flatten()
-                        written_flags = flag.flatten()
-                    elif mode == "daily":
-                        value_days = list()
-                        flag_days = list()
-                        for i, (index, row) in enumerate(df_var.iterrows()):
-                            period = pd.Period(year=row.year, month=row.month, freq="M")
-                            dates = pd.Series(
-                                pd.date_range(
-                                    start=period.start_time,
-                                    end=period.end_time,
-                                    freq="D",
-                                )
-                            )
-                            date_summations["time"].extend(dates)
-
-                            value_days.extend(
-                                val[i][
-                                    range(monthrange(int(row.year), int(row.month))[1])
-                                ]
-                            )
-                            flag_days.extend(
-                                flag[i][
-                                    range(monthrange(int(row.year), int(row.month))[1])
-                                ]
-                            )
-                        written_values = value_days
-                        written_flags = flag_days
-
-                    ds = xr.Dataset()
-                    da_val = xr.DataArray(
-                        written_values, coords=date_summations, dims=["time"]
-                    )
-
-                    if raw_units != units:
-                        da_val.attrs["units"] = raw_units
-                        da_val = convert_units_to(da_val, units)
-                    else:
-                        da_val.attrs["units"] = units
-
-                    da_val = da_val.rename(nc_name)
-                    variable_attributes = dict(
-                        variable_code=variable_code,
-                        standard_name=standard_name,
-                        long_name=long_name,
-                    )
-                    if "original_units" in kwargs:
-                        variable_attributes["original_units"] = kwargs["original_units"]
-                    da_val.attrs.update(variable_attributes)
-
-                    da_flag = xr.DataArray(
-                        written_flags, coords=date_summations, dims=["time"]
-                    )
-                    da_flag = da_flag.rename("flag")
-                    flag_attributes = dict(
-                        long_name="data flag",
-                        note="See ECCC technical documentation for details",
-                    )
-                    da_flag.attrs.update(flag_attributes)
-
-                    ds[nc_name] = da_val
-                    ds["flag"] = da_flag
-
-                    # save the file in NetCDF format
-                    start_year = ds.time.dt.year.values[0]
-                    end_year = ds.time.dt.year.values[-1]
-
-                    station_folder = output_path.joinpath(str(code))
-                    station_folder.mkdir(parents=True, exist_ok=True)
-
-                    f_nc = (
-                        f"{code}_{variable_code}_{nc_name}_"
-                        f"{start_year if start_year == end_year else '_'.join([str(start_year), str(end_year)])}.nc"
-                    )
-
-                    if station_folder.joinpath(f_nc).exists():
-                        msg = f"File `{f_nc}` already exists. Continuing..."
-                        logging.warning(msg)
-
-                    history = (
-                        f"{dt.now().strftime('%Y-%m-%d %X')} converted from flat station file "
-                        f"(`{fichier.name}`) to n-dimensional array."
-                    )
-
-                    # TODO: This info should eventually be sourced from a JSON definition
-                    global_attrs = dict(
-                        Conventions="CF-1.8",
-                        comment="Acquired on demand from data specialists at "
-                        "ECCC Climate Services / Services Climatiques.",
-                        contact="John Richard",
-                        contact_email="climatcentre-climatecentral@ec.gc.ca",
-                        domain="CAN",
-                    )
-                    if mode == "hourly":
-                        global_attrs.update(dict(frequency="1hr"))
-                    elif mode == "daily":
-                        global_attrs.update(dict(frequency="day"))
-                    global_attrs.update(
-                        dict(
-                            history=history,
-                            internal_comment=f"Converted by {os.environ.get('USER', os.environ.get('USERNAME'))}.",
-                            institution="ECCC",
-                            license="https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
-                            member=code,
-                            processing_level="raw",
-                            redistribution="Redistribution permitted.",
-                            references="https://climate.weather.gc.ca/doc/Technical_Documentation.pdf",
-                            source="historical-station-records",
-                            table_date=TABLE_DATE,
-                            title="Environment and Climate Change Canada (ECCC) weather station observations",
-                            type="station-obs",
-                            usage="The original data is owned by the Government of Canada (Environment and Climate "
-                            "Change Canada), and falls under the licence agreement for use of Environment and "
-                            "Climate Change Canada data",
-                            variable=str(nc_name),
-                            version=f"v{dt.now().strftime('%Y.%m.%V')}",  # Year.Month.Week
-                        )
-                    )
-                    ds.attrs.update(global_attrs)
-
-                    msg = f"Exporting to: {station_folder.joinpath(f_nc)}"
-                    logging.info(msg)
-                    ds.to_netcdf(station_folder.joinpath(f_nc))
-                    del ds
-                    del val
-                    del mask
-                    del flag
-                    del da_val
-                    del da_flag
-                    del dfd
-                    del dff
-                    del written_values
-                    del written_flags
-                    del date_summations
-
-                del df
-
-        if os.listdir(temp_folder):
-            for temporary_file in Path(temp_folder).glob("*"):
-                if temporary_file in data_files:
-                    temporary_file.unlink()
-
-
-def convert_flat_files(
-    source_files: str | os.PathLike,
-    output_folder: str | os.PathLike | list[str | int],
-    variables: str | int | list[str | int],
-    mode: str = "hourly",
-    n_workers: int = 4,
-) -> None:
-    """Convert flat formatted files.
-
-    Parameters
-    ----------
-    source_files : str or Path
-    output_folder : str or Path
-    variables : str or List[str]
-    mode : {"hourly", "daily"}
-    n_workers : int
-
-    Returns
-    -------
-    None
-    """
-    func_time = time.time()
-
-    if mode.lower() in ["h", "hour", "hourly"]:
-        num_observations = 24
-        column_names = ["code", "year", "month", "day", "code_var"]
-        column_dtypes = [str, float, float, float, str]
-    elif mode.lower() in ["d", "day", "daily"]:
-        num_observations = 31
-        column_names = ["code", "year", "month", "code_var"]
-        column_dtypes = [str, float, float, str]
-    else:
-        raise NotImplementedError("`mode` must be 'h'/'hourly or 'd'/'daily'.")
-
-    # Preparing the data column headers
-    for i in range(1, num_observations + 1):
-        data_entry, flag_entry = f"D{i:0n}", f"F{i:0n}"
-        column_names.append(data_entry)
-        column_names.append(flag_entry)
-        column_dtypes.extend([str, str])
-
-    if isinstance(variables, (str, int)):
-        variables = [variables]
-
-    for variable_code in variables:
-        variable_code = str(variable_code).zfill(3)
-        metadata = cf_station_metadata(variable_code)
-        nc_name = metadata["nc_name"]
-
-        rep_nc = Path(output_folder).joinpath(nc_name)
-        rep_nc.mkdir(parents=True, exist_ok=True)
-
-        # Loop on the files
-        msg = (
-            f"Collecting files for variable '{metadata['standard_name']}' "
-            f"(filenames containing '{metadata['_table_name']}')."
-        )
-        logging.info(msg)
-        list_files = list()
-        if isinstance(source_files, list) or Path(source_files).is_file():
-            list_files.append(source_files)
-        else:
-            glob_patterns = [g for g in metadata["_table_name"]]
-            for pattern in glob_patterns:
-                list_files.extend(
-                    [f for f in Path(source_files).rglob(f"{pattern}*") if f.is_file()]
-                )
-        manager = mp.Manager()
-        errored_files = manager.list()
-        converter_func = functools.partial(
-            _convert_station_file,
-            output_path=rep_nc,
-            errored_files=errored_files,
-            mode=mode,
-            variable_code=variable_code,
-            column_names=column_names,
-            column_dtypes=column_dtypes,
-            **metadata,
-        )
-        with mp.Pool(processes=n_workers) as pool:
-            pool.map(converter_func, list_files)
-            pool.close()
-            pool.join()
-
-        if errored_files:
-            msg = "Some files failed to be properly parsed:\n", ", ".join(errored_files)
-
-            logging.warning(msg)
-
-    msg = f"Process completed in {time.time() - func_time:.2f} seconds"
-    logging.warning()
-
-
-def aggregate_stations(
-    source_files: str | os.PathLike | None = None,
-    output_folder: str | os.PathLike | None = None,
-    time_step: str | None = None,
-    variables: str | int | list[str | int] | None = None,
-    include_flags: bool = True,
-    groupings: int | None = None,
-    mf_dataset_freq: str | None = None,
-    temp_directory: str | os.PathLike | None = None,
-    n_workers: int = 1,
-) -> None:
-    """Aggregate stations.
-
-    Parameters
-    ----------
-    source_files : str or Path
-        Source files to be aggregated.
-    output_folder : str or Path
-        Output folder for the aggregated files.
-    variables : str or int or list of str or int, optional
-        The variable codes to be aggregated.
-    time_step : {"hourly", "daily"}
-        The time step to be used for aggregation.
-    include_flags : bool
-        Include flags in the output files.
-    groupings : int
-        The number of files in each group used for converting to multi-file Datasets.
-    mf_dataset_freq : str, optional
-        Resampling frequency for creating output multi-file Datasets. E.g. 'YS': 1 year per file, '5YS': 5 years per file.
-    temp_directory : str or Path, optional
-        Use another temporary directory location in case default location is not spacious enough.
-    n_workers : int
-        The number of workers to use.
-
-    Returns
-    -------
-    None
-    """
-    func_time = time.time()
-
-    if isinstance(source_files, str):
-        source_files = Path(source_files)
-
-    if time_step.lower() in ["h", "hour", "hourly"]:
-        mode = "hourly"
-    elif time_step.lower() in ["d", "day", "daily"]:
-        mode = "daily"
-    else:
-        raise ValueError("Time step must be `h` / `hourly` or `d` / `daily`.")
-
-    if isinstance(variables, list):
-        pass
-    elif isinstance(variables, (str, int)):
-        variables = [variables]
-    # TODO: have the variable gathered from a JSON file
-    elif variables is None:
-        if mode == "hourly":
-            variables = [
-                89,
-                94,
-                123,
-            ]
-            variables.extend(range(76, 81))
-            variables.extend(range(262, 281))
-        elif mode == "daily":
-            variables = [1, 2, 3]
-            variables.extend(range(10, 26))
-    else:
-        raise NotImplementedError()
-
-    for variable_code in variables:
-        info = cf_station_metadata(variable_code)
-        variable_name = info["nc_name"]
-        msg = f"Merging `{variable_name}` using `{time_step}` time step."
-        logging.info(msg)
-
-        # Only perform aggregation on available data with corresponding metadata
-        logging.info("Performing glob and sort.")
-        nc_list = [str(nc) for nc in source_files.joinpath(variable_name).rglob("*.nc")]
-
-        if not groupings:
-            groupings = max(n_workers**2, 4)
-
-        if nc_list:
-            nc_lists = group_by_length(nc_list, groupings)
-
-            with tempfile.TemporaryDirectory(
-                prefix="eccc", dir=temp_directory
-            ) as temp_dir:
-                combinations = sorted(
-                    (ii, nc, temp_dir, len(nc_lists)) for ii, nc in enumerate(nc_lists)
-                )
-
-                with mp.Pool(processes=n_workers) as pool:
-                    pool.starmap(_tmp_zarr, combinations)
-                    pool.close()
-                    pool.join()
-
-                zarrs_found = [f for f in Path(temp_dir).glob("*.zarr")]
-                msg = f"Found {len(zarrs_found)} intermediary aggregation files."
-
-                logging.info(msg)
-
-                ds = xr.open_mfdataset(
-                    zarrs_found,
-                    engine="zarr",
-                    combine="nested",
-                    concat_dim={"station"},
-                )
-
-                if ds:
-                    station_file_codes = [Path(x).name.split("_")[0] for x in nc_list]
-                    if not include_flags:
-                        drop_vars = [vv for vv in ds.data_vars if "flag" in vv]
-                        ds = ds.drop_vars(drop_vars)
-                    ds = ds.sortby(ds.station_id, "time")
-
-                    # Rearrange column order to have lon, lat, elev first
-                    # # FIXME: This doesn't work as intended - Assign coordinates instead
-                    # cols = meta.columns.tolist()
-                    # cols1 = [
-                    #     "latitude",
-                    #     "longitude",
-                    #     "elevation",
-                    # ]
-                    # for rr in cols1:
-                    #     cols.remove(rr)
-                    # cols1.extend(cols)
-                    # meta = meta[cols1]
-                    # meta.index.rename("station", inplace=True)
-                    # meta = meta.to_xarray()
-                    # meta.sortby(meta["climate_identifier"])
-                    # meta = meta.assign({"station": ds.station.values})
-
-                    # np.testing.assert_array_equal(
-                    #     sorted(meta["climate_identifier"].values), sorted(ds.station_id.values)
-                    # )
-                    # for vv in meta.data_vars:
-                    #     ds = ds.assign_coords({vv: meta[vv]})
-                    # ds = xr.merge([ds, meta])
-                    # ds.attrs = attrs1
-
-                    # export done within tmddir context otherwise data is erased before final export!!
-                    valid_stations = list(sorted(ds.station_id.values))
-                    valid_stations_count = len(valid_stations)
-
-                    msg = f"Processing stations for variable `{variable_name}`."
-                    logging.info(msg)
-
-                    if len(station_file_codes) == 0:
-                        msg = f"No stations were found containing variable filename `{variable_name}`. Exiting."
-                        logging.error(msg)
-                        return
-
-                    msg = (
-                        f"Files exist for {len(station_file_codes)} ECCC stations. "
-                        f"Metadata found for {valid_stations_count} stations. "
-                    )
-                    logging.info(msg)
-
-                    # FIXME: Is this still needed?
-                    # logging.info("Preparing the NetCDF time period.")
-                    # Create the time period timestamps
-                    # year_start = ds.time.dt.year.min().values
-                    # year_end = ds.time.dt.year.max().values
-
-                    # Calculate the time index dimensions of the output NetCDF
-                    # time_index = pd.date_range(
-                    #     start=f"{year_start}-01-01",
-                    #     end=f"{year_end + 1}-01-01",
-                    #     freq=mode[0].capitalize(),
-                    # )[:-1]
-                    # logging.info(
-                    #     f"Number of ECCC stations: {valid_stations_count}, time steps: {time_index.size}."
-                    # )
-
-                    Path(output_folder).mkdir(parents=True, exist_ok=True)
-                    file_out = Path(output_folder).joinpath(f"{variable_name}_eccc_{mode}")
-
-                    ds = ds.assign_coords(station=range(0, len(ds.station))).sortby("time")
-                    if mf_dataset_freq is not None:
-                        # output mf_dataset using resampling frequency
-                        _, datasets = zip(*ds.resample(time=mf_dataset_freq))
-                    else:
-                        datasets = [ds]
-
-                    paths = [
-                        f"{file_out}_{data.time.dt.year.min().values}-{data.time.dt.year.max().values}.nc"
-                        for data in datasets
-                    ]
-
-                    # FIXME: chunks need to be dealt with
-                    # chunks = [1, len(ds.time)]
-                    # comp = dict(zlib=True, complevel=5)  # , chunk sizes=chunks)
-
-                    with ProgressBar():
-                        # FIXME: looping seems to cause increasing memory over time use a pool of one or 2??
-                        # for dataset, path in zip(datasets, paths):
-                        #     _export_agg_nc(dataset,path)
-                        combs = zip(datasets, paths)
-                        pool = mp.Pool(2)
-                        pool.map(_export_agg_nc, combs)
-                        pool.close()
-                        pool.join()
-                    ds.close()
-                    del ds
-
-        else:
-            msg = f"No files found for variable: `{variable_name}`."
-            logging.info(msg)
-
-    runtime = f"Process completed in {time.time() - func_time:.2f} seconds"
-    logging.warning(runtime)
-
-
-def _export_agg_nc(args):
-    dataset, path = args
-    comp = dict(zlib=True, complevel=5)
-    encoding = {var: comp for var in dataset.data_vars}
-    dataset.load().to_netcdf(
-        path,
-        engine="h5netcdf",
-        format="NETCDF4_CLASSIC",
-        encoding=encoding,
-    )
-    dataset.close()
-    del dataset
-
-
-def _tmp_zarr(
-    iterable: int,
-    nc: list[str | os.PathLike],
-    tempdir: str | os.PathLike,
-    group: int | None = None,
-) -> None:
-    msg = (
-        f"Processing batch of files {iterable + 1}"
-        f"{' of ' + str(group) if group is not None else ''}."
-    )
-    logging.info(msg)
-    station_file_codes = [Path(x).name.split("_")[0] for x in nc]
-
-    try:
-        ds = xr.open_mfdataset(
-            nc, combine="nested", concat_dim={"station"}, preprocess=_remove_duplicates
-        )
-    except ValueError as e:
-        errored_nc_files = ", ".join([Path(f).name for f in nc])
-        msg = f"Issues found with the following files: [{errored_nc_files}]: {e}"
-
-        logging.error(msg)
-        return
-
-    ds = ds.assign_coords(
-        station_id=xr.DataArray(station_file_codes, dims="station").astype(str)
-    )
-    if "flag" in ds.data_vars:
-        ds1 = ds.drop_vars("flag").copy(deep=True)
-        ds1["flag"] = ds.flag.astype(str)
-        ds = ds1
-
-    with ProgressBar():
-        ds.load().to_zarr(
-            Path(tempdir).joinpath(f"{str(iterable).zfill(4)}.zarr"),
-        )
-    del ds
-
-
-def _combine_years(
-    station_folder: str,
-    varia: str,
-    out_folder: str | os.PathLike,
-    meta_file: str | os.PathLike,
-    rejected: list[str],
-    _verbose: bool = False,
-) -> None:
-    nc_files = sorted(list(Path(station_folder).glob("*.nc")))
-    if len(nc_files):
-        msg = (
-            f"Found {len(nc_files)} files for station code {Path(station_folder).name}."
-        )
-
-        logging.info(msg)
-    else:
-        msg = f"No readings found for station code {Path(station_folder).name}. Continuing..."
-
-        logging.warning(msg)
-        return
-
-    # Remove range files if years are all present, otherwise default to range_file.
-    years_found = dict()
-    range_files_found = dict()
-    years_parsed = True
-    for f in nc_files:
-        groups = re.findall(r"_\d{4}", f.stem)
-        if len(groups) == 1:
-            year = int(groups[0].strip("_"))
-            years_found[year] = f
-        elif len(groups) == 2:
-            year_start, year_end = int(groups[0].strip("_")), int(groups[1].strip("_"))
-            range_files_found[f] = set(range(year_start, year_end))
-        else:
-            logging.warning(
-                "Years unable to be effectively parsed from series. Continuing with xarray solver..."
-            )
-            years_parsed = False
-            break
-    if years_parsed:
-        if len(range_files_found) > 0:
-            msg = f"Overlapping single-year and multi-year files found for station code {station_folder}. Removing overlaps."
-            logging.warning(msg)
-            for ranged_file, years in range_files_found.items():
-                if years.issubset(years_found.values()):
-                    nc_files.remove(ranged_file)
-                else:
-                    for y in years:
-                        try:
-                            nc_files.remove(years_found[y])
-                        except (KeyError, ValueError) as err:  # noqa: PERF203
-                            logging.error(err)
-                            continue
-
-        year_range = min(years_found.keys()), max(years_found.keys())
-        msg = f"Year(s) covered: {year_range[0]}{'-' + str(year_range[1]) if year_range[0] != year_range[1] else ''}."
-        logging.info(msg)
-
-    if _verbose:
-        msg = f"Opening: {', '.join([p.name for p in nc_files])}"
-        logging.info(msg)
-    ds = xr.open_mfdataset(nc_files, combine="nested", concat_dim={"time"})
-    outfile = Path(out_folder).joinpath(
-        f'{nc_files[0].name.split(f"_{varia}_")[0]}_{varia}_'
-        f"{ds.time.dt.year.min().values}-{ds.time.dt.year.max().values}.nc"
-    )
-
-    df_inv = xr.open_dataset(meta_file)
-
-    station_id = ds.attrs["member"]
-    meta = df_inv.isel(index=df_inv.CLIMATE_IDENTIFIER == station_id)
-    meta = meta.rename({"index": "station", "CLIMATE_IDENTIFIER": "station_id"})
-    try:
-        meta = meta.assign_coords(station=[0])
-    except ValueError:
-        rejected.append(Path(station_folder).name)
-        msg = f"Something went wrong at the assign_coords step for station {station_folder}. Continuing..."
-        logging.error(msg)
-        return
-    if len(meta.indexes) > 1:
-        raise ValueError("Found more than 1 station.")
-    elif len(meta.indexes) == 0:
-        rejected.append(Path(station_folder).name)
-        msg = f"No metadata found for station code {station_folder}. Continuing..."
-        logging.warning(msg)
-        return
-
-    keep_coords = [
-        "time",
-        "station",
-        "station_id",
-        "latitude",
-        "longitude",
-        "elevation",
-    ]
-    for vv in meta.data_vars:
-        if vv.lower() not in keep_coords:
-            continue
-        ds = ds.assign_coords({vv.lower(): meta[vv]})
-
-    for vv in ds.data_vars:
-        if ds[vv].dtype == "O":
-            ds[vv] = ds[vv].astype(str)
-
-    if not outfile.exists():
-        msg = f"Merging to {outfile.name}"
-        logging.info(msg)
-        comp = dict(zlib=True, complevel=5)
-        encoding = {data_var: comp for data_var in ds.data_vars}
-        encoding["time"] = {"dtype": "single"}
-        with ProgressBar():
-            ds.to_netcdf(
-                outfile,
-                engine="h5netcdf",
-                format="NETCDF4_CLASSIC",
-                encoding=encoding,
-            )
-    else:
-        msg = f"Files exist for {outfile.name}. Continuing..."
-        logging.info(msg)
-
-
-def merge_converted_variables(
-    source_files: str | os.PathLike,
-    output_folder: str | os.PathLike,
-    variables: str | int | list[str | int] | None = None,
-    station_metadata: str | os.PathLike | None = None,
-    overwrite: bool = False,
-    n_workers: int = 1,
-) -> None:
-    """Merge converted variables.
- - Parameters - ---------- - source_files : str, Path - output_folder : str, Path - variables : str or int or list of str or int, optional - station_metadata : str or Path, optional - overwrite : bool - n_workers : int - - Returns - ------- - None - """ - meta = load_station_metadata(station_metadata) - metadata_file = Path(tempfile.NamedTemporaryFile(suffix=".nc", delete=False).name) - meta.to_netcdf(metadata_file) - - if isinstance(source_files, str): - source_files = Path(source_files) - if isinstance(output_folder, str): - output_folder = Path(output_folder) - - selected_variables = list() - if variables is not None: - if not isinstance(variables, list): - variables = [variables] - selected_variables = [cf_station_metadata(var) for var in variables] - - variables_found = [x.name for x in source_files.iterdir() if x.is_dir()] - if selected_variables: - variables_found = [ - x - for x in variables_found - if x in [item["nc_name"] for item in selected_variables] - ] - - for variable in variables_found: - msg = f"Merging files found for variable: `{variable}`." - logging.info(msg) - station_dirs = [ - x for x in source_files.joinpath(variable).iterdir() if x.is_dir() - ] - msg = f"Number of stations found: {len(station_dirs)}." - logging.info(msg) - - output_rep = output_folder.joinpath(variable) - Path(output_rep).mkdir(parents=True, exist_ok=True) - - if ( - len(list(output_rep.iterdir())) >= (len(meta.CLIMATE_IDENTIFIER) * 0.75) - ) and not overwrite: - msg = ( - f"Variable {variable} appears to have already been converted. Will be skipped. " - f"To force conversion of this variable, set `overwrite=True`." - ) - logging.warning(msg) - continue - - manager = mp.Manager() - rejected_stations = manager.list() - - combine_func = functools.partial( - _combine_years, - varia=variable, - out_folder=output_rep, - meta_file=metadata_file, - rejected=rejected_stations, - ) - - with mp.Pool(processes=n_workers) as pool: - pool.map(combine_func, station_dirs) - pool.close() - pool.join() - - if rejected_stations: - msg = f"Rejected station codes are the following: {', '.join(rejected_stations)}." - logging.warning(msg) diff --git a/src/miranda/eccc/_utils.py b/src/miranda/eccc/_utils.py deleted file mode 100644 index 2f0673a8..00000000 --- a/src/miranda/eccc/_utils.py +++ /dev/null @@ -1,1003 +0,0 @@ -from __future__ import annotations - -import logging.config -from collections.abc import Mapping -from datetime import datetime as dt - -from miranda.scripting import LOGGING_CONFIG - -__all__ = ["cf_ahccd_metadata", "cf_station_metadata"] - -logging.config.dictConfig(LOGGING_CONFIG) - - -def cf_station_metadata(variable_code: int | str) -> Mapping[str, int | float | str]: - """CF metadata for hourly station data. 
- - Parameters - ---------- - variable_code: int or str - - Returns - ------- - dict - """ - ec_hourly_variables = { - "001": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Maximum Temperature", - "standard_name": "air_temperature_maximum", - "nc_name": "tasmax", - }, - "002": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Minimum Temperature", - "standard_name": "air_temperature_minimum", - "nc_name": "tasmin", - }, - "003": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Mean Temperature", - "standard_name": "air_temperature", - "nc_name": "tas", - }, - "010": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 mm day-1", - "raw_units": "mm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Rainfall", - "standard_name": "liquid_precipitation_amount", - "nc_name": "prlptot", - }, - "011": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 cm day-1", - "raw_units": "cm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Snowfall", - "standard_name": "solid_precipitation_amount", - "nc_name": "prsntot", - }, - "012": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "0.1 mm day-1", - "raw_units": "mm", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Daily Total Precipitation", - "standard_name": "precipitation_amount", - "nc_name": "prcptot", - }, - "013": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow on the Ground", - "standard_name": "surface_snow_thickness", - "nc_name": "sndtot", - }, - "014": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Thunderstorms", - "standard_name": "thunderstorm_presence", - "nc_name": "thunder", - }, - "015": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Freezing rain or drizzle", - "standard_name": "freeze_rain_drizzle_presence", - "nc_name": "freezing_rain_drizzle", - }, - "016": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Hail", - "standard_name": "hail_presence", - "nc_name": "hail", - }, - "017": { - "_table_name": {"DLY02", "DLY04", "DLY44"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Fog or Ice Fog", - "standard_name": "fog_ice_fog_presence", - "nc_name": "fog_ice_fog", - }, - "018": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Smoke or Haze", - "standard_name": "smoke_haze_presence", - "nc_name": "smoke_haze", - }, - "019": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Blowing Dust or Sand", - "standard_name": "blowing_dust_sand_presence", - "nc_name": "blowing_dust_sand", - }, - 
"020": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Blowing snow", - "standard_name": "blowing_snow_presence", - "nc_name": "blow_snow", - }, - "021": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind speed >= 28 Knots", - "standard_name": "wind_exceeding_28_knots", - "nc_name": "wind_gt_28kt", - }, - "022": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind speed >= 34 Knots", - "standard_name": "wind_exceeding_34_knots", - "nc_name": "wind_gt_34kt", - }, - "023": { - "_table_name": {"DLY02", "DLY04"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Direction of extreme gust (16 pts) to December 1976", - "standard_name": "gust_to_direction", - "nc_name": "gust_dir_16pts", - }, - "024": { - "_table_name": {"DLY02", "DLY04"}, - "original_units": "km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Speed of extreme gust", - "standard_name": "wind_speed_of_gust", - "nc_name": "gust_speed", - }, - "025": { - "_table_name": {"DLY02", "DLY04"}, - "raw_units": "h", - "units": "h", - "scale_factor": 1, - "add_offset": 0, - "long_name": "UTC hour of extreme gust", - "standard_name": "hour_of_extreme_gust", - "nc_name": "gust_hour", - }, - "061": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF1 global solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf1_radiation", - }, - "062": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF2 sky (diffuse) radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf2_radiation", - }, - "063": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF3 reflected solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf3_radiation", - }, - "064": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF4 net all wave radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf4_radiation", - }, - "067": { - "_table_name": {"HLY11"}, - "original_units": "0.01 Kilolux_hrs", - "raw_units": "lux h", - "units": "lux h", - "scale_factor": 10, - "add_offset": 0, - "long_name": "RF7 daylight illumination", - "standard_name": "solar_radiation_flux", - "nc_name": "rf7_radiation", - }, - "068": { - "_table_name": {"HLY11"}, - "original_units": "0.001 MJ/m", - "raw_units": "W m-2 h-1", - "units": "W m-2 h-1", - "scale_factor": 1e6 / (60 * 60), - "add_offset": 0, - "long_name": "RF8 direct solar radiation", - "standard_name": "solar_radiation_flux", - "nc_name": "rf8_radiation", - }, - "069": { - "_table_name": {"HLY15"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Direction - 45B anemometer (8 
pts)", - "standard_name": "wind_to_direction", - "nc_name": "wind_dir_45B", - }, - "071": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Ceiling height of lowest layer of clouds", - "standard_name": "ceiling_cloud_height", - "nc_name": "ceiling_hgt", - }, - "072": { - "_table_name": {"HLY01"}, - "original_units": "0.1 km", - "raw_units": "km", - "units": "m", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Visibility", - "standard_name": "visibility_in_air", - "nc_name": "visibility", - }, - "073": { - "_table_name": {"HLY01"}, - "original_units": "0.01 kPa", - "raw_units": "Pa", - "units": "Pa", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Sea Level Pressure", - "standard_name": "air_pressure_at_mean_sea_level", - "nc_name": "psl", - }, - "074": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Dew Point Temperature", - "standard_name": "dew_point_temperature", - "nc_name": "tds", - }, - "075": { - "_table_name": {"HLY01"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", - "standard_name": "wind_direction_u2a", - "nc_name": "wind_dir_u2a_16", - }, - "076": { - "_table_name": {"HLY01"}, - "original_units": "km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind Speed - U2A (16 pts) to December 1970", - "standard_name": "wind_speed_u2a", - "nc_name": "wind_speed_u2a", - }, - "077": { - "_table_name": {"HLY01"}, - "original_units": "0.01 kPa", - "raw_units": "Pa", - "units": "Pa", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Station Pressure", - "standard_name": "atmospheric_pressure", - "nc_name": "pressure", - }, - "078": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Dry Bulb Temperature", - "standard_name": "dry_bulb_temperature", - "nc_name": "tas_dry", - }, - "079": { - "_table_name": {"HLY01"}, - "original_units": "0.1 °C", - "raw_units": "degC", - "units": "K", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wet Bulb temperature", - "standard_name": "wet_bulb_temperature", - "nc_name": "tas_wet", - }, - "080": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Relative Humidity", - "standard_name": "relative_humidity", - "nc_name": "hur", - }, - "081": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Total Cloud Opacity", - "standard_name": "cloud_albedo", - "nc_name": "clo", - }, - "082": { - "_table_name": {"HLY01"}, - "original_units": "%", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Total Cloud Amount", - "standard_name": "cloud_area_fraction", - "nc_name": "clt", - }, - "089": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Freezing Rain", - "standard_name": "freezing_rain", - "nc_name": "freeze_rain", - }, - "094": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - 
"add_offset": 0, - "long_name": "Ice Pellets", - "standard_name": "ice_pellet_presence", - "nc_name": "ice_pellets", - }, - "107": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "1low_cloud_opac", - }, - "108": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "1low_cloud_frac", - }, - "109": { - "_table_name": {"HLY01"}, - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "1low_cloud_type", - }, - "110": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "1low_cloud_hgt", - }, - "111": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Second lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "2low_cloud_opac", - }, - "112": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Second lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "2low_cloud_frac", - }, - "113": { - "_table_name": {"HLY01"}, - "original_units": "", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Second lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "2low_cloud_type", - }, - "114": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Second lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "2low_cloud_hgt", - }, - "115": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Thirsd lowest cloud layer opacity", - "standard_name": "low_type_cloud_opacity_fraction", - "nc_name": "3low_cloud_opac", - }, - "116": { - "_table_name": {"HLY01"}, - "original_units": "Tenths", - "raw_units": "1", - "units": "1", - "scale_factor": 10, - "add_offset": 0, - "long_name": "Third lowest cloud layer amount or condition", - "standard_name": "low_type_cloud_area_fraction", - "nc_name": "3low_cloud_frac", - }, - "117": { - "_table_name": {"HLY01"}, - "original_units": "", - "raw_units": "1", - "units": "1", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Third lowest cloud layer type", - "standard_name": "low_type_cloud_type", - "nc_name": "3low_cloud_type", - }, - "118": { - "_table_name": {"HLY01"}, - "original_units": "30's of meters", - "raw_units": "m", - "units": "m", - "scale_factor": 30, - "add_offset": 0, - "long_name": "Third lowest cloud layer height", - "standard_name": "low_type_cloud_height", - "nc_name": "3low_cloud_hgt", - }, - "123": { - "_table_name": {"HLY01"}, - "original_units": "0.1 mm", - 
"raw_units": "mm h-1", - "units": "kg m2 s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Rainfall", - "standard_name": "rainfall_flux", - "nc_name": "rainfall", - }, - "133": { - "_table_name": {"HLY10"}, - "original_units": "0.1 hrs", - "raw_units": "h", - "units": "s", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Sunshine", - "standard_name": "duration_of_sunshine", - "nc_name": "sun", - }, - "156": { - "_table_name": {"HLY01"}, - "original_units": "10's of degrees", - "raw_units": "deg", - "units": "deg", - "scale_factor": 10, - "long_name": "Wind Direction - U2A (36 pts) from January 1971", - "standard_name": "wind_direction_u2a", - "nc_name": "wind_dir_u2a_36", - }, - "262": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 00-60)", - "standard_name": "precipitation_amount", - "nc_name": "prtot", - }, - "263": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 00-15)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q1", - }, - "264": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 15-30)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q2", - }, - "265": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 30-45)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q3", - }, - "266": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 mm", - "raw_units": "mm", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Total Precipitation (minutes 45-60)", - "standard_name": "precipitation_amount", - "nc_name": "prtot_q4", - }, - "267": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 15)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q1", - }, - "268": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 30)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q2", - }, - "269": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 45)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q3", - }, - "270": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 kg/m²", - "raw_units": "kg m-2", - "units": "kg m-2", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Precipitation Gauge Weight per Unit Area (at minute 60)", - "standard_name": "precipitation_amount", - "nc_name": "precipitation_weight_q4", - }, - "271": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m 
s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 00-15)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q1", - }, - "272": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 15-30)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q2", - }, - "273": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 30-45)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q3", - }, - "274": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "nc_units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 45-60)", - "standard_name": "wind_speed", - "nc_name": "wind_speed_q4", - }, - "275": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 60)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q4", - }, - "276": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 15)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q1", - }, - "277": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 30)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q2", - }, - "278": { - "_table_name": {"HLY01_RCS"}, - "original_units": "cm", - "raw_units": "cm", - "units": "m", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Snow Depth (at minute 45)", - "standard_name": "surface_snow_thickness", - "nc_name": "snd_q3", - }, - "279": { - "_table_name": {"HLY01_RCS"}, - "original_units": "Degrees", - "raw_units": "deg", - "nc_units": "deg", - "scale_factor": 1, - "add_offset": 0, - "long_name": "Wind Direction at 2 m (minutes 50-60)", - "standard_name": "wind_direction", - "nc_name": "wind_dir", - }, - "280": { - "_table_name": {"HLY01_RCS"}, - "original_units": "0.1 km/h", - "raw_units": "km h-1", - "units": "m s-1", - "scale_factor": 0.1, - "add_offset": 0, - "long_name": "Wind Speed at 2 m (minutes 50-60)", - "standard_name": "wind_speed", - "nc_name": "wind_speed", - }, - } - code = str(variable_code).zfill(3) - if code in ["061"]: - raise NotImplementedError() - try: - variable = ec_hourly_variables[code] - variable["missing_flags"] = "M" - variable["missing_values"] = {-9999, "#####"} - variable["least_significant_digit"] = "" - except KeyError: - msg = f"Hourly variable `{code}` not supported." - logging.error(msg) - raise - return variable - - -def cf_ahccd_metadata( - code: str, gen: int -) -> (dict[str, int | float | str], dict, list[tuple[int, int]], int): - """CF compliant metadata for ECCC Adjusted and Homogenized Climate Data (AHCCD). 
- - Parameters - ---------- - code: {"dx", "dn", "dm", "dt", "ds", "dr"} - gen: {1, 2, 3} - - Returns - ------- - dict[str, int or str or float], dict, list[tuple[int, int]], int - """ - generation = {1: "First", 2: "Second", 3: "Third"}.get(gen) - - ec_ahccd_attrs = dict( - dx=dict( - variable="tasmax", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Maximum Daily Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dn=dict( - variable="tasmin", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Minimum Daily Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dm=dict( - variable="tas", - units="degC", - standard_name="air_temperature", - long_name="Near-Surface Daily Mean Air Temperature", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Temperature Data", - ), - dt=dict( - variable="pr", - units="mm d-1", - standard_name="precipitation_flux", - long_name="Daily Total Precipitation", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - ds=dict( - variable="prsn", - units="mm d-1", - standard_name="snowfall_flux", - long_name="Daily Snowfall", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - dr=dict( - variable="prlp", - units="mm d-1", - standard_name="rainfall_flux", - long_name="Daily Rainfall", - comment=f"ECCC {generation} Generation of Adjusted and Homogenized Precipitation Data", - ), - ) - try: - variable = ec_ahccd_attrs[code] - variable["missing_flags"] = "M" - if variable["variable"].startswith("tas"): - variable["NaN_value"] = -9999.9 - column_names = [ - "No", - "StnId", - "Station name", - "Prov", - "FromYear", - "FromMonth", - "ToYear", - "ToMonth", - "%Miss", - "Lat(deg)", - "Long(deg)", - "Elev(m)", - "Joined", - "RCS", - ] - column_spaces = [(0, 5), (5, 6), (6, 8), (8, 9)] - ii = 9 - for i in range(1, 32): - column_spaces.append((ii, ii + 7)) - ii += 7 - column_spaces.append((ii, ii + 1)) - ii += 1 - header_row = 3 - - elif variable["variable"].startswith("pr"): - variable["NaN_value"] = -9999.99 - column_names = [ - "Prov", - "Station name", - "stnid", - "beg yr", - "beg mon", - "end yr", - "end mon", - "lat (deg)", - "long (deg)", - "elev (m)", - "stns joined", - ] - column_spaces = [(0, 4), (4, 5), (5, 7), (7, 8)] - ii = 8 - for i in range(1, 32): - column_spaces.append((ii, ii + 8)) - ii += 8 - column_spaces.append((ii, ii + 1)) - ii += 1 - header_row = 0 - - else: - raise KeyError - - column_names = { - col.lower() - .split("(")[0] - .replace("%", "pct_") - .strip() - .replace(" ", "_"): col - for col in list(column_names) - } - - if gen == 3: - _citation = ( - "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized " - "Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. " - "Atmosphere-Ocean. https://doi.org/10.1080/07055900.2020.1765728" - ) - elif gen == 2: - _citation = ( - "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily " - "precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), " - "163-177 doi:10.1080/07055900.2011.583910" - ) - else: - msg = f"Generation '{gen}' not supported." 
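# A minimal sketch, assuming the generation-3 temperature layout described
# above: 9 leading identifier characters, then 31 pairs of a 7-character daily
# value field and a 1-character flag field, read with pandas.read_fwf. The
# file name below is hypothetical; the span arithmetic mirrors the column
# definitions being removed here (now supplied by homogenized_column_definitions).
import pandas as pd

colspecs = [(0, 5), (5, 6), (6, 8), (8, 9)]  # leading identifier fields
pos = 9
for _day in range(1, 32):
    colspecs.append((pos, pos + 7))  # daily value field
    pos += 7
    colspecs.append((pos, pos + 1))  # single-character quality flag
    pos += 1

df = pd.read_fwf("dx1234567.txt", colspecs=colspecs, header=3)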
-            raise NotImplementedError(msg)
-
-        global_attrs = dict(
-            title=f"{generation} Generation of Homogenized Daily {variable['variable']} "
-            "for Canada (Updated to December 2019)",
-            history=f"{dt.today().strftime('%Y-%m-%d')}: Convert from original format to NetCDF",
-            type="station_obs",
-            institute="Environment and Climate Change Canada",
-            institute_id="ECCC",
-            dataset_id=f"AHCCD_gen{gen}_day_{variable['variable']}",
-            frequency="day",
-            license_type="permissive",
-            license="https:/open.canada.ca/en/open-government-licence-canada",
-            citation=_citation,
-        )
-
-    except KeyError as e:
-        msg = f"AHCCD variable '{code}' or generation '{gen}' not supported."
-        logging.error(msg)
-        raise NotImplementedError(msg) from e
-
-    return variable, column_names, column_spaces, header_row, global_attrs
diff --git a/src/miranda/eccc/geomet.py b/src/miranda/eccc/geomet.py
new file mode 100644
index 00000000..1b73f9d3
--- /dev/null
+++ b/src/miranda/eccc/geomet.py
@@ -0,0 +1,44 @@
+"""ECCC Geomet Module."""
+
+from __future__ import annotations
+
+import os
+from urllib.error import HTTPError
+
+import pandas as pd
+import xarray as xr
+
+
+def load_station_metadata(meta: str | os.PathLike | None) -> xr.Dataset:
+    """
+    Load station metadata from a file or URL.
+
+    Parameters
+    ----------
+    meta : str or os.PathLike or None
+        The path to the file or URL.
+
+    Returns
+    -------
+    xr.Dataset
+        The station metadata.
+    """
+    if meta:
+        df_inv = pd.read_csv(meta, header=0)
+    else:
+        try:
+            import geopandas as gpd
+
+            station_metadata_url = "https://api.weather.gc.ca/collections/climate-stations/items?f=json&limit=15000000"
+            df_inv = gpd.read_file(station_metadata_url)
+        except HTTPError as err:
+            raise RuntimeError(
+                f"Station metadata table could not be fetched. Consider downloading it directly: {err}"
+            ) from err
+        df_inv["LONGITUDE"] = df_inv.geometry.x
+        df_inv["LATITUDE"] = df_inv.geometry.y
+        df_inv["ELEVATION"] = df_inv.ELEVATION.astype(float)
+        df_inv["CLIMATE_IDENTIFIER"] = df_inv["CLIMATE_IDENTIFIER"].astype(str)
+
+        df_inv = df_inv.drop(["geometry"], axis=1)
+    return df_inv.to_xarray()
diff --git a/src/miranda/gis/__init__.py b/src/miranda/gis/__init__.py
index d3b5e40a..844c8daf 100644
--- a/src/miranda/gis/__init__.py
+++ b/src/miranda/gis/__init__.py
@@ -3,3 +3,4 @@
 from __future__ import annotations
 
 from ._domains import *
+from .utils import *
diff --git a/src/miranda/gis/utils.py b/src/miranda/gis/utils.py
new file mode 100644
index 00000000..54c5dd42
--- /dev/null
+++ b/src/miranda/gis/utils.py
@@ -0,0 +1,180 @@
+"""Utility functions for GIS operations."""
+
+from __future__ import annotations
+
+import datetime
+import logging
+import warnings
+
+import numpy as np
+import xarray as xr
+
+__all__ = [
+    "conservative_regrid",
+    "threshold_mask",
+]
+
+
+def _simple_fix_dims(d: xr.Dataset | xr.DataArray) -> xr.Dataset | xr.DataArray:
+    """
+    Adjust dimensions found in a file so that it can be used for regridding purposes.
+
+    Parameters
+    ----------
+    d : xr.Dataset or xr.DataArray
+        The dataset to adjust.
+
+    Returns
+    -------
+    xr.Dataset or xr.DataArray
+        The adjusted dataset.
+ """ + if "lon" not in d.dims or "lat" not in d.dims: + dim_rename = dict() + for dim in d.dims: + if str(dim).lower().startswith("lon"): + dim_rename[str(dim)] = "lon" + if str(dim).lower().startswith("lat"): + dim_rename[str(dim)] = "lat" + d = d.rename(dim_rename) + if np.any(d.lon > 180): + lon_wrapped = d.lon.where(d.lon <= 180.0, d.lon - 360.0) + d["lon"] = lon_wrapped + d = d.sortby(["lon"]) + + if "time" in d.dims: + d = d.isel(time=0, drop=True) + + return d + + +def conservative_regrid( + ds: xr.DataArray | xr.Dataset, ref_grid: xr.DataArray | xr.Dataset +) -> xr.DataArray | xr.Dataset: + """ + Perform a conservative_normed regridding. + + Parameters + ---------- + ds : xr.DataArray or xr.Dataset + The dataset to regrid. + ref_grid : xr.DataArray or xr.Dataset + The reference grid. + + Returns + ------- + xr.DataArray or xr.Dataset + The regridded dataset. + """ + try: + import xesmf as xe # noqa + except ModuleNotFoundError: + raise ModuleNotFoundError( + "This function requires the `xesmf` library which is not installed. " + "Regridding step will be skipped." + ) + + ref_grid = _simple_fix_dims(ref_grid) + method = "conservative_normed" + + msg = f"Performing regridding and masking with `xesmf` using method: {method}." + logging.info(msg) + + regridder = xe.Regridder(ds, ref_grid, method, periodic=False) + ds = regridder(ds) + + ds.attrs["history"] = ( + f"{datetime.datetime.now()}:" + f"Regridded dataset using xesmf with method: {method}. " + f"{ds.attrs.get('history')}".strip() + ) + return ds + + +def threshold_mask( + ds: xr.Dataset | xr.DataArray, + *, + mask: xr.Dataset | xr.DataArray, + mask_cutoff: float | bool = False, +) -> xr.Dataset | xr.DataArray: + """ + Land-Sea mask operations. + + Parameters + ---------- + ds : xr.Dataset or str or os.PathLike + The dataset to be masked. + mask : xr.Dataset or xr.DataArray + The land-sea mask. + mask_cutoff : float or bool + The mask cutoff value. + + Returns + ------- + xr.Dataset or xr.DataArray + The masked dataset. + """ + mask = _simple_fix_dims(mask) + + if isinstance(mask, xr.Dataset): + if len(mask.data_vars) == 1: + mask_variable = list(mask.data_vars)[0] + mask = mask[mask_variable] + else: + raise ValueError( + "More than one data variable found in land-sea mask. Supply a DataArray instead." + ) + else: + mask_variable = mask.name + + try: + from clisops.core import subset_bbox # noqa + + log_msg = f"Masking dataset with {mask_variable}." + if mask_cutoff: + log_msg = f"{log_msg.strip('.')} at `{mask_cutoff}` cutoff value." + logging.info(log_msg) + + lon_bounds = np.array([ds.lon.min(), ds.lon.max()]) + lat_bounds = np.array([ds.lat.min(), ds.lat.max()]) + + mask_subset = subset_bbox( + mask, + lon_bnds=lon_bounds, + lat_bnds=lat_bounds, + ).load() + except ModuleNotFoundError: + log_msg = ( + "This function requires the `clisops` library which is not installed. " + "subsetting step will be skipped." + ) + warnings.warn(log_msg) + mask_subset = mask.load() + + if mask_subset.dtype == bool: + if mask_cutoff: + logging.warning("Mask value cutoff set for boolean mask. 
Ignoring.") + mask_subset = mask_subset.where(mask) + else: + mask_subset = mask_subset.where(mask >= mask_cutoff) + ds = ds.where(mask_subset.notnull()) + + if mask_subset.min() >= 0: + if mask_subset.max() <= 1.00000001: + cutoff_info = f"{mask_cutoff * 100} %" + elif mask_subset.max() <= 100.00000001: + cutoff_info = f"{mask_cutoff} %" + else: + cutoff_info = f"{mask_cutoff}" + else: + cutoff_info = f"{mask_cutoff}" + ds.attrs["mask_cutoff"] = cutoff_info + + prev_history = ds.attrs.get("history", "") + history_msg = f"Mask calculated using `{mask_variable}`." + if mask_cutoff: + history_msg = f"{history_msg.strip('.')} with cutoff value `{cutoff_info}`." + history = f"{history_msg} {prev_history}".strip() + ds.attrs.update(dict(history=history)) + + return ds diff --git a/src/miranda/io/_input.py b/src/miranda/io/_input.py index d9b0141b..e91992a5 100644 --- a/src/miranda/io/_input.py +++ b/src/miranda/io/_input.py @@ -50,7 +50,7 @@ def discover_data( input_files = sorted(list(input_files.glob(f"*.{suffix}"))) else: input_files = input_files.rglob(f"*.{suffix}") - if input_files.is_file(): + elif input_files.is_file(): logging.warning( "Data discovery yielded a single file. Casting to `list[Path]`." ) diff --git a/src/miranda/io/_output.py b/src/miranda/io/_output.py index be360144..6ad791ab 100644 --- a/src/miranda/io/_output.py +++ b/src/miranda/io/_output.py @@ -35,6 +35,7 @@ def write_dataset( ds: xr.DataArray | xr.Dataset, output_path: str | os.PathLike, output_format: str, + output_name: str | None = None, chunks: dict | None = None, overwrite: bool = False, compute: bool = True, @@ -49,6 +50,8 @@ def write_dataset( Output folder path. output_format: {"netcdf", "zarr"} Output data container type. + output_name: str, optional + Output file name. chunks : dict, optional Chunking layout to be written to new files. If None, chunking will be left to the relevant backend engine. overwrite : bool @@ -65,8 +68,12 @@ def write_dataset( if isinstance(output_path, str): output_path = Path(output_path) - outfile = name_output_file(ds, output_format) - outfile_path = output_path.joinpath(outfile) + if not output_name: + output_name = name_output_file(ds, output_format) + else: + output_name = str(output_name) + + outfile_path = output_path.joinpath(output_name) if overwrite and outfile_path.exists(): msg = f"Removing existing {output_format} files for {outfile}." @@ -77,8 +84,15 @@ def write_dataset( outfile_path.unlink() if chunks is None and "frequency" in ds.attrs: - freq = ds.attrs["frequency"] # TOD0: check that this is really there - chunks = fetch_chunk_config(priority="time", freq=freq, dims=ds.dims) + freq = ds.attrs.get("frequency") + if not freq: + raise ValueError( + "If 'chunks' are not provided, the 'frequency' attribute must be set." + ) + if "lat" in ds.dims and "lon" in ds.dims: + chunks = fetch_chunk_config(priority="time", freq=freq, dims=ds.dims) + elif "lat" not in ds.dims and "lon" not in ds.dims: + chunks = fetch_chunk_config(priority="stations", freq=freq, dims=ds.dims) msg = f"Writing {outfile}." 
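# A minimal usage sketch of the new `output_name` override in write_dataset,
# assuming a hypothetical dataset and output path. When `chunks` is omitted,
# the chunk layout is now derived from the dataset's "frequency" attribute,
# with the station-oriented configuration chosen when no lat/lon dimensions
# are present.
import xarray as xr

from miranda.io import write_dataset

ds = xr.open_dataset("tasmax_eccc_hourly.nc")  # hypothetical input file
ds.attrs["frequency"] = "1hr"
write_dataset(
    ds,
    output_path="converted/tasmax",
    output_format="zarr",
    output_name="tasmax_eccc_hourly.zarr",  # overrides name_output_file()
    overwrite=True,
)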
logging.info(msg) diff --git a/src/miranda/io/data/ouranos_chunk_config.json b/src/miranda/io/data/ouranos_chunk_config.json index 2ac759b7..0f18928d 100644 --- a/src/miranda/io/data/ouranos_chunk_config.json +++ b/src/miranda/io/data/ouranos_chunk_config.json @@ -37,6 +37,20 @@ } } }, + "stations": { + "1hr": { + "default": { + "station": 50, + "time": "5 years" + } + }, + "day": { + "default": { + "station": 200, + "time": "10 years" + } + } + }, "time": { "1hr": { "default": { diff --git a/src/miranda/io/utils.py b/src/miranda/io/utils.py index 00264ba4..f192b1ca 100644 --- a/src/miranda/io/utils.py +++ b/src/miranda/io/utils.py @@ -37,7 +37,9 @@ def name_output_file( - ds_or_dict: xr.Dataset | dict[str, str], output_format: str + ds_or_dict: xr.Dataset | dict[str, str], + output_format: str, + data_vars: str | None = None, ) -> str: """ Name an output file based on facets within a Dataset or a dictionary. @@ -48,6 +50,8 @@ def name_output_file( A miranda-converted Dataset or a dictionary containing the appropriate facets. output_format : {"netcdf", "zarr"} Output filetype to be used for generating filename suffix. + data_vars : str, optional + If using a Dataset, the name of the data variable to be used for naming the file. Returns ------- @@ -68,7 +72,9 @@ def name_output_file( facets["suffix"] = suffix if isinstance(ds_or_dict, xr.Dataset): - if len(ds_or_dict.data_vars) == 1: + if data_vars is not None: + facets["variable"] = data_vars + elif len(ds_or_dict.data_vars) == 1: facets["variable"] = list(ds_or_dict.data_vars.keys())[0] elif ( len(ds_or_dict.data_vars) == 2 @@ -79,7 +85,7 @@ def name_output_file( ][0] else: raise NotImplementedError( - f"Too many `data_vars` in Dataset: {' ,'.join(ds_or_dict.data_vars.keys())}." + f"Too many `data_vars` in Dataset: {', '.join(ds_or_dict.data_vars.keys())}." ) for f in [ "bias_adjust_project", diff --git a/src/miranda/preprocess/__init__.py b/src/miranda/preprocess/__init__.py new file mode 100644 index 00000000..84c999af --- /dev/null +++ b/src/miranda/preprocess/__init__.py @@ -0,0 +1,7 @@ +"""Preprocessing tools for Miranda.""" + +from __future__ import annotations + +from ._eccc_ahccd import * +from ._eccc_obs import * +from ._eccc_summaries import * diff --git a/src/miranda/preprocess/_eccc_ahccd.py b/src/miranda/preprocess/_eccc_ahccd.py new file mode 100644 index 00000000..0bc553e4 --- /dev/null +++ b/src/miranda/preprocess/_eccc_ahccd.py @@ -0,0 +1,326 @@ +"""Adjusted and Homogenized Canadian Clime Data module.""" + +from __future__ import annotations + +import calendar +import logging.config +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xr + +from miranda.io import write_dataset +from miranda.io.utils import name_output_file +from miranda.preprocess._metadata import ( + eccc_variable_metadata, + homogenized_column_definitions, +) +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments import find_project_variable_codes, load_json_data_mappings + +logging.config.dictConfig(LOGGING_CONFIG) +logger = logging.Logger("miranda") + +__all__ = ["convert_ahccd", "convert_ahccd_fwf_file", "merge_ahccd"] + + +def convert_ahccd_fwf_file( + ff: Path | str, + metadata: pd.DataFrame, + variable: str, + *, + generation: int, +) -> xr.Dataset: + """Convert AHCCD fixed-width files. 
+
+    Parameters
+    ----------
+    ff: str or Path
+    metadata: pandas.DataFrame
+    variable: str
+    generation: int
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    configuration = load_json_data_mappings("eccc-ahccd")
+    code = find_project_variable_codes(variable, configuration)
+
+    variable_meta, global_attrs = eccc_variable_metadata(
+        code, "eccc-ahccd", generation, configuration
+    )
+    column_names, column_spaces, column_dtypes, header = homogenized_column_definitions(
+        code
+    )
+
+    df = pd.read_fwf(ff, header=header, colspecs=column_spaces, dtype=column_dtypes)
+
+    # Handle different variable types
+    if "pr" in variable:
+        cols = list(df.columns[0:3])
+        cols = cols[0::2]
+        cols.extend(list(df.columns[4::2]))
+        flags = list(df.columns[5::2])
+        dfflags = df[flags]
+    elif "tas" in variable:
+        cols = [c for c in df.columns if "Unnamed" not in c]
+        flags = [c for c in df.columns if "Unnamed" in c]
+        dfflags = df[flags[2:]]
+    else:
+        raise NotImplementedError(f"Variable `{variable}` not supported.")
+
+    # Extract relevant columns
+    df = df[cols]
+    df.replace(variable_meta[variable]["NaN_value"], np.nan, inplace=True)
+
+    for i, j in enumerate(["Year", "Month"]):
+        df = df.rename(columns={df.columns[i]: j})
+    start_date = f"{df['Year'][0]}-{str(df['Month'][0]).zfill(2)}-01"
+
+    _, ndays = calendar.monthrange(df["Year"].iloc[-1], df["Month"].iloc[-1])
+    end_date = f"{df['Year'].iloc[-1]}-{str(df['Month'].iloc[-1]).zfill(2)}-{str(ndays).zfill(2)}"
+    time1 = pd.date_range(start=start_date, end=end_date)
+
+    index = pd.MultiIndex.from_arrays([df["Year"], df["Month"]])
+    df.index = index
+    cols = [c for c in df.columns if "Year" not in c and "Month" not in c]
+    df = df[cols]
+    df.columns = np.arange(1, 32)
+    ds = df.stack().to_frame()
+    ds = ds.rename(columns={0: variable})
+    ds.index.names = ["Year", "Month", "Day"]
+
+    dfflags.index = index
+    dfflags.columns = np.arange(1, 32)
+    ds_flag = dfflags.stack().to_frame()
+    ds_flag = ds_flag.rename(columns={0: "flag"})
+    ds_flag.index.names = ["Year", "Month", "Day"]
+
+    ds[f"{variable}_flag"] = ds_flag["flag"]
+    del ds_flag
+
+    # find invalid dates
+    for y in time1.year.unique():
+        for m in (
+            ds[ds.index.get_level_values("Year") == y]
+            .index.get_level_values("Month")
+            .unique()
+        ):
+            _, exp_ndays = calendar.monthrange(y, m)
+            ndays = (
+                (ds.index.get_level_values("Year") == y)
+                & (ds.index.get_level_values("Month") == m)
+            ).sum()
+            if ndays > int(exp_ndays):
+                logger.error(f"year {y}, month {m}, ndays={ndays}, exp_ndays={exp_ndays}")
+                raise RuntimeError("Unknown days present.")
+
+    time_ds = pd.DataFrame(
+        {
+            "year": ds.index.get_level_values("Year"),
+            "month": ds.index.get_level_values("Month"),
+            "day": ds.index.get_level_values("Day"),
+        }
+    )
+
+    ds.index = pd.to_datetime(time_ds)  # noqa
+    ds = ds.to_xarray().rename({"index": "time"})
+    ds_out = xr.Dataset(coords={"time": time1})
+    for v in ds.data_vars:
+        ds_out[v] = ds[v]
+
+    ds_out[variable].attrs = variable_meta[variable]
+    metadata = metadata.to_xarray().rename({"index": "station"}).drop_vars("station")
+    metadata = metadata.assign_coords(dict(station_name=metadata["station_name"]))
+    ds_out = ds_out.assign_coords(station=metadata.stnid.astype(str))
+    metadata = metadata.drop_vars(["stnid", "station_name"])
+
+    ds_out[f"{variable}_flag"].attrs["long_name"] = variable_meta[variable]["long_name"]
+
+    ds_out["lon"] = metadata["long"]
+    ds_out.lon.attrs["units"] = "degrees_east"
+    ds_out.lon.attrs["axis"] = "X"
+    ds_out["lat"] = metadata["lat"]
+    ds_out.lat.attrs["units"] = "degrees_north"
+    ds_out.lat.attrs["axis"] = "Y"
+    ds_out["elev"] = metadata["elev"]
+    ds_out.elev.attrs["units"] = "meters"
+    ds_out.elev.attrs["positive"] = "up"
+    ds_out.elev.attrs["axis"] = "Z"
+    metadata = metadata.drop_vars(["long", "lat", "elev"])
+    for vv in metadata.data_vars:
+        if metadata[vv].dtype == "O" and (variable not in vv):
+            ds_out[vv] = metadata[vv].astype(str)
+        else:
+            ds_out[vv] = metadata[vv]
+    return ds_out
+
+
+def convert_ahccd(
+    data_source: str | Path,
+    output_dir: str | Path,
+    variable: str,
+    *,
+    generation: int,
+    merge: bool = False,
+    overwrite: bool = False,
+) -> None:
+    """Convert Adjusted and Homogenized Canadian Climate Dataset files.
+
+    Parameters
+    ----------
+    data_source: str or Path
+    output_dir: str or Path
+    variable: str
+    generation: int
+    merge: bool
+    overwrite: bool
+
+    Returns
+    -------
+    None
+    """
+    configuration = load_json_data_mappings("eccc-ahccd")
+
+    output_dir = Path(output_dir).resolve().joinpath(variable)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    code = find_project_variable_codes(variable, configuration)
+    variable_meta, global_attrs = eccc_variable_metadata(
+        code, "eccc-ahccd", generation, configuration
+    )
+    (
+        column_names,
+        column_spaces,
+        column_dtypes,
+        header_row,
+    ) = homogenized_column_definitions(code)
+
+    gen = {2: "Second", 3: "Third"}.get(generation)
+    if generation == 3 and code in {"dx", "dn", "dm"}:
+        station_meta = "ahccd_gen3_temperature.csv"
+    elif generation == 2 and code in {"dt", "ds", "dr"}:
+        station_meta = "ahccd_gen2_precipitation.csv"
+    else:
+        raise NotImplementedError(f"Code '{code}' is not supported for generation {gen}.")
+    metadata_source = (
+        Path(__file__).resolve().parent.joinpath("configs").joinpath(station_meta)
+    )
+
+    if "tas" in variable:
+        metadata = pd.read_csv(metadata_source, header=2)
+        metadata.columns = column_names.keys()
+
+    elif "pr" in variable:
+        metadata = pd.read_csv(metadata_source, header=3)
+        metadata.columns = column_names.keys()
+        for index, row in metadata.iterrows():
+            if isinstance(row["stnid"], str):
+                metadata.loc[index, "stnid"] = metadata.loc[index, "stnid"].replace(
+                    " ", ""
+                )
+    else:
+        raise KeyError(f"{variable} does not include 'pr' or 'tas'.")
+
+    # Convert station .txt files to NetCDF
+    for ff in Path(data_source).glob(f"{code}*.txt"):
+        output_name = ff.name.replace(".txt", ".nc")
+        if not output_dir.joinpath(output_name).exists() or overwrite:
+            logger.info(ff.name)
+
+            station_id = ff.stem[2:]
+            metadata_st = metadata[metadata["stnid"] == station_id]
+
+            if len(metadata_st) == 1:
+                ds_out = convert_ahccd_fwf_file(
+                    ff, metadata_st, variable, generation=generation
+                )
+                ds_out.attrs = global_attrs
+
+                write_dataset(
+                    ds_out,
+                    output_dir,
+                    output_format="netcdf",
+                    output_name=output_name,
+                    overwrite=overwrite,
+                    compute=True,
+                )
+            else:
+                msg = f"Metadata info for station {ff.name} not found: Skipping..."
+                logger.warning(msg)
+        else:
+            msg = f"{output_name} already exists: Skipping..."
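# A minimal end-to-end sketch of the conversion entry point above; the folder
# names are hypothetical. Generation-3 temperature codes are dx/dn/dm and
# generation-2 precipitation codes are dt/ds/dr, as encoded in the branch above.
from miranda.preprocess import convert_ahccd

convert_ahccd(
    "/data/ahccd/raw",  # folder of {code}*.txt station files
    "/data/ahccd/netcdf",  # a per-variable subfolder is created here
    "tasmax",  # resolved to code "dx" via the eccc-ahccd mappings
    generation=3,
    merge=True,  # also build a single merged, multi-station file
    overwrite=False,
)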
+ logger.info(msg) + if merge: + merge_ahccd(data_source, output_dir, variable) + return + + +def merge_ahccd( + data_source: str | Path, + output_dir: str | Path | None = None, + variable: str | None = None, + overwrite: bool = False, +) -> None: + """Merge Adjusted and Homogenized Canadian Climate Dataset files.""" + configuration = load_json_data_mappings("eccc-ahccd") + + if variable: + code = find_project_variable_codes(variable, configuration) + glob_pattern = f"{code}*.nc" + output_dir = Path(output_dir).resolve().joinpath(variable) + else: + glob_pattern = "*.nc" + output_dir = Path(output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + # Merge individual stations to single .nc file + ds_ahccd = xr.open_mfdataset( + list(data_source.glob(glob_pattern)), concat_dim="station", combine="nested" + ) + + for coord in ds_ahccd.coords: + # xarray object datatypes mix string and int (e.g. station) convert to string for merged nc files + # Do not apply to datetime object + if coord != "time" and ds_ahccd[coord].dtype == "O": + ds_ahccd[coord] = ds_ahccd[coord].astype(str) + + variables_found = set() + for v in ds_ahccd.data_vars: + # xarray object datatypes mix string and int (e.g. station) convert to string for merged nc files + # Do not apply to flag timeseries + if ds_ahccd[v].dtype == "O" and "flag" not in v: + ds_ahccd[v] = ds_ahccd[v].astype(str) + try: + variables_found.add(find_project_variable_codes(str(v), configuration)) + except NotImplementedError: + msg = f"Variable {v} not found in metadata." + logging.info(msg) + pass + + # Name output file + ds_ahccd.attrs["variable"] = ", ".join(variables_found) + if len(variables_found) > 1: + variables = "-".join(variables_found) + msg = f"Many variables found. Merging station and variables files in {data_source}." + logger.info(msg) + else: + variables = variables_found.pop() + output_name = name_output_file(ds_ahccd, "netcdf", variables) + + try: + msg = f"Writing merged file to: {output_dir}." + logger.info(msg) + write_dataset( + ds_ahccd, + output_dir, + output_format="netcdf", + output_name=output_name, + overwrite=overwrite, + compute=True, + ) + del ds_ahccd + except FileExistsError: + logger.info("Merged file already exists. 
Use overwrite=`True` to overwrite.") diff --git a/src/miranda/preprocess/_eccc_obs.py b/src/miranda/preprocess/_eccc_obs.py new file mode 100644 index 00000000..b60eb023 --- /dev/null +++ b/src/miranda/preprocess/_eccc_obs.py @@ -0,0 +1,836 @@ +"""Specialized conversion tools for Environment and Climate Change Canada / Meteorological Service of Canada data.""" + +from __future__ import annotations + +import functools +import logging +import multiprocessing as mp +import os +import re +import tempfile +import time + +# from calendar import monthrange +from datetime import datetime as dt +from logging import config +from pathlib import Path +from typing import Any + +import dask.dataframe as dd + +# import numpy as np +import pandas as pd +import xarray as xr +from dask.diagnostics import ProgressBar + +from miranda.archive import group_by_length +from miranda.preprocess._metadata import eccc_variable_metadata, obs_column_definitions +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments import find_project_variable_codes, load_json_data_mappings +from miranda.vocabularies.eccc import obs_vocabularies + +# from xclim.core.units import convert_units_to + + +config.dictConfig(LOGGING_CONFIG) + +__all__ = [ + "convert_station", + "merge_converted_variables", + "merge_stations", +] +TABLE_DATE = dt.now().strftime("%d %B %Y") + + +def _remove_duplicates(ds): + if any(ds.get_index("time").duplicated()): + msg = ( + f"Found {ds.get_index('time').duplicated().sum()} duplicated time coordinates " + f"for station {ds.station_id.values}. Assuming first value." + ) + logging.info(msg) + return ds.sel(time=~ds.get_index("time").duplicated()) + + +def convert_observation( + data_source: str | Path | list[str | Path], + output_dir: str | Path, + variable: str, + *, + generation: int | None = None, + merge: bool = False, + overwrite: bool = False, +): + """Convert a single station's data from the fixed-width format to a netCDF file.""" + output_dir = Path(output_dir).resolve().joinpath(variable) + output_dir.mkdir(parents=True, exist_ok=True) + + code = find_project_variable_codes(variable, "eccc-obs") + var_meta, global_attrs = eccc_variable_metadata(code, "eccc-obs", generation) + ( + column_names, + column_spaces, + column_dtypes, + header_row, + ) = obs_column_definitions(code) + + archives = list() + if isinstance(data_source, list) or Path(data_source).is_file(): + archives.append(data_source) + else: + tables = [ + str(repository.keys()) + for repository in obs_vocabularies + if code in repository.values() + ] + msg = ( + f"Collecting files for variable '{variable}'. " + f"Filename patterns containing variable code '{code}: {', '.join(tables)}'." 
+        )
+        logging.info(msg)
+        for table in tables:
+            archives.extend([f for f in Path(data_source).rglob(f"{table}*.gz")])
+
+    # Create the output directory
+    output_variable_dir = Path(output_dir).joinpath(variable)
+    output_variable_dir.mkdir(parents=True, exist_ok=True)
+
+    # Loop on the files
+    errored_files = []
+    for file in archives:
+        # FIXME: convert the file using the appropriate function
+        pass
+
+    if errored_files:
+        msg = "Some files failed to be properly parsed:\n" + ", ".join(errored_files)
+        logging.warning(msg)
+
+
+def convert_station(
+    data: str | os.PathLike,
+    variable: str,
+    mode: str,
+    # missing_flags: set[str],
+    # missing_values: set[str],
+    using_dask_array: bool = False,
+    *,
+    client: Any,
+    **kwargs,
+):
+    """Convert a single station's data from the fixed-width format to a netCDF file."""
+    data = Path(data)
+    variable_code = find_project_variable_codes(variable, "eccc-obs")
+    column_names, column_widths, column_dtypes, header = obs_column_definitions(mode)
+
+    # if not missing_values:
+    #     missing_values = {-9999, "#####"}
+
+    if using_dask_array:
+        pandas_reader = dd
+        # set the block size to 200 MB and let dask assume missing values
+        chunks = dict(blocksize=200 * 2**20, assume_missing=True)
+    else:
+        pandas_reader = pd
+        chunks = dict()
+        using_dask_array = False
+
+    # Create a dataframe from the files
+    try:
+        df = pandas_reader.read_fwf(
+            data,
+            widths=column_widths,
+            names=column_names,
+            dtype={
+                name: data_type for name, data_type in zip(column_names, column_dtypes)
+            },
+            **chunks,
+        )
+        if using_dask_array:
+            df = client.persist(df)
+
+    except FileNotFoundError as e:
+        msg = f"File {data} was not found: {e}"
+        logging.error(msg)
+        raise FileNotFoundError(msg) from e
+
+    except UnicodeDecodeError as e:
+        msg = f"File {data.name} was unable to be read. This is probably an issue with the file: {e}"
+        logging.error(msg)
+        raise
+
+    # Loop through the station codes
+    station_codes = df["code"].unique()
+    for code in station_codes:
+        df_code = df[df["code"] == code]
+
+        # Abort if the variable is not found
+        if using_dask_array:
+            has_variable_codes = (
+                (df_code["code_var"] == variable_code).compute()
+            ).any()
+        else:
+            has_variable_codes = (df_code["code_var"] == variable_code).any()
+        if not has_variable_codes:
+            msg = f"Variable `{variable}` not found for station code: {code} in file {data}. Continuing..."
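# A usage sketch for large archives, assuming `dask.distributed` is installed
# and that "tas_dry" resolves to an hourly variable code; the archive name is
# hypothetical. With `using_dask_array=True`, the fixed-width file is read
# through dask.dataframe in ~200 MB blocks and persisted on the cluster before
# the per-station loop below runs.
from dask.distributed import Client

from miranda.preprocess import convert_station

client = Client(n_workers=2, threads_per_worker=2)
convert_station(
    "HLY01_ARCHIVE.gz",  # hypothetical ECCC fixed-width archive
    variable="tas_dry",
    mode="hourly",
    using_dask_array=True,
    client=client,
)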
+ logging.info(msg) + continue + + # # Perform the data treatment + # logging.info(f"Converting `{variable}` for station code: {code}") + # + # # Dump the data into a DataFrame + # df_var = df_code[df_code["code_var"] == variable_code].copy() + # + # # Mask the data according to the missing values flag + # df_var = df_var.replace(missing_values, np.nan) + # + # # Decode the values and flags + # dfd = df_var.loc[:, [f"D{i:0n}" for i in range(1, num_observations + 1)]] + # dff = df_var.loc[:, [f"F{i:0n}" for i in range(1, num_observations + 1)]] + # + # # Remove the "NaN" flag + # dff = dff.fillna("") + # + # # Use the flag to mask the values + # try: + # val = np.asarray(dfd.values, float) + # except ValueError as e: + # logging.error(f"{e} raised from {dfd}, continuing...") + # continue + # try: + # flag = np.asarray(dff.values, str) + # except ValueError as e: + # logging.error(f"{e} raised from {dff}, continuing...") + # continue + # mask = np.isin(flag, missing_flags) + # val[mask] = np.nan + # + # # Treat according to units conversions + # val = val * scale_factor + add_offset + + # Create the DataArray + # date_summations = dict(time=list()) + # if mode == "hourly": + # for index, row in df_var.iterrows(): + # period = pd.Period( + # year=row.year, month=row.month, day=row.day, freq="D" + # ) + # dates = pd.Series( + # pd.date_range( + # start=period.start_time, + # end=period.end_time, + # freq="H", + # ) + # ) + # date_summations["time"].extend(dates) + # written_values = val.flatten() + # written_flags = flag.flatten() + # elif mode == "daily": + # value_days = list() + # flag_days = list() + # for i, (index, row) in enumerate(df_var.iterrows()): + # period = pd.Period(year=row.year, month=row.month, freq="M") + # dates = pd.Series( + # pd.date_range( + # start=period.start_time, + # end=period.end_time, + # freq="D", + # ) + # ) + # date_summations["time"].extend(dates) + # + # value_days.extend( + # val[i][range(monthrange(int(row.year), int(row.month))[1])] + # ) + # flag_days.extend( + # flag[i][range(monthrange(int(row.year), int(row.month))[1])] + # ) + # written_values = value_days + # written_flags = flag_days + # + # ds = xr.Dataset() + # da_val = xr.DataArray(written_values, coords=date_summations, dims=["time"]) + # + # if raw_units != units: + # da_val.attrs["units"] = raw_units + # da_val = convert_units_to(da_val, units) + # else: + # da_val.attrs["units"] = units + # + # da_val = da_val.rename(nc_name) + # variable_attributes = dict( + # variable_code=variable_code, + # standard_name=standard_name, + # long_name=long_name, + # ) + # if "original_units" in kwargs: + # variable_attributes["original_units"] = kwargs["original_units"] + # da_val.attrs.update(variable_attributes) + # + # da_flag = xr.DataArray(written_flags, coords=date_summations, dims=["time"]) + # da_flag = da_flag.rename("flag") + # flag_attributes = dict( + # long_name="data flag", + # note="See ECCC technical documentation for details", + # ) + # da_flag.attrs.update(flag_attributes) + # + # ds[nc_name] = da_val + # ds["flag"] = da_flag + # + # # save the file in NetCDF format + # start_year = ds.time.dt.year.values[0] + # end_year = ds.time.dt.year.values[-1] + # + # station_folder = output_path.joinpath(str(code)) + # station_folder.mkdir(parents=True, exist_ok=True) + # + # f_nc = ( + # f"{code}_{variable_code}_{nc_name}_" + # f"{start_year if start_year == end_year else '_'.join([str(start_year), str(end_year)])}.nc" + # ) + # + # if station_folder.joinpath(f_nc).exists(): + # 
logging.warning(f"File `{f_nc}` already exists. Continuing...") + # + # history = ( + # f"{dt.now().strftime('%Y-%m-%d %X')} converted from flat station file " + # f"(`{file.name}`) to n-dimensional array." + # ) + # + # # TODO: This info should eventually be sourced from a JSON definition + # global_attrs = dict( + # Conventions="CF-1.8", + # comment="Acquired on demand from data specialists at " + # "ECCC Climate Services / Services Climatiques.", + # contact="John Richard", + # contact_email="climatcentre-climatecentral@ec.gc.ca", + # domain="CAN", + # ) + # if mode == "hourly": + # global_attrs.update(dict(frequency="1hr")) + # elif mode == "daily": + # global_attrs.update(dict(frequency="day")) + # global_attrs.update( + # dict( + # history=history, + # internal_comment=f"Converted by {os.environ.get('USER', os.environ.get('USERNAME'))}.", + # institution="ECCC", + # license="https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + # member=code, + # processing_level="raw", + # redistribution="Redistribution permitted.", + # references="https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + # source="historical-station-records", + # table_date=TABLE_DATE, + # title="Environment and Climate Change Canada (ECCC) weather station observations", + # type="station-obs", + # usage="The original data is owned by the Government of Canada (Environment and Climate " + # "Change Canada), and falls under the licence agreement for use of Environment and " + # "Climate Change Canada data", + # variable=str(nc_name), + # version=f"v{dt.now().strftime('%Y.%m.%V')}", # Year.Month.Week + # ) + # ) + # ds.attrs.update(global_attrs) + # + # logging.info(f"Exporting to: {station_folder.joinpath(f_nc)}") + # ds.to_netcdf(station_folder.joinpath(f_nc)) + # del ds + # del val + # del mask + # del flag + # del da_val + # del da_flag + # del dfd + # del dff + # del written_values + # del written_flags + # del date_summations + # + # del df + + +def merge_stations( + source_files: str | os.PathLike | None = None, + output_folder: str | os.PathLike | None = None, + *, + time_step: str, + variables: str | int | list[str | int] | None = None, + include_flags: bool = True, + groupings: int | None = None, + mf_dataset_freq: str | None = None, + temp_directory: str | os.PathLike | None = None, + n_workers: int = 1, +) -> None: + """Merge stations. + + Parameters + ---------- + source_files : str or Path + Source files to be aggregated. + output_folder : str or Path + Output folder for the aggregated files. + variables : str or int or list of str or int, optional + The variable codes to be aggregated. + time_step : {"hourly", "daily"} + The time step to be used for aggregation. + include_flags : bool + Include flags in the output files. + groupings : int + The number of files in each group used for converting to multi-file Datasets. + mf_dataset_freq : str, optional + Resampling frequency for creating output multi-file Datasets. E.g. 'YS': 1 year per file, '5YS': 5 years per file. + temp_directory : str or Path, optional + Use another temporary directory location in case default location is not spacious enough. + n_workers : int + The number of workers to use. 
+
+    Returns
+    -------
+    None
+    """
+    func_time = time.time()
+
+    if isinstance(source_files, str):
+        source_files = Path(source_files)
+
+    if time_step.lower() in ["h", "hour", "hourly"]:
+        mode = "hourly"
+    elif time_step.lower() in ["d", "day", "daily"]:
+        mode = "daily"
+    else:
+        raise ValueError("Time step must be `h` / `hourly` or `d` / `daily`.")
+
+    if isinstance(variables, list):
+        pass
+    elif isinstance(variables, (str, int)):
+        variables = [variables]
+
+    # TODO: have the variable gathered from a JSON file
+    elif variables is None:
+        if mode == "hourly":
+            variables = [
+                89,
+                94,
+                123,
+            ]
+            variables.extend(range(76, 81))
+            variables.extend(range(262, 281))
+        elif mode == "daily":
+            variables = [1, 2, 3]
+            variables.extend(range(10, 26))
+    else:
+        raise NotImplementedError(f"Unsupported type for `variables`: {type(variables)}.")
+
+    for variable_code in variables:
+        info = load_json_data_mappings("eccc-obs")["variables"][variable_code]
+        variable_name = info["cf_variable_name"]
+        msg = f"Merging `{variable_name}` using `{time_step}` time step."
+        logging.info(msg)
+
+        # Only perform aggregation on available data with corresponding metadata
+        logging.info("Performing glob and sort.")
+        nc_list = [str(nc) for nc in source_files.joinpath(variable_name).rglob("*.nc")]
+
+        if not groupings:
+            groupings = max(n_workers**2, 4)
+
+        if nc_list:
+            nc_lists = group_by_length(nc_list, groupings)
+
+            with tempfile.TemporaryDirectory(
+                prefix="eccc", dir=temp_directory
+            ) as temp_dir:
+                combinations = sorted(
+                    (ii, nc, temp_dir, len(nc_lists)) for ii, nc in enumerate(nc_lists)
+                )
+
+                with mp.Pool(processes=n_workers) as pool:
+                    pool.starmap(_tmp_zarr, combinations)
+                    pool.close()
+                    pool.join()
+
+                zarrs_found = sorted(Path(temp_dir).glob("*.zarr"))
+                msg = f"Found {len(zarrs_found)} intermediary aggregation files."
+                logging.info(msg)
+
+                ds = xr.open_mfdataset(
+                    zarrs_found,
+                    engine="zarr",
+                    combine="nested",
+                    concat_dim="station",
+                )
+
+                if ds:
+                    station_file_codes = [Path(x).name.split("_")[0] for x in nc_list]
+                    if not include_flags:
+                        drop_vars = [vv for vv in ds.data_vars if "flag" in vv]
+                        ds = ds.drop_vars(drop_vars)
+                    ds = ds.sortby([ds.station_id, "time"])
+
+                    # Rearrange column order to have lon, lat, elev first
+                    # # FIXME: This doesn't work as intended - Assign coordinates instead
+                    # cols = meta.columns.tolist()
+                    # cols1 = [
+                    #     "latitude",
+                    #     "longitude",
+                    #     "elevation",
+                    # ]
+                    # for rr in cols1:
+                    #     cols.remove(rr)
+                    # cols1.extend(cols)
+                    # meta = meta[cols1]
+                    # meta.index.rename("station", inplace=True)
+                    # meta = meta.to_xarray()
+                    # meta.sortby(meta["climate_identifier"])
+                    # meta = meta.assign({"station": ds.station.values})
+
+                    # np.testing.assert_array_equal(
+                    #     sorted(meta["climate_identifier"].values), sorted(ds.station_id.values)
+                    # )
+                    # for vv in meta.data_vars:
+                    #     ds = ds.assign_coords({vv: meta[vv]})
+                    # ds = xr.merge([ds, meta])
+                    # ds.attrs = attrs1
+
+                    # The export must happen within the tempdir context, otherwise the data is erased before the final export!
+                    valid_stations = list(sorted(ds.station_id.values))
+                    valid_stations_count = len(valid_stations)
+
+                    msg = f"Processing stations for variable `{variable_name}`."
+                    logging.info(msg)
+
+                    if len(station_file_codes) == 0:
+                        msg = f"No stations were found containing variable filename `{variable_name}`. Exiting."
+                        logging.error(msg)
+                        return
+
+                    msg = (
+                        f"Files exist for {len(station_file_codes)} ECCC stations. "
+                        f"Metadata found for {valid_stations_count} stations. "
+                    )
+                    logging.info(msg)
+
+                    # FIXME: Is this still needed?
+                    # logging.info("Preparing the NetCDF time period.")
+                    # Create the time period timestamps
+                    # year_start = ds.time.dt.year.min().values
+                    # year_end = ds.time.dt.year.max().values
+
+                    # Calculate the time index dimensions of the output NetCDF
+                    # time_index = pd.date_range(
+                    #     start=f"{year_start}-01-01",
+                    #     end=f"{year_end + 1}-01-01",
+                    #     freq=mode[0].capitalize(),
+                    # )[:-1]
+                    # logging.info(
+                    #     f"Number of ECCC stations: {valid_stations_count}, time steps: {time_index.size}."
+                    # )
+
+                    Path(output_folder).mkdir(parents=True, exist_ok=True)
+                    file_out = Path(output_folder).joinpath(f"{variable_name}_eccc_{mode}")
+
+                    ds = ds.assign_coords(station=range(len(ds.station))).sortby("time")
+                    if mf_dataset_freq is not None:
+                        # Output the multi-file dataset using the resampling frequency
+                        _, datasets = zip(*ds.resample(time=mf_dataset_freq))
+                    else:
+                        datasets = [ds]
+
+                    paths = [
+                        f"{file_out}_{data.time.dt.year.min().values}-{data.time.dt.year.max().values}.nc"
+                        for data in datasets
+                    ]
+
+                    # FIXME: chunks need to be dealt with
+                    # chunks = [1, len(ds.time)]
+                    # comp = dict(zlib=True, complevel=5)  # , chunksizes=chunks)
+
+                    with ProgressBar():
+                        # FIXME: Looping seems to cause memory use to grow over time; use a pool of one or two workers instead?
+                        # for dataset, path in zip(datasets, paths):
+                        #     _export_agg_nc(dataset, path)
+                        combs = zip(datasets, paths)
+                        with mp.Pool(processes=2) as pool:
+                            pool.map(_export_agg_nc, combs)
+                            pool.close()
+                            pool.join()
+                    ds.close()
+                    del ds
+
+        else:
+            msg = f"No files found for variable: `{variable_name}`."
+            logging.info(msg)
+
+    runtime = f"Process completed in {time.time() - func_time:.2f} seconds."
+    logging.warning(runtime)
+
+
+def _export_agg_nc(args):
+    dataset, path = args
+    comp = dict(zlib=True, complevel=5)
+    encoding = {var: comp for var in dataset.data_vars}
+    dataset.load().to_netcdf(
+        path,
+        engine="h5netcdf",
+        format="NETCDF4_CLASSIC",
+        encoding=encoding,
+    )
+    dataset.close()
+    del dataset
+
+
+def _tmp_zarr(
+    iterable: int,
+    nc: list[str | os.PathLike],
+    tempdir: str | os.PathLike,
+    group: int | None = None,
+) -> None:
+    msg = (
+        f"Processing batch of files {iterable + 1}"
+        f"{' of ' + str(group) if group is not None else ''}."
+    )
+    logging.info(msg)
+    station_file_codes = [Path(x).name.split("_")[0] for x in nc]
+
+    try:
+        ds = xr.open_mfdataset(
+            nc, combine="nested", concat_dim="station", preprocess=_remove_duplicates
+        )
+    except ValueError as e:
+        errored_nc_files = ", ".join([Path(f).name for f in nc])
+        msg = f"Issues found with the following files: [{errored_nc_files}]: {e}"
+        logging.error(msg)
+        return
+
+    ds = ds.assign_coords(
+        station_id=xr.DataArray(station_file_codes, dims="station").astype(str)
+    )
+    if "flag" in ds.data_vars:
+        ds1 = ds.drop_vars("flag").copy(deep=True)
+        ds1["flag"] = ds.flag.astype(str)
+        ds = ds1
+
+    with ProgressBar():
+        ds.load().to_zarr(
+            Path(tempdir).joinpath(f"{str(iterable).zfill(4)}.zarr"),
+        )
+    del ds
+
+
+def _combine_years(
+    station_folder: str,
+    varia: str,
+    out_folder: str | os.PathLike,
+    meta_file: str | os.PathLike,
+    rejected: list[str],
+    _verbose: bool = False,
+) -> None:
+    nc_files = sorted(list(Path(station_folder).glob("*.nc")))
+    if len(nc_files):
+        msg = (
+            f"Found {len(nc_files)} files for station code {Path(station_folder).name}."
+        )
+        logging.info(msg)
+    else:
+        msg = f"No readings found for station code {Path(station_folder).name}. Continuing..."
+        logging.warning(msg)
+        return
+
+    # Remove a multi-year file when all of its years are also present as single-year files;
+    # otherwise, keep the multi-year file and drop the overlapping single-year files.
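+    # For example (hypothetical filenames): given `X_1990.nc`, `X_1991.nc`, and
+    # `X_1990_1991.nc`, the multi-year file is dropped because both of its years
+    # exist as single-year files; if `X_1991.nc` were absent, the single-year
+    # file would be dropped instead and the multi-year file kept.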
+    years_found = dict()
+    range_files_found = dict()
+    years_parsed = True
+    for f in nc_files:
+        groups = re.findall(r"_\d{4}", f.stem)
+        if len(groups) == 1:
+            year = int(groups[0].strip("_"))
+            years_found[year] = f
+        elif len(groups) == 2:
+            year_start, year_end = int(groups[0].strip("_")), int(groups[1].strip("_"))
+            range_files_found[f] = set(range(year_start, year_end + 1))
+        else:
+            msg = "Unable to parse years from the series. Continuing with the xarray solver..."
+            logging.warning(msg)
+            years_parsed = False
+            break
+    if years_parsed:
+        if len(range_files_found) > 0:
+            msg = (
+                f"Overlapping single-year and multi-year files found for station code {Path(station_folder).name}. "
+                "Removing overlaps."
+            )
+            logging.warning(msg)
+            for ranged_file, years in range_files_found.items():
+                if years.issubset(years_found.keys()):
+                    nc_files.remove(ranged_file)
+                else:
+                    missing_years = []
+                    for y in years:
+                        try:
+                            nc_files.remove(years_found[y])
+                        except (KeyError, ValueError):  # noqa: PERF203
+                            missing_years.append(str(y))
+                            continue
+                    if missing_years:
+                        msg = f"Missing years {', '.join(missing_years)} from multi-year file {ranged_file}."
+                        logging.warning(msg)
+
+        if years_found:
+            year_range = min(years_found.keys()), max(years_found.keys())
+            msg = (
+                "Year(s) covered: "
+                f"{year_range[0]}{'-' + str(year_range[1]) if year_range[0] != year_range[1] else ''}."
+            )
+            logging.info(msg)
+
+    if _verbose:
+        msg = f"Opening: {', '.join([p.name for p in nc_files])}"
+        logging.info(msg)
+    ds = xr.open_mfdataset(nc_files, combine="nested", concat_dim="time")
+    outfile = Path(out_folder).joinpath(
+        f'{nc_files[0].name.split(f"_{varia}_")[0]}_{varia}_'
+        f"{ds.time.dt.year.min().values}-{ds.time.dt.year.max().values}.nc"
+    )
+
+    df_inv = xr.open_dataset(meta_file)
+
+    station_id = ds.attrs["member"]
+    meta = df_inv.isel(index=df_inv.CLIMATE_IDENTIFIER == station_id)
+    meta = meta.rename({"index": "station", "CLIMATE_IDENTIFIER": "station_id"})
+    try:
+        meta = meta.assign_coords(station=[0])
+    except ValueError:
+        rejected.append(Path(station_folder).name)
+        msg = f"Something went wrong at the assign_coords step for station {Path(station_folder).name}. Continuing..."
+        logging.error(msg)
+        return
+    if len(meta.indexes) > 1:
+        raise ValueError("Found more than 1 station.")
+    elif len(meta.indexes) == 0:
+        rejected.append(Path(station_folder).name)
+        msg = f"No metadata found for station code {Path(station_folder).name}. Continuing..."
+        logging.warning(msg)
+        return
+
+    keep_coords = [
+        "time",
+        "station",
+        "station_id",
+        "latitude",
+        "longitude",
+        "elevation",
+    ]
+    for vv in meta.data_vars:
+        if str(vv).lower() not in keep_coords:
+            continue
+        ds = ds.assign_coords({str(vv).lower(): meta[vv]})
+
+    for vv in ds.data_vars:
+        if ds[vv].dtype == "O":
+            ds[vv] = ds[vv].astype(str)
+
+    if not outfile.exists():
+        msg = f"Merging to {outfile.name}."
+        logging.info(msg)
+        comp = dict(zlib=True, complevel=5)
+        encoding = {data_var: comp for data_var in ds.data_vars}
+        encoding["time"] = {"dtype": "single"}
+        with ProgressBar():
+            ds.to_netcdf(
+                outfile,
+                engine="h5netcdf",
+                format="NETCDF4_CLASSIC",
+                encoding=encoding,
+            )
+    else:
+        msg = f"Files exist for {outfile.name}. Continuing..."
+        logging.info(msg)
+
+
+def merge_converted_variables(
+    source_files: str | os.PathLike,
+    output_folder: str | os.PathLike,
+    variables: str | int | list[str | int] | None = None,
+    overwrite: bool = False,
+    n_workers: int = 1,
+) -> None:
+    """Merge converted variables into a single file per variable.
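+
+    Each variable's station folders are processed by a pool of workers;
+    station codes that cannot be merged are collected and reported at the end.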
+
+    Parameters
+    ----------
+    source_files : str or Path
+        Source folder containing the converted station files, one subfolder per variable.
+    output_folder : str or Path
+        Output folder for the merged files.
+    variables : str or int or list of str or int, optional
+        The variable codes to be merged.
+    overwrite : bool
+        Whether to overwrite variables that appear to have already been converted.
+    n_workers : int
+        The number of workers to use.
+
+    Returns
+    -------
+    None
+    """
+    meta = load_json_data_mappings("eccc-obs")
+    metadata_file = Path(tempfile.NamedTemporaryFile(suffix=".nc", delete=False).name)
+    meta.to_netcdf(metadata_file)
+
+    if isinstance(source_files, str):
+        source_files = Path(source_files)
+    if isinstance(output_folder, str):
+        output_folder = Path(output_folder)
+
+    selected_variables = list()
+    if variables is not None:
+        if not isinstance(variables, list):
+            variables = [variables]
+        selected_variables.extend(meta[var] for var in variables)
+
+    variables_found = [x.name for x in source_files.iterdir() if x.is_dir()]
+    if selected_variables:
+        variables_found = [
+            x
+            for x in variables_found
+            if x in [item["nc_name"] for item in selected_variables]
+        ]
+
+    for variable in variables_found:
+        msg = f"Merging files found for variable: `{variable}`."
+        logging.info(msg)
+        station_dirs = [
+            x for x in source_files.joinpath(variable).iterdir() if x.is_dir()
+        ]
+        msg = f"Number of stations found: {len(station_dirs)}."
+        logging.info(msg)
+
+        output_rep = output_folder.joinpath(variable)
+        Path(output_rep).mkdir(parents=True, exist_ok=True)
+
+        if (
+            len(list(output_rep.iterdir())) >= (len(meta.CLIMATE_IDENTIFIER) * 0.75)
+        ) and not overwrite:
+            msg = (
+                f"Variable `{variable}` appears to have already been converted; it will be skipped. "
+                f"To force conversion of this variable, set `overwrite=True`."
+            )
+            logging.warning(msg)
+            continue
+
+        manager = mp.Manager()
+        rejected_stations = manager.list()
+
+        combine_func = functools.partial(
+            _combine_years,
+            varia=variable,
+            out_folder=output_rep,
+            meta_file=metadata_file,
+            rejected=rejected_stations,
+        )
+
+        with mp.Pool(processes=n_workers) as pool:
+            pool.map(combine_func, station_dirs)
+            pool.close()
+            pool.join()
+
+        if rejected_stations:
+            msg = f"Rejected station codes are the following: {', '.join(rejected_stations)}."
+            logging.warning(msg)
diff --git a/src/miranda/eccc/_summaries.py b/src/miranda/preprocess/_eccc_summaries.py
similarity index 99%
rename from src/miranda/eccc/_summaries.py
rename to src/miranda/preprocess/_eccc_summaries.py
index f77d3649..6c8ce6f2 100755
--- a/src/miranda/eccc/_summaries.py
+++ b/src/miranda/preprocess/_eccc_summaries.py
@@ -32,8 +32,10 @@
 eccc_metadata = json.load(
     Path(__file__)
-    .parent.joinpath("eccc_obs_summary_cf_attrs.json")
-    .open("r", encoding="utf-8")
+    .resolve()
+    .parent.joinpath("configs")
+    .joinpath("eccc-obs-summary_attrs.json")
+    .open(encoding="utf-8")
 )["variable_entry"]
diff --git a/src/miranda/preprocess/_metadata.py b/src/miranda/preprocess/_metadata.py
new file mode 100644
index 00000000..3b65d2b9
--- /dev/null
+++ b/src/miranda/preprocess/_metadata.py
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from miranda import __version__ as __miranda_version__
+from miranda.treatments.utils import load_json_data_mappings
+
+__all__ = [
+    "eccc_variable_metadata",
+    "homogenized_column_definitions",
+    "obs_column_definitions",
+]
+
+
+def eccc_variable_metadata(
+    variable_code: str | int,
+    project: str,
+    generation: int | None = None,
+    metadata: dict | None = None,
+) -> dict[str, Any]:
+    """Return the metadata for a given variable code and project.
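+
+    Fields prefixed with an underscore in the JSON configuration (e.g.
+    `_miranda_version`, `_variable`) are treated as directives and are
+    resolved into concrete global attributes.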
+
+    Parameters
+    ----------
+    variable_code : str or int
+        The variable code to look up.
+    project : {"eccc-ahccd", "eccc-obs", "eccc-obs-summary"}
+        The project name.
+    generation : {1, 2, 3}, optional
+        The AHCCD generation (required for "eccc-ahccd").
+    metadata : dict, optional
+        Pre-loaded metadata mappings. If not provided, they are loaded from the project JSON.
+
+    Returns
+    -------
+    dict
+        A dictionary with `metadata` (variable attributes) and `header` (dataset attributes) entries.
+    """
+    if project == "eccc-ahccd":
+        generations = {1: "First", 2: "Second", 3: "Third"}
+        if generation not in generations:
+            raise NotImplementedError(f"Generation '{generation}' not supported.")
+        generation = generations[generation]
+    else:
+        generation = None
+
+    if not metadata:
+        metadata = load_json_data_mappings(project)
+
+    if isinstance(variable_code, int):
+        variable_code = str(variable_code).zfill(3)
+
+    # code = find_project_variable_codes(variable_code, metadata)
+
+    # Variable metadata
+    variable_meta = metadata["variables"].get(variable_code)
+    if variable_meta is None:
+        raise ValueError(f"No metadata found for variable code: {variable_code}")
+
+    variable_name = ""
+    variable_name_fields = ["_variable_name", "_cf_variable_name"]
+    if set(variable_name_fields).issubset(variable_meta.keys()):
+        for variable_field in variable_name_fields:
+            variable_name = variable_meta.get(variable_field)
+            if variable_name:
+                variable_meta["original_variable_code"] = variable_code
+                del variable_meta[variable_field]
+        variable_meta = {variable_name: variable_meta}
+    else:
+        variable_meta = {variable_code: variable_meta}
+    if not variable_name:
+        variable_name = variable_code
+
+    # Dataset metadata
+    header = metadata.get("Header")
+    # Static handling of version global attributes
+    miranda_version = header.get("_miranda_version")
+    if miranda_version:
+        if isinstance(miranda_version, bool):
+            header["miranda_version"] = __miranda_version__
+        elif isinstance(miranda_version, dict):
+            if project in miranda_version.keys():
+                header["miranda_version"] = __miranda_version__
+            else:
+                msg = f"`_miranda_version` not properly configured for project `{project}`. Not appending."
+                logging.warning(msg)
+    if "_miranda_version" in header:
+        del header["_miranda_version"]
+
+    to_delete = []
+    # Conditional handling of global attributes based on fields
+    for field in [f for f in header if f.startswith("_")]:
+        if isinstance(header[field], bool):
+            if header[field] and field == "_variable":
+                header[field[1:]] = variable_name
+        elif isinstance(header[field], dict) and generation:
+            attr_treatment = header[field]["generation"]
+            if field in ["_citation", "_product"]:
+                for attribute, value in attr_treatment.items():
+                    if attribute == generation:
+                        header[field[1:]] = value
+        else:
+            raise AttributeError(
+                f"Attribute treatment for field `{field}` is not properly configured. Verify the JSON."
+            )
+        to_delete.append(field)
+
+    for field in to_delete:
+        del header[field]
+
+    return dict(metadata=variable_meta, header=header)
+
+
+def homogenized_column_definitions(
+    variable_code: str,
+) -> tuple[dict, list[tuple[int, int]], dict[str, type[str | int | float] | Any], int]:
+    """Return the column names, widths, and data types for the AHCCD fixed-width format data.
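+
+    Temperature (`tas*`) and precipitation (`pr*`) series use different
+    fixed-width layouts and header rows; both are defined here.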
+
+    Parameters
+    ----------
+    variable_code : str
+        The AHCCD variable code.
+
+    Returns
+    -------
+    tuple[dict, list[tuple[int, int]], dict[str, type[str | int | float] | Any], int]
+        The column names, column spaces, column data types, and the header row index.
+    """
+    metadata = load_json_data_mappings("eccc-homogenized")
+
+    variable = metadata["variables"][variable_code]["_variable_name"]
+    if variable.startswith("tas"):
+        column_dtypes = {
+            "No": str,
+            "StnId": str,
+            "Station name": str,
+            "Prov": str,
+            "FromYear": int,
+            "FromMonth": int,
+            "ToYear": int,
+            "ToMonth": int,
+            "%Miss": float,
+            "Lat(deg)": float,
+            "Long(deg)": float,
+            "Elev(m)": int,
+            "Joined": str,
+            "RCS": str,
+        }
+        column_spaces = [(0, 5), (5, 6), (6, 8), (8, 9)]
+        ii = 9
+        # 31 days in a month
+        for i in range(1, 32):
+            column_spaces.append((ii, ii + 7))
+            ii += 7
+            column_spaces.append((ii, ii + 1))
+            ii += 1
+        header_row = 3
+
+    elif variable.startswith("pr"):
+        column_dtypes = {
+            "Prov": str,
+            "Station name": str,
+            "stnid": str,
+            "beg yr": int,
+            "beg mon": int,
+            "end yr": int,
+            "end mon": int,
+            "lat (deg)": float,
+            "long (deg)": float,
+            "elev (m)": int,
+            "stns joined": str,
+        }
+        column_spaces = [(0, 4), (4, 5), (5, 7), (7, 8)]
+        ii = 8
+        # 31 days in a month
+        for i in range(1, 32):
+            column_spaces.append((ii, ii + 8))
+            ii += 8
+            column_spaces.append((ii, ii + 1))
+            ii += 1
+        header_row = 0
+
+    else:
+        raise KeyError(f"Variable `{variable}` is not supported.")
+
+    column_names = {
+        col.lower().split("(")[0].replace("%", "pct_").strip().replace(" ", "_"): col
+        for col in list(column_dtypes.keys())
+    }
+
+    return column_names, column_spaces, column_dtypes, header_row
+
+
+def obs_column_definitions(
+    time_frequency: str,
+) -> tuple[list[str], list[int], list[type[str | int]], int]:
+    """Return the column names, widths, and data types for the fixed-width format."""
+    if time_frequency.lower() in ["h", "hour", "hourly"]:
+        num_observations = 24
+        column_names = ["code", "year", "month", "day", "code_var"]
+        column_widths = [7, 4, 2, 2, 3]
+        column_dtypes = [str, int, int, int, str]
+    elif time_frequency.lower() in ["d", "day", "daily"]:
+        num_observations = 31
+        column_names = ["code", "year", "month", "code_var"]
+        column_widths = [7, 4, 2, 3]
+        column_dtypes = [str, int, int, str]
+    else:
+        raise NotImplementedError(
+            "`time_frequency` must be 'h'/'hourly' or 'd'/'daily'."
+        )
+
+    header = 0
+
+    # Add the data and flag columns (one pair per observation)
+    for i in range(1, num_observations + 1):
+        data_entry, flag_entry = f"D{i:0n}", f"F{i:0n}"
+        column_names.append(data_entry)
+        column_names.append(flag_entry)
+        column_widths.extend([6, 1])
+        column_dtypes.extend([str, str])
+
+    return column_names, column_widths, column_dtypes, header
diff --git a/src/miranda/eccc/data/ahccd_gen2_precipitation.csv b/src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
similarity index 66%
rename from src/miranda/eccc/data/ahccd_gen2_precipitation.csv
rename to src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
index ce59df01..6f0c0f3a 100644
--- a/src/miranda/eccc/data/ahccd_gen2_precipitation.csv
+++ b/src/miranda/preprocess/configs/ahccd_gen2_precipitation.csv
@@ -8,23 +8,23 @@ BC,ARMSTRONG HULLCAR,1160483,1912,1,1998,12,50.5,-119.216666666667,505,Yes
BC,ATLIN,1200560,1906,1,2017,12,59.5666666666667,-133.7,674,No
BC,BARKERVILLE,1090660,1888,1,2015,3,53.0691666666667,-121.514722222222,1283,No
BC,BEAVERDELL NORTH,1130771,1926,1,2006,9,49.4783333333333,-119.047,838,Yes
-BC,BELLA COOLA ,1060841,1899,1,2017,11,52.3875,-126.595833333333,36,Yes
+BC,BELLA COOLA,1060841,1899,1,2017,11,52.3875,-126.595833333333,36,Yes
BC,BIG 
CREEK,1080870,1904,1,1998,11,51.6672236111111,-123.073056944444,1175,No -BC,BLUE RIVER ,1160899,1929,1,2017,12,52.1290277777778,-119.289527777778,683,Yes +BC,BLUE RIVER,1160899,1929,1,2017,12,52.1290277777778,-119.289527777778,683,Yes BC,BRISCO,1171020,1924,1,2004,3,50.8205555555556,-116.258055555556,823,No BC,BRITANNIA BEACH FURRY CREEK,1041050,1914,1,2000,4,49.5838888888889,-123.223611111111,9,Yes BC,BURQUITLAM VANCOUVER GOLF COURSE,1101200,1926,1,2005,12,49.2516666666667,-122.876944444444,122,Yes BC,CAPE SCOTT,1031353,1921,1,2016,6,50.7822333333333,-128.427227777778,72,Yes BC,CAPE ST JAMES,1051350,1926,1,1992,8,51.9333333333333,-131.016666666667,89,No BC,CASSIAR,1191440,1954,1,1996,8,59.2833333333333,-129.833333333333,1078,No -BC,CELISTA,116146F ,1924,1,2004,7,50.9555555555556,-119.379444444444,515,Yes +BC,CELISTA,116146F,1924,1,2004,7,50.9555555555556,-119.379444444444,515,Yes BC,CHATHAM POINT,1021480,1932,1,2016,2,50.3331944444444,-125.445555555556,23,Yes -BC,COMOX ,1021830,1936,1,2017,12,49.7166666666667,-124.9,26,Yes +BC,COMOX,1021830,1936,1,2017,12,49.7166666666667,-124.9,26,Yes BC,CORTES ISLAND TIBER BAY,1021960,1919,1,2017,12,50.0713888888889,-124.949444444444,15,Yes -BC,CRANBROOK ,1152102,1909,1,2012,11,49.6122222222222,-115.781944444444,939,Yes +BC,CRANBROOK,1152102,1909,1,2012,11,49.6122222222222,-115.781944444444,939,Yes BC,CRESTON,1142160,1912,1,2015,6,49.0970555555556,-116.517833333333,597,No BC,DARFIELD,1162265,1914,1,2017,11,51.2973333333333,-120.182666666667,412,Yes -BC,DAWSON CREEK ,1182285,1952,1,2007,2,55.7416666666667,-120.181944444444,655,Yes +BC,DAWSON CREEK,1182285,1952,1,2007,2,55.7416666666667,-120.181944444444,655,Yes BC,DEASE LAKE,1192340,1945,1,2008,7,58.428335,-130.010556666667,807,No BC,DEER PARK,1142400,1924,1,1995,9,49.4166666666667,-118.05,485,No BC,DRYAD POINT,1062544,1933,1,2017,12,52.1850005555556,-128.112224444444,4,Yes @@ -33,12 +33,12 @@ BC,ESTEVAN POINT,1032730,1924,1,2017,12,49.3835,-126.550833333333,7,No BC,FALLS RIVER,1062790,1932,1,1992,10,53.9833333333333,-129.733333333333,18,No BC,FAUQUIER,1142820,1913,1,2015,6,49.8719444444444,-118.0675,490,No BC,FERNIE,1152850,1914,1,2017,12,49.4888888888889,-115.072222222222,1001,No -BC,FORT NELSON ,1192940,1938,1,2012,11,58.8363888888889,-122.597222222222,382,No +BC,FORT NELSON,1192940,1938,1,2012,11,58.8363888888889,-122.597222222222,382,No BC,FORT ST JAMES,1092970,1895,1,2017,12,54.4552802777778,-124.285556111111,686,No -BC,FORT ST JOHN ,1183000,1931,1,2012,12,56.2380555555556,-120.740277777778,695,Yes +BC,FORT ST JOHN,1183000,1931,1,2012,12,56.2380555555556,-120.740277777778,695,Yes BC,GERMANSEN LANDING,1183090,1952,1,2013,11,55.7855277777778,-124.701444444444,766,No BC,GLACIER NP ROGERS PASS,1173191,1909,1,2014,7,51.3009166666667,-117.516388888889,1323,Yes -BC,GOLDEN ,1173210,1908,1,2017,12,51.2983333333333,-116.981666666667,785,No +BC,GOLDEN,1173210,1908,1,2017,12,51.2983333333333,-116.981666666667,785,No BC,GRAND FORKS,1133270,1910,1,2008,3,49.0261666666667,-118.465666666667,532,Yes BC,GRASMERE,1153282,1896,1,1993,11,49.0833333333333,-115.066666666667,869,Yes BC,HAZELTON TEMLEHAN,1073347,1915,1,1997,4,55.2,-127.733333333333,122,Yes @@ -58,7 +58,7 @@ BC,MASSET AIRPORT,1054920,1900,1,2008,6,54.0226111111111,-132.117472222222,7,Yes BC,MCINNES ISLAND,1065010,1954,1,2017,12,52.2616666666667,-128.719444444444,26,No BC,MERRITT STP,1125079,1919,1,2017,12,50.1141677777778,-120.800834722222,609,Yes BC,MICA DAM,1175122,1962,1,2017,12,52.0530555555556,-118.585277777778,579,No -BC,NANAIMO CITY YARD,10253G0 
,1913,1,2017,12,49.1988888888889,-123.987777777778,114,Yes +BC,NANAIMO CITY YARD,10253G0,1913,1,2017,12,49.1988888888889,-123.987777777778,114,Yes BC,NASS CAMP,1075384,1924,1,2015,2,55.2375,-129.029444444444,290,Yes BC,NELSON NE,1145442,1904,1,2017,12,49.5861111111111,-117.206388888889,570,Yes BC,NEW DENVER,1145460,1924,1,2017,12,49.995835,-117.370285,570,No @@ -68,186 +68,186 @@ BC,OOTSA L SKINS L SPILLWAY,1085835,1926,1,2017,7,53.7721666666667,-125.99655555 BC,OSOYOOS WEST,1125865,1954,1,2009,9,49.0319444444444,-119.442777777778,297,Yes BC,PACHENA POINT,1035940,1925,1,2017,12,48.7227777777778,-125.097222222222,37,No BC,PEMBERTON AIRPORT,1086082,1913,1,1991,6,50.3056461111111,-122.734088888889,204,Yes -BC,PENTICTON ,1126150,1907,1,2012,5,49.4630555555556,-119.602222222222,344,Yes +BC,PENTICTON,1126150,1907,1,2012,5,49.4630555555556,-119.602222222222,344,Yes BC,PORT ALICE,1036240,1924,1,2016,4,50.3858361111111,-127.455286111111,21,No -BC,PORT HARDY ,1026270,1944,1,2013,6,50.6802777777778,-127.366111111111,22,No +BC,PORT HARDY,1026270,1944,1,2013,6,50.6802777777778,-127.366111111111,22,No BC,POWELL RIVER,1046390,1924,1,2007,7,49.8761111111111,-124.554166666667,52,No -BC,PRINCE GEORGE ,1096450,1913,1,2009,10,53.8908333333333,-122.678888888889,691,Yes -BC,PRINCE RUPERT ,1066481,1909,1,2006,3,54.2925,-130.444722222222,35,Yes -BC,PRINCETON ,1126510,1901,1,2017,12,49.4677777777778,-120.5125,700,Yes +BC,PRINCE GEORGE,1096450,1913,1,2009,10,53.8908333333333,-122.678888888889,691,Yes +BC,PRINCE RUPERT,1066481,1909,1,2006,3,54.2925,-130.444722222222,35,Yes +BC,PRINCETON,1126510,1901,1,2017,12,49.4677777777778,-120.5125,700,Yes BC,QUATSINO,1036570,1895,1,2017,12,50.5336138888889,-127.653335833333,8,No -BC,QUESNEL ,1096630,1900,1,2007,3,53.0261111111111,-122.51,545,Yes +BC,QUESNEL,1096630,1900,1,2007,3,53.0261111111111,-122.51,545,Yes BC,QUINSAM RIVER HATCHERY,1026639,1936,1,2017,12,50.0161111111111,-125.303888888889,46,Yes BC,REVELSTOKE,1176751,1898,1,1999,8,50.9533333333333,-118.166388888889,450,Yes BC,SAANICHTON,1016940,1914,1,2017,12,48.6216666666667,-123.418888888889,61,No -BC,SALMON RM ,1166R45 ,1911,1,2013,2,50.6855577777778,-119.233613611111,527,Yes -BC,SANDSPIT ,1057050,1949,1,2017,3,53.2538888888889,-131.813055555556,6,No +BC,SALMON RM,1166R45,1911,1,2013,2,50.6855577777778,-119.233613611111,527,Yes +BC,SANDSPIT,1057050,1949,1,2017,3,53.2538888888889,-131.813055555556,6,No BC,SEYMOUR FALLS,1107200,1928,1,2003,9,49.4402777777778,-122.971111111111,244,No BC,SHALALTH,1117215,1935,1,2004,4,50.7283333333333,-122.240555555556,244,Yes BC,SHAWNIGAN LAKE,1017230,1911,1,2017,12,48.6469472222222,-123.626408333333,138,No -BC,SMITHERS ,1077500,1922,1,2017,12,54.8247222222222,-127.182777777778,522,Yes +BC,SMITHERS,1077500,1922,1,2017,12,54.8247222222222,-127.182777777778,522,Yes BC,STAVE FALLS,1107680,1910,1,2004,8,49.2333333333333,-122.366666666667,110,No -BC,STEWART ,1067742,1911,1,2016,6,55.9361111111111,-129.985,7,Yes +BC,STEWART,1067742,1911,1,2016,6,55.9361111111111,-129.985,7,Yes BC,STILLWATER POWER HOUSE,1047770,1931,1,2007,7,49.7666666666667,-124.316666666667,7,No BC,TATLAYOKO LAKE,1088010,1928,1,2005,4,51.6747222222222,-124.405,870,No -BC,TERRACE ,1068130,1913,1,2013,1,54.4663888888889,-128.5775,217,Yes +BC,TERRACE,1068130,1913,1,2013,1,54.4663888888889,-128.5775,217,Yes BC,TLELL,1058190,1950,1,1999,1,53.5,-131.95,5,No -BC,TOFINO ,1038205,1942,1,2017,12,49.0822222222222,-125.772505555556,24,No +BC,TOFINO,1038205,1942,1,2017,12,49.0822222222222,-125.772505555556,24,No BC,UCLUELET KENNEDY 
CAMP,1038332,1958,1,2017,12,48.9452833333333,-125.527236111111,30,Yes -BC,VANCOUVER ,1108447,1896,1,2013,6,49.195,-123.181944444444,4,Yes +BC,VANCOUVER,1108447,1896,1,2013,6,49.195,-123.181944444444,4,Yes BC,VAVENBY,1168520,1913,1,2017,12,51.5761111111111,-119.778055555556,445,No BC,VERNON BELLA VISTA,1128553,1900,1,2015,6,50.2643611111111,-119.308861111111,427,Yes -BC,VICTORIA ,1018620,1899,1,2013,7,48.647225,-123.425833333333,19,Yes +BC,VICTORIA,1018620,1899,1,2013,7,48.647225,-123.425833333333,19,Yes BC,WARFIELD,1148700,1928,1,2002,12,49.1,-117.75,606,No BC,WASA,1158730,1924,1,2017,12,49.8239722222222,-115.630777777778,930,No BC,WESTWOLD,1168880,1921,1,2013,5,50.4688911111111,-119.750556388889,609,No -BC,WILLIAMS LAKE ,1098940,1936,1,2012,12,52.1830555555556,-122.054166666667,940,Yes +BC,WILLIAMS LAKE,1098940,1936,1,2012,12,52.1830555555556,-122.054166666667,940,Yes NU,ALERT,2400306,1950,1,2017,12,82.5,-62.3333333333333,65,Yes -NU,BAKER LAKE ,2300500,1949,1,2013,11,64.2988888888889,-96.0777777777778,18,No -YK,BURWASH ,2100182,1967,1,2015,2,61.3666666666667,-139.05,807,No -NU,BYRON BAY ,2400595,1957,1,1993,6,68.75,-109.066666666667,92,No -NU,CAMBRIDGE BAY ,2400600,1940,1,2015,2,69.1080555555556,-105.138333333333,27,No -NU,CAPE DORSET ,2400635,1932,1,2014,11,64.2302777777778,-76.525,50,Yes -NU,CAPE DYER ,2400654,1960,1,1993,3,66.5833333333333,-61.6166666666667,393,No +NU,BAKER LAKE,2300500,1949,1,2013,11,64.2988888888889,-96.0777777777778,18,No +YK,BURWASH,2100182,1967,1,2015,2,61.3666666666667,-139.05,807,No +NU,BYRON BAY,2400595,1957,1,1993,6,68.75,-109.066666666667,92,No +NU,CAMBRIDGE BAY,2400600,1940,1,2015,2,69.1080555555556,-105.138333333333,27,No +NU,CAPE DORSET,2400635,1932,1,2014,11,64.2302777777778,-76.525,50,Yes +NU,CAPE DYER,2400654,1960,1,1993,3,66.5833333333333,-61.6166666666667,393,No NU,CAPE HOOPER,2400660,1958,1,2007,9,68.4725,-66.8152777777778,390,No -NT,CAPE PARRY ,2200675,1960,1,1993,3,70.1666666666667,-124.716666666667,87,No +NT,CAPE PARRY,2200675,1960,1,1993,3,70.1666666666667,-124.716666666667,87,No YK,CARMACKS,2100300,1964,1,2008,2,62.1,-136.3,525,No -NU,CHESTERFIELD INLET ,2300707,1931,1,2014,11,63.3469444444444,-90.7311111111111,10,Yes +NU,CHESTERFIELD INLET,2300707,1931,1,2014,11,63.3469444444444,-90.7311111111111,10,Yes NU,CLINTON POINT,2300750,1957,1,1993,6,69.5833333333333,-120.8,101,No -NU,CLYDE ,2400800,1946,1,2002,6,70.4861111111111,-68.5166666666667,27,No -NU,CORAL HARBOUR ,2301000,1945,1,2015,5,64.1933333333333,-83.3594444444445,64,No -YK,DAWSON ,2100402,1901,1,2015,2,64.0430555555556,-139.127777777778,370,Yes +NU,CLYDE,2400800,1946,1,2002,6,70.4861111111111,-68.5166666666667,27,No +NU,CORAL HARBOUR,2301000,1945,1,2015,5,64.1933333333333,-83.3594444444445,64,No +YK,DAWSON,2100402,1901,1,2015,2,64.0430555555556,-139.127777777778,370,Yes NU,DEWAR LAKES,2401030,1958,1,1993,3,68.65,-71.1666666666667,527,No YK,DRURY CREEK,2100460,1970,1,2009,4,62.2019444444444,-134.39,609,No NU,EUREKA,2401200,1948,1,2016,2,79.9833333333333,-85.9333333333333,10,No -NT,FORT GOOD HOPE ,2201400,1945,1,2014,11,66.2408333333333,-128.650833333333,82,No -NT,FORT MCPHERSON ,2201601,1932,1,2014,11,67.4077777777778,-134.860277777778,35,Yes +NT,FORT GOOD HOPE,2201400,1945,1,2014,11,66.2408333333333,-128.650833333333,82,No +NT,FORT MCPHERSON,2201601,1932,1,2014,11,67.4077777777778,-134.860277777778,35,Yes NT,FORT RELIANCE,2201903,1949,1,2007,8,62.7113888888889,-109.168333333333,168,Yes -NT,FORT RESOLUTION ,2202000,1931,1,2014,11,61.1808333333333,-113.689722222222,160,No -NT,FORT SIMPSON 
,2202101,1898,1,2014,10,61.7602777777778,-121.236666666667,169,Yes -NT,FORT SMITH ,2202200,1915,1,2014,11,60.0202777777778,-111.961944444444,205,Yes +NT,FORT RESOLUTION,2202000,1931,1,2014,11,61.1808333333333,-113.689722222222,160,No +NT,FORT SIMPSON,2202101,1898,1,2014,10,61.7602777777778,-121.236666666667,169,Yes +NT,FORT SMITH,2202200,1915,1,2014,11,60.0202777777778,-111.961944444444,205,Yes NU,FOX FIVE,2400570,1959,1,2007,9,67.5355555555556,-63.7888888888889,584,No -NU,GLADMAN POINT ,2402340,1957,1,1992,7,68.6666666666667,-97.8,14,No +NU,GLADMAN POINT,2402340,1957,1,1992,7,68.6666666666667,-97.8,14,No YK,HAINES JUNCTION,2100631,1945,1,2008,9,60.7495444444445,-137.50525,596,Yes -NU,HALL BEACH ,2402350,1957,1,2014,12,68.7758333333333,-81.2425,8,No -NT,HAY RIVER ,2202400,1909,1,2014,9,60.8397222222222,-115.782777777778,166,Yes +NU,HALL BEACH,2402350,1957,1,2014,12,68.7758333333333,-81.2425,8,No +NT,HAY RIVER,2202400,1909,1,2014,9,60.8397222222222,-115.782777777778,166,Yes NT,INUVIK,2202578,1957,1,2007,11,68.3166666666667,-133.516666666667,103,Yes NU,IQALUIT,2402592,1946,1,2007,11,63.7472222222222,-68.5444444444445,34,Yes -NU,JENNY LIND ISLAND ,2302650,1958,1,1992,7,68.65,-101.733333333333,18,No -YK,KOMAKUK BEACH ,2100685,1959,1,1993,6,69.5833333333333,-140.183333333333,7,No -NU,KUGAARUK ,2303092,1957,1,2012,8,68.5405555555556,-89.7972222222222,17,Yes -NU,KUGLUKTUK ,2300902,1931,1,2014,12,67.8166666666667,-115.143888888889,23,Yes -NU,LADY FRANKLIN POINT ,2302680,1958,1,1993,3,68.5,-113.216666666667,16,No +NU,JENNY LIND ISLAND,2302650,1958,1,1992,7,68.65,-101.733333333333,18,No +YK,KOMAKUK BEACH,2100685,1959,1,1993,6,69.5833333333333,-140.183333333333,7,No +NU,KUGAARUK,2303092,1957,1,2012,8,68.5405555555556,-89.7972222222222,17,Yes +NU,KUGLUKTUK,2300902,1931,1,2014,12,67.8166666666667,-115.143888888889,23,Yes +NU,LADY FRANKLIN POINT,2302680,1958,1,1993,3,68.5,-113.216666666667,16,No NU,LONGSTAFF BLUFF,2402684,1958,1,1991,6,68.8986111111111,-75.1408333333333,161,No -NU,LUPIN,230N002 ,1959,1,2007,7,65.7552916666667,-111.245841666667,488,Yes +NU,LUPIN,230N002,1959,1,2007,7,65.7552916666667,-111.245841666667,488,Yes NU,MACKAR INLET,2402686,1958,1,1992,5,68.3,-85.6666666666667,395,No -YK,MAYO ,2100700,1925,1,2013,11,63.6166666666667,-135.866666666667,504,No -NT,MOULD BAY,250M001 ,1948,1,2007,11,76.2375166666667,-119.347233333333,2,Yes -NU,NANISIVIK ,2402730,1938,1,2010,12,72.9833333333333,-84.6166666666667,642,Yes +YK,MAYO,2100700,1925,1,2013,11,63.6166666666667,-135.866666666667,504,No +NT,MOULD BAY,250M001,1948,1,2007,11,76.2375166666667,-119.347233333333,2,Yes +NU,NANISIVIK,2402730,1938,1,2010,12,72.9833333333333,-84.6166666666667,642,Yes NT,NICHOLSON PENINSULA,2202750,1958,1,1993,6,69.9333333333333,-128.966666666667,89,No -NT,NORMAN WELLS ,2202800,1943,1,2012,10,65.2825,-126.800277777778,73,No -YK,OLD CROW ,2100800,1952,1,2015,2,67.5705555555556,-139.839166666667,251,No +NT,NORMAN WELLS,2202800,1943,1,2012,10,65.2825,-126.800277777778,73,No +YK,OLD CROW,2100800,1952,1,2015,2,67.5705555555556,-139.839166666667,251,No YK,PELLY RANCH,2100880,1952,1,2015,3,62.8166666666667,-137.366666666667,454,No NU,RESOLUTE CARS,2403500,1948,1,2014,11,74.7169444444445,-94.9694444444445,67,No YK,ROSS RIVER YTG,2100941,1967,1,2008,2,61.9833333333333,-132.45,698,Yes -NT,SACHS HARBOUR ,2503650,1956,1,2013,2,72,-125.266666666667,86,No -NU,SHEPHERD BAY ,2303685,1957,1,1993,3,68.8166666666667,-93.4333333333333,43,No -YK,SHINGLE POINT ,2100950,1957,1,1993,3,68.95,-137.216666666667,49,No +NT,SACHS 
HARBOUR,2503650,1956,1,2013,2,72,-125.266666666667,86,No +NU,SHEPHERD BAY,2303685,1957,1,1993,3,68.8166666666667,-93.4333333333333,43,No +YK,SHINGLE POINT,2100950,1957,1,1993,3,68.95,-137.216666666667,49,No YK,SWIFT RIVER,2101081,1967,1,2008,2,60,-131.183333333333,891,No -YK,TESLIN ,2101100,1944,1,2013,12,60.1741388888889,-132.735888888889,705,No +YK,TESLIN,2101100,1944,1,2013,12,60.1741388888889,-132.735888888889,705,No YK,TUCHITUA,2101135,1967,1,2014,9,60.9333333333333,-129.216666666667,724,No NT,TUKTOYAKTUK,2203910,1957,1,1993,6,69.45,-133,18,No -NT,TULITA ,2201700,1904,1,2014,12,64.9086111111111,-125.568333333333,101,No -NT,ULUKHAKTOK ,2502501,1941,1,2010,6,70.7627777777778,-117.806111111111,36,Yes -YK,WATSON LAKE ,2101200,1939,1,2014,12,60.1165,-128.822333333333,687,No -YK,WHITEHORSE ,2101300,1942,1,2012,12,60.7095,-135.068833333333,706,No +NT,TULITA,2201700,1904,1,2014,12,64.9086111111111,-125.568333333333,101,No +NT,ULUKHAKTOK,2502501,1941,1,2010,6,70.7627777777778,-117.806111111111,36,Yes +YK,WATSON LAKE,2101200,1939,1,2014,12,60.1165,-128.822333333333,687,No +YK,WHITEHORSE,2101300,1942,1,2012,12,60.7095,-135.068833333333,706,No NT,WRIGLEY ,2204000,1944,1,2014,10,63.2094444444445,-123.436666666667,149,No NT,YELLOWKNIFE ,2204100,1943,1,2013,1,62.4627777777778,-114.440277777778,206,No NT,YOHIN,2204300,1957,1,2007,9,61.2419444444444,-123.741666666667,204,No -AB,ATHABASCA,3060L20 ,1918,1,2017,12,54.7222230555556,-113.2880575,515,Yes +AB,ATHABASCA,3060L20,1918,1,2017,12,54.7222230555556,-113.2880575,515,Yes AB,BANFF,3050519,1894,1,2007,11,51.1933583333333,-115.552236111111,1397,Yes AB,BEAVER MINES,3050600,1913,1,2012,3,49.4672277777778,-114.176955555556,1257,No AB,BEAVERLODGE,3070600,1916,1,2007,11,55.1966672222222,-119.396413888889,745,Yes -AB,CALGARY ,3031093,1885,1,2012,7,51.1138888888889,-114.020277777778,1084,No +AB,CALGARY,3031093,1885,1,2012,7,51.1138888888889,-114.020277777778,1084,No AB,CALMAR,3011120,1915,1,2016,12,53.2897241666667,-113.863057777778,720,No AB,CAMPSIE,3061200,1910,1,2013,10,54.1322227777778,-114.677778888889,671,No AB,CAMROSE,3011240,1946,1,2007,11,53.0347222222222,-112.814166666667,739,No AB,CARWAY,3031400,1915,1,2011,11,48.999725,-113.376111111111,1354,No -AB,CLARESHOLM MEADOW CREEK,3031F5F ,1913,1,2005,3,49.9375222222222,-113.737519444444,1035,No -AB,COLD LAKE ,3081680,1926,1,2017,12,54.4166666666667,-110.283333333333,541,Yes +AB,CLARESHOLM MEADOW CREEK,3031F5F,1913,1,2005,3,49.9375222222222,-113.737519444444,1035,No +AB,COLD LAKE,3081680,1926,1,2017,12,54.4166666666667,-110.283333333333,541,Yes AB,CORONATION,3011887,1928,1,2007,11,52.0741666666667,-111.449444444444,791,Yes -AB,CROWSNEST,3051R4R ,1913,1,2007,11,49.627525,-114.48195,1303,Yes +AB,CROWSNEST,3051R4R,1913,1,2007,11,49.627525,-114.48195,1303,Yes AB,DRUMHELLER ANDREW,3022136,1954,1,2008,3,51.4666666666667,-112.866666666667,719,No -AB,EDMONTON ,3012205,1883,1,2012,4,53.3166666666667,-113.583333333333,723,Yes +AB,EDMONTON,3012205,1883,1,2012,4,53.3166666666667,-113.583333333333,723,Yes AB,EDSON,3062246,1920,1,2007,11,53.5802797222222,-116.453335277778,927,Yes AB,ELK POINT,3012280,1913,1,1997,6,53.8833333333333,-111.066666666667,605,No AB,ENILDA-BERG,3062427,1932,1,2005,4,55.4166666666667,-116.3,591,Yes AB,FAIRVIEW THREE FOX FARM,3072539,1932,1,1999,12,56.0833333333333,-118.533333333333,604,Yes -AB,FORT CHIPEWYAN ,3072658,1884,1,2007,8,58.7666666666667,-111.116666666667,232,Yes -AB,FORT MCMURRAY ,3062693,1920,1,2007,11,56.65,-111.216666666667,369,Yes +AB,FORT 
CHIPEWYAN,3072658,1884,1,2007,8,58.7666666666667,-111.116666666667,232,Yes +AB,FORT MCMURRAY,3062693,1920,1,2007,11,56.65,-111.216666666667,369,Yes AB,FORT VERMILION,3072723,1909,1,2007,11,58.3823055555556,-116.040166666667,289,Yes AB,GLEICHEN,3032800,1903,1,2006,3,50.8833333333333,-113.05,905,No -AB,GRANDE PRAIRIE ,3072920,1931,1,2013,9,55.1797222222222,-118.885,669,Yes +AB,GRANDE PRAIRIE,3072920,1931,1,2013,9,55.1797222222222,-118.885,669,Yes AB,HIGHWOOD AU,3053250,1903,1,2011,9,50.5511111111111,-114.370555555556,1580,Yes -AB,HINTON VALLEY,306A009 ,1917,1,2017,12,53.40381,-117.537620277778,1011,Yes +AB,HINTON VALLEY,306A009,1917,1,2017,12,53.40381,-117.537620277778,1011,Yes AB,JASPER WARDEN,3053536,1936,1,2007,11,52.9263888888889,-118.029722222222,1020,Yes AB,JENNER,3023560,1916,1,2008,1,50.7222277777778,-111.195852777778,755,No AB,KEG RIVER,3073641,1936,1,2009,1,57.75,-117.616666666667,405,Yes AB,LACOMBE,3023722,1908,1,2007,11,52.4488905555556,-113.755834722222,860,Yes -AB,LETHBRIDGE ,3033880,1902,1,2007,8,49.6302777777778,-112.799722222222,929,Yes -AB,MEDICINE HAT ,3034480,1886,1,2006,5,50.0188888888889,-110.720833333333,717,No +AB,LETHBRIDGE,3033880,1902,1,2007,8,49.6302777777778,-112.799722222222,929,Yes +AB,MEDICINE HAT,3034480,1886,1,2006,5,50.0188888888889,-110.720833333333,717,No AB,MOUNTAIN VIEW,3034720,1913,1,2006,3,49.1269555555556,-113.630016666667,1339,No AB,OLDS,3024920,1914,1,2015,6,51.7833333333333,-114.1,1040,No AB,ONEFOUR,3044923,1928,1,2007,10,49.1166666666667,-110.466666666667,935,Yes -AB,PEACE RIVER ,3075040,1908,1,2014,5,56.2269444444444,-117.447222222222,571,Yes +AB,PEACE RIVER,3075040,1908,1,2014,5,56.2269444444444,-117.447222222222,571,Yes AB,PINCHER CREEK,3035206,1915,1,2007,11,49.5205555555556,-113.997222222222,1190,Yes AB,RANFURLY 2NW,3015405,1905,1,2014,11,53.4166666666667,-111.733333333333,673,Yes AB,ROCKY MTN HOUSE,3015523,1917,1,2007,11,52.4213905555556,-114.912223055556,988,Yes AB,SCOTFIELD,3025770,1913,1,2007,10,51.5833555555556,-111.363611666667,762,Yes AB,SION,3015960,1906,1,2004,12,53.8833333333333,-114.116666666667,701,No -AB,SLAVE LAKE ,3065999,1925,1,2007,8,55.2833333333333,-114.783333333333,583,Yes +AB,SLAVE LAKE,3065999,1925,1,2007,8,55.2833333333333,-114.783333333333,583,Yes AB,STETTLER NORTH,3016119,1919,1,2001,8,52.3333333333333,-112.716666666667,821,Yes AB,VAUXHALL,3036682,1914,1,2007,11,50.05,-112.133333333333,779,Yes AB,WABASCA,3076908,1915,1,2009,1,55.9666666666667,-113.833333333333,545,Yes -AB,WHITECOURT ,3067372,1943,1,2009,5,54.1438888888889,-115.786666666667,782,Yes +AB,WHITECOURT,3067372,1943,1,2009,5,54.1438888888889,-115.786666666667,782,Yes SK,ANEROID,4020160,1922,1,2005,4,49.7166666666667,-107.3,754,No SK,BANGOR,4010400,1951,1,2005,2,50.9,-102.283333333333,526,No -SK,BUFFALO NARROWS ,4060982,1962,1,2012,11,55.8333333333333,-108.433333333333,440,Yes +SK,BUFFALO NARROWS,4060982,1962,1,2012,11,55.8333333333333,-108.433333333333,440,Yes SK,CEYLON,4011441,1922,1,2002,12,49.3833333333333,-104.65,753,Yes SK,CHAPLIN,4021520,1904,1,1995,9,50.4666666666667,-106.65,672,No SK,COLLINS BAY CAMECO,4061632,1965,1,2017,12,58.1833333333333,-103.7,490,Yes SK,COTE,4011846,1913,1,2006,3,51.5166666666667,-101.783333333333,450,Yes SK,CREE LAKE,4061861,1962,1,1993,8,57.35,-107.133333333333,495,Yes SK,DAVIDSON,4012120,1922,1,2005,10,51.2666666666667,-105.983333333333,619,No -SK,ESTEVAN ,4012400,1902,1,2015,2,49.2166666666667,-102.966666666667,581,Yes +SK,ESTEVAN,4012400,1902,1,2015,2,49.2166666666667,-102.966666666667,581,Yes SK,HIGH 
POINT,4023240,1929,1,2017,7,50.9786127777778,-107.935278611111,645,No SK,HUDSON BAY,4083323,1943,1,2013,12,52.8833333333333,-102.583333333333,422,Yes SK,INDIAN HEAD,4013480,1895,1,2007,11,50.55,-103.65,579,No SK,ISLAND FALLS,4063560,1931,1,2004,9,55.5333333333333,-102.35,299,No SK,KELLIHER,4013660,1908,1,2017,12,51.2574166666667,-103.753027777778,676,Yes SK,KEY LAKE,4063755,1977,1,2017,12,57.25,-105.616666666667,509,No -SK,KINDERSLEY ,4043900,1942,1,2013,11,51.5166666666667,-109.183333333333,694,Yes +SK,KINDERSLEY,4043900,1942,1,2013,11,51.5166666666667,-109.183333333333,694,Yes SK,KLINTONEL,4024080,1911,1,1994,1,49.6833333333333,-108.916666666667,1074,No -SK,LA RONGE ,4064150,1923,1,2013,10,55.15,-105.266666666667,379,Yes -SK,LEADER AIRPORT,402DAF0 ,1923,1,2007,11,50.9094638888889,-109.501391666667,676,Yes +SK,LA RONGE,4064150,1923,1,2013,10,55.15,-105.266666666667,379,Yes +SK,LEADER AIRPORT,402DAF0,1923,1,2007,11,50.9094638888889,-109.501391666667,676,Yes SK,LOON LAKE EPF,4064600,1930,1,2005,10,54.05,-109.1,543,Yes SK,MANOR,4014913,1922,1,2004,7,49.6166666666667,-102.1,633,Yes SK,MELFORT,4055079,1910,1,2007,11,52.8166666666667,-104.6,490,Yes SK,MOOSE JAW,4015322,1895,1,2007,11,50.3316805555556,-105.537508333333,577,Yes SK,MOOSOMIN,4015360,1900,1,2000,9,50.1333333333333,-101.666666666667,576,No -SK,NIPAWIN ,4075518,1911,1,2005,9,53.3333333333333,-104,372,Yes +SK,NIPAWIN,4075518,1911,1,2005,9,53.3333333333333,-104,372,Yes SK,NORTH BATTLEFORD,4045605,1894,1,2007,11,52.7666666666667,-108.25,548,Yes SK,OUTLOOK,4055736,1915,1,2007,11,51.4833333333333,-107.05,541,Yes SK,PASWEGIN,4015960,1951,1,2003,9,51.9833333333333,-103.916666666667,533,No SK,PELLY,4086000,1952,1,2016,3,52.0833333333333,-101.866666666667,509,No SK,PILGER,4056120,1913,1,2011,9,52.4166666666667,-105.15,552,No -SK,PRINCE LBERT ,4056240,1889,1,2013,11,53.2166666666667,-105.666666666667,428,Yes -SK,REGINA ,4016560,1898,1,2007,11,50.4333333333333,-104.666666666667,577,No +SK,PRINCE LBERT,4056240,1889,1,2013,11,53.2166666666667,-105.666666666667,428,Yes +SK,REGINA,4016560,1898,1,2007,11,50.4333333333333,-104.666666666667,577,No SK,SASKATOON DIEFENBAKER ,4057120,1900,1,2007,11,52.1666666666667,-106.716666666667,504,No SK,SCOTT,4047241,1911,1,2007,11,52.35974,-108.834723333333,660,Yes SK,SWIFT CURRENT,4028060,1886,1,2007,11,50.2666666666667,-107.733333333333,825,Yes SK,TONKIN,4019082,1941,1,2016,1,51.2,-102.233333333333,527,Yes -SK,URANIUM CITY,406QLD0 ,1953,1,2007,10,59.5666666666667,-108.483333333333,318,Yes +SK,URANIUM CITY,406QLD0,1953,1,2007,10,59.5666666666667,-108.483333333333,318,Yes SK,VAL-MARIE,4038400,1937,1,2010,5,49.3700138888889,-107.847525,808,No SK,WASECA,4048520,1908,1,2014,12,53.1308555555556,-109.403902777778,638,No SK,WASKESIU LAKE,4068559,1966,1,2007,11,53.9166666666667,-106.066666666667,569,Yes @@ -258,13 +258,13 @@ MB,ARBORG,5030080,1951,1,2016,6,50.9333333333333,-97.0833333333333,224,No MB,BERENS RIVER,5030203,1905,1,2013,11,52.3597366666667,-97.0219533333333,222,Yes MB,BIRTLE,5010240,1917,1,2000,11,50.4333333333333,-101.05,522,No MB,BISSETT,5030282,1933,1,1997,6,51.0333333333333,-95.7,259,Yes -MB,BRANDON ,5010480,1890,1,2012,12,49.91,-99.9519444444445,409,Yes +MB,BRANDON,5010480,1890,1,2012,12,49.91,-99.9519444444445,409,Yes MB,CHURCHILL,5060606,1932,1,2015,12,58.7333333333333,-94.0666666666667,29,Yes MB,CYPRESS RIVER,5010640,1948,1,2012,3,49.55,-99.0833333333333,374,No MB,DAUPHIN,5040681,1911,1,2007,10,51.1003888888889,-100.056888888889,305,Yes MB,EMERSON,5020882,1942,1,2003,1,49,-97.2375,242,Yes MB,FLIN 
FLON,5050920,1927,1,2017,12,54.7666666666667,-101.883333333333,320,No -MB,GILLAM ,5061001,1943,1,2014,10,56.3575,-94.7105555555556,145,Yes +MB,GILLAM,5061001,1943,1,2014,10,56.3575,-94.7105555555556,145,Yes MB,GIMLI,5031039,1944,1,2008,3,50.6333333333333,-97.0166666666667,223,Yes MB,GRAND RAPIDS HYDRO,5031111,1962,1,2017,12,53.1580558333333,-99.2833444444444,223,Yes MB,GREAT FALLS,5031200,1923,1,2002,12,50.4666666666667,-96,249,No @@ -273,17 +273,17 @@ MB,LANGRUTH WEST,5041535,1958,1,2005,2,50.4138888888889,-98.8027777777778,264,Ye MB,LYNN LAKE,5061648,1952,1,2007,11,56.8638888888889,-101.076111111111,357,Yes MB,MORDEN,5021849,1888,1,2007,11,49.1876388888889,-98.0839444444444,298,Yes MB,NEEPAWA MURRAY 6 SOUTHWEST,5042004,1881,1,2008,11,50.15,-99.5666666666667,412,Yes -MB,NINETTE,50220M0 ,1916,1,1996,5,49.4166666666667,-99.65,419,Yes +MB,NINETTE,50220M0,1916,1,1996,5,49.4166666666667,-99.65,419,Yes MB,NORWAY HOUSE,5062045,1896,1,2007,11,53.9666666666667,-97.85,224,Yes MB,PIERSON,5012080,1933,1,2007,3,49.1833333333333,-101.266666666667,469,No MB,PINAWA WNRE,5032162,1915,1,2017,3,50.1805555555556,-96.0583333333333,267,Yes MB,PORTAGE LA PRAIRIE,5012321,1942,1,2017,12,49.95,-98.2666666666667,259,Yes MB,SPRAGUE,5022759,1916,1,2007,11,49.0236111111111,-95.5983358333333,329,Yes MB,STEINBACH,5022780,1956,1,2005,3,49.5333333333333,-96.7666666666667,254,No -MB,SWAN RIVER,504K80K ,1960,1,2007,10,52.1149722222222,-101.232916666667,335,Yes -MB,THE PAS ,5052880,1910,1,2014,11,53.9666666666667,-101.1,270,Yes +MB,SWAN RIVER,504K80K,1960,1,2007,10,52.1149722222222,-101.232916666667,335,Yes +MB,THE PAS,5052880,1910,1,2014,11,53.9666666666667,-101.1,270,Yes MB,THOMPSON ,5062922,1967,1,2014,11,55.8033333333333,-97.8625,222,No -MB,WINNIPEG RICHARDSON ,5023222,1872,1,2007,11,49.9166666666667,-97.2333333333333,239,Yes +MB,WINNIPEG RICHARDSON,5023222,1872,1,2007,11,49.9166666666667,-97.2333333333333,239,Yes ON,AMHERSTBURG,6130257,1917,1,2017,12,42.1033583333333,-83.0944633333333,182,Yes ON,ARMSTRONG JELLIEN,6040330,1939,1,1992,10,50.25,-89.1,341,Yes ON,ATIKOKAN MARMION,6020384,1919,1,2007,7,48.8,-91.5833333333333,442,Yes @@ -293,20 +293,20 @@ ON,BIG TROUT LAKE,6010738,1939,1,1992,10,53.8333333333333,-89.8666666666667,224, ON,BISCOTASING,6060773,1914,1,2000,10,47.3,-82.1,407,No ON,BROCKVILLE PCC,6100971,1915,1,2017,12,44.6,-75.6666666666667,96,Yes ON,CAMERON FALLS,6041109,1924,1,1998,8,49.15,-88.35,229,No -ON,CHAPLEAU ,6061361,1914,1,2015,3,47.82,-83.3466666666667,447,Yes +ON,CHAPLEAU,6061361,1914,1,2015,3,47.82,-83.3466666666667,447,Yes ON,CORNWALL,6101874,1951,1,2017,12,45.0155783333333,-74.7489,64,No -ON,DRYDEN ,6032119,1914,1,2005,1,49.8333333333333,-92.75,413,Yes -ON,EARLTON ,6072225,1939,1,2005,1,47.7,-79.85,243,No -ON,FORT FRANCES ,6022476,1912,1,2011,5,48.65,-93.4333333333333,342,Yes -ON,GERALDTON ,6042716,1950,1,2015,2,49.7828027777778,-86.9305694444445,349,Yes +ON,DRYDEN,6032119,1914,1,2005,1,49.8333333333333,-92.75,413,Yes +ON,EARLTON,6072225,1939,1,2005,1,47.7,-79.85,243,No +ON,FORT FRANCES,6022476,1912,1,2011,5,48.65,-93.4333333333333,342,Yes +ON,GERALDTON,6042716,1950,1,2015,2,49.7828027777778,-86.9305694444445,349,Yes ON,GODFREY,6102857,1924,1,2003,5,44.5666666666667,-76.6333333333333,160,Yes -ON,GORE BAY ,6092925,1916,1,1994,1,45.8833333333333,-82.5666666666667,194,Yes +ON,GORE BAY,6092925,1916,1,1994,1,45.8833333333333,-82.5666666666667,194,Yes ON,HALIBURTON,6163171,1883,1,2017,12,45.0322483333333,-78.531115,330,Yes -ON,HAMILTON ,6153194,1866,1,2011,12,43.1716866666667,-79.9341766666667,238,Yes 
-ON,HORNEPAYNE ,6053575,1917,1,1995,7,49.2,-84.7666666666667,335,Yes +ON,HAMILTON,6153194,1866,1,2011,12,43.1716866666667,-79.9341766666667,238,Yes +ON,HORNEPAYNE,6053575,1917,1,1995,7,49.2,-84.7666666666667,335,Yes ON,IROQUOIS FALLS,6073810,1913,1,1998,12,48.75,-80.6666666666667,259,No -ON,KAPUSKASING ,6073975,1918,1,2014,9,49.4138888888889,-82.4675,227,Yes -ON,KENORA ,6034075,1900,1,2013,2,49.7902791666667,-94.3652786111111,406,Yes +ON,KAPUSKASING,6073975,1918,1,2014,9,49.4138888888889,-82.4675,227,Yes +ON,KENORA,6034075,1900,1,2013,2,49.7902791666667,-94.3652786111111,406,Yes ON,KINGSTON PUMPING STATION,6104175,1872,1,2007,12,44.2439033333333,-76.4805666666667,77,Yes ON,LANSDOWNE HOUSE,6014350,1941,1,1989,6,52.2333333333333,-87.8833333333333,255,No ON,LONDON AIRPORT,6144475,1883,1,2017,4,43.0330555555556,-81.1511111111111,278,Yes @@ -315,68 +315,68 @@ ON,MADAWASKA,6084770,1916,1,2000,11,45.5,-77.9833333333333,316,No ON,MINE CENTRE SOUTHWEST,6025205,1914,1,2017,12,48.7597388888889,-92.6227777777778,361,Yes ON,MOOSONEE,6075425,1892,1,2017,12,51.2666666666667,-80.65,10,Yes ON,MORRISBURG,6105460,1913,1,2008,12,44.9236183333333,-75.1883433333333,82,No -ON,NORTH BAY ,6085700,1915,1,2013,1,46.3636111111111,-79.4227777777778,370,Yes +ON,NORTH BAY,6085700,1915,1,2013,1,46.3636111111111,-79.4227777777778,370,Yes ON,ORANGEVILLE MOE,6155790,1887,1,2015,12,43.9183516666667,-80.0864066666667,412,Yes ON,ORILLIA BRAIN,6115811,1871,1,2017,12,44.6027777777778,-79.4388888888889,250,Yes ON,OTTAWA,6105976,1890,1,2017,12,45.3833333333333,-75.7166666666667,79,No ON,OWEN SOUND MOE,6116132,1879,1,2007,12,44.5833333333333,-80.9333333333333,179,Yes -ON,PELEE ISLAND ,6136336,1888,1,1994,9,41.7833333333333,-82.6833333333333,174,Yes -ON,PETERBOROUGH ,6166418,1866,1,2007,5,44.2333333333333,-78.3666666666667,191,Yes -ON,PICKLE LAKE ,6016527,1933,1,2012,7,51.4463888888889,-90.2141666666667,386,Yes -ON,RED LAKE ,6016975,1939,1,2012,5,51.0669444444445,-93.7930555555556,386,No +ON,PELEE ISLAND,6136336,1888,1,1994,9,41.7833333333333,-82.6833333333333,174,Yes +ON,PETERBOROUGH,6166418,1866,1,2007,5,44.2333333333333,-78.3666666666667,191,Yes +ON,PICKLE LAKE,6016527,1933,1,2012,7,51.4463888888889,-90.2141666666667,386,Yes +ON,RED LAKE,6016975,1939,1,2012,5,51.0669444444445,-93.7930555555556,386,No ON,RIDGETOWN,6137149,1883,1,1997,4,42.45,-81.8833333333333,206,Yes -ON,SAULT STE MARIE ,6057592,1945,1,2012,3,46.4833333333333,-84.5094444444444,192,Yes -ON,SIOUX LOOKOUT ,6037775,1914,1,2013,2,50.1166666666667,-91.9,383,Yes +ON,SAULT STE MARIE,6057592,1945,1,2012,3,46.4833333333333,-84.5094444444444,192,Yes +ON,SIOUX LOOKOUT,6037775,1914,1,2013,2,50.1166666666667,-91.9,383,Yes ON,SMOKY FALLS,6077845,1934,1,1997,4,50.0666666666667,-82.1666666666667,183,No -ON,SUDBURY ,6068150,1921,1,2013,3,46.6255555555556,-80.7977777777778,348,Yes -ON,TERRACE BAY ,6048231,1910,1,2007,9,48.8166666666667,-87.1,290,Yes -ON,TIMMINS VICTOR POWER ,6078285,1955,1,2011,2,48.5697222222222,-81.3766666666667,295,No +ON,SUDBURY,6068150,1921,1,2013,3,46.6255555555556,-80.7977777777778,348,Yes +ON,TERRACE BAY,6048231,1910,1,2007,9,48.8166666666667,-87.1,290,Yes +ON,TIMMINS VICTOR POWER,6078285,1955,1,2011,2,48.5697222222222,-81.3766666666667,295,No ON,TOBERMORY CYPRUS LAKE,6128323,1915,1,1994,12,45.2333333333333,-81.5333333333333,190,Yes ON,TORONTO,6158350,1840,1,2017,4,43.6666666666667,-79.4,113,No -ON,TORONTO LESTER B. PEARSON ,6158733,1938,1,2013,6,43.6772222222222,-79.6305555555556,173,No +ON,TORONTO LESTER B. 
PEARSON,6158733,1938,1,2013,6,43.6772222222222,-79.6305555555556,173,No ON,TRANQUILLO RIDGE,6048864,1877,1,2007,12,48.2333333333333,-89.5166666666667,317,Yes ON,VINELAND,6139141,1919,1,2013,12,43.15,-79.4166666666667,110,Yes ON,WALLACEBURG,6139265,1906,1,1997,4,42.5833333333333,-82.4,177,No -ON,WAWA ,6059D09 ,1940,1,2014,9,47.9666666666667,-84.7833333333333,287,Yes +ON,WAWA ,6059D09,1940,1,2014,9,47.9666666666667,-84.7833333333333,287,Yes ON,WELLAND,6139445,1873,1,2014,8,42.9925266666667,-79.2611383333333,175,No -ON,WIARTON ,6119500,1948,1,2014,11,44.7458333333333,-81.1072222222222,222,No -ON,WINDSOR ,6139525,1866,1,2014,10,42.2755555555556,-82.9555555555556,190,Yes +ON,WIARTON,6119500,1948,1,2014,11,44.7458333333333,-81.1072222222222,222,No +ON,WINDSOR,6139525,1866,1,2014,10,42.2755555555556,-82.9555555555556,190,Yes ON,WOODSTOCK,6149625,1870,1,2017,12,43.1361233333333,-80.7705666666667,282,No QC,ARMAGH,7050240,1916,1,1994,5,46.75,-70.5333333333333,358,Yes QC,ARUNDEL,7030310,1914,1,2017,5,45.95,-74.6166666666667,191,Yes -QC,BAGOTVILLE ,7060400,1876,1,2017,12,48.3333333333333,-71,159,Yes +QC,BAGOTVILLE,7060400,1876,1,2017,12,48.3333333333333,-71,159,Yes QC,BARRAGE ANGLIERS,7080452,1911,1,1996,5,47.5519444444444,-79.2358333333333,267,No QC,BARRAGE TEMISCAMINGUE,7080468,1910,1,1995,10,46.7097222222222,-79.1011111111111,181,No QC,BELLETERRE,7080600,1952,1,2004,4,47.3833333333333,-78.7,322,No QC,BROME,7020840,1877,1,2014,7,45.1833333333333,-72.5666666666667,206,No QC,CAUSAPSCAL,7051200,1921,1,2017,8,48.3666666666667,-67.2333333333333,168,No -QC,CHIBOUGAMAU CHAPAIS ,7091404,1937,1,2016,11,49.7666666666667,-74.5333333333333,387,Yes +QC,CHIBOUGAMAU CHAPAIS,7091404,1937,1,2016,11,49.7666666666667,-74.5333333333333,387,Yes QC,CHELSEA,7031360,1928,1,2017,8,45.5166666666667,-75.7833333333333,113,No QC,DONNACONA,7012071,1919,1,2008,11,46.6833333333333,-71.7333333333333,46,Yes QC,DRUMMONDVILLE,7022160,1914,1,2017,8,45.8833333333333,-72.4833333333333,82,No QC,GASPE ,7052605,1916,1,2013,3,48.7769444444445,-64.4780555555556,33,Yes QC,GRANDE VALLEE,7052865,1883,1,2004,4,49.2,-65.15,8,Yes -QC,ILES DE LA MADELEINE ,705C2G9 ,1934,1,2002,11,47.4166666666667,-61.7833333333333,11,Yes +QC,ILES DE LA MADELEINE,705C2G9,1934,1,2002,11,47.4166666666667,-61.7833333333333,11,Yes QC,INUKJUAK,7103282,1938,1,1994,2,58.4666666666667,-78.0833333333333,24,No QC,JOLIETTE VILLE,7013362,1914,1,2011,4,46.0166666666667,-73.4333333333333,56,Yes -QC,KUUJJUAQ ,7113534,1947,1,2014,3,58.1,-68.4166666666667,39,No -QC,KUUJJUARAPIK ,7103536,1934,1,2014,4,55.2833333333333,-77.75,10,No +QC,KUUJJUAQ,7113534,1947,1,2014,3,58.1,-68.4166666666667,39,No +QC,KUUJJUARAPIK,7103536,1934,1,2014,4,55.2833333333333,-77.75,10,No QC,LA MALBAIE,7043960,1914,1,2004,4,47.6666666666667,-70.15,23,No QC,LA POCATIERE,7054095,1913,1,1996,3,47.35,-70.0333333333333,31,No QC,LA SARRE,7094120,1952,1,2004,4,48.7833333333333,-79.2166666666667,244,No QC,LA TUQUE,7074240,1912,1,2004,4,47.4,-72.7833333333333,152,No QC,LABRIEVILLE,7043540,1955,1,1994,12,49.3,-69.55,152,No -QC,LAC BERRY,709CEE9 ,1914,1,2017,8,48.8,-78.2833333333333,305,Yes +QC,LAC BERRY,709CEE9,1914,1,2017,8,48.8,-78.2833333333333,305,Yes QC,LAUZON,7024254,1872,1,2017,8,46.8166666666667,-71.1,69,Yes QC,LEBEL SUR QUEVILLON,7094275,1967,1,2004,4,49.05,-76.9666666666667,305,No QC,LENNOXVILLE,7024280,1915,1,1995,10,45.3688888888889,-71.8236111111111,181,No QC,LES BUISSONS,7044288,1947,1,2017,8,49.1166666666667,-68.3833333333333,15,Yes QC,LES CEDRES,7014290,1913,1,2017,8,45.3,-74.05,47,No -QC,MATAGAMI 
,7094639,1964,1,1991,6,49.7666666666667,-77.8166666666667,281,Yes +QC,MATAGAMI,7094639,1964,1,1991,6,49.7666666666667,-77.8166666666667,281,Yes QC,MONT LAURIER,7035160,1920,1,2014,6,46.5666666666667,-75.55,244,Yes -QC,MONT-JOLI ,7055120,1943,1,2013,3,48.6,-68.2166666666667,52,No -QC,MONTREAL/PIERRE ELLIOTT TRUDEAU ,7025250,1872,1,2016,9,45.4666666666667,-73.75,36,Yes -QC,NATASHQUAN ,7045400,1915,1,2003,3,50.1833333333333,-61.8166666666667,11,No +QC,MONT-JOLI,7055120,1943,1,2013,3,48.6,-68.2166666666667,52,No +QC,MONTREAL/PIERRE ELLIOTT TRUDEAU,7025250,1872,1,2016,9,45.4666666666667,-73.75,36,Yes +QC,NATASHQUAN,7045400,1915,1,2003,3,50.1833333333333,-61.8166666666667,11,No QC,NICOLET,7025440,1914,1,2017,8,46.2,-72.6166666666667,30,No QC,NOMININGUE,7035520,1914,1,2013,11,46.4,-75.0833333333333,274,No QC,NORMANDIN,7065640,1936,1,1992,8,48.85,-72.5333333333333,137,No @@ -384,8 +384,8 @@ QC,PARENT S,7075799,1943,1,2004,4,47.9166666666667,-74.6166666666667,410,Yes QC,POINTE AU CHENE,7036063,1919,1,2009,6,45.65,-74.8,51,Yes QC,QUAQTAQ,7116270,1930,1,1988,5,61.05,-69.6333333333333,30,Yes QC,RIMOUSKI,7056480,1877,1,2017,8,48.45,-68.5166666666667,36,Yes -QC,ROBERVAL ,7066685,1914,1,2014,3,48.5166666666667,-72.2666666666667,179,Yes -QC,SCHEFFERVILLE ,7117825,1949,1,1993,9,54.8,-66.8166666666667,522,No +QC,ROBERVAL,7066685,1914,1,2014,3,48.5166666666667,-72.2666666666667,179,Yes +QC,SCHEFFERVILLE,7117825,1949,1,1993,9,54.8,-66.8166666666667,522,No QC,SENNETERRE,7097900,1940,1,1994,5,48.3333333333333,-77.2666666666667,310,Yes QC,SEPT-ILES,7047912,1945,1,2017,5,50.2166666666667,-66.25,53,Yes QC,SHAWINIGAN,7018000,1902,1,2004,4,46.5666666666667,-72.75,122,No @@ -402,43 +402,43 @@ QC,TADOUSSAC,7048320,1914,1,2004,4,48.15,-69.7,70,No QC,TETE A LA BALEINE,7048421,1912,1,1995,3,50.7,-59.3166666666667,9,Yes QC,THETFORD MINES,7028441,1922,1,2016,7,46.1,-71.35,381,Yes QC,TRINITE DES MONTS,7058520,1951,1,2004,4,48.1333333333333,-68.4833333333333,262,No -QC,VAL-D'OR ,7098600,1952,1,2017,12,48.0563888888889,-77.7866666666667,337,No +QC,VAL-D'OR,7098600,1952,1,2017,12,48.0563888888889,-77.7866666666667,337,No QC,VILLE MARIE,7088760,1914,1,2004,4,47.35,-79.4333333333333,213,No QC,WRIGHT,7038975,1914,1,2017,8,46.0666666666667,-76.05,142,Yes NS,ANNAPOLIS ROYAL,8200100,1915,1,2007,12,44.75,-65.5166666666667,8,No NB,AROOSTOOK,8100300,1920,1,2017,12,46.7122222222222,-67.7155555555556,91,Yes -NB,BATHURST ,8100503,1884,1,2013,10,47.6291805555556,-65.7483388888889,59,Yes +NB,BATHURST,8100503,1884,1,2013,10,47.6291805555556,-65.7483388888889,59,Yes NL,BAY D'ESPOIR,8400413,1968,1,2017,12,47.9833333333333,-55.8,23,No NS,BEAR RIVER,8200500,1915,1,2006,2,44.5666666666667,-65.6333333333333,8,Yes NL,BURGEO,8400798,1939,1,1995,7,47.6166666666667,-57.6166666666667,11,Yes NL,CARTWRIGHT,8501100,1936,1,2015,3,53.7083333333333,-57.035,14,No -NB,CHARLO ,8100880,1934,1,2002,10,47.9833333333333,-66.3333333333333,40,Yes +NB,CHARLO,8100880,1934,1,2002,10,47.9833333333333,-66.3333333333333,40,Yes PE,CHARLOTTETOWN ,8300300,1872,1,2012,9,46.2886166666667,-63.1286305555556,49,Yes -NL,CHURCHILL FALLS,850A131 ,1969,1,1998,4,53.5333333333333,-63.9666666666667,489,Yes +NL,CHURCHILL FALLS,850A131,1969,1,1998,4,53.5333333333333,-63.9666666666667,489,Yes NS,COLLEGEVILLE,8201000,1916,1,2014,6,45.4833333333333,-62.0166666666667,76,No NL,CORNER BROOK,8401300,1933,1,2017,12,48.95,-57.95,5,No NL,DANIELS HARBOUR,8401400,1947,1,1998,1,50.2363888888889,-57.5811111111111,19,No NL,DEER LAKE ,8401501,1933,1,2012,3,49.2166666666667,-57.4,22,Yes 
NS,DEMING,8201410,1884,1,2011,12,45.2163908333333,-61.1778027777778,16,Yes NB,DOAKTOWN,8101200,1944,1,2009,6,46.5525138888889,-66.1402916666667,38,No -NB,EDMUNDSTON,810AL00 ,1916,1,2009,7,47.3463888888889,-68.1877777777778,163,Yes +NB,EDMUNDSTON,810AL00,1916,1,2009,7,47.3463888888889,-68.1877777777778,163,Yes NL,EXPLOITS DAM,8401550,1956,1,2009,2,48.7666666666667,-56.6,154,No NB,FREDERICTON ,8101500,1874,1,2010,4,45.8721305555556,-66.5278916666667,21,Yes -NL,GANDER ,8401700,1937,1,2012,3,48.9463888888889,-54.5769444444444,151,No -NL,GOOSE ,8501900,1942,1,2017,12,53.3166666666667,-60.4166666666667,49,No +NL,GANDER,8401700,1937,1,2012,3,48.9463888888889,-54.5769444444444,151,No +NL,GOOSE,8501900,1942,1,2017,12,53.3166666666667,-60.4166666666667,49,No NL,GRAND FALLS,8402050,1937,1,2009,1,48.9333333333333,-55.6666666666667,60,No -NS,GREENWOOD ,8202000,1943,1,2017,12,44.9833333333333,-64.9166666666667,28,No -NS,HALIFAX STANFIELD ,8202250,1872,1,2012,9,44.8800166666667,-63.5000138888889,145,Yes +NS,GREENWOOD,8202000,1943,1,2017,12,44.9833333333333,-64.9166666666667,28,No +NS,HALIFAX STANFIELD,8202250,1872,1,2012,9,44.8800166666667,-63.5000138888889,145,Yes NL,ISLE UX MORTS,8402450,1909,1,2004,10,47.5833333333333,-58.9666666666667,5,Yes NB,KEDGWICK,8102300,1932,1,1994,9,47.65,-67.35,274,No NS,LIVERPOOL BIG FALLS,8203100,1940,1,2012,10,44.1333333333333,-64.9333333333333,50,No -NL,MAKKOVIK ,8502NHR ,1942,1,2014,11,55.0822222222222,-59.1886111111111,71,Yes -NL,MARY'S HARBOUR ,8502591,1881,1,1998,1,52.3036111111111,-55.8336111111111,12,Yes -NB,MIRAMICHI ,8101000,1873,1,2005,8,47.0094694444444,-65.4677888888889,33,Yes -NB,MONCTON ,8103200,1898,1,2012,6,46.1053055555556,-64.6838055555556,71,Yes +NL,MAKKOVIK,8502NHR,1942,1,2014,11,55.0822222222222,-59.1886111111111,71,Yes +NL,MARY'S HARBOUR,8502591,1881,1,1998,1,52.3036111111111,-55.8336111111111,12,Yes +NB,MIRAMICHI,8101000,1873,1,2005,8,47.0094694444444,-65.4677888888889,33,Yes +NB,MONCTON,8103200,1898,1,2012,6,46.1053055555556,-64.6838055555556,71,Yes PE,MONTICELLO,8300447,1960,1,2003,12,46.4666666666667,-62.4666666666667,32,No NS,MOUNT UNIACKE,8203600,1920,1,2003,7,44.9,-63.8333333333333,159,No -NL,NAIN ,8502800,1939,1,2013,3,56.55,-61.6833333333333,7,No +NL,NAIN,8502800,1939,1,2013,3,56.55,-61.6833333333333,7,No NS,NAPPAN,8203700,1913,1,2003,7,45.7666666666667,-64.25,20,No NB,NEPISIGUIT FALLS,8103500,1922,1,2006,2,47.4,-65.7833333333333,106,No NL,NORTH HARBOUR,8402874,1939,1,2007,11,47.1333333333333,-53.6666666666667,11,Yes @@ -446,22 +446,22 @@ NS,PARRSBORO,8204400,1897,1,2002,9,45.4,-64.3333333333333,24,No NL,PLUM POINT,8402958,1972,1,2016,6,51.0666666666667,-56.8833333333333,6,No NB,REXTON,8104400,1923,1,2009,12,46.6666666666667,-64.8666666666667,5,No NS,SABLE ISLAND,8204700,1891,1,2001,12,43.9322222222222,-60.0094444444444,5,No -NB,SAINT JOHN ,8104900,1871,1,2012,6,45.3180555555556,-65.8855694444444,109,Yes +NB,SAINT JOHN,8104900,1871,1,2012,6,45.3180555555556,-65.8855694444444,109,Yes NL,SPRINGDALE,8403700,1956,1,1993,6,49.5,-56.0833333333333,23,No NS,SPRINGFIELD,8205200,1920,1,2003,8,44.6666666666667,-64.85,167,No -NL,ST ANTHONY ,840C401 ,1883,1,2008,1,51.3833333333333,-56.1,33,Yes -NL,ST JOHN'S ,8403506,1874,1,2012,3,47.6222222222222,-52.7427777777778,141,Yes +NL,ST ANTHONY,840C401,1883,1,2008,1,51.3833333333333,-56.1,33,Yes +NL,ST JOHN'S,8403506,1874,1,2012,3,47.6222222222222,-52.7427777777778,141,Yes NS,ST MARGARET'S BAY,8204800,1922,1,2017,12,44.7,-63.9,17,No -NL,STEPHENVILLE ,8403800,1935,1,2014,10,48.5333333333333,-58.55,26,Yes 
-PE,SUMMERSIDE ,8300700,1936,1,2002,6,46.4388888888889,-63.8316666666667,20,Yes +NL,STEPHENVILLE,8403800,1935,1,2014,10,48.5333333333333,-58.55,26,Yes +PE,SUMMERSIDE,8300700,1936,1,2002,6,46.4388888888889,-63.8316666666667,20,Yes NB,SUSSEX,8105200,1898,1,2009,5,45.7166666666667,-65.5333333333333,21,No -NS,SYDNEY ,8205700,1870,1,2014,8,46.1666666666667,-60.0481388888889,62,Yes +NS,SYDNEY,8205700,1870,1,2014,8,46.1666666666667,-60.0481388888889,62,Yes NS,TRURO,8205990,1910,1,2002,10,45.3666666666667,-63.2666666666667,40,Yes NS,UPPER STEWIACKE,8206200,1916,1,2008,4,45.2166666666667,-63,23,No -NL,WABUSH LAKE ,8504175,1961,1,2013,2,52.9272222222222,-66.8741666666667,551,No +NL,WABUSH LAKE,8504175,1961,1,2013,2,52.9272222222222,-66.8741666666667,551,No NL,WESTBROOK ST LAWRENCE,8404201,1957,1,1995,7,46.95,-55.3833333333333,31,No NS,WESTPORT,8206260,1937,1,1993,6,44.25,-66.3666666666667,18,Yes NS,WHITE ROCK,8206316,1913,1,2017,6,45.05,-64.3833333333333,38,Yes NB,WOODSTOCK,8105600,1914,1,2017,12,46.1702777777778,-67.5536111111111,153,No NS,WRECK COVE BROOK,8206450,1951,1,2012,12,46.5333333333333,-60.45,76,Yes -NS,YARMOUTH ,8206500,1880,1,2012,4,43.8308333333333,-66.0886111111111,43,Yes +NS,YARMOUTH,8206500,1880,1,2012,4,43.8308333333333,-66.0886111111111,43,Yes diff --git a/src/miranda/eccc/data/ahccd_gen3_temperature.csv b/src/miranda/preprocess/configs/ahccd_gen3_temperature.csv similarity index 99% rename from src/miranda/eccc/data/ahccd_gen3_temperature.csv rename to src/miranda/preprocess/configs/ahccd_gen3_temperature.csv index 8c56a6b5..4a65dc15 100644 --- a/src/miranda/eccc/data/ahccd_gen3_temperature.csv +++ b/src/miranda/preprocess/configs/ahccd_gen3_temperature.csv @@ -24,7 +24,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 21,1161663,CLINTON_AUT,BC,1993,1,2019,12,4.6,51.1,-121.5,105,y,y 22,1021830,COMOX,BC,1935,11,2019,12,1.2,49.7,-124.9,2,y,n 23,1021960,CORTES_ISLAND,BC,1947,3,2019,2,9.9,50,-124.9,1,y,n -24,1012010,COWICHAN_BAY_CHERRY_,BC,1913,10,1984,3,7.7,48.7,-123.5,0,n,n +24,1012010,COWICHAN_BAY_CHERRY,BC,1913,10,1984,3,7.7,48.7,-123.5,0,n,n 25,1152106,CRANBROOK,BC,1901,1,2019,12,6.6,49.6,-115.7,92,y,y 26,114B1F0,CRESTON,BC,1912,6,2019,12,0.5,49,-116.5,64,y,y 27,1022250,CUMBERLAND,BC,1922,5,1977,6,4.7,49.6,-125,15,n,n @@ -102,7 +102,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 99,1176755,REVELSTOKE,BC,1898,5,2019,12,7.3,50.9,-118.1,44,y,y 100,1016940,SAANICHTON_CDA,BC,1914,3,2019,7,0.6,48.6,-123.4,6,n,n 101,1167337,SALMON_ARM,BC,1911,7,2019,12,1.1,50.5,-119.3,41,y,n -102,1016995,SALTSPRING_,BC,1909,11,2019,12,1,48.8,-123.5,4,y,n +102,1016995,SALTSPRING,BC,1909,11,2019,12,1,48.8,-123.5,4,y,n 103,1057051,SANDSPIT,BC,1945,9,2019,12,4.2,53.2,-131.8,0,y,y 104,1017099,SATURNA_CAPMON,BC,1989,6,2019,12,3,48.7,-123.1,17,y,y 105,1017230,SHAWNIGAN_LAKE,BC,1913,4,2019,12,0.6,48.6,-123.6,15,n,n @@ -620,7 +620,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 617,7055122,MONT_JOLI,QUE,1875,10,2019,12,0.6,48.6,-68.2,5,y,y 618,7035160,MONT_LAURIER,QUE,1920,7,2014,6,7.2,46.5,-75.5,24,y,n 619,7024745,MONTREAL_TAVISH,QUE,1871,7,2019,12,2.8,45.5,-73.5,7,y,n -620,702S006,MONTREAL__TRUDEAU_IN,QUE,1953,1,2019,12,0.5,45.4,-73.7,3,y,y +620,702S006,MONTREAL_TRUDEAU_INTERNATIONAL,QUE,1953,1,2019,12,0.5,45.4,-73.7,3,y,y 621,7045401,NATASHQUAN,QUE,1914,10,2019,12,4.1,50.1,-61.8,1,y,y 622,7055422,NEW_CARLISLE,QUE,1963,1,2019,12,17.8,48,-65.3,4,y,n 
623,7025442,NICOLET,QUE,1913,11,2019,12,2.9,46.2,-72.6,0,y,n @@ -657,8 +657,8 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 654,7016800,ST_ALBAN,QUE,1949,9,2019,10,2.3,46.7,-72,7,n,n 655,7066820,ST_AMBROISE,QUE,1954,9,2019,10,4.5,48.5,-71.3,12,n,n 656,702FQLF,ST_ANICET,QUE,1960,11,2019,12,2,45.1,-74.2,4,y,y -657,7056930,ST_CAMILLE_,QUE,1963,7,2019,10,2,46.4,-70.2,39,n,n -658,7016960,ST_CHARLES_DE_MANDE_,QUE,1976,6,2019,10,21.4,46.3,-73.3,16,n,n +657,7056930,ST_CAMILLE,QUE,1963,7,2019,10,2,46.4,-70.2,39,n,n +658,7016960,ST_CHARLES_DE_MANDE,QUE,1976,6,2019,10,21.4,46.3,-73.3,16,n,n 659,7017080,ST_COME,QUE,1950,12,2018,11,4.6,46.2,-73.7,24,n,n 660,7027083,ST_COME_DE_LINIERE,QUE,1965,9,2019,10,3.7,46,-70.5,24,n,n 661,7027200,ST_EPHREM,QUE,1929,2,2019,10,18.1,46,-70.9,31,n,n @@ -666,7 +666,7 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 663,7027259,ST_FLAVIEN,QUE,1963,1,2016,8,2.1,46.4,-71.5,13,n,n 664,7027302,ST_GUILLAUME,QUE,1963,1,2015,10,7.6,45.8,-72.7,4,n,n 665,7037310,ST_HIPPOLYTE,QUE,1961,2,2019,10,4.9,45.9,-74,36,n,n -666,7027329,ST_HUBERT_MONT_,QUE,1953,1,2019,12,0.8,45.5,-73.4,2,y,n +666,7027329,ST_HUBERT_MONT,QUE,1953,1,2019,12,0.8,45.5,-73.4,2,y,n 667,7027361,ST_HYACINTHE,QUE,1935,1,2019,10,8.4,45.5,-72.9,3,y,n 668,7037400,ST_JEROME,QUE,1932,5,2019,10,4.3,45.8,-74,17,n,n 669,7027516,ST_LUDGER,QUE,1964,10,2019,10,3.1,45.7,-70.6,33,n,n @@ -778,6 +778,6 @@ No,StnId,Station name,Prov,FromYear,FromMonth,ToYear,ToMonth,%Miss,Lat(deg),Long 775,8403603,ST_JOHN_WEST,NFLD,1950,11,2019,12,6.6,47.5,-52.7,11,y,y 776,8403619,ST_LAWRENCE,NFLD,1989,11,2019,12,14.6,46.9,-55.3,4,y,y 777,8403820,STEPHENVILLE,NFLD,1895,6,2019,12,6.6,48.5,-58.5,5,y,y -778,8403851,TERRA_NOVA_NAT_PARK_,NFLD,1962,3,2019,12,7.1,48.5,-53.9,10,y,y +778,8403851,TERRA_NOVA_NAT_PARK,NFLD,1962,3,2019,12,7.1,48.5,-53.9,10,y,y 779,8504177,WABUSH_LAKE,NFLD,1960,11,2019,12,0.8,52.9,-66.8,55,y,y 780,8404343,WRECKHOUSE,NFLD,1981,6,2019,12,1.5,47.7,-59.3,3,y,y diff --git a/src/miranda/preprocess/configs/eccc-ahccd_attrs.json b/src/miranda/preprocess/configs/eccc-ahccd_attrs.json new file mode 100644 index 00000000..3de37b07 --- /dev/null +++ b/src/miranda/preprocess/configs/eccc-ahccd_attrs.json @@ -0,0 +1,131 @@ +{ + "Header": { + "_citation": { + "generation": { + "Second": "Mekis, É and L.A. Vincent, 2011: An overview of the second generation adjusted daily precipitation dataset for trend analysis in Canada. Atmosphere-Ocean 49(2), 163-177 doi:10.1080/07055900.2011.583910", + "Third": "Vincent, L.A., M.M. Hartwell and X.L. Wang, 2020: A Third Generation of Homogenized Temperature for Trend Analysis and Monitoring Changes in Canada’s Climate. Atmosphere-Ocean. 
https://doi.org/10.1080/07055900.2020.1765728"
+      }
+    },
+    "_miranda_version": true,
+    "_product": {
+      "generation": {
+        "Second": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 2",
+        "Third": "ECCC Adjusted and Homogenized Canadian Climate Data (AHCCD) version 3"
+      }
+    },
+    "_variable": true,
+    "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).",
+    "author": "Environment and Climate Change Canada (ECCC)",
+    "contact": "info.cccs-ccsc@canada.ca",
+    "dataset_id": "d6813de6-b20a-46cc-8990-01862ae15c5f",
+    "documentation": "https://www.canada.ca/en/environment-climate-change/services/climate-change/canadian-centre-climate-services/display-download/technical-documentation-adjusted-climate-data.html",
+    "domain": "CAN",
+    "frequency": "day",
+    "institution": "GovCan",
+    "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html",
+    "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.",
+    "license_type": "permissive",
+    "organization": "ECCC",
+    "processing_level": "adjusted",
+    "project": "AHCCD",
+    "realm": "atmos",
+    "source": "msc",
+    "table_date": "2023-08-03",
+    "table_id": "ECCC",
+    "type": "station-obs"
+  },
+  "dimensions": {
+    "lat": {
+      "axis": "Y",
+      "long_name": "Latitude",
+      "standard_name": "latitude",
+      "units": "degrees_north"
+    },
+    "long": {
+      "axis": "X",
+      "long_name": "Longitude",
+      "standard_name": "longitude",
+      "units": "degrees_east"
+    },
+    "time": {
+      "axis": "T",
+      "long_name": "Time",
+      "standard_name": "time"
+    }
+  },
+  "variables": {
+    "dm": {
+      "NaN_value": -9999.9,
+      "_variable_name": "tas",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Mean Temp (°C)",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Near-Surface Air Temperature",
+      "missing_flags": "M",
+      "original_field": "Mean Temp (°C)",
+      "units": "degC"
+    },
+    "dn": {
+      "NaN_value": -9999.9,
+      "_variable_name": "tasmin",
+      "cell_methods": "time: minimum",
+      "comments": "Station data converted from Min Temp (°C)",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Daily Minimum Near-Surface Air Temperature",
+      "missing_flags": "M",
+      "original_field": "Min Temp (°C)",
+      "units": "degC"
+    },
+    "dr": {
+      "NaN_value": -9999.99,
+      "_variable_name": "prlp",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Liquid Precipitation",
+      "missing_flags": "M",
+      "original_field": "Total Rain (mm)",
+      "units": "mm"
+    },
+    "ds": {
+      "NaN_value": -9999.99,
+      "_variable_name": "prsn",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Snow (cm) using a density of 100 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Snowfall Flux",
+      "missing_flags": "M",
+      "original_field": "Total Snow (cm)",
+      "units": "mm"
+    },
+    "dt": {
+      "NaN_value": -9999.99,
+      "_variable_name": "pr",
+      "cell_methods": "time: mean",
+      "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³",
+      "frequency": "day",
+      "grid_mapping": "regular_lon_lat",
+      "long_name": "Precipitation",
+      "missing_flags": "M",
+      "original_field": "Total Precip (mm)",
+      "units": "mm"
+    },
+    "dx": {
+      "NaN_value": -9999.9,
+
"_variable_name": "tasmax", + "cell_methods": "time: maximum", + "comments": "station data converted from Max Temp (°C)", + "frequency": "day", + "grid_mapping": "regular_lon_lat", + "long_name": "Daily Maximum Near-Surface Air Temperature", + "missing_flags": "M", + "original_field": "Max Temp (°C)", + "standard_name": "air_temperature", + "units": "degC" + } + } +} diff --git a/src/miranda/eccc/eccc_obs_summary_cf_attrs.json b/src/miranda/preprocess/configs/eccc-obs-summary_attrs.json similarity index 54% rename from src/miranda/eccc/eccc_obs_summary_cf_attrs.json rename to src/miranda/preprocess/configs/eccc-obs-summary_attrs.json index b21f224e..11b3dc51 100644 --- a/src/miranda/eccc/eccc_obs_summary_cf_attrs.json +++ b/src/miranda/preprocess/configs/eccc-obs-summary_attrs.json @@ -1,173 +1,160 @@ { "Header": { - "Conventions": "CF-1.8", + "_miranda_version": true, + "_variable": true, + "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).", + "author": "Environment and Climate Change Canada (ECCC)", "contact": "info.cccs-ccsc@canada.ca", + "dataset_id": "b24efb37-11b6-5d03-ab19-5759f83db546", + "documentation": "https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + "domain": "CAN", + "frequency": "mon", "institution": "GovCan", - "int_missing_value": "-999", "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", "license_type": "permissive", - "missing_value": "1e20", "organization": "ECCC", "processing_level": "raw", + "product": "A cross-country summary of the averages and extremes for the month, including precipitation totals, max-min temperatures, and degree days.", + "project": "ECCC-SUMMARIES", "realm": "atmos", "source": "msc", - "table_date": "2023-03-23", + "table_date": "2023-08-07", + "table_id": "ECCC", "type": "station-obs" }, "variable_entry": { "cdd": { - "add_offset": 0, + "_variable_name": "cdd", "cell_methods": "time: sum", "comments": "Station data converted from Cool Deg Days (°C)", "frequency": "day", "grid_mapping": "regular_lon_lat", "long_name": "Number of Degrees Celsius Over a Mean Temperature of 18 °C", - "original_variable": "Cool Deg Days (°C)", - "out_name": "cdd", - "scale_factor": 1, - "standard_name": "cooling_degree_days", + "original_field": "Cool Deg Days (°C)", "type": "real", - "units": "C" + "units": "degC" }, "hdd": { - "add_offset": 0, + "_variable_name": "hdd", "cell_methods": "time: sum", "comments": "Station data converted from Heat Deg Days (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Number of Degrees Celsius Under a Mean Temperature of 18 °C", - "original_variable": "Heat Deg Days (°C)", - "out_name": "hdd", - "scale_factor": 1, - "standard_name": "heating_degree_days", + "original_field": "Heat Deg Days (°C)", "type": "real", - "units": "C" + "units": "degC" }, "pr": { - "add_offset": 0, + "_variable_name": "pr", "cell_methods": "time: mean", "comments": "Station data converted from Total Precip (mm) using a density of 1000 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Precipitation", - "original_variable": "Total Precip (mm)", - "out_name": "pr", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "precipitation_flux", + "original_field": "Total 
Precip (mm)", "type": "real", - "units": "kg m-2 s-1" + "units": "mm" }, "prlp": { - "add_offset": 0, + "_variable_name": "prlp", "cell_methods": "time: mean", "comments": "Station data converted from Total Rain (mm) using a density of 1000 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Liquid Precipitation", - "original_variable": "Total Rain (mm)", - "out_name": "prlp", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "rainfall_flux", + "original_field": "Total Rain (mm)", "type": "real", - "units": "kg m-2 s-1" + "units": "mm" }, "prsn": { - "add_offset": 0, + "_variable_name": "prsn", "cell_methods": "time: mean", "comments": "station data converted from Total Snow (cm) using a density of 100 kg/m³", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Snowfall Flux", - "original_variable": "Total Snow (cm)", - "out_name": "prsn", - "scale_factor": 1.1574074074074073e-05, - "standard_name": "snowfall_flux", + "original_field": "Total Snow (cm)", "type": "real", - "units": "kg m-2 s-1" + "units": "cm" }, "sfcWindAz": { - "add_offset": 0, + "_variable_name": "sfcWindAz", "cell_methods": "time: mean", "comments": "Station data converted from Dir of Max Gust (10s deg)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Direction from which the Daily Maximum Near-Surface Gust Wind Speed maximum Blows", - "original_variable": "Dir of Max Gust (10s deg)", - "out_name": "sfcWindAz", - "scale_factor": 1, - "standard_name": "wind_direction", + "original_field": "Dir of Max Gust (10s deg)", "type": "real", "units": "degree" }, "sfcWindMax": { - "add_offset": 0, + "_variable_name": "sfcWindMax", "cell_methods": "time: max", "comments": "Station data converted from Spd of Max Gust (km/h)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Maximum Near-Surface Gust Wind Speed maximum", - "original_variable": "Spd of Max Gust (km/h)", - "out_name": "sfcWindMax", - "scale_factor": 0.2777777777777778, - "standard_name": "wind_speed_of_gust maximum", + "original_field": "Spd of Max Gust (km/h)", "type": "real", - "units": "m s-1" + "units": "km h-1" }, "snd": { - "add_offset": 0, + "_variable_name": "snd", "cell_methods": "time: mean", "comments": "Station data converted from Snow on Grnd (cm)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Snow Depth", - "original_variable": "Snow on Grnd (cm)", - "out_name": "snd", - "scale_factor": 0.01, - "standard_name": "surface_snow_thickness", + "original_field": "Snow on Grnd (cm)", "type": "real", - "units": "m" + "units": "cm" }, "tas": { - "add_offset": 273.15, + "_variable_name": "tas", "cell_methods": "time: mean", - "comments": "Station data converted from Mean Temp (°C)", - "frequency": "day", + "comments": "Station data converted from Mean Temperature (°C)", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Near-Surface Air Temperature", - "original_variable": "Mean Temp (°C)", - "out_name": "tas", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Mean Temperature", + "type": "real", + "units": "degC" + }, + "tas_days": { + "_variable_name": "tas_days", + "cell_methods": "time: count", + "comments": "Station data converted from Days With Valid Mean Temperature", + "frequency": "mon", + "grid_mapping": "regular_lon_lat", + "long_name": "Number of Days With Valid 
Near-Surface Air Temperature", + "original_field": "Days With Valid Mean Temp", "type": "real", - "units": "K" + "units": "1" }, "tasmax": { - "add_offset": 273.15, + "_variable_name": "tasmax", "cell_methods": "time: maximum", "comments": "station data converted from Max Temp (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Maximum Near-Surface Air Temperature", - "original_variable": "Max Temp (°C)", - "out_name": "tasmax", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Max Temp (°C)", "type": "real", - "units": "K" + "units": "degC" }, "tasmin": { - "add_offset": 273.15, + "_variable_name": "tasmin", "cell_methods": "time: minimum", "comments": "Station data converted from Min Temp (°C)", - "frequency": "day", + "frequency": "mon", "grid_mapping": "regular_lon_lat", "long_name": "Daily Minimum Near-Surface Air Temperature", - "original_variable": "Min Temp (°C)", - "out_name": "tasmin", - "scale_factor": 1, - "standard_name": "air_temperature", + "original_field": "Min Temp (°C)", "type": "real", - "units": "K" + "units": "degC" } } } diff --git a/src/miranda/preprocess/configs/eccc-obs_attrs.json b/src/miranda/preprocess/configs/eccc-obs_attrs.json new file mode 100644 index 00000000..8f438b16 --- /dev/null +++ b/src/miranda/preprocess/configs/eccc-obs_attrs.json @@ -0,0 +1,745 @@ +{ + "Header": { + "_frequency": true, + "_miranda_version": true, + "acknowledgement": "This data is provided by Environment and Climate Change Canada (ECCC).", + "author": "Environment and Climate Change Canada (ECCC)", + "contact": "ccsc-cccs@ec.gc.ca", + "documentation": "https://climate.weather.gc.ca/doc/Technical_Documentation.pdf", + "institution": "GovCan", + "license": "https://climate.weather.gc.ca/prods_servs/attachment1_e.html", + "license_preamble": "The data is owned by the Government of Canada (Environment and Climate Change Canada), and fall under the licence agreement for use of Environment and Climate Change Canada data.", + "license_type": "permissive", + "organization": "ECCC", + "processing_level": "raw", + "source": "msc", + "table_date": "2023-08-02", + "title": "Environment and Climate Change Canada (ECCC) weather station observations", + "type": "station-obs", + "usage": "The original data is owned by the Government of Canada (Environment and Climate Change Canada), and falls under the licence agreement for use of Environment and Climate Change Canada data" + }, + "variables": { + "001": { + "_variable_name": "tasmax", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Maximum Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature_maximum", + "units": "degC" + }, + "002": { + "_variable_name": "tasmin", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Minimum Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature_minimum", + "units": "degC" + }, + "003": { + "_variable_name": "tas", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Mean Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "air_temperature", + "units": "degC" + }, + "010": { + "_variable_name": "prlptot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Rainfall", + "original_units": "0.1 mm day-1", + "scale_factor": 0.1, + "standard_name": "liquid_precipitation_amount", + "units": "mmn day-1" + 
}, + "011": { + "_variable_name": "prsntot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Snowfall", + "original_units": "0.1 cm day-1", + "scale_factor": 0.1, + "standard_name": "solid_precipitation_amount", + "units": "cm day-1" + }, + "012": { + "_variable_name": "prcptot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Daily Total Precipitation", + "original_units": "0.1 mm day-1", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm day-1" + }, + "013": { + "_variable_name": "sndtot", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Snow on the Ground", + "original_units": "cm", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "014": { + "_variable_name": "thunder", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Thunderstorms", + "scale_factor": 1, + "standard_name": "thunderstorm_presence", + "units": "1" + }, + "015": { + "_variable_name": "freezing_rain_drizzle", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Freezing rain or drizzle", + "scale_factor": 1, + "standard_name": "freeze_rain_drizzle_presence", + "units": "1" + }, + "016": { + "_variable_name": "hail", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Hail", + "scale_factor": 1, + "standard_name": "hail_presence", + "units": "1" + }, + "017": { + "_variable_name": "fog_ice_fog", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Fog or Ice Fog", + "scale_factor": 1, + "standard_name": "fog_ice_fog_presence", + "units": "1" + }, + "018": { + "_variable_name": "smoke_haze", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Smoke or Haze", + "scale_factor": 1, + "standard_name": "smoke_haze_presence", + "units": "1" + }, + "019": { + "_variable_name": "blowing_dust_sand", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Blowing Dust or Sand", + "scale_factor": 1, + "standard_name": "blowing_dust_sand_presence", + "units": "1" + }, + "020": { + "_variable_name": "blow_snow", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Blowing snow", + "scale_factor": 1, + "standard_name": "blowing_snow_presence", + "units": "1" + }, + "021": { + "_variable_name": "wind_gt_28kt", + "long_name": "Wind speed >= 28 Knots", + "scale_factor": 1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_exceeding_28_knots", + "units": "1" + }, + "022": { + "_variable_name": "wind_gt_34kt", + "long_name": "Wind speed >= 34 Knots", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "wind_exceeding_34_knots", + "units": "1" + }, + "023": { + "_variable_name": "gust_dir_16pts", + "long_name": "Direction of extreme gust (16 pts) to December 1976", + "original_units": "10's of degrees", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "gust_to_direction", + "units": "deg" + }, + "024": { + "_variable_name": "gust_speed", + "long_name": "Speed of extreme gust", + "original_units": "km/h", + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed_of_gust", + "units": "km h-1" + }, + "025": { + "_variable_name": "gust_hour", + "long_name": "UTC hour of extreme gust", + "standard_name": "hour_of_extreme_gust", + "missing_flags": "M", + "missing_values": "-99999", + "units": "h" + }, + "061": { + "_variable_name": 
"rf1_radiation", + "long_name": "RF1 global solar radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 0.001, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "062": { + "_variable_name": "rf2_radiation", + "long_name": "RF2 sky (diffuse) radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 277.77777777777777, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "063": { + "_variable_name": "rf3_radiation", + "long_name": "RF3 reflected solar radiation", + "original_units": "0.001 MJ/m", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "units": "MJ m-1" + }, + "064": { + "_variable_name": "rf4_radiation", + "long_name": "RF4 net all wave radiation", + "original_units": "0.001 MJ/m", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "missing_flags": "M", + "missing_values": "-99999", + "units": "MJ m-1" + }, + "067": { + "_variable_name": "rf7_radiation", + "long_name": "RF7 daylight illumination", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.01 Kilolux_hrs", + "scale_factor": 0.01, + "standard_name": "solar_radiation_flux", + "units": "klux h" + }, + "068": { + "_variable_name": "rf8_radiation", + "long_name": "RF8 direct solar radiation", + "original_units": "0.001 MJ/m", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 277.77777777777777, + "standard_name": "solar_radiation_flux", + "units": "W m-2 h-1" + }, + "069": { + "_variable_name": "wind_dir_45B", + "long_name": "Direction - 45B anemometer (8 pts)", + "original_units": "10's of degrees", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "wind_to_direction", + "units": "deg" + }, + "071": { + "_variable_name": "ceiling_hgt", + "long_name": "Ceiling height of lowest layer of clouds", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "30's of meters", + "scale_factor": 30, + "standard_name": "ceiling_cloud_height", + "units": "m" + }, + "072": { + "_variable_name": "visibility", + "long_name": "Visibility", + "original_units": "0.1 km", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "visibility_in_air", + "units": "km" + }, + "073": { + "_variable_name": "psl", + "long_name": "Sea Level Pressure", + "original_units": "0.01 kPa", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.01, + "standard_name": "air_pressure_at_mean_sea_level", + "units": "kPa" + }, + "074": { + "_variable_name": "tds", + "long_name": "Dew Point Temperature", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "standard_name": "dew_point_temperature", + "units": "degC" + }, + "075": { + "_variable_name": "wind_dir_u2a_16", + "long_name": "Wind Direction at 2 m (U2A Anemometer) (16 pts)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "10's of degrees", + "scale_factor": 10, + "standard_name": "wind_direction_u2a", + "units": "deg" + }, + "076": { + "_variable_name": "wind_speed_u2a", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Wind Speed - U2A (16 pts) to December 1970", + "original_units": "km/h", + "scale_factor": 1, + "standard_name": "wind_speed_u2a", + "units": "km h-1" + 
}, + "077": { + "_variable_name": "pressure", + "long_name": "Station Pressure", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.01 kPa", + "scale_factor": 0.01, + "standard_name": "atmospheric_pressure", + "units": "kPa" + }, + "078": { + "_variable_name": "tas_dry", + "long_name": "Dry Bulb Temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "dry_bulb_temperature", + "units": "degC" + }, + "079": { + "_variable_name": "tas_wet", + "long_name": "Wet Bulb temperature", + "original_units": "0.1 °C", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wet_bulb_temperature", + "units": "degC" + }, + "080": { + "_variable_name": "hur", + "long_name": "Relative Humidity", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "relative_humidity", + "units": "1" + }, + "081": { + "_variable_name": "clo", + "long_name": "Total Cloud Opacity", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "cloud_albedo", + "units": "1" + }, + "082": { + "_variable_name": "clt", + "long_name": "Total Cloud Amount", + "original_units": "%", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "cloud_area_fraction", + "units": "1" + }, + "089": { + "_variable_name": "freeze_rain", + "long_name": "Freezing Rain", + "scale_factor": 1, + "standard_name": "freezing_rain", + "units": "1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "094": { + "_variable_name": "ice_pellets", + "long_name": "Ice Pellets", + "scale_factor": 1, + "standard_name": "ice_pellet_presence", + "units": "1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "107": { + "_variable_name": "1low_cloud_opac", + "long_name": "Lowest cloud layer opacity", + "original_units": "Tenths", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "108": { + "_variable_name": "1low_cloud_frac", + "long_name": "Lowest cloud layer amount or condition", + "original_units": "Tenths", + "scale_factor": 10, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "109": { + "_variable_name": "1low_cloud_type", + "long_name": "Lowest cloud layer type", + "standard_name": "low_type_cloud_type", + "missing_flags": "M", + "missing_values": "-99999", + "units": "1" + }, + "110": { + "_variable_name": "1low_cloud_hgt", + "long_name": "Lowest cloud layer height", + "original_units": "30's of meters", + "scale_factor": 30, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "111": { + "_variable_name": "2low_cloud_opac", + "long_name": "Second lowest cloud layer opacity", + "original_units": "Tenths", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "112": { + "_variable_name": "2low_cloud_frac", + "long_name": "Second lowest cloud layer amount or condition", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "Tenths", + "scale_factor": 10, + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "113": { + "_variable_name": "2low_cloud_type", + 
"long_name": "Second lowest cloud layer type", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "", + "scale_factor": 1, + "standard_name": "low_type_cloud_type", + "units": "1" + }, + "114": { + "_variable_name": "2low_cloud_hgt", + "long_name": "Second lowest cloud layer height", + "original_units": "30's of meters", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 30, + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "115": { + "_variable_name": "3low_cloud_opac", + "long_name": "Thirsd lowest cloud layer opacity", + "original_units": "Tenths", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "low_type_cloud_opacity_fraction", + "units": "1" + }, + "116": { + "_variable_name": "3low_cloud_frac", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Third lowest cloud layer amount or condition", + "original_units": "Tenths", + "scale_factor": 10, + "standard_name": "low_type_cloud_area_fraction", + "units": "1" + }, + "117": { + "_variable_name": "3low_cloud_type", + "long_name": "Third lowest cloud layer type", + "original_units": "", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "low_type_cloud_type", + "units": "1" + }, + "118": { + "_variable_name": "3low_cloud_hgt", + "long_name": "Third lowest cloud layer height", + "original_units": "30's of meters", + "scale_factor": 30, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "low_type_cloud_height", + "units": "m" + }, + "123": { + "_variable_name": "rainfall", + "long_name": "Total Rainfall", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "rainfall_flux", + "units": "mm h-1" + }, + "133": { + "_variable_name": "sun", + "long_name": "Sunshine", + "original_units": "0.1 hrs", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "duration_of_sunshine", + "units": "h" + }, + "156": { + "_variable_name": "wind_dir_u2a_36", + "long_name": "Wind Direction - U2A (36 pts) from January 1971", + "original_units": "10's of degrees", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 10, + "standard_name": "wind_direction_u2a", + "units": "deg" + }, + "262": { + "_variable_name": "prtot", + "long_name": "Total Precipitation (minutes 00-60)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "precipitation_amount", + "units": "mm" + }, + "263": { + "_variable_name": "prtot_q1", + "long_name": "Total Precipitation (minutes 00-15)", + "original_units": "0.1 mm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm" + }, + "264": { + "_variable_name": "prtot_q2", + "long_name": "Total Precipitation (minutes 15-30)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "mm" + }, + "265": { + "_variable_name": "prtot_q3", + "long_name": "Total Precipitation (minutes 30-45)", + "original_units": "0.1 mm", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "precipitation_amount", + "units": "mm" + }, + "266": { + "_variable_name": "prtot_q4", + "long_name": "Total Precipitation (minutes 45-60)", + "original_units": 
"0.1 mm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "mm" + }, + "267": { + "_variable_name": "precipitation_weight_q1", + "missing_flags": "M", + "missing_values": "-99999", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 15)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "kg m-2" + }, + "268": { + "_variable_name": "precipitation_weight_q2", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 30)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "kg m-2" + }, + "269": { + "_variable_name": "precipitation_weight_q3", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 45)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "units": "kg m-2", + "missing_flags": "M", + "missing_values": "-99999" + }, + "270": { + "_variable_name": "precipitation_weight_q4", + "long_name": "Precipitation Gauge Weight per Unit Area (at minute 60)", + "original_units": "0.1 kg/m²", + "scale_factor": 0.1, + "standard_name": "precipitation_amount", + "missing_flags": "M", + "missing_values": "-99999", + "units": "kg m-2" + }, + "271": { + "_variable_name": "wind_speed_q1", + "long_name": "Wind Speed at 2 m (minutes 00-15)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1" + }, + "272": { + "_variable_name": "wind_speed_q2", + "long_name": "Wind Speed at 2 m (minutes 15-30)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed", + "units": "km h-1" + }, + "273": { + "_variable_name": "wind_speed_q3", + "long_name": "Wind Speed at 2 m (minutes 30-45)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1", + "missing_flags": "M", + "missing_values": "-99999" + }, + "274": { + "_variable_name": "wind_speed_q4", + "long_name": "Wind Speed at 2 m (minutes 45-60)", + "missing_flags": "M", + "missing_values": "-99999", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "standard_name": "wind_speed", + "units": "km h-1" + }, + "275": { + "_variable_name": "snd_q4", + "long_name": "Snow Depth (at minute 60)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "276": { + "_variable_name": "snd_q1", + "long_name": "Snow Depth (at minute 15)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "277": { + "_variable_name": "snd_q2", + "long_name": "Snow Depth (at minute 30)", + "original_units": "cm", + "scale_factor": 1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "278": { + "_variable_name": "snd_q3", + "long_name": "Snow Depth (at minute 45)", + "original_units": "cm", + "missing_flags": "M", + "missing_values": "-99999", + "scale_factor": 1, + "standard_name": "surface_snow_thickness", + "units": "cm" + }, + "279": { + "_variable_name": "wind_dir", + "long_name": "Wind Direction at 2 m (minutes 50-60)", + 
"missing_flags": "M", + "missing_values": "-99999", + "nc_units": "deg", + "original_units": "Degrees", + "standard_name": "wind_direction" + }, + "280": { + "_variable_name": "wind_speed", + "long_name": "Wind Speed at 2 m (minutes 50-60)", + "original_units": "0.1 km/h", + "scale_factor": 0.1, + "missing_flags": "M", + "missing_values": "-99999", + "standard_name": "wind_speed", + "units": "km h-1" + } + } +} diff --git a/src/miranda/preprocess/eccc.py b/src/miranda/preprocess/eccc.py new file mode 100644 index 00000000..583a1bef --- /dev/null +++ b/src/miranda/preprocess/eccc.py @@ -0,0 +1,159 @@ +"""Specialized conversion tools for Environment and Climate Change Canada / Meteorological Service of Canada data.""" + +from __future__ import annotations + +import contextlib +import logging.config + +# import os +import tempfile +from pathlib import Path +from typing import Callable + +from dask.diagnostics import ProgressBar + +from miranda.scripting import LOGGING_CONFIG +from miranda.storage import file_size, report_file_size +from miranda.utils import generic_extract_archive + +logging.config.dictConfig(LOGGING_CONFIG) + + +_data_folder = Path(__file__).parent / "configs" + + +def _run_func_on_archive_with_optional_dask( + file: Path, + function: Callable, + errored_files: list[Path], + **dask_kwargs, +) -> None: + r""" + Run a function on a file archive, extracting it if necessary. + + Parameters + ---------- + file : Path + File archive to process. + function : Callable + Function to run on the file. + errored_files : list[Path] + List of files that errored during processing. + \*\*dask_kwargs : Any + Keyword arguments to pass to dask.distributed.Client. + + Notes + ----- + If the file is larger than 1 GiB or dask_kwargs are passed, dask.dataframes will be used. + Partial function requires the function to accept the following parameters: + - file: Path + - using_dask: bool + - client: dask.distributed.Client + """ + with tempfile.TemporaryDirectory() as temp_folder: + if file.suffix in [".gz", ".tar", ".zip", ".7z"]: + data_files = generic_extract_archive(file, output_dir=temp_folder) + else: + data_files = [file] + msg = f"Processing file: {file}." + logging.info(msg) + + # 1 GiB + size_limit = 2**30 + + for data in data_files: + size = file_size(data) + if size > size_limit or dask_kwargs: + if dask_kwargs: + logging.info("`dask_kwargs` provided - Using dask.dataframes.") + elif size > size_limit: + msg = f"File exceeds {report_file_size(size_limit)} - Using dask.dataframes." + logging.info(msg) + client = ProgressBar + using_dask = True + else: + msg = f"File below {report_file_size(size_limit)} - Using pandas.dataframes." 
+ logging.info(msg) + client = contextlib.nullcontext + using_dask = False + + with client(**dask_kwargs) as c: + try: + function(data, using_dask=using_dask, client=c) + except FileNotFoundError: + errored_files.append(data) + + if Path(temp_folder).iterdir(): + for temporary_file in Path(temp_folder).glob("*"): + if temporary_file in data_files: + temporary_file.unlink() + + +# def convert_flat_files( +# source_files: str | os.PathLike, +# output_folder: str | os.PathLike | list[str | int], +# variables: str | int | list[str | int], +# project: str = "eccc-obs", +# mode: str = "hourly", +# **dask_kwargs, +# ) -> None: +# """ +# +# Parameters +# ---------- +# source_files: str or Path +# output_folder: str or Path +# variables: str or List[str] +# project: {"eccc-obs", "eccc-obs-summary", "eccc-homogenized"} +# mode: {"hourly", "daily"} +# +# Returns +# ------- +# None +# """ +# +# if isinstance(variables, (str, int)): +# variables = [variables] +# +# for variable_code in variables: +# variable_code = str(variable_code).zfill(3) +# metadata = load_json_data_mappings("eccc-obs").get(variable_code) +# +# +# +# # Loop on the files +# logging.info( +# f"Collecting files for variable '{metadata['standard_name']}' " +# f"(filenames containing '{metadata['_table_name']}')." +# ) +# list_files = list() +# if isinstance(source_files, list) or Path(source_files).is_file(): +# list_files.append(source_files) +# else: +# glob_patterns = [g for g in metadata["_table_name"]] +# for pattern in glob_patterns: +# list_files.extend( +# [f for f in Path(source_files).rglob(f"{pattern}*") if f.is_file()] +# ) +# +# +# +# +# manager = mp.Manager() +# errored_files = manager.list() +# converter_func = partial( +# _convert_station_file, +# output_path=rep_nc, +# errored_files=errored_files, +# mode=mode, +# variable_code=variable_code, +# column_names=column_names, +# column_dtypes=column_dtypes, +# **metadata, +# ) +# with mp.Pool(processes=n_workers) as pool: +# pool.map(converter_func, list_files) +# pool.close() +# pool.join() +# +# diff --git a/src/miranda/convert/ecmwf.py b/src/miranda/preprocess/ecmwf_tigge.py similarity index 100% rename from src/miranda/convert/ecmwf.py rename to src/miranda/preprocess/ecmwf_tigge.py diff --git a/src/miranda/structure/_structure.py b/src/miranda/structure/_structure.py index ab4a8302..d335e438 100644 --- a/src/miranda/structure/_structure.py +++ b/src/miranda/structure/_structure.py @@ -305,7 +305,9 @@ def build_path_from_schema( Path or None """ if schema is None: - schema = Path(__file__).parent.joinpath("data").joinpath("ouranos_schema.yml") + schema = ( + Path(__file__).parent.joinpath("configs").joinpath("ouranos_schema.yml") + ) tree = parse_schema(facets, schema, top_folder) branch = tree[0] diff --git a/src/miranda/treatments/__init__.py b/src/miranda/treatments/__init__.py new file mode 100644 index 00000000..4b57c5d4 --- /dev/null +++ b/src/miranda/treatments/__init__.py @@ -0,0 +1,115 @@ +"""Treatments module.""" + +from __future__ import annotations + +import datetime +import logging.config + +import xarray + +from miranda import __version__ as __miranda_version__ +from miranda.scripting import LOGGING_CONFIG +from miranda.treatments._dimensions import * +from miranda.treatments._preprocessing import * +from miranda.treatments._variables import * +from miranda.treatments.utils import * +from miranda.units import get_time_frequency + +logging.config.dictConfig(LOGGING_CONFIG) +VERSION = datetime.datetime.now().strftime("%Y.%m.%d") + + +def 
metadata_conversion(d: xarray.Dataset, p: str, m: dict) -> xarray.Dataset: + """Update xarray dataset and data_vars with project-specific metadata fields. + + Parameters + ---------- + d : xarray.Dataset + Dataset with metadata to be updated. + p : str + Dataset project name. + m : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + logging.info("Converting metadata to CF-like conventions.") + + header = m["Header"] + + # Static handling of version global attributes + miranda_version = header.get("_miranda_version") + if miranda_version: + if isinstance(miranda_version, bool): + header["miranda_version"] = __miranda_version__ + elif isinstance(miranda_version, dict): + if p in miranda_version.keys(): + header["miranda_version"] = __miranda_version__ + else: + msg = f"`_miranda_version` not set for project `{p}`. Not appending." + logging.warning(msg) + if "_miranda_version" in header: + del header["_miranda_version"] + + frequency = m["Header"].get("_frequency") + if frequency: + if isinstance(frequency, bool): + _, m["Header"]["frequency"] = get_time_frequency(d) + elif isinstance(frequency, dict): + if p in frequency.keys(): + m["Header"]["frequency"] = get_time_frequency(d) + else: + logging.warning("`frequency` not set for project. Not appending.") + if "_frequency" in m["Header"]: + del m["Header"]["_frequency"] + + # Conditional handling of global attributes based on project name + for field in [f for f in header.keys() if f.startswith("_")]: + if isinstance(header[field], list): + if p in header[field]: + attr_treatments = header[field][p] + else: + msg = f"Attribute handling (`{field}`) not set for project `{p}`. Continuing..." + logging.warning(msg) + continue + elif isinstance(header[field], dict): + attr_treatments = header[field] + else: + msg = f"Attribute treatment configuration for field `{field}` is not properly configured. Verify JSON." + raise AttributeError(msg) + + if field[1:] in d.attrs: + msg = f"Overwriting `{field[1:]}` based on JSON configuration." 
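+            # The dataset already carries this attribute; the value resolved
+            # from the JSON configuration takes precedence and replaces it.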
+ logging.warning(msg) + if field == "_map_attrs": + for attribute, mapping in attr_treatments.items(): + header[mapping] = d.attrs[attribute] + del d.attrs[attribute] + elif field == "_remove_attrs": + for ff in attr_treatments: + del d.attrs[ff] + elif field.startswith("_") and p in attr_treatments: + header[field[1:]] = attr_treatments[p] + else: + header[field[1:]] = attr_treatments + del header[field] + + # Add global attributes + d.attrs.update(header) + d.attrs.update(dict(project=p)) + + # Date-based versioning + if not d.attrs.get("version"): + d.attrs.update(dict(version=f"v{VERSION}")) + + prev_history = d.attrs.get("history", "") + history = ( + f"[{datetime.datetime.now()}] " + "Converted variables and modified metadata for CF-like compliance: " + f"{prev_history}".strip() + ) + d.attrs.update(dict(history=history)) + + return d diff --git a/src/miranda/treatments/_dimensions.py b/src/miranda/treatments/_dimensions.py new file mode 100644 index 00000000..e5185507 --- /dev/null +++ b/src/miranda/treatments/_dimensions.py @@ -0,0 +1,249 @@ +from __future__ import annotations + +import logging +import warnings +from typing import Any + +import numpy as np +import xarray as xr +from xclim.core.calendar import parse_offset + +from miranda.treatments.utils import _get_section_entry_key, _iter_entry_key # noqa +from miranda.units import get_time_frequency + + +def find_project_variable_codes(code: str, configuration: dict[str, Any]) -> str: + """Find the variable code for a given variable name and project. + + Parameters + ---------- + code : str + Variable name. + configuration : dict + Configuration dictionary. + + Returns + ------- + str + """ + variable_codes = {} + + if "variables" not in configuration: + raise ValueError("No `variables` section found in configuration. Check JSON.") + + for variable_code in configuration["variables"]: + variable_name = configuration["variables"][variable_code].get("_variable_name") + if variable_name: + variable_codes[variable_name] = variable_code + else: + warnings.warn( + f"Variable `{variable_code}` does not have accompanying `variable_name`. " + f"Verify JSON. Continuing with `{variable_code}` as `variable_name`." + ) + variable_codes[variable_code] = variable_code + + if code in variable_codes.values(): + variable = code + else: + variable = variable_codes.get(code) + if not variable: + raise NotImplementedError(f"Variable `{code}` not supported.") + + return variable + + +def dimensions_compliance(ds: xr.Dataset, project: str, metadata: dict) -> xr.Dataset: + """Rename dimensions to CF to their equivalents and reorder them if needed. + + Parameters + ---------- + ds : xarray.Dataset + Dataset with dimensions to be updated. + project : str + Dataset project name. + metadata : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + rename_dims = dict() + for dim in ds.dims: + if dim in metadata["dimensions"].keys(): + cf_name = _get_section_entry_key( + metadata, "dimensions", dim, "_cf_dimension_name", project + ) + if cf_name: + rename_dims[dim] = cf_name + + # Rename dimensions + _rename_dims = [str(d) for d in rename_dims.keys()] + msg = f"Renaming dimensions: {', '.join(_rename_dims)}." 
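+    # Only dimensions with a `_cf_dimension_name` entry are renamed; dimensions
+    # already carrying their CF name pass through unchanged.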
+ logging.info(msg) + ds = ds.rename(rename_dims) + for new in ["lon", "lat"]: + if new == "lon" and "lon" in ds.coords: + if np.any(ds.lon > 180): + lon1 = ds.lon.where(ds.lon <= 180.0, ds.lon - 360.0) + ds[new] = lon1 + + coord_precision = _get_section_entry_key( + metadata, "dimensions", new, "_precision", project + ) + if coord_precision is not None: + ds[new] = ds[new].round(coord_precision) + + # Ensure that lon and lat are written in proper order for plotting purposes + logging.info("Reordering dimensions.") + transpose_order = [] + if "lat" in ds.dims and "lon" in ds.dims: + transpose_order = ["lat", "lon"] + elif "rlat" in ds.dims and "rlon" in ds.dims: + transpose_order = ["rlat", "rlon"] + if "time" in ds.dims and transpose_order: + transpose_order.insert(0, "time") + transpose_order.extend(list(set(ds.dims) - set(transpose_order))) + ds = ds.transpose(*transpose_order) + ds = ds.sortby(transpose_order) + + # Add dimension original name and update attrs + logging.info("Updating dimension attributes.") + dim_descriptions = metadata["dimensions"] + for dim in metadata["dimensions"].keys(): + cf_name = dim_descriptions[dim].get("_cf_dimension_name") + if cf_name is not None and cf_name in ds.dims: + ds[cf_name].attrs.update(dict(original_variable=dim)) + else: + # variable name already follows CF standards + cf_name = dim + for field in dim_descriptions[dim].keys(): + if not field.startswith("_"): + ds[cf_name].attrs.update({field: dim_descriptions[dim][field]}) + + prev_history = ds.attrs.get("history", "") + history = f"Transposed and renamed dimensions. {prev_history}" + ds.attrs.update(dict(history=history)) + + return ds + + +def ensure_correct_time_frequency(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: + """Ensure that time frequency is consistent with expected frequency for project.""" + key = "_ensure_correct_time" + strict_time = "_strict_time" + + if "time" not in m["dimensions"].keys(): + msg = f"No time corrections listed for project `{p}`. Continuing..." + warnings.warn(msg) + return d + + if "time" not in list(d.variables.keys()): + msg = ( + "No time dimension among data variables: " + f"{' ,'.join([str(v) for v in d.variables.keys()])}. " + "Continuing..." + ) + logging.info(msg) + return d + + if key in m["dimensions"]["time"].keys(): + freq_found = xr.infer_freq(d.time) + if strict_time in m["dimensions"]["time"].keys(): + if not freq_found: + msg = ( + "Time frequency could not be found. There may be missing timesteps." + ) + if m["dimensions"]["time"].get(strict_time): + raise ValueError(msg) + else: + warnings.warn(f"{msg} Continuing...") + return d + + correct_time_entry = m["dimensions"]["time"][key] + if isinstance(correct_time_entry, str): + correct_times = [parse_offset(correct_time_entry)[1]] + elif isinstance(correct_time_entry, dict): + correct_times = correct_time_entry.get(p) + if isinstance(correct_times, list): + correct_times = [parse_offset(t)[1] for t in correct_times] + if correct_times is None: + warnings.warn(f"No expected times set for specified project `{p}`.") + elif isinstance(correct_time_entry, list): + correct_times = correct_time_entry + else: + warnings.warn("No expected times set for family of projects.") + return d + + if freq_found not in correct_times: + error_msg = ( + f"Time frequency {freq_found} not among allowed frequencies: " + f"{', '.join(correct_times) if isinstance(correct_times, list) else correct_times}" + ) + if isinstance(correct_time_entry, dict): + error_msg = f"{error_msg} for project `{p}`." 
+            else:
+                error_msg = f"{error_msg}."
+            raise ValueError(error_msg)
+
+        msg = f"Resampling dataset with time frequency: {freq_found}."
+        logging.info(msg)
+        with xr.set_options(keep_attrs=True):
+            d_out = d.assign_coords(
+                time=d.time.resample(time=freq_found).mean(dim="time").time
+            )
+            d_out.time.attrs.update(d.time.attrs)
+
+        prev_history = d.attrs.get("history", "")
+        history = f"Resampled time with `freq={freq_found}`. {prev_history}"
+        d_out.attrs.update(dict(history=history))
+        return d_out
+
+    return d
+
+
+def offset_time_dimension(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Offset the time dimension using the listed frequency."""
+    key = "_offset_time"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    offset, offset_meaning = None, None
+
+    time_freq = dict()
+    expected_period = _get_section_entry_key(
+        m, "dimensions", "time", "_ensure_correct_time", p
+    )
+    if isinstance(expected_period, str):
+        time_freq["expected_period"] = expected_period
+
+    for vv, offs in _iter_entry_key(d, m, "dimensions", key, p):
+        if offs:
+            # Offset time by the value of one time-step
+            if offset is None and offset_meaning is None:
+                try:
+                    offset, offset_meaning = get_time_frequency(d, **time_freq)
+                except TypeError:
+                    msg = "Unable to parse the time frequency. Verify data integrity before retrying."
+                    logging.error(msg)
+                    raise
+
+            msg = f"Offsetting data for `{vv}` by `{offset[0]} {offset_meaning}(s)`."
+            logging.info(msg)
+            with xr.set_options(keep_attrs=True):
+                out = d[vv]
+                out["time"] = out.time - np.timedelta64(offset[0], offset[1])
+                d_out[vv] = out
+            converted.append(vv)
+
+            prev_history = d.attrs.get("history", "")
+            history = f"Offset variable `{vv}` values by `{offset[0]} {offset_meaning}(s)`. {prev_history}"
+            d_out.attrs.update(dict(history=history))
+        elif offs is False:
+            msg = f"No time offsetting needed for `{vv}` in `{p}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
diff --git a/src/miranda/treatments/_preprocessing.py b/src/miranda/treatments/_preprocessing.py
new file mode 100644
index 00000000..b09cceb6
--- /dev/null
+++ b/src/miranda/treatments/_preprocessing.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+from functools import partial
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import xarray as xr
+
+from miranda.convert.utils import date_parser
+
+
+def correct_time_entries(
+    ds: xr.Dataset,
+    split: str = "_",
+    location: int = -1,
+    field: str = "time",
+) -> xr.Dataset:
+    """Correct time entries in a dataset.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset whose time coordinate is to be rebuilt.
+    split : str
+        Delimiter used to split the source filename.
+    location : int
+        Index of the date token within the split filename.
+    field : str
+        Name of the time coordinate to rebuild.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    filename = ds.encoding["source"]
+    date = date_parser(Path(filename).stem.split(split)[location])
+    vals = np.arange(len(ds[field]))
+    days_since = f"days since {date}"
+    time = xr.coding.times.decode_cf_datetime(
+        vals, units=days_since, calendar="standard"
+    )
+    ds = ds.assign_coords({field: time})
+
+    prev_history = ds.attrs.get("history", "")
+    history = (
+        f"Time index recalculated in preprocessing step ({days_since}). {prev_history}"
+    )
+    ds.attrs.update(dict(history=history))
+
+    return ds
+
+
+def correct_var_names(
+    ds: xr.Dataset, split: str = "_", location: int = 0
+) -> xr.Dataset:
+    """Correct variable names in a dataset.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset whose first data variable is to be renamed.
+    split : str
+        Delimiter used to split the source filename.
+    location : int
+        Index of the variable-name token within the split filename.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+    filename = ds.encoding["source"]
+    new_name = Path(filename).stem.split(split)[location]
+    old_name = list(ds.data_vars.keys())[0]
+
+    prev_history = ds.attrs.get("history", "")
+    history = f"Variable renamed in preprocessing step ({old_name}: {new_name}). {prev_history}"
+    ds.attrs.update(dict(history=history))
+
+    return ds.rename({old_name: new_name})
+
+
+def preprocessing_corrections(
+    ds: xr.Dataset, configuration: dict[str, Any]
+) -> xr.Dataset:
+    """Corrections function dispatcher to ensure minimal dataset validity on open.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        Dataset to be corrected.
+    configuration : dict
+        Project configuration containing an optional `_preprocess` section.
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+
+    def _preprocess_correct(d: xr.Dataset, *, ops: list[partial]) -> xr.Dataset:
+        for correction in ops:
+            d = correction(d)
+        return d
+
+    correction_fields = configuration.get("_preprocess")
+    if correction_fields:
+        preprocess_ops = []
+        for field in correction_fields:
+            if field == "_variable_name":
+                preprocess_ops.append(
+                    partial(correct_var_names, **correction_fields[field])
+                )
+            if field == "_time":
+                preprocess_ops.append(
+                    partial(correct_time_entries, **correction_fields[field])
+                )
+        if preprocess_ops:
+            corrector = partial(_preprocess_correct, ops=preprocess_ops)
+            return corrector(ds)
+    return ds
diff --git a/src/miranda/treatments/_variables.py b/src/miranda/treatments/_variables.py
new file mode 100644
index 00000000..f5695570
--- /dev/null
+++ b/src/miranda/treatments/_variables.py
@@ -0,0 +1,271 @@
+from __future__ import annotations
+
+import logging.config
+
+import xarray as xr
+import xclim.core.units
+from xclim.core import units
+
+from miranda.treatments.utils import _get_section_entry_key  # noqa
+from miranda.treatments.utils import _iter_entry_key  # noqa
+from miranda.units import get_time_frequency
+
+__all__ = [
+    "cf_units_conversion",
+    "clip_values",
+    "correct_unit_names",
+    "invert_value_sign",
+    "transform_values",
+    "variable_conversion",
+]
+
+
+def correct_unit_names(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Correct unit names."""
+    key = "_corrected_units"
+    for var, val in _iter_entry_key(d, m, "variables", key, p):
+        if val:
+            d[var].attrs["units"] = val
+            prev_history = d.attrs.get("history", "")
+            history = (
+                f"Corrected unit name for variable `{var}` to `{val}`. {prev_history}"
+            )
+            d.attrs.update(dict(history=history))
+
+    return d
+
+
+# For de-accumulation or conversion to flux
+def transform_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Transform dataset values according to the operation listed."""
+    key = "_transformation"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    offset, offset_meaning = None, None
+
+    time_freq = dict()
+    expected_period = _get_section_entry_key(
+        m, "dimensions", "time", "_ensure_correct_time", p
+    )
+    if isinstance(expected_period, str):
+        time_freq["expected_period"] = expected_period
+
+    for vv, trans in _iter_entry_key(d, m, "variables", key, p):
+        if trans:
+            if trans == "deaccumulate":
+                # Time-step accumulated total to time-based flux (de-accumulation)
+                if offset is None and offset_meaning is None:
+                    try:
+                        offset, offset_meaning = get_time_frequency(d, **time_freq)
+                    except TypeError:
+                        logging.error(
+                            "Unable to parse the time frequency. Verify data integrity before retrying."
+                        )
+                        raise
+
+                msg = f"De-accumulating units for variable `{vv}`."
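+                # De-accumulation recovers per-step amounts from running totals:
+                # diff() takes step differences, the first step of each accumulation
+                # period keeps its original value via where(), and amount2rate()
+                # then converts the per-step amount to a flux.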
+                logging.info(msg)
+                with xr.set_options(keep_attrs=True):
+                    out = d[vv].diff(dim="time")
+                    out = d[vv].where(
+                        getattr(d[vv].time.dt, offset_meaning) == offset[0],
+                        out.broadcast_like(d[vv]),
+                    )
+                    out = units.amount2rate(out, out_units=m["variables"][vv]["units"])
+                d_out[vv] = out
+                converted.append(vv)
+            elif trans == "amount2rate":
+                # NOTE: This treatment is no longer needed in xclim v0.43.0+ but is kept for backwards compatibility.
+                # Frequency-based totals to time-based flux.
+                msg = f"Performing amount-to-rate units conversion for variable `{vv}`."
+                logging.info(msg)
+                with xr.set_options(keep_attrs=True):
+                    out = units.amount2rate(
+                        d[vv],
+                        out_units=m["variables"][vv]["units"],
+                    )
+                d_out[vv] = out
+                converted.append(vv)
+            elif isinstance(trans, str):
+                if trans.startswith("op "):
+                    op = trans[3]
+                    value = trans[4:].strip()
+                    if value.startswith("attrs"):
+                        value = units.str2pint(d[vv].attrs[value[6:]])
+                    else:
+                        value = units.str2pint(value)
+                    with xr.set_options(keep_attrs=True):
+                        if op == "+":
+                            value = units.convert_units_to(value, d[vv])
+                            d_out[vv] = d[vv] + value
+                        elif op == "-":
+                            value = units.convert_units_to(value, d[vv])
+                            d_out[vv] = d[vv] - value
+                        elif op == "*":
+                            d_out[vv] = units.pint_multiply(d[vv], value)
+                        elif op == "/":
+                            d_out[vv] = units.pint_multiply(d[vv], 1 / value)
+                        else:
+                            raise NotImplementedError(
+                                f"Op transform doesn't implement the «{op}» operator."
+                            )
+                    converted.append(vv)
+                else:
+                    raise NotImplementedError(f"Unknown transformation: {trans}")
+
+            prev_history = d.attrs.get("history", "")
+            history = (
+                f"Transformed variable `{vv}` values using method `{trans}`. {prev_history}"
+            )
+            d_out.attrs.update(dict(history=history))
+        elif trans is False:
+            msg = f"No transformations needed for `{vv}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
+
+
+def invert_value_sign(d: xr.Dataset, p: str, m: dict) -> xr.Dataset:
+    """Flip the sign of DataArray values."""
+    key = "_invert_sign"
+    d_out = xr.Dataset(coords=d.coords, attrs=d.attrs)
+    converted = []
+    for vv, inv_sign in _iter_entry_key(d, m, "variables", key, p):
+        if inv_sign:
+            msg = f"Inverting sign for `{vv}` (switching direction of values)."
+            logging.info(msg)
+            with xr.set_options(keep_attrs=True):
+                out = d[vv]
+                d_out[out.name] = -out
+            converted.append(vv)
+
+            prev_history = d.attrs.get("history", "")
+            history = f"Inverted sign for variable `{vv}` (switched direction of values). {prev_history}"
+            d_out.attrs.update(dict(history=history))
+        elif inv_sign is False:
+            msg = f"No sign inversion needed for `{vv}` in `{p}` (Explicitly set to False)."
+            logging.info(msg)
+            continue
+
+    # Copy unconverted variables
+    for vv in d.data_vars:
+        if vv not in converted:
+            d_out[vv] = d[vv]
+    return d_out
+
+
+# For converting variable units to standard workflow units
+def cf_units_conversion(d: xr.Dataset, m: dict) -> xr.Dataset:
+    """Perform pint-based units conversion."""
+    if "time" in m["dimensions"].keys():
+        if m["dimensions"]["time"].get("units"):
+            d["time"].attrs["units"] = m["dimensions"]["time"]["units"]
+
+    for vv, unit in _iter_entry_key(d, m, "variables", "units", None):
+        if unit:
+            with xr.set_options(keep_attrs=True):
+                d[vv] = units.convert_units_to(d[vv], unit, context="hydro")
+            prev_history = d.attrs.get("history", "")
+            history = f"Converted variable `{vv}` to CF-compliant units (`{unit}`). {prev_history}"
{prev_history}" + d.attrs.update(dict(history=history)) + + return d + + +# For clipping variable values to an established maximum/minimum +def clip_values(d: xr.Dataset, p: str, m: dict) -> xr.Dataset: + """Clip values to an appropriate range,.""" + key = "_clip_values" + d_out = xr.Dataset(coords=d.coords, attrs=d.attrs) + converted = [] + for vv in d.data_vars: + if vv in m["variables"].keys(): + clip_vals = _get_section_entry_key(m, "variables", vv, key, p) + if clip_values: + min_value, max_value = None, None + # Gather unit conversion context, if applicable + context = clip_vals.get("context", None) + for op, value in clip_vals.items(): + if op == "min": + min_value = xclim.core.units.convert_units_to( + value, d[vv], context + ) + if op == "max": + max_value = xclim.core.units.convert_units_to( + value, d[vv], context + ) + msg = f"Clipping min/max values for `{vv}` ({min_value}/{max_value})." + logging.info(msg) + with xr.set_options(keep_attrs=True): + out = d[vv] + d_out[out.name] = out.clip(min_value, max_value) + converted.append(vv) + elif clip_values is False: + msg = f"No clipping of values needed for `{vv}` in `{p}` (Explicitly set to False)." + logging.info(msg) + continue + else: + msg = f"Unknown clipping values for `{vv}` in `{p}`." + logging.info(msg) + continue + + prev_history = d.attrs.get("history", "") + history = f"Clipped variable `{vv}` with `min={min_value}` and `max={max_value}`. {prev_history}" + d_out.attrs.update(dict(history=history)) + + # Copy unconverted variables + for vv in d.data_vars: + if vv not in converted: + d_out[vv] = d[vv] + + return d_out + + +# For renaming and reordering lat and lon dims + + +def variable_conversion(d: xr.Dataset, p: str | None, m: dict) -> xr.Dataset: + """Add variable metadata and remove nonstandard entries. + + Parameters + ---------- + d : xarray.Dataset + Dataset with variable(s) to be updated. + p : str + Dataset project name. + m : dict + Metadata definition dictionary for project and variable(s). + + Returns + ------- + xarray.Dataset + """ + var_descriptions = m["variables"] + var_correction_fields = [ + "_clip_values", + "_corrected_units", + "_invert_sign", + "_offset_time", + "_transformation", + ] + for var in d.variables: + if var in var_descriptions.keys(): + for field in var_correction_fields: + if field in var_descriptions[var].keys(): + del var_descriptions[var][field] + d[var].attrs.update(var_descriptions[var]) + + # Rename data variables + for orig_var_name, cf_name in _iter_entry_key( + d, m, "variables", "_cf_variable_name", p + ): + if cf_name is not None: + d = d.rename({orig_var_name: cf_name}) + d[cf_name].attrs.update(dict(original_variable=orig_var_name)) + del d[cf_name].attrs["_cf_variable_name"] + + return d diff --git a/src/miranda/treatments/utils.py b/src/miranda/treatments/utils.py new file mode 100644 index 00000000..4ee19470 --- /dev/null +++ b/src/miranda/treatments/utils.py @@ -0,0 +1,114 @@ +"""Utility functions for GIS operations.""" + +from __future__ import annotations + +import inspect +import json +from pathlib import Path +from typing import Any + +__all__ = [ + "load_json_data_mappings", +] + + +def _get_section_entry_key( + meta: dict, entry: str, var: str, key: str, project: str +) -> Any: + """ + Get a specific key from a section of the metadata. + + Parameters + ---------- + meta : dict + The metadata dictionary. + entry : str + The entry to look for. + var : str + The variable to look for. + key : str + The key to look for. + project : str + The project name. 
+
+    Returns
+    -------
+    Any
+        The value of the key.
+    """
+    var_meta = meta[entry].get(var, {})
+    if key in var_meta:
+        if isinstance(var_meta[key], dict):
+            config = var_meta[key].get(project)
+            if config is None and "all" in var_meta[key].keys():
+                config = var_meta[key].get("all")
+            return config
+        return var_meta[key]
+    return None
+
+
+def _iter_entry_key(ds, meta, entry, key, project) -> Iterator[tuple[str, Any]]:
+    """
+    Iterate through entry keys.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The dataset.
+    meta : dict
+        The metadata dictionary.
+    entry : str
+        The entry to look for.
+    key : str
+        The key to look for.
+    project : str
+        The project name.
+
+    Yields
+    ------
+    tuple[str, Any]
+        The variable and value.
+    """
+    for vv in set(ds.data_vars).intersection(meta[entry]):
+        val = _get_section_entry_key(meta, entry, vv, key, project)
+        yield vv, val
+
+
+def load_json_data_mappings(
+    project: str, configurations: dict[str, Path] | None = None
+) -> dict[str, Any]:
+    """
+    Load JSON mappings for supported dataset conversions.
+
+    Parameters
+    ----------
+    project : str
+        The project name.
+    configurations : dict, optional
+        Configuration files for the project.
+        If not provided, the function will try to find the configuration files in the `configs` folder.
+
+    Returns
+    -------
+    dict[str, Any]
+        The metadata definition.
+    """
+    if configurations is None:
+        # Assume that a `configs` folder sits next to the calling module.
+        calling_frame = inspect.currentframe().f_back
+        calling_file_path = calling_frame.f_globals["__file__"]
+        config_folder = Path(calling_file_path).parent / "configs"
+
+        configurations = {}
+        for configuration in config_folder.glob("*attrs.json"):
+            project_config = str(configuration.stem).split("_")[0]
+            # Config stems may name several projects separated by `|`; map each to the same file.
+            if "|" in project_config:
+                for p in project_config.split("|"):
+                    configurations[p] = configuration
+            configurations[project_config] = configuration
+
+    if project in configurations.keys():
+        config_file = configurations[project]
+        with config_file.open() as f:
+            metadata_definition = json.load(f)
+        return metadata_definition
+    else:
+        raise NotImplementedError(f"Project not supported: {project}")
diff --git a/src/miranda/validators.py b/src/miranda/validators.py
index f14d222f..d01e78fe 100644
--- a/src/miranda/validators.py
+++ b/src/miranda/validators.py
@@ -9,12 +9,12 @@
 from pandas._libs.tslibs import NaTType  # noqa
 from schema import Literal, Optional, Or, Regex, Schema
 
-from .cv import VALIDATION_ENABLED
+from miranda.cv import VALIDATION_ENABLED
 
 __all__ = ["url_validate"]
 
 if VALIDATION_ENABLED:
-    from .cv import (
+    from miranda.cv import (
         ACTIVITIES,
         BIAS_ADJUST_INSTITUTIONS,
         DRIVING_MODELS,
diff --git a/src/miranda/vocabularies/__init__.py b/src/miranda/vocabularies/__init__.py
new file mode 100644
index 00000000..8d108418
--- /dev/null
+++ b/src/miranda/vocabularies/__init__.py
@@ -0,0 +1,5 @@
+"""Controlled Vocabulary module."""
+
+from __future__ import annotations
+
+from .
import eccc diff --git a/src/miranda/vocabularies/eccc.py b/src/miranda/vocabularies/eccc.py new file mode 100644 index 00000000..f668ec63 --- /dev/null +++ b/src/miranda/vocabularies/eccc.py @@ -0,0 +1,95 @@ +"""Definition lists of variables from ECCC for each type of archive.""" + +# For more information see the ECCC Technical Documentation + +__all__ = [ + "obs_groupings", + "obs_vocabularies", +] + +obs_vocabularies = dict() + +# Hourly Data + +obs_vocabularies["HLY01"] = [] +obs_vocabularies["HLY01"].extend(list(range(71, 123))) # Hourly variables +obs_vocabularies["HLY01"].extend([209, 210]) # Wind character and gust speed +obs_vocabularies["HLY01"].extend(list(range(219, 231))) # Cloud layers +obs_vocabularies["HLY01"].append(244) # Precipitation type +obs_vocabularies["HLY01"].append(260) # Freezing fog + +obs_vocabularies["HLY01_RCS"] = obs_vocabularies["HLY01"].copy() +obs_vocabularies["HLY01_RCS"].extend( + list(range(262, 281)) +) # Reference Climate Surface (RCS) weather stations + +obs_vocabularies["HLY03"] = [] +obs_vocabularies["HLY03"].extend(list(range(123, 133))) # Hourly rainfall +obs_vocabularies["HLY03"].extend([160, 161]) + +obs_vocabularies["HLY10"] = [] +obs_vocabularies["HLY10"].extend(list(range(61, 69))) # Sunshine +obs_vocabularies["HLY10"].extend([133, 169, 170, 171, 172]) # Solar radiation + +obs_vocabularies["HLY15"] = [69, 70, 76, 156] # Wind + +obs_vocabularies["HLY21"] = [123] # Fischer/Porter precipitation + +# Daily Data + +obs_vocabularies["DLY02"] = [] +obs_vocabularies["DLY02"].extend(list(range(1, 26))) # Daily variables +obs_vocabularies["DLY02"].append(157) # Direction of extreme gust +obs_vocabularies["DLY02"].append(179) # Daily bright sunshine + +obs_vocabularies["DLY03"] = [] +obs_vocabularies["DLY03"].extend(list(range(124, 133))) +obs_vocabularies["DLY03"].extend([160, 161]) + +obs_vocabularies["DLY04"] = obs_vocabularies["DLY02"].copy() + +obs_vocabularies["DLY12"] = [] +obs_vocabularies["DLY12"].extend(list(range(134, 151))) # Soil temperatures + +obs_vocabularies["DLY13"] = list(range(151, 156)) # Pan evaporation + +obs_vocabularies["DLY21"] = [12] # Precipitation +obs_vocabularies["DLY21"].extend(list(range(127, 133))) # Precipitation over time +obs_vocabularies["DLY21"].append(161) # Most precipitation in 25 hours + +obs_vocabularies["DLY44"] = [] +obs_vocabularies["DLY44"].extend([1, 2, 3]) # Temperature +obs_vocabularies["DLY44"].extend(list(range(10, 18))) # Precipitation + +# Monthly data + +obs_vocabularies["MLY04"] = [] +obs_vocabularies["MLY04"].extend(list(range(26, 39))) # Days with variables +obs_vocabularies["MLY04"].extend(list(range(39, 61))) # Means of variables +obs_vocabularies["MLY04"].append(158) # Direction of extreme gust + +# Groupings + +obs_groupings = dict() +obs_groupings["HLY"] = list( + set( + obs_vocabularies["HLY01"] + + obs_vocabularies["HLY01_RCS"] + + obs_vocabularies["HLY03"] + + obs_vocabularies["HLY10"] + + obs_vocabularies["HLY15"] + + obs_vocabularies["HLY21"] + ) +) +obs_groupings["DLY"] = list( + set( + obs_vocabularies["DLY02"] + + obs_vocabularies["DLY03"] + + obs_vocabularies["DLY04"] + + obs_vocabularies["DLY12"] + + obs_vocabularies["DLY13"] + + obs_vocabularies["DLY21"] + + obs_vocabularies["DLY44"] + ) +) +obs_groupings["MLY"] = list(set(obs_vocabularies["MLY04"])) diff --git a/templates/eccc-ahccd_preprocess.py b/templates/eccc-ahccd_preprocess.py new file mode 100644 index 00000000..27a88072 --- /dev/null +++ b/templates/eccc-ahccd_preprocess.py @@ -0,0 +1,10 @@ +from pathlib import Path + 
+from miranda.preprocess import convert_ahccd, merge_ahccd + +in_files = Path("~/Desktop/ec_data/ahccd").expanduser() +output = Path().cwd().parent / "test" +variable = "tas" + +convert_ahccd(in_files, output, variable, generation=3) +merge_ahccd(output.joinpath("tas"), output.joinpath("merged"), variable, overwrite=True) diff --git a/templates/eccc-obs_preprocess.py b/templates/eccc-obs_preprocess.py new file mode 100644 index 00000000..e69de29b diff --git a/templates/eccc_ahccd_conversion.py b/templates/eccc_ahccd_conversion.py deleted file mode 100644 index e29dd643..00000000 --- a/templates/eccc_ahccd_conversion.py +++ /dev/null @@ -1,28 +0,0 @@ -from os import getenv -from pathlib import Path - -from miranda.eccc import convert_ahccd - -if __name__ == "__main__": - in_files = getenv("in") - out_files = getenv("out") - - source_files = Path(in_files) - output_path = Path(out_files) - - source_var_gens = { - "Generation3/Homog_daily_mean_temp_v2019/": ("tas", 3), - "Generation3/Homog_daily_max_temp_v2019/": ("tasmax", 3), - "Generation3/Homog_daily_min_temp_v2019/": ("tasmin", 3), - "Generation2/Adj_Daily_Total_v2017/": ("pr", 2), - "Generation2/Adj_Daily_Snow_v2017/": ("prsn", 2), - "Generation2/Adj_Daily_Rain_v2017/": ("prlp", 2), - } - - for folder, (variable, generation) in source_var_gens.items(): - convert_ahccd( - source_files.expanduser().joinpath(folder), - output_path, - variable, - generation, - ) diff --git a/templates/eccc_raw_daily_conversion.py b/templates/eccc_raw_daily_conversion.py index 4fb64de7..583c8d10 100644 --- a/templates/eccc_raw_daily_conversion.py +++ b/templates/eccc_raw_daily_conversion.py @@ -11,25 +11,25 @@ time_step = "daily" n_workers = 3 var_codes = [ - 1, - 2, - 3, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, + # 1, + # 2, + # 3, + # 10, + # 11, + # 12, + # 13, + # 14, + # 15, + # 16, + # 17, + # 18, + # 19, + # 20, + # 21, + # 22, 23, 24, - 25, + # 25, ] in_files = getenv("in") diff --git a/templates/eccc_raw_hourly_conversion.py b/templates/eccc_raw_hourly_conversion.py index 68a24405..303d3d10 100644 --- a/templates/eccc_raw_hourly_conversion.py +++ b/templates/eccc_raw_hourly_conversion.py @@ -1,8 +1,8 @@ from os import getenv from pathlib import Path -from miranda.eccc import ( - aggregate_stations, +from miranda.preprocess._eccc_obs import ( + merge_stations, convert_flat_files, merge_converted_variables, ) @@ -11,39 +11,41 @@ time_step = "hourly" n_workers = 3 var_codes = [ - 76, - 77, - 78, - 79, - 80, - 89, - 94, - 107, - 108, - 109, - 110, - 123, - 133, - 156, - 262, - 263, - 264, - 265, - 266, - 267, - 268, - 269, - 270, - 271, - 272, - 273, - 274, - 275, - 276, - 277, - 278, - 279, - 280, + 209, + 210, + # 76, + # 77, + # 78, + # 79, + # 80, + # 89, + # 94, + # 107, + # 108, + # 109, + # 110, + # 123, + # 133, + # 156, + # 262, + # 263, + # 264, + # 265, + # 266, + # 267, + # 268, + # 269, + # 270, + # 271, + # 272, + # 273, + # 274, + # 275, + # 276, + # 277, + # 278, + # 279, + # 280, ] in_files = getenv("in") @@ -74,7 +76,7 @@ n_workers=n_workers, ) - aggregate_stations( + merge_stations( source_files=merged, output_folder=final, time_step=time_step, diff --git a/templates/eccc_rdrs_processing.py b/templates/eccc_rdrs_processing.py index 1d68da11..deecc494 100644 --- a/templates/eccc_rdrs_processing.py +++ b/templates/eccc_rdrs_processing.py @@ -1,7 +1,7 @@ import logging from pathlib import Path -from miranda.convert.eccc_rdrs import convert_rdrs, rdrs_to_daily +from miranda.preprocess.eccc_rdrs import 
convert_rdrs, rdrs_to_daily from miranda.io import concat_rechunk_zarr diff --git a/templates/emdna_processing.py b/templates/emdna_processing.py index 194526fc..3ee9eb07 100644 --- a/templates/emdna_processing.py +++ b/templates/emdna_processing.py @@ -4,6 +4,7 @@ from dask.diagnostics import ProgressBar +import miranda.convert.corrections from miranda import convert, io, structure @@ -23,7 +24,7 @@ def main(): files_by_member = convert.gather_emdna(path) for member, files in files_by_member.items(): if member == "OI": - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( files, project="EMDNA", preprocess=preprocess_dna ) diff --git a/templates/era5-land_reanalysis_processing.py b/templates/era5-land_reanalysis_processing.py index c58aa430..3fc27945 100644 --- a/templates/era5-land_reanalysis_processing.py +++ b/templates/era5-land_reanalysis_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import miranda.convert.corrections from miranda import convert, io @@ -7,7 +8,7 @@ def main(): path_era5_land_out = Path("~/Desktop").expanduser() era5_land_files = convert.gather_ecmwf("era5-land", path_era5_land_out) - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( era5_land_files, project="era5-land-monthly-means", ) diff --git a/templates/espo-g6.py b/templates/espo-g6.py index ac43168f..ea9a3e4d 100644 --- a/templates/espo-g6.py +++ b/templates/espo-g6.py @@ -4,6 +4,7 @@ from dask.diagnostics import ProgressBar +import miranda.convert.corrections from miranda import convert, io, structure from miranda.decode import Decoder @@ -42,7 +43,7 @@ def main(): ) if not os.path.exists(new_path): # and path not in skip: # open as dataset - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( [f], add_version_hashes=False, project=project, diff --git a/templates/nasa_nex-gddp-cmip6_processing.py b/templates/nasa_nex-gddp-cmip6_processing.py index 78f51687..3fb7572b 100644 --- a/templates/nasa_nex-gddp-cmip6_processing.py +++ b/templates/nasa_nex-gddp-cmip6_processing.py @@ -1,5 +1,6 @@ from pathlib import Path +import miranda.convert.corrections from miranda import convert, io @@ -10,7 +11,7 @@ def main(): for path, list_files in nex_files.items(): # open as dataset - ds = convert.dataset_conversion( + ds = miranda.convert.corrections.dataset_conversion( list_files, add_version_hashes=False, project="NEX-GDDP-CMIP6", diff --git a/templates/restructure_datasets.py b/templates/restructure_datasets.py index d10fa8dc..f0d45ee9 100644 --- a/templates/restructure_datasets.py +++ b/templates/restructure_datasets.py @@ -17,5 +17,5 @@ guess=False, method="copy", make_dirs=True, - filename_pattern="*.zarr", + suffix="zarr", ) diff --git a/tests/test_utils.py b/tests/test_utils.py index c525f821..c6a529bb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,9 +4,9 @@ from datetime import date from pathlib import Path -import pytest # noqa +import pytest -import miranda.eccc._utils as eccc_utils # noqa +import miranda.preprocess._metadata as metadata import miranda.utils @@ -28,12 +28,13 @@ def test_hourly_cf_dictionaries(self): codes = list() variables = dict() for key in keys: - variables[key] = eccc_utils.cf_station_metadata(key) - codes.append(variables[key]["standard_name"]) - if variables[key]["standard_name"] == "dry_bulb_temperature": - assert variables[key]["raw_units"] == "degC" - assert variables[key]["units"] == "K" - assert variables[key]["missing_flags"] == "M" + 
variables[key] = metadata.eccc_variable_metadata(key, "eccc-obs") + var_name = next(iter(variables[key]["metadata"])) + var_metadata = variables[key]["metadata"][var_name] + codes.append(var_metadata["standard_name"]) + if var_metadata["standard_name"] == "dry_bulb_temperature": + assert var_metadata["units"] == "degC" + assert var_metadata["missing_flags"] == "M" assert set(codes) == { "wind_speed_u2a", @@ -57,15 +58,17 @@ def test_daily_cf_dictionaries(self): codes = list() variables = dict() for key in keys: - variables[key] = eccc_utils.cf_station_metadata(key) - codes.append(variables[key]["standard_name"]) - if variables[key]["standard_name"].startswith("air_temperature"): - assert variables[key]["raw_units"] == "degC" - assert variables[key]["units"] == "K" - elif variables[key]["standard_name"].endswith("precipitation_amount"): - assert variables[key]["raw_units"] in ["cm", "mm"] - assert variables[key]["units"] == "m" - assert variables[key]["missing_flags"] == "M" + variables[key] = metadata.eccc_variable_metadata(key, "eccc-obs") + + var_name = next(iter(variables[key]["metadata"])) + var_metadata = variables[key]["metadata"][var_name] + codes.append(var_metadata["standard_name"]) + + if var_name.startswith("air_temperature"): + assert var_metadata["units"] == "degC" + elif var_name.endswith("precipitation_amount"): + assert var_metadata["units"] in ["cm", "mm"] + assert var_metadata["missing_flags"] == "M" assert set(codes) == { "air_temperature", diff --git a/tox.ini b/tox.ini index 67a06e39..6bac4077 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,12 @@ [tox] -min_version = 4.18.0 +min_version = 4.23.2 envlist = lint py{39,310,311,312,313} docs requires = flit >= 3.9.0,<4.0 - pip >= 24.2.0 + pip >= 24.3.1 opts = --verbose @@ -21,12 +21,12 @@ python = [testenv:lint] skip_install = True deps = - black ==24.8.0 + black ==24.10.0 blackdoc ==0.3.9 isort ==5.13.2 flake8 >=7.1.1 flake8-rst-docstrings >=0.3.0 - ruff >=0.5.7 + ruff >=0.8.2 numpydoc >=1.8.0 commands_pre = pip list @@ -41,7 +41,8 @@ extras = commands = mkdir {envtmpdir}/.esdoc git clone https://github.com/ES-DOC/pyessv-archive.git {envtmpdir}/.esdoc/pyessv-archive - make docs + make autodoc + make --directory=docs clean html allowlist_externals = git make
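
A minimal usage sketch of the treatments API added in this patch. The call order and paths
are assumptions inferred from the function signatures in `src/miranda/treatments`; the
project key, the JSON path, and `example.nc` are hypothetical placeholders:

    from pathlib import Path

    import xarray as xr

    from miranda.treatments._dimensions import dimensions_compliance
    from miranda.treatments._variables import cf_units_conversion, variable_conversion
    from miranda.treatments.utils import load_json_data_mappings

    project = "eccc-obs"  # hypothetical project key
    # Passing `configurations` explicitly avoids the caller-relative `configs` folder lookup.
    metadata = load_json_data_mappings(
        project, configurations={project: Path("configs/eccc-obs_attrs.json")}
    )

    ds = xr.open_dataset("example.nc")  # hypothetical input file
    ds = dimensions_compliance(ds, project, metadata)  # CF dimension names and ordering
    ds = variable_conversion(ds, project, metadata)    # variable metadata and CF names
    ds = cf_units_conversion(ds, metadata)             # pint-based unit conversion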