From a81af5acaa7053b7239feeb0beabf5d1a63b4b7f Mon Sep 17 00:00:00 2001 From: Veronika Samborska Date: Mon, 10 Mar 2025 09:41:04 +0000 Subject: [PATCH] climate: updated era5 dataset march 2025 --- dag/climate.yml | 4 +- pyproject.toml | 2 +- snapshots/climate/2025-03-07/sst.csv.dvc | 28 ++++++ snapshots/climate/2025-03-07/sst.py | 91 +++++++++++++++++++ .../climate/2025-03-07/surface_temperature.py | 55 +++++++++++ .../2025-03-07/surface_temperature.zip.dvc | 26 ++++++ uv.lock | 2 +- 7 files changed, 204 insertions(+), 4 deletions(-) create mode 100644 snapshots/climate/2025-03-07/sst.csv.dvc create mode 100644 snapshots/climate/2025-03-07/sst.py create mode 100644 snapshots/climate/2025-03-07/surface_temperature.py create mode 100644 snapshots/climate/2025-03-07/surface_temperature.zip.dvc diff --git a/dag/climate.yml b/dag/climate.yml index 3219346c125..7d3f0085cc2 100644 --- a/dag/climate.yml +++ b/dag/climate.yml @@ -32,7 +32,7 @@ steps: # Copernicus Climate Change Service - Surface temperature. 
# data://meadow/climate/2025-01-07/surface_temperature: - - snapshot://climate/2025-02-07/surface_temperature.zip + - snapshot://climate/2025-03-07/surface_temperature.zip - snapshot://countries/2023-12-27/world_bank.zip data://garden/climate/2025-01-07/surface_temperature: - data://meadow/climate/2025-01-07/surface_temperature @@ -269,7 +269,7 @@ steps: # Equatorial Pacific Sea Surface Temperatures (SST) data - El Niño or La Niña # data://meadow/climate/2025-02-12/sst: - - snapshot://climate/2025-02-12/sst.csv + - snapshot://climate/2025-03-07/sst.csv data://garden/climate/2025-02-12/sst: - data://meadow/climate/2025-02-12/sst data://grapher/climate/2025-02-12/sst: diff --git a/pyproject.toml b/pyproject.toml index dca74e53fd2..940904ad67c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "geopandas>=0.14.1", "shapely>=2.0.3", "fasteners>=0.19", - "cdsapi>=0.7.0", + "cdsapi>=0.7.4", "rioxarray>=0.15.1", "html2text>=2020.1.16", "pygithub>=2.3.0", diff --git a/snapshots/climate/2025-03-07/sst.csv.dvc b/snapshots/climate/2025-03-07/sst.csv.dvc new file mode 100644 index 00000000000..95300ad5203 --- /dev/null +++ b/snapshots/climate/2025-03-07/sst.csv.dvc @@ -0,0 +1,28 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: Equatorial Pacific Sea Surface Temperatures (SST) data + description: |- + The dataset contains data on El Niño and La Niña, phenomena in the equatorial Pacific Ocean defined by a five consecutive 3-month running mean of sea surface temperature (SST) anomalies in the Niño 3.4 region. El Niño occurs when the anomalies exceed +0.5°C, while La Niña occurs when they fall below -0.5°C. This measure is known as the Oceanic Niño Index (ONI). 
+ date_published: "2025" + + # Citation + producer: NOAA National Centers for Environmental Information + citation_full: |- + National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI) - Equatorial Pacific Sea Surface Temperatures (SST) data + + # Files + url_main: https://www.ncei.noaa.gov/access/monitoring/enso/sst + url_download: https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices + date_accessed: 2025-03-07 + + # License + license: + name: CC BY 4.0 + url: https://gml.noaa.gov/about/disclaimer.html +outs: + - md5: 948aff567ecc30b5a3edeeb3dafe9301 + size: 43446 + path: sst.csv diff --git a/snapshots/climate/2025-03-07/sst.py b/snapshots/climate/2025-03-07/sst.py new file mode 100644 index 00000000000..aa5f3e41da0 --- /dev/null +++ b/snapshots/climate/2025-03-07/sst.py @@ -0,0 +1,91 @@ +"""Script to create a snapshot of the NOAA equatorial Pacific sea surface temperature (SST) indices and Oceanic Niño Index (ONI) data.""" + +from io import StringIO +from pathlib import Path + +import click +import pandas as pd +import requests +from owid.datautils.io import df_to_file + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name +BASE_URL_ONI = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt" +BASE_URL = "https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices" + + +def season_to_month(season: str) -> int: + """ + Convert the season string to the corresponding month. + We are mapping to the last month as the La Niña and El Niño events are classified based on the average temperature of the ocean surface in the previous three months. + + Parameters: + season (str): The season string (e.g., "DJF"). + + Returns: + int: The corresponding month (1-12). 
+ """ + season_to_month_map = { + "DJF": 2, # December-January-February -> February + "JFM": 3, # January-February-March -> March + "FMA": 4, # February-March-April -> April + "MAM": 5, # March-April-May -> May + "AMJ": 6, # April-May-June -> June + "MJJ": 7, # May-June-July -> July + "JJA": 8, # June-July-August -> August + "JAS": 9, # July-August-September -> September + "ASO": 10, # August-September-October -> October + "SON": 11, # September-October-November -> November + "OND": 12, # October-November-December -> December + "NDJ": 1, # November-December-January -> January + } + return season_to_month_map[season] + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/sst.csv") + + dfs = [] + for url in [BASE_URL, BASE_URL_ONI]: + response = requests.get(url, timeout=60) + response.raise_for_status() + data = response.text + + # Skip the single header line and parse the whitespace-separated table into a DataFrame + data_io = StringIO(data) + df = pd.read_csv(data_io, sep=r"\s+", skiprows=1, header=None) + if url == BASE_URL_ONI: + columns = ["month", "year", "oni", "oni_anomaly"] + df.columns = columns + df["month"] = df["month"].apply(season_to_month) + # Add 1 to the year if the month is January because of the way the data is structured + df.loc[df["month"] == 1, "year"] += 1 + else: + # Assign column names + columns = [ + "year", + "month", + "nino1_2", + "nino1_2_anomaly", + "nino3", + "nino3_anomaly", + "nino4", + "nino4_anomaly", + "nino3_4", + "nino3_4_anomaly", + ] + df.columns = columns + + dfs.append(df) + df = pd.merge(dfs[0], dfs[1], on=["year", "month"], how="outer") + df_to_file(df, file_path=snap.path) + snap.dvc_add(upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/climate/2025-03-07/surface_temperature.py b/snapshots/climate/2025-03-07/surface_temperature.py new file mode 100644 index 00000000000..2af77a7d32a --- /dev/null +++ 
b/snapshots/climate/2025-03-07/surface_temperature.py @@ -0,0 +1,55 @@ +"""Script to create a snapshot of the monthly averaged surface temperature data from 1940 to present from the Copernicus Climate Change Service. + +The script assumes that the data is available on the CDS API. +Instructions on how to access the API on a Mac are here: https://confluence.ecmwf.int/display/CKB/How+to+install+and+use+CDS+API+on+macOS + +More information on how to access the data is here: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview + +The data is requested in GRIB format and downloaded as a zip archive. Tutorials on using the Copernicus API and working with the NetCDF format are here: https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html +""" + +import tempfile +from pathlib import Path + +# CDS API +import cdsapi +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +def main(upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip") + + # Save data as a compressed temporary file. + with tempfile.TemporaryDirectory() as temp_dir: + output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc" + + client = cdsapi.Client() + + dataset = "reanalysis-era5-single-levels-monthly-means" + request = { + "product_type": ["monthly_averaged_reanalysis"], + "variable": ["2m_temperature"], + "year": [str(year) for year in range(1940, 2026)], + "month": ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"], + "time": ["00:00"], + "data_format": "grib", + "download_format": "zip", + "area": [90, -180, -90, 180], + } + + client.retrieve(dataset, request, output_file) + + # Upload snapshot. 
+ snap.create_snapshot(filename=output_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/climate/2025-03-07/surface_temperature.zip.dvc b/snapshots/climate/2025-03-07/surface_temperature.zip.dvc new file mode 100644 index 00000000000..174ca185b1f --- /dev/null +++ b/snapshots/climate/2025-03-07/surface_temperature.zip.dvc @@ -0,0 +1,26 @@ +meta: + origin: + title_snapshot: ERA5 Monthly Averaged Data on Single Levels from 1940 to Present - Monthly Averages of 2m Surface Temperature + title: ERA5 monthly averaged data on single levels from 1940 to present + description: |- + ERA5 is the latest climate reanalysis produced by ECMWF, providing hourly data on many atmospheric, land-surface and sea-state parameters together with estimates of uncertainty. + + ERA5 data are available in the Climate Data Store on regular latitude-longitude grids at 0.25° x 0.25° resolution, with atmospheric parameters on 37 pressure levels. + + ERA5 is available from 1940 and continues to be extended forward in time, with daily updates being made available 5 days behind real time + + Initial release data, i.e., data no more than three months behind real time, are called ERA5T. + producer: Contains modified Copernicus Climate Change Service information + version_producer: 2 + citation_full: |- + Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 monthly averaged data on single levels from 1940 to present. 
Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.f17050d7 (Accessed on 07-March-2025) + url_main: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview + date_accessed: 2025-03-07 + date_published: 2025-03-06 + license: + name: Copernicus License + url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview +outs: + - md5: a12df214e690664a78bdabbfaf99b91b + size: 1714413532 + path: surface_temperature.zip diff --git a/uv.lock b/uv.lock index e65a30107e9..49b25161ff1 100644 --- a/uv.lock +++ b/uv.lock @@ -1066,7 +1066,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "cdsapi", specifier = ">=0.7.0" }, + { name = "cdsapi", specifier = ">=0.7.4" }, { name = "cfgrib", specifier = ">=0.9.15.0" }, { name = "click", specifier = ">=8.0.1" }, { name = "deprecated", specifier = ">=1.2.14" },