-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
climate: updated era5 dataset march 2025
- Loading branch information
1 parent
bac1903
commit a81af5a
Showing
7 changed files
with
204 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Learn more at: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
meta: | ||
origin: | ||
# Data product / Snapshot | ||
title: Equatorial Pacific Sea Surface Temperatures (SST) data | ||
description: |- | ||
The dataset contains data on El Niño and La Niña, phenomena in the equatorial Pacific Ocean defined by five consecutive 3-month running means of sea surface temperature (SST) anomalies in the Niño 3.4 region. El Niño occurs when the anomalies exceed +0.5°C, while La Niña occurs when they fall below -0.5°C. This measure is known as the Oceanic Niño Index (ONI). | ||
date_published: "2025" | ||
|
||
# Citation | ||
producer: NOAA National Centers for Environmental Information | ||
citation_full: |- | ||
National Oceanic and Atmospheric Administration (NOAA) National Centers for Environmental Information (NCEI) - Equatorial Pacific Sea Surface Temperatures (SST) data | ||
|
||
# Files | ||
url_main: https://www.ncei.noaa.gov/access/monitoring/enso/sst | ||
url_download: https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices | ||
date_accessed: 2025-03-07 | ||
|
||
# License | ||
license: | ||
name: CC BY 4.0 | ||
url: https://gml.noaa.gov/about/disclaimer.html | ||
outs: | ||
- md5: 948aff567ecc30b5a3edeeb3dafe9301 | ||
size: 43446 | ||
path: sst.csv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
"""Script to create a snapshot of dataset.""" | ||
|
||
from io import StringIO | ||
from pathlib import Path | ||
|
||
import click | ||
import pandas as pd | ||
import requests | ||
from owid.datautils.io import df_to_file | ||
|
||
from etl.snapshot import Snapshot | ||
|
||
# Version for current snapshot dataset (the name of the directory that contains this script). | ||
SNAPSHOT_VERSION = Path(__file__).parent.name | ||
# File parsed in main() as (season, year, oni, oni_anomaly) rows. | ||
BASE_URL_ONI = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt" | ||
# File parsed in main() as (year, month) rows holding the nino1_2, nino3, nino4 and nino3_4 values and their anomalies. | ||
BASE_URL = "https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices" | ||
|
||
|
||
def season_to_month(season: str) -> int:
    """
    Convert a three-month season code to the number of its last month.

    The last month is used because La Niña and El Niño events are classified based on
    the average temperature of the ocean surface in the previous three months.

    Parameters:
        season (str): The season string (e.g., "DJF").
    Returns:
        int: The corresponding month (1-12).
    Raises:
        KeyError: If ``season`` is not one of the twelve known season codes.
    """
    season_to_month_map = {
        "DJF": 2,  # December-January-February -> February
        "JFM": 3,  # January-February-March -> March
        "FMA": 4,  # February-March-April -> April
        "MAM": 5,  # March-April-May -> May
        "AMJ": 6,  # April-May-June -> June
        "MJJ": 7,  # May-June-July -> July
        "JJA": 8,  # June-July-August -> August
        "JAS": 9,  # July-August-September -> September
        "ASO": 10,  # August-September-October -> October
        "SON": 11,  # September-October-November -> November
        "OND": 12,  # October-November-December -> December
        "NDJ": 1,  # November-December-January -> January
    }
    try:
        return season_to_month_map[season]
    except KeyError:
        # Keep the exception type callers may rely on, but say what went wrong.
        valid = ", ".join(season_to_month_map)
        raise KeyError(f"Unknown season {season!r}; expected one of: {valid}") from None
|
||
|
||
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    """Download the two SST index files, merge them on year and month, and save the snapshot."""
    # Create a new snapshot.
    snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/sst.csv")

    # Seconds to wait on the server before giving up instead of hanging forever.
    request_timeout = 60

    dfs = []
    for url in [BASE_URL, BASE_URL_ONI]:
        response = requests.get(url, timeout=request_timeout)
        # Fail loudly on an HTTP error rather than parsing an error page as data.
        response.raise_for_status()

        # Both files are read as whitespace-delimited text; the first line (header) is skipped.
        df = pd.read_csv(StringIO(response.text), sep=r"\s+", skiprows=1, header=None)
        if url == BASE_URL_ONI:
            df.columns = ["month", "year", "oni", "oni_anomaly"]
            # The first column holds season codes such as "DJF"; map each to its last month.
            df["month"] = df["month"].apply(season_to_month)
            # Add 1 to the year if the month is January because of the way the data is structured
            # (presumably the NDJ season is labelled with the year of its November/December - verify against the source file).
            df.loc[df["month"] == 1, "year"] += 1
        else:
            # Assign column names
            df.columns = [
                "year",
                "month",
                "nino1_2",
                "nino1_2_anomaly",
                "nino3",
                "nino3_anomaly",
                "nino4",
                "nino4_anomaly",
                "nino3_4",
                "nino3_4_anomaly",
            ]

        dfs.append(df)

    # Outer merge keeps year/month rows that appear in only one of the two files.
    df = pd.merge(dfs[0], dfs[1], on=["year", "month"], how="outer")
    df_to_file(df, file_path=snap.path)
    snap.dvc_add(upload=upload)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
"""Script to create a snapshot of the monthly averaged surface temperature data from 1950 to present from the Copernicus Climate Change Service. | ||
The script assumes that the data is available on the CDS API. | ||
Instructions on how to access the API on a Mac are here: https://confluence.ecmwf.int/display/CKB/How+to+install+and+use+CDS+API+on+macOS | ||
More information on how to access the data is here: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview | ||
The data is requested in GRIB format and downloaded as a zip archive. Tutorials on using the Copernicus API and on working with the NetCDF format are here: https://ecmwf-projects.github.io/copernicus-training-c3s/cds-tutorial.html | ||
""" | ||
|
||
import tempfile | ||
from pathlib import Path | ||
|
||
# CDS API | ||
import cdsapi | ||
import click | ||
|
||
from etl.snapshot import Snapshot | ||
|
||
# Version for current snapshot dataset (the name of the directory that contains this script). | ||
SNAPSHOT_VERSION = Path(__file__).parent.name | ||
|
||
|
||
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    # Snapshot entry that will receive the downloaded archive.
    snap = Snapshot(f"climate/{SNAPSHOT_VERSION}/surface_temperature.zip")

    # Monthly-mean 2m temperature for every month of 1940-2025 over the full
    # latitude/longitude box, requested as GRIB inside a zip download.
    years = list(map(str, range(1940, 2026)))
    months = [f"{number:02d}" for number in range(1, 13)]
    request = {
        "product_type": ["monthly_averaged_reanalysis"],
        "variable": ["2m_temperature"],
        "year": years,
        "month": months,
        "time": ["00:00"],
        "data_format": "grib",
        "download_format": "zip",
        "area": [90, -180, -90, 180],
    }

    # The download lives in a temporary directory that is removed once the
    # snapshot has been created from it.
    with tempfile.TemporaryDirectory() as temp_dir:
        # NOTE(review): the file name suggests NetCDF/Europe, but the request above asks for GRIB over the whole globe.
        output_file = Path(temp_dir) / "era5_monthly_t2m_eur.nc"

        cds_client = cdsapi.Client()
        cds_client.retrieve("reanalysis-era5-single-levels-monthly-means", request, output_file)

        # Upload snapshot.
        snap.create_snapshot(filename=output_file, upload=upload)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
meta: | ||
origin: | ||
title_snapshot: ERA5 Monthly Averaged Data on Single Levels from 1940 to Present - Monthly Averages of 2m Surface Temperature | ||
title: ERA5 monthly averaged data on single levels from 1940 to present | ||
description: |- | ||
ERA5 is the latest climate reanalysis produced by ECMWF, providing hourly data on many atmospheric, land-surface and sea-state parameters together with estimates of uncertainty. | ||
|
||
ERA5 data are available in the Climate Data Store on regular latitude-longitude grids at 0.25° x 0.25° resolution, with atmospheric parameters on 37 pressure levels. | ||
|
||
ERA5 is available from 1940 and continues to be extended forward in time, with daily updates being made available 5 days behind real time. | ||
|
||
Initial release data, i.e., data no more than three months behind real time, are called ERA5T. | ||
producer: Contains modified Copernicus Climate Change Service information | ||
version_producer: 2 | ||
citation_full: |- | ||
Hersbach, H., Bell, B., Berrisford, P., Biavati, G., Horányi, A., Muñoz Sabater, J., Nicolas, J., Peubey, C., Radu, R., Rozum, I., Schepers, D., Simmons, A., Soci, C., Dee, D., Thépaut, J-N. (2023): ERA5 monthly averaged data on single levels from 1940 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS), DOI: 10.24381/cds.f17050d7 (Accessed on 07-March-2025) | ||
url_main: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview | ||
date_accessed: 2025-03-07 | ||
date_published: 2025-03-06 | ||
license: | ||
name: Copernicus License | ||
url: https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-monthly-means?tab=overview | ||
outs: | ||
- md5: a12df214e690664a78bdabbfaf99b91b | ||
size: 1714413532 | ||
path: surface_temperature.zip |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.