diff --git a/dag/climate.yml b/dag/climate.yml index 3219346c125..300386de3bd 100644 --- a/dag/climate.yml +++ b/dag/climate.yml @@ -272,9 +272,13 @@ steps: - snapshot://climate/2025-02-12/sst.csv data://garden/climate/2025-02-12/sst: - data://meadow/climate/2025-02-12/sst + data://garden/climate/2025-02-12/sst_annual: + - data://garden/climate/2025-02-12/sst data://grapher/climate/2025-02-12/sst: - data://garden/climate/2025-02-12/sst data://grapher/climate/2025-02-12/sst_by_month: - data://garden/climate/2025-02-12/sst + data://grapher/climate/2025-02-12/sst_annual: + - data://garden/climate/2025-02-12/sst_annual diff --git a/etl/steps/data/garden/climate/2025-02-12/sst.py b/etl/steps/data/garden/climate/2025-02-12/sst.py index 7d558257e57..292696968ed 100644 --- a/etl/steps/data/garden/climate/2025-02-12/sst.py +++ b/etl/steps/data/garden/climate/2025-02-12/sst.py @@ -31,7 +31,7 @@ def run(dest_dir: str) -> None: tb["nino_classification"] = tb["nino_classification"].astype(int) for col in ["nino_classification"]: - tb[col].metadata.origins = tb["nino3_4_anomaly"].metadata.origins + tb[col] = tb[col].copy_metadata(tb["nino3_4_anomaly"]) tb = tb.drop(columns={"nino4_anomaly", "nino3_4_anomaly"}) diff --git a/etl/steps/data/garden/climate/2025-02-12/sst_annual.meta.yml b/etl/steps/data/garden/climate/2025-02-12/sst_annual.meta.yml new file mode 100644 index 00000000000..a2cba7ff645 --- /dev/null +++ b/etl/steps/data/garden/climate/2025-02-12/sst_annual.meta.yml @@ -0,0 +1,20 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + processing_level: major + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 31 + title: Equatorial Pacific Sea Surface Temperatures (SST) data - El Niño or La Niña Annual Anomaly + +tables: + sst: + variables: + annual_oni_anomaly: + title: Annual Oceanic Niño Index (ONI) anomaly + unit: "" + description_processing: |- + Annual anomalies of the Oceanic Niño Index (ONI) are calculated by taking the average of the monthly ONI values for a given year. + diff --git a/etl/steps/data/garden/climate/2025-02-12/sst_annual.py b/etl/steps/data/garden/climate/2025-02-12/sst_annual.py new file mode 100644 index 00000000000..91200bc19ad --- /dev/null +++ b/etl/steps/data/garden/climate/2025-02-12/sst_annual.py @@ -0,0 +1,38 @@ +"""Load the garden `sst` dataset and create the garden `sst_annual` dataset of annual averages.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load the garden `sst` dataset (this step depends on the garden step, not on meadow). + ds_garden = paths.load_dataset("sst") + + # Read the `sst` table from the garden dataset. + tb = ds_garden.read("sst") + + # + # Process data. + # + # Average every column per country and year, then keep only the ONI anomaly (renamed to annual_oni_anomaly). + tb_annual = tb.groupby(["country", "year"]).mean().reset_index() + tb_annual = tb_annual.rename(columns={"oni_anomaly": "annual_oni_anomaly"}) + tb_annual = tb_annual.drop(columns={"month", "nino_classification"}) + + tb_annual = tb_annual.format(["country", "year"]) + + # + # Save outputs. + # + # Create the new garden dataset, using the input garden dataset's metadata as the default (ds_garden is rebound here). + ds_garden = create_dataset( + dest_dir, tables=[tb_annual], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new garden dataset. 
+ ds_garden.save() diff --git a/etl/steps/data/grapher/climate/2025-02-12/sst_annual.py b/etl/steps/data/grapher/climate/2025-02-12/sst_annual.py new file mode 100644 index 00000000000..5aecbe42029 --- /dev/null +++ b/etl/steps/data/grapher/climate/2025-02-12/sst_annual.py @@ -0,0 +1,30 @@ +"""Load the garden `sst_annual` dataset and create the corresponding grapher dataset.""" + +from etl.helpers import PathFinder, create_dataset + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run(dest_dir: str) -> None: + # + # Load inputs. + # + # Load the garden `sst_annual` dataset. + ds_garden = paths.load_dataset("sst_annual") + + # Read the `sst` table from the garden dataset, then relabel the "World" entity as "Global" before formatting. + tb = ds_garden.read("sst", reset_index=True) + tb["country"] = tb["country"].replace({"World": "Global"}) + tb = tb.format(["year", "country"]) + + # + # Save outputs. + # + # Create a new grapher dataset, using the garden dataset's metadata as the default. + ds_grapher = create_dataset( + dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata + ) + + # Save changes in the new grapher dataset. + ds_grapher.save()