Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete historical national + global emissions 1750-2022 #21

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
data/national/ceds/processed
data/national/gfed/processed
data/national/gfed-bb4cmip/processed
data/national/gcb/processed

# RCMIP data
data/global/rcmip/data_raw
Expand All @@ -13,6 +14,10 @@ data/national/ceds/data_raw
data/national/gfed/data_raw
data/national/gfed-bb4cmip/data_raw

# GCB data
data/national/gcb/data_raw
data/global/gcb/data_raw

# pixi stuff
.pixi

Expand Down
Binary file removed data/global/gcb/Global_Carbon_Budget_2023v1.1.xlsx
Binary file not shown.
9 changes: 9 additions & 0 deletions data/national/gcb/data_raw/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Download data from source:
https://globalcarbonbudgetdata.org/latest-data.html
https://zenodo.org/records/14106218

Download the following links:
"Global Carbon Budget v2024"
GCB2024v18_MtCO2_flat.csv

Last accessed: 20.11.2024
1 change: 1 addition & 0 deletions data/national/gcb/processed/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The processed data from Global Carbon Budget goes here.
1 change: 1 addition & 0 deletions data/national/gfed-bb4cmip/processed/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This location will host processed data from BB4CMIP.
37 changes: 30 additions & 7 deletions notebooks/0101_CEDS-prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@
ceds_processed_output_file = DATA_ROOT / Path("national", "ceds", "processed", "ceds_cmip7_national_alpha.csv")

# %% [markdown]
# Specify gases to process
# Specify species to process

# %%
# use all gases covered in CEDS
gases = [
# use all species covered in CEDS
species = [
"BC",
"CH4",
"CO",
Expand All @@ -67,7 +67,8 @@

# %%
ceds = pd.concat(
read_CEDS(Path(ceds_data_folder) / f"{gas}_CEDS_emissions_by_country_sector_v{ceds_release}.csv") for gas in gases
read_CEDS(Path(ceds_data_folder) / f"{specie}_CEDS_emissions_by_country_sector_v{ceds_release}.csv")
for specie in species
).rename_axis(index={"region": "country"})
ceds.attrs["name"] = "CEDS21"
ceds = ceds.pix.semijoin(ceds_map, how="outer")
Expand All @@ -91,15 +92,19 @@
ceds = add_global(ceds)

# %%
# See the NOx plots in Hoesly et al. (2018), which suggest that NOx is reported in NO2 units
# https://gmd.copernicus.org/articles/11/369/2018/gmd-11-369-2018.pdf

unit_wishes = pd.MultiIndex.from_tuples(
[
("BC", "Mt BC/yr"),
("CH4", "Mt CH4/yr"),
("CO", "Mt CO/yr"),
("CO2", "Mt CO2/yr"),
("N2O", "Mt N2O/yr"),
("NH3", "Mt NH3/yr"),
("NMVOC", "Mt NMVOC/yr"),
("NOx", "Mt NOx/yr"),
("NOx", "Mt NO2/yr"), # CEDS is reported in NO2 units
("OC", "Mt OC/yr"),
("SO2", "Mt SO2/yr"),
],
Expand All @@ -112,12 +117,30 @@
# %%
ceds.pix.unique(unit_wishes.names).symmetric_difference(unit_wishes)

# %%
# reformat
ceds_ref = (
ceds.droplevel("unit")
.pix.semijoin(unit_wishes, how="left")
.rename_axis(index={"em": "variable", "country": "region"})
)

# %%
# rename to IAMC-style variable names including standard index order
ceds_ref = (
ceds_ref.rename(index={"SO2": "Sulfur"}, level="variable")
.pix.format(variable="CMIP7 History|Emissions|{variable}|{sector}", drop=True)
.pix.assign(model="History", scenario=f"CEDSv{ceds_release}")
.reorder_levels(["model", "scenario", "region", "variable", "unit"])
).sort_values(by=["region", "variable"])

# %% [markdown]
# Save formatted CEDS data

# %%
# reformat
ceds_ref = ceds.droplevel("unit").pix.semijoin(unit_wishes, how="left").rename_axis(index={"em": "gas"})
ceds_ref

# %%
(ceds_ref.to_csv(ceds_processed_output_file))

# %%
81 changes: 60 additions & 21 deletions notebooks/0102_GFED4-prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
# import external packages and functions
from pathlib import Path

import numpy as np
import pandas as pd
import ptolemy
import xarray as xr
import xarray_regrid # noqa: F401
from pandas_indexing import set_openscm_registry_as_default

from emissions_harmonization_historical.constants import DATA_ROOT
Expand Down Expand Up @@ -62,11 +64,11 @@
gfed_temp_file = DATA_ROOT / Path("national", "gfed", "processed", "gfed_temporaryfile.csv")

# %% [markdown]
# Specify gases to process
# Specify species to process

# %%
# use all gases covered in CEDS
gases = [
# use all species covered in CEDS
species = [
"BC",
"CH4",
"CO",
Expand Down Expand Up @@ -105,6 +107,23 @@
# show xarray
emissions

# %%
dummy = xr.Dataset(
data_vars=dict(
DM=(["lat", "lon"], np.zeros((360, 720))),
),
coords=dict(
lat=("lat", np.arange(-89.75, 90, 0.5)),
lon=("lon", np.arange(-179.75, 180, 0.5)),
),
)

# %%
dummy

# %%
emissions["DM"].regrid.conservative(dummy)

# %% [markdown]
# Get emissions factor for different species

Expand Down Expand Up @@ -138,7 +157,7 @@

ef.loc["NMVOC"] = ef.multiply(nmvoc_factors, axis=0).sum()

ef_per_DM = ef.loc[gases] / ef.loc["DM"]
ef_per_DM = ef.loc[species] / ef.loc["DM"]
# in kg {species} / kg DM
ef_per_DM

Expand All @@ -151,18 +170,27 @@
# 'chunks={"iso": 1}' uses Dask to enable chunking for memory efficiency, loading one ISO code at a time.
idxr = xr.open_dataarray(gfed_isomask, chunks={"iso": 1})

# %%
# Step 2: Open a NetCDF file to use as a grid template for latitude and longitude coordinates.
# The template file provides the lat/lon grid for regridding the emissions data.
with xr.open_dataset(gfed_grid_template) as template:
# Interpolate the "DM" (Dry Matter) emissions data to the lat/lon grid from the template,
# using linear interpolation. This matches the emissions data to the same grid resolution.
dm_regrid = emissions["DM"].interp(lon=template.lon, lat=template.lat, method="linear")

# Interpolate the "DM" (Dry Matter) emissions data to the lat/lon grid from the template,
# using conservative interpolation. This matches the emissions data to the same grid resolution.
dm_regrid = emissions["DM"].regrid.conservative(dummy)

# %%
dm_regrid

# %%
# Step 3: Compute the area of each grid cell using the 'ptolemy.cell_area' function.
# This function calculates the area of each grid cell based on the interpolated lat/lon grid.
# The resulting cell areas are stored in an xarray DataArray, with units of square meters ("m2").
cell_area = xr.DataArray(ptolemy.cell_area(lats=dm_regrid.lat, lons=dm_regrid.lon), attrs=dict(unit="m2"))

# %%
cell_area

# %%
# calculate emissions by country by:
# taking the country cell IDs (idxr), multiplying it by the area (cell_area),
# and by the regridded lat/lon grid resummed to per year (dm_regrid.groupby("time.year").sum())
Expand Down Expand Up @@ -191,7 +219,7 @@
("N2O", "kg N2O"),
("NH3", "kg NH3"),
("NOx", "kg NOx"),
("VOC", voc_unit),
("NMVOC", voc_unit),
("SO2", "kg SO2"),
],
names=["em", "unit"],
Expand Down Expand Up @@ -228,18 +256,18 @@

# %%
# set units
unit = pd.MultiIndex.from_tuples(
unit_wishes = pd.MultiIndex.from_tuples(
[
("BC", "kt BC/yr"),
("OC", "kt OC/yr"),
("CO", "kt CO/yr"),
("CO2", "kt CO2/yr"),
("CH4", "kt CH4/yr"),
("N2O", "kt N2O/yr"),
("NH3", "kt NH3/yr"),
("NOx", "kt NOx/yr"),
("NMVOC", "kt VOC/yr"),
("SO2", "kt SO2/yr"),
("BC", "Mt BC/yr"),
("OC", "Mt OC/yr"),
("CO", "Mt CO/yr"),
("CO2", "Mt CO2/yr"),
("CH4", "Mt CH4/yr"),
("N2O", "Mt N2O/yr"),
("NH3", "Mt NH3/yr"),
("NOx", "Mt NO/yr"), # we know NO mass units, so label as such
("NMVOC", "Mt NMVOC/yr"),
("SO2", "Mt SO2/yr"),
],
names=["em", "unit"],
)
Expand All @@ -257,6 +285,8 @@
.sum()
)

# format units
burningCMIP7_ref = burningCMIP7_ref.droplevel("unit").pix.semijoin(unit_wishes, how="left")

# rename to IAMC-style variable names
burningCMIP7_ref = (
Expand All @@ -266,7 +296,14 @@
)

# add global level aggregation ("World")
burningCMIP7_ref = add_global(burningCMIP7_ref, groups=["model", "scenario", "variable", "unit"])
burningCMIP7_ref = add_global(burningCMIP7_ref, groups=["model", "scenario", "variable", "unit"]).rename_axis(
index={"country": "region"}
)

# fix order
burningCMIP7_ref = burningCMIP7_ref.reorder_levels(["model", "scenario", "region", "variable", "unit"]).sort_values(
by=["region", "variable"]
)

# %%
burningCMIP7.pix
Expand All @@ -279,3 +316,5 @@

# %%
(burningCMIP7_ref.to_csv(gfed_processed_output_file))

# %%
Loading