-
-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ea222f7
commit b17735f
Showing
7 changed files
with
223 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
40 changes: 40 additions & 0 deletions
40
etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.meta.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# NOTE: To learn more about the fields, hover over their names. | ||
definitions: | ||
common: | ||
presentation: | ||
topic_tags: | ||
- Government Spending | ||
|
||
|
||
# Learn more about the available fields: | ||
# http://docs.owid.io/projects/etl/architecture/metadata/reference/ | ||
dataset: | ||
update_period_days: 365 | ||
title: Social expenditure in the long run | ||
|
||
|
||
tables: | ||
social_expenditure_omm: | ||
variables: | ||
share_gdp: | ||
title: Public social expenditure as a share of GDP | ||
unit: "% of GDP" | ||
short_unit: "%" | ||
description_short: Public social expenditure divided by gross domestic product, expressed as a percentage. | ||
description_key: | ||
- "This indicator combines three different datasets: Lindert (1994), OECD (1985), and the OECD Social Expenditure Database (SOCX). We combine the two OECD datasets by using the implicit growth rate from the older series, so we can extend the series back to 1960. We also use the data from Lindert (1994) to extend the series to 1880." | ||
description_from_producer: "" | ||
processing_level: major | ||
description_processing: |- | ||
We extrapolated the data available from the OECD Social Expenditure Database (Government/compulsory schemes) using the earliest available observation from this dataset and applying the growth rates implied by the OECD (1985) data to obtain a series starting in 1960. These steps are necessary because the data in these years is not exactly the same for the two datasets due to changes in definitions and measurement, though the trends are consistent for common years (1970-1991). | ||
We don't transform the data from Lindert (1994); the values are the same as in the original source. | ||
presentation: | ||
attribution_short: Lindert, OECD | ||
title_public: Public social expenditure as a share of GDP | ||
title_variant: Historical data | ||
display: | ||
name: Public social expenditure as a share of GDP | ||
numDecimalPlaces: 1 | ||
tolerance: 5 | ||
|
142 changes: 142 additions & 0 deletions
142
etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
"""Load a meadow dataset and create a garden dataset.""" | ||
|
||
import owid.catalog.processing as pr | ||
from owid.catalog import Table | ||
|
||
from etl.helpers import PathFinder | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
# Define category to select from OECD Social Expenditure Database | ||
EXPENDITURE_SOURCE_OECD = "Public" | ||
SPENDING_TYPE_OECD = "In-cash and in-kind spending" | ||
PROGRAMME_TYPE_OECD = "All" | ||
|
||
|
||
def run() -> None:
    """Combine the OECD SOCX, OECD (1985) and Lindert tables into one long-run garden table."""
    #
    # Load inputs.
    #
    # The SOCX dataset object is kept because its metadata is reused when saving.
    ds_oecd = paths.load_dataset("social_expenditure")
    tb_oecd = ds_oecd.read("social_expenditure")

    ds_oecd_1985 = paths.load_dataset("social_expenditure_1985")
    tb_oecd_1985 = ds_oecd_1985.read("social_expenditure_1985")

    ds_lindert = paths.load_dataset("lindert")
    tb_lindert = ds_lindert.read("lindert")

    #
    # Process data.
    #
    # Restrict SOCX to the single category combination defined by the module constants.
    matches_source = tb_oecd["expenditure_source"] == EXPENDITURE_SOURCE_OECD
    matches_spending = tb_oecd["spending_type"] == SPENDING_TYPE_OECD
    matches_programme = tb_oecd["programme_type_category"] == PROGRAMME_TYPE_OECD
    tb_oecd = tb_oecd[matches_source & matches_spending & matches_programme].reset_index(drop=True)

    # Only the identifiers and the indicator itself are needed from SOCX.
    tb_oecd = tb_oecd[["country", "year", "share_gdp"]]

    # Remember which countries SOCX covers; the combined table is restricted to them below.
    countries_oecd = list(tb_oecd["country"].unique())

    # Outer-join the three sources; SOCX keeps the bare column name, the others get a suffix.
    tb = pr.merge(
        tb_oecd,
        tb_oecd_1985,
        on=["country", "year"],
        how="outer",
        suffixes=("", "_oecd_1985"),
    )
    tb = pr.merge(
        tb,
        tb_lindert,
        on=["country", "year"],
        how="outer",
        suffixes=("", "_lindert"),
    )

    # Give the SOCX column an explicit suffix so that "share_gdp" is free for the combined series.
    tb = tb.rename(columns={"share_gdp": "share_gdp_oecd"})

    # Drop countries that are not present in SOCX.
    in_oecd = tb["country"].isin(countries_oecd)
    tb = tb[in_oecd].reset_index(drop=True)

    # Fill gaps in SOCX using the relative changes of the OECD (1985) series.
    tb = create_estimations_from_growth(
        tb=tb,
        reference_var_suffix="_oecd_1985",
        to_adjust_var_suffix="_oecd",
    )

    # Wherever the combined series is still empty, fall back to the Lindert values as they are.
    tb["share_gdp"] = tb["share_gdp"].fillna(tb["share_gdp_lindert"])

    # Discard the per-source columns.
    tb = tb[["country", "year", "share_gdp"]]

    # Improve table format.
    tb = tb.format(["country", "year"], short_name="social_expenditure_omm")

    #
    # Save outputs.
    #
    # Create the garden dataset, defaulting to the SOCX dataset metadata, and write it out.
    ds_garden = paths.create_dataset(tables=[tb], default_metadata=ds_oecd.metadata)
    ds_garden.save()
|
||
|
||
def create_estimations_from_growth(tb: Table, reference_var_suffix: str, to_adjust_var_suffix: str) -> Table:
    """
    Fill gaps in one share_gdp series using the relative changes of a reference series.

    For each country, the reference year is the first year in which both
    `share_gdp{reference_var_suffix}` and `share_gdp{to_adjust_var_suffix}` have data.
    Estimates are computed as

        to_adjust(reference year) * reference(year) / reference(reference year)

    and are used only where the variable to be adjusted has no value. Note that this fills
    every such gap for which the reference variable has data, not only years before the
    reference year. Countries without any year in common get no estimates.

    Parameters
    ----------
    tb : Table
        Table with "country" and "year" columns that contains both the reference variable (the one
        the growth is extracted from) and the variable to be adjusted (the one the growth is applied to).
    reference_var_suffix : str
        Suffix of the reference variable (the one the growth is extracted from). In this step, "_oecd_1985".
    to_adjust_var_suffix : str
        Suffix of the variable to be adjusted (the one the growth is applied to). In this step, "_oecd".

    Returns
    -------
    tb : Table
        Table sorted by country and year, with the original columns plus "share_gdp" (Float64), which holds
        the values of the variable to be adjusted where available and the estimates elsewhere.
    """
    reference_col = f"share_gdp{reference_var_suffix}"
    to_adjust_col = f"share_gdp{to_adjust_var_suffix}"

    # Save the original columns, so the helper columns created below can be dropped at the end.
    columns_list = list(tb.columns)

    # Sort by country and year. This also creates a new object, so the caller's table is not modified.
    tb = tb.sort_values(by=["country", "year"]).reset_index(drop=True)

    # Mark the years in which both series have data (other rows are left empty).
    both_available = tb[reference_col].notnull() & tb[to_adjust_col].notnull()
    tb["years_in_common"] = tb.loc[both_available, "year"]

    # The reference year is the first year in common of each country (empty if there is none).
    tb["reference_year"] = tb.groupby("country")["years_in_common"].transform("min")

    # Rows that sit exactly on their country's reference year. This mask does not depend on the group being
    # processed, so it is computed once here instead of once per country inside the transform.
    at_reference_year = tb["year"] == tb["reference_year"]

    def _value_at_reference_year(series):
        # Value of the series in the country's reference year, or None when there is no reference year.
        # The mask covers the whole table; it is aligned to the group's index by the boolean indexing.
        matches = series.loc[at_reference_year]
        return None if matches.empty else matches.iloc[0]

    # Get value from the reference variable in the reference year.
    tb["reference_value"] = tb.groupby("country")[reference_col].transform(_value_at_reference_year)

    # The scalar is the reference variable relative to its value in the reference year.
    # This is the growth that will be applied to the variable to be adjusted.
    tb["share_gdp_scalar"] = tb[reference_col] / tb["reference_value"]

    # Get value to be adjusted in the reference year.
    tb["to_adjust_value"] = tb.groupby("country")[to_adjust_col].transform(_value_at_reference_year)

    # The estimated values are the product of the value to be adjusted in the reference year and the scalars.
    tb["share_gdp_estimated"] = tb["to_adjust_value"] * tb["share_gdp_scalar"]

    # Keep the observed values and use the estimates only where there is no observation.
    tb["share_gdp"] = tb[to_adjust_col].astype("Float64").fillna(tb["share_gdp_estimated"])

    # Keep only the original columns plus the combined one (which may already be among the original columns
    # when the suffix of the variable to be adjusted is empty).
    if "share_gdp" not in columns_list:
        columns_list.append("share_gdp")

    tb = tb[columns_list]

    return tb
26 changes: 26 additions & 0 deletions
26
etl/steps/data/grapher/social_expenditure/2025-03-07/social_expenditure_omm.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
"""Load a garden dataset and create a grapher dataset.""" | ||
|
||
from etl.helpers import PathFinder | ||
|
||
# Get paths and naming conventions for current step. | ||
paths = PathFinder(__file__) | ||
|
||
|
||
def run() -> None:
    """Pass the garden table through to a grapher dataset without modifying it."""
    #
    # Load inputs.
    #
    # Garden dataset and its only table used here, read with its index kept in place.
    garden_dataset = paths.load_dataset("social_expenditure_omm")
    garden_table = garden_dataset.read("social_expenditure_omm", reset_index=False)

    #
    # Save outputs.
    #
    # Create the grapher dataset, defaulting to the garden dataset metadata, and write it out.
    grapher_dataset = paths.create_dataset(
        tables=[garden_table],
        default_metadata=garden_dataset.metadata,
    )
    grapher_dataset.save()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters