diff --git a/dag/redistribution.yml b/dag/redistribution.yml index 10aab44a428..fe254865ca6 100644 --- a/dag/redistribution.yml +++ b/dag/redistribution.yml @@ -29,6 +29,32 @@ steps: data://grapher/oecd/2025-02-25/social_expenditure: - data://garden/oecd/2025-02-25/social_expenditure + # + # Social expenditure OMM + # + data://garden/social_expenditure/2025-03-07/social_expenditure_omm: + - data://garden/oecd/2025-02-25/social_expenditure + - data://garden/oecd/2025-03-07/social_expenditure_1985 + - data://garden/social_expenditure/2025-03-07/lindert + data://grapher/social_expenditure/2025-03-07/social_expenditure_omm: + - data://garden/social_expenditure/2025-03-07/social_expenditure_omm + + # + # Social transfers 1880-1930 (Lindert, 1994) + # + data://meadow/social_expenditure/2025-03-07/lindert: + - snapshot://social_expenditure/2025-03-07/lindert.csv + data://garden/social_expenditure/2025-03-07/lindert: + - data://meadow/social_expenditure/2025-03-07/lindert + + # + # OECD social expenditure data (1985) + # + data://meadow/oecd/2025-03-07/social_expenditure_1985: + - snapshot://oecd/2025-03-07/social_expenditure_1985.xlsx + data://garden/oecd/2025-03-07/social_expenditure_1985: + - data://meadow/oecd/2025-03-07/social_expenditure_1985 + ############################################################ # HEALTH ############################################################ diff --git a/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.meta.yml b/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.meta.yml index d5c5ecf89ac..c80678a390f 100644 --- a/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.meta.yml +++ b/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.meta.yml @@ -114,7 +114,7 @@ dataset: tables: social_expenditure: variables: - share_of_gdp: + share_gdp: title: Social expenditure as a share of GDP - <> - <> - <> programs (<>) unit: "% of GDP" short_unit: "%" @@ -131,7 +131,7 @@ tables: numDecimalPlaces: 1 tolerance: 5 - 
share_of_gov_expenditure: + share_gov_expenditure: title: Social expenditure as a share of government expenditure - <> - <> - <> programs (<>) unit: "% of government expenditure" short_unit: "%" diff --git a/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.py b/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.py index 6c2dca1108f..b201e641f4b 100644 --- a/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.py +++ b/etl/steps/data/garden/oecd/2025-02-25/social_expenditure.py @@ -8,8 +8,8 @@ # Define indicator columns and their new names. INDICATOR_COLUMNS = { - "Percentage of GDP": "share_of_gdp", - "Percentage of general government expenditure": "share_of_gov_expenditure", + "Percentage of GDP": "share_gdp", + "Percentage of general government expenditure": "share_gov_expenditure", "US dollars per person, PPP converted": "usd_per_person_ppp", } diff --git a/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.countries.json b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.countries.json new file mode 100644 index 00000000000..e1f179c48e1 --- /dev/null +++ b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.countries.json @@ -0,0 +1,21 @@ +{ + "AUSTRALIA": "Australia", + "BELGIUM": "Belgium", + "CANADA": "Canada", + "DENMARK": "Denmark", + "FRANCE": "France", + "GERMANY": "Germany", + "GREECE": "Greece", + "ITALY": "Italy", + "Ireland": "Ireland", + "JAPAN": "Japan", + "NETHERLANDS": "Netherlands", + "NEW ZEALAND": "New Zealand", + "NORWAY": "Norway", + "SWITZERLAND": "Switzerland", + "UNITED KINGDOM": "United Kingdom", + "UNITED STATES OF AMERICA": "United States", + "AUSTRIA ": "Austria", + "SWEDEN": "Sweden", + "FINLAND": "Finland" +} \ No newline at end of file diff --git a/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.meta.yml b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.meta.yml new file mode 100644 index 00000000000..5328f55e570 --- /dev/null +++ 
b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.meta.yml @@ -0,0 +1,36 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Government Spending + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 0 + + +tables: + social_expenditure_1985: + variables: + share_gdp: + title: Public social expenditure as a share of GDP + unit: "% of GDP" + short_unit: "%" + description_short: Public social expenditure divided by gross domestic product, expressed as a percentage. + processing_level: major + description_processing: |- + We calculated this indicator by subtracting education expenditure from the total social expenditure published by OECD (1985), and divided by GDP. + + We do this to ensure that the indicator uses the same definition of social expenditure as the OECD Social Expenditure Database (SOCX). + presentation: + attribution_short: OECD + title_public: Public social expenditure as a share of GDP + title_variant: Data between 1960-1981 + display: + name: Public social expenditure as a share of GDP + numDecimalPlaces: 1 + tolerance: 5 + diff --git a/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.py b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.py new file mode 100644 index 00000000000..96e2ed6a9fa --- /dev/null +++ b/etl/steps/data/garden/oecd/2025-03-07/social_expenditure_1985.py @@ -0,0 +1,45 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run() -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("social_expenditure_1985") + + # Read table from meadow dataset. + tb = ds_meadow.read("social_expenditure_1985") + + # + # Process data. 
+ # + # Harmonize country names. + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + # Calculate the share of social expenditure in GDP. + tb["share_gdp"] = (tb["total_social_expenditure_with_education"] - tb["education"]) / tb["gdp"] * 100 + + # Improve table format. + tb = tb.format(["country", "year"]) + + # Keep only share_gdp column. + tb = tb[["share_gdp"]] + + # + # Save outputs. + # + # Initialize a new garden dataset. + ds_garden = paths.create_dataset(tables=[tb], default_metadata=ds_meadow.metadata) + + # Save garden dataset. + ds_garden.save() diff --git a/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.countries.json b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.countries.json new file mode 100644 index 00000000000..1b5be838476 --- /dev/null +++ b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.countries.json @@ -0,0 +1,20 @@ +{ + "Australia": "Australia", + "Austria": "Austria", + "Belgium": "Belgium", + "Canada": "Canada", + "Denmark": "Denmark", + "Finland": "Finland", + "France": "France", + "Germany": "Germany", + "Ireland": "Ireland", + "Italy": "Italy", + "Japan": "Japan", + "Netherlands": "Netherlands", + "New Zealand": "New Zealand", + "Norway": "Norway", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "United Kingdom": "United Kingdom", + "United States": "United States" +} \ No newline at end of file diff --git a/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.meta.yml b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.meta.yml new file mode 100644 index 00000000000..02baf70a91f --- /dev/null +++ b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.meta.yml @@ -0,0 +1,33 @@ +# NOTE: To learn more about the fields, hover over their names. 
+definitions: + common: + presentation: + topic_tags: + - Government Spending + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 0 + + +tables: + lindert: + variables: + share_gdp: + title: Public social expenditure as a share of GDP + unit: "% of GDP" + short_unit: "%" + description_short: Public social expenditure divided by gross domestic product, expressed as a percentage. + description_from_producer: "Social transfers, 1880–1930, as percentages of national product at current prices: all four kinds of government social spending (welfare–unemployment, pensions, health, and housing)." + processing_level: minor + presentation: + attribution_short: Lindert + title_public: Public social expenditure as a share of GDP + title_variant: Data between 1880-1930 + display: + name: Public social expenditure as a share of GDP + numDecimalPlaces: 1 + tolerance: 5 + diff --git a/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.py b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.py new file mode 100644 index 00000000000..226d00e66d2 --- /dev/null +++ b/etl/steps/data/garden/social_expenditure/2025-03-07/lindert.py @@ -0,0 +1,39 @@ +"""Load a meadow dataset and create a garden dataset.""" + +from etl.data_helpers import geo +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run() -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_meadow = paths.load_dataset("lindert") + + # Read table from meadow dataset. + tb = ds_meadow.read("lindert") + + # + # Process data. + # + # Harmonize country names. + tb = geo.harmonize_countries( + df=tb, + countries_file=paths.country_mapping_path, + ) + + # Improve table format. + tb = tb.format(["country", "year"]) + + # + # Save outputs. + # + # Initialize a new garden dataset.
+ ds_garden = paths.create_dataset(tables=[tb], default_metadata=ds_meadow.metadata) + + # Save garden dataset. + ds_garden.save() diff --git a/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.meta.yml b/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.meta.yml new file mode 100644 index 00000000000..3c10e22a5dc --- /dev/null +++ b/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.meta.yml @@ -0,0 +1,40 @@ +# NOTE: To learn more about the fields, hover over their names. +definitions: + common: + presentation: + topic_tags: + - Government Spending + + +# Learn more about the available fields: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +dataset: + update_period_days: 365 + title: Social expenditure in the long run + + +tables: + social_expenditure_omm: + variables: + share_gdp: + title: Public social expenditure as a share of GDP + unit: "% of GDP" + short_unit: "%" + description_short: Public social expenditure divided by gross domestic product, expressed as a percentage. + description_key: + - "This indicator combines three different datasets: Lindert (2004), OECD (1985), and the OECD Social Expenditure Database (SOCX). We combine the two OECD datasets by using the implicit growth rate from the older series, so we can extend the series back to 1960. We also use the data from Lindert (2004) to extend the series to 1880." + description_from_producer: "" + processing_level: major + description_processing: |- + We extrapolated the data available from the OECD Social Expenditure Database (public, in-cash and in-kind spending, all programs) using the earliest available observation from this dataset and applying the growth rates implied by the OECD (1985) data to obtain a series starting in 1960. These steps are necessary because the data in common years is not exactly the same for the two datasets due to changes in definitions and measurement.
Nevertheless, we assume that trends stay the same in both cases. + + We don't transform the data from Lindert (2004), the values are the same as in the original source. + presentation: + attribution_short: Lindert, OECD + title_public: Public social expenditure as a share of GDP + title_variant: Historical data + display: + name: Public social expenditure as a share of GDP + numDecimalPlaces: 1 + tolerance: 5 + diff --git a/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.py b/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.py new file mode 100644 index 00000000000..efd59ba8a91 --- /dev/null +++ b/etl/steps/data/garden/social_expenditure/2025-03-07/social_expenditure_omm.py @@ -0,0 +1,142 @@ +"""Load a meadow dataset and create a garden dataset.""" + +import owid.catalog.processing as pr +from owid.catalog import Table + +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Define category to select from OECD Social Expenditure Database +EXPENDITURE_SOURCE_OECD = "Public" +SPENDING_TYPE_OECD = "In-cash and in-kind spending" +PROGRAMME_TYPE_OECD = "All" + + +def run() -> None: + # + # Load inputs. + # + # Load meadow dataset. + ds_oecd = paths.load_dataset("social_expenditure") + ds_oecd_1985 = paths.load_dataset("social_expenditure_1985") + ds_lindert = paths.load_dataset("lindert") + + # Read table from meadow dataset. + tb_oecd = ds_oecd.read("social_expenditure") + tb_oecd_1985 = ds_oecd_1985.read("social_expenditure_1985") + tb_lindert = ds_lindert.read("lindert") + + # + # Process data. 
+ # + # Select the right categories from the OECD SOCX dataset + tb_oecd = tb_oecd[ + (tb_oecd["expenditure_source"] == EXPENDITURE_SOURCE_OECD) + & (tb_oecd["spending_type"] == SPENDING_TYPE_OECD) + & (tb_oecd["programme_type_category"] == PROGRAMME_TYPE_OECD) + ].reset_index(drop=True) + + # Keep only the necessary columns + tb_oecd = tb_oecd[["country", "year", "share_gdp"]] + + # Save the countries available in the OECD dataset + countries_oecd = list(tb_oecd["country"].unique()) + + # Merge the three tables + tb = pr.merge(tb_oecd, tb_oecd_1985, on=["country", "year"], how="outer", suffixes=("", "_oecd_1985")) + tb = pr.merge(tb, tb_lindert, on=["country", "year"], how="outer", suffixes=("", "_lindert")) + + # Rename share_gdp to share_gdp_oecd + tb = tb.rename(columns={"share_gdp": "share_gdp_oecd"}) + + # Keep only countries available in the OECD dataset + tb = tb[tb["country"].isin(countries_oecd)].reset_index(drop=True) + + # Merge the three series, by applying the growth retroactively + tb = create_estimations_from_growth(tb=tb, reference_var_suffix="_oecd_1985", to_adjust_var_suffix="_oecd") + + # Fill data from Lindert where there is no data in share_gdp + tb["share_gdp"] = tb["share_gdp"].fillna(tb["share_gdp_lindert"]) + + # Keep only the necessary columns + tb = tb[["country", "year", "share_gdp"]] + + # Improve table format. + tb = tb.format(["country", "year"], short_name="social_expenditure_omm") + + # + # Save outputs. + # + # Initialize a new garden dataset. + ds_garden = paths.create_dataset(tables=[tb], default_metadata=ds_oecd.metadata) + + # Save garden dataset. + ds_garden.save() + + +def create_estimations_from_growth(tb: Table, reference_var_suffix: str, to_adjust_var_suffix: str) -> Table: + """ + Adjust estimations of variables according to the growth of a reference variable. 
+ + Parameters + ---------- + tb : Table + Table that contains both the reference variable (the one the growth is extracted from) and the variable to be adjusted (the one the growth is applied to). + reference_var_suffix : str + Suffix of the reference variable (the one the growth is extracted from). In this project, "_oecd_1985". + to_adjust_var_suffix : str + Suffix of the variable to be adjusted (the one the growth is applied to). In this project, "_oecd". + + Returns + ------- + tb : Table + Table with the adjusted variables. + """ + + # Save the original columns + columns_list = list(tb.columns) + + # Sort by country and year + tb = tb.sort_values(by=["country", "year"]).reset_index(drop=True) + + # Define the first year in common between the two series, share_gdp{reference_var_suffix} and share_gdp{to_adjust_var_suffix} + # First, define all the years in common between the two series + tb["years_in_common"] = tb.loc[ + tb[f"share_gdp{reference_var_suffix}"].notnull() & tb[f"share_gdp{to_adjust_var_suffix}"].notnull(), "year" + ] + + # Define the first year in common + tb["reference_year"] = tb.groupby("country")["years_in_common"].transform("min") + + # Get value from the reference variable in the reference year + tb["reference_value"] = tb.groupby("country")[f"share_gdp{reference_var_suffix}"].transform( + lambda x: x.loc[tb["year"] == tb["reference_year"]].iloc[0] + if not x.loc[tb["year"] == tb["reference_year"]].empty + else None + ) + + # The scalar is the previous value divided by the reference variable. This is the growth that will be applied retroactively to the variable to be adjusted.
+ tb["share_gdp_scalar"] = tb[f"share_gdp{reference_var_suffix}"] / tb["reference_value"] + + # Get value to be adjusted in the reference year + tb["to_adjust_value"] = tb.groupby("country")[f"share_gdp{to_adjust_var_suffix}"].transform( + lambda x: x.loc[tb["year"] == tb["reference_year"]].iloc[0] + if not x.loc[tb["year"] == tb["reference_year"]].empty + else None + ) + + # The estimated values are the product of the value to be adjusted in the reference year and the scalars. This is the variable to be adjusted, effectively adjusted by the growth of the reference variable. + tb["share_gdp_estimated"] = tb["to_adjust_value"] * tb["share_gdp_scalar"] + + # Rename the estimated variables without the suffix + tb["share_gdp"] = tb[f"share_gdp{to_adjust_var_suffix}"].astype("Float64").fillna(tb["share_gdp_estimated"]) + + # Keep only new variables + if "share_gdp" not in columns_list: + columns_list.append("share_gdp") + + tb = tb[columns_list] + + return tb diff --git a/etl/steps/data/grapher/social_expenditure/2025-03-07/social_expenditure_omm.py b/etl/steps/data/grapher/social_expenditure/2025-03-07/social_expenditure_omm.py new file mode 100644 index 00000000000..80374836ef1 --- /dev/null +++ b/etl/steps/data/grapher/social_expenditure/2025-03-07/social_expenditure_omm.py @@ -0,0 +1,26 @@ +"""Load a garden dataset and create a grapher dataset.""" + +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run() -> None: + # + # Load inputs. + # + # Load garden dataset. + ds_garden = paths.load_dataset("social_expenditure_omm") + + # Read table from garden dataset. + tb = ds_garden.read("social_expenditure_omm", reset_index=False) + + # + # Save outputs. + # + # Initialize a new grapher dataset. + ds_grapher = paths.create_dataset(tables=[tb], default_metadata=ds_garden.metadata) + + # Save grapher dataset.
+ ds_grapher.save() diff --git a/etl/steps/data/meadow/oecd/2025-03-07/social_expenditure_1985.py b/etl/steps/data/meadow/oecd/2025-03-07/social_expenditure_1985.py new file mode 100644 index 00000000000..4a475bff41d --- /dev/null +++ b/etl/steps/data/meadow/oecd/2025-03-07/social_expenditure_1985.py @@ -0,0 +1,70 @@ +"""Load a snapshot and create a meadow dataset.""" + +import pandas as pd + +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + +# Define columns and their new names +COLUMNS_WITH_VALUES = { + "Education": "education", + "Health": "health", + "Pensions": "pensions", + "Unemp": "unemployment", + "Other soc exp.'": "other_social_expenditure", + "Tot soc exp. \n(with educ)": "total_social_expenditure_with_education", + "GDP": "gdp", +} + + +def run() -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("social_expenditure_1985.xlsx") + + # Load data from snapshot. + tb = snap.read(sheet_name="OECD 1985 original", header=[0, 1]) + + # + # Process data. + # + # Move the first level of columns to the rows + tb = tb.stack(level=0, future_stack=True).reset_index(drop=False) + + # Rename Unnamed: 0_level_1 to year and level_1 to country + tb = tb.rename(columns={"Unnamed: 0_level_1": "year", "level_1": "country"}) + + # Drop level_0 + tb = tb.drop(columns=["level_0"]) + + # Fill missing rows in year with the first non-missing value + tb["year"] = tb["year"].ffill() + + # Drop the country value Unnamed: 0_level_0 + tb = tb[tb["country"] != "Unnamed: 0_level_0"].reset_index(drop=True) + + # Rename relevant columns + tb = tb.rename(columns=COLUMNS_WITH_VALUES) + + # Make all the columns with values numeric + for col in COLUMNS_WITH_VALUES.values(): + tb[col] = tb[col].apply(pd.to_numeric, errors="coerce") + + # Keep only the columns we are interested in + tb = tb[["country", "year"] + list(COLUMNS_WITH_VALUES.values())] + + # Improve tables format. 
+ tables = [tb.format(["country", "year"])] + + # + # Save outputs. + # + # Initialize a new meadow dataset. + ds_meadow = paths.create_dataset(tables=tables, default_metadata=snap.metadata) + + # Save meadow dataset. + ds_meadow.save() diff --git a/etl/steps/data/meadow/social_expenditure/2025-03-07/lindert.py b/etl/steps/data/meadow/social_expenditure/2025-03-07/lindert.py new file mode 100644 index 00000000000..7be850e1235 --- /dev/null +++ b/etl/steps/data/meadow/social_expenditure/2025-03-07/lindert.py @@ -0,0 +1,35 @@ +"""Load a snapshot and create a meadow dataset.""" + +from etl.helpers import PathFinder + +# Get paths and naming conventions for current step. +paths = PathFinder(__file__) + + +def run() -> None: + # + # Load inputs. + # + # Retrieve snapshot. + snap = paths.load_snapshot("lindert.csv") + + # Load data from snapshot. + tb = snap.read() + + # + # Process data. + # + # Make the table long, with columns "country" and "year". + tb = tb.melt(id_vars=["country"], var_name="year", value_name="share_gdp") + + # Improve tables format. + tables = [tb.format(["country", "year"])] + + # + # Save outputs. + # + # Initialize a new meadow dataset. + ds_meadow = paths.create_dataset(tables=tables, default_metadata=snap.metadata) + + # Save meadow dataset. + ds_meadow.save() diff --git a/snapshots/health_expenditure/2025-03-04/lindert.csv.dvc b/snapshots/health_expenditure/2025-03-04/lindert.csv.dvc index 8dff9c51002..9085bb0569b 100644 --- a/snapshots/health_expenditure/2025-03-04/lindert.csv.dvc +++ b/snapshots/health_expenditure/2025-03-04/lindert.csv.dvc @@ -5,7 +5,7 @@ meta: # Data product / Snapshot title: The rise of social spending (1880-1930) description: |- - A closer look at the dawn of social spending before 1930 reinterprets the timing, sources, and effects of its rise. New data and tests suggest that income growth played less of a role in shaping the rise of social transfers than did democracy, demography, and religion. 
Even in that early half-century the aging of the adult population was a leading force raising government transfers, especially pensions, and cutting support for schooling. + A closer look at the dawn of social spending before 1930 reinterprets the timing, sources, and effects of its rise. New data and tests suggest that income growth played less of a role in shaping the rise of social transfers than did democracy, demography, and religion. Even in that early half-century the aging of the adult population was a leading force raising government transfers, especially pensions, and cutting support for schooling. date_published: "1994-01-01" title_snapshot: The rise of social spending (1880-1930) - Table 1D description_snapshot: |- @@ -14,7 +14,7 @@ meta: # Citation producer: Lindert citation_full: |- - Lindert, P. H. (1994). The Rise of Social Spending, 1880-1930. Explorations in Economic History, 31(1), 1-37. https://doi.org/10.1006/exeh.1994.1001 + Lindert, P. H. (1994). The Rise of Social Spending, 1880-1930. Table 1D. Explorations in Economic History, 31(1), 1-37. https://doi.org/10.1006/exeh.1994.1001 attribution_short: Lindert # Files diff --git a/snapshots/oecd/2025-03-07/social_expenditure_1985.py b/snapshots/oecd/2025-03-07/social_expenditure_1985.py new file mode 100644 index 00000000000..3b781e5aa92 --- /dev/null +++ b/snapshots/oecd/2025-03-07/social_expenditure_1985.py @@ -0,0 +1,35 @@ +""" +Script to create a snapshot of dataset. + +The file was extracted from the tables in Annex C - OECD social expenditure statistics in the book available in the Internet Archive: +https://archive.org/details/socialexpenditur0000unse + +I uploaded the data available in the "OECD 1985 original" sheet in the Google Sheets: +https://docs.google.com/spreadsheets/d/112vwOK9WIAc0s-yfeLUvjhC-cVP1swPR/edit?gid=498003109#gid=498003109 + +That table didn't include the complete data for Finland and Sweden, so I added the data from the book. 
+""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. +SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"oecd/{SNAPSHOT_VERSION}/social_expenditure_1985.xlsx") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main() diff --git a/snapshots/oecd/2025-03-07/social_expenditure_1985.xlsx.dvc b/snapshots/oecd/2025-03-07/social_expenditure_1985.xlsx.dvc new file mode 100644 index 00000000000..43e57b4f446 --- /dev/null +++ b/snapshots/oecd/2025-03-07/social_expenditure_1985.xlsx.dvc @@ -0,0 +1,28 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: "Social Expenditure 1960-1990: Problems of Growth and Control" + description: |- + After the OECD Conference on Social Policies in the 1980s, held in October 1980, and in light of the discussion of budgetary issues by the Economic Policy Committee, a Group of Experts on the Growth and Control of Social Expenditure met at the OECD in October 1983 under the auspices of the Manpower and Social Affairs Committee. The Chairman of the Group, Mr. R. Maldague of the Belgian Planning Commission, presented a report on the meeting to the 60th Session of the MSA Committee in December 1983. The Committee then recommended that the Secretariat prepare a report on social expenditure. After review by members of the Committee in July 1984, this report is published on the responsibility of the Secretary-General. 
+ date_published: "1985" + + # Citation + producer: OECD + citation_full: |- + OECD (1985). Social Expenditure 1960-1990: Problems of Growth and Control. OECD Social Policy Studies. + + # Files + url_main: https://archive.org/details/socialexpenditur0000unse + date_accessed: 2025-03-07 + + # License + license: + name: CC BY 4.0 + url: https://archive.org/details/socialexpenditur0000unse + +outs: + - md5: d0581e9ff1f0875683cc4485fcbba056 + size: 110821 + path: social_expenditure_1985.xlsx diff --git a/snapshots/social_expenditure/2025-03-07/lindert.csv.dvc b/snapshots/social_expenditure/2025-03-07/lindert.csv.dvc new file mode 100644 index 00000000000..7325ba81b09 --- /dev/null +++ b/snapshots/social_expenditure/2025-03-07/lindert.csv.dvc @@ -0,0 +1,32 @@ +# Learn more at: +# http://docs.owid.io/projects/etl/architecture/metadata/reference/ +meta: + origin: + # Data product / Snapshot + title: "Growing Public: Social Spending and Economic Growth since the Eighteenth Century" + description: |- + Growing Public examines the question of whether social policies that redistribute income impose constraints on economic growth. Taxes and transfers have been debated for centuries, but only now can we get a clear view of the whole evolution of social spending. What kept prospering nations from using taxes for social programs until the end of the nineteenth century? Why did taxes and spending then grow so much, and what are the prospects for social spending in this century? Why did North America become a leader in public education in some ways and not others? Lindert finds answers in the economic history and logic of political voice, population aging, and income growth. Contrary to traditional beliefs, the net national costs of government social programs are virtually zero. This book not only shows that no Darwinian mechanism has punished the welfare states, but uses history to explain why this surprising result makes sense. 
Contrary to the intuition of many economists and the ideology of many politicians, social spending has contributed to, rather than inhibited, economic growth. + date_published: "2004" + title_snapshot: "Growing Public: Social Spending and Economic Growth since the Eighteenth Century - Table 1.2" + description_snapshot: |- + Social Transfers in OECD Countries, 1880–1995, as Percentages of Gross Domestic Product at Current Prices. + + # Citation + producer: Lindert + citation_full: |- + Lindert, P. H. (2004). Growing Public: Social Spending and Economic Growth since the Eighteenth Century. Table 1.2. Cambridge University Press. https://doi.org/10.1017/CBO9780511510717 + attribution_short: Lindert + + # Files + url_main: https://www.cambridge.org/core/books/growing-public/EAF17EB3BDFB5A6568930DBEC2CD1218 + date_accessed: 2025-03-07 + + # License + license: + name: CC BY 4.0 + url: https://www.cambridge.org/core/books/growing-public/EAF17EB3BDFB5A6568930DBEC2CD1218 + +outs: + - md5: 78b4f9341ac773ece655caa2de53f579 + size: 651 + path: lindert.csv diff --git a/snapshots/social_expenditure/2025-03-07/lindert.py b/snapshots/social_expenditure/2025-03-07/lindert.py new file mode 100644 index 00000000000..332a2579962 --- /dev/null +++ b/snapshots/social_expenditure/2025-03-07/lindert.py @@ -0,0 +1,32 @@ +""" +Script to create a snapshot of dataset. + +The file was extracted from Table 1.2 in this book: +https://www.cambridge.org/core/books/growing-public/EAF17EB3BDFB5A6568930DBEC2CD1218 + +The table was replicated manually into a CSV file. +""" + +from pathlib import Path + +import click + +from etl.snapshot import Snapshot + +# Version for current snapshot dataset. 
+SNAPSHOT_VERSION = Path(__file__).parent.name + + +@click.command() +@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot") +@click.option("--path-to-file", "-f", prompt=True, type=str, help="Path to local data file.") +def main(path_to_file: str, upload: bool) -> None: + # Create a new snapshot. + snap = Snapshot(f"social_expenditure/{SNAPSHOT_VERSION}/lindert.csv") + + # Copy local data file to snapshots data folder, add file to DVC and upload to S3. + snap.create_snapshot(filename=path_to_file, upload=upload) + + +if __name__ == "__main__": + main()