From 3a8d97b135f3a6612c5e38dd3cc6bcf9ab986597 Mon Sep 17 00:00:00 2001 From: Nikhil Woodruff Date: Wed, 19 Jun 2024 10:48:59 +0100 Subject: [PATCH] Add calibration improvements too --- Makefile | 3 +++ tax_microdata_benchmarking/utils/reweight.py | 2 +- .../utils/soi_replication.py | 15 +++++++++------ 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 5a50de6d..cc4a0268 100644 --- a/Makefile +++ b/Makefile @@ -18,3 +18,6 @@ data: install flat-file test documentation: jb build docs/book + +reweighting-visualisation: + tensorboard --logdir=tax_microdata_benchmarking/storage/output/reweighting diff --git a/tax_microdata_benchmarking/utils/reweight.py b/tax_microdata_benchmarking/utils/reweight.py index 3dae7c75..762e93fb 100644 --- a/tax_microdata_benchmarking/utils/reweight.py +++ b/tax_microdata_benchmarking/utils/reweight.py @@ -118,7 +118,7 @@ def build_loss_matrix(df): / f"{time_period}_{datetime.now().isoformat()}" ) - for i in tqdm(range(1_000), desc="Optimising weights"): + for i in tqdm(range(10_000), desc="Optimising weights"): optimizer.zero_grad() outputs = (weights * output_matrix_tensor.T).sum(axis=1) weight_deviation = ( diff --git a/tax_microdata_benchmarking/utils/soi_replication.py b/tax_microdata_benchmarking/utils/soi_replication.py index a06eb242..e458268a 100644 --- a/tax_microdata_benchmarking/utils/soi_replication.py +++ b/tax_microdata_benchmarking/utils/soi_replication.py @@ -2,6 +2,9 @@ import numpy as np from tqdm import tqdm from tax_microdata_benchmarking.storage import STORAGE_FOLDER +import warnings + +warnings.filterwarnings("ignore") soi = pd.read_csv(STORAGE_FOLDER / "input" / "soi.csv") @@ -16,7 +19,7 @@ def pe_to_soi(pe_dataset, year): pe_sim.calculate(variable, map_to="tax_unit") ) - df["agi"] = pe("adjusted_gross_income") + df["adjusted_gross_income"] = pe("adjusted_gross_income") df["exemption"] = pe("exemptions") df["itemded"] = pe("itemized_taxable_income_deductions") df["income_tax_after_credits"] = pe("income_tax") @@ -82,7 +85,7 @@ def pe_to_soi(pe_dataset, year): def puf_to_soi(puf, year): df = pd.DataFrame() - df["agi"] = puf.E00100 + df["adjusted_gross_income"] = puf.E00100 df["total_income_tax"] = puf.E06500 df["employment_income"] = puf.E00200 df["capital_gains_distributions"] = puf.E01100 @@ -148,7 +151,7 @@ def tc_to_soi(puf, year): puf.columns = puf.columns.str.upper() - df["agi"] = puf.C00100 + df["adjusted_gross_income"] = puf.C00100 df["total_income_tax"] = puf.C05800 df["employment_income"] = puf.E00200 df["capital_gains_distributions"] = puf.E01100 @@ -215,12 +218,12 @@ def compare_soi_replication_to_soi(df, year): if row.Variable not in df.columns: continue - subset = df[df.agi >= row["AGI lower bound"]][ - df.agi < row["AGI upper bound"] + subset = df[df.adjusted_gross_income >= row["AGI lower bound"]][ + df.adjusted_gross_income < row["AGI upper bound"] ] if row["Variable"] == "count": - variable = "agi" + variable = "adjusted_gross_income" else: variable = row["Variable"]