Skip to content

Commit

Permalink
Add calibration improvements too
Browse files (browse the repository at this point in the history)
  • Loading branch information
nikhilwoodruff committed Jun 19, 2024
1 parent 007cce6 commit 3a8d97b
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,3 +18,6 @@ data: install flat-file test

documentation:
jb build docs/book

+ reweighting-visualisation:
+ tensorboard --logdir=tax_microdata_benchmarking/storage/output/reweighting
2 changes: 1 addition & 1 deletion tax_microdata_benchmarking/utils/reweight.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,7 +118,7 @@ def build_loss_matrix(df):
/ f"{time_period}_{datetime.now().isoformat()}"
)

- for i in tqdm(range(1_000), desc="Optimising weights"):
+ for i in tqdm(range(10_000), desc="Optimising weights"):
optimizer.zero_grad()
outputs = (weights * output_matrix_tensor.T).sum(axis=1)
weight_deviation = (
Expand Down
15 changes: 9 additions & 6 deletions tax_microdata_benchmarking/utils/soi_replication.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,6 +2,9 @@
import numpy as np
from tqdm import tqdm
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+ import warnings
+
+ warnings.filterwarnings("ignore")

soi = pd.read_csv(STORAGE_FOLDER / "input" / "soi.csv")

Expand All @@ -16,7 +19,7 @@ def pe_to_soi(pe_dataset, year):
pe_sim.calculate(variable, map_to="tax_unit")
)

- df["agi"] = pe("adjusted_gross_income")
+ df["adjusted_gross_income"] = pe("adjusted_gross_income")
df["exemption"] = pe("exemptions")
df["itemded"] = pe("itemized_taxable_income_deductions")
df["income_tax_after_credits"] = pe("income_tax")
Expand Down Expand Up @@ -82,7 +85,7 @@ def pe_to_soi(pe_dataset, year):
def puf_to_soi(puf, year):
df = pd.DataFrame()

- df["agi"] = puf.E00100
+ df["adjusted_gross_income"] = puf.E00100
df["total_income_tax"] = puf.E06500
df["employment_income"] = puf.E00200
df["capital_gains_distributions"] = puf.E01100
Expand Down Expand Up @@ -148,7 +151,7 @@ def tc_to_soi(puf, year):

puf.columns = puf.columns.str.upper()

- df["agi"] = puf.C00100
+ df["adjusted_gross_income"] = puf.C00100
df["total_income_tax"] = puf.C05800
df["employment_income"] = puf.E00200
df["capital_gains_distributions"] = puf.E01100
Expand Down Expand Up @@ -215,12 +218,12 @@ def compare_soi_replication_to_soi(df, year):
if row.Variable not in df.columns:
continue

- subset = df[df.agi >= row["AGI lower bound"]][
- df.agi < row["AGI upper bound"]
+ subset = df[df.adjusted_gross_income >= row["AGI lower bound"]][
+ df.adjusted_gross_income < row["AGI upper bound"]
]

if row["Variable"] == "count":
- variable = "agi"
+ variable = "adjusted_gross_income"
else:
variable = row["Variable"]

Expand Down

0 comments on commit 3a8d97b

Please sign in to comment.