Skip to content

Commit

Permalink
Add basic pension contributions imputation
Browse files (browse the repository at this point in the history)
  • Loading branch information
nikhilwoodruff committed Jun 19, 2024
1 parent 482e4be commit 007cce6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
10 changes: 9 additions & 1 deletion tax_microdata_benchmarking/datasets/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from survey_enhance import Imputation
from microdf import MicroDataFrame
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.utils.pension_contributions import (
impute_pension_contributions_to_puf,
)

DEFAULT_W2_WAGE_RATE = 0.19824 # Solved for JCT Tax Expenditures in 2021

Expand Down Expand Up @@ -246,6 +249,7 @@ def preprocess_puf(puf: pd.DataFrame) -> pd.DataFrame:
"savers_credit",
"recapture_of_investment_credit",
"unreported_payroll_tax",
"pre_tax_contributions",
]


Expand All @@ -268,6 +272,10 @@ def generate(self, puf: pd.DataFrame, demographics: pd.DataFrame):
puf = preprocess_puf(puf)
print("Imputing missing demographics...")
puf = impute_missing_demographics(puf, demographics)
print("Imputing pension contributions...")
puf["pre_tax_contributions"] = impute_pension_contributions_to_puf(
puf[["employment_income"]]
)

# Sort in original PUF order
puf = puf.set_index("RECID").loc[original_recid].reset_index()
Expand Down Expand Up @@ -467,5 +475,5 @@ def create_pe_puf_2021():


if __name__ == "__main__":
# create_pe_puf_2015()
create_pe_puf_2015()
create_pe_puf_2021()
19 changes: 19 additions & 0 deletions tax_microdata_benchmarking/utils/pension_contributions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from survey_enhance import Imputation
from policyengine_us import Microsimulation
from tax_microdata_benchmarking.datasets.cps import CPS_2021


def impute_pension_contributions_to_puf(puf_df):
    """Impute pre-tax pension contributions for PUF records from CPS data.

    Builds a ``Microsimulation`` on the ``CPS_2021`` dataset, trains an
    ``Imputation`` model that predicts ``pre_tax_contributions`` from
    ``employment_income`` (using ``household_weight`` as the sample
    weight), and applies that model to the supplied PUF records.

    Args:
        puf_df: Table-like object that supports ``puf_df[[...]]`` column
            selection and has an ``employment_income`` column. Only that
            column is passed to the model; any other columns are ignored.

    Returns:
        Whatever ``Imputation.predict`` returns for the PUF predictors
        (presumably one ``pre_tax_contributions`` value per input row --
        TODO confirm against the survey_enhance API).
    """
    predictor_columns = ["employment_income"]
    outcome_columns = ["pre_tax_contributions"]

    # Training data: the CPS variables needed to fit the model.
    cps_simulation = Microsimulation(dataset=CPS_2021)
    training_frame = cps_simulation.calculate_dataframe(
        ["employment_income", "household_weight", "pre_tax_contributions"]
    )

    model = Imputation()
    model.train(
        X=training_frame[predictor_columns],
        Y=training_frame[outcome_columns],
        sample_weight=training_frame["household_weight"],
    )

    # Predict using only the predictor column from the PUF input.
    return model.predict(X=puf_df[predictor_columns])

0 comments on commit 007cce6

Please sign in to comment.