diff --git a/tax_microdata_benchmarking/datasets/tmd.py b/tax_microdata_benchmarking/datasets/tmd.py
index 9aad3a99..2dc61e98 100644
--- a/tax_microdata_benchmarking/datasets/tmd.py
+++ b/tax_microdata_benchmarking/datasets/tmd.py
@@ -6,6 +6,7 @@ from tax_microdata_benchmarking.datasets.taxcalc_dataset import (
     create_tc_dataset,
 )
 
+from tax_microdata_benchmarking.utils.trace import trace1
 from tax_microdata_benchmarking.utils.taxcalc_utils import add_taxcalc_outputs
 from tax_microdata_benchmarking.utils.reweight import reweight
 from tax_microdata_benchmarking.storage import STORAGE_FOLDER
@@ -33,13 +34,20 @@ def create_tmd_2021():
 
     combined = pd.concat([tc_puf_21, tc_cps_21], ignore_index=True)
 
+    trace1("A", combined)
+
     # Add Tax-Calculator outputs
     print("Adding Tax-Calculator outputs...")
     combined = add_taxcalc_outputs(combined, 2021)
     combined["s006_original"] = combined.s006.values
+
+    trace1("B", combined)
+
     print("Reweighting...")
     combined = reweight(combined, 2021, weight_deviation_penalty=0)
 
+    trace1("C", combined)
+
     return combined
 
 
diff --git a/tax_microdata_benchmarking/utils/trace.py b/tax_microdata_benchmarking/utils/trace.py
new file mode 100644
index 00000000..d643987c
--- /dev/null
+++ b/tax_microdata_benchmarking/utils/trace.py
@@ -0,0 +1,41 @@
+"""
+This module provides tracing utilities for working with the repository.
+"""
+
+import pandas as pd
+
+# Module-level switch: set to False to silence every trace1 call at once.
+TRACING = True
+
+
+def trace1(loc: str, vdf: pd.DataFrame) -> None:
+    """
+    Write to stdout loc-tagged tabulations of weights and W-2 wages.
+
+    Rows with data_source equal to 1 are tabulated as "puf" and all
+    other rows as "cps".  Nothing is written when TRACING is False.
+
+    Args:
+        loc (str): Identifies location of call to trace1.
+        vdf (DataFrame): Contains the variables to tabulate, which are
+            the s006, data_source, and PT_binc_w2_wages columns.
+
+    Returns:
+        None
+    """
+    if not TRACING:
+        return
+    # weight tabulations (weights are scaled to millions)
+    wght = vdf.s006
+    is_puf = vdf.data_source == 1
+    wtot = wght.sum() * 1e-6
+    wpuf = (wght * is_puf).sum() * 1e-6
+    wcps = (wght * ~is_puf).sum() * 1e-6
+    print(f">{loc} weights tot,puf,cps (#M)= {wtot:.3f} {wpuf:.3f} {wcps:.3f}")
+    # PT_binc_w2_wages tabulations (weighted total is scaled to billions)
+    w2wages = vdf.PT_binc_w2_wages
+    wages_min = w2wages.min()
+    wages_max = w2wages.max()
+    wages_wtot = (wght * w2wages).sum() * 1e-9
+    print(f">{loc} W2_wages min,max ($)= {wages_min:.0f} {wages_max:.0f}")
+    print(f">{loc} total weighted W2_wages ($B)= {wages_wtot:.3f}")