Skip to content

Commit

Permalink
Merge pull request #109 from PSLmodels/add-trace
Browse files Browse the repository at this point in the history
Add utils/trace.py module and use in datasets/tmd.py module
  • Loading branch information
martinholmer authored Jun 26, 2024
2 parents 13447c8 + f4438d3 commit d3af0b7
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 0 deletions.
8 changes: 8 additions & 0 deletions tax_microdata_benchmarking/datasets/tmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tax_microdata_benchmarking.datasets.taxcalc_dataset import (
create_tc_dataset,
)
from tax_microdata_benchmarking.utils.trace import trace1
from tax_microdata_benchmarking.utils.taxcalc_utils import add_taxcalc_outputs
from tax_microdata_benchmarking.utils.reweight import reweight
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
Expand Down Expand Up @@ -33,13 +34,20 @@ def create_tmd_2021():

combined = pd.concat([tc_puf_21, tc_cps_21], ignore_index=True)

trace1("A", combined)

# Add Tax-Calculator outputs
print("Adding Tax-Calculator outputs...")
combined = add_taxcalc_outputs(combined, 2021)
combined["s006_original"] = combined.s006.values

trace1("B", combined)

print("Reweighting...")
combined = reweight(combined, 2021, weight_deviation_penalty=0)

trace1("C", combined)

return combined


Expand Down
35 changes: 35 additions & 0 deletions tax_microdata_benchmarking/utils/trace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""
This module provides tracing utilities for working with the repository.
"""

import pandas as pd


def trace1(loc: str, vdf: pd.DataFrame, *, tracing: bool = True) -> None:
    """
    Write to stdout loc and trace1 tabulation of specified DataFrame.

    Three lines are printed, each prefixed with ">" and loc:
      1. sum of s006 over all rows, over rows where data_source == 1
         (labelled "puf") and over all other rows (labelled "cps"),
         each scaled by 1e-6 and labelled "#M";
      2. unweighted minimum and maximum of PT_binc_w2_wages;
      3. s006-weighted sum of PT_binc_w2_wages scaled by 1e-9 and
         labelled "$B".

    Args:
        loc (str): Identifies location of call to trace1.
        vdf (DataFrame): Contains variables to tabulate; the columns
            s006, data_source and PT_binc_w2_wages are read.
        tracing (bool): Keyword-only switch.  When False the function
            returns immediately without reading vdf or printing.
            Defaults to True, which preserves the always-on behavior
            of calls that do not pass it.
    Returns:
        None
    """
    if not tracing:
        return
    # weight tabulations
    wght = vdf.s006
    # boolean mask: True for rows reported under the "puf" label
    filer = vdf.data_source == 1
    wtot = wght.sum() * 1e-6
    # multiplying by the boolean mask zeroes out the non-selected rows
    wpuf = (wght * filer).sum() * 1e-6
    wcps = (wght * ~filer).sum() * 1e-6
    print(f">{loc} weights tot,puf,cps (#M)= {wtot:.3f} {wpuf:.3f} {wcps:.3f}")
    # PT_binc_w2_wages tabulations
    w2wages = vdf.PT_binc_w2_wages
    wages_min = w2wages.min()
    wages_max = w2wages.max()
    wages_wtot = (wght * w2wages).sum() * 1e-9
    print(f">{loc} W2_wages min,max ($)= {wages_min:.0f} {wages_max:.0f}")
    print(f">{loc} total weighted W2_wages ($B)= {wages_wtot:.3f}")

0 comments on commit d3af0b7

Please sign in to comment.