Skip to content

Commit

Permalink
Enhance tracing capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
martinholmer committed Jun 26, 2024
1 parent d3af0b7 commit 420bce0
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
4 changes: 2 additions & 2 deletions tax_microdata_benchmarking/datasets/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,14 +446,14 @@ def add_dependent(self, row, tax_unit_id, dependent_id):


class PUF_2015(PUF):
    """PUF dataset definition for the 2015 time period.

    Only class-level attributes are set here; no methods are defined, so
    all behaviour is inherited from the ``PUF`` base class, which is
    defined outside this block.
    """

    # Display label. An earlier assignment of "PUF (2015)" was immediately
    # overwritten by this value, so only the effective one is kept.
    label = "PUF 2015"
    # Identifier string for this dataset.
    name = "puf_2015"
    # Year this dataset refers to.
    time_period = 2015
    # Where the dataset's .h5 file lives. STORAGE_FOLDER is defined elsewhere;
    # presumably a pathlib.Path, given the "/" joins -- confirm there.
    file_path = STORAGE_FOLDER / "output" / "pe_puf_2015.h5"


class PUF_2021(PUF):
    """PUF dataset definition for the 2021 time period.

    Only class-level attributes are set here; no methods are defined, so
    all behaviour is inherited from the ``PUF`` base class, which is
    defined outside this block.
    """

    # Display label. An earlier assignment of "PUF (2021)" was immediately
    # overwritten by this value, so only the effective one is kept.
    label = "PUF 2021"
    # Identifier string for this dataset.
    name = "puf_2021"
    # Year this dataset refers to.
    time_period = 2021
    # Where the dataset's .h5 file lives. STORAGE_FOLDER is defined elsewhere;
    # presumably a pathlib.Path, given the "/" joins -- confirm there.
    file_path = STORAGE_FOLDER / "output" / "pe_puf_2021.h5"
Expand Down
2 changes: 1 addition & 1 deletion tax_microdata_benchmarking/datasets/taxcalc_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def create_tc_dataset(pe_dataset: Type, year: int = 2015) -> pd.DataFrame:
pe_sim = Microsimulation(dataset=pe_dataset)
df = pd.DataFrame()

print(f"Creating tc dataset for year {year}...")
print(f"Creating tc dataset from {pe_dataset.label} for year {year}...")

is_non_dep = ~pe_sim.calculate("is_tax_unit_dependent").values
tax_unit = pe_sim.populations["tax_unit"]
Expand Down
14 changes: 12 additions & 2 deletions tax_microdata_benchmarking/utils/trace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
This module provides tracing utilities for working with the repository.
This module provides tracing utilities for working with this repository.
"""

import pandas as pd
Expand All @@ -11,7 +11,7 @@ def trace1(loc: str, vdf: pd.DataFrame) -> None:
Args:
loc (str): Identifies location of call to trace1.
vdf (DataFrame): Contains variable to tabulate.
vdf (DataFrame): Contains variables to tabulate.
Returns:
None
Expand All @@ -25,11 +25,21 @@ def trace1(loc: str, vdf: pd.DataFrame) -> None:
wtot = wght.sum() * 1e-6
wpuf = (wght * filer).sum() * 1e-6
wcps = (wght * ~filer).sum() * 1e-6
wght_min = wght.min()
wght_max = wght.max()
print(f">{loc} weights tot,puf,cps (#M)= {wtot:.3f} {wpuf:.3f} {wcps:.3f}")
print(f">{loc} weights tot_min,tot_max (#)= {wght_min:.1f} {wght_max:.1f}")
# PT_binc_w2_wages tabulations
w2wages = vdf.PT_binc_w2_wages
wages_min = w2wages.min()
wages_max = w2wages.max()
wages_wtot = (wght * w2wages).sum() * 1e-9
print(f">{loc} W2_wages min,max ($)= {wages_min:.0f} {wages_max:.0f}")
print(f">{loc} total weighted W2_wages ($B)= {wages_wtot:.3f}")
# QBID tabulations
if "qbided" in vdf:
qbid = vdf.qbided
qbid_wtot = (wght * qbid).sum() * 1e-9
print(f">{loc} total weighted QBID ($B)= {qbid_wtot:.3f}")
else:
print(f">{loc} QBID not in DataFrame")

0 comments on commit 420bce0

Please sign in to comment.