diff --git a/tax_microdata_benchmarking/datasets/puf.py b/tax_microdata_benchmarking/datasets/puf.py index 076de76b..cf4e8d89 100644 --- a/tax_microdata_benchmarking/datasets/puf.py +++ b/tax_microdata_benchmarking/datasets/puf.py @@ -446,14 +446,14 @@ def add_dependent(self, row, tax_unit_id, dependent_id): class PUF_2015(PUF): - label = "PUF (2015)" + label = "PUF 2015" name = "puf_2015" time_period = 2015 file_path = STORAGE_FOLDER / "output" / "pe_puf_2015.h5" class PUF_2021(PUF): - label = "PUF (2021)" + label = "PUF 2021" name = "puf_2021" time_period = 2021 file_path = STORAGE_FOLDER / "output" / "pe_puf_2021.h5" diff --git a/tax_microdata_benchmarking/datasets/taxcalc_dataset.py b/tax_microdata_benchmarking/datasets/taxcalc_dataset.py index 8aabe1df..c387dc66 100644 --- a/tax_microdata_benchmarking/datasets/taxcalc_dataset.py +++ b/tax_microdata_benchmarking/datasets/taxcalc_dataset.py @@ -14,7 +14,7 @@ def create_tc_dataset(pe_dataset: Type, year: int = 2015) -> pd.DataFrame: pe_sim = Microsimulation(dataset=pe_dataset) df = pd.DataFrame() - print(f"Creating tc dataset for year {year}...") + print(f"Creating tc dataset from {pe_dataset.label} for year {year}...") is_non_dep = ~pe_sim.calculate("is_tax_unit_dependent").values tax_unit = pe_sim.populations["tax_unit"] diff --git a/tax_microdata_benchmarking/utils/trace.py b/tax_microdata_benchmarking/utils/trace.py index d643987c..65750961 100644 --- a/tax_microdata_benchmarking/utils/trace.py +++ b/tax_microdata_benchmarking/utils/trace.py @@ -1,5 +1,5 @@ """ -This module provides tracing utilities for working with the repository. +This module provides tracing utilities for working with this repository. """ import pandas as pd @@ -11,7 +11,7 @@ def trace1(loc: str, vdf: pd.DataFrame) -> None: Args: loc (str): Identifies location of call to trace1. - vdf (DataFrame): Contains variable to tabulate. + vdf (DataFrame): Contains variables to tabulate. Returns: None @@ -25,7 +25,10 @@ def trace1(loc: str, vdf: pd.DataFrame) -> None: wtot = wght.sum() * 1e-6 wpuf = (wght * filer).sum() * 1e-6 wcps = (wght * ~filer).sum() * 1e-6 + wght_min = wght.min() + wght_max = wght.max() print(f">{loc} weights tot,puf,cps (#M)= {wtot:.3f} {wpuf:.3f} {wcps:.3f}") + print(f">{loc} weights tot_min,tot_max (#)= {wght_min:.1f} {wght_max:.1f}") # PT_binc_w2_wages tabulations w2wages = vdf.PT_binc_w2_wages wages_min = w2wages.min() @@ -33,3 +36,10 @@ def trace1(loc: str, vdf: pd.DataFrame) -> None: wages_wtot = (wght * w2wages).sum() * 1e-9 print(f">{loc} W2_wages min,max ($)= {wages_min:.0f} {wages_max:.0f}") print(f">{loc} total weighted W2_wages ($B)= {wages_wtot:.3f}") + # QBID tabulations + if "qbided" in vdf: + qbid = vdf.qbided + qbid_wtot = (wght * qbid).sum() * 1e-9 + print(f">{loc} total weighted QBID ($B)= {qbid_wtot:.3f}") + else: + print(f">{loc} QBID not in DataFrame")