From 0be995ca5c586000011e5dbdeaf0aad063bd2112 Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 10:12:40 -0400 Subject: [PATCH 1/6] Add reweighting option to create_taxcalc_input_variables.py --- .../create_taxcalc_input_variables.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tax_microdata_benchmarking/create_taxcalc_input_variables.py b/tax_microdata_benchmarking/create_taxcalc_input_variables.py index 2b9fae25..18c3f1a7 100644 --- a/tax_microdata_benchmarking/create_taxcalc_input_variables.py +++ b/tax_microdata_benchmarking/create_taxcalc_input_variables.py @@ -2,17 +2,15 @@ Construct tmd.csv, a Tax-Calculator-style input variable file for 2021. """ +TAXYEAR = 2021 +DO_REWEIGHTING = True INITIAL_W2_WAGES_SCALE = 0.32051 INCLUDE_ORIGINAL_WEIGHTS = True -def create_variable_file( - initial_pt_w2_wages_scale=INITIAL_W2_WAGES_SCALE, - create_from_scratch=False, - write_file=True, -): +def create_variable_file(write_file=True): """ - Create Tax-Calculator-style input variable file for 2021. + Create Tax-Calculator-style input variable file for TAXYEAR. """ import taxcalc as tc from tax_microdata_benchmarking.datasets.policyengine.puf_ecps import ( @@ -23,13 +21,13 @@ def create_variable_file( ) from tax_microdata_benchmarking.storage import STORAGE_FOLDER - taxyear = 2021 # construct dataframe containing input and output variables print(f"Creating {taxyear} PUF-ECPS file using initial pt_w2_wages_scale") vdf = create_puf_ecps_flat_file( - target_year=taxyear, - pt_w2_wages_scale=initial_pt_w2_wages_scale, - from_scratch=create_from_scratch, + target_year=TAXYEAR, + reweight=DO_REWEIGHTING, + pt_w2_wages_scale=INITIAL_W2_WAGES_SCALE, + from_scratch=False, ) vdf.FLPDYR = taxyear (vdf, pt_w2_wages_scale) = add_pt_w2_wages(vdf) From 813120d679756700ef0055e6dc02ff55be123ece Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 10:36:58 -0400 Subject: [PATCH 2/6] Fix typos in create_taxcalc_input_variables.py --- .../create_taxcalc_input_variables.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tax_microdata_benchmarking/create_taxcalc_input_variables.py b/tax_microdata_benchmarking/create_taxcalc_input_variables.py index 18c3f1a7..ef00626c 100644 --- a/tax_microdata_benchmarking/create_taxcalc_input_variables.py +++ b/tax_microdata_benchmarking/create_taxcalc_input_variables.py @@ -22,20 +22,23 @@ def create_variable_file(write_file=True): from tax_microdata_benchmarking.storage import STORAGE_FOLDER # construct dataframe containing input and output variables - print(f"Creating {taxyear} PUF-ECPS file using initial pt_w2_wages_scale") + print(f"Creating {TAXYEAR} PUF-ECPS file assuming:") + print(f" DO_REWEIGHTING = {DO_REWEIGHTING}") + print(f" INITIAL_W2_WAGES_SCALE = {INITIAL_W2_WAGES_SCALE:.5f}") + print(f" INCLUDE_ORIGINAL_WEIGHTS = {INCLUDE_ORIGINAL_WEIGHTS}") vdf = create_puf_ecps_flat_file( target_year=TAXYEAR, reweight=DO_REWEIGHTING, pt_w2_wages_scale=INITIAL_W2_WAGES_SCALE, from_scratch=False, ) - vdf.FLPDYR = taxyear + vdf.FLPDYR = TAXYEAR (vdf, pt_w2_wages_scale) = add_pt_w2_wages(vdf) - abs_diff = abs(pt_w2_wages_scale - initial_pt_w2_wages_scale) + abs_diff = abs(pt_w2_wages_scale - INITIAL_W2_WAGES_SCALE) if abs_diff > 1e-6: msg = ( f"\nFINAL vs INITIAL scale diff = {abs_diff:.6f}" - f"\n INITIAL pt_w2_wages_scale = {initial_pt_w2_wages_scale:.6f}" + f"\n INITIAL pt_w2_wages_scale = {INITIAL_W2_WAGES_SCALE:.6f}" f"\n FINAL pt_w2_wages_scale = {pt_w2_wages_scale:.6f}" ) raise ValueError(msg) @@ -46,7 +49,7 @@ def create_variable_file(write_file=True): # streamline dataframe so that it includes only input variables rec = tc.Records( data=vdf, - start_year=taxyear, + start_year=TAXYEAR, gfactors=None, weights=None, adjust_ratios=None, From 2d44c1b268544cb4be35fc4c3046dad76eaba141 Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 11:21:14 -0400 Subject: [PATCH 3/6] Always include s006_original variable --- tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py index e919eeca..0cee2eaa 100644 --- a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py +++ b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py @@ -46,14 +46,15 @@ def create_puf_ecps_flat_file( stacked_file["PT_binc_w2_wages"] = ( qbi * pt_w2_wages_scale # Solved in 2021 using adjust_qbi.py ) + + stacked_file["s006_original"] = stacked_file.s006 + stacked_file = add_taxcalc_outputs(stacked_file, target_year) if reweight: from tax_microdata_benchmarking.utils.reweight import ( reweight, ) - stacked_file["s006_original"] = stacked_file.s006 - if target_year > 2021: path_to_21 = STORAGE_FOLDER / "output" / "puf_ecps_2021.csv.gz" if path_to_21.exists(): From 955caf88b839ed08ae9ec6690feec79fa5ae3dd9 Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 11:53:09 -0400 Subject: [PATCH 4/6] Add pytest.mark for a test --- tests/test_flat_file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_flat_file.py b/tests/test_flat_file.py index 0c98e0d1..bab41839 100644 --- a/tests/test_flat_file.py +++ b/tests/test_flat_file.py @@ -132,6 +132,7 @@ def test_tax_expenditure_estimates( ), f"{reform} differs to official estimates by {estimate / target - 1:.1%} ({estimate:.1f}bn vs {target:.1f}bn)" +@pytest.mark.create_tmd_file def test_create_taxcalc_tmd_file(): from tax_microdata_benchmarking.create_taxcalc_input_variables import ( create_variable_file, From 59f36172808c0abc8cce46dbc8bdcc1610fe354c Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 12:33:59 -0400 Subject: [PATCH 5/6] Undo changes in puf_ecps.py script --- tax_microdata_benchmarking/create_taxcalc_input_variables.py | 5 ++++- tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py | 5 +++-- tests/test_flat_file.py | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tax_microdata_benchmarking/create_taxcalc_input_variables.py b/tax_microdata_benchmarking/create_taxcalc_input_variables.py index ef00626c..e437c0a2 100644 --- a/tax_microdata_benchmarking/create_taxcalc_input_variables.py +++ b/tax_microdata_benchmarking/create_taxcalc_input_variables.py @@ -55,7 +55,10 @@ def create_variable_file(write_file=True): adjust_ratios=None, ) weights = vdf.s006.copy() - original_weights = vdf.s006_original.copy() + if DO_REWEIGHTING and write_file: + original_weights = vdf.s006_original.copy() + else: + original_weights = vdf.s006.copy() vdf.drop(columns=rec.IGNORED_VARS, inplace=True) # round all float variables to nearest integer except for weights vdf = vdf.astype(int) diff --git a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py index 0cee2eaa..00b0ce23 100644 --- a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py +++ b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py @@ -47,14 +47,15 @@ def create_puf_ecps_flat_file( qbi * pt_w2_wages_scale # Solved in 2021 using adjust_qbi.py ) - stacked_file["s006_original"] = stacked_file.s006 - stacked_file = add_taxcalc_outputs(stacked_file, target_year) + if reweight: from tax_microdata_benchmarking.utils.reweight import ( reweight, ) + stacked_file["s006_original"] = stacked_file.s006 + if target_year > 2021: path_to_21 = STORAGE_FOLDER / "output" / "puf_ecps_2021.csv.gz" if path_to_21.exists(): diff --git a/tests/test_flat_file.py b/tests/test_flat_file.py index bab41839..0c98e0d1 100644 --- a/tests/test_flat_file.py +++ b/tests/test_flat_file.py @@ -132,7 +132,6 @@ def test_tax_expenditure_estimates( ), f"{reform} differs to official estimates by {estimate / target - 1:.1%} ({estimate:.1f}bn vs {target:.1f}bn)" -@pytest.mark.create_tmd_file def test_create_taxcalc_tmd_file(): from tax_microdata_benchmarking.create_taxcalc_input_variables import ( create_variable_file, From 6c9e9829636f4972d966523a976fb0058c59a085 Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Mon, 27 May 2024 13:00:20 -0400 Subject: [PATCH 6/6] Eliminate extraneous blank lines --- tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py index 00b0ce23..e919eeca 100644 --- a/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py +++ b/tax_microdata_benchmarking/datasets/policyengine/puf_ecps.py @@ -46,9 +46,7 @@ def create_puf_ecps_flat_file( stacked_file["PT_binc_w2_wages"] = ( qbi * pt_w2_wages_scale # Solved in 2021 using adjust_qbi.py ) - stacked_file = add_taxcalc_outputs(stacked_file, target_year) - if reweight: from tax_microdata_benchmarking.utils.reweight import ( reweight,