Skip to content

Commit

Permalink
Merge pull request #201 from PSLmodels/taxcalc-upgrade
Browse files (browse the repository at this point in the history)
Require Tax-Calculator version 4.2.2 or higher
  • Loading branch information
martinholmer authored Sep 16, 2024
2 parents f491c2e + a5ebf51 commit 78d81e8
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ install:

.PHONY=clean
clean:
rm -f tmd/storage/output/*
rm -f tmd/storage/output/tmd*

tmd/storage/output/tmd.csv.gz: \
setup.py \
Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

setup(
name="tmd",
version="0.2.0",
version="0.3.0",
packages=find_packages(),
python_requires=">=3.10,<3.13",
install_requires=[
"policyengine_us==1.55.0",
"tables", # required by policyengine_us
"marshmallow<3.22", # to work around paramtools bug
"taxcalc>=4.2.1", # requires paramtools
"taxcalc>=4.2.2",
"scikit-learn",
"torch",
"tensorboard",
Expand Down
16 changes: 11 additions & 5 deletions tests/test_area_make.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Tests of tmd/areas/create_area_weights.py script.
Tests of tmd/areas/make_all.py script.
"""

import sys
Expand All @@ -12,10 +12,9 @@
@pytest.mark.skip
def test_area_make():
"""
Make area weights for faux bb area using the faux bb area targets.
Compare areas/weights/bb.log file with areas/weights/bb.log-expect file.
"""
make_all_areas(only_list=["bb"])
# compare area/weights/bb.log file with area/weights/bb.log-expect file
make_all_areas(make_only_list=["bb"])
wpath = AREAS_FOLDER / "weights"
with open(wpath / "bb.log", "r", encoding="utf-8") as afile:
act = afile.readlines()
Expand All @@ -25,5 +24,12 @@ def test_area_make():
context_diff(act, exp, fromfile="ACTUAL", tofile="EXPECT", n=0)
)
if len(diffs) > 0:
sys.stdout.write(">>>>> FULL FILE:\n")
sys.stdout.write("------------------------------------------------\n")
sys.stdout.writelines(act)
sys.stdout.write("------------------------------------------------\n")
sys.stdout.write(">>>>> DIFFS FILE:\n")
sys.stdout.write("------------------------------------------------\n")
sys.stdout.writelines(diffs)
raise ValueError("ACT vs EXP differences for area/weights/bb.log")
sys.stdout.write("------------------------------------------------\n")
raise ValueError("ACT vs EXP differences for areas/weights/bb.log")
4 changes: 2 additions & 2 deletions tmd/areas/make_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def time_of_newest_other_dependency():
# --- High-level logic of the script


def make_all_areas(only_list=None):
def make_all_areas(make_only_list=None):
"""
Call create_area_weights.py for each out-of-date or non-existent
weights file for which there is a targets file.
Expand All @@ -58,7 +58,7 @@ def make_all_areas(only_list=None):
tpaths = sorted(list(tfolder.glob("*_targets.csv")))
for tpath in tpaths:
area = tpath.name.split("_")[0]
if only_list and area not in only_list:
if make_only_list and area not in make_only_list:
continue # skip this area
wpath = AREAS_FOLDER / "weights" / f"{area}_tmd_weights.csv.gz"
if wpath.exists():
Expand Down
4 changes: 2 additions & 2 deletions tmd/areas/weights/bb.log-expect
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ target_matrix sparsity ratio = 0.597
OPTIMIZE WEIGHT RATIOS IN A REGULARIZATION LOOP
where REGULARIZATION DELTA starts at 1.000000e-09
and where target_matrix.shape= (225256, 16)
::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 14.9
>>> final delta loop exectime= 14.9 secs iterations=168 success=True
::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 24.3
>>> final delta loop exectime= 24.3 secs iterations=168 success=True
>>> message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
>>> L-BFGS-B optimized objective function value: 1.261425278e-04
AREA-OPTIMIZED_TARGET_MISSES= 0
Expand Down
65 changes: 65 additions & 0 deletions tmd/areas/weights/bb.log-expect-github
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
CREATING WEIGHTS FILE FOR AREA bb ...
INITIAL WEIGHTS STATISTICS:
weights_scale= 9.871864e-02
s006 wght_us
count 225256.000000 225256.000000
mean 816.957848 80.648965
std 1140.733386 112.611644
min 0.110000 0.010859
25% 23.590000 2.328773
50% 389.970000 38.497307
75% 1282.730000 126.629357
max 15801.890000 1559.941035
USING bb_targets.csv FILE CONTAINING 16 TARGETS
DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=16):
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.400000, < 0.800000: 1 1 6.25% 6.25%
>= 0.800000, < 0.900000: 1 2 6.25% 12.50%
>= 0.900000, < 0.990000: 0 2 0.00% 12.50%
>= 0.990000, < 0.999500: 0 2 0.00% 12.50%
>= 0.999500, < 1.000500: 1 3 6.25% 18.75%
>= 1.000500, < 1.010000: 0 3 0.00% 18.75%
>= 1.010000, < 1.100000: 0 3 0.00% 18.75%
>= 1.100000, < 1.200000: 0 3 0.00% 18.75%
>= 1.200000, < 1.600000: 3 6 18.75% 37.50%
>= 1.600000, < 2.000000: 0 6 0.00% 37.50%
>= 2.000000, < 3.000000: 3 9 18.75% 56.25%
>= 3.000000, < 4.000000: 2 11 12.50% 68.75%
>= 4.000000, < 5.000000: 3 14 18.75% 87.50%
>= 5.000000, < inf: 2 16 12.50% 100.00%
US_PROPORTIONALLY_SCALED_TARGET_RMSE= 3.033037311e+00
target_matrix sparsity ratio = 0.597
OPTIMIZE WEIGHT RATIOS IN A REGULARIZATION LOOP
where REGULARIZATION DELTA starts at 1.000000e-09
and where target_matrix.shape= (225256, 16)
::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 141.3
>>> final delta loop exectime= 141.3 secs iterations=148 success=True
>>> message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
>>> L-BFGS-B optimized objective function value: 1.267485693e-04
AREA-OPTIMIZED_TARGET_MISSES= 0
DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=16):
with REGULARIZATION_DELTA= 1.000000e-09
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.999500, < 1.000500: 16 16 100.00% 100.00%
AREA-OPTIMIZED_TARGET_RMSE= 8.050660463e-05
DISTRIBUTION OF AREA/US WEIGHT RATIO (n=225256):
with REGULARIZATION_DELTA= 1.000000e-09
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.000000, < 0.000001: 195 195 0.09% 0.09%
>= 0.000001, < 0.100000: 51665 51860 22.94% 23.02%
>= 0.100000, < 0.200000: 7918 59778 3.52% 26.54%
>= 0.200000, < 0.500000: 18162 77940 8.06% 34.60%
>= 0.500000, < 0.800000: 28977 106917 12.86% 47.46%
>= 0.800000, < 0.850000: 9350 116267 4.15% 51.62%
>= 0.850000, < 0.900000: 12610 128877 5.60% 57.21%
>= 0.900000, < 0.950000: 14978 143855 6.65% 63.86%
>= 0.950000, < 1.000000: 18899 162754 8.39% 72.25%
>= 1.000000, < 1.050000: 11204 173958 4.97% 77.23%
>= 1.050000, < 1.100000: 5570 179528 2.47% 79.70%
>= 1.100000, < 1.150000: 4483 184011 1.99% 81.69%
>= 1.150000, < 1.200000: 4235 188246 1.88% 83.57%
>= 1.200000, < 2.000000: 28187 216433 12.51% 96.08%
>= 2.000000, < 5.000000: 8344 224777 3.70% 99.79%
>= 5.000000, < 10.000000: 426 225203 0.19% 99.98%
>= 10.000000, < 100.000000: 53 225256 0.02% 100.00%
SUM OF SQUARED AREA/US WEIGHT RATIO DEVIATIONS= 1.266449e+05
65 changes: 65 additions & 0 deletions tmd/areas/weights/bb.log-expect-mrh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
CREATING WEIGHTS FILE FOR AREA bb ...
INITIAL WEIGHTS STATISTICS:
weights_scale= 9.874809e-02
s006 wght_us
count 225256.000000 225256.000000
mean 816.774828 80.654957
std 1140.652664 112.637275
min 0.110000 0.010862
25% 23.570000 2.327493
50% 389.695000 38.481638
75% 1282.230000 126.617767
max 15801.890000 1560.406500
USING bb_targets.csv FILE CONTAINING 16 TARGETS
DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=16):
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.400000, < 0.800000: 1 1 6.25% 6.25%
>= 0.800000, < 0.900000: 1 2 6.25% 12.50%
>= 0.900000, < 0.990000: 0 2 0.00% 12.50%
>= 0.990000, < 0.999500: 0 2 0.00% 12.50%
>= 0.999500, < 1.000500: 1 3 6.25% 18.75%
>= 1.000500, < 1.010000: 0 3 0.00% 18.75%
>= 1.010000, < 1.100000: 0 3 0.00% 18.75%
>= 1.100000, < 1.200000: 0 3 0.00% 18.75%
>= 1.200000, < 1.600000: 3 6 18.75% 37.50%
>= 1.600000, < 2.000000: 0 6 0.00% 37.50%
>= 2.000000, < 3.000000: 3 9 18.75% 56.25%
>= 3.000000, < 4.000000: 2 11 12.50% 68.75%
>= 4.000000, < 5.000000: 3 14 18.75% 87.50%
>= 5.000000, < inf: 2 16 12.50% 100.00%
US_PROPORTIONALLY_SCALED_TARGET_RMSE= 3.031008582e+00
target_matrix sparsity ratio = 0.597
OPTIMIZE WEIGHT RATIOS IN A REGULARIZATION LOOP
where REGULARIZATION DELTA starts at 1.000000e-09
and where target_matrix.shape= (225256, 16)
::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 24.3
>>> final delta loop exectime= 24.3 secs iterations=168 success=True
>>> message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
>>> L-BFGS-B optimized objective function value: 1.261425278e-04
AREA-OPTIMIZED_TARGET_MISSES= 0
DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=16):
with REGULARIZATION_DELTA= 1.000000e-09
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.999500, < 1.000500: 16 16 100.00% 100.00%
AREA-OPTIMIZED_TARGET_RMSE= 9.594020637e-05
DISTRIBUTION OF AREA/US WEIGHT RATIO (n=225256):
with REGULARIZATION_DELTA= 1.000000e-09
low bin ratio high bin ratio bin # cum # bin % cum %
>= 0.000000, < 0.000001: 633 633 0.28% 0.28%
>= 0.000001, < 0.100000: 50851 51484 22.57% 22.86%
>= 0.100000, < 0.200000: 8125 59609 3.61% 26.46%
>= 0.200000, < 0.500000: 17983 77592 7.98% 34.45%
>= 0.500000, < 0.800000: 28654 106246 12.72% 47.17%
>= 0.800000, < 0.850000: 9340 115586 4.15% 51.31%
>= 0.850000, < 0.900000: 12669 128255 5.62% 56.94%
>= 0.900000, < 0.950000: 15103 143358 6.70% 63.64%
>= 0.950000, < 1.000000: 19013 162371 8.44% 72.08%
>= 1.000000, < 1.050000: 11368 173739 5.05% 77.13%
>= 1.050000, < 1.100000: 5612 179351 2.49% 79.62%
>= 1.100000, < 1.150000: 4492 183843 1.99% 81.62%
>= 1.150000, < 1.200000: 4289 188132 1.90% 83.52%
>= 1.200000, < 2.000000: 28332 216464 12.58% 96.10%
>= 2.000000, < 5.000000: 8312 224776 3.69% 99.79%
>= 5.000000, < 10.000000: 428 225204 0.19% 99.98%
>= 10.000000, < 100.000000: 52 225256 0.02% 100.00%
SUM OF SQUARED AREA/US WEIGHT RATIO DEVIATIONS= 1.259953e+05
5 changes: 4 additions & 1 deletion tmd/storage/output/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Output files

Flat files suitable for input to Tax-Calculator.
Three national files suitable for input to Tax-Calculator:
- tmd.csv.gz (after gunzip)
- tmd_weights.csv.gz
- tmd_growfactors.csv

0 comments on commit 78d81e8

Please sign in to comment.