Skip to content

Commit

Permalink
Merge in recent changes on master branch
Browse files Browse the repository at this point in the history
  • Loading branch information
martinholmer committed Oct 27, 2024
2 parents f805fa1 + a559002 commit 959594b
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 29 deletions.
4 changes: 3 additions & 1 deletion taxcalc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@
from taxcalc.utils import *
from taxcalc.cli import *

__version__ = '4.3.0'
__version__ = '4.3.0e'
__min_python3_version__ = 10
__max_python3_version__ = 12
36 changes: 26 additions & 10 deletions taxcalc/cli/tc.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,23 @@ def cli_tc_main():
default=False,
action="store_true")
args = parser.parse_args()
# show Tax-Calculator version and quit if --version option specified
# check Python version
pyv = sys.version_info
pymin = tc.__min_python3_version__
pymax = tc.__max_python3_version__
if pyv[0] != 3 or pyv[1] < pymin or pyv[1] > pymax: # pragma: no cover
pyreq = f'at least Python 3.{pymin} and at most Python 3.{pymax}'
sys.stderr.write(
f'ERROR: Tax-Calculator requires {pyreq}\n'
f' but Python {pyv[0]}.{pyv[1]} is installed\n'
)
return 1
# show Tax-Calculator version and quit if --version option is specified
if args.version:
sys.stdout.write('Tax-Calculator {}\n'.format(tc.__version__))
pyver = f'Python 3.{pyv[1]}'
sys.stdout.write(f'Tax-Calculator {tc.__version__} on {pyver}\n')
return 0
# write test input and expected output files if --test option specified
# write test input and expected output files if --test option is specified
if args.test:
_write_expected_test_output()
inputfn = TEST_INPUT_FILENAME
Expand Down Expand Up @@ -181,7 +193,7 @@ def cli_tc_main():
dumpvar_set = None
if args.dvars and (args.dump or args.sqldb):
if os.path.exists(args.dvars):
with open(args.dvars) as dfile:
with open(args.dvars, 'r', encoding='utf-8') as dfile:
dump_vars_str = dfile.read()
dumpvar_set = tcio.custom_dump_variables(dump_vars_str)
if tcio.errmsg:
Expand Down Expand Up @@ -213,8 +225,8 @@ def cli_tc_main():
# end of cli_tc_main function code


EXPECTED_TEST_OUTPUT_FILENAME = 'test-{}-out.csv'.format(str(TEST_TAXYEAR)[2:])
ACTUAL_TEST_OUTPUT_FILENAME = 'test-{}-#-#-#.csv'.format(str(TEST_TAXYEAR)[2:])
EXPECTED_TEST_OUTPUT_FILENAME = f'test-{str(TEST_TAXYEAR)[2:]}-out.csv'
ACTUAL_TEST_OUTPUT_FILENAME = f'test-{str(TEST_TAXYEAR)[2:]}-#-#-#.csv'


def _write_expected_test_output():
Expand All @@ -226,14 +238,14 @@ def _write_expected_test_output():
'1, 2, 3, 1, 40000, 40000, 0, 0, 3000, 4000\n'
'2, 2, 3, 1,200000, 200000, 0, 0, 15000, 20000\n'
)
with open(TEST_INPUT_FILENAME, 'w') as ifile:
with open(TEST_INPUT_FILENAME, 'w', encoding='utf-8') as ifile:
ifile.write(input_data)
expected_output_data = (
'RECID,YEAR,WEIGHT,INCTAX,LSTAX,PAYTAX\n'
'1,2018,0.00,131.88,0.00,6120.00\n'
'2,2018,0.00,28879.00,0.00,21721.60\n'
)
with open(EXPECTED_TEST_OUTPUT_FILENAME, 'w') as ofile:
with open(EXPECTED_TEST_OUTPUT_FILENAME, 'w', encoding='utf-8') as ofile:
ofile.write(expected_output_data)


Expand All @@ -242,8 +254,12 @@ def _compare_test_output_files():
Private function that compares expected and actual tc --test output files;
returns 0 if the test passes, otherwise returns 1.
"""
explines = open(EXPECTED_TEST_OUTPUT_FILENAME, 'r').readlines()
actlines = open(ACTUAL_TEST_OUTPUT_FILENAME, 'r').readlines()
explines = open(
EXPECTED_TEST_OUTPUT_FILENAME, 'r', encoding='utf-8'
).readlines()
actlines = open(
ACTUAL_TEST_OUTPUT_FILENAME, 'r', encoding='utf-8'
).readlines()
if ''.join(explines) == ''.join(actlines):
sys.stdout.write('PASSED TEST\n')
retcode = 0
Expand Down
18 changes: 13 additions & 5 deletions taxcalc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ class Data():
NOTE: when using custom weights, set this argument to a DataFrame.
NOTE: see weights_scale documentation below.
weights_scale: float
specifies the weights scaling factor used to convert contents
of weights file into the s006 variable. PUF and CPS input data
generated in the taxdata repository use a weights_scale of 0.01,
while TMD input data generated in the tax-microdata repository
use a 1.0 weights_scale value.
Raises
------
ValueError:
Expand All @@ -66,7 +73,8 @@ class instance: Data
VARINFO_FILE_NAME = None
VARINFO_FILE_PATH = None

def __init__(self, data, start_year, gfactors=None, weights=None):
def __init__(self, data, start_year, gfactors=None,
weights=None, weights_scale=0.01):
# initialize data variable info sets and read variable information
self.INTEGER_READ_VARS = set()
self.MUST_READ_VARS = set()
Expand Down Expand Up @@ -97,6 +105,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
self.gfactors = gfactors
# read sample weights
self.WT = None
self.weights_scale = weights_scale
if self.__aging_data:
self._read_weights(weights)
# ... weights must be same size as data
Expand All @@ -114,7 +123,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
assert wt_colname in self.WT.columns, (
f'no weights for start year {self.current_year}'
)
self.s006 = self.WT[wt_colname] * 0.01
self.s006 = self.WT[wt_colname] * self.weights_scale

@property
def data_year(self):
Expand Down Expand Up @@ -152,7 +161,7 @@ def increment_year(self):
assert wt_colname in self.WT.columns, (
f'no weights for new year {self.current_year}'
)
self.s006 = self.WT[wt_colname] * 0.01
self.s006 = self.WT[wt_colname] * self.weights_scale

# ----- begin private methods of Data class -----

Expand Down Expand Up @@ -260,7 +269,6 @@ def _read_weights(self, weights):
Read sample weights from file or
use specified DataFrame as weights or
create empty DataFrame if None.
NOTE: assumes weights are integers equal to 100 times the real weight.
"""
if weights is None:
return
Expand All @@ -276,7 +284,7 @@ def _read_weights(self, weights):
msg = 'weights is not None or a string or a Pandas DataFrame'
raise ValueError(msg)
assert isinstance(WT, pd.DataFrame)
setattr(self, 'WT', WT.astype(np.int32))
setattr(self, 'WT', WT.astype(np.float64))
del WT

def _extrapolate(self, year):
Expand Down
17 changes: 13 additions & 4 deletions taxcalc/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class Records(Data):
None creates empty sample-weights DataFrame;
default value is filename of the PUF weights.
NOTE: when using custom weights, set this argument to a DataFrame.
NOTE: assumes weights are integers that are 100 times the real weights.
NOTE: see weights_scale documentation below.
adjust_ratios: string or Pandas DataFrame or None
string describes CSV file in which adjustment ratios reside;
Expand All @@ -69,6 +69,13 @@ class Records(Data):
any smoothing of stair-step provisions in income tax law;
default value is false.
weights_scale: float
specifies the weights scaling factor used to convert contents
of weights file into the s006 variable. PUF and CPS input data
generated in the taxdata repository use a weights_scale of 0.01,
while TMD input data generated in the tax-microdata repository
use a 1.0 weights_scale value.
Raises
------
ValueError:
Expand Down Expand Up @@ -127,11 +134,12 @@ def __init__(self,
gfactors=GrowFactors(),
weights=PUF_WEIGHTS_FILENAME,
adjust_ratios=PUF_RATIOS_FILENAME,
exact_calculations=False):
exact_calculations=False,
weights_scale=0.01):
# pylint: disable=no-member,too-many-branches
if isinstance(weights, str):
weights = os.path.join(Records.CODE_PATH, weights)
super().__init__(data, start_year, gfactors, weights)
super().__init__(data, start_year, gfactors, weights, weights_scale)
if data is None:
return # because there are no data
# read adjustment ratios
Expand Down Expand Up @@ -228,7 +236,7 @@ def tmd_constructor(
data_path: Path,
weights_path: Path,
growfactors_path: Path,
exact_calculations=False
exact_calculations=False,
): # pragma: no cover
"""
Static method returns a Records object instantiated with TMD
Expand All @@ -250,6 +258,7 @@ def tmd_constructor(
gfactors=GrowFactors(growfactors_filename=str(growfactors_path)),
adjust_ratios=None,
exact_calculations=exact_calculations,
weights_scale=1.0,
)

def increment_year(self):
Expand Down
23 changes: 17 additions & 6 deletions taxcalc/taxcalcio.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,15 +355,17 @@ def init(self, input_data, tax_year, baseline, reform, assump,
weights=wghts,
gfactors=gfactors_ref,
adjust_ratios=None,
exact_calculations=exact_calculations
exact_calculations=exact_calculations,
weights_scale=1.0,
)
recs_base = Records(
data=pd.read_csv(input_data),
start_year=Records.TMDCSV_YEAR,
weights=wghts,
gfactors=gfactors_base,
adjust_ratios=None,
exact_calculations=exact_calculations
exact_calculations=exact_calculations,
weights_scale=1.0,
)
else: # if not {cps|tmd}_input_data but aging_input_data: puf
recs = Records(
Expand Down Expand Up @@ -548,8 +550,17 @@ def write_output_file(self, output_dump, dump_varset,
outdf = self.minimal_output()
column_order = outdf.columns
assert len(outdf.index) == self.calc.array_len
outdf.to_csv(self._output_filename, columns=column_order,
index=False, float_format='%.2f')
if self.tmd_input_data: # pragma: no cover
if "s006" in outdf:
weights = outdf["s006"].round(5)
outdf = outdf.round(2)
if "s006" in outdf:
outdf["s006"] = weights
outdf.to_csv(self._output_filename, columns=column_order,
index=False)
else:
outdf.to_csv(self._output_filename, columns=column_order,
index=False, float_format='%.2f')
del outdf
gc.collect()

Expand Down Expand Up @@ -786,8 +797,8 @@ def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
vardata = calcx.array(varname)
if varname in recs_vinfo.INTEGER_VARS:
odf[varname] = vardata
else:
odf[varname] = vardata.round(2) # rounded to nearest cent
else: # specify precision that can handle small TMD area weights
odf[varname] = vardata.round(5)
odf = odf.copy()
# specify mtr values in percentage terms
if 'mtr_inctax' in varset:
Expand Down
6 changes: 3 additions & 3 deletions taxcalc/tests/test_benefits.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ def test_benefits(tests_path, cps_fullsample):
if diffs:
msg = 'CPS BENEFITS RESULTS DIFFER\n'
msg += '-------------------------------------------------\n'
msg += '--- NEW RESULTS IN benefits_actual.txt FILE ---\n'
msg += '--- if new OK, copy benefits_actual.txt to ---\n'
msg += '--- benefits_expect.txt ---\n'
msg += '--- NEW RESULTS IN benefits_actual.csv FILE ---\n'
msg += '--- if new OK, copy benefits_actual.csv to ---\n'
msg += '--- benefits_expect.csv ---\n'
msg += '--- and rerun test. ---\n'
msg += '-------------------------------------------------\n'
raise ValueError(msg)
Expand Down

0 comments on commit 959594b

Please sign in to comment.