Merge in recent changes on master branch

PSLmodels · Oct 27, 2024 · 959594b
2 parents f805fa1 + a559002
commit 959594b
Show file tree

Hide file tree

Showing 6 changed files with 75 additions and 29 deletions.
diff --git a/taxcalc/__init__.py b/taxcalc/__init__.py
@@ -14,4 +14,6 @@
 from taxcalc.utils import *
 from taxcalc.cli import *
 
-__version__ = '4.3.0'
+__version__ = '4.3.0e'
+__min_python3_version__ = 10
+__max_python3_version__ = 12
diff --git a/taxcalc/cli/tc.py b/taxcalc/cli/tc.py
@@ -138,11 +138,23 @@ def cli_tc_main():
                         default=False,
                         action="store_true")
     args = parser.parse_args()
-    # show Tax-Calculator version and quit if --version option specified
+    # check Python version
+    pyv = sys.version_info
+    pymin = tc.__min_python3_version__
+    pymax = tc.__max_python3_version__
+    if pyv[0] != 3 or pyv[1] < pymin or pyv[1] > pymax:  # pragma: no cover
+        pyreq = f'at least Python 3.{pymin} and at most Python 3.{pymax}'
+        sys.stderr.write(
+            f'ERROR: Tax-Calculator requires {pyreq}\n'
+            f'       but Python {pyv[0]}.{pyv[1]} is installed\n'
+        )
+        return 1
+    # show Tax-Calculator version and quit if --version option is specified
     if args.version:
-        sys.stdout.write('Tax-Calculator {}\n'.format(tc.__version__))
+        pyver = f'Python 3.{pyv[1]}'
+        sys.stdout.write(f'Tax-Calculator {tc.__version__} on {pyver}\n')
         return 0
-    # write test input and expected output files if --test option specified
+    # write test input and expected output files if --test option is specified
     if args.test:
         _write_expected_test_output()
         inputfn = TEST_INPUT_FILENAME
@@ -181,7 +193,7 @@ def cli_tc_main():
     dumpvar_set = None
     if args.dvars and (args.dump or args.sqldb):
         if os.path.exists(args.dvars):
-            with open(args.dvars) as dfile:
+            with open(args.dvars, 'r', encoding='utf-8') as dfile:
                 dump_vars_str = dfile.read()
             dumpvar_set = tcio.custom_dump_variables(dump_vars_str)
             if tcio.errmsg:
@@ -213,8 +225,8 @@ def cli_tc_main():
 # end of cli_tc_main function code
 
 
-EXPECTED_TEST_OUTPUT_FILENAME = 'test-{}-out.csv'.format(str(TEST_TAXYEAR)[2:])
-ACTUAL_TEST_OUTPUT_FILENAME = 'test-{}-#-#-#.csv'.format(str(TEST_TAXYEAR)[2:])
+EXPECTED_TEST_OUTPUT_FILENAME = f'test-{str(TEST_TAXYEAR)[2:]}-out.csv'
+ACTUAL_TEST_OUTPUT_FILENAME = f'test-{str(TEST_TAXYEAR)[2:]}-#-#-#.csv'
 
 
 def _write_expected_test_output():
@@ -226,14 +238,14 @@ def _write_expected_test_output():
         '1,       2,   3,  1, 40000,  40000,      0,     0,  3000,  4000\n'
         '2,       2,   3,  1,200000, 200000,      0,     0, 15000, 20000\n'
     )
-    with open(TEST_INPUT_FILENAME, 'w') as ifile:
+    with open(TEST_INPUT_FILENAME, 'w', encoding='utf-8') as ifile:
         ifile.write(input_data)
     expected_output_data = (
         'RECID,YEAR,WEIGHT,INCTAX,LSTAX,PAYTAX\n'
         '1,2018,0.00,131.88,0.00,6120.00\n'
         '2,2018,0.00,28879.00,0.00,21721.60\n'
     )
-    with open(EXPECTED_TEST_OUTPUT_FILENAME, 'w') as ofile:
+    with open(EXPECTED_TEST_OUTPUT_FILENAME, 'w', encoding='utf-8') as ofile:
         ofile.write(expected_output_data)
 
 
@@ -242,8 +254,12 @@ def _compare_test_output_files():
     Private function that compares expected and actual tc --test output files;
     returns 0 if pass test, otherwise returns 1.
     """
-    explines = open(EXPECTED_TEST_OUTPUT_FILENAME, 'r').readlines()
-    actlines = open(ACTUAL_TEST_OUTPUT_FILENAME, 'r').readlines()
+    explines = open(
+        EXPECTED_TEST_OUTPUT_FILENAME, 'r', encoding='utf-8'
+    ).readlines()
+    actlines = open(
+        ACTUAL_TEST_OUTPUT_FILENAME, 'r', encoding='utf-8'
+    ).readlines()
     if ''.join(explines) == ''.join(actlines):
         sys.stdout.write('PASSED TEST\n')
         retcode = 0

diff --git a/taxcalc/data.py b/taxcalc/data.py
@@ -42,6 +42,13 @@ class Data():
         NOTE: when using custom weights, set this argument to a DataFrame.
         NOTE: assumes weights are integers that are 100 times the real weights.
 
+    weights_scale: float
+        specifies the weights scaling factor used to convert contents
+        of weights file into the s006 variable.  PUF and CPS input data
+        generated in the taxdata repository use a weights_scale of 0.01,
+        while TMD input data generated in the tax-microdata repository
+        use a 1.0 weights_scale value.
+
     Raises
     ------
     ValueError:
@@ -66,7 +73,8 @@ class instance: Data
     VARINFO_FILE_NAME = None
     VARINFO_FILE_PATH = None
 
-    def __init__(self, data, start_year, gfactors=None, weights=None):
+    def __init__(self, data, start_year, gfactors=None,
+                 weights=None, weights_scale=0.01):
         # initialize data variable info sets and read variable information
         self.INTEGER_READ_VARS = set()
         self.MUST_READ_VARS = set()
@@ -97,6 +105,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
             self.gfactors = gfactors
             # read sample weights
             self.WT = None
+            self.weights_scale = weights_scale
             if self.__aging_data:
                 self._read_weights(weights)
                 # ... weights must be same size as data
@@ -114,7 +123,7 @@ def __init__(self, data, start_year, gfactors=None, weights=None):
                 assert wt_colname in self.WT.columns, (
                     f'no weights for start year {self.current_year}'
                 )
-                self.s006 = self.WT[wt_colname] * 0.01
+                self.s006 = self.WT[wt_colname] * self.weights_scale
 
     @property
     def data_year(self):
@@ -152,7 +161,7 @@ def increment_year(self):
             assert wt_colname in self.WT.columns, (
                 f'no weights for new year {self.current_year}'
             )
-            self.s006 = self.WT[wt_colname] * 0.01
+            self.s006 = self.WT[wt_colname] * self.weights_scale
 
     # ----- begin private methods of Data class -----
 
@@ -260,7 +269,6 @@ def _read_weights(self, weights):
         Read sample weights from file or
         use specified DataFrame as weights or
         create empty DataFrame if None.
-        NOTE: assumes weights are integers equal to 100 times the real weight.
         """
         if weights is None:
             return
@@ -276,7 +284,7 @@ def _read_weights(self, weights):
             msg = 'weights is not None or a string or a Pandas DataFrame'
             raise ValueError(msg)
         assert isinstance(WT, pd.DataFrame)
-        setattr(self, 'WT', WT.astype(np.int32))
+        setattr(self, 'WT', WT.astype(np.float64))
         del WT
 
     def _extrapolate(self, year):

diff --git a/taxcalc/records.py b/taxcalc/records.py
@@ -53,7 +53,7 @@ class Records(Data):
         None creates empty sample-weights DataFrame;
         default value is filename of the PUF weights.
         NOTE: when using custom weights, set this argument to a DataFrame.
-        NOTE: assumes weights are integers that are 100 times the real weights.
+        NOTE: see weights_scale documentation below.
 
     adjust_ratios: string or Pandas DataFrame or None
         string describes CSV file in which adjustment ratios reside;
@@ -69,6 +69,13 @@ class Records(Data):
         any smoothing of stair-step provisions in income tax law;
         default value is false.
 
+    weights_scale: float
+        specifies the weights scaling factor used to convert contents
+        of weights file into the s006 variable.  PUF and CPS input data
+        generated in the taxdata repository use a weights_scale of 0.01,
+        while TMD input data generated in the tax-microdata repository
+        use a 1.0 weights_scale value.
+
     Raises
     ------
     ValueError:
@@ -127,11 +134,12 @@ def __init__(self,
                  gfactors=GrowFactors(),
                  weights=PUF_WEIGHTS_FILENAME,
                  adjust_ratios=PUF_RATIOS_FILENAME,
-                 exact_calculations=False):
+                 exact_calculations=False,
+                 weights_scale=0.01):
         # pylint: disable=no-member,too-many-branches
         if isinstance(weights, str):
             weights = os.path.join(Records.CODE_PATH, weights)
-        super().__init__(data, start_year, gfactors, weights)
+        super().__init__(data, start_year, gfactors, weights, weights_scale)
         if data is None:
             return  # because there are no data
         # read adjustment ratios
@@ -228,7 +236,7 @@ def tmd_constructor(
             data_path: Path,
             weights_path: Path,
             growfactors_path: Path,
-            exact_calculations=False
+            exact_calculations=False,
     ):  # pragma: no cover
         """
         Static method returns a Records object instantiated with TMD
@@ -250,6 +258,7 @@ def tmd_constructor(
             gfactors=GrowFactors(growfactors_filename=str(growfactors_path)),
             adjust_ratios=None,
             exact_calculations=exact_calculations,
+            weights_scale=1.0,
         )
 
     def increment_year(self):

diff --git a/taxcalc/taxcalcio.py b/taxcalc/taxcalcio.py
@@ -355,15 +355,17 @@ def init(self, input_data, tax_year, baseline, reform, assump,
                     weights=wghts,
                     gfactors=gfactors_ref,
                     adjust_ratios=None,
-                    exact_calculations=exact_calculations
+                    exact_calculations=exact_calculations,
+                    weights_scale=1.0,
                 )
                 recs_base = Records(
                     data=pd.read_csv(input_data),
                     start_year=Records.TMDCSV_YEAR,
                     weights=wghts,
                     gfactors=gfactors_base,
                     adjust_ratios=None,
-                    exact_calculations=exact_calculations
+                    exact_calculations=exact_calculations,
+                    weights_scale=1.0,
                 )
             else:  # if not {cps|tmd}_input_data but aging_input_data: puf
                 recs = Records(
@@ -548,8 +550,17 @@ def write_output_file(self, output_dump, dump_varset,
             outdf = self.minimal_output()
             column_order = outdf.columns
         assert len(outdf.index) == self.calc.array_len
-        outdf.to_csv(self._output_filename, columns=column_order,
-                     index=False, float_format='%.2f')
+        if self.tmd_input_data:  # pragma: no cover
+            if "s006" in outdf:
+                weights = outdf["s006"].round(5)
+            outdf = outdf.round(2)
+            if "s006" in outdf:
+                outdf["s006"] = weights
+            outdf.to_csv(self._output_filename, columns=column_order,
+                         index=False)
+        else:
+            outdf.to_csv(self._output_filename, columns=column_order,
+                         index=False, float_format='%.2f')
         del outdf
         gc.collect()
 
@@ -786,8 +797,8 @@ def dump_output(self, calcx, dump_varset, mtr_inctax, mtr_paytax):
             vardata = calcx.array(varname)
             if varname in recs_vinfo.INTEGER_VARS:
                 odf[varname] = vardata
-            else:
-                odf[varname] = vardata.round(2)  # rounded to nearest cent
+            else:  # specify precision that can handle small TMD area weights
+                odf[varname] = vardata.round(5)
             odf = odf.copy()
         # specify mtr values in percentage terms
         if 'mtr_inctax' in varset:

diff --git a/taxcalc/tests/test_benefits.py b/taxcalc/tests/test_benefits.py
@@ -77,9 +77,9 @@ def test_benefits(tests_path, cps_fullsample):
     if diffs:
         msg = 'CPS BENEFITS RESULTS DIFFER\n'
         msg += '-------------------------------------------------\n'
-        msg += '--- NEW RESULTS IN benefits_actual.txt FILE   ---\n'
-        msg += '--- if new OK, copy benefits_actual.txt to    ---\n'
-        msg += '---                 benefits_expect.txt       ---\n'
+        msg += '--- NEW RESULTS IN benefits_actual.csv FILE   ---\n'
+        msg += '--- if new OK, copy benefits_actual.csv to    ---\n'
+        msg += '---                 benefits_expect.csv       ---\n'
         msg += '---            and rerun test.                ---\n'
         msg += '-------------------------------------------------\n'
         raise ValueError(msg)