Merge pull request #166 from PSLmodels/rename-directory

Rename the tax_microdata_benchmarking directory as tmd
PSLmodels · Aug 22, 2024 · a549ad8
2 parents 2312ea6 + 64809bb
commit a549ad8
Show file tree

Hide file tree

Showing 89 changed files with 126 additions and 137 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,14 +2,12 @@
 **/*.pyc
 **/*.csv.zip
 **/*.csv.gz
+**/*.csv
 **/*.egg-info
-tax_microdata_benchmarking/calibration
 **/_build/
 **/*tfevents*
-**/*.csv
-!tax_microdata_benchmarking/utils/uprating/*.csv*
-!tax_microdata_benchmarking/storage/input/*.csv
-tax_microdata_benchmarking/storage/output/tax_expenditures
+tmd/storage/output/tax_expenditures
+!tmd/storage/input/*.csv
 **demographics_2015.csv
 **puf_2015.csv
 *.DS_STORE
diff --git a/Makefile b/Makefile
@@ -1,44 +1,44 @@
 .PHONY=clean
 clean:
-	rm -f tax_microdata_benchmarking/storage/output/*
+	rm -f tmd/storage/output/*
 
 .PHONY=install
 install:
 	pip install -e .
-	python tax_microdata_benchmarking/download_prerequisites.py
+	python tmd/download_prerequisites.py
 
-tax_microdata_benchmarking/storage/output/tmd.csv.gz: \
-  tax_microdata_benchmarking/imputation_assumptions.py \
-  tax_microdata_benchmarking/datasets/tmd.py \
-  tax_microdata_benchmarking/datasets/puf.py \
-  tax_microdata_benchmarking/datasets/cps.py \
-  tax_microdata_benchmarking/datasets/taxcalc_dataset.py \
-  tax_microdata_benchmarking/utils/taxcalc_utils.py \
-  tax_microdata_benchmarking/utils/imputation.py \
-  tax_microdata_benchmarking/utils/is_tax_filer.py \
-  tax_microdata_benchmarking/utils/pension_contributions.py \
-  tax_microdata_benchmarking/utils/soi_replication.py \
-  tax_microdata_benchmarking/utils/soi_targets.py \
-  tax_microdata_benchmarking/utils/reweight.py \
-  tax_microdata_benchmarking/utils/trace.py \
-  tax_microdata_benchmarking/create_taxcalc_input_variables.py
-	python tax_microdata_benchmarking/create_taxcalc_input_variables.py
+tmd/storage/output/tmd.csv.gz: \
+  tmd/imputation_assumptions.py \
+  tmd/datasets/tmd.py \
+  tmd/datasets/puf.py \
+  tmd/datasets/cps.py \
+  tmd/datasets/taxcalc_dataset.py \
+  tmd/utils/taxcalc_utils.py \
+  tmd/utils/imputation.py \
+  tmd/utils/is_tax_filer.py \
+  tmd/utils/pension_contributions.py \
+  tmd/utils/soi_replication.py \
+  tmd/utils/soi_targets.py \
+  tmd/utils/reweight.py \
+  tmd/utils/trace.py \
+  tmd/create_taxcalc_input_variables.py
+	python tmd/create_taxcalc_input_variables.py
 
-tax_microdata_benchmarking/storage/output/tmd_growfactors.csv: \
-  tax_microdata_benchmarking/storage/input/puf_growfactors.csv \
-  tax_microdata_benchmarking/create_taxcalc_growth_factors.py
-	python tax_microdata_benchmarking/create_taxcalc_growth_factors.py
+tmd/storage/output/tmd_growfactors.csv: \
+  tmd/storage/input/puf_growfactors.csv \
+  tmd/create_taxcalc_growth_factors.py
+	python tmd/create_taxcalc_growth_factors.py
 
-tax_microdata_benchmarking/storage/output/tmd_weights.csv.gz: \
-  tax_microdata_benchmarking/storage/input/cbo_population_forecast.yaml \
-  tax_microdata_benchmarking/storage/output/tmd.csv.gz \
-  tax_microdata_benchmarking/create_taxcalc_sampling_weights.py
-	python tax_microdata_benchmarking/create_taxcalc_sampling_weights.py
+tmd/storage/output/tmd_weights.csv.gz: \
+  tmd/storage/input/cbo_population_forecast.yaml \
+  tmd/storage/output/tmd.csv.gz \
+  tmd/create_taxcalc_sampling_weights.py
+	python tmd/create_taxcalc_sampling_weights.py
 
 .PHONY=tmd_files
-tmd_files: tax_microdata_benchmarking/storage/output/tmd.csv.gz \
-  tax_microdata_benchmarking/storage/output/tmd_growfactors.csv \
-  tax_microdata_benchmarking/storage/output/tmd_weights.csv.gz
+tmd_files: tmd/storage/output/tmd.csv.gz \
+  tmd/storage/output/tmd_growfactors.csv \
+  tmd/storage/output/tmd_weights.csv.gz
 
 .PHONY=test
 test: tmd_files
@@ -57,10 +57,10 @@ documentation:
 
 .PHONY=reweighting-visualisation
 reweighting-visualisation:
-	tensorboard --logdir=tax_microdata_benchmarking/storage/output/reweighting
+	tensorboard --logdir=tmd/storage/output/reweighting
 
 .PHONY=tax-expenditures-report
 tax-expenditures-report: tmd_files
 	-pytest . --disable-warnings -m taxexp
-	diff tax_microdata_benchmarking/storage/output/tax_expenditures \
-             tax_microdata_benchmarking/examination/tax_expenditures
+	diff tmd/storage/output/tax_expenditures \
+             tmd/examination/tax_expenditures
diff --git a/README.md b/README.md
@@ -19,8 +19,8 @@ The two tokens can be obtained from [Don Boyd](mailto:[email protected]).
 To assess, review the data examination results that compare federal
 agency tax estimates with those generated using the microdata file
 created in each project phase: [phase 1
-results](./tax_microdata_benchmarking/examination/results1.md) and
+results](./tmd/examination/results1.md) and
 [phase 2
-results](./tax_microdata_benchmarking/examination/results2.md) and
+results](./tmd/examination/results2.md) and
 [phase 3
-results](./tax_microdata_benchmarking/examination/results3.md).
+results](./tmd/examination/results3.md).
diff --git a/app.py b/app.py
@@ -1,9 +1,10 @@
 import streamlit as st
-
-from tax_microdata_benchmarking.utils.soi_replication import *
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-from tax_microdata_benchmarking.datasets import *
 import pandas as pd
+import plotly.express as px
+from tmd.datasets import *
+from tmd.utils.soi_replication import *
+from tmd.storage import STORAGE_FOLDER
+
 
 INPUTS = STORAGE_FOLDER / "input"
 OUTPUTS = STORAGE_FOLDER / "output"
@@ -54,8 +55,6 @@ def soi_statistic_passes_quality_test(df):
 
 st.dataframe(comparisons.sort_values("Absolute error", ascending=False))
 
-import plotly.express as px
-
 histogram = px.histogram(
     comparisons,
     x="Absolute error",

diff --git a/docs/app.py b/docs/app.py
@@ -1,9 +1,10 @@
 import streamlit as st
-
-from tax_microdata_benchmarking.utils.soi_replication import *
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-from tax_microdata_benchmarking.datasets import *
 import pandas as pd
+import plotly.express as px
+from tmd.datasets import *
+from tmd.utils.soi_replication import *
+from tmd.storage import STORAGE_FOLDER
+
 
 INPUTS = STORAGE_FOLDER / "input"
 OUTPUTS = STORAGE_FOLDER / "output"
@@ -91,8 +92,6 @@ def soi_statistic_passes_quality_test(df):
 
 st.dataframe(comparisons.sort_values("Absolute error", ascending=False))
 
-import plotly.express as px
-
 histogram = px.histogram(
     comparisons,
     x="Absolute error",

diff --git a/docs/book/_config.yml b/docs/book/_config.yml
@@ -1,7 +1,7 @@
 # Book settings
 # Learn more at https://jupyterbook.org/customize/config.html
 
-title: Tax microdata benchmarking
+title: Tax microdata
 author: Policy Simulation Library
 
 # Force re-execution of notebooks on each build.

diff --git a/docs/book/intro.md b/docs/book/intro.md
@@ -1,4 +1,4 @@
-# Tax microdata benchmarking
+# tax-microdata
 
 This repository contains all working files for a project to develop a
 general-purpose validated microdata file for use in
@@ -12,15 +12,15 @@ To generate the microdata files:
 
 1. Run `export POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN=***`
 2. Run `export PSL_TAX_MICRODATA_RELEASE_AUTH_TOKEN=***`
-3. Run `make flat-file`
+3. Run `make data`
 
-The two environment tokens can be obtained from [Nikhil Woodruff](mailto:[email protected]).
+The two tokens can be obtained from [Don Boyd](mailto:[email protected]).
 
 To assess, review the data examination results that compare federal
 agency tax estimates with those generated using the microdata file
 created in each project phase: [phase 1
-results](./tax_microdata_benchmarking/examination/results1.md) and
+results](./tmd/examination/results1.md) and
 [phase 2
-results](./tax_microdata_benchmarking/examination/results2.md) and
-[VERY PRELIMINARY phase 3
-results](./tax_microdata_benchmarking/examination/results3.md).
+results](./tmd/examination/results2.md) and
+[phase 3
+results](./tmd/examination/results3.md).
diff --git a/docs/book/uprating.ipynb b/docs/book/uprating.ipynb
@@ -180,9 +180,9 @@
    ],
    "source": [
     "import pandas as pd\n",
-    "from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
+    "from tmd.storage import STORAGE_FOLDER\n",
     "from microdf import MicroDataFrame\n",
-    "from tax_microdata_benchmarking.datasets.uprate_puf import (\n",
+    "from tmd.datasets.uprate_puf import (\n",
     "    SOI_TO_PUF_STRAIGHT_RENAMES,\n",
     "    SOI_TO_PUF_NEG_ONLY_RENAMES,\n",
     "    SOI_TO_PUF_POS_ONLY_RENAMES,\n",

diff --git a/docs/book/validation.ipynb b/docs/book/validation.ipynb
@@ -116,9 +116,9 @@
     }
    ],
    "source": [
-    "from tax_microdata_benchmarking.utils.soi_replication import *\n",
-    "from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
-    "from tax_microdata_benchmarking.datasets import *\n",
+    "from tmd.utils.soi_replication import *\n",
+    "from tmd.storage import STORAGE_FOLDER\n",
+    "from tmd.datasets import *\n",
     "import pandas as pd\n",
     "\n",
     "INPUTS = STORAGE_FOLDER / \"input\"\n",

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,8 @@
+[pytest]
+testpaths =
+    tmd
+markers =
+    vartotals
+    taxexp
+    qbid
+    taxexpdiffs
diff --git a/setup.py b/setup.py
@@ -1,8 +1,8 @@
 from setuptools import setup, find_packages
 
 setup(
-    name="tax_microdata_benchmarking",
-    version="0.1.0",
+    name="tmd",
+    version="0.2.0",
     packages=find_packages(),
     python_requires=">=3.10,<3.13",
     install_requires=[

diff --git a/tests/test_tmd_files.py b/tests/test_tmd_files.py
@@ -8,22 +8,19 @@
 import yaml
 from pathlib import Path
 import subprocess
-import warnings
 import difflib
 import numpy as np
 import pandas as pd
 import pytest
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-from tax_microdata_benchmarking.create_taxcalc_input_variables import (
-    create_variable_file,
-)
-from tax_microdata_benchmarking.utils.taxcalc_utils import (
-    get_tax_expenditure_results,
-)
+from tmd.utils.taxcalc_utils import get_tax_expenditure_results
+from tmd.storage import STORAGE_FOLDER
 
 
-# include this test only to gather warnings information
+# run following test only to generate pytest warnings
+# @pytest.mark.skip
 def test_create_taxcalc_tmd_file():
+    from tmd.create_taxcalc_input_variables import create_variable_file
+
     create_variable_file(write_file=False)
 
 

diff --git a/tax_microdata_benchmarking/README.md → tmd/README.md b/tax_microdata_benchmarking/README.md → tmd/README.md
diff --git a/tax_microdata_benchmarking/__init__.py → tmd/__init__.py b/tax_microdata_benchmarking/__init__.py → tmd/__init__.py
diff --git a/...odata_benchmarking/create_all_datasets.py → tmd/create_all_datasets.py b/...odata_benchmarking/create_all_datasets.py → tmd/create_all_datasets.py
@@ -2,18 +2,16 @@
 This module enables generation of all datasets involved in the repo.
 """
 
-from tax_microdata_benchmarking.datasets import *
-from tax_microdata_benchmarking.create_taxcalc_growth_factors import (
-    create_factors_file,
-)
-from tax_microdata_benchmarking.create_taxcalc_sampling_weights import (
-    create_weights_file,
-)
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
 import time
+from tmd.datasets import *
+from tmd.create_taxcalc_growth_factors import create_factors_file
+from tmd.create_taxcalc_sampling_weights import create_weights_file
+from tmd.storage import STORAGE_FOLDER
+
 
 outputs = STORAGE_FOLDER / "output"
 
+
 generation_functions = [
     (create_pe_puf_2015, None),
     (create_pe_puf_2021, None),

diff --git a/...hmarking/create_taxcalc_growth_factors.py → tmd/create_taxcalc_growth_factors.py b/...hmarking/create_taxcalc_growth_factors.py → tmd/create_taxcalc_growth_factors.py
@@ -4,7 +4,8 @@
 """
 
 import pandas as pd
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+from tmd.storage import STORAGE_FOLDER
+
 
 FIRST_YEAR = 2021
 LAST_YEAR = 2074

diff --git a/...marking/create_taxcalc_input_variables.py → tmd/create_taxcalc_input_variables.py b/...marking/create_taxcalc_input_variables.py → tmd/create_taxcalc_input_variables.py
@@ -3,15 +3,15 @@
 """
 
 import taxcalc as tc
-from tax_microdata_benchmarking.datasets.tmd import create_tmd_2021
-from tax_microdata_benchmarking.imputation_assumptions import (
+from tmd.datasets.tmd import create_tmd_2021
+from tmd.imputation_assumptions import (
     IMPUTATION_RF_RNG_SEED,
     IMPUTATION_BETA_RNG_SEED,
     W2_WAGES_SCALE,
     REWEIGHT_DEVIATION_PENALTY,
     ITMDED_GROW_RATE,
 )
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+from tmd.storage import STORAGE_FOLDER
 
 
 TAXYEAR = 2021

diff --git a/...arking/create_taxcalc_sampling_weights.py → tmd/create_taxcalc_sampling_weights.py b/...arking/create_taxcalc_sampling_weights.py → tmd/create_taxcalc_sampling_weights.py
@@ -6,7 +6,8 @@
 
 import yaml
 import pandas as pd
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+from tmd.storage import STORAGE_FOLDER
+
 
 FIRST_YEAR = 2021
 LAST_YEAR = 2074

diff --git a/...microdata_benchmarking/datasets/README.md → tmd/datasets/README.md b/...microdata_benchmarking/datasets/README.md → tmd/datasets/README.md
diff --git a/...crodata_benchmarking/datasets/__init__.py → tmd/datasets/__init__.py b/...crodata_benchmarking/datasets/__init__.py → tmd/datasets/__init__.py
diff --git a/tax_microdata_benchmarking/datasets/cps.py → tmd/datasets/cps.py b/tax_microdata_benchmarking/datasets/cps.py → tmd/datasets/cps.py
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 import h5py
 from policyengine_core.data import Dataset
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
+from tmd.storage import STORAGE_FOLDER
 
 
 AGED_RNG = np.random.default_rng(seed=374651932)

diff --git a/tax_microdata_benchmarking/datasets/puf.py → tmd/datasets/puf.py b/tax_microdata_benchmarking/datasets/puf.py → tmd/datasets/puf.py
@@ -2,20 +2,18 @@
 from tqdm import tqdm
 import numpy as np
 import pandas as pd
-from microdf import MicroDataFrame
-from policyengine_us.system import system
-from policyengine_core.data import Dataset
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-from tax_microdata_benchmarking.utils.pension_contributions import (
-    impute_pension_contributions_to_puf,
-)
-from tax_microdata_benchmarking.datasets.uprate_puf import uprate_puf
-from tax_microdata_benchmarking.utils.imputation import Imputation
-from tax_microdata_benchmarking.imputation_assumptions import (
+from tmd.storage import STORAGE_FOLDER
+from tmd.utils.pension_contributions import impute_pension_contributions_to_puf
+from tmd.datasets.uprate_puf import uprate_puf
+from tmd.utils.imputation import Imputation
+from tmd.imputation_assumptions import (
     IMPUTATION_RF_RNG_SEED,
     IMPUTATION_BETA_RNG_SEED,
     W2_WAGES_SCALE,
 )
+from microdf import MicroDataFrame
+from policyengine_core.data import Dataset
+from policyengine_us.system import system
 
 
 FILER_AGE_RNG = np.random.default_rng(seed=64963751)

diff --git a/..._benchmarking/datasets/taxcalc_dataset.py → tmd/datasets/taxcalc_dataset.py b/..._benchmarking/datasets/taxcalc_dataset.py → tmd/datasets/taxcalc_dataset.py
@@ -3,8 +3,8 @@
 from typing import Type
 import numpy as np
 import pandas as pd
-from tax_microdata_benchmarking.storage import STORAGE_FOLDER
-from tax_microdata_benchmarking.datasets.puf import PUF_2015, PUF_2021
+from tmd.storage import STORAGE_FOLDER
+from tmd.datasets.puf import PUF_2015, PUF_2021
 from policyengine_us import Microsimulation
 from policyengine_us.system import system