Skip to content

Commit

Permalink
Merge pull request #166 from PSLmodels/rename-directory
Browse files Browse the repository at this point in the history
Rename the tax_microdata_benchmarking directory as tmd
  • Loading branch information
martinholmer authored Aug 22, 2024
2 parents 2312ea6 + 64809bb commit a549ad8
Show file tree
Hide file tree
Showing 89 changed files with 126 additions and 137 deletions.
8 changes: 3 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@
**/*.pyc
**/*.csv.zip
**/*.csv.gz
**/*.csv
**/*.egg-info
tax_microdata_benchmarking/calibration
**/_build/
**/*tfevents*
**/*.csv
!tax_microdata_benchmarking/utils/uprating/*.csv*
!tax_microdata_benchmarking/storage/input/*.csv
tax_microdata_benchmarking/storage/output/tax_expenditures
tmd/storage/output/tax_expenditures
!tmd/storage/input/*.csv
**demographics_2015.csv
**puf_2015.csv
*.DS_STORE
66 changes: 33 additions & 33 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
.PHONY=clean
clean:
rm -f tax_microdata_benchmarking/storage/output/*
rm -f tmd/storage/output/*

.PHONY=install
install:
pip install -e .
python tax_microdata_benchmarking/download_prerequisites.py
python tmd/download_prerequisites.py

tax_microdata_benchmarking/storage/output/tmd.csv.gz: \
tax_microdata_benchmarking/imputation_assumptions.py \
tax_microdata_benchmarking/datasets/tmd.py \
tax_microdata_benchmarking/datasets/puf.py \
tax_microdata_benchmarking/datasets/cps.py \
tax_microdata_benchmarking/datasets/taxcalc_dataset.py \
tax_microdata_benchmarking/utils/taxcalc_utils.py \
tax_microdata_benchmarking/utils/imputation.py \
tax_microdata_benchmarking/utils/is_tax_filer.py \
tax_microdata_benchmarking/utils/pension_contributions.py \
tax_microdata_benchmarking/utils/soi_replication.py \
tax_microdata_benchmarking/utils/soi_targets.py \
tax_microdata_benchmarking/utils/reweight.py \
tax_microdata_benchmarking/utils/trace.py \
tax_microdata_benchmarking/create_taxcalc_input_variables.py
python tax_microdata_benchmarking/create_taxcalc_input_variables.py
tmd/storage/output/tmd.csv.gz: \
tmd/imputation_assumptions.py \
tmd/datasets/tmd.py \
tmd/datasets/puf.py \
tmd/datasets/cps.py \
tmd/datasets/taxcalc_dataset.py \
tmd/utils/taxcalc_utils.py \
tmd/utils/imputation.py \
tmd/utils/is_tax_filer.py \
tmd/utils/pension_contributions.py \
tmd/utils/soi_replication.py \
tmd/utils/soi_targets.py \
tmd/utils/reweight.py \
tmd/utils/trace.py \
tmd/create_taxcalc_input_variables.py
python tmd/create_taxcalc_input_variables.py

tax_microdata_benchmarking/storage/output/tmd_growfactors.csv: \
tax_microdata_benchmarking/storage/input/puf_growfactors.csv \
tax_microdata_benchmarking/create_taxcalc_growth_factors.py
python tax_microdata_benchmarking/create_taxcalc_growth_factors.py
tmd/storage/output/tmd_growfactors.csv: \
tmd/storage/input/puf_growfactors.csv \
tmd/create_taxcalc_growth_factors.py
python tmd/create_taxcalc_growth_factors.py

tax_microdata_benchmarking/storage/output/tmd_weights.csv.gz: \
tax_microdata_benchmarking/storage/input/cbo_population_forecast.yaml \
tax_microdata_benchmarking/storage/output/tmd.csv.gz \
tax_microdata_benchmarking/create_taxcalc_sampling_weights.py
python tax_microdata_benchmarking/create_taxcalc_sampling_weights.py
tmd/storage/output/tmd_weights.csv.gz: \
tmd/storage/input/cbo_population_forecast.yaml \
tmd/storage/output/tmd.csv.gz \
tmd/create_taxcalc_sampling_weights.py
python tmd/create_taxcalc_sampling_weights.py

.PHONY=tmd_files
tmd_files: tax_microdata_benchmarking/storage/output/tmd.csv.gz \
tax_microdata_benchmarking/storage/output/tmd_growfactors.csv \
tax_microdata_benchmarking/storage/output/tmd_weights.csv.gz
tmd_files: tmd/storage/output/tmd.csv.gz \
tmd/storage/output/tmd_growfactors.csv \
tmd/storage/output/tmd_weights.csv.gz

.PHONY=test
test: tmd_files
Expand All @@ -57,10 +57,10 @@ documentation:

.PHONY=reweighting-visualisation
reweighting-visualisation:
tensorboard --logdir=tax_microdata_benchmarking/storage/output/reweighting
tensorboard --logdir=tmd/storage/output/reweighting

.PHONY=tax-expenditures-report
tax-expenditures-report: tmd_files
-pytest . --disable-warnings -m taxexp
diff tax_microdata_benchmarking/storage/output/tax_expenditures \
tax_microdata_benchmarking/examination/tax_expenditures
diff tmd/storage/output/tax_expenditures \
tmd/examination/tax_expenditures
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ The two tokens can be obtained from [Don Boyd](mailto:[email protected]).
To assess, review the data examination results that compare federal
agency tax estimates with those generated using the microdata file
created in each project phase: [phase 1
results](./tax_microdata_benchmarking/examination/results1.md) and
results](./tmd/examination/results1.md) and
[phase 2
results](./tax_microdata_benchmarking/examination/results2.md) and
results](./tmd/examination/results2.md) and
[phase 3
results](./tax_microdata_benchmarking/examination/results3.md).
results](./tmd/examination/results3.md).
11 changes: 5 additions & 6 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import streamlit as st

from tax_microdata_benchmarking.utils.soi_replication import *
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.datasets import *
import pandas as pd
import plotly.express as px
from tmd.datasets import *
from tmd.utils.soi_replication import *
from tmd.storage import STORAGE_FOLDER


INPUTS = STORAGE_FOLDER / "input"
OUTPUTS = STORAGE_FOLDER / "output"
Expand Down Expand Up @@ -54,8 +55,6 @@ def soi_statistic_passes_quality_test(df):

st.dataframe(comparisons.sort_values("Absolute error", ascending=False))

import plotly.express as px

histogram = px.histogram(
comparisons,
x="Absolute error",
Expand Down
11 changes: 5 additions & 6 deletions docs/app.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import streamlit as st

from tax_microdata_benchmarking.utils.soi_replication import *
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.datasets import *
import pandas as pd
import plotly.express as px
from tmd.datasets import *
from tmd.utils.soi_replication import *
from tmd.storage import STORAGE_FOLDER


INPUTS = STORAGE_FOLDER / "input"
OUTPUTS = STORAGE_FOLDER / "output"
Expand Down Expand Up @@ -91,8 +92,6 @@ def soi_statistic_passes_quality_test(df):

st.dataframe(comparisons.sort_values("Absolute error", ascending=False))

import plotly.express as px

histogram = px.histogram(
comparisons,
x="Absolute error",
Expand Down
2 changes: 1 addition & 1 deletion docs/book/_config.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Book settings
# Learn more at https://jupyterbook.org/customize/config.html

title: Tax microdata benchmarking
title: Tax microdata
author: Policy Simulation Library

# Force re-execution of notebooks on each build.
Expand Down
14 changes: 7 additions & 7 deletions docs/book/intro.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Tax microdata benchmarking
# tax-microdata

This repository contains all working files for a project to develop a
general-purpose validated microdata file for use in
Expand All @@ -12,15 +12,15 @@ To generate the microdata files:

1. Run `export POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN=***`
2. Run `export PSL_TAX_MICRODATA_RELEASE_AUTH_TOKEN=***`
3. Run `make flat-file`
3. Run `make data`

The two environment tokens can be obtained from [Nikhil Woodruff](mailto:[email protected]).
The two tokens can be obtained from [Don Boyd](mailto:[email protected]).

To assess, review the data examination results that compare federal
agency tax estimates with those generated using the microdata file
created in each project phase: [phase 1
results](./tax_microdata_benchmarking/examination/results1.md) and
results](./tmd/examination/results1.md) and
[phase 2
results](./tax_microdata_benchmarking/examination/results2.md) and
[VERY PRELIMINARY phase 3
results](./tax_microdata_benchmarking/examination/results3.md).
results](./tmd/examination/results2.md) and
[phase 3
results](./tmd/examination/results3.md).
4 changes: 2 additions & 2 deletions docs/book/uprating.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,9 @@
],
"source": [
"import pandas as pd\n",
"from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
"from tmd.storage import STORAGE_FOLDER\n",
"from microdf import MicroDataFrame\n",
"from tax_microdata_benchmarking.datasets.uprate_puf import (\n",
"from tmd.datasets.uprate_puf import (\n",
" SOI_TO_PUF_STRAIGHT_RENAMES,\n",
" SOI_TO_PUF_NEG_ONLY_RENAMES,\n",
" SOI_TO_PUF_POS_ONLY_RENAMES,\n",
Expand Down
6 changes: 3 additions & 3 deletions docs/book/validation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@
}
],
"source": [
"from tax_microdata_benchmarking.utils.soi_replication import *\n",
"from tax_microdata_benchmarking.storage import STORAGE_FOLDER\n",
"from tax_microdata_benchmarking.datasets import *\n",
"from tmd.utils.soi_replication import *\n",
"from tmd.storage import STORAGE_FOLDER\n",
"from tmd.datasets import *\n",
"import pandas as pd\n",
"\n",
"INPUTS = STORAGE_FOLDER / \"input\"\n",
Expand Down
8 changes: 8 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[pytest]
testpaths =
tmd
markers =
vartotals
taxexp
qbid
taxexpdiffs
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from setuptools import setup, find_packages

setup(
name="tax_microdata_benchmarking",
version="0.1.0",
name="tmd",
version="0.2.0",
packages=find_packages(),
python_requires=">=3.10,<3.13",
install_requires=[
Expand Down
15 changes: 6 additions & 9 deletions tests/test_tmd_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,19 @@
import yaml
from pathlib import Path
import subprocess
import warnings
import difflib
import numpy as np
import pandas as pd
import pytest
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.create_taxcalc_input_variables import (
create_variable_file,
)
from tax_microdata_benchmarking.utils.taxcalc_utils import (
get_tax_expenditure_results,
)
from tmd.utils.taxcalc_utils import get_tax_expenditure_results
from tmd.storage import STORAGE_FOLDER


# include this test only to gather warnings information
# run following test only to generate pytest warnings
# @pytest.mark.skip
def test_create_taxcalc_tmd_file():
    """Call ``create_variable_file(write_file=False)`` and expect no exception.

    The test contains no assertions, so it passes whenever the call returns
    normally.  ``create_variable_file`` is imported inside the test body
    instead of at module level, so the import only happens when this test
    actually runs.
    """
    from tmd.create_taxcalc_input_variables import create_variable_file

    # NOTE(review): write_file=False presumably skips writing the output
    # file to disk -- confirm against create_variable_file's definition.
    create_variable_file(write_file=False)


Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@
This module enables generation of all datasets involved in the repo.
"""

from tax_microdata_benchmarking.datasets import *
from tax_microdata_benchmarking.create_taxcalc_growth_factors import (
create_factors_file,
)
from tax_microdata_benchmarking.create_taxcalc_sampling_weights import (
create_weights_file,
)
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
import time
from tmd.datasets import *
from tmd.create_taxcalc_growth_factors import create_factors_file
from tmd.create_taxcalc_sampling_weights import create_weights_file
from tmd.storage import STORAGE_FOLDER


outputs = STORAGE_FOLDER / "output"


generation_functions = [
(create_pe_puf_2015, None),
(create_pe_puf_2021, None),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"""

import pandas as pd
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tmd.storage import STORAGE_FOLDER


FIRST_YEAR = 2021
LAST_YEAR = 2074
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
"""

import taxcalc as tc
from tax_microdata_benchmarking.datasets.tmd import create_tmd_2021
from tax_microdata_benchmarking.imputation_assumptions import (
from tmd.datasets.tmd import create_tmd_2021
from tmd.imputation_assumptions import (
IMPUTATION_RF_RNG_SEED,
IMPUTATION_BETA_RNG_SEED,
W2_WAGES_SCALE,
REWEIGHT_DEVIATION_PENALTY,
ITMDED_GROW_RATE,
)
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tmd.storage import STORAGE_FOLDER


TAXYEAR = 2021
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

import yaml
import pandas as pd
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tmd.storage import STORAGE_FOLDER


FIRST_YEAR = 2021
LAST_YEAR = 2074
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from tqdm import tqdm
import h5py
from policyengine_core.data import Dataset
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tmd.storage import STORAGE_FOLDER


AGED_RNG = np.random.default_rng(seed=374651932)
Expand Down
18 changes: 8 additions & 10 deletions tax_microdata_benchmarking/datasets/puf.py → tmd/datasets/puf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,18 @@
from tqdm import tqdm
import numpy as np
import pandas as pd
from microdf import MicroDataFrame
from policyengine_us.system import system
from policyengine_core.data import Dataset
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.utils.pension_contributions import (
impute_pension_contributions_to_puf,
)
from tax_microdata_benchmarking.datasets.uprate_puf import uprate_puf
from tax_microdata_benchmarking.utils.imputation import Imputation
from tax_microdata_benchmarking.imputation_assumptions import (
from tmd.storage import STORAGE_FOLDER
from tmd.utils.pension_contributions import impute_pension_contributions_to_puf
from tmd.datasets.uprate_puf import uprate_puf
from tmd.utils.imputation import Imputation
from tmd.imputation_assumptions import (
IMPUTATION_RF_RNG_SEED,
IMPUTATION_BETA_RNG_SEED,
W2_WAGES_SCALE,
)
from microdf import MicroDataFrame
from policyengine_core.data import Dataset
from policyengine_us.system import system


FILER_AGE_RNG = np.random.default_rng(seed=64963751)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from typing import Type
import numpy as np
import pandas as pd
from tax_microdata_benchmarking.storage import STORAGE_FOLDER
from tax_microdata_benchmarking.datasets.puf import PUF_2015, PUF_2021
from tmd.storage import STORAGE_FOLDER
from tmd.datasets.puf import PUF_2015, PUF_2021
from policyengine_us import Microsimulation
from policyengine_us.system import system

Expand Down
Loading

0 comments on commit a549ad8

Please sign in to comment.