Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update file calls in calibration modules #112

Merged
merged 22 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ htmlcov/*
*.asv
*.nav
*.snm
*.gz
*.bib.bak
*.fls
*.m~
Expand All @@ -47,6 +46,7 @@ examples/OG-USA-Example/*
cs-config/cs_config/OUTPUT_BASELINE/*
data/csv_output_files/*
data/images/*
data/PSID/psid_lifetime_income.csv
ogusa/csv_output_files/*
ogusa/images/*
.vscode/
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [0.1.8] - 2024-05-20 12:00:00

### Added

- Updates the file calls in some of the calibration modules (`bequest_transmission.py`, `deterministic_profiles.py`, `psid_data_setup.py`, and `transfer_distribution.py`). These files called `.RData` files that were not included in the `ogusa` package, so users who pip-installed `ogusa` could not instantiate the `Calibration` class object. These changes allow for Jupyter notebook users to automatically download the data files from the master branch of the GitHub repository if they are not working in their own branch of the repository.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rickecon I think we should update this. Perhaps:

Updates the `ogusa` package to include the gzipped `psid_lifetime_income.csv.gz` file, which is now called in some calibration modules (`bequest_transmission.py`, `deterministic_profiles.py`, and `transfer_distribution.py`), but with an option for the user to provide their own custom data file. These changes allow Jupyter notebook users to execute the `Calibration` class object and give those who install the `ogusa` package from PyPI the required data file for the major calibration modules.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jdebacker. I just made this change.



## [0.1.7] - 2024-05-14 16:30:00

### Added
Expand Down Expand Up @@ -97,6 +105,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0



[0.1.8]: https://github.com/PSLmodels/OG-USA/compare/v0.1.7...v0.1.8
[0.1.7]: https://github.com/PSLmodels/OG-USA/compare/v0.1.6...v0.1.7
[0.1.6]: https://github.com/PSLmodels/OG-USA/compare/v0.1.5...v0.1.6
[0.1.5]: https://github.com/PSLmodels/OG-USA/compare/v0.1.4...v0.1.5
Expand Down
109,156 changes: 0 additions & 109,156 deletions data/PSID/psid_lifetime_income.csv

This file was deleted.

90 changes: 47 additions & 43 deletions ogusa/bequest_transmission.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,42 +3,40 @@
import matplotlib.pyplot as plt
import os
from ogusa.utils import MVKDE

CURDIR = os.path.split(os.path.abspath(__file__))[0]
from ogusa.constants import CODE_PATH


def get_bequest_matrix(
J=7,
lambdas=np.array([0.25, 0.25, 0.2, 0.1, 0.1, 0.09, 0.01]),
graphs=False,
data_path=None,
output_path=None,
):
"""
Returns S x J matrix representing the fraction of aggregate
bequests that go to each household by age and lifetime income group.

Args:
J (int): number of lifetime income groups
lambdas (Numpy array): length J array of lifetime income group
proportions
data_path (str): path to PSID data
output_path (str): path to save output plots and data

Returns:
kde_matrix (Numpy array): SxJ shaped array that represents the
smoothed distribution of proportions going to each (s,j)

"""
# Create directory if output directory does not already exist
CURDIR = os.path.split(os.path.abspath(__file__))[0]
output_fldr = "csv_output_files"
output_dir = os.path.join(CURDIR, "..", "data", output_fldr)
if not os.access(output_dir, os.F_OK):
os.makedirs(output_dir)
image_fldr = "images"
image_dir = os.path.join(CURDIR, "..", "data", image_fldr)
if not os.access(image_dir, os.F_OK):
os.makedirs(image_dir)

# Define a lambda function to compute the weighted mean:
# wm = lambda x: np.average(
# x, weights=df.loc[x.index, "fam_smpl_wgt_core"])

# Read in dataframe of PSID data
# df = ogcore.utils.safe_read_pickle(
# os.path.join(CURDIR, "data", "PSID", "psid_lifetime_income.pkl")
# )
df = pd.read_csv(
os.path.join(CURDIR, "..", "data", "PSID", "psid_lifetime_income.csv")
)
# Read in PSID data
if data_path is None:
# Read data file shipped with OG-USA package
df = pd.read_csv(
os.path.join(CODE_PATH, "psid_lifetime_income.csv.gz")
)
else:
# This is the case when running this from a branch of the OG-USA repo
df = pd.read_csv(data_path)

# Do some tabs with data file...
# 'net_wealth', 'inheritance', 'value_inheritance_1st',
Expand All @@ -54,12 +52,15 @@ def get_bequest_matrix(
)
# print(df[['sum_inherit', 'inheritance']].describe())

if graphs:
if output_path is not None:
# Create plot path directory if it doesn't already exist
if not os.path.exists(output_path):
os.makedirs(output_path)
# Total inheritances by year
df.groupby("year_data").mean(numeric_only=True).plot(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_year.png"))
plt.savefig(os.path.join(output_path, "inheritance_year.png"))
df.groupby("year_data").mean(numeric_only=True).plot(y="sum_inherit")
plt.savefig(os.path.join(image_dir, "sum_inherit_year.png"))
plt.savefig(os.path.join(output_path, "sum_inherit_year.png"))
# note that summing up inheritances gives a much larger value than
# taking the inheritance variable

Expand All @@ -68,22 +69,22 @@ def get_bequest_matrix(
df[df["year_data"] >= 1988].groupby("age").mean(
numeric_only=True
).plot(y="net_wealth")
plt.savefig(os.path.join(image_dir, "net_wealth_age.png"))
plt.savefig(os.path.join(output_path, "net_wealth_age.png"))
df[df["year_data"] >= 1988].groupby("age").mean(
numeric_only=True
).plot(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_age.png"))
plt.savefig(os.path.join(output_path, "inheritance_age.png"))

# Inheritances by lifetime income group
# bar plot
df[df["year_data"] >= 1988].groupby("li_group").mean(
numeric_only=True
).plot.bar(y="net_wealth")
plt.savefig(os.path.join(image_dir, "net_wealth_li.png"))
plt.savefig(os.path.join(output_path, "net_wealth_li.png"))
df[df["year_data"] >= 1988].groupby("li_group").mean(
numeric_only=True
).plot.bar(y="inheritance")
plt.savefig(os.path.join(image_dir, "inheritance_li.png"))
plt.savefig(os.path.join(output_path, "inheritance_li.png"))

# lifecycle plots with line for each ability type
pd.pivot_table(
Expand All @@ -93,15 +94,15 @@ def get_bequest_matrix(
columns="li_group",
aggfunc="mean",
).plot(legend=True)
plt.savefig(os.path.join(image_dir, "net_wealth_age_li.png"))
plt.savefig(os.path.join(output_path, "net_wealth_age_li.png"))
pd.pivot_table(
df[df["year_data"] >= 1988],
values="inheritance",
index="age",
columns="li_group",
aggfunc="mean",
).plot(legend=True)
plt.savefig(os.path.join(image_dir, "inheritance_age_li.png"))
plt.savefig(os.path.join(output_path, "inheritance_age_li.png"))

# Matrix Fraction of inheritances in a year by age and lifetime_inc
inheritance_matrix = pd.pivot_table(
Expand All @@ -114,16 +115,18 @@ def get_bequest_matrix(
# replace NaN with zero
inheritance_matrix.fillna(value=0, inplace=True)
inheritance_matrix = inheritance_matrix / inheritance_matrix.sum().sum()
# inheritance_matrix.to_csv(os.path.join(
# output_dir, 'bequest_matrix.csv'))

# estimate kernel density of bequests
if output_path is not None:
filename = os.path.join(output_path, "inheritance_kde.png")
else:
filename = None
kde_matrix = MVKDE(
80,
7,
inheritance_matrix.to_numpy(),
filename=os.path.join(image_dir, "inheritance_kde.png"),
plot=graphs,
filename=filename,
plot=(output_path is not None),
bandwidth=0.5,
)

Expand All @@ -139,10 +142,11 @@ def get_bequest_matrix(
)
kde_matrix = kde_matrix_new

np.savetxt(
os.path.join(output_dir, "bequest_matrix_kde.csv"),
kde_matrix,
delimiter=",",
)
if output_path is not None:
np.savetxt(
os.path.join(output_path, "bequest_matrix_kde.csv"),
kde_matrix,
delimiter=",",
)

return kde_matrix
22 changes: 17 additions & 5 deletions ogusa/calibrate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ogusa import estimate_beta_j, bequest_transmission
from ogusa import macro_params, transfer_distribution, income
from ogusa import get_micro_data, psid_data_setup
from ogusa import get_micro_data
import os
import numpy as np
from ogcore import txfunc, demographics
Expand All @@ -25,6 +25,8 @@ def __init__(
data="cps",
client=None,
num_workers=1,
demographic_data_path=None,
output_path=None,
):
"""
Constructor for the Calibration class. This class is used to find
Expand All @@ -43,10 +45,15 @@ def __init__(
data (str): data source for microsimulation model
client (Dask client object): client
num_workers (int): number of workers for Dask client
output_path (str): path to save output to

Returns:
Calibration class object instance
"""
# Create output_path if it doesn't exist
if output_path is not None:
if not os.path.exists(output_path):
os.makedirs(output_path)
self.estimate_tax_functions = estimate_tax_functions
self.estimate_beta = estimate_beta
self.estimate_chi_n = estimate_chi_n
Expand Down Expand Up @@ -76,10 +83,14 @@ def __init__(
self.macro_params = macro_params.get_macro_params()

# eta estimation
self.eta = transfer_distribution.get_transfer_matrix(p.J, p.lambdas)
self.eta = transfer_distribution.get_transfer_matrix(
p.J, p.lambdas, output_path=output_path
)

# zeta estimation
self.zeta = bequest_transmission.get_bequest_matrix(p.J, p.lambdas)
self.zeta = bequest_transmission.get_bequest_matrix(
p.J, p.lambdas, output_path=output_path
)

# demographics
if estimate_pop:
Expand All @@ -92,6 +103,7 @@ def __init__(
initial_data_year=p.start_year - 1,
final_data_year=p.start_year,
GraphDiag=False,
download_path=demographic_data_path,
)

# demographics for 80 period lives (needed for getting e below)
Expand All @@ -112,15 +124,15 @@ def __init__(
self.demographic_params["omega_SS"],
demog80["omega_SS"],
p.lambdas,
plot=False,
plot_path=output_path,
)
else:
self.e = income.get_e_interp(
p.S,
p.omega_SS,
p.omega_SS,
p.lambdas,
plot=False,
plot_path=output_path,
)

# Tax Functions
Expand Down
3 changes: 3 additions & 0 deletions ogusa/constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import taxcalc
import os

SHOW_RUNTIME = False # Flag to display RuntimeWarnings when run model

REFORM_DIR = "OUTPUT_REFORM"
BASELINE_DIR = "OUTPUT_BASELINE"

CODE_PATH = os.path.abspath(os.path.dirname(__file__))

# Default year for model runs
DEFAULT_START_YEAR = 2021
# Tax-Calculator start year
Expand Down
Loading
Loading