diff --git a/tmd/areas/targets/prepare/prepare_cds/renv.lock b/tmd/areas/targets/prepare/prepare_cds/renv.lock index db5bbefd..bcace3ab 100644 --- a/tmd/areas/targets/prepare/prepare_cds/renv.lock +++ b/tmd/areas/targets/prepare/prepare_cds/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.4.1", + "Version": "4.4.2", "Repositories": [ { "Name": "CRAN", @@ -166,13 +166,13 @@ }, "bit": { "Package": "bit", - "Version": "4.5.0", + "Version": "4.5.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" + "Hash": "f89f074e0e49bf1dbe3eba0a15a91476" }, "bit64": { "Package": "bit64", @@ -371,13 +371,13 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.5.0", + "Version": "0.5.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "91570bba75d0c9d3f1040c835cee8fba" + "Hash": "9df43854f1c84685d095ed6270b52387" }, "crayon": { "Package": "crayon", @@ -406,24 +406,24 @@ }, "curl": { "Package": "curl", - "Version": "6.0.0", + "Version": "6.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "ff51697d9205fe715f29e7171e874c6e" + "Hash": "e8ba62486230951fcd2b881c5be23f96" }, "data.table": { "Package": "data.table", - "Version": "1.16.2", + "Version": "1.16.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "methods" ], - "Hash": "2e00b378fc3be69c865120d9f313039a" + "Hash": "38bbf05fc2503143db4c734a7e5cab66" }, "dbplyr": { "Package": "dbplyr", @@ -560,7 +560,7 @@ }, "fontawesome": { "Package": "fontawesome", - "Version": "0.5.2", + "Version": "0.5.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -568,7 +568,7 @@ "htmltools", "rlang" ], - "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" + "Hash": "bd1297f9b5b1fc1372d19e2c4cd82215" }, "forcats": { "Package": "forcats", @@ -985,14 +985,14 @@ }, "later": { "Package": "later", - "Version": "1.3.2", + "Version": "1.4.1", "Source": "Repository", "Repository": "CRAN", 
"Requirements": [ "Rcpp", "rlang" ], - "Hash": "a3e051d405326b8b0012377434c62b37" + "Hash": "501744395cac0bab0fbcfab9375ae92c" }, "lattice": { "Package": "lattice", @@ -1034,7 +1034,7 @@ }, "lubridate": { "Package": "lubridate", - "Version": "1.9.3", + "Version": "1.9.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1043,7 +1043,7 @@ "methods", "timechange" ], - "Hash": "680ad542fbcf801442c83a6ac5a2126c" + "Hash": "be38bc740fc51783a78edb5a157e4104" }, "magrittr": { "Package": "magrittr", @@ -1137,7 +1137,7 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-165", + "Version": "3.1-166", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1147,17 +1147,17 @@ "stats", "utils" ], - "Hash": "2769a88be217841b1f33ed469675c3cc" + "Hash": "ccbb8846be320b627e6aa2b4616a2ded" }, "openssl": { "Package": "openssl", - "Version": "2.2.2", + "Version": "2.3.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "askpass" ], - "Hash": "d413e0fef796c9401a4419485f709ca1" + "Hash": "5bfe2927efa9f87766ca9605301ea48f" }, "pillar": { "Package": "pillar", @@ -1225,7 +1225,7 @@ }, "promises": { "Package": "promises", - "Version": "1.3.0", + "Version": "1.3.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1237,7 +1237,7 @@ "rlang", "stats" ], - "Hash": "434cd5388a3979e74be5c219bcd6e77d" + "Hash": "c84fd4f75ea1f5434735e08b7f50fbca" }, "proxy": { "Package": "proxy", @@ -1665,7 +1665,7 @@ }, "textshaping": { "Package": "textshaping", - "Version": "0.4.0", + "Version": "0.4.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1674,7 +1674,7 @@ "lifecycle", "systemfonts" ], - "Hash": "5142f8bc78ed3d819d26461b641627ce" + "Hash": "573e0d015b7fc3e555f83e254cad7533" }, "tibble": { "Package": "tibble", diff --git a/tmd/areas/targets/prepare/prepare_states/50_state_data_underlying_target_files.qmd b/tmd/areas/targets/prepare/prepare_states/50_state_data_underlying_target_files.qmd new file mode 100644 index 
00000000..63ebea79 --- /dev/null +++ b/tmd/areas/targets/prepare/prepare_states/50_state_data_underlying_target_files.qmd @@ -0,0 +1,143 @@ +--- +output: html_document +editor_options: + chunk_output_type: console +--- + +# How to write target files + +Previous code in this project culminated in the creation of "enhanced_targets.csv", stored in the "data/intermediate/" folder. These data contain more than 1,800 potential targets for each state for 2021, based largely on SOI Historical Table 2 data for the state. + +The next task, which this page discusses but does not implement, is to choose which potential targets to use, to map these potential targets to TMD variables, to choose which states to include, and to write \[xx\]\_targets.csv files for those states. + +This page does not write those target files. Instead, a utility program that users can run from the command line, `write_area_target_files.R`, reads a json file that defines variables and states to write targets for, and writes the state target files. + +The remainder of this page (1) shows information about the enhanced targets file, and (2) explains how to run the utility program. + +```{r} +#| label: setup +#| output: false + +suppressPackageStartupMessages(source(here::here("R", "libraries.R"))) +source(here::here("R", "constants.R")) +source(here::here("R", "functions.R")) + +``` + +```{r} +#| label: get-etargets +#| output: false + +etargets <- read_csv(fs::path(DINTERMEDIATE, "enhanced_targets.csv")) + +``` + +## Show information about enhanced targets data + +```{r} +#| label: show-etargets + +cat('"glimpse" the structure of the data') +glimpse(etargets) + +cat("summarize the data") +skim(etargets) + + + +``` + +### Browse the data for a single state + +The sortable and filterable table below shows the 2021 potential-target data for one of the Phase 6 states, Minnesota. The purpose is simply to make clear the kind of information that is available for targeting.
We include just one state to keep the demands on this web page minimal. + +By selecting agistub 0, you can see one record per potential target. Putting 18400 in the upper right search box will show all entries related to e18400, "State and local income or sales tax", + +::: {.cell-output-display style="font-size: 60%;"} +```{r} +#| label: etargets-table +#| column: page + +etargets |> + filter(stabbr %in% c("MN")) |> + mutate(across(c(sort, count, scope, fstatus, agistub, basesoivname, soivname), factor)) |> + DT::datatable(rownames = FALSE, + caption = htmltools::tags$caption( + style = 'caption-side: top; text-align: center; color: black; font-size: 200%;', + "Potential targets for Minnesota" + ), + options = list(order = list(0, "asc"), # use 1st column (0) for sorting + scrollX = TRUE, scrollY = TRUE, + paging = TRUE, pageLength = 20, + # autoWidth = TRUE, + columnDefs = list(list(width = '15px', targets = c("stabbr", "sort", "count", + "scope", "fstatus", "agistub")), + list(width = '15px', targets = c("soivname", "basesoivname")))), + filter="top", + escape = FALSE, + class = "compact") |> # A default DT class that makes the table more compact + formatRound(c("target"), digits = 0) + +``` +::: + +### Reminder of what the target files will look like + +The screenshot below shows the first few rows of a typical target file. The utility program described below will select targets and states defined in a json file and write a target file in that format. + +![](images/clipboard-754780137.png) + +## How to run the utility program that writes \[xx\]\_targets.csv files + +It's a two-step process: (1) create a json file that defines what targets to write, and (2) run an R script that reads the json file and creates the desired target files. 
+ +### The json file + +### Running the R script + +From a terminal in the prepare folder (the folder that contains write_area_target_files.R and the target_recipes subfolder) enter: + +`Rscript write_area_target_files.R phase6_states.json` + +## Additional notes + +```{r} +#| label: notes +#| output: false + +# documentation for the targets.csv data file + +# sample file excerpt +# varname,count,scope,agilo,agihi,fstatus,target +# XTOT, 0, 0,-9e99, 9e99, 0, 33e6 +# e00300, 0, 1,-9e99, 9e99, 0, 20e9 +# e00900, 0, 1,-9e99, 9e99, 0, 30e9 +# e00200, 0, 1,-9e99, 9e99, 0,1000e9 +# e02000, 0, 1,-9e99, 9e99, 0, 30e9 +# e02400, 0, 1,-9e99, 9e99, 0, 60e9 + +# varname: any Tax-Calculator input variable name plus any Tax-Calculator calculated variable in the list of cached variables in the tmd/storage/__init__.py file +# count: integer in [0,4] range: +# count==0 implies dollar total of varname is tabulated +# count==1 implies number of tax units with any value of varname is tabulated +# count==2 implies number of tax units with a nonzero value of varname is tabulated +# count==3 implies number of tax units with a positive value of varname is tabulated +# count==4 implies number of tax units with a negative value of varname is tabulated + +# scope: integer in [0,2] range: +# scope==0 implies all tax units are tabulated +# scope==1 implies only PUF-derived filing units are tabulated +# scope==2 implies only CPS-derived filing units are tabulated + +# agilo: float representing lower bound of the AGI range (which is included in the range) that is tabulated. +# agihi: float representing upper bound of the AGI range (which is excluded from the range) that is tabulated.
+ +# fstatus: integer in [0,5] range: +# fstatus=0 implies all filing statuses are tabulated +# other fstatus values imply just the tax units with the Tax-Calculator MARS variable equal to fstatus are included in the tabulation + +# target: target amount: +# dollars if count==0 +# number of tax units if count>0 + +``` diff --git a/tmd/areas/targets/prepare/prepare_states/R/constants.R b/tmd/areas/targets/prepare/prepare_states/R/constants.R index ed100413..6e6b7e0f 100644 --- a/tmd/areas/targets/prepare/prepare_states/R/constants.R +++ b/tmd/areas/targets/prepare/prepare_states/R/constants.R @@ -1,3 +1,5 @@ +# NJ, NM, VA, AK, MN +PHASE6_STATES <- c("AK", "MN", "NJ", "NM", "VA") DRAW <- here::here("data", "data_raw") DINTERMEDIATE <- here::here("data", "intermediate") diff --git a/tmd/areas/targets/prepare/prepare_states/_quarto.yml b/tmd/areas/targets/prepare/prepare_states/_quarto.yml index 0c4f2bc7..b27e9a79 100644 --- a/tmd/areas/targets/prepare/prepare_states/_quarto.yml +++ b/tmd/areas/targets/prepare/prepare_states/_quarto.yml @@ -6,7 +6,7 @@ project: # site info: - # OLD Unique deploy URL: https://671e13320a7e7cfb68b1ba7d--tmd-areas-prepare-targets.netlify.app + # site ID: 9c6bf4ca-1b88-4735-9c29-ec5a41dab2ef # url: https://tmd-areas-prepare-state-targets.netlify.app # publishing with netlify cli: @@ -43,21 +43,18 @@ book: - download_soi_data.qmd - construct_soi_documentation.qmd - construct_long_soi_data_file.qmd - - part: "Analysis of SALT variables and other issues" - chapters: - - SALT_analysis.qmd - part: "Create data from which to extract state target files" chapters: - create_state_targets_basefile.qmd - create_additional_state_targets.qmd - combine_base_and_additional_targets.qmd - # old files maybe use as base for new work - # - cd_create_variable_mapping.qmd - # - cd_compare_us_totals_tmd_vs_irs_published.qmd - # - cd_enhance_basefile_with_special_targets.qmd - # appendices: - # - cd_issues_and_TODOs.qmd - # - cd_IRS_documentation.qmd + - part: 
"Writing target files" + chapters: + - 50_state_data_underlying_target_files.qmd + appendices: + - notes_on_areas_and_targets.qmd + - compare_tmdsums_vs_soisums_keyvars.qmd + - SALT_analysis.qmd format: html: diff --git a/tmd/areas/targets/prepare/prepare_states/compare_tmdsums_vs_soisums_keyvars.qmd b/tmd/areas/targets/prepare/prepare_states/compare_tmdsums_vs_soisums_keyvars.qmd new file mode 100644 index 00000000..3c55a822 --- /dev/null +++ b/tmd/areas/targets/prepare/prepare_states/compare_tmdsums_vs_soisums_keyvars.qmd @@ -0,0 +1,90 @@ +--- +output: html_document +editor_options: + chunk_output_type: console +--- + +# Compare selected tmd sums and SOI vars + +```{r} +#| label: setup +#| output: false + +suppressPackageStartupMessages(source(here::here("R", "libraries.R"))) +source(here::here("R", "constants.R")) +source(here::here("R", "functions.R")) + +``` + + +## Get data + +```{r} +#| label: get-data +#| output: false + +agilabels <- read_csv(fs::path(DINTERMEDIATE, "agilabels.csv")) +agilabels + +fpath <- fs::path(TMDDATA, "cached_allvars.csv") +tmd2021 <- vroom(fpath) +ns(tmd2021) + +base_targets <- read_csv(fs::path(DINTERMEDIATE, "base_targets.csv")) +glimpse(base_targets) +# tmp <- count(base_targets, soivname, basesoivname, description) + +``` + + + +## tmd sums + + +```{r} +#| label: get-tmdsums +#| output: false + +# E02400 Gross Social Security benefits +# E02500 Social Security benefits in AGI-- djb c02500 + +tmdvars <- c("c00100", "e00200", "e00300", "e01500", "e01700", "e02400", "c02500", "e18400", "e18500", "e26270") + +tmdsums1 <- tmd2021 |> + filter(data_source==1) |> + select(s006, all_of(tmdvars)) |> + pivot_longer(-c(s006), + names_to = "tmdvar") |> + summarize(nzcount=sum(s006 * (value !=0)), + amount=sum(s006 * value), + .by=tmdvar) |> + arrange(tmdvar) + +tmdsums1 |> + kable(format.args=list(big.mark=",", digits=13)) + +# E01500 Total pensions and annuities received +# E01700 Pensions and annuities included in AGI + +# pensions +# SOI 
taxable: 853,473,354,000 +# tmd taxable 858,205,915,983.9 +# tmd total 1,508,287,559,037.2 + +usvals <- base_targets |> + filter(stabbr=="US", agistub==0, count %in% c(0, 2), fstatus==0, basesoivname %in% str_sub(tmdvars, 2, -1)) |> + mutate(type=ifelse(count==0, "amount", "nzcount")) |> + select(basesoivname, type, target) |> + pivot_wider(names_from = type, values_from = target) |> + select(basesoivname, nzcount, amount) + +tmdsums1 |> + kable(format.args=list(big.mark=",", digits=13)) + +usvals |> + kable(format.args=list(big.mark=",", digits=13)) + + +``` + + diff --git a/tmd/areas/targets/prepare/prepare_states/create_additional_state_targets.qmd b/tmd/areas/targets/prepare/prepare_states/create_additional_state_targets.qmd index 84cd45c7..c5820ab6 100644 --- a/tmd/areas/targets/prepare/prepare_states/create_additional_state_targets.qmd +++ b/tmd/areas/targets/prepare/prepare_states/create_additional_state_targets.qmd @@ -26,12 +26,34 @@ source(here::here("R", "functions.R")) Define which SOI variables will be used to share which tmd variables. 
+- tmdvar is the puf-based variable we will eventually want to target +- basesoivname is the base name of the SOI variable that we will use to share the national aggregate tmdvar +- Pensions: + - e01500 Total pensions and annuities shared by 01700 taxable +- Social Security: + - e02400 Total Social Security will be shared by 02500 taxable +- State and local income or sales tax + - 18400 State and local income or sales tax (estimated) amount was previously created by adding 18425 State and local income taxes amount, and 18450 State and local general sales tax amount + - this sum is used to share the national aggregate for 18400 State and local income or sales tax amount + + ```{r} #| label: construct-mappings #| output: false +# tmdvar is the puf-based variable we will eventually want to target +# basesoivname is the base name of the SOI variable that we will use to share the national aggregate tmdvar +# e02400 Total Social Security will be shared by 02500 taxable +# 18400 State and local income or sales tax (estimated) amount was previously created by adding +# 18425 State and local income taxes amount, and +# 18450 State and local general sales tax amount +# this sum is used to share the national aggregate for +# 18400 State and local income or sales tax amount + mappings <- read_csv( "tmdvar, basesoivname +e01500, 01700 +e02400, 02500 e18400, 18400 e18500, 18500 ", col_types="cc") @@ -66,11 +88,21 @@ count(tmd2021, agistub, agistublab, agilo, agihi, agilabel) base_targets <- read_csv(fs::path(DINTERMEDIATE, "base_targets.csv")) glimpse(base_targets) +# tmp <- count(base_targets, soivname, basesoivname, description) ``` ## Variables shared to states +### tmd data + +Using the tmd 2021 data file: + +- Get tmd weighted counts and sums by AGI range for variables that will be targeted in a sharing manner. Only include filers (in the 2015 PUF). +- From first step get sums across all AGI ranges. 
+- Stack, so we have sums and counts by AGI range and in total, for all variables that will be targeted by sharing. + + ```{r} #| label: get-tmdsums #| output: false @@ -109,6 +141,19 @@ tmdsums ``` +### Use SOI data to share the target values created above to states + +With the SOI base targets data for 2021: + +- Get the variables that will be used as "sharer" values +- For each such variable, by AGI range, get the state value as a share of the US record value + +Calculate targets: + +- For each variable to be shared, by state and AGI range, target = tmd sum calculated previously x state's share of the US value of the SOI sharer variable +- Construct target variable name to tell which tmd variable is shared by which SOI variable +- Write results to additional_targets.csv + ```{r} #| label: get-variable-shares #| output: false diff --git a/tmd/areas/targets/prepare/prepare_states/images/clipboard-1601488084.png b/tmd/areas/targets/prepare/prepare_states/images/clipboard-1601488084.png new file mode 100644 index 00000000..8ce9763b Binary files /dev/null and b/tmd/areas/targets/prepare/prepare_states/images/clipboard-1601488084.png differ diff --git a/tmd/areas/targets/prepare/prepare_states/images/clipboard-1857134196.png b/tmd/areas/targets/prepare/prepare_states/images/clipboard-1857134196.png new file mode 100644 index 00000000..d4d2571a Binary files /dev/null and b/tmd/areas/targets/prepare/prepare_states/images/clipboard-1857134196.png differ diff --git a/tmd/areas/targets/prepare/prepare_states/images/clipboard-2408217854.png b/tmd/areas/targets/prepare/prepare_states/images/clipboard-2408217854.png new file mode 100644 index 00000000..6cd75e8c Binary files /dev/null and b/tmd/areas/targets/prepare/prepare_states/images/clipboard-2408217854.png differ diff --git a/tmd/areas/targets/prepare/prepare_states/images/clipboard-4243181273.png b/tmd/areas/targets/prepare/prepare_states/images/clipboard-4243181273.png new file mode 100644 index 00000000..b954c150 
Binary files /dev/null and b/tmd/areas/targets/prepare/prepare_states/images/clipboard-4243181273.png differ diff --git a/tmd/areas/targets/prepare/prepare_states/images/clipboard-754780137.png b/tmd/areas/targets/prepare/prepare_states/images/clipboard-754780137.png new file mode 100644 index 00000000..b49c506f Binary files /dev/null and b/tmd/areas/targets/prepare/prepare_states/images/clipboard-754780137.png differ diff --git a/tmd/areas/targets/prepare/prepare_states/notes_on_areas_and_targets.qmd b/tmd/areas/targets/prepare/prepare_states/notes_on_areas_and_targets.qmd new file mode 100644 index 00000000..f801c434 --- /dev/null +++ b/tmd/areas/targets/prepare/prepare_states/notes_on_areas_and_targets.qmd @@ -0,0 +1,16 @@ +--- +output: html_document +editor_options: + chunk_output_type: console +--- + +# Notes on areas and targets + +- Adding e18400 + - AK + - starting point targets 29-35 natl avg vastly greater than soi-based estimate: + - ![](images/clipboard-2408217854.png) + - target values + - ![](images/clipboard-1601488084.png) + - All targets hit, albeit with substantial changes in weights vs. 
national average (table is ratio of final weight to scaled down national weights): + - ![](images/clipboard-1857134196.png) diff --git a/tmd/areas/targets/prepare/prepare_states/renv.lock b/tmd/areas/targets/prepare/prepare_states/renv.lock index 0825f0ea..1ecd6b5b 100644 --- a/tmd/areas/targets/prepare/prepare_states/renv.lock +++ b/tmd/areas/targets/prepare/prepare_states/renv.lock @@ -166,13 +166,13 @@ }, "bit": { "Package": "bit", - "Version": "4.5.0", + "Version": "4.5.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" + "Hash": "f89f074e0e49bf1dbe3eba0a15a91476" }, "bit64": { "Package": "bit64", @@ -371,13 +371,13 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.5.0", + "Version": "0.5.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "91570bba75d0c9d3f1040c835cee8fba" + "Hash": "9df43854f1c84685d095ed6270b52387" }, "crayon": { "Package": "crayon", @@ -416,14 +416,14 @@ }, "data.table": { "Package": "data.table", - "Version": "1.16.2", + "Version": "1.16.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "methods" ], - "Hash": "2e00b378fc3be69c865120d9f313039a" + "Hash": "38bbf05fc2503143db4c734a7e5cab66" }, "dbplyr": { "Package": "dbplyr", @@ -1034,7 +1034,7 @@ }, "lubridate": { "Package": "lubridate", - "Version": "1.9.3", + "Version": "1.9.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1043,7 +1043,7 @@ "methods", "timechange" ], - "Hash": "680ad542fbcf801442c83a6ac5a2126c" + "Hash": "be38bc740fc51783a78edb5a157e4104" }, "magrittr": { "Package": "magrittr", @@ -1137,7 +1137,7 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-165", + "Version": "3.1-166", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1147,26 +1147,25 @@ "stats", "utils" ], - "Hash": "2769a88be217841b1f33ed469675c3cc" + "Hash": "ccbb8846be320b627e6aa2b4616a2ded" }, "openssl": { "Package": "openssl", - "Version": 
"2.2.2", + "Version": "2.3.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "askpass" ], - "Hash": "d413e0fef796c9401a4419485f709ca1" + "Hash": "5bfe2927efa9f87766ca9605301ea48f" }, "pillar": { "Package": "pillar", - "Version": "1.9.0", + "Version": "1.10.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "cli", - "fansi", "glue", "lifecycle", "rlang", @@ -1174,7 +1173,7 @@ "utils", "vctrs" ], - "Hash": "15da5a8412f317beeee6175fbc76f4bb" + "Hash": "101ca350beea21261a15ba169d7a8513" }, "pkgconfig": { "Package": "pkgconfig", @@ -1616,14 +1615,14 @@ }, "stringdist": { "Package": "stringdist", - "Version": "0.9.12", + "Version": "0.9.14", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "parallel" ], - "Hash": "f360720fa3feb7db9d4133b31ebb067f" + "Hash": "be192673ec3a4efd32e8823298745a8d" }, "stringi": { "Package": "stringi", @@ -1676,7 +1675,7 @@ }, "textshaping": { "Package": "textshaping", - "Version": "0.4.0", + "Version": "0.4.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1685,7 +1684,7 @@ "lifecycle", "systemfonts" ], - "Hash": "5142f8bc78ed3d819d26461b641627ce" + "Hash": "573e0d015b7fc3e555f83e254cad7533" }, "tibble": { "Package": "tibble", diff --git a/tmd/areas/targets/prepare/target_recipes/phase6_states.json b/tmd/areas/targets/prepare/target_recipes/phase6_states.json new file mode 100644 index 00000000..db85340e --- /dev/null +++ b/tmd/areas/targets/prepare/target_recipes/phase6_states.json @@ -0,0 +1,86 @@ +// run from terminal (not console) with: +// Rscript write_area_target_files.R phase6_states.json + +// note: all text values must be in quotes +{ + "areatype": "state", // state or cd in quotes + + // "suffix": "A", // not present, or null, or a capital letter + // "session": 118, // not present, or 117 or 118 -- only present for areatype cd + + // arealist: + // cds: "all", or a list such as ["AK00", "DE00"] + // Phase 4 cds: ["AK00", "DE00", "ID01", "ID02", "ME02", 
"MT00", "ND00", "PA08", "SD00", "WY00"], + // states: "all", or a list such as ["ak", "de"] + // Phase 6 states: ["AK", "MN", "NJ", "NM", "VA"] plus SC + "arealist": ["AK", "MN", "NJ", "NM", "VA", "SC"], + + // target parameters + "notzero": true, // true or false -- whether to allow zero-valued targets + "notnegative": true, // true or false -- whether to allow negative-valued targets + "targets": [ + { + "varname": "c00100", + "scope": 1, + "count": 1, + "fstatus": 0 + }, + { + "varname": "c00100", + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "c00100", + "scope": 1, + "count": 1, + "fstatus": 1 + }, + { + "varname": "c00100", + "scope": 1, + "count": 1, + "fstatus": 2 + }, + { + "varname": "c00100", + "scope": 1, + "count": 1, + "fstatus": 4 + }, + { + "varname": "e00200", + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "e26270", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2, 3] // not present, or a vector of agistubs to include + }, + { + "varname": "e18400", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2] // not present, or a vector of agistubs to include + }, + { + "varname": "e18500", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2] + }, + { + "varname": "e02400", + "scope": 1, + "count": 0, + "fstatus": 0 + } + ] +} \ No newline at end of file diff --git a/tmd/areas/targets/prepare/target_recipes/phase6_test.json b/tmd/areas/targets/prepare/target_recipes/phase6_test.json new file mode 100644 index 00000000..f9f8f018 --- /dev/null +++ b/tmd/areas/targets/prepare/target_recipes/phase6_test.json @@ -0,0 +1,100 @@ +// run from terminal (not console) with: +// Rscript write_area_target_files.R + +// note: all text values must be in quotes +{ + "areatype": "state", // state or cd in quotes + + // "suffix": "A", // not present, or null, or a capital letter + // "session": 118, // not present, or 117 or 118 -- only present for areatype cd + + // arealist: + // cds: 
"all", or a list such as ["AK00", "DE00"] + // Phase 4 cds: ["AK00", "DE00", "ID01", "ID02", "ME02", "MT00", "ND00", "PA08", "SD00", "WY00"], + // states: "all", or a list such as ["ak", "de"] + // Phase 6 states: ["AK", "MN", "NJ", "NM", "VA", "SC"] plus SC + // For testing: ["MN"] + "arealist": ["AK", "MN", "NJ", "NM", "VA", "SC"], + + // target parameters + "notzero": true, // true or false -- whether to allow zero-valued targets + "notnegative": true, // true or false -- whether to allow negative-valued targets + "targets": [ + { + "varname": "c00100", // agi + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "c00100", // agi used to get total count + "scope": 1, + "count": 1, + "fstatus": 0 + }, + { + "varname": "c00100", // agi used to get total count + "scope": 1, + "count": 1, + "fstatus": 1 // Single + }, + + { + "varname": "c00100", // agi used to get total count + "scope": 1, + "count": 1, + "fstatus": 2 // Married joint + }, + { + "varname": "c00100", // agi used to get total count + "scope": 1, + "count": 1, + "fstatus": 4 // Head of household + }, + { + "varname": "e00200", + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "e00300", + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "e01500", // Total pensions and annuities + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "e02400", + "scope": 1, + "count": 0, + "fstatus": 0 + }, + { + "varname": "e18400", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2] // not present, or a vector of agistubs to include + }, + { + "varname": "e18500", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2] + }, + { + "varname": "e26270", + "scope": 1, + "count": 0, + "fstatus": 0, + "agi_exclude": [1, 2, 3] // not present, or a vector of agistubs to include + } + ] +} \ No newline at end of file diff --git a/tmd/areas/targets/prepare/write_area_target_files.R b/tmd/areas/targets/prepare/write_area_target_files.R new file mode 
100644 index 00000000..0d9fc6a7 --- /dev/null +++ b/tmd/areas/targets/prepare/write_area_target_files.R @@ -0,0 +1,240 @@ + +# run from terminal (not console) with: +# Rscript write_area_target_files.R phase6_states.json + +# json files MUST be in the target_recipes folder +# Rscript test.r > output.log 2>&1 + +# load packages quietly ----------------------------------------------------------------- + +suppressPackageStartupMessages({ + library(rlang) + library(tidyverse) + library(here) + library(fs) + library(jsonlite) +}) + + +# set folders ------------------------------------------------------------- +# assume for NOW that this is called from the prepare/prepare_states folder +# later we will move it to the prepare folder +PREPDIR <- getwd() # folder in which the terminal is open; fs::path_abs("../") +# during development use the following: +# PREPDIR <- "/home/donboyd5/Documents/python_projects/tax-microdata-benchmarking/tmd/areas/targets/prepare" + +DRECIPES <- fs::path(PREPDIR, "target_recipes") +DLIB <- fs::path(PREPDIR, "target_file_library") # output files go here + +# input data +STATEINPUTS <- fs::path(PREPDIR, "prepare_states", "data", "intermediate", "enhanced_targets.csv") +CDINPUTS <- fs::path(PREPDIR, "prepare_cds", "cds", "intermediate", "cdbasefile_enhanced.csv") + +# output folders +STATEDIR <- fs::path(DLIB, "states") +CDDIR <- fs::path(DLIB, "cds") + + +# Check command-line arguments -------------------------------------------- +print("checking arguments and getting data needed for target files...") +args <- commandArgs(trailingOnly = TRUE) + +# Check if the correct number of arguments is provided +if (length(args) < 1) { + stop("Error: No JSON file specified. 
Please provide the name of a JSON file in recipes folder as an argument.") +} + +# Assign the first argument as the file path +fnrecipe <- args[1] + +# ALTERNATIVE for testing: hardcode a file name ------------------------------------------- +# uncomment a line below for interactive testing +# fnrecipe <- "phase6_states.json" +# fnrecipe <- "phase6_test.json" + +# Check if the specified file exists in the target_recipes folder +fpath <- fs::path(DRECIPES, fnrecipe) +if (!file.exists(fpath)) { + stop("The specified file does not exist: ", fpath) +} + +# get target recipes and validate ------------------------------------------------------ + +recipe <- read_json(fpath) +# print(recipe) +print(names(recipe)) + +#.. determine recipe type and set folders ------------------------------------- + +stopifnot( + "areatype must be present and one of state or cd" = !is.null(recipe$areatype), + "areatype must be one of state or cd" = recipe$areatype %in% c("state", "cd") +) + +OUTDIR <- case_when( + recipe$areatype == "state" ~ STATEDIR, + recipe$areatype == "cd" ~ CDDIR, + .default = "ERROR") + +#.. check and set defaults for suffix ---- +if (is.null(recipe$suffix)) { + message("Note: Suffix value is missing. Defaulting to an empty string.") + recipe$suffix <- "" +} else if (!recipe$suffix %in% c("", LETTERS)) { + stop("Invalid suffix value: ", recipe$suffix, ". Valid values are an empty string or a single capital letter (A-Z).") +} + +# If a CD list, check and set defaults for session variable +if (recipe$areatype == "cd") { + if (is.null(recipe$session)) { + message("Session value is missing for a Congressional District json file. Defaulting to 118.") + recipe$session <- 118 + } else if (!(recipe$session %in% c(117, 118))) { + stop("Invalid session value for Congressional District json file: ", recipe$session, ". 
Valid values are 117, 118.") + } +} + +# TODO: error checking on arealist + +# Print updated recipe list +print(recipe) + + +# define variable mappings ------------------------------------------------ +# allowable target variables are those mapped below +# MARS mappings let us get counts by filing status by agi range + +vmap <- read_csv(fs::path(DRECIPES, "variable_mapping.csv"), + col_types = "ccci") + +allcount_vars <- c("n1", "mars1", "mars2", "mars4") +vmap2 <- crossing(vmap, count=0:4) |> + # drop combinations we do not have in the SOI data + filter(!(basesoivname == "XTOT" & (count != 0 | fstatus != 0))) |> # not allowed by definition + filter(!(count == 1 & !basesoivname %in% allcount_vars)) |> # only allcount_vars allowed here + filter(!(basesoivname %in% allcount_vars & count != 1)) + +# TODO: check whether target names are in vmap + +# prepare target rules ---------------------------------------------------- + +# general rules, before exceptions +target_rules <- recipe$targets |> + purrr::map(as_tibble) |> + purrr::list_rbind() + +# combine with agi ranges, before excluding any ranges +target_stubs <- target_rules |> + select(varname, scope, count, fstatus) |> + distinct() |> + cross_join(tibble(agistub=1:9)) |> # allow all agi ranges + arrange(varname, scope, count, fstatus, agistub) + +# update target_stubs to drop any agi ranges that are named for exclusion +if("agi_exclude" %in% names(target_rules)){ + target_drops <- target_rules |> + unnest(cols=agi_exclude) + + target_stubs <- target_stubs |> + anti_join(target_drops |> + rename(agistub=agi_exclude), + join_by(varname, scope, count, fstatus, agistub)) + } + + +# create a dataframe to match against the stack data for targets +# vmap +# allcount_vars <- c("N1", "MARS1", "MARS2", "MARS4") +# allcount_vars <- c("n1", "mars1", "mars2", "mars4") +# vmap2 <- vmap |> +# select(varname, basesoivname, fstatus) |> +# mutate(basesoivname=ifelse(basesoivname %in% allcount_vars, "00100", basesoivname)) |> +# 
distinct() + +# bring basesoivname in because we need it to match against targets file +targets_matchframe <- target_stubs |> + mutate(sort=row_number() + 1) |> + rows_insert(tibble(varname="XTOT", scope=0, count=0, fstatus=0, agistub=0, sort=1), + by="varname") |> + arrange(sort) |> + left_join(vmap2, by = join_by(varname, fstatus, count)) |> + relocate(sort) + +# set up filters for areas, zero targets, negative targets, etc. -------------------- + +##.. areas filters ---- +arealist <- unlist(recipe$arealist) +arealist +if( + (length(arealist) > 1) || + ((length(arealist) ==1) && (arealist != "all")) + ){ + area_filter <- expr(area %in% arealist) +} else if(length(arealist) == 1 & arealist == "all") { + area_filter <- TRUE +} else stop('arealist must be "all" or a list of valid state or cd codes, as appropriate') + +##.. zero-target filter -------- +if(recipe$notzero) { + zero_filter <- expr(target != 0) +} else zero_filter <- TRUE + +#.. negative-target filter ---------- +if(recipe$notnegative) { + negative_filter <- expr(!(target < 0)) +} else negative_filter <- TRUE + +#.. if cd session filter ---- +if(recipe$areatype == "cd") { + session_filter <- expr(session %in% recipe$session) +} else session_filter <- TRUE + + +# TODO: make this flexible state or cd; load targets data ------------------------------------------------------- +stack <- read_csv(STATEINPUTS, show_col_types = FALSE) |> + rename(area=stabbr) +# tmd18400_shared_by_soi18400" "tmd18400_shared_by_soi18400" "tmd18500_shared_by_soi18500" "tmd18500_shared_by_soi18500 + +# create mapped targets tibble -------------------------------------------- + +mapped <- targets_matchframe |> + # inner_join -- must be in both the targets and the filtered stack + inner_join(stack |> + filter(!!area_filter, + !!zero_filter, + !!negative_filter, + session_filter) |> + rename(label=description) |> + select(-sort), + # is sort correct? 
+ by = join_by(basesoivname, scope, count, fstatus, agistub), + relationship = "many-to-many") |> + arrange(area, sort) + +# write targets ----------------------------------------------------------- + +f <- function(data, group, suffix=""){ + area <- group$area |> + str_to_lower() |> + paste0(suffix) + fname <- paste0(area, "_targets.csv") + fpath <- fs::path(OUTDIR, fname) + # print(fpath) + write_csv(data, fpath) +} + + +print("writing targets files...") +mapped |> + select(area, varname, count, scope, agilo, agihi, fstatus, target) |> + group_by(area) |> + group_walk(~f(.x, .y, recipe$suffix)) + + +ntargets <- count(mapped, area) +print("number of targets per area") +deframe(ntargets) + +print("all done!") + + diff --git a/tmd/areas/weights/examine/renv.lock b/tmd/areas/weights/examine/renv.lock index 3bdc3f6c..bcace3ab 100644 --- a/tmd/areas/weights/examine/renv.lock +++ b/tmd/areas/weights/examine/renv.lock @@ -166,13 +166,13 @@ }, "bit": { "Package": "bit", - "Version": "4.5.0", + "Version": "4.5.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "5dc7b2677d65d0e874fc4aaf0e879987" + "Hash": "f89f074e0e49bf1dbe3eba0a15a91476" }, "bit64": { "Package": "bit64", @@ -371,13 +371,13 @@ }, "cpp11": { "Package": "cpp11", - "Version": "0.5.0", + "Version": "0.5.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "91570bba75d0c9d3f1040c835cee8fba" + "Hash": "9df43854f1c84685d095ed6270b52387" }, "crayon": { "Package": "crayon", @@ -406,24 +406,24 @@ }, "curl": { "Package": "curl", - "Version": "6.0.0", + "Version": "6.0.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R" ], - "Hash": "ff51697d9205fe715f29e7171e874c6e" + "Hash": "e8ba62486230951fcd2b881c5be23f96" }, "data.table": { "Package": "data.table", - "Version": "1.16.2", + "Version": "1.16.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "methods" ], - "Hash": "2e00b378fc3be69c865120d9f313039a" + 
"Hash": "38bbf05fc2503143db4c734a7e5cab66" }, "dbplyr": { "Package": "dbplyr", @@ -560,7 +560,7 @@ }, "fontawesome": { "Package": "fontawesome", - "Version": "0.5.2", + "Version": "0.5.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -568,7 +568,7 @@ "htmltools", "rlang" ], - "Hash": "c2efdd5f0bcd1ea861c2d4e2a883a67d" + "Hash": "bd1297f9b5b1fc1372d19e2c4cd82215" }, "forcats": { "Package": "forcats", @@ -985,14 +985,14 @@ }, "later": { "Package": "later", - "Version": "1.3.2", + "Version": "1.4.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "Rcpp", "rlang" ], - "Hash": "a3e051d405326b8b0012377434c62b37" + "Hash": "501744395cac0bab0fbcfab9375ae92c" }, "lattice": { "Package": "lattice", @@ -1034,7 +1034,7 @@ }, "lubridate": { "Package": "lubridate", - "Version": "1.9.3", + "Version": "1.9.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1043,7 +1043,7 @@ "methods", "timechange" ], - "Hash": "680ad542fbcf801442c83a6ac5a2126c" + "Hash": "be38bc740fc51783a78edb5a157e4104" }, "magrittr": { "Package": "magrittr", @@ -1137,7 +1137,7 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-165", + "Version": "3.1-166", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1147,17 +1147,17 @@ "stats", "utils" ], - "Hash": "2769a88be217841b1f33ed469675c3cc" + "Hash": "ccbb8846be320b627e6aa2b4616a2ded" }, "openssl": { "Package": "openssl", - "Version": "2.2.2", + "Version": "2.3.0", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "askpass" ], - "Hash": "d413e0fef796c9401a4419485f709ca1" + "Hash": "5bfe2927efa9f87766ca9605301ea48f" }, "pillar": { "Package": "pillar", @@ -1225,7 +1225,7 @@ }, "promises": { "Package": "promises", - "Version": "1.3.0", + "Version": "1.3.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1237,7 +1237,7 @@ "rlang", "stats" ], - "Hash": "434cd5388a3979e74be5c219bcd6e77d" + "Hash": "c84fd4f75ea1f5434735e08b7f50fbca" }, "proxy": { "Package": 
"proxy", @@ -1665,7 +1665,7 @@ }, "textshaping": { "Package": "textshaping", - "Version": "0.4.0", + "Version": "0.4.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1674,7 +1674,7 @@ "lifecycle", "systemfonts" ], - "Hash": "5142f8bc78ed3d819d26461b641627ce" + "Hash": "573e0d015b7fc3e555f83e254cad7533" }, "tibble": { "Package": "tibble",