Skip to content

Commit

Permalink
initial commit of A10 data prep file.
Browse files Browse the repository at this point in the history
  • Loading branch information
mle2718 committed Oct 30, 2024
1 parent 1a88c35 commit db3c17d
Showing 1 changed file with 159 additions and 0 deletions.
159 changes: 159 additions & 0 deletions Amendment10/writing/Amendment10_data_prep.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
---
title: "Am10_dataprep"
author: "Min-Yang Lee"
date: "2024-10-28"
output: html_document
---


1. You must be able to see this folder ``nefscfile\BLAST\READ-SSB-Lee-MRIP-BLAST\data_folder\raw``. It contains the raw MRIP data that has been imported into Rds format. These were constructed with ``https://github.com/NEFSC/READ-SSB-MRIP-BLAST/R_code/data_extracting_processing/extraction/pull_in_MRIP.R``.



```{r global_options, include=FALSE}
library("here")
library("data.table")
library("tidyverse")
library("knitr")
library("lubridate")
library("scales")
library("fredr")
library("kableExtra")
library("censusapi")
library("tigris")
library("mapview")
library("haven")
library("survey")
library("srvyr")
#turn off scientific notation
options(scipen=999)
#Handle single PSUs
options(survey.lonely.psu = "certainty")
options(tigris_use_cache = TRUE)
here::i_am("writing/Amendment10_data_prep.Rmd")
#############################################################################
#knitr options
knitr::opts_chunk$set(echo=FALSE, warning = FALSE, error = FALSE, message = FALSE, comment = FALSE, cache = FALSE, progress = TRUE, verbose = FALSE,
dpi = 600)
options(tinytex.verbose = TRUE)
# options(knitr.table.format = "latex")
#############################################################################
# RFA data
RFA_filepath<-file.path("//nefscdata","RFA_EO12866_Guidelines" ,"Ownership Data", "current data and metadata","affiliates_2024_06_01.Rdata")
BLAST_raw_filepath<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST", "data_folder","raw")
local_BLAST_folder<-file.path("V:","READ-SSB-Lee-MRIP-BLAST")
network_BLAST_folder<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST")
#############################################################################
# The census data that I'm using reports out 2022 Inflation adjusted dollars. Therefore, it makes sense to adjust everything to base year 2022.
deflate_by <- "year"
base_year = 2022 # base year for GDP deflator - Maximum = 2020, Minimum = 1947 - max(GDPDEF_quarterly$Year)
vintage_string<-Sys.Date()
vintage_string<-gsub("-","_",vintage_string)
```


```{r get_census, eval=TRUE}
# Pull in and tidy up some Census data on household income
income <- getCensus(
name = "acs/acs5/subject",
vintage = 2022,
vars = c("S1901_C01_001E", "S1901_C01_010E","S1901_C01_011E","S1901_C01_012E","S1901_C01_013E","S0101_C01_001E", "S0101_C02_028E","S0601_C01_001E","S0601_C01_022E"),
region = "zip code tabulation area:*",
show_call=FALSE)
income<-income %>%
rename ( households=S1901_C01_001E,
household_inc_150_200pct =S1901_C01_010E,
household_inc_200pct=S1901_C01_011E,
household_median_inc=S1901_C01_012E,
household_mean_inc=S1901_C01_013E,
total_population=S0101_C01_001E,
population_pct_over60=S0101_C02_028E,
total_population_s0601=S0601_C01_001E,
pct_white_alone =S0601_C01_022E
) %>%
mutate(obscured_median=case_when(household_median_inc<0 ~ 1, .default=0),
obscured_mean=case_when(household_mean_inc<0 ~ 1, .default=0)
)
# Read in the state-zcta correspondence file.
zcta_state<-read_delim("https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_county20_natl.txt", delim="|", guess_max=2000)
# Contract down to the fraction in each area.
zcta_state <- zcta_state %>%
dplyr::filter(is.na(OID_ZCTA5_20)==FALSE) %>%
group_by(OID_ZCTA5_20) %>%
mutate(total_area=sum(AREALAND_PART)) %>%
mutate(fraction=AREALAND_PART/total_area) %>%
select(c(GEOID_ZCTA5_20, OID_ZCTA5_20,OID_COUNTY_20, GEOID_COUNTY_20, fraction))
# Pull in and tidy up some Census data on household income at the state level
county_income <- getCensus(
name = "acs/acs5/subject",
vintage = 2022,
vars = c("S1901_C01_012E","S1901_C01_013E"),
region = "county:*",
show_call=FALSE)
county_income<-county_income %>%
rename (county_household_median_inc=S1901_C01_012E,
county_household_mean_inc=S1901_C01_013E
) %>%
mutate(st_co=paste0(state,county))
# use county income to create an in imputed income based on county income
income_fill<-zcta_state %>%
left_join(county_income, by=join_by(GEOID_COUNTY_20==st_co), relationship="many-to-one") %>%
mutate(wmedian=fraction*county_household_median_inc,
wmean=fraction*county_household_mean_inc) %>%
group_by(GEOID_ZCTA5_20, OID_ZCTA5_20) %>%
summarise(imputed_household_median_income=sum(wmedian),
imputed_household_mean_income=sum(wmean)) %>%
ungroup()
income<-income %>%
left_join(income_fill, by=join_by(zip_code_tabulation_area==GEOID_ZCTA5_20), relationship="one-to-one")%>%
mutate(household_median_inc=case_when(obscured_median==1 ~ imputed_household_median_income, .default=household_median_inc),
household_mean_inc=case_when(obscured_mean<0 ~ imputed_household_mean_income, .default=household_mean_inc)
) %>%
select(-c(imputed_household_mean_income,imputed_household_median_income))
summary(income)
saveRDS(income,file=here("data_folder", "main", paste0("income_",vintage_string,".Rds")))
saveRDS(zcta_state,file=here("data_folder", "main", paste0("zcta_state",vintage_string,".Rds")))
```



```{r get_MRIP, eval=TRUE}
```

0 comments on commit db3c17d

Please sign in to comment.