-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from dominicwhite/dashboard
Data processing and dashboard prototype
- Loading branch information
Showing
17 changed files
with
1,116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
--- | ||
title: "Census 2020 Cultural Messaging" | ||
runtime: shiny | ||
output: | ||
flexdashboard::flex_dashboard: | ||
orientation: columns | ||
--- | ||
|
||
```{r setup, include=FALSE} | ||
library(flexdashboard) | ||
library(here) | ||
``` | ||
|
||
```{r global, include=FALSE} | ||
# load data in 'global' chunk so it can be shared by all users of the dashboard | ||
library(sf) | ||
pop <- read_sf("foreign_country_of_birth2.csv") | ||
pop$TotalForeignBornE <- as.numeric(pop$TotalForeignBornE) | ||
pop$EuropeE <- as.numeric(pop$EuropeE) | ||
pop$Central.AmericaE <- as.numeric(pop$Central.AmericaE) | ||
pop$AsiaE <- as.numeric(pop$AsiaE) | ||
``` | ||
|
||
Column {.sidebar data-width=200} | ||
----------------------------------------------------------------------- | ||
|
||
```{r} | ||
selectInput("voi", label = h4("Show on map"), | ||
choices = list( | ||
"Foreign-born" = "TotalForeignBornE", | ||
"European-born" = "EuropeE", | ||
"Cent. Am.-born" = "Central.AmericaE", | ||
"Asian-born" = "AsiaE" | ||
), | ||
selected = "TotalForeignBornE") | ||
``` | ||
|
||
#### About | ||
|
||
Built by DataKind DC, Motifv, and Hofstede Insights. | ||
|
||
|
||
Column {data-width=450} | ||
----------------------------------------------------------------------- | ||
|
||
### Map | ||
|
||
```{r, echo=FALSE} | ||
library(leaflet) | ||
library(leaflet.providers) | ||
bins <- reactive({c(as.integer(seq(from=0, to=max(pop[[input$voi]]), length.out=10)), Inf)}) | ||
m <- renderLeaflet({ | ||
pal <- colorNumeric("YlOrRd", domain = pop[[input$voi]]) | ||
leaflet(pop) %>% | ||
setView(lng = -77.045992, lat = 38.9145, zoom = 12) %>% | ||
addProviderTiles(providers$CartoDB.Positron) %>% | ||
addPolygons( | ||
fillColor = ~pal(get(input$voi)), | ||
weight = 2, | ||
opacity = 1, | ||
color = "white", | ||
dashArray = "3", | ||
fillOpacity = 0.7 | ||
) | ||
}) | ||
m | ||
``` | ||
|
||
Column {data-width=350} | ||
----------------------------------------------------------------------- | ||
|
||
### Chart B | ||
|
||
More detailed visualization to go here... | ||
|
||
```{r} | ||
``` | ||
|
||
### Chart C | ||
|
||
More detailed visualization to go here... | ||
|
||
```{r} | ||
``` | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Data | ||
|
||
This directory contains data files, along with scripts to generate those data files. | ||
|
||
Descriptions of the main subdirectories: | ||
|
||
## Static data | ||
|
||
Some data is static (i.e. not generated from online sources and APIs via scripts). | ||
|
||
An example is the Hofstede Index data for different countries. | ||
|
||
## Raw data | ||
|
||
Files of raw data downloaded from various sources without any processing/cleaning. Scripts for generating these files should reside in this level. | ||
|
||
## Transformed | ||
|
||
Data files ready for use in analyses and visualizations, created from data sources in the `static` and `raw` directories. | ||
|
||
Scripts for running transformations should reside in this level. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,211 @@ | ||
library(tidyverse) | ||
library(tidycensus) | ||
library(sf) | ||
library(here) | ||
|
||
# sourceDir <- getSrcDirectory(function(dummy) {dummy}) | ||
# dummy <- function() {0} | ||
# here() | ||
|
||
# uncomment to avoid cache | ||
options(tigris_use_cache = TRUE) | ||
|
||
# Load API key for Census Bureau. Register here: https://api.census.gov/data/key_signup.html | ||
api_key <- Sys.getenv(c("CENSUS_API_KEY")) | ||
census_api_key(api_key) | ||
|
||
# get all acs variables | ||
v17 <- load_variables(2017, "acs5", cache = TRUE) | ||
v15 <- load_variables(2015, "acs5", cache = TRUE) | ||
v09 <- load_variables(2009, "acs5", cache = TRUE) | ||
|
||
# select acs variables for country of birth | ||
regional_splits <- v17 %>% | ||
filter(str_detect(name, "^B05006_")) %>% | ||
select(name, label) %>% | ||
separate( | ||
label, | ||
into=c("Estimate", "Total", "Continent", "Subcontinent", "Country", "Subcountry"), | ||
sep="!!" | ||
) %>% | ||
mutate( | ||
pop_label = case_when( | ||
is.na(Continent) ~ "TotalForeignBorn", | ||
is.na(Subcontinent) ~ Continent, | ||
is.na(Country) ~ Subcontinent, | ||
is.na(Subcountry) ~ Country, | ||
TRUE ~ Subcountry | ||
), | ||
) %>% | ||
select(name, pop_label) | ||
acs_vars <- regional_splits$name | ||
names(acs_vars) <- regional_splits$pop_label | ||
|
||
# select acs variables for language | ||
language_splits <- v15 %>% | ||
filter(str_detect(name, "^B16001_")) %>% | ||
select(name, label) %>% | ||
separate( | ||
label, | ||
into=c("Estimate", "Total", "MainLanguage", "EnglishAbility"), | ||
sep="!!" | ||
) %>% | ||
mutate( | ||
lang_label = case_when( | ||
is.na(MainLanguage) ~ "TotalSpeakersOver5", | ||
is.na(EnglishAbility) ~ str_c("HouseholdLanguage:",MainLanguage), | ||
TRUE ~ str_c("HouseholdLanguage:",MainLanguage,":",EnglishAbility) | ||
) | ||
) | ||
lang_vars <- language_splits$name | ||
names(lang_vars) <- language_splits$lang_label | ||
|
||
# select acs variables for birth x nativity | ||
status_splits <- v17 %>% | ||
filter(str_detect(name, "^B05002_")) %>% | ||
select(name, label) | ||
status_vars <- status_splits$name | ||
names(status_vars) <- status_splits$label | ||
|
||
# select acs variables for non-response reason | ||
house_nonresponse_splits <- v17 %>% | ||
filter(str_detect(name, "^B98021_")) %>% | ||
select(name, label) %>% | ||
separate( | ||
label, | ||
into=c("Estimate", "Rate", "Reason"), | ||
sep="!!" | ||
) %>% | ||
mutate( | ||
response_label = case_when( | ||
is.na(Reason) & Rate == "Response Rate" ~ "HouseResponseRate_Total", | ||
is.na(Reason) ~ "HouseNonResponseRate_Total", | ||
TRUE ~ str_c("HouseNonResponseRate_",Reason) | ||
) | ||
) | ||
house_nonresponse_vars <- house_nonresponse_splits$name | ||
names(house_nonresponse_vars) <- house_nonresponse_splits$response_label | ||
# select acs variables for non-response reason | ||
house_nonresponse_splits <- v09 %>% | ||
filter(str_detect(name, "^B98021_")) %>% | ||
select(name, label) %>% | ||
separate( | ||
label, | ||
into=c("Estimate", "Rate", "Reason"), | ||
sep="!!" | ||
) %>% | ||
mutate( | ||
response_label = case_when( | ||
is.na(Reason) & Rate == "Response Rate" ~ "HouseResponseRate_Total", | ||
is.na(Reason) ~ "HouseNonResponseRate_Total", | ||
TRUE ~ str_c("HouseNonResponseRate_",Reason) | ||
) | ||
) | ||
house_nonresponse_vars <- house_nonresponse_splits$name | ||
names(house_nonresponse_vars) <- house_nonresponse_splits$response_label | ||
|
||
groupquarters_nonresponse_splits <- v09 %>% | ||
filter(str_detect(name, "^B98022_")) %>% | ||
select(name, label) %>% | ||
separate( | ||
label, | ||
into=c("Estimate", "Rate", "Reason"), | ||
sep="!!" | ||
) %>% | ||
mutate( | ||
response_label = case_when( | ||
is.na(Reason) & Rate == "Response Rate" ~ "GroupQuartersResponseRate_Total", | ||
is.na(Reason) ~ "GroupQuartersNonResponseRate_Total", | ||
TRUE ~ str_c("GroupQuartersNonResponseRate_",Reason) | ||
) | ||
) | ||
groupquarters_nonresponse_vars <- groupquarters_nonresponse_splits$name | ||
names(groupquarter_nonresponse_vars) <- groupquarter_nonresponse_splits$response_label | ||
|
||
|
||
# combine all variables of interest | ||
# acs_vars <- c("TotalPop" = "B01003_001", acs_vars, lang_vars) | ||
|
||
# get block group total population data from api | ||
total_pop_bg <- get_acs('block group', | ||
variables = c("TotalPop" = "B01003_001"), | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide") | ||
st_write(total_pop_bg, | ||
file.path(here(),"data","raw", "bg_level_population.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
# get tract total population data from api | ||
total_pop_tract <- get_acs('tract', | ||
variables = c("TotalPop" = "B01003_001"), | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide") | ||
st_write(total_pop_tract, | ||
file.path(here(),"data","raw", "tract_level_population.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
# get tract-level nativity data from api | ||
nativity_data <- get_acs('tract', | ||
variables = acs_vars, | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide") | ||
st_write(nativity_data, | ||
file.path(here(),"data","raw", "tract_level_nativity.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
# get tract-level language data from api (note: from 2015) | ||
language_data <- get_acs('tract', | ||
variables = lang_vars, | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
year = 2015, | ||
output="wide") | ||
st_write(language_data, | ||
file.path(here(),"data","raw", "tract_level_language_2015.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
|
||
# get tract-level citizenship status data from api | ||
status_data <- get_acs('tract', | ||
variables = status_vars, | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide") | ||
st_write(status_data, | ||
file.path(here(),"data","raw", "tract_level_citizen_status.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
|
||
# get tract-level household nonresponse data from api | ||
house_response_data <- get_acs('tract', | ||
variables = house_nonresponse_vars, | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide", | ||
year=2009) | ||
st_write(house_response_data, | ||
file.path(here(),"data","raw", "tract_level_house_reponse_2009.gpkg"), | ||
delete_dsn=TRUE) | ||
|
||
|
||
# get tract-level group quarters nonresponse data from api | ||
gq_response_data <- get_acs('tract', | ||
variables = groupquarters_nonresponse_vars, | ||
state = "DC", | ||
county = "District of Columbia", | ||
geometry = TRUE, | ||
output="wide", | ||
year=2009) | ||
st_write(gq_response_data, | ||
file.path(here(),"data","raw", "tract_level_group_quarters_reponse_2009.gpkg"), | ||
delete_dsn=TRUE) | ||
|
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Static Data files | ||
|
||
### dc_planning_database_subset.csv | ||
|
||
The subset of planning database variables was generated from the Block Group Planning Database available on the Census Bureau website: https://www.census.gov/topics/research/guidance/planning-databases.html | ||
|
||
The data was first filtered to just rows for just the District of Columbia (FIPS code 11) and then filtered for columns of interest. | ||
|
||
For example, the current version was generated with: | ||
|
||
```bash | ||
awk -vFPAT='[^,]*|"[^"]*"' '{ if ($2 == 11) { print } }' pdb2019bgv3_us.csv >> dc_blockgroups.csv | ||
|
||
awk -vFPAT='[^,]*|"[^"]*"' '{ print $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $7 "," $8 "," $9 "," $10 "," $11 "," $12 "," $13 "," $14 "," $15 "," $32 "," $35 "," $38 "," $41 "," $47 "," $50 "," $53 "," $56 "," $59 "," $67 "," $69 "," $91 "," $93 "," $95 "," $97 "," $99 "," $187 "," $188 "," $205 "," $208 "," $211 "," $214 "," $219 "," $220 "," $222 "," $223 "," $225 "," $226 "," $228 "," $229 "," $231 "," $232 "," $234 "," $235 "," $237 "," $238 "," $264 "," $266 "," $268 "," $270 "," $272 }' dc_blockgroups.csv > dc_planning_database_subset.csv | ||
``` | ||
|
||
### hofstede-6-dimensions-0-100.csv | ||
|
||
Contains measurements of Hofstede's 6 Cultural Dimensions for a range of countries. |
Oops, something went wrong.