Skip to content

Commit

Permalink
Merge pull request #6 from dominicwhite/dashboard
Browse files Browse the repository at this point in the history
Data processing and dashboard prototype
  • Loading branch information
dominicwhite authored Nov 14, 2019
2 parents f0a302e + 123efa2 commit 419fd56
Show file tree
Hide file tree
Showing 17 changed files with 1,116 additions and 0 deletions.
90 changes: 90 additions & 0 deletions dashboard/dashboard.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
---
title: "Census 2020 Cultural Messaging"
runtime: shiny
output:
flexdashboard::flex_dashboard:
orientation: columns
---

```{r setup, include=FALSE}
library(flexdashboard)
library(here)
```

```{r global, include=FALSE}
# load data in 'global' chunk so it can be shared by all users of the dashboard
library(sf)
pop <- read_sf("foreign_country_of_birth2.csv")
pop$TotalForeignBornE <- as.numeric(pop$TotalForeignBornE)
pop$EuropeE <- as.numeric(pop$EuropeE)
pop$Central.AmericaE <- as.numeric(pop$Central.AmericaE)
pop$AsiaE <- as.numeric(pop$AsiaE)
```

Column {.sidebar data-width=200}
-----------------------------------------------------------------------

```{r}
selectInput("voi", label = h4("Show on map"),
choices = list(
"Foreign-born" = "TotalForeignBornE",
"European-born" = "EuropeE",
"Cent. Am.-born" = "Central.AmericaE",
"Asian-born" = "AsiaE"
),
selected = "TotalForeignBornE")
```

#### About

Built by DataKind DC, Motifv, and Hofstede Insights.


Column {data-width=450}
-----------------------------------------------------------------------

### Map

```{r, echo=FALSE}
library(leaflet)
library(leaflet.providers)
bins <- reactive({c(as.integer(seq(from=0, to=max(pop[[input$voi]]), length.out=10)), Inf)})
m <- renderLeaflet({
pal <- colorNumeric("YlOrRd", domain = pop[[input$voi]])
leaflet(pop) %>%
setView(lng = -77.045992, lat = 38.9145, zoom = 12) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
fillColor = ~pal(get(input$voi)),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7
)
})
m
```

Column {data-width=350}
-----------------------------------------------------------------------

### Chart B

More detailed visualization to go here...

```{r}
```

### Chart C

More detailed visualization to go here...

```{r}
```

180 changes: 180 additions & 0 deletions dashboard/foreign_country_of_birth2.csv

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Data

This directory contains data files, along with scripts to generate those data files.

Descriptions of the main subdirectories:

## Static data

Some data is static (i.e. not generated from online sources and APIs via scripts).

An example is the Hofstede Index data for different countries.

## Raw data

Files of raw data downloaded from various sources without any processing/cleaning. Scripts for generating these files should reside in this level.

## Transformed

Data files ready for use in analyses and visualizations, created from data sources in the `static` and `raw` directories.

Scripts for running transformations should reside in this level.
211 changes: 211 additions & 0 deletions data/get_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
library(tidyverse)
library(tidycensus)
library(sf)
library(here)

# sourceDir <- getSrcDirectory(function(dummy) {dummy})
# dummy <- function() {0}
# here()

# uncomment to avoid cache
options(tigris_use_cache = TRUE)

# Load API key for Census Bureau. Register here: https://api.census.gov/data/key_signup.html
api_key <- Sys.getenv(c("CENSUS_API_KEY"))
census_api_key(api_key)

# get all acs variables
v17 <- load_variables(2017, "acs5", cache = TRUE)
v15 <- load_variables(2015, "acs5", cache = TRUE)
v09 <- load_variables(2009, "acs5", cache = TRUE)

# select acs variables for country of birth
regional_splits <- v17 %>%
filter(str_detect(name, "^B05006_")) %>%
select(name, label) %>%
separate(
label,
into=c("Estimate", "Total", "Continent", "Subcontinent", "Country", "Subcountry"),
sep="!!"
) %>%
mutate(
pop_label = case_when(
is.na(Continent) ~ "TotalForeignBorn",
is.na(Subcontinent) ~ Continent,
is.na(Country) ~ Subcontinent,
is.na(Subcountry) ~ Country,
TRUE ~ Subcountry
),
) %>%
select(name, pop_label)
acs_vars <- regional_splits$name
names(acs_vars) <- regional_splits$pop_label

# select acs variables for language
language_splits <- v15 %>%
filter(str_detect(name, "^B16001_")) %>%
select(name, label) %>%
separate(
label,
into=c("Estimate", "Total", "MainLanguage", "EnglishAbility"),
sep="!!"
) %>%
mutate(
lang_label = case_when(
is.na(MainLanguage) ~ "TotalSpeakersOver5",
is.na(EnglishAbility) ~ str_c("HouseholdLanguage:",MainLanguage),
TRUE ~ str_c("HouseholdLanguage:",MainLanguage,":",EnglishAbility)
)
)
lang_vars <- language_splits$name
names(lang_vars) <- language_splits$lang_label

# select acs variables for birth x nativity
status_splits <- v17 %>%
filter(str_detect(name, "^B05002_")) %>%
select(name, label)
status_vars <- status_splits$name
names(status_vars) <- status_splits$label

# select acs variables for non-response reason
house_nonresponse_splits <- v17 %>%
filter(str_detect(name, "^B98021_")) %>%
select(name, label) %>%
separate(
label,
into=c("Estimate", "Rate", "Reason"),
sep="!!"
) %>%
mutate(
response_label = case_when(
is.na(Reason) & Rate == "Response Rate" ~ "HouseResponseRate_Total",
is.na(Reason) ~ "HouseNonResponseRate_Total",
TRUE ~ str_c("HouseNonResponseRate_",Reason)
)
)
house_nonresponse_vars <- house_nonresponse_splits$name
names(house_nonresponse_vars) <- house_nonresponse_splits$response_label
# select acs variables for non-response reason
house_nonresponse_splits <- v09 %>%
filter(str_detect(name, "^B98021_")) %>%
select(name, label) %>%
separate(
label,
into=c("Estimate", "Rate", "Reason"),
sep="!!"
) %>%
mutate(
response_label = case_when(
is.na(Reason) & Rate == "Response Rate" ~ "HouseResponseRate_Total",
is.na(Reason) ~ "HouseNonResponseRate_Total",
TRUE ~ str_c("HouseNonResponseRate_",Reason)
)
)
house_nonresponse_vars <- house_nonresponse_splits$name
names(house_nonresponse_vars) <- house_nonresponse_splits$response_label

groupquarters_nonresponse_splits <- v09 %>%
filter(str_detect(name, "^B98022_")) %>%
select(name, label) %>%
separate(
label,
into=c("Estimate", "Rate", "Reason"),
sep="!!"
) %>%
mutate(
response_label = case_when(
is.na(Reason) & Rate == "Response Rate" ~ "GroupQuartersResponseRate_Total",
is.na(Reason) ~ "GroupQuartersNonResponseRate_Total",
TRUE ~ str_c("GroupQuartersNonResponseRate_",Reason)
)
)
groupquarters_nonresponse_vars <- groupquarters_nonresponse_splits$name
names(groupquarter_nonresponse_vars) <- groupquarter_nonresponse_splits$response_label


# combine all variables of interest
# acs_vars <- c("TotalPop" = "B01003_001", acs_vars, lang_vars)

# get block group total population data from api
total_pop_bg <- get_acs('block group',
variables = c("TotalPop" = "B01003_001"),
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide")
st_write(total_pop_bg,
file.path(here(),"data","raw", "bg_level_population.gpkg"),
delete_dsn=TRUE)

# get tract total population data from api
total_pop_tract <- get_acs('tract',
variables = c("TotalPop" = "B01003_001"),
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide")
st_write(total_pop_tract,
file.path(here(),"data","raw", "tract_level_population.gpkg"),
delete_dsn=TRUE)

# get tract-level nativity data from api
nativity_data <- get_acs('tract',
variables = acs_vars,
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide")
st_write(nativity_data,
file.path(here(),"data","raw", "tract_level_nativity.gpkg"),
delete_dsn=TRUE)

# get tract-level language data from api (note: from 2015)
language_data <- get_acs('tract',
variables = lang_vars,
state = "DC",
county = "District of Columbia",
geometry = TRUE,
year = 2015,
output="wide")
st_write(language_data,
file.path(here(),"data","raw", "tract_level_language_2015.gpkg"),
delete_dsn=TRUE)


# get tract-level citizenship status data from api
status_data <- get_acs('tract',
variables = status_vars,
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide")
st_write(status_data,
file.path(here(),"data","raw", "tract_level_citizen_status.gpkg"),
delete_dsn=TRUE)


# get tract-level household nonresponse data from api
house_response_data <- get_acs('tract',
variables = house_nonresponse_vars,
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide",
year=2009)
st_write(house_response_data,
file.path(here(),"data","raw", "tract_level_house_reponse_2009.gpkg"),
delete_dsn=TRUE)


# get tract-level group quarters nonresponse data from api
gq_response_data <- get_acs('tract',
variables = groupquarters_nonresponse_vars,
state = "DC",
county = "District of Columbia",
geometry = TRUE,
output="wide",
year=2009)
st_write(gq_response_data,
file.path(here(),"data","raw", "tract_level_group_quarters_reponse_2009.gpkg"),
delete_dsn=TRUE)

Binary file added data/raw/bg_level_population.gpkg
Binary file not shown.
Binary file added data/raw/tract_level_citizen_status.gpkg
Binary file not shown.
Binary file not shown.
Binary file added data/raw/tract_level_language_2015.gpkg
Binary file not shown.
Binary file added data/raw/tract_level_nativity.gpkg
Binary file not shown.
Binary file added data/raw/tract_level_population.gpkg
Binary file not shown.
19 changes: 19 additions & 0 deletions data/static/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Static Data files

### dc_planning_database_subset.csv

The subset of planning database variables was generated from the Block Group Planning Database available on the Census Bureau website: https://www.census.gov/topics/research/guidance/planning-databases.html

The data was first filtered to just rows for just the District of Columbia (FIPS code 11) and then filtered for columns of interest.

For example, the current version was generated with:

```bash
awk -vFPAT='[^,]*|"[^"]*"' '{ if ($2 == 11) { print } }' pdb2019bgv3_us.csv >> dc_blockgroups.csv

awk -vFPAT='[^,]*|"[^"]*"' '{ print $1 "," $2 "," $3 "," $4 "," $5 "," $6 "," $7 "," $8 "," $9 "," $10 "," $11 "," $12 "," $13 "," $14 "," $15 "," $32 "," $35 "," $38 "," $41 "," $47 "," $50 "," $53 "," $56 "," $59 "," $67 "," $69 "," $91 "," $93 "," $95 "," $97 "," $99 "," $187 "," $188 "," $205 "," $208 "," $211 "," $214 "," $219 "," $220 "," $222 "," $223 "," $225 "," $226 "," $228 "," $229 "," $231 "," $232 "," $234 "," $235 "," $237 "," $238 "," $264 "," $266 "," $268 "," $270 "," $272 }' dc_blockgroups.csv > dc_planning_database_subset.csv
```

### hofstede-6-dimensions-0-100.csv

Contains measurements of Hofstede's 6 Cultural Dimensions for a range of countries.
Loading

0 comments on commit 419fd56

Please sign in to comment.