Skip to content

Commit

Permalink
Merge pull request #127 from weecology/by-route
Browse files Browse the repository at this point in the history
Import BBS data by route
  • Loading branch information
ha0ye authored May 28, 2019
2 parents 41a2f11 + 4ff431e commit 9c89304
Show file tree
Hide file tree
Showing 19 changed files with 618 additions and 250 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
.Rhistory
.RData
.Ruserdata
/.drake/
.drake/
/output/drake-cache.sqlite
/analysis/*.html
/data/
Expand Down
24 changes: 7 additions & 17 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
export("%>%")
export(analysis_wrapper)
export(build_analyses_plan)
export(build_bbs_datasets_plan)
export(build_datasets_plan)
export(check_data_format)
export(check_default_data_path)
export(collect_analyses)
export(get_bbs_data)
export(combine_bbs_subspecies)
export(filter_bbs_species)
export(filter_bbs_ts)
export(get_bbs_route_region_data)
export(get_cowley_lizards)
export(get_cowley_snakes)
export(get_default_data_path)
Expand All @@ -22,6 +26,8 @@ export(get_sgs_data)
export(import_retriever_data)
export(install_retriever_data)
export(interpolate_obs)
export(prepare_bbs_ts_data)
export(process_bbs_route_region_data)
export(richness)
export(summarise_effort)
export(summarise_obs)
Expand All @@ -34,21 +40,5 @@ export(ts_summary)
export(ts_summary_drake)
export(uni_ts_summary)
export(use_default_data_path)
importFrom(graphics,axis)
importFrom(graphics,mtext)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,rect)
importFrom(graphics,text)
importFrom(magrittr,"%>%")
importFrom(stats,D)
importFrom(stats,acf)
importFrom(stats,cor)
importFrom(stats,median)
importFrom(stats,na.omit)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(utils,data)
importFrom(utils,read.csv)
importFrom(utils,read.delim)
210 changes: 210 additions & 0 deletions R/bbs_cleaning_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
#' @title Prepare BBS population time-series data
#' @description Modified from https://github.com/weecology/bbs-forecasting
#' and https://github.com/weecology/MATSS-community-change
#' Selects sites with data spanning start_yr through end_yr containing at
#' least min_num_yrs of data samples during that period. Cleans data tables
#' and stores each individual route as a .Rds file. Saves a data table of the
#' route + region pairs.
#' @param start_yr num first year of time-series
#' @param end_yr num last year of time-series
#' @param min_num_yrs num minimum number of years of data between start_yr & end_yr
#' @param bbs_subset optional, a subset of the BBS communities to use
#' (to speed up development). As c(1:X)
#' @inheritParams get_mtquad_data
#' @return NULL
#' @export

prepare_bbs_ts_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10,
path = get_default_data_path(), bbs_subset = NULL)
{
bbs_data_tables <- import_retriever_data("breed-bird-survey", path = path)

bbs_data <- bbs_data_tables$breed_bird_survey_weather %>%
dplyr::filter(runtype == 1, rpid == 101) %>%
dplyr::left_join(bbs_data_tables$breed_bird_survey_counts,
by = c('statenum', 'route', 'rpid', 'year', 'routedataid', 'countrynum')) %>%
dplyr::left_join(bbs_data_tables$breed_bird_survey_routes,
by = c('statenum', 'route', 'countrynum')) %>%
dplyr::mutate(site_id = statenum*1000 + route,
starttemp = dplyr::case_when(tempscale=='F' ~ c((starttemp - 32)*5/9),
tempscale=='C' ~ as.double(starttemp)),
endtemp = dplyr::case_when(tempscale=='F' ~ c((endtemp - 32)*5/9),
tempscale=='C' ~ as.double(endtemp))) %>%
dplyr::rename(lat = latitude,
long = longitude,
species_id = aou,
abundance = speciestotal) %>%
filter_bbs_ts(start_yr, end_yr, min_num_yrs)

# prepare and write out route and region metadata
bbs_routes_regions <- bbs_data %>%
dplyr::select(bcr, route) %>%
dplyr::distinct() %>%
dplyr::mutate(name = paste0("bbs_bcr", bcr, "_route", route))

storage_path <- file.path(path, 'breed-bird-survey-prepped')
if (!dir.exists(storage_path)) {
dir.create(storage_path)
}

write.csv(bbs_routes_regions, file.path(storage_path, "routes_and_regions_table.csv"), row.names = F)

# filter and process selected route and region combinations
if (!is.null(bbs_subset)) {
bbs_routes_regions <- bbs_routes_regions[bbs_subset, ]
}

bbs_routes_regions %>%
dplyr::select(bcr, route) %>%
purrr::pmap(function(bcr, route) {
bbs_data %>%
dplyr::filter(bcr == !!bcr,
route == !!route) %>%
process_bbs_route_region_data(species_table = bbs_data_tables$breed_bird_survey_species) %>%
saveRDS(file = file.path(storage_path, paste0("route", route, "region", bcr, ".Rds")) )
})
}

#' @title Process the BBS data for an individual route and region
#' @description Correct and otherwise filter BBS species data (see
#' \code{\link{combine_subspecies}} and \code{\link{filter_bbs_species}} for
#' more info). Generate the abundance, covariate, and metadata tables and
#' return the combined object.
#' @param bbs_data_table main bbs data table
#' @param species_table table of species for BBS
#' @return the processed BBS data
#' @export
process_bbs_route_region_data <- function(bbs_data_table, species_table)
{
# check that exactly one route and one region are represented in the data
route <- unique(bbs_data_table$route)
region <- unique(bbs_data_table$bcr)
stopifnot(length(route) == 1 &&
length(region == 1))

# process species IDs
this_bbs_data <- bbs_data_table %>%
combine_bbs_subspecies(species_table = species_table) %>%
filter_bbs_species(species_table = species_table) %>%
dplyr::mutate(species_id = paste('sp', species_id, sep=''),
date = as.Date(paste(year, month, day, sep = "-"))) %>%
dplyr::ungroup()

abundance <- this_bbs_data %>%
dplyr::group_by(year, species_id) %>%
dplyr::summarise(abundance = sum(abundance)) %>%
dplyr::ungroup() %>%
tidyr::spread(key = species_id, value = abundance, fill = 0) %>%
dplyr::arrange(year) %>%
dplyr::select(-year)

covariates <- this_bbs_data %>%
dplyr::group_by(year) %>%
dplyr::summarise(effort = dplyr::n_distinct(site_id),
starttemp = mean(starttemp), endtemp = mean(endtemp),
startwind = mean(startwind), endwind = mean(endwind),
startsky = mean(startsky), endsky = mean(endsky),
lat = mean(lat), long = mean(long), mean_date = mean(date)) %>%
dplyr::arrange(year)

metadata <- list(timename = 'year', effort = 'effort', route = route, region = region)

return(list('abundance' = abundance, 'covariates' = covariates, 'metadata' = metadata))
}


#' @title Filter BBS to specified time series period and number of samples
#'
#' @description Modified from https://github.com/weecology/bbs-forecasting
#' and https://github.com/weecology/MATSS-community-change
#'
#' @param bbs_data dataframe that contains BBS site_id and year columns
#' @param start_yr num first year of time-series
#' @param end_yr num last year of time-series
#' @param min_num_yrs num minimum number of years of data between start_yr & end_yr
#'
#' @return dataframe with original data and associated environmental data
#' @export
filter_bbs_ts <- function(bbs_data, start_yr, end_yr, min_num_yrs) {
sites_to_keep = bbs_data %>%
dplyr::filter(year >= start_yr, year <= end_yr) %>%
dplyr::group_by(site_id) %>%
dplyr::summarise(num_years = length(unique(year))) %>%
dplyr::ungroup() %>%
dplyr::filter(num_years >= min_num_yrs)

filtered_data <- bbs_data %>%
dplyr::filter(year >= start_yr, year <= end_yr) %>%
dplyr::filter(site_id %in% sites_to_keep$site_id)
}


#' Filter poorly sampled BBS species
#'
#' Modified from https://github.com/weecology/bbs-forecasting/blob/master/R/forecast-bbs-core.R
#'
#' Removes waterbirds, shorebirds, owls, kingfishers, knightjars,
#' dippers. These species are poorly sampled due to their aquatic or
#' noctural nature. Also removes taxa that were either partially unidentified
#' (e.g. "sp.") or were considered hybrids (e.g. "A x B") or were listed as more
#' than one species (e.g. "A / B")
#'
#' @param df dataframe containing an species_id column
#'
#' @return dataframe, filtered version of initial dataframe
#' @export

filter_bbs_species <- function(df, species_table){

is_unidentified = function(names) {
#Before filtering, account for this one hybrid of 2 subspecies so it's kept
names[names == 'auratus auratus x auratus cafer'] = 'auratus auratus'
grepl('sp\\.| x |\\/', names)
}

valid_taxa = species_table %>%
dplyr:: filter(!is_unidentified(species)) %>%
dplyr::filter(aou > 2880) %>%
dplyr::filter(aou < 3650 | aou > 3810) %>%
dplyr::filter(aou < 3900 | aou > 3910) %>%
dplyr::filter(aou < 4160 | aou > 4210) %>%
dplyr::filter(aou != 7010)

dplyr::filter(df, species_id %in% valid_taxa$aou)
}

#' Combine subspecies into their common species
#'
#' Modified from https://github.com/weecology/bbs-forecasting/blob/master/R/forecast-bbs-core.R
#'
#' @export
combine_bbs_subspecies = function(df, species_table){

# Subspecies have two spaces separated by non-spaces
subspecies_names = species_table %>%
dplyr::filter(aou %in% unique(df$species_id)) %>%
dplyr::pull(spanish_common_name) %>%
grep(" [^ ]+ ", ., value = TRUE)

subspecies_ids = species_table %>%
dplyr::filter(spanish_common_name %in% subspecies_names) %>%
dplyr::pull(aou)

# Drop all but the first two words to get the root species name,
# then find the AOU code
new_subspecies_ids = species_table %>%
dplyr::slice(match(stringr::word(subspecies_names, 1,2),
species_table$spanish_common_name)) %>%
dplyr::pull(aou)

# replace the full subspecies names with species-level names
for (i in seq_along(subspecies_ids)) {
df$species_id[df$species_id == subspecies_ids[i]] = new_subspecies_ids[i]
}

df_grouped <- df %>%
dplyr::group_by_at(dplyr::vars(-abundance)) %>%
dplyr::summarise(abundance = sum(abundance)) %>%
dplyr::ungroup() %>%
dplyr::distinct()
}
51 changes: 47 additions & 4 deletions R/build_plans.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,19 @@ build_analyses_plan <- function(methods, datasets, ...)
#' @title Generate a Drake Plan for Datasets
#'
#' @param data_path where to get the downloaded retriever datasets
#' @param include_downloaded_data whether to also include downloadable datasets
#' @param include_retriever_data whether to include retriever-downloaded data
#' @param include_bbs_data whether to include BBS data
#' @inheritParams build_bbs_datasets_plan
#'
#' @return a drake plan (i.e. a tibble) specifying the targets and commands
#' for gathering datasets
#'
#' @export
#'
build_datasets_plan <- function(data_path = get_default_data_path(),
include_downloaded_data = FALSE)
include_retriever_data = FALSE,
include_bbs_data = FALSE,
bbs_subset = NULL)
{
datasets <- drake::drake_plan(
maizuru_data = get_maizuru_data(),
Expand All @@ -82,17 +86,56 @@ build_datasets_plan <- function(data_path = get_default_data_path(),
karoo_data = get_karoo_data(),
kruger_data = get_kruger_data()
)
if (include_downloaded_data)

if (include_retriever_data)
{
datasets <- datasets %>%
dplyr::bind_rows(
drake::drake_plan(
portal_data = get_portal_rodents(),
bbs_data = get_bbs_data(region = 7, path = !!data_path),
sdl_data = get_sdl_data(path = !!data_path),
mtquad_data = get_mtquad_data(path = !!data_path)
)
)
}

if (include_bbs_data) {
bbs_datasets = build_bbs_datasets_plan(data_path = data_path,
bbs_subset = bbs_subset)

datasets <- datasets %>%
dplyr::bind_rows(bbs_datasets)
}

return(datasets)
}

#' @title Generate a Drake Plan for BBS Datasets
#'
#' @inheritParams build_datasets_plan
#' @inheritParams prepare_bbs_ts_data
#'
#' @return a drake plan (i.e. a tibble) specifying the targets and commands
#' for gathering BBS datasets
#'
#' @export
#'
build_bbs_datasets_plan <- function(data_path = get_default_data_path(), bbs_subset = NULL)
{
routes_and_regions_file <- file.path(data_path, "breed-bird-survey-prepped", "routes_and_regions_table.csv")

if (!file.exists(routes_and_regions_file)) {
prepare_bbs_ts_data(path = data_path, bbs_subset = bbs_subset)
}

routes_and_regions <- read.csv(routes_and_regions_file, colClasses = "character")

bbs_datasets <- drake::drake_plan(
bbs_data_rtrg = target(get_bbs_route_region_data(route, region, path = !!data_path),
transform = map(route = !!rlang::syms(routes_and_regions$route),
region = !!rlang::syms(routes_and_regions$bcr)
)
)
)
return(bbs_datasets)
}
Loading

0 comments on commit 9c89304

Please sign in to comment.