Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

By route and cleaning #124

Closed
wants to merge 10 commits into from
67 changes: 67 additions & 0 deletions R/bbs_cleaning_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' Filter poorly sampled BBS species
#'
#' Modified from https://github.com/weecology/bbs-forecasting/blob/master/R/forecast-bbs-core.R
#'
#' Removes waterbirds, shorebirds, owls, kingfishers, knightjars,
#' dippers. These species are poorly sampled due to their aquatic or
#' noctural nature. Also removes taxa that were either partially unidentified
#' (e.g. "sp.") or were considered hybrids (e.g. "A x B") or were listed as more
#' than one species (e.g. "A / B")
#'
#' @param df dataframe containing an species_id column
#'
#' @return dataframe, filtered version of initial dataframe

filter_bbs_species <- function(df, species_table){

is_unidentified = function(names) {
#Before filtering, account for this one hybrid of 2 subspecies so it's kept
names[names=='auratus auratus x auratus cafer']='auratus auratus'
grepl('sp\\.| x |\\/', names)
}

valid_taxa = species_table %>%
dplyr:: filter(!is_unidentified(species)) %>%
dplyr::filter(aou > 2880) %>%
dplyr::filter(aou < 3650 | aou > 3810) %>%
dplyr::filter(aou < 3900 | aou > 3910) %>%
dplyr::filter(aou < 4160 | aou > 4210) %>%
dplyr::filter(aou != 7010)

dplyr::filter(df, species_id %in% valid_taxa$aou)
}

#' Combine subspecies into their common species
#'
#' Modified from https://github.com/weecology/bbs-forecasting/blob/master/R/forecast-bbs-core.R
#'
combine_subspecies = function(df, species_table){

# Subspecies have two spaces separated by non-spaces
subspecies_names = species_table %>%
dplyr::filter(aou %in% unique(df$species_id)) %>%
dplyr::pull(spanish_common_name) %>%
grep(" [^ ]+ ", ., value = TRUE)

subspecies_ids = species_table %>%
dplyr::filter(spanish_common_name %in% subspecies_names) %>%
dplyr::pull(aou)

# Drop all but the first two words to get the root species name,
# then find the AOU code
new_subspecies_ids = species_table %>%
dplyr::slice(match(stringr::word(subspecies_names, 1,2),
species_table$spanish_common_name)) %>%
dplyr::pull(aou)

# replace the full subspecies names with species-level names
for (i in seq_along(subspecies_ids)) {
df$species_id[df$species_id == subspecies_ids[i]] = new_subspecies_ids[i]
}

df_grouped <- df %>%
dplyr::group_by_at(dplyr::vars(-abundance)) %>%
dplyr::summarise(abundance = sum(abundance)) %>%
dplyr::ungroup() %>%
dplyr::distinct()
}
8 changes: 6 additions & 2 deletions R/get_retriever_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param end_yr num last year of time-series
#' @param min_num_yrs num minimum number of years of data between start_yr & end_yr
#' @param region region code of data to return (currently uses state codes)
#' @param this_route which route within that region
#' @inheritParams get_mtquad_data
#'
#' @return list of two dataframes (one with abundance data, the other with covariate data)
Expand All @@ -20,7 +21,7 @@
#' }
#' @export

get_bbs_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10, region,
get_bbs_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10, region, this_route,
path = get_default_data_path())
{
bbs_data_tables <- import_retriever_data("breed-bird-survey", path = path)
Expand All @@ -31,7 +32,7 @@ get_bbs_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10, regio
by = c('statenum', 'route', 'rpid', 'year', 'routedataid', 'countrynum')) %>%
dplyr::left_join(bbs_data_tables$breed_bird_survey_routes,
by = c('statenum', 'route', 'countrynum')) %>%
dplyr::filter(bcr == region) %>%
dplyr::filter(bcr == region, route == this_route) %>%
dplyr::mutate(site_id = statenum*1000 + route,
starttemp = dplyr::case_when(tempscale=='F' ~ c((starttemp - 32)*5/9),
tempscale=='C' ~ as.double(starttemp)),
Expand All @@ -41,6 +42,8 @@ get_bbs_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10, regio
long = longitude,
species_id = aou,
abundance = speciestotal) %>%
combine_subspecies(species_table = bbs_data_tables$breed_bird_survey_species) %>%
filter_bbs_species(species_table = bbs_data_tables$breed_bird_survey_species) %>%
dplyr::mutate(species_id = paste('sp', species_id, sep=''),
date = as.Date(paste(year, month, day, sep = "-"))) %>%
filter_ts(start_yr, end_yr, min_num_yrs) %>%
Expand All @@ -61,6 +64,7 @@ get_bbs_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10, regio
startwind = mean(startwind), endwind = mean(endwind),
startsky = mean(startsky), endsky = mean(endsky),
lat = mean(lat), long = mean(long), mean_date = mean(date)) %>%
dplyr::mutate(route = this_route, region = region) %>%
dplyr::arrange(year)

metadata <- list(timename = 'year', effort = 'effort')
Expand Down
11 changes: 11 additions & 0 deletions man/combine_subspecies.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions man/filter_bbs_species.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/get_bbs_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 0 additions & 27 deletions man/summarize_vec.Rd

This file was deleted.