Skip to content

Commit

Permalink
Add option to work with a subset of BBS data and fix passing the path…
Browse files Browse the repository at this point in the history
… argument around in build_plans
  • Loading branch information
diazrenata committed May 27, 2019
1 parent e02c118 commit 3c1be4a
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 16 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(collect_analyses)
export(combine_subspecies)
export(filter_bbs_species)
export(filter_ts)
export(get_bbs_route_region_data)
export(get_cowley_lizards)
export(get_cowley_snakes)
export(get_default_data_path)
Expand Down
9 changes: 6 additions & 3 deletions R/bbs_cleaning_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
#' @param start_yr num first year of time-series
#' @param end_yr num last year of time-series
#' @param min_num_yrs num minimum number of years of data between start_yr & end_yr
#' @param selected_set optional vector of indices selecting a subset of the BBS communities to use (to speed up development), e.g. `1:X`; if NULL (the default), all communities are used
#' @inheritParams get_mtquad_data
#' @return NULL
#' @export

prepare_bbs_ts_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10,
path = get_default_data_path()){
path = get_default_data_path(), selected_set = NULL){
bbs_data_tables <- import_retriever_data("breed-bird-survey", path = path)

bbs_data <- bbs_data_tables$breed_bird_survey_weather %>%
Expand Down Expand Up @@ -48,6 +49,10 @@ prepare_bbs_ts_data <- function(start_yr = 1965, end_yr = 2017, min_num_yrs = 10
}
bbs_routes_regions_list = apply(bbs_routes_regions, MARGIN = 1, FUN = make_list)

if(!is.null(selected_set)) {
bbs_routes_regions_list = bbs_routes_regions_list[selected_set]
}

lapply(bbs_routes_regions_list, FUN = subset_bbs_route_region_data, bbs_data_table = bbs_data, species_table = bbs_data_tables$breed_bird_survey_species, path = path)

}
Expand Down Expand Up @@ -105,8 +110,6 @@ subset_bbs_route_region_data <- function(route_region, bbs_data_table, species_t

saveRDS(this_bbs_result, file = file.path(storage_path, paste0("route", route, "region", region, ".Rds")) )

return()

}


Expand Down
21 changes: 14 additions & 7 deletions R/build_plans.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ build_analyses_plan <- function(methods, datasets, ...)
#' @param data_path where to get the downloaded retriever datasets
#' @param include_downloaded_data whether to also include downloadable datasets
#' @param include_bbs_data whether to include BBS data
#' @param bbs_subset optional vector of row indices into the BBS datasets plan, selecting a subset of the BBS communities to use (to speed up development), e.g. `1:X`; if NULL (the default), all are used
#'
#' @return a drake plan (i.e. a tibble) specifying the targets and commands
#' for gathering datasets
Expand All @@ -73,7 +74,8 @@ build_analyses_plan <- function(methods, datasets, ...)
#'
build_datasets_plan <- function(data_path = get_default_data_path(),
include_downloaded_data = FALSE,
include_bbs_data = FALSE)
include_bbs_data = FALSE,
bbs_subset = NULL)
{
datasets <- drake::drake_plan(
maizuru_data = get_maizuru_data(),
Expand All @@ -97,7 +99,12 @@ build_datasets_plan <- function(data_path = get_default_data_path(),
}

if (include_bbs_data) {
bbs_datasets = build_bbs_datasets_plan()
bbs_datasets = build_bbs_datasets_plan(data_path = data_path)

if(!is.null(bbs_subset)) {
bbs_datasets = bbs_datasets[bbs_subset, ]
}

datasets <- datasets %>%
dplyr::bind_rows(bbs_datasets)
}
Expand All @@ -107,27 +114,27 @@ build_datasets_plan <- function(data_path = get_default_data_path(),

#' @title Generate a Drake Plan for BBS Datasets
#'
#' @param data_path path
#' @param path path
#' @param from_raw whether to re-prep BBS data
#'
#' @return a drake plan (i.e. a tibble) specifying the targets and commands
#' for gathering BBS datasets
#'
#' @export
#'
build_bbs_datasets_plan <- function(path = get_default_data_path())
build_bbs_datasets_plan <- function(data_path = get_default_data_path())
{
if(!file.exists(file.path(path, "breed-bird-survey-prepped", "routes_and_regions_table.csv"))) {
if(!file.exists(file.path(data_path, "breed-bird-survey-prepped", "routes_and_regions_table.csv"))) {
prepare_bbs_ts_data()
}

routes_and_regions = read.csv(file.path(path, "breed-bird-survey-prepped", "routes_and_regions_table.csv"), stringsAsFactors = F)
routes_and_regions = read.csv(file.path(data_path, "breed-bird-survey-prepped", "routes_and_regions_table.csv"), stringsAsFactors = F)

routes_and_regions = routes_and_regions %>%
dplyr::mutate(bcr = as.character(bcr), route = as.character(route))

bbs_datasets <- drake::drake_plan(
bbs_data_rtrg = target(get_bbs_route_region_data(route, region, path = get_default_data_path()),
bbs_data_rtrg = target(get_bbs_route_region_data(route, region, path = !!data_path),
transform = map(route = !!rlang::syms(routes_and_regions$route),
region = !!rlang::syms(routes_and_regions$bcr)
)
Expand Down
2 changes: 1 addition & 1 deletion man/build_bbs_datasets_plan.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/build_datasets_plan.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/filter_ts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions man/get_bbs_route_region_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/prepare_bbs_ts_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/subset_bbs_route_region_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 comment on commit 3c1be4a

@diazrenata
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd estimate that prepare_bbs_ts_data would take about 13 hours on my machine if I ran it for all 2500 communities. This might be because it passes the full BBS abundances table to the subset function for every route-region pair.

Please sign in to comment.