diff --git a/NEWS.md b/NEWS.md index 8330eae0..d832c816 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,3 +6,5 @@ 0.1.6 which is currently used as internal data model for camtraptor. - `get_custom_effort()` now calculates per calendar month/week (#219). - `write_dwc()` has an updated mapping for dwc_audubon.csv (#274). +- `get_record_table()` returns the number of observed individuals (#279). +- `get_cam_op()` allows to add session and camera IDs to the station names output (#288). diff --git a/R/get_cam_op.R b/R/get_cam_op.R index d366c1a1..b9a8e395 100644 --- a/R/get_cam_op.R +++ b/R/get_cam_op.R @@ -6,9 +6,9 @@ #' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html). #' #' The deployment data are by default grouped by `locationName` (station ID in -#' camtrapR jargon) or another column specified by the user. -#' If multiple deployments are linked to same location, daily efforts higher -#' than 1 occur. +#' camtrapR jargon) or another column specified by the user via the +#' `station_col` argument. If multiple deployments are linked to same location, +#' daily efforts higher than 1 occur. #' #' Partially active days, e.g. the first or the last day of a deployment, result #' in decimal effort values as in [camtrapR::cameraOperation()]( @@ -16,23 +16,28 @@ #' #' @param package Camera trap data package object, as returned by #' `read_camtrap_dp()`. -#' @param station_col Column name to use for identifying the stations. -#' Default: `"locationName"`. -#' @param use_prefix Logical (`TRUE`or `FALSE`). -#' If `TRUE` the returned row names will start with prefix `"Station"` as -#' returned by [camtrapR::cameraOperation()]( +#' @param station_col Column name to use for identifying the stations. Default: +#' `"locationName"`. +#' @param camera_col Column name of the column specifying Camera ID. Default: +#' `NULL`. +#' @param session_col Column name to use for identifying the session. Default: +#' `NULL`. Use it for creating multi-session / multi-season detection +#' histories. +#' @param use_prefix Logical (`TRUE` or `FALSE`). If `TRUE` the returned row +#' names will start with prefix `"Station"` as returned by +#' [camtrapR::cameraOperation()]( #' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html). #' Default: `FALSE`. -#' @param datapkg Deprecated. -#' Use `package` instead. +#' @param datapkg Deprecated. Use `package` instead. #' @param ... filter predicates for filtering on deployments. -#' @return A matrix. -#' Row names always indicate the station ID. -#' Column names are dates. +#' @return A matrix. Row names always indicate the station ID. Column names are +#' dates. #' @family exploration functions #' @importFrom dplyr %>% .data +#' @importFrom rlang !! := #' @export #' @examples +#' library(dplyr) #' get_cam_op(mica) #' #' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18 @@ -41,11 +46,32 @@ #' # Specify column with station names #' get_cam_op(mica, station_col = "locationID") #' +#' # Specify column with session IDs +#' mica_sessions <- mica +#' mica_sessions$data$deployments <- mica_sessions$data$deployments %>% +#' dplyr::mutate(session = ifelse( +#' stringr::str_starts(.data$locationName, "B_DL_"), +#' "after2020", +#' "before2020" +#' ) +#' ) +#' get_cam_op(mica_sessions, session_col = "session") +#' +#' # Specify column with camera IDs +#' mica_cameras <- mica_sessions +#' mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4) +#' get_cam_op(mica_cameras, camera_col = "cameraID") +#' +#' # Specify both session and camera IDs +#' get_cam_op(mica_cameras, camera_col = "cameraID", session_col = "session") +#' #' # Use prefix Station as in camtrapR's camera operation matrix #' get_cam_op(mica, use_prefix = TRUE) get_cam_op <- function(package = NULL, ..., station_col = "locationName", + camera_col = NULL, + session_col = NULL, use_prefix = FALSE, datapkg = lifecycle::deprecated()) { # check camera trap data package @@ -65,7 +91,7 @@ get_cam_op <- function(package = NULL, ) ) - # Check that station_col doesn't contain empty values (NA) + # Check that `station_col` doesn't contain empty values (NA) n_na <- package$data$deployments %>% dplyr::filter(is.na(.data[[station_col]])) %>% nrow() @@ -76,7 +102,65 @@ get_cam_op <- function(package = NULL, "{n_na} NAs found." ) ) - + + # Check that `station_col` do not contain the reserved words "__SESS_" and + # "__CAM_" (no need to remove NAs beforehand as station_col must not contain + # any NA, see previous check) + assertthat::assert_that( + all(!stringr::str_detect(string = package$data$deployments[[station_col]], + pattern = "__SESS_|__CAM_")), + msg = glue::glue( + "Station column name (`{station_col}`) must not contain any of the ", + "reserved words: \"__SESS_\", \"__CAM_\"." + ) + ) + + # Check that `session_col` exists in deployments, if defined, and that its + # values do not contain the reserved words "__SESS_" and "__CAM_" + if (!is.null(session_col)) { + assertthat::assert_that(assertthat::is.string(session_col)) + assertthat::assert_that( + session_col %in% names(package$data$deployments), + msg = glue::glue( + "Session column name (`{session_col}`) is not valid: ", + "it must be one of the deployments column names." + ) + ) + session_values <- package$data$deployments[[session_col]] + session_values <- session_values[!is.na(session_values)] + assertthat::assert_that( + all(!stringr::str_detect(string = session_values, + pattern = "__SESS_|__CAM_")), + msg = glue::glue( + "Session column name (`{session_col}`) must not contain any of the ", + "reserved words: \"__SESS_\", \"__CAM_\"." + ) + ) + } + + # Check that `camera_col` exists in deployments, if defined, and that its + # values do not contain the reserved words "__SESS_" and "__CAM_" + if (!is.null(camera_col)) { + assertthat::assert_that(assertthat::is.string(camera_col)) + assertthat::assert_that( + camera_col %in% names(package$data$deployments), + msg = glue::glue( + "Camera column name (`{camera_col}`) is not valid: ", + "it must be one of the deployments column names." + ) + ) + camera_values <- package$data$deployments[[camera_col]] + camera_values <- camera_values[!is.na(camera_values)] + assertthat::assert_that( + all(!stringr::str_detect(string = camera_values, + pattern = "__SESS_|__CAM_")), + msg = glue::glue( + "Camera column name (`{camera_col}`) must not contain any of the ", + "reserved words: \"__SESS_\", \"__CAM_\"." + ) + ) + } + assertthat::assert_that( use_prefix %in% c(TRUE, FALSE), msg = "use_prefix must be TRUE or FALSE." @@ -140,8 +224,26 @@ get_cam_op <- function(package = NULL, } ) names(deployment_operational) <- deploys$deploymentID - - # get for each location which days a deployment was active + + # add session to station names + if (!is.null(session_col)) { + deploys <- deploys %>% + dplyr::mutate(!!station_col := paste(.data[[station_col]], + .data[[session_col]], + sep = "__SESS_") + ) + } + + # add camera to column names + if (!is.null(camera_col)) { + deploys <- deploys %>% + dplyr::mutate(!!station_col := paste(.data[[station_col]], + .data[[camera_col]], + sep = "__CAM_") + ) + } + + # get for each station_col which days a deployment was active camOps <- purrr::map_dfc( unique(deploys[[station_col]]), function(loc_name) { diff --git a/R/get_record_table.R b/R/get_record_table.R index 13c06024..0f3f6235 100644 --- a/R/get_record_table.R +++ b/R/get_record_table.R @@ -66,8 +66,10 @@ #' #' # Set a minDeltaTime of 20 minutes from last independent record for filtering #' # out not independent observations +#' mica_dependent <- mica +#' mica_dependent$data$observations[4,"timestamp"] <- lubridate::as_datetime("2020-07-29 05:55:00") #' get_record_table( -#' mica, +#' mica_dependent, #' minDeltaTime = 20, #' deltaTimeComparedTo = "lastIndependentRecord" #' ) @@ -75,12 +77,12 @@ #' # Set a minDeltaTime of 20 minutes from last record for filtering out not #' # independent observations #' get_record_table( -#' mica, +#' mica_dependent, #' minDeltaTime = 20, #' deltaTimeComparedTo = "lastRecord" #' ) #' -#' # Exclude observations of brown rat +#' # Exclude observations of mallard #' # Exclude is case insensitive and vernacular names are allowed #' get_record_table(mica, exclude = "wilde eend") #' @@ -91,6 +93,20 @@ #' minDeltaTime = 20, #' deltaTimeComparedTo = "lastRecord" #' ) +#' +#' # How to deal with duplicates +#' mica_dup <- mica +#' # create a duplicate at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver +#' mica_dup$data$observations[4,"sequenceID"] <- mica_dup$data$observations$sequenceID[3] +#' mica_dup$data$observations[4, "deploymentID"] <- mica_dup$data$observations$deploymentID[3] +#' mica_dup$data$observations[4, "timestamp"] <- mica_dup$data$observations$timestamp[3] +#' +#' # duplicates are removed by default by get_record_table() +#' get_record_table(mica_dup) +#' +#' # duplicate not removed +#' get_record_table(mica_dup, removeDuplicateRecords = FALSE) +#' #' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18 #' get_record_table(mica, pred_gte("latitude", 51.18)) get_record_table <- function(package = NULL, @@ -265,11 +281,13 @@ get_record_table <- function(package = NULL, Species = "scientificName", DateTimeOriginal = "timestamp", Directory = "filePath", - FileName = "fileName" + FileName = "fileName", + n = "count" ) %>% dplyr::select( "Station", "Species", + "n", "DateTimeOriginal", "Date", "Time", diff --git a/man/get_cam_op.Rd b/man/get_cam_op.Rd index 83c57185..81f01f45 100644 --- a/man/get_cam_op.Rd +++ b/man/get_cam_op.Rd @@ -8,6 +8,8 @@ get_cam_op( package = NULL, ..., station_col = "locationName", + camera_col = NULL, + session_col = NULL, use_prefix = FALSE, datapkg = lifecycle::deprecated() ) @@ -18,21 +20,26 @@ get_cam_op( \item{...}{filter predicates for filtering on deployments.} -\item{station_col}{Column name to use for identifying the stations. -Default: \code{"locationName"}.} +\item{station_col}{Column name to use for identifying the stations. Default: +\code{"locationName"}.} -\item{use_prefix}{Logical (\code{TRUE}or \code{FALSE}). -If \code{TRUE} the returned row names will start with prefix \code{"Station"} as -returned by \href{https://jniedballa.github.io/camtrapR/reference/cameraOperation.html}{camtrapR::cameraOperation()}. +\item{camera_col}{Column name of the column specifying Camera ID. Default: +\code{NULL}.} + +\item{session_col}{Column name to use for identifying the session. Default: +\code{NULL}. Use it for creating multi-session / multi-season detection +histories.} + +\item{use_prefix}{Logical (\code{TRUE} or \code{FALSE}). If \code{TRUE} the returned row +names will start with prefix \code{"Station"} as returned by +\href{https://jniedballa.github.io/camtrapR/reference/cameraOperation.html}{camtrapR::cameraOperation()}. Default: \code{FALSE}.} -\item{datapkg}{Deprecated. -Use \code{package} instead.} +\item{datapkg}{Deprecated. Use \code{package} instead.} } \value{ -A matrix. -Row names always indicate the station ID. -Column names are dates. +A matrix. Row names always indicate the station ID. Column names are +dates. } \description{ Returns the \href{https://jniedballa.github.io/camtrapR/reference/cameraOperation.html}{camera operation matrix} as @@ -40,14 +47,15 @@ returned by \href{https://jniedballa.github.io/camtrapR/reference/cameraOperatio } \details{ The deployment data are by default grouped by \code{locationName} (station ID in -camtrapR jargon) or another column specified by the user. -If multiple deployments are linked to same location, daily efforts higher -than 1 occur. +camtrapR jargon) or another column specified by the user via the +\code{station_col} argument. If multiple deployments are linked to same location, +daily efforts higher than 1 occur. Partially active days, e.g. the first or the last day of a deployment, result in decimal effort values as in \href{https://jniedballa.github.io/camtrapR/reference/cameraOperation.html}{camtrapR::cameraOperation()}. } \examples{ +library(dplyr) get_cam_op(mica) # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18 @@ -56,6 +64,25 @@ get_cam_op(mica, pred_gte("latitude", 51.18)) # Specify column with station names get_cam_op(mica, station_col = "locationID") +# Specify column with session IDs +mica_sessions <- mica +mica_sessions$data$deployments <- mica_sessions$data$deployments \%>\% + dplyr::mutate(session = ifelse( + stringr::str_starts(.data$locationName, "B_DL_"), + "after2020", + "before2020" + ) +) +get_cam_op(mica_sessions, session_col = "session") + +# Specify column with camera IDs +mica_cameras <- mica_sessions +mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4) +get_cam_op(mica_cameras, camera_col = "cameraID") + +# Specify both session and camera IDs +get_cam_op(mica_cameras, camera_col = "cameraID", session_col = "session") + # Use prefix Station as in camtrapR's camera operation matrix get_cam_op(mica, use_prefix = TRUE) } diff --git a/man/get_record_table.Rd b/man/get_record_table.Rd index 48ceae1c..62d4a141 100644 --- a/man/get_record_table.Rd +++ b/man/get_record_table.Rd @@ -88,8 +88,10 @@ get_record_table(mica) # Set a minDeltaTime of 20 minutes from last independent record for filtering # out not independent observations +mica_dependent <- mica +mica_dependent$data$observations[4,"timestamp"] <- lubridate::as_datetime("2020-07-29 05:55:00") get_record_table( - mica, + mica_dependent, minDeltaTime = 20, deltaTimeComparedTo = "lastIndependentRecord" ) @@ -97,12 +99,12 @@ get_record_table( # Set a minDeltaTime of 20 minutes from last record for filtering out not # independent observations get_record_table( - mica, + mica_dependent, minDeltaTime = 20, deltaTimeComparedTo = "lastRecord" ) -# Exclude observations of brown rat +# Exclude observations of mallard # Exclude is case insensitive and vernacular names are allowed get_record_table(mica, exclude = "wilde eend") @@ -113,6 +115,20 @@ get_record_table( minDeltaTime = 20, deltaTimeComparedTo = "lastRecord" ) + +# How to deal with duplicates +mica_dup <- mica +# create a duplicate at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver +mica_dup$data$observations[4,"sequenceID"] <- mica_dup$data$observations$sequenceID[3] +mica_dup$data$observations[4, "deploymentID"] <- mica_dup$data$observations$deploymentID[3] +mica_dup$data$observations[4, "timestamp"] <- mica_dup$data$observations$timestamp[3] + +# duplicate removed +get_record_table(mica_dup) + +# duplicate not removed +get_record_table(mica_dup, removeDuplicateRecords = FALSE) + # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18 get_record_table(mica, pred_gte("latitude", 51.18)) } diff --git a/tests/testthat/test-calc_animal_pos.R b/tests/testthat/test-calc_animal_pos.R index 02d603d1..b86fdddf 100644 --- a/tests/testthat/test-calc_animal_pos.R +++ b/tests/testthat/test-calc_animal_pos.R @@ -1,10 +1,10 @@ -testthat::test_that( +test_that( "calc_animal_pos returns errors if animal_pos is not valid", { # animal_pos is not a dataframe - testthat::expect_error(calc_animal_pos(1, list(a = "a"))) + expect_error(calc_animal_pos(1, list(a = "a"))) # x, y and sequenceID columns missing - testthat::expect_error( + expect_error( calc_animal_pos( dplyr::tibble( deploymentID = "A", @@ -16,7 +16,7 @@ testthat::test_that( "Columns `sequenceID`, `x` and `y` not found in `animal_pos`." ) # imageWidth, imageHeight and deploymentID columns missing - testthat::expect_error( + expect_error( calc_animal_pos( dplyr::tibble( width = 5, @@ -31,7 +31,7 @@ testthat::test_that( } ) -testthat::test_that( +test_that( "calc_animal_post returns errors if calib_models is not valid", { df <- dplyr::tibble( @@ -44,18 +44,18 @@ testthat::test_that( deployment = 2 ) # calib_models is not a (named) list - testthat::expect_error(calc_animal_pos(df, calib_models = 2)) - testthat::expect_error( + expect_error(calc_animal_pos(df, calib_models = 2)) + expect_error( calc_animal_pos(df, calib_models = list(2)), "`calib_models` must be a named list." ) } ) -testthat::test_that("Deployments with no matching calibration model", { +test_that("Deployments with no matching calibration model", { missing_calib_model <- dep_calib_models missing_calib_model$S01 <- NULL - testthat::expect_warning( + expect_warning( calc_animal_pos(animal_positions, missing_calib_model), paste( "Some deployments have no matching calibration model", @@ -64,11 +64,11 @@ testthat::test_that("Deployments with no matching calibration model", { ) }) -testthat::test_that("Deploys with multiple values for image width/height", { +test_that("Deploys with multiple values for image width/height", { multi_pixel_dim <- animal_positions multi_pixel_dim$imageWidth[1] <- 4096 multi_pixel_dim$imageHeight[20] <- 3072 - testthat::expect_warning( + expect_warning( calc_animal_pos(multi_pixel_dim, dep_calib_models), paste( "There is more than one unique value per deployment for `imageWidth`", @@ -78,27 +78,27 @@ testthat::test_that("Deploys with multiple values for image width/height", { ) }) -testthat::test_that("Right output", { +test_that("Right output", { output <- calc_animal_pos(animal_positions, dep_calib_models) # right class - testthat::expect_true(inherits(output, c("tbl_df", "tbl", "data.frame"))) + expect_true(inherits(output, c("tbl_df", "tbl", "data.frame"))) # right number of rows - testthat::expect_true(nrow(output) == nrow(animal_positions)) + expect_true(nrow(output) == nrow(animal_positions)) # right number of columns - testthat::expect_true(ncol(output) == ncol(animal_positions) + 3) + expect_true(ncol(output) == ncol(animal_positions) + 3) # new columns have right names - testthat::expect_true( + expect_true( all(c("radius", "angle", "frame_count") %in% names(output)) ) # output is exactly the same as animal_positions except for the new columns - testthat::expect_equal( + expect_equal( output %>% dplyr::select(-c(radius, angle, frame_count)), animal_positions ) }) -testthat::test_that("Right output with non default column names", { +test_that("Right output with non default column names", { output_default <- calc_animal_pos(animal_positions, dep_calib_models) animal_positions_non_default <- dplyr::rename( animal_positions, @@ -121,5 +121,5 @@ testthat::test_that("Right output with non default column names", { # content is the same (column names are different) names(output) <- as.character(1:length(names(output))) names(output_default) <- as.character(1:length(names(output_default))) - testthat::expect_equal(output, output_default) + expect_equal(output, output_default) }) diff --git a/tests/testthat/test-check_species.R b/tests/testthat/test-check_species.R index e60681c6..a4079a15 100644 --- a/tests/testthat/test-check_species.R +++ b/tests/testthat/test-check_species.R @@ -1,10 +1,10 @@ test_that("Error is returned if species is NULL or of length 0", { - testthat::expect_error(check_species(mica, NULL)) - testthat::expect_error(check_species(mica, character(0))) + expect_error(check_species(mica, NULL)) + expect_error(check_species(mica, character(0))) }) test_that("Error is returned if one or more species are invalid", { - testthat::expect_error( + expect_error( check_species( mica, c( @@ -30,31 +30,31 @@ test_that("Error is returned if one or more species are invalid", { test_that("If input is a scientific name, the result is equal to input", { sc_name <- "Anas strepera" species <- check_species(mica, sc_name) - testthat::expect_equal(species, sc_name) + expect_equal(species, sc_name) }) test_that("Multiput scientific names are allowed", { sc_names <- c("Anas strepera", "Ardea cinerea") species <- check_species(mica, sc_names) - testthat::expect_equal(species, sc_names) + expect_equal(species, sc_names) }) test_that("Function works with vernacular names", { vn_names <- c("beech marten", "mallard") species <- suppressMessages(check_species(mica, vn_names)) - testthat::expect_equal(species, c("Martes foina", "Anas platyrhynchos")) + expect_equal(species, c("Martes foina", "Anas platyrhynchos")) }) test_that("Functions works well with vernacular names of different languages", { vn_names <- c("beech marten", "wilde eend") species <- suppressMessages(check_species(mica, vn_names)) - testthat::expect_equal(species, c("Martes foina", "Anas platyrhynchos")) + expect_equal(species, c("Martes foina", "Anas platyrhynchos")) }) test_that("Functions works with a mix of scientific and vernacular names", { mixed_names <- c("mallard", "steenmarter", "Castor fiber") species <- suppressMessages(check_species(mica, mixed_names)) - testthat::expect_equal( + expect_equal( species, c( "Anas platyrhynchos", @@ -66,11 +66,11 @@ test_that("Functions works with a mix of scientific and vernacular names", { test_that("Taxon IDs are not allowed", { taxon_id <- mica$taxonomic[[1]]$taxonID - testthat::expect_error(check_species(mica, taxon_id)) + expect_error(check_species(mica, taxon_id)) }) test_that("Functions works case insensitively", { vn_name <- suppressMessages(check_species(mica, c("MallARD"))) species <- check_species(mica, vn_name) - testthat::expect_equal(species, "Anas platyrhynchos") + expect_equal(species, "Anas platyrhynchos") }) diff --git a/tests/testthat/test-get_cam_op.R b/tests/testthat/test-get_cam_op.R index 60dda148..948f6cf0 100644 --- a/tests/testthat/test-get_cam_op.R +++ b/tests/testthat/test-get_cam_op.R @@ -28,6 +28,44 @@ test_that("input camtrap dp is checked properly", { expect_error(get_cam_op(mica_empty_location_name), "Column `locationName` must be non-empty: 2 NAs found." ) + # camera_col is not NA + expect_error( + get_cam_op(mica, camera_col = NA), + "camera_col is not a string (a length one character vector).", + fixed = TRUE) + # camera_col is length 1 + expect_error( + get_cam_op(mica, camera_col = c("locationID","locationName")), + "camera_col is not a string (a length one character vector).", + fixed = TRUE) + # station_col value is not a column of deployments + expect_error( + get_cam_op(mica, camera_col = "bla"), + paste0( + "Camera column name (`bla`) is not valid: ", + "it must be one of the deployments column names." + ), + fixed = TRUE + ) + # session_col is not NA + expect_error( + get_cam_op(mica, session_col = NA), + "session_col is not a string (a length one character vector).", + fixed = TRUE) + # session_col is length 1 + expect_error( + get_cam_op(mica, session_col = c("locationID","locationName")), + "session_col is not a string (a length one character vector).", + fixed = TRUE) + # session_col value is not a column of deployments + expect_error( + get_cam_op(mica, session_col = "bla"), + paste0( + "Session column name (`bla`) is not valid: ", + "it must be one of the deployments column names." + ), + fixed = TRUE + ) # use_prefix must be TRUE or FALSE expect_error(get_cam_op(mica, use_prefix = "bla")) expect_error(get_cam_op(mica, use_prefix = NA)) @@ -35,33 +73,150 @@ test_that("input camtrap dp is checked properly", { test_that("output is a matrix", { cam_op_matrix <- get_cam_op(mica) - expect_true("matrix" %in% class(cam_op_matrix)) + expect_true(is.matrix(cam_op_matrix)) }) test_that("output matrix has locations as rownames", { cam_op_matrix <- get_cam_op(mica) locations <- mica$data$deployments$locationName n_locations <- length(mica$data$deployments$locationName) - expect_equal(nrow(cam_op_matrix), n_locations) - expect_equal(row.names(cam_op_matrix), locations) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations) +}) + +test_that("output matrix has sessions addded to locations as rownames", { + mica_sessions <- mica + mica_sessions$data$deployments <- mica_sessions$data$deployments %>% + dplyr::mutate(session = ifelse( + stringr::str_starts(.data$locationName, "B_DL_"), + "after2020", + "before2020" + ) + ) + cam_op_matrix <- get_cam_op(mica_sessions, session_col = "session") + locations_sessions <- paste(mica_sessions$data$deployments$locationName, + mica_sessions$data$deployments$session, + sep = "__SESS_" + ) + n_locations <- length(mica_sessions$data$deployments$locationName) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations_sessions) +}) + +test_that("output matrix has camera IDs addded to locations as rownames", { + mica_cameras <- mica + mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4) + cam_op_matrix <- get_cam_op(mica_cameras, camera_col = "cameraID") + locations_cameras <- paste(mica_cameras$data$deployments$locationName, + mica_cameras$data$deployments$cameraID, + sep = "__CAM_" + ) + n_locations <- length(mica_cameras$data$deployments$locationName) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations_cameras) +}) + +test_that( + "output matrix has sessions and cameras addded to locations as rownames", { + mica_sess_cam <- mica + mica_sess_cam$data$deployments$cameraID <- c(1, 2, 3, 4) + mica_sess_cam$data$deployments$session <- c(1, 2, 3, 4) + cam_op_matrix <- get_cam_op(mica_sess_cam, + camera_col = "cameraID", + session_col = "session" + ) + locations_sess_cam <- paste(mica_sess_cam$data$deployments$locationName, + mica_sess_cam$data$deployments$session, + sep = "__SESS_" + ) + locations_sess_cam <- paste(locations_sess_cam, + mica_sess_cam$data$deployments$cameraID, + sep = "__CAM_" + ) + n_locations <- length(mica_sess_cam$data$deployments$locationName) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations_sess_cam) }) +test_that( + "__SESS_ is a reserved word not used in station, session and camera columns", + { + mica__sess <- mica + mica__sess$data$deployments$session <- c("1__SESS_1") + expect_error(get_cam_op(mica__sess, session_col = "session"), + paste0("Session column name (`session`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + mica__sess <- mica + mica__sess$data$deployments$cameraID <- paste0(c(1,2,3,4), "__SESS_") + expect_error(get_cam_op(mica__sess, camera_col = "cameraID"), + paste0("Camera column name (`cameraID`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + mica__sess <- mica + mica__sess$data$deployments$locationName[1] <- paste0( + "__SESS_", + mica__sess$data$deployments$locationName[1] + ) + expect_error( + get_cam_op(mica__sess), + paste0("Station column name (`locationName`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + } +) + +test_that( + "__CAM_ is a reserved word not used in station, session and camera columns", + { + mica__cam <- mica + mica__cam$data$deployments$session[1] <- c("1__CAM_1") + expect_error(get_cam_op(mica__cam, session_col = "session"), + paste0("Session column name (`session`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + mica__cam <- mica + mica__cam$data$deployments$cameraID <- paste0(c(1,2,3,4), "__CAM_") + expect_error(get_cam_op(mica__cam, camera_col = "cameraID"), + paste0("Camera column name (`cameraID`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + mica__cam <- mica + mica__cam$data$deployments$locationName[1] <- paste0( + "__CAM_", + mica__cam$data$deployments$locationName[1] + ) + expect_error( + get_cam_op(mica__cam), + paste0("Station column name (`locationName`) must not contain any ", + "of the reserved words: \"__SESS_\", \"__CAM_\"."), + fixed = TRUE + ) + } +) + test_that("output matrix has Station prefix in rownames", { cam_op_matrix <- get_cam_op(mica, use_prefix = TRUE) locations <- paste0("Station", mica$data$deployments$locationName) n_locations <- length(mica$data$deployments$locationName) - expect_equal(nrow(cam_op_matrix), n_locations) - expect_equal(row.names(cam_op_matrix), locations) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations) }) test_that("output matrix has specified location column as rownames", { cam_op_matrix <- get_cam_op(mica, station_col = "locationID") locations <- mica$data$deployments$locationID n_locations <- length(mica$data$deployments$locationID) - expect_equal(nrow(cam_op_matrix), n_locations) - expect_equal(row.names(cam_op_matrix), locations) + expect_identical(nrow(cam_op_matrix), n_locations) + expect_identical(row.names(cam_op_matrix), locations) }) + test_that("output matrix has all deployment days as colnames", { cam_op_matrix <- get_cam_op(mica) days_activity <- seq(as.Date(min(mica$data$deployments$start)), @@ -70,8 +225,8 @@ test_that("output matrix has all deployment days as colnames", { ) days_activity <- as.character(days_activity) n_days <- length(days_activity) - expect_equal(ncol(cam_op_matrix), n_days) - expect_equal(colnames(cam_op_matrix), days_activity) + expect_identical(ncol(cam_op_matrix), n_days) + expect_identical(colnames(cam_op_matrix), days_activity) }) test_that("daily effort is > 0 for fully active days, NA for inactive days", { @@ -103,10 +258,10 @@ test_that("daily effort is > 0 and < 1 for partial active days (start/end)", { location <- mica$data$deployments$locationName[4] start <- as.character(as.Date(mica$data$deployments$start[4])) end <- as.character(as.Date(mica$data$deployments$end[4])) - expect_true(cam_op_matrix[4, start] > 0) - expect_true(cam_op_matrix[4, start] < 1) - expect_true(cam_op_matrix[4, end] > 0) - expect_true(cam_op_matrix[4, end] < 1) + expect_gt(cam_op_matrix[4, start], 0) + expect_lt(cam_op_matrix[4, start],1) + expect_gt(cam_op_matrix[4, end], 0) + expect_lt(cam_op_matrix[4, end], 1) }) test_that( @@ -128,8 +283,8 @@ test_that( expect_true( nrow(cam_op_matrix) == length(unique(mica1$data$deployments$locationName)) ) - expect_true(cam_op_matrix[1, first_full_day_two_deps] > 1) - expect_true(cam_op_matrix[1, last_full_day_two_deps] > 1) + expect_gt(cam_op_matrix[1, first_full_day_two_deps], 1) + expect_gt(cam_op_matrix[1, last_full_day_two_deps], 1) } ) @@ -166,7 +321,7 @@ test_that("filtering predicates are allowed and work well", { filtered_cam_op_matrix <- suppressMessages( get_cam_op(mica, pred_lt("longitude", 4.0)) ) - expect_equal(rownames(filtered_cam_op_matrix), "Mica Viane") + expect_identical(rownames(filtered_cam_op_matrix), "Mica Viane") }) test_that("Argument datapkg is deprecated: warning returned", { @@ -175,7 +330,9 @@ test_that("Argument datapkg is deprecated: warning returned", { lifecycle_verbosity = "warning", get_cam_op(datapkg = mica) ), - "The `datapkg` argument of `get_cam_op()` is deprecated as of camtraptor 0.16.0.", + paste0("The `datapkg` argument of `get_cam_op()` is deprecated ", + "as of camtraptor 0.16.0." + ), fixed = TRUE ) }) diff --git a/tests/testthat/test-get_effort.R b/tests/testthat/test-get_effort.R index 86de9ad1..3f7e3c9b 100644 --- a/tests/testthat/test-get_effort.R +++ b/tests/testthat/test-get_effort.R @@ -1,5 +1,5 @@ -testthat::test_that("get_effort returns error for invalid effort units", { - testthat::expect_error( +test_that("get_effort returns error for invalid effort units", { + expect_error( get_effort(mica, unit = "bad_unit"), paste0( "Invalid value for unit parameter: bad_unit.\n", @@ -7,7 +7,7 @@ testthat::test_that("get_effort returns error for invalid effort units", { ), fixed = TRUE ) - testthat::expect_error( + expect_error( get_effort(mica, unit = NULL), paste0( "Invalid value for unit parameter: NULL.\n", @@ -17,50 +17,50 @@ testthat::test_that("get_effort returns error for invalid effort units", { ) }) -testthat::test_that("get_effort returns error for invalid datapackage", { - testthat::expect_error(get_effort(mica$data$deployments)) +test_that("get_effort returns error for invalid datapackage", { + expect_error(get_effort(mica$data$deployments)) }) -testthat::test_that("values in column unit are all the same", { +test_that("values in column unit are all the same", { effort_df <- get_effort(mica) distinct_efffort_unit_values <- unique(effort_df$unit) - testthat::expect_equal(length(distinct_efffort_unit_values), 1) + expect_equal(length(distinct_efffort_unit_values), 1) }) -testthat::test_that("column effort_duration is of class 'Duration'", { +test_that("column effort_duration is of class 'Duration'", { effort_df <- get_effort(mica) - testthat::expect_equal(class(effort_df$effort_duration)[1], "Duration") - testthat::expect_equal( + expect_equal(class(effort_df$effort_duration)[1], "Duration") + expect_equal( attr(class(effort_df$effort_duration), which = "package"), "lubridate" ) }) -testthat::test_that("column unit is always equal to argument unit", { +test_that("column unit is always equal to argument unit", { unit_to_test <- c("second", "minute", "hour", "day", "month", "year") for (chosen_unit in unit_to_test) { effort_df <- get_effort(mica, unit = chosen_unit) efffort_unit_value <- unique(effort_df$unit) - testthat::expect_equal(efffort_unit_value, chosen_unit) + expect_equal(efffort_unit_value, chosen_unit) } }) -testthat::test_that("get_effort returns the right dataframe", { +test_that("get_effort returns the right dataframe", { effort_df <- get_effort(mica) # type list - testthat::expect_type(effort_df, "list") + expect_type(effort_df, "list") # class tibble data.frame - testthat::expect_equal( + expect_equal( class(effort_df), c("tbl_df", "tbl", "data.frame") ) # columns deploymentID, effort, unit and effort_duration only - testthat::expect_equal( + expect_equal( names(effort_df), c( "deploymentID", @@ -72,13 +72,13 @@ testthat::test_that("get_effort returns the right dataframe", { }) -testthat::test_that("get_effort returns the right number of rows", { +test_that("get_effort returns the right number of rows", { effort_df <- get_effort(mica) all_deployments <- unique(mica$data$deployments$deploymentID) n_all_deployments <- length(all_deployments) # number of rows should be equal to number of deployments - testthat::expect_equal( + expect_equal( nrow(effort_df), n_all_deployments ) diff --git a/tests/testthat/test-get_record_table.R b/tests/testthat/test-get_record_table.R index 9e0193f8..75a12015 100644 --- a/tests/testthat/test-get_record_table.R +++ b/tests/testthat/test-get_record_table.R @@ -1,33 +1,33 @@ test_that("input of get_record_table, camtrap dp, is checked properly", { - testthat::expect_error(get_record_table("aaa")) - testthat::expect_error(get_record_table(1)) + expect_error(get_record_table("aaa")) + expect_error(get_record_table(1)) }) test_that("input of get_record_table, stationCol, is checked properly", { - testthat::expect_error(get_record_table(mica, stationCol = "aaa")) + expect_error(get_record_table(mica, stationCol = "aaa")) }) test_that("input of get_record_table, exclude, is checked properly", { - testthat::expect_error(get_record_table(mica, exclude = "rattus not existing")) + expect_error(get_record_table(mica, exclude = "rattus not existing")) }) test_that("input of get_record_table, minDeltaTime, is checked properly", { - testthat::expect_error( + expect_error( get_record_table(mica, minDeltaTime = "1"), "`minDeltaTime` must be a number greater or equal to 0." ) - testthat::expect_error( + expect_error( get_record_table(mica, minDeltaTime = -10), "`minDeltaTime` must be a number greater or equal to 0." ) }) test_that("input of get_record_table, deltaTimeComparedTo, is checked properly", { - testthat::expect_error(get_record_table(mica, + expect_error(get_record_table(mica, minDeltaTime = 100, deltaTimeComparedTo = NULL )) - testthat::expect_error(get_record_table(mica, + expect_error(get_record_table(mica, minDeltaTime = 100, deltaTimeComparedTo = "not valid" )) @@ -44,22 +44,42 @@ test_that("if not integer, minDeltaTime is set to integer (floor)", { deltaTimeComparedTo = "lastRecord" ) ) - testthat::expect_equal(record_table_int, record_table_dec) + expect_identical(record_table_int, record_table_dec) }) test_that("input of get_record_table, removeDuplicateRecords, is checked properly", { # only TRUE or FALSE are allowed - testthat::expect_error(get_record_table(mica, + expect_error(get_record_table(mica, removeDuplicateRecords = 5 )) - testthat::expect_error(get_record_table(mica, + expect_error(get_record_table(mica, removeDuplicateRecords = NA )) }) +test_that("right columns are returned", { + expect_named( + get_record_table(mica), + c( + "Station", + "Species", + "n", + "DateTimeOriginal", + "Date", + "Time", + "delta.time.secs", + "delta.time.mins", + "delta.time.hours", + "delta.time.days", + "Directory", + "FileName" + ) + ) +}) + test_that("nrows = n obs of identified individuals if minDeltaTime is 0", { nrow_output <- get_record_table(mica, minDeltaTime = 0) %>% nrow() - testthat::expect_equal( + expect_identical( nrow_output, mica$data$observations %>% dplyr::filter(!is.na(scientificName)) %>% nrow() @@ -79,7 +99,7 @@ test_that("nrows = n obs of red foxes if all other species are excluded", { ) nrow_foxes <- get_record_table(mica, exclude = species_to_exclude) %>% nrow() - testthat::expect_equal( + expect_identical( nrow_foxes, mica$data$observations %>% dplyr::filter(scientificName == "Vulpes vulpes") %>% nrow() @@ -88,18 +108,18 @@ test_that("nrows = n obs of red foxes if all other species are excluded", { test_that("Higher minDeltaTime means less rows returned", { nrow_delta_0 <- get_record_table(mica) %>% nrow() - nrow_delta_1000 <- get_record_table(mica, - minDeltaTime = 1000, - deltaTimeComparedTo = "lastRecord" - ) %>% - nrow() nrow_delta_10000 <- suppressMessages(get_record_table(mica, minDeltaTime = 10000, deltaTimeComparedTo = "lastRecord" )) %>% nrow() - testthat::expect_true(nrow_delta_1000 <= nrow_delta_0) - testthat::expect_true(nrow_delta_10000 <= nrow_delta_1000) + nrow_delta_100000 <- suppressMessages(get_record_table(mica, + minDeltaTime = 100000, + deltaTimeComparedTo = "lastRecord" + )) %>% + nrow() + expect_lt(nrow_delta_10000, nrow_delta_0) + expect_lt(nrow_delta_100000, nrow_delta_10000) }) test_that("stations names are equal to values in column passed to StationCOl", { @@ -108,21 +128,21 @@ test_that("stations names are equal to values in column passed to StationCOl", { dplyr::distinct(Station) %>% dplyr::pull() location_names <- unique(mica$data$deployments$locationName) - testthat::expect_true(all(stations %in% location_names)) + expect_true(all(stations %in% location_names)) # use locationID as Station stations <- get_record_table(mica, stationCol = "locationID") %>% dplyr::distinct(Station) %>% dplyr::pull() location_ids <- unique(mica$data$deployments$locationID) - testthat::expect_true(all(stations %in% location_ids)) + expect_true(all(stations %in% location_ids)) }) test_that("Directory and Filename columns are lists", { file_values <- get_record_table(mica) %>% dplyr::select(Directory, FileName) - testthat::expect_true(class(file_values$Directory) == "list") - testthat::expect_true(class(file_values$FileName) == "list") + expect_true(class(file_values$Directory) == "list") + expect_true(class(file_values$FileName) == "list") }) test_that( @@ -151,12 +171,13 @@ test_that( n_media <- mica$data$media %>% dplyr::group_by(.data$sequenceID) %>% - dplyr::count() + dplyr::count() %>% + dplyr::rename(n_media = n) output <- output %>% dplyr::left_join(n_media, by = "sequenceID" ) - testthat::expect_equal(output$len, output$n) + expect_equal(output$len, output$n_media) } ) @@ -164,24 +185,27 @@ test_that(paste( "removeDuplicateRecords allows removing duplicates,", "but structure output remains the same" ), { - mica_duplicates <- mica - mica_duplicates$data$observations$sequenceID <- mica_duplicates$data$observations$sequenceID[1] - mica_duplicates$data$observations$deploymentID <- mica_duplicates$data$observations$deploymentID[1] - mica_duplicates$data$observations$timestamp <- mica_duplicates$data$observations$timestamp[1] - mica_duplicates$data$observations$scientificName <- "Anas strepera" - rec_table <- get_record_table(mica_duplicates) - rec_table_dup <- get_record_table(mica_duplicates, + mica_dup <- mica + # create duplicates at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver + # use 3rd observation as the first two are unknown or blank (= no animal) + mica_dup$data$observations[,"sequenceID"] <- mica_dup$data$observations$sequenceID[3] + mica_dup$data$observations[, "deploymentID"] <- mica_dup$data$observations$deploymentID[3] + mica_dup$data$observations[, "timestamp"] <- mica_dup$data$observations$timestamp[3] + mica_dup$data$observations[, "scientificName"] <- mica_dup$data$observations$scientificName[3] + + rec_table <- get_record_table(mica_dup) + rec_table_dup <- get_record_table(mica_dup, removeDuplicateRecords = FALSE ) - testthat::expect_equal(nrow(rec_table), 1) - testthat::expect_equal( - rec_table$DateTimeOriginal, mica$data$observations$timestamp[1] + expect_identical(nrow(rec_table), 1L) + expect_identical( + rec_table$DateTimeOriginal, mica$data$observations$timestamp[3] ) - testthat::expect_equal(rec_table$delta.time.secs, 0) - testthat::expect_equal(names(rec_table_dup), names(rec_table)) - testthat::expect_equal( + expect_identical(rec_table$delta.time.secs, 0) + expect_identical(names(rec_table_dup), names(rec_table)) + expect_identical( nrow(rec_table_dup), - nrow(mica_duplicates$data$observations) + nrow(mica_dup$data$observations) ) }) @@ -192,7 +216,7 @@ test_that("filtering predicates are allowed and work well", { stations_calculate <- mica$data$deployments %>% dplyr::filter(longitude < 4.0) %>% dplyr::pull(locationName) - testthat::expect_identical(stations, stations_calculate) + expect_identical(stations, stations_calculate) }) test_that("Argument datapkg is deprecated: warning returned", { diff --git a/vignettes/camera-operation-matrix.Rmd b/vignettes/camera-operation-matrix.Rmd index 20a6044f..03fa0759 100644 --- a/vignettes/camera-operation-matrix.Rmd +++ b/vignettes/camera-operation-matrix.Rmd @@ -90,3 +90,35 @@ You can also decide to use prefix `"Station"` in the station names as done by ca cam_op_with_prefix <- get_cam_op(mica, use_prefix = TRUE) cam_op_with_prefix[1:4,1:2] ``` + +### Session and camera IDs + +You can specify the column containing the camera IDs to be added to the station names following the camtrapR's convention: `Station__CAM_CameraID`. Only the row names are shown: + +```{r add_camera_IDs} +mica_cameras <- mica +mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4) +cam_op_with_camera_ids <- get_cam_op(mica_cameras, camera_col = "cameraID") +row.names(cam_op_with_camera_ids) +``` + +You cans also add the session IDs using `session_col` argument, following the camtrapR's convention: `Station__SESS_sessionID`: + +```{r add_session_IDs} +mica_sessions <- mica +mica_sessions$data$deployments$session <- c(1, 2, 3, 4) +cam_op_with_session_ids <- get_cam_op(mica_sessions, session_col = "session") +row.names(cam_op_with_session_ids) +``` + +To use both camera and session IDs, the camtrapR's convention `Station__SESS_SessionID__CAM_CameraID` is followed: + +```{r add_session_camera_IDs} +mica_sessions$data$deployments$cameraID <- c(1, 2, 3, 4) +cam_op_with_session_and_camera_ids <- get_cam_op( + mica_sessions, + camera_col = "cameraID", + session_col = "session" +) +row.names(cam_op_with_session_and_camera_ids) +``` diff --git a/vignettes/record-table.Rmd b/vignettes/record-table.Rmd index 03aec8a1..772c3235 100644 --- a/vignettes/record-table.Rmd +++ b/vignettes/record-table.Rmd @@ -44,12 +44,13 @@ If we consider that all observations are independent, then, it will be sufficien get_record_table(mica) ``` -The function returns the same columns as the camtrapR's function `recordTable()`. The following mapping is applied: +The function returns the same columns as the camtrapR's function `recordTable()` except for column `n`. The following mapping is applied: column name output | description --- | --- `Station` | the station name as provided by argument `stationCol` (default: `locationName`). It has to be a column of `deployments` `Species` | `scientific_name` column in `observations` +`n` | `count` column in `observations` (number of individuals) `DateTimeOriginal` | the `timestamp` column in `observations` `Date` | the date from `timestamp` `Time` | the time part from `timestamp` @@ -72,16 +73,18 @@ As described in [Chapter 3](https://jniedballa.github.io/camtrapR/articles/camtr Again, as for `recordTable()`, we provide an argument, `deltaTimeComparedTo`, to further control how independence between records is assessed. Setting it to `“lastRecord”` returns only records taken `minDeltaTime` minutes after the last record, i.e. `minDeltaTime` minutes after `timestamp` of the last recorded media file. Example with `minDeltaTime = 60` (1 hour): ```{r example_lastRecord} -get_record_table(mica, - minDeltaTime = 60, +mica_dependent <- mica +mica_dependent$data$observations[4,"timestamp"] <- lubridate::as_datetime("2020-07-29 05:55:00") +get_record_table(mica_dependent, + minDeltaTime = 10, deltaTimeComparedTo = "lastRecord") ``` Setting `deltaTimeComparedTo` to `“lastIndependentRecord”` returns only records taken `minDeltaTime` minutes after the last independent record, i.e. `minDeltaTime` minutes after `timestamp` of the last observation. Example with `minDeltaTime = 60` (1 hour): ```{r example_lastIndependentRecord} -get_record_table(mica, - minDeltaTime = 60, +get_record_table(mica_dependent, + minDeltaTime = 10, deltaTimeComparedTo = "lastIndependentRecord") ``` @@ -106,20 +109,19 @@ get_record_table(mica, stationCol = "locationID") It can happen that "duplicates" occur, e.g. when two distinct observations of the same species are made based on the same sequence of images, e.g. same species but different `lifeStage` or `sex`. You can decide what to do with these duplicates by using the argument `removeDuplicateRecords`: by default it is equal to `TRUE`. The duplicates are therefore removed. To not remove them, set `removeDuplicateRecords` equal to `FALSE`. -Let's create an easy example with duplicates based on `mica` datapackage: +Let's create an easy example with a duplicate based on `mica` datapackage: ```{r dummy_data_with_duplicates} mica_dup <- mica - mica_dup$observations$sequenceID <- mica_dup$observations$sequenceID[1] - mica_dup$observations$deploymentID <- mica_dup$observations$deploymentID[1] - mica_dup$observations$timestamp <- mica_dup$observations$timestamp[1] - mica_dup$observations$scientificName <- "Anas strepera" +# create a duplicate at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver +mica_dup$data$observations[4,"sequenceID"] <- mica_dup$data$observations$sequenceID[3] +mica_dup$data$observations[4, "deploymentID"] <- mica_dup$data$observations$deploymentID[3] +mica_dup$data$observations[4, "timestamp"] <- mica_dup$data$observations$timestamp[3] ``` Record table without duplicates: ```{r no_duplicates} -# equivalent of get_record_table(mica, removeDuplicateRecords = TRUE) get_record_table(mica_dup) ```