Skip to content

Commit

Permalink
Merge pull request #289 from inbo/fix-288-and-279
Browse files Browse the repository at this point in the history
Fix 288 and 279
  • Loading branch information
PietrH authored Nov 10, 2023
2 parents 166e246 + 41afe9a commit cee8f65
Show file tree
Hide file tree
Showing 12 changed files with 533 additions and 153 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
0.1.6 which is currently used as internal data model for camtraptor.
- `get_custom_effort()` now calculates per calendar month/week (#219).
- `write_dwc()` has an updated mapping for dwc_audubon.csv (#274).
- `get_record_table()` returns the number of observed individuals (#279).
- `get_cam_op()` allows to add session and camera IDs to the station names output (#288).
136 changes: 119 additions & 17 deletions R/get_cam_op.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,38 @@
#' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
#'
#' The deployment data are by default grouped by `locationName` (station ID in
#' camtrapR jargon) or another column specified by the user.
#' If multiple deployments are linked to same location, daily efforts higher
#' than 1 occur.
#' camtrapR jargon) or another column specified by the user via the
#' `station_col` argument. If multiple deployments are linked to same location,
#' daily efforts higher than 1 occur.
#'
#' Partially active days, e.g. the first or the last day of a deployment, result
#' in decimal effort values as in [camtrapR::cameraOperation()](
#' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
#'
#' @param package Camera trap data package object, as returned by
#' `read_camtrap_dp()`.
#' @param station_col Column name to use for identifying the stations.
#' Default: `"locationName"`.
#' @param use_prefix Logical (`TRUE`or `FALSE`).
#' If `TRUE` the returned row names will start with prefix `"Station"` as
#' returned by [camtrapR::cameraOperation()](
#' @param station_col Column name to use for identifying the stations. Default:
#' `"locationName"`.
#' @param camera_col Column name of the column specifying Camera ID. Default:
#' `NULL`.
#' @param session_col Column name to use for identifying the session. Default:
#' `NULL`. Use it for creating multi-session / multi-season detection
#' histories.
#' @param use_prefix Logical (`TRUE` or `FALSE`). If `TRUE` the returned row
#' names will start with prefix `"Station"` as returned by
#' [camtrapR::cameraOperation()](
#' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
#' Default: `FALSE`.
#' @param datapkg Deprecated.
#' Use `package` instead.
#' @param datapkg Deprecated. Use `package` instead.
#' @param ... filter predicates for filtering on deployments.
#' @return A matrix.
#' Row names always indicate the station ID.
#' Column names are dates.
#' @return A matrix. Row names always indicate the station ID. Column names are
#' dates.
#' @family exploration functions
#' @importFrom dplyr %>% .data
#' @importFrom rlang !! :=
#' @export
#' @examples
#' library(dplyr)
#' get_cam_op(mica)
#'
#' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18
Expand All @@ -41,11 +46,32 @@
#' # Specify column with station names
#' get_cam_op(mica, station_col = "locationID")
#'
#' # Specify column with session IDs
#' mica_sessions <- mica
#' mica_sessions$data$deployments <- mica_sessions$data$deployments %>%
#' dplyr::mutate(session = ifelse(
#' stringr::str_starts(.data$locationName, "B_DL_"),
#' "after2020",
#' "before2020"
#' )
#' )
#' get_cam_op(mica_sessions, session_col = "session")
#'
#' # Specify column with camera IDs
#' mica_cameras <- mica_sessions
#' mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4)
#' get_cam_op(mica_cameras, camera_col = "cameraID")
#'
#' # Specify both session and camera IDs
#' get_cam_op(mica_cameras, camera_col = "cameraID", session_col = "session")
#'
#' # Use prefix Station as in camtrapR's camera operation matrix
#' get_cam_op(mica, use_prefix = TRUE)
get_cam_op <- function(package = NULL,
...,
station_col = "locationName",
camera_col = NULL,
session_col = NULL,
use_prefix = FALSE,
datapkg = lifecycle::deprecated()) {
# check camera trap data package
Expand All @@ -65,7 +91,7 @@ get_cam_op <- function(package = NULL,
)
)

# Check that station_col doesn't contain empty values (NA)
# Check that `station_col` doesn't contain empty values (NA)
n_na <- package$data$deployments %>%
dplyr::filter(is.na(.data[[station_col]])) %>%
nrow()
Expand All @@ -76,7 +102,65 @@ get_cam_op <- function(package = NULL,
"{n_na} NAs found."
)
)


# Check that `station_col` do not contain the reserved words "__SESS_" and
# "__CAM_" (no need to remove NAs beforehand as station_col must not contain
# any NA, see previous check)
assertthat::assert_that(
all(!stringr::str_detect(string = package$data$deployments[[station_col]],
pattern = "__SESS_|__CAM_")),
msg = glue::glue(
"Station column name (`{station_col}`) must not contain any of the ",
"reserved words: \"__SESS_\", \"__CAM_\"."
)
)

# Check that `session_col` exists in deployments, if defined, and that its
# values do not contain the reserved words "__SESS_" and "__CAM_"
if (!is.null(session_col)) {
assertthat::assert_that(assertthat::is.string(session_col))
assertthat::assert_that(
session_col %in% names(package$data$deployments),
msg = glue::glue(
"Session column name (`{session_col}`) is not valid: ",
"it must be one of the deployments column names."
)
)
session_values <- package$data$deployments[[session_col]]
session_values <- session_values[!is.na(session_values)]
assertthat::assert_that(
all(!stringr::str_detect(string = session_values,
pattern = "__SESS_|__CAM_")),
msg = glue::glue(
"Session column name (`{session_col}`) must not contain any of the ",
"reserved words: \"__SESS_\", \"__CAM_\"."
)
)
}

# Check that `camera_col` exists in deployments, if defined, and that its
# values do not contain the reserved words "__SESS_" and "__CAM_"
if (!is.null(camera_col)) {
assertthat::assert_that(assertthat::is.string(camera_col))
assertthat::assert_that(
camera_col %in% names(package$data$deployments),
msg = glue::glue(
"Camera column name (`{camera_col}`) is not valid: ",
"it must be one of the deployments column names."
)
)
camera_values <- package$data$deployments[[camera_col]]
camera_values <- camera_values[!is.na(camera_values)]
assertthat::assert_that(
all(!stringr::str_detect(string = camera_values,
pattern = "__SESS_|__CAM_")),
msg = glue::glue(
"Camera column name (`{camera_col}`) must not contain any of the ",
"reserved words: \"__SESS_\", \"__CAM_\"."
)
)
}

assertthat::assert_that(
use_prefix %in% c(TRUE, FALSE),
msg = "use_prefix must be TRUE or FALSE."
Expand Down Expand Up @@ -140,8 +224,26 @@ get_cam_op <- function(package = NULL,
}
)
names(deployment_operational) <- deploys$deploymentID

# get for each location which days a deployment was active

# add session to station names
if (!is.null(session_col)) {
deploys <- deploys %>%
dplyr::mutate(!!station_col := paste(.data[[station_col]],
.data[[session_col]],
sep = "__SESS_")
)
}

# add camera to column names
if (!is.null(camera_col)) {
deploys <- deploys %>%
dplyr::mutate(!!station_col := paste(.data[[station_col]],
.data[[camera_col]],
sep = "__CAM_")
)
}

# get for each station_col which days a deployment was active
camOps <- purrr::map_dfc(
unique(deploys[[station_col]]),
function(loc_name) {
Expand Down
26 changes: 22 additions & 4 deletions R/get_record_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,23 @@
#'
#' # Set a minDeltaTime of 20 minutes from last independent record for filtering
#' # out not independent observations
#' mica_dependent <- mica
#' mica_dependent$data$observations[4,"timestamp"] <- lubridate::as_datetime("2020-07-29 05:55:00")
#' get_record_table(
#' mica,
#' mica_dependent,
#' minDeltaTime = 20,
#' deltaTimeComparedTo = "lastIndependentRecord"
#' )
#'
#' # Set a minDeltaTime of 20 minutes from last record for filtering out not
#' # independent observations
#' get_record_table(
#' mica,
#' mica_dependent,
#' minDeltaTime = 20,
#' deltaTimeComparedTo = "lastRecord"
#' )
#'
#' # Exclude observations of brown rat
#' # Exclude observations of mallard
#' # Exclude is case insensitive and vernacular names are allowed
#' get_record_table(mica, exclude = "wilde eend")
#'
Expand All @@ -91,6 +93,20 @@
#' minDeltaTime = 20,
#' deltaTimeComparedTo = "lastRecord"
#' )
#'
#' # How to deal with duplicates
#' mica_dup <- mica
#' # create a duplicate at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver
#' mica_dup$data$observations[4,"sequenceID"] <- mica_dup$data$observations$sequenceID[3]
#' mica_dup$data$observations[4, "deploymentID"] <- mica_dup$data$observations$deploymentID[3]
#' mica_dup$data$observations[4, "timestamp"] <- mica_dup$data$observations$timestamp[3]
#'
#' # duplicates are removed by default by get_record_table()
#' get_record_table(mica_dup)
#'
#' # duplicate not removed
#' get_record_table(mica_dup, removeDuplicateRecords = FALSE)
#'
#' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18
#' get_record_table(mica, pred_gte("latitude", 51.18))
get_record_table <- function(package = NULL,
Expand Down Expand Up @@ -265,11 +281,13 @@ get_record_table <- function(package = NULL,
Species = "scientificName",
DateTimeOriginal = "timestamp",
Directory = "filePath",
FileName = "fileName"
FileName = "fileName",
n = "count"
) %>%
dplyr::select(
"Station",
"Species",
"n",
"DateTimeOriginal",
"Date",
"Time",
Expand Down
53 changes: 40 additions & 13 deletions man/get_cam_op.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 19 additions & 3 deletions man/get_record_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit cee8f65

Please sign in to comment.