Merge pull request #289 from inbo/fix-288-and-279

Fix 288 and 279
inbo · Nov 10, 2023 · cee8f65 · cee8f65
2 parents 166e246 + 41afe9a
commit cee8f65
Show file tree

Hide file tree

Showing 12 changed files with 533 additions and 153 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -6,3 +6,5 @@
   0.1.6 which is currently used as internal data model for camtraptor.
 - `get_custom_effort()` now calculates per calendar month/week (#219).
 - `write_dwc()` has an updated mapping for dwc_audubon.csv (#274).
+- `get_record_table()` returns the number of observed individuals (#279).
+- `get_cam_op()` allows to add session and camera IDs to the station names output (#288).
diff --git a/R/get_cam_op.R b/R/get_cam_op.R
@@ -6,33 +6,38 @@
 #' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
 #'
 #' The deployment data are by default grouped by `locationName` (station ID in
-#' camtrapR jargon) or another column specified by the user.
-#' If multiple deployments are linked to same location, daily efforts higher
-#' than 1 occur.
+#' camtrapR jargon) or another column specified by the user via the 
+#' `station_col` argument. If multiple deployments are linked to same location, 
+#' daily efforts higher than 1 occur.
 #'
 #' Partially active days, e.g. the first or the last day of a deployment, result
 #' in decimal effort values as in [camtrapR::cameraOperation()](
 #' https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
 #'
 #' @param package Camera trap data package object, as returned by
 #'   `read_camtrap_dp()`.
-#' @param station_col Column name to use for identifying the stations.
-#'   Default: `"locationName"`.
-#' @param use_prefix Logical (`TRUE`or `FALSE`).
-#'   If `TRUE` the returned row names will start with prefix `"Station"` as
-#'   returned by [camtrapR::cameraOperation()](
+#' @param station_col Column name to use for identifying the stations. Default:
+#'   `"locationName"`.
+#' @param camera_col Column name of the column specifying Camera ID. Default:
+#'   `NULL`.
+#' @param session_col Column name to use for identifying the session. Default:
+#'   `NULL`. Use it for creating multi-session / multi-season detection
+#'   histories.
+#' @param use_prefix Logical (`TRUE` or `FALSE`). If `TRUE` the returned row
+#'   names will start with prefix `"Station"` as returned by
+#'   [camtrapR::cameraOperation()](
 #'   https://jniedballa.github.io/camtrapR/reference/cameraOperation.html).
 #'   Default: `FALSE`.
-#' @param datapkg Deprecated.
-#'   Use `package` instead.
+#' @param datapkg Deprecated. Use `package` instead.
 #' @param ... filter predicates for filtering on deployments.
-#' @return A matrix.
-#'   Row names always indicate the station ID.
-#'   Column names are dates.
+#' @return A matrix. Row names always indicate the station ID. Column names are
+#'   dates.
 #' @family exploration functions
 #' @importFrom dplyr %>% .data
+#' @importFrom rlang !! :=
 #' @export
 #' @examples
+#' library(dplyr)
 #' get_cam_op(mica)
 #'
 #' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18
@@ -41,11 +46,32 @@
 #' # Specify column with station names
 #' get_cam_op(mica, station_col = "locationID")
 #'
+#' # Specify column with session IDs
+#' mica_sessions <- mica
+#' mica_sessions$data$deployments <- mica_sessions$data$deployments %>%
+#'   dplyr::mutate(session = ifelse(
+#'     stringr::str_starts(.data$locationName, "B_DL_"),
+#'       "after2020",
+#'       "before2020"
+#'   )
+#' )
+#' get_cam_op(mica_sessions, session_col = "session")
+#'
+#' # Specify column with camera IDs
+#' mica_cameras <- mica_sessions
+#' mica_cameras$data$deployments$cameraID <- c(1, 2, 3, 4)
+#' get_cam_op(mica_cameras, camera_col = "cameraID")
+#'
+#' # Specify both session and camera IDs
+#' get_cam_op(mica_cameras, camera_col = "cameraID", session_col = "session")
+#' 
 #' # Use prefix Station as in camtrapR's camera operation matrix
 #' get_cam_op(mica, use_prefix = TRUE)
 get_cam_op <- function(package = NULL,
                        ...,
                        station_col = "locationName",
+                       camera_col = NULL,
+                       session_col = NULL,
                        use_prefix = FALSE,
                        datapkg = lifecycle::deprecated()) {
   # check camera trap data package
@@ -65,7 +91,7 @@ get_cam_op <- function(package = NULL,
     )
   )
 
-  # Check that station_col doesn't contain empty values (NA)
+  # Check that `station_col` doesn't contain empty values (NA)
   n_na <- package$data$deployments %>%
     dplyr::filter(is.na(.data[[station_col]])) %>%
     nrow()
@@ -76,7 +102,65 @@ get_cam_op <- function(package = NULL,
       "{n_na} NAs found."
     )
   )
-
+
+  # Check that `station_col` do not contain the reserved words "__SESS_" and
+  # "__CAM_" (no need to remove NAs beforehand as station_col must not contain
+  # any NA, see previous check)
+  assertthat::assert_that(
+    all(!stringr::str_detect(string = package$data$deployments[[station_col]],
+                             pattern = "__SESS_|__CAM_")),
+    msg = glue::glue(
+      "Station column name (`{station_col}`) must not contain any of the ",
+      "reserved words: \"__SESS_\", \"__CAM_\"."
+    )
+  )
+
+  # Check that `session_col` exists in deployments, if defined, and that its
+  # values do not contain the reserved words "__SESS_" and "__CAM_"
+  if (!is.null(session_col)) {
+    assertthat::assert_that(assertthat::is.string(session_col))
+    assertthat::assert_that(
+      session_col %in% names(package$data$deployments),
+      msg = glue::glue(
+        "Session column name (`{session_col}`) is not valid: ",
+        "it must be one of the deployments column names."
+      )
+    )
+    session_values <- package$data$deployments[[session_col]]
+    session_values <- session_values[!is.na(session_values)]
+    assertthat::assert_that(
+      all(!stringr::str_detect(string = session_values,
+                               pattern = "__SESS_|__CAM_")),
+      msg = glue::glue(
+        "Session column name (`{session_col}`) must not contain any of the ",
+        "reserved words: \"__SESS_\", \"__CAM_\"."
+      )
+    )
+  }
+
+  # Check that `camera_col` exists in deployments, if defined, and that its
+  # values do not contain the reserved words "__SESS_" and "__CAM_"
+  if (!is.null(camera_col)) {
+    assertthat::assert_that(assertthat::is.string(camera_col))
+    assertthat::assert_that(
+      camera_col %in% names(package$data$deployments),
+      msg = glue::glue(
+        "Camera column name (`{camera_col}`) is not valid: ",
+        "it must be one of the deployments column names."
+      )
+    )
+    camera_values <- package$data$deployments[[camera_col]]
+    camera_values <- camera_values[!is.na(camera_values)]
+    assertthat::assert_that(
+      all(!stringr::str_detect(string = camera_values,
+                           pattern = "__SESS_|__CAM_")),
+      msg = glue::glue(
+        "Camera column name (`{camera_col}`) must not contain any of the ",
+        "reserved words: \"__SESS_\", \"__CAM_\"."
+      )
+    )
+  }
+
   assertthat::assert_that(
     use_prefix %in% c(TRUE, FALSE),
     msg = "use_prefix must be TRUE or FALSE."
@@ -140,8 +224,26 @@ get_cam_op <- function(package = NULL,
     }
   )
   names(deployment_operational) <- deploys$deploymentID
-
-  # get for each location which days a deployment was active
+
+  # add session to station names
+  if (!is.null(session_col)) {
+    deploys <- deploys %>%
+      dplyr::mutate(!!station_col := paste(.data[[station_col]], 
+                                         .data[[session_col]],
+                                         sep = "__SESS_")
+      )
+  }
+
+  # add camera to column names
+  if (!is.null(camera_col)) {
+    deploys <- deploys %>%
+      dplyr::mutate(!!station_col := paste(.data[[station_col]], 
+                                           .data[[camera_col]],
+                                           sep = "__CAM_")
+      )
+  }
+
+  # get for each station_col which days a deployment was active
   camOps <- purrr::map_dfc(
     unique(deploys[[station_col]]),
     function(loc_name) {

diff --git a/R/get_record_table.R b/R/get_record_table.R
@@ -66,21 +66,23 @@
 #'
 #' # Set a minDeltaTime of 20 minutes from last independent record for filtering
 #' # out not independent observations
+#' mica_dependent <- mica
+#' mica_dependent$data$observations[4,"timestamp"] <- lubridate::as_datetime("2020-07-29 05:55:00")
 #' get_record_table(
-#'   mica,
+#'   mica_dependent,
 #'   minDeltaTime = 20,
 #'   deltaTimeComparedTo = "lastIndependentRecord"
 #' )
 #'
 #' # Set a minDeltaTime of 20 minutes from last record for filtering out not
 #' # independent observations
 #' get_record_table(
-#'   mica,
+#'   mica_dependent,
 #'   minDeltaTime = 20,
 #'   deltaTimeComparedTo = "lastRecord"
 #' )
 #'
-#' # Exclude observations of brown rat
+#' # Exclude observations of mallard
 #' # Exclude is case insensitive and vernacular names are allowed
 #' get_record_table(mica, exclude = "wilde eend")
 #'
@@ -91,6 +93,20 @@
 #'   minDeltaTime = 20,
 #'   deltaTimeComparedTo = "lastRecord"
 #' )
+#' 
+#' # How to deal with duplicates
+#' mica_dup <- mica
+#' # create a duplicate at 2020-07-29 05:46:48, location: B_DL_val 5_beek kleine vijver
+#' mica_dup$data$observations[4,"sequenceID"] <- mica_dup$data$observations$sequenceID[3]
+#' mica_dup$data$observations[4, "deploymentID"] <- mica_dup$data$observations$deploymentID[3]
+#' mica_dup$data$observations[4, "timestamp"] <- mica_dup$data$observations$timestamp[3]
+#'
+#' # duplicates are removed by default by get_record_table()
+#' get_record_table(mica_dup)
+#' 
+#' # duplicate not removed
+#' get_record_table(mica_dup, removeDuplicateRecords = FALSE)
+#' 
 #' # Applying filter(s) on deployments, e.g. deployments with latitude >= 51.18
 #' get_record_table(mica, pred_gte("latitude", 51.18))
 get_record_table <- function(package = NULL,
@@ -265,11 +281,13 @@ get_record_table <- function(package = NULL,
       Species = "scientificName",
       DateTimeOriginal = "timestamp",
       Directory = "filePath",
-      FileName = "fileName"
+      FileName = "fileName",
+      n = "count"
     ) %>%
     dplyr::select(
       "Station",
       "Species",
+      "n",
       "DateTimeOriginal",
       "Date",
       "Time",

diff --git a/man/get_cam_op.Rd b/man/get_cam_op.Rd
diff --git a/man/get_record_table.Rd b/man/get_record_table.Rd