Merge pull request #223 from inbo/read-CamtrapDP-v1.0-rc.1

Read camtrap dp v1.0 rc.1
inbo · Jul 20, 2023 · c8a3c9f · c8a3c9f
2 parents 1322593 + 17184f6
commit c8a3c9f
Show file tree

Hide file tree

Showing 15 changed files with 1,219 additions and 162 deletions.
diff --git a/R/get_cam_op.R b/R/get_cam_op.R
@@ -51,6 +51,8 @@ get_cam_op <- function(package = NULL,
   # check camera trap data package
   package <- check_package(package, datapkg, "get_cam_op")
 
+  # Check that station_col is a single string
+  assertthat::assert_that(assertthat::is.string(station_col))
   # Check that station_col is one of the columns in deployments
   assertthat::assert_that(
     station_col %in% names(package$data$deployments),
@@ -59,6 +61,18 @@ get_cam_op <- function(package = NULL,
       "it must be one of the deployments column names."
     )
   )
+
+  # Check that station_col doesn't contain empty values (NA)
+  n_na <- package$data$deployments %>%
+    dplyr::filter(is.na(.data[[station_col]])) %>%
+    nrow()
+  assertthat::assert_that(
+    n_na == 0,
+    msg = glue::glue(
+      "Column `{station_col}` must be non-empty: ",
+      "{n_na} NAs found."
+    )
+  )
 
   assertthat::assert_that(
     use_prefix %in% c(TRUE, FALSE),

diff --git a/R/get_custom_effort.R b/R/get_custom_effort.R
@@ -1,9 +1,9 @@
 #' Get custom effort
 #'
 #' Gets the custom effort (deployment duration) for a custom time window and a
-#' specific time interval such as day or month.
-#' The custom effort is also calculated over all deployments, although
-#' filtering predicates can be applied as well.
+#' specific time interval such as day or month. The custom effort is also
+#' calculated over all deployments, although filtering predicates can be applied
+#' as well. This function calls `get_cam_op()` internally.
 #'
 #' @param package Camera trap data package object, as returned by
 #'   `read_camtrap_dp()`.
@@ -122,7 +122,7 @@ get_custom_effort <- function(package = NULL,
   deployments <- package$data$deployments
 
   # camera operation matrix with filter(s) on deployments
-  cam_op <- get_cam_op(package, ...)
+  cam_op <- get_cam_op(package, ..., station_col = "deploymentID")
 
   # Sum effort over all deployments for each day  (in day units)
   sum_effort <- colSums(cam_op, na.rm = TRUE, dims = 1)

diff --git a/R/get_record_table.R b/R/get_record_table.R
@@ -102,7 +102,7 @@ get_record_table <- function(package = NULL,
                              removeDuplicateRecords = TRUE,
                              datapkg = lifecycle::deprecated()) {
   # check data package
-  package <- check_package(package, datapkg, "get_record_table")
+  package <- check_package(package, datapkg, "get_record_table", media = TRUE)
 
   # check stationCol is a valid column name
   assertthat::assert_that(

diff --git a/R/get_species.R b/R/get_species.R
@@ -14,7 +14,7 @@
 #' get_species(mica)
 get_species <- function(package = NULL, datapkg = lifecycle::deprecated()) {
   # Check camera trap data package
-  package <- check_package(package, datapkg, "get_species")
+  package <- check_package(package, datapkg, "get_species", media = FALSE)
 
   # Get taxonomic information from package metadata
   if (!"taxonomic" %in% names(package)) {

diff --git a/R/map_dep.R b/R/map_dep.R
@@ -523,7 +523,7 @@ map_dep <- function(package = NULL,
     )
   } else if (feature == "rai") {
     feat_df <- get_rai(package, species = species, sex = sex, life_stage = life_stage, ...)
-    feat_df <- feat_df %>% dplyr::rename(n = rai)
+    feat_df <- feat_df %>% dplyr::rename(n = "rai")
   } else if (feature == "rai_individuals") {
     feat_df <- get_rai_individuals(
       package,
@@ -537,7 +537,7 @@ map_dep <- function(package = NULL,
       effort_unit <- "hour" # default value of get_effort()
     }
     feat_df <- get_effort(package, unit = effort_unit, ...)
-    feat_df <- feat_df %>% dplyr::rename(n = effort)
+    feat_df <- feat_df %>% dplyr::rename(n = "effort")
   }
 
   # define title legend

diff --git a/R/read_camtrap_dp.R b/R/read_camtrap_dp.R
@@ -8,8 +8,8 @@
 #' `observations` data frame.
 #'
 #' @param file Path or URL to a `datapackage.json` file.
-#' @param media If `TRUE`, read media records into memory. If `FALSE`, ignore
-#'   media file to speed up reading larger Camtrap DP packages.
+#' @param media If `TRUE` (default), read media records into memory. If `FALSE`,
+#'   ignore media file to speed up reading larger Camtrap DP packages.
 #' @param path Path to the directory containing the datapackage. Use  `file`
 #'   with path or URL to a `datapackage.json` file instead.
 #' @return List describing a Data Package (as returned by
@@ -24,7 +24,10 @@
 #' @examples
 #' \dontrun{
 #' # Read Camtrap DP package
-#' camtrap_dp_file <- system.file("extdata", "mica", "datapackage.json", package = "camtraptor")
+#' camtrap_dp_file <- system.file(
+#'   "extdata", "mica", "datapackage.json", 
+#'   package = "camtraptor"
+#' )
 #' muskrat_coypu <- read_camtrap_dp(camtrap_dp_file)
 #'
 #' # Read Camtrap DP package and ignore media file
@@ -46,8 +49,9 @@
 read_camtrap_dp <- function(file = NULL,
                             media = TRUE,
                             path = lifecycle::deprecated()) {
+  # check path (deprecated)
   warning_detail <- paste(
-    "Use parameter `file` containing the path or URL to the `datapackage.json`",
+    "Use argument `file` containing the path or URL to the `datapackage.json`",
     "file. The use of parameter `path` with path to the local directory is ",
     "deprecated since version 0.6.0."
   )
@@ -58,7 +62,6 @@ read_camtrap_dp <- function(file = NULL,
       details = warning_detail
     )
   }
-
   # define the right file value
   if (lifecycle::is_present(path)) {
     file <- file.path(path, "datapackage.json")
@@ -67,91 +70,105 @@ read_camtrap_dp <- function(file = NULL,
   if (dir.exists(file)) {
     file <- file.path(file, "datapackage.json")
   }
-
-  # check media
+  # check media arg
   assertthat::assert_that(
     media %in% c(TRUE, FALSE),
     msg = "`media` must be a logical: TRUE or FALSE"
   )
-  # read files
+
+  # read package (metadata)
   package <- frictionless::read_package(file)
-  deployments <- frictionless::read_resource(package, "deployments")
-  issues_deployments <- readr::problems(deployments)
-  if (nrow(issues_deployments) > 0) {
-    warning(glue::glue(
-      "One or more parsing issues occurred while reading deployments. ",
-      "See `?read_camtrap_dp()` for examples on how to use ",
-      "`readr::problems()`."
-    ))
-  }
-  observations <- frictionless::read_resource(package, "observations")
-  issues_observations <- readr::problems(observations)
-  if (nrow(issues_observations) > 0) {
-    warning(glue::glue(
-      "One or more parsing issues occurred while reading observations. ",
-      "See `?read_camtrap_dp()` for examples on how to use ",
-      "`readr::problems()`."
-    ))
+
+  # supported versions
+  supported_versions <- c("0.1.6", "1.0-rc.1")
+
+  # get package version
+  profile <- package$profile
+  if (profile == "https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0-rc.1/camtrap-dp-profile.json") {
+    version <- "1.0-rc.1"
+  } else {
+    if (profile == "https://raw.githubusercontent.com/tdwg/camtrap-dp/0.1.6/camtrap-dp-profile.json") {
+      version <- "0.1.6"
+    } else {
+      version <- profile
+    }
   }
 
-  # patch for non-standard values speed, radius, angle
-  # see https://github.com/inbo/camtraptor/issues/185
-  obs_col_names <- names(observations)
-  if (all(c("X22", "X23", "X24") %in% names(observations))) {
-    observations <- observations %>%
-      dplyr::rename(speed = "X22", radius = "X23", angle = "X24")
-    message(
-      paste("Three extra fields in `observations` interpreted as `speed`,",
-            "`radius` and `angle`."
-      )
+  # check version is supported
+  assertthat::assert_that(
+    version %in% supported_versions,
+    msg = paste0(
+      glue::glue("Version {version} "), 
+      "is not supported. Supported versions: ",
+      glue::glue_collapse(glue::glue("{supported_versions}"), 
+                              sep = " ", 
+                              last = " and "),
+      ".")
+  )
+
+  # get resource names
+  resource_names <- frictionless::resources(package)
+  #check needed resources are present
+  resources_to_read <- c("deployments", "media", "observations")
+  assertthat::assert_that(
+    all(resources_to_read %in% resource_names),
+    msg = glue::glue(
+      "One or more resources among ", 
+      glue::glue_collapse(resources_to_read, sep = ", ", last = " and "),
+      " is missing."
     )
+  )
+
+  # read deployments
+  deployments <- frictionless::read_resource(package, "deployments")
+  issues_deployments <- check_reading_issues(deployments, "deployments")
+  # read observations (needed to create sequenceID in media)
+  observations <- frictionless::read_resource(package, "observations")
+  issues_observations <- check_reading_issues(observations, "observations")
+
+  if (version == "0.1.6"){
+    observations <- add_speed_radius_angle(observations)
   }
 
-  # create first version datapackage with resources in data element
+  # create first version datapackage with resources in data slot
   data <- list(
     "deployments" = deployments,
     "media" = NULL,
     "observations" = observations
   )
+
   package$data <- data
-  # get taxonomic info
-  taxon_infos <- get_species(package)
-  # add vernacular names to observations
-  if (!is.null(taxon_infos)) {
-    cols_taxon_infos <- names(taxon_infos)
-    observations <-
-      dplyr::left_join(
-        observations,
-        taxon_infos,
-        by  = c("taxonID", "scientificName")
-      )
-    observations <-
-      observations %>%
-      dplyr::relocate(dplyr::one_of(cols_taxon_infos), .after = "cameraSetup")
-    # Inherit parsing issues from reading
-    attr(observations, which = "problems") <- issues_observations
-    package$data$observations <- observations
+
+  # read media if needed
+  if (media) {
+    media_df <- frictionless::read_resource(package, "media")
+    issues_media <- check_reading_issues(media_df, "media")
+    data$media <- media_df
   }
-  if (media == TRUE) {
-    media <- frictionless::read_resource(package, "media")
-    issues_media <- readr::problems(media)
-    if (nrow(issues_media) > 0) {
-      warning(glue::glue(
-        "One or more parsing issues occurred while reading media. ",
-        "See `?read_camtrap_dp()` for examples on how to use ",
-        "`readr::problems()`."
-      ))
-    }
+
+  package$data <- data
+  package <- check_package(package, media = media)
+
+  package <- add_taxonomic_info(package)
+
+  # convert to 0.1.6
+  if (version == "1.0-rc.1") {
+    package <- convert_to_0.1.6(package, version, media = media)
   }
-
-  # return list resources
-  if (is.data.frame(media)) {
-    data <- list(
-      "deployments" = deployments,
-      "media" = media,
-      "observations" = observations
-    )
-    package$data <- data
+
+  # order columns
+  package$data$deployments <- order_cols_deployments(package$data$deployments)
+  package$data$observations <- order_cols_observations(
+    package$data$observations
+  )
+  if (!is.null(package$data$media)) {
+    package$data$media <- order_cols_media(package$data$media)
   }
-  package
+
+  package <- check_package(package, media = media)
+
+  # Inherit parsing issues from reading
+  attr(package$data$observations, which = "problems") <- issues_observations
+
+  return(package)
 }
diff --git a/R/write_eml.R b/R/write_eml.R
@@ -209,8 +209,8 @@ write_eml <- function(package,
     taxonomy <- dplyr::filter(taxonomy, .data$taxonRank == "species")
   }
   sci_names <-
-    dplyr::rename(taxonomy, Species = scientificName) %>%
-    dplyr::select(Species)
+    dplyr::rename(taxonomy, Species = "scientificName") %>%
+    dplyr::select("Species")
 
   eml$dataset$coverage <- EML::set_coverage(
     begin = package$temporal$start,