Skip to content

Commit

Permalink
Merge pull request #335 from inbo/adapt-get_custom_effort
Browse files Browse the repository at this point in the history
Adapt `get_custom_effort`
  • Loading branch information
damianooldoni authored Oct 14, 2024
2 parents 31dcae3 + 093979f commit 079713f
Show file tree
Hide file tree
Showing 5 changed files with 270 additions and 97 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: camtraptor
Title: Read, Explore and Visualize Camera Trap Data Packages
Version: 0.25.0
Version: 0.26.0
Authors@R: c(
person("Damiano", "Oldoni", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-3445-7562")),
Expand Down Expand Up @@ -63,5 +63,5 @@ Encoding: UTF-8
LazyData: true
LazyDataCompression: bzip2
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Config/testthat/edition: 3
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# camtraptor 0.26.0

- `get_custom_effort()` returns now the effort for each deployment separately (#333). The returned data frame has two new columns: `deploymentID` and `locationName`.

# camtraptor 0.25.0

- `read_camtrap_dp()` detects Camtrap DP version from `package$profile` using
Expand Down
183 changes: 119 additions & 64 deletions R/get_custom_effort.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#' Get custom effort
#'
#' Gets the custom effort (deployment duration) for a custom time window and a
#' specific time interval such as day, week, month or year. The custom effort is also
#' calculated over all deployments, although filtering predicates can be applied
#' as well. This function calls `get_cam_op()` internally.
#' Gets the effort for each deployment and a specific time interval such as day,
#' week, month or year. A custom time window can also be set up. This function
#' calls `get_cam_op()` internally.
#'
#' @param package Camera trap data package object, as returned by
#' `read_camtrap_dp()`.
Expand Down Expand Up @@ -38,60 +37,100 @@
#' Use `package` instead.
#' @param ... filter predicates
#' @return A tibble data frame with following columns:
#' - `deploymentID`: Deployment unique identifier.
#' - `locationName`: Location name of the deployments.
#' - `begin`: Begin date of the interval the effort is calculated over.
#' - `effort`: The effort as number.
#' - `unit`: Character specifying the effort unit.
#' @family exploration functions
#' @importFrom dplyr .data %>%
#' @export
#' @examples
#' # A global effort over the entire duration of the project (datapackage)
#' # measured in hours
#' # Effort for each deployment over the entire duration of the project
#' # (datapackage) measured in hours (default)
#' get_custom_effort(mica)
#'
#' # Global effort expressed in days
#' # Effort for each deployment expressed in days
#' get_custom_effort(mica, unit = "day")
#'
#' # Total effort from a specific start to a specific end
#' # Effort for each deployment from a specific start to a specific end
#' get_custom_effort(
#' mica,
#' start = as.Date("2019-12-15"), # or lubridate::as_date("2019-12-15")
#' end = as.Date("2021-01-10")
#' )
#'
#' # Effort at daily interval
#' # Effort for each deployment at daily interval
#' get_custom_effort(
#' mica,
#' group_by = "day"
#' )
#'
#' # Effort at weekly interval
#' # Effort for each deployment at weekly interval
#' get_custom_effort(
#' mica,
#' group_by = "week"
#' )
#'
#' # Effort at monthly interval
#' # Effort for each deployment at monthly interval
#' get_custom_effort(
#' mica,
#' group_by = "month"
#' )
#'
#' # Effort at yearly interval
#' # Effort for each deployment at yearly interval
#' get_custom_effort(
#' mica,
#' group_by = "year"
#' )
#'
#' # Applying filter(s), e.g. deployments with latitude >= 51.18
#' get_custom_effort(mica, pred_gte("latitude", 51.18))
#' # Applying filter(s), e.g. deployments with latitude >= 51.18, can be
#' # combined with other arguments
#' get_custom_effort(mica, pred_gte("latitude", 51.18), group_by = "month")
#'
#' # You can afterwards calculate the total effort over all deployments
#' library(dplyr)
#' get_custom_effort(mica, group_by = "year", unit = "day") %>%
#' dplyr::filter(effort > 0) %>%
#' dplyr::group_by(begin) %>%
#' dplyr::summarise(
#' deploymentIDs = list(deploymentID),
#' locationNames = list(locationName),
#' ndep = length(unique(deploymentID)),
#' nloc = length(unique(locationName)),
#' effort = sum(effort),
#' unit = unique(unit)
#' )
get_custom_effort <- function(package = NULL,
...,
start = NULL,
end = NULL,
group_by = NULL,
unit = "hour",
datapkg = lifecycle::deprecated()) {
# Check start earlier than end
if (!is.null(start) & !is.null(end)) {
assertthat::assert_that(start <= end,
msg = "`start` must be earlier than `end`."
)
}

# Check start and end are both dates
assertthat::assert_that(
is.null(start) | all(class(start) == "Date"),
msg = glue::glue(
"`start` must be `NULL` or an object of class Date. ",
"Did you forget to convert a string to Date with `as.Date()`?"
)
)
assertthat::assert_that(
is.null(end) | all(class(end) == "Date"),
msg = glue::glue(
"`end` must be `NULL` or an object of class Date. ",
"Did you forget to convert a string to Date with `as.Date()`?"
)
)

# Define possible unit values
units <- c("hour", "day")

Expand Down Expand Up @@ -124,63 +163,72 @@ get_custom_effort <- function(package = NULL,
# Get deployments
deployments <- package$data$deployments

# Stop function and inform user about deployments with missing `start` date
no_start_deployments <- deployments[is.na(deployments$start),]$deploymentID
if (length(no_start_deployments) > 0) {
stop(
glue::glue(
"The deployments with the following deploymentID ",
"have missing `start` value: ",
glue::glue_collapse(no_start_deployments, sep = ", ", last = " and "),
"."
)
)
}

# Stop function and inform user about deployments with missing `end` date
no_end_deployments <- deployments[is.na(deployments$end),]$deploymentID
if (length(no_end_deployments) > 0) {
stop(
glue::glue(
"The deployments with the following deploymentID ",
"have missing `end` value: ",
glue::glue_collapse(no_end_deployments, sep = ", ", last = " and "),
"."
)
)
}

# Camera operation matrix with filter(s) on deployments
cam_op <- get_cam_op(package, ..., station_col = "deploymentID")

# Sum effort over all deployments for each day (in day units)
sum_effort <- colSums(cam_op, na.rm = TRUE, dims = 1)

sum_effort <- dplyr::tibble(
date = lubridate::as_date(names(sum_effort)),
sum_effort = sum_effort
)

# Check start and end are both dates
assertthat::assert_that(
is.null(start) | all(class(start) == "Date"),
msg = glue::glue(
"`start` must be `NULL` or an object of class Date. ",
"Did you forget to convert a string to Date with `as.Date()`?"
)
)
assertthat::assert_that(
is.null(end) | all(class(end) == "Date"),
msg = glue::glue(
"`end` must be `NULL` or an object of class Date. ",
"Did you forget to convert a string to Date with `as.Date()`?"
)
)
# Transform camera operation matrix to df with effort per deployment
dep_effort <- cam_op %>%
dplyr::as_tibble(rownames = "deploymentID") %>%
tidyr::pivot_longer(cols = -"deploymentID",
names_to = "date",
values_to = "effort") %>%
dplyr::mutate(date = lubridate::as_date(.data$date))

# Check start is earlier than end of the latest deployment
if (!is.null(start)) {
assertthat::assert_that(
start <= sum_effort$date[nrow(sum_effort)],
start <= max(dep_effort$date),
msg = glue::glue(
"`start` value is set too late. ",
"`start` value must be not later than the end of the ",
"latest deployment: {sum_effort$date[nrow(sum_effort)]}."
"latest deployment: {max(dep_effort$date)}."
)
)
}

# Check end is later than begin of the earliest deployment
if (!is.null(end)) {
assertthat::assert_that(
end >= sum_effort$date[1],
end >= min(dep_effort$date),
msg = glue::glue(
"`end` value is set too early. ",
"`end` value must be not earlier than the start of the ",
"earliest deployment: {sum_effort$date[1]}."
"earliest deployment: {min(dep_effort$date)}."
)
)
}


# Check start is not earlier than start first deployment date.
# Return a warning and set start to first day deployment otherwise.
if (!is.null(start)) {
if (lubridate::as_date(start) < sum_effort$date[1]) {
start <- sum_effort$date[1]
if (lubridate::as_date(start) < min(dep_effort$date)) {
start <- min(dep_effort$date)
warning(
glue::glue(
"`start` value is set too early. ",
Expand All @@ -191,13 +239,13 @@ get_custom_effort <- function(package = NULL,
}
} else {
# Set start to date of the earliest deployment
start <- sum_effort$date[1]
start <- min(dep_effort$date)
}
# Check end is not later than end last deployment date.
# Return a warning and set end to last day deployment otherwise.
if (!is.null(end)) {
if (lubridate::as_date(end) > sum_effort$date[nrow(sum_effort)]) {
end <- sum_effort$date[nrow(sum_effort)]
if (lubridate::as_date(end) > max(dep_effort$date)) {
end <- max(dep_effort$date)
warning(
glue::glue(
"`end` value is set too late. ",
Expand All @@ -207,55 +255,62 @@ get_custom_effort <- function(package = NULL,
}
} else {
# Set end to date of the latest deployment
end <- sum_effort$date[nrow(sum_effort)]
end <- max(dep_effort$date)
}

# Check start earlier than end
assertthat::assert_that(start <= end,
msg = "`start` must be earlier than `end`."
)

# Create df with all dates from start to end
dates_df <- dplyr::tibble(date = seq(start, end, by = "days"))

# Join dates_df to sum_effort
sum_effort <-
# Join dates_df to dep_effort
dep_effort <-
dates_df %>%
dplyr::left_join(sum_effort, by = "date")
dplyr::left_join(dep_effort, by = "date")

# Filter by start and end date
sum_effort <-
sum_effort %>%
dep_effort <-
dep_effort %>%
dplyr::filter(.data$date >= start & .data$date <= end)

if (is.null(group_by)) {
# Calculate total effort (days) over all deployments
# Calculate total effort (days) per deployment
sum_effort <-
sum_effort %>%
dep_effort %>%
dplyr::group_by(.data$deploymentID) %>%
dplyr::summarise(
begin = start,
effort = sum(.data$sum_effort, na.rm = TRUE)
effort = sum(.data$effort, na.rm = TRUE)
)
} else {
# Calculate total effort (days) per deployment and given temporal grouping
sum_effort <-
sum_effort %>%
dep_effort %>%
dplyr::mutate(
begin = lubridate::floor_date(.data$date, unit = group_by)) %>%
dplyr::group_by(.data$begin) %>%
dplyr::summarise(effort = sum(.data$sum_effort, na.rm = TRUE))
dplyr::group_by(.data$deploymentID, .data$begin) %>%
dplyr::summarise(effort = sum(.data$effort, na.rm = TRUE))
}

# Transform effort to hours if needed
if (unit == "hour") {
sum_effort <-
sum_effort %>%
dplyr::ungroup() %>%
dplyr::mutate(effort = .data$effort * 24)
}

# Add locations (`locationName`)
sum_effort <- dplyr::left_join(
sum_effort,
dplyr::select(deployments, "deploymentID", "locationName"),
by = "deploymentID"
)

# Add unit column and adjust column order
sum_effort %>%
dplyr::mutate(unit = unit) %>%
dplyr::select(
"deploymentID",
"locationName",
"begin",
"effort",
"unit"
Expand Down
Loading

0 comments on commit 079713f

Please sign in to comment.