diff --git a/R/breaks-by-group-size.R b/R/breaks-by-group-size.R new file mode 100644 index 0000000..fe5d87b --- /dev/null +++ b/R/breaks-by-group-size.R @@ -0,0 +1,143 @@ + +#' @rdname chop_quantiles +#' +#' @export +#' @order 2 +brk_quantiles <- function (probs, ..., weights = NULL) { + assert_that( + is.numeric(probs), + noNA(probs), + all(probs >= 0), + all(probs <= 1), + is.null(weights) || is.numeric(weights) + ) + probs <- sort(probs) + + function (x, extend, left, close_end) { + dots <- list(...) + dots$x <- x + if (! is.numeric(x) && ! "type" %in% names(dots)) dots$type <- 1 + dots$probs <- probs + dots$na.rm <- TRUE + + qs <- if (is.null(weights)) { + do.call(stats::quantile, dots) + } else { + rlang::check_installed("Hmisc", + reason = "to use `weights` in brk_quantiles()") + dots$weights <- weights + do.call(Hmisc::wtd.quantile, dots) + } + + if (anyNA(qs)) return(empty_breaks()) # data was all NA + + if (any(duplicated(qs))) { + warning("`x` has non-unique quantiles: break labels may be misleading") + dupe_middles <- find_duplicated_middles(qs) + qs <- qs[! dupe_middles] + probs <- probs[! dupe_middles] + } + + breaks <- create_lr_breaks(qs, left) + + needs <- needs_extend(breaks, x, extend, left, close_end) + if ((needs & LEFT) > 0) probs <- c(0, probs) + if ((needs & RIGHT) > 0) probs <- c(probs, 1) + breaks <- extend_and_close(breaks, x, extend, left, close_end) + + class(breaks) <- c("quantileBreaks", class(breaks)) + attr(breaks, "scaled_endpoints") <- probs + names(breaks) <- names(probs) + + breaks + } +} + + +#' @rdname chop_equally +#' +#' @export +#' @order 2 +brk_equally <- function (groups) { + assert_that(is.count(groups)) + + brq <- brk_quantiles(seq(0L, groups)/groups) + + function (x, extend, left, close_end) { + breaks <- brq(x = x, extend = extend, left = left, close_end = close_end) + + if (length(breaks) < groups + 1) { + warning("Fewer than ", groups, " intervals created") + } + + breaks + } +} + + +#' @rdname chop_n +#' @export +#' @order 2 +brk_n <- function (n, tail = "split") { + assert_that(is.count(n), tail == "split" || tail == "merge") + + function (x, extend, left, close_end) { + xs <- sort(x, decreasing = ! left, na.last = NA) # remove NAs + if (length(xs) < 1L) return(empty_breaks()) + + dupes <- duplicated(xs) + breaks <- xs[0] # ensures breaks has type of xs + last_x <- xs[length(xs)] + + maybe_merge_tail <- function (breaks, tail) { + if (tail == "merge" && length(breaks) > 1) { + breaks <- breaks[-length(breaks)] + } + breaks + } + + # Idea of the algorithm: + # Loop: + # if there are no dupes, just take a sequence of each nth element + # starting at 1, and exit + # if there are remaining dupes, then take the first element + # set m to the (n+1)th element which would normally be next + # if element m is a dupe: + # - we need to go up, otherwise elements to the left will be in the next + # interval, and this interval will be too small + # - so set m to the next non-dupe (i.e. strictly larger) element + # now delete the first m-1 elements + # And repeat + while (TRUE) { + if (! any(dupes)) { + breaks <- c(breaks, xs[seq(1L, length(xs), n)]) + if (length(xs) %% n > 0) { + breaks <- maybe_merge_tail(breaks, tail) + } + break + } + breaks <- c(breaks, xs[1]) + m <- n + 1 + if (length(xs) <= n || all(dupes[-seq_len(m - 1)])) { + if (length(xs) < n) { + breaks <- maybe_merge_tail(breaks, tail) + } + break + } + if (dupes[m]) { + # the first non-dupe will be the next element that is different + # we know there is one, because we checked above + m <- m + match(FALSE, dupes[-(1:m)]) + } + discard <- seq_len(m - 1) + xs <- xs[-discard] + dupes <- dupes[-discard] + } + + breaks <- c(breaks, last_x) + if (! left) breaks <- rev(breaks) + breaks <- create_extended_breaks(breaks, x, extend, left, close_end) + + breaks + } +} diff --git a/R/breaks-by-width.R b/R/breaks-by-width.R new file mode 100644 index 0000000..44f6efa --- /dev/null +++ b/R/breaks-by-width.R @@ -0,0 +1,125 @@ + +#' Equal-width intervals for dates or datetimes +#' +#' `brk_width()` can be used with time interval classes from base R or the +#' `lubridate` package. +#' +#' @param width A scalar [difftime], [Period][lubridate::Period-class] or +#' [Duration][lubridate::Duration-class] object. +#' +#' @param start A scalar of class [Date] or [POSIXct][DateTimeClasses]. +#' Can be omitted. +#' +#' @details +#' If `width` is a Period, [`lubridate::add_with_rollback()`][`lubridate::m+`] +#' is used to calculate the widths. This can be useful for e.g. calendar months. +#' +#' @examples +#' +#' if (requireNamespace("lubridate")) { +#' year2001 <- as.Date("2001-01-01") + 0:364 +#' tab_width(year2001, months(1), +#' labels = lbl_discrete(" to ", fmt = "%e %b %y")) +#' } +#' +#' @name brk_width-for-datetime +NULL + + +#' @rdname chop_width +#' @export +#' @order 2 +brk_width <- function (width, start) UseMethod("brk_width") + + +#' @rdname brk_width-for-datetime +#' @export +brk_width.Duration <- function (width, start) { + loadNamespace("lubridate") + width <- lubridate::make_difftime(as.numeric(width)) + NextMethod() +} + + +#' @rdname chop_width +#' @export +#' @order 2 +brk_width.default <- function (width, start) { + assert_that(is.scalar(width)) + + sm <- missing(start) + if (! sm) assert_that(is.scalar(start)) + + function (x, extend, left, close_end) { + # finite if x has any non-NA finite elements: + min_x <- quiet_min(x[is.finite(x)]) + max_x <- quiet_max(x[is.finite(x)]) + + if (sm) { + start <- if (sign(width) > 0) min_x else max_x + } + until <- if (sign(width) > 0) max_x else min_x + + if (is.finite(start) && is.finite(until)) { + breaks <- sequence_width(width, start, until) + } else { + return(empty_breaks()) + } + + if (sign(width) <= 0) breaks <- rev(breaks) + + breaks <- create_extended_breaks(breaks, x, extend, left, close_end) + + breaks + } +} + + +#' @rdname chop_evenly +#' @export +#' @order 2 +brk_evenly <- function(intervals) { + assert_that(is.count(intervals)) + + function (x, extend, left, close_end) { + min_x <- quiet_min(x[is.finite(x)]) + max_x <- quiet_max(x[is.finite(x)]) + if (sign(max_x - min_x) <= 0) return(empty_breaks()) + + breaks <- seq(min_x, max_x, length.out = intervals + 1L) + breaks <- create_extended_breaks(breaks, x, extend, left, close_end) + + breaks + } +} + + +#' @rdname chop_proportions +#' @export +#' @order 2 +brk_proportions <- function(proportions) { + assert_that(is.numeric(proportions), noNA(proportions), + all(proportions >= 0), all(proportions <= 1)) + proportions <- sort(proportions) + + function (x, extend, left, close_end) { + min_x <- quiet_min(x[is.finite(x)]) + max_x <- quiet_max(x[is.finite(x)]) + range_x <- max_x - min_x + if (sign(range_x) <= 0) return(empty_breaks()) + + breaks <- min_x + range_x * proportions + breaks <- create_lr_breaks(breaks, left) + + scaled_endpoints <- proportions + needs <- needs_extend(breaks, x, extend, left, close_end) + if ((needs & LEFT) > 0) scaled_endpoints <- c(0, scaled_endpoints) + if ((needs & RIGHT) > 0) scaled_endpoints <- c(scaled_endpoints, 1) + breaks <- extend_and_close(breaks, x, extend, left, close_end) + + attr(breaks, "scaled_endpoints") <- scaled_endpoints + names(breaks) <- names(scaled_endpoints) + + breaks + } +} \ No newline at end of file diff --git a/R/breaks-misc.R b/R/breaks-misc.R new file mode 100644 index 0000000..4f455d7 --- /dev/null +++ b/R/breaks-misc.R @@ -0,0 +1,147 @@ + +#' Create a `breaks` object manually +#' +#' @param breaks A vector, which must be sorted. +#' @param left_vec A logical vector, the same length as `breaks`. +#' Specifies whether each break is left-closed or right-closed. +#' +#' @inherit breaks-doc return +#' +#' @details +#' +#' All breaks must be closed on exactly one side, like `..., x) [x, ...` +#' (left-closed) or `..., x) [x, ...` (right-closed). +#' +#' For example, if `breaks = 1:3` and `left = c(TRUE, FALSE, TRUE)`, then the +#' resulting intervals are \preformatted{ +#' T F T +#' [ 1, 2 ] ( 2, 3 ) +#' } +#' +#' Singleton breaks are created by repeating a number in `breaks`. Singletons +#' must be closed on both sides, so if there is a repeated number +#' at indices `i`, `i+1`, `left[i]` *must* be `TRUE` and `left[i+1]` must be +#' `FALSE`. +#' +#' @export +#' +#' @examples +#' lbrks <- brk_manual(1:3, rep(TRUE, 3)) +#' chop(1:3, lbrks, extend = FALSE) +#' +#' rbrks <- brk_manual(1:3, rep(FALSE, 3)) +#' chop(1:3, rbrks, extend = FALSE) +#' +#' brks_singleton <- brk_manual( +#' c(1, 2, 2, 3), +#' c(TRUE, TRUE, FALSE, TRUE)) +#' +#' chop(1:3, brks_singleton, extend = FALSE) +#' +brk_manual <- function (breaks, left_vec) { + assert_that( + is.numeric(breaks), + noNA(breaks), + is.logical(left_vec), + noNA(left_vec), + length(left_vec) == length(breaks) + ) + + + function (x, extend, left, close_end) { + if (! left) warning("Ignoring `left` with `brk_manual()`") + if (close_end) warning("Ignoring `close_end` with `brk_manual()`") + breaks <- create_breaks(breaks, left_vec) + breaks <- extend_and_close(breaks, x, extend, left, close_end) + } +} + + +#' @rdname chop_fn +#' @export +#' @order 2 +brk_fn <- function (fn, ...) { + assert_that(is.function(fn)) + + function (x, extend, left, close_end) { + breaks <- fn(x, ...) + # some functions (e.g. quantile()) return a named vector + # which might create surprise labels: + breaks <- unname(breaks) + assert_that(is.numeric(breaks)) + if (length(breaks) == 0) { + return(empty_breaks()) + } + + breaks <- create_extended_breaks(breaks, x, extend, left, close_end) + + breaks + } +} + + +#' @rdname chop_pretty +#' +#' @export +#' @order 2 +brk_pretty <- function (n = 5, ...) { + assert_that(is.count(n)) + + function (x, extend, left, close_end) { + breaks <- base::pretty(x, n = n, ...) + if (length(breaks) == 0 || is.null(breaks)) { + return(empty_breaks()) + } + + breaks <- create_extended_breaks(breaks, x, extend, left, close_end) + + breaks + } +} + + +#' @rdname chop_mean_sd +#' @export +#' @order 2 +#' @importFrom lifecycle deprecated +brk_mean_sd <- function (sds = 1:3, sd = deprecated()) { + if (lifecycle::is_present(sd)) { + lifecycle::deprecate_warn( + when = "0.7.0", + what = "brk_mean_sd(sd)", + with = "brk_mean_sd(sds = 'vector of sds')" + ) + assert_that(is.number(sd), sd > 0) + # we start from 0 but remove the 0 + # this works for e.g. sd = 0.5, whereas seq(1L, sd, 1L) would not: + sds <- seq(0L, sd, 1L)[-1] + if (! sd %in% sds) sds <- c(sds, sd) + } + + assert_that(is.numeric(sds), all(sds > 0)) + + function (x, extend, left, close_end) { + x_mean <- mean(x, na.rm = TRUE) + x_sd <- stats::sd(x, na.rm = TRUE) + + if (is.na(x_mean) || is.na(x_sd) || x_sd == 0) { + return(empty_breaks()) + } + + # add negative sds, then scale them by mean and sd + sds <- sort(sds) + sds <- c(-rev(sds), 0, sds) + breaks <- sds * x_sd + x_mean + breaks <- create_lr_breaks(breaks, left) + + needs <- needs_extend(breaks, x, extend, left, close_end) + if ((needs & LEFT) > 0) sds <- c(-Inf, sds) + if ((needs & RIGHT) > 0) sds <- c(sds, Inf) + breaks <- extend_and_close(breaks, x, extend, left, close_end) + + class(breaks) <- c("sdBreaks", class(breaks)) + attr(breaks, "scaled_endpoints") <- sds + + breaks + } +} \ No newline at end of file diff --git a/R/breaks.R b/R/breaks.R index 64b2990..ccc523d 100644 --- a/R/breaks.R +++ b/R/breaks.R @@ -1,342 +1,4 @@ -#' @rdname chop_quantiles -#' -#' @export -#' @order 2 -brk_quantiles <- function (probs, ..., weights = NULL) { - assert_that( - is.numeric(probs), - noNA(probs), - all(probs >= 0), - all(probs <= 1), - is.null(weights) || is.numeric(weights) - ) - probs <- sort(probs) - - function (x, extend, left, close_end) { - dots <- list(...) - dots$x <- x - if (! is.numeric(x) && ! "type" %in% names(dots)) dots$type <- 1 - dots$probs <- probs - dots$na.rm <- TRUE - - qs <- if (is.null(weights)) { - do.call(stats::quantile, dots) - } else { - rlang::check_installed("Hmisc", - reason = "to use `weights` in brk_quantiles()") - dots$weights <- weights - do.call(Hmisc::wtd.quantile, dots) - } - - if (anyNA(qs)) return(empty_breaks()) # data was all NA - - if (any(duplicated(qs))) { - warning("`x` has non-unique quantiles: break labels may be misleading") - dupe_middles <- find_duplicated_middles(qs) - qs <- qs[! dupe_middles] - probs <- probs[! dupe_middles] - } - - breaks <- create_lr_breaks(qs, left) - - needs <- needs_extend(breaks, x, extend, left, close_end) - if ((needs & LEFT) > 0) probs <- c(0, probs) - if ((needs & RIGHT) > 0) probs <- c(probs, 1) - breaks <- extend_and_close(breaks, x, extend, left, close_end) - - class(breaks) <- c("quantileBreaks", class(breaks)) - attr(breaks, "scaled_endpoints") <- probs - names(breaks) <- names(probs) - - breaks - } -} - - -#' @rdname chop_equally -#' -#' @export -#' @order 2 -brk_equally <- function (groups) { - assert_that(is.count(groups)) - - brq <- brk_quantiles(seq(0L, groups)/groups) - - function (x, extend, left, close_end) { - breaks <- brq(x = x, extend = extend, left = left, close_end = close_end) - - if (length(breaks) < groups + 1) { - warning("Fewer than ", groups, " intervals created") - } - - breaks - } -} - - -#' @rdname chop_mean_sd -#' @export -#' @order 2 -#' @importFrom lifecycle deprecated -brk_mean_sd <- function (sds = 1:3, sd = deprecated()) { - if (lifecycle::is_present(sd)) { - lifecycle::deprecate_warn( - when = "0.7.0", - what = "brk_mean_sd(sd)", - with = "brk_mean_sd(sds = 'vector of sds')" - ) - assert_that(is.number(sd), sd > 0) - # we start from 0 but remove the 0 - # this works for e.g. sd = 0.5, whereas seq(1L, sd, 1L) would not: - sds <- seq(0L, sd, 1L)[-1] - if (! sd %in% sds) sds <- c(sds, sd) - } - - assert_that(is.numeric(sds), all(sds > 0)) - - function (x, extend, left, close_end) { - x_mean <- mean(x, na.rm = TRUE) - x_sd <- stats::sd(x, na.rm = TRUE) - - if (is.na(x_mean) || is.na(x_sd) || x_sd == 0) { - return(empty_breaks()) - } - - # add negative sds, then scale them by mean and sd - sds <- sort(sds) - sds <- c(-rev(sds), 0, sds) - breaks <- sds * x_sd + x_mean - breaks <- create_lr_breaks(breaks, left) - - needs <- needs_extend(breaks, x, extend, left, close_end) - if ((needs & LEFT) > 0) sds <- c(-Inf, sds) - if ((needs & RIGHT) > 0) sds <- c(sds, Inf) - breaks <- extend_and_close(breaks, x, extend, left, close_end) - - class(breaks) <- c("sdBreaks", class(breaks)) - attr(breaks, "scaled_endpoints") <- sds - - breaks - } -} - - - -#' @rdname chop_pretty -#' -#' @export -#' @order 2 -brk_pretty <- function (n = 5, ...) { - assert_that(is.count(n)) - - function (x, extend, left, close_end) { - breaks <- base::pretty(x, n = n, ...) - if (length(breaks) == 0 || is.null(breaks)) { - return(empty_breaks()) - } - - breaks <- create_extended_breaks(breaks, x, extend, left, close_end) - - breaks - } -} - - -#' Equal-width intervals for dates or datetimes -#' -#' `brk_width()` can be used with time interval classes from base R or the -#' `lubridate` package. -#' -#' @param width A scalar [difftime], [Period][lubridate::Period-class] or -#' [Duration][lubridate::Duration-class] object. -#' -#' @param start A scalar of class [Date] or [POSIXct][DateTimeClasses]. -#' Can be omitted. -#' -#' @details -#' If `width` is a Period, [`lubridate::add_with_rollback()`][`lubridate::m+`] -#' is used to calculate the widths. This can be useful for e.g. calendar months. -#' -#' @examples -#' -#' if (requireNamespace("lubridate")) { -#' year2001 <- as.Date("2001-01-01") + 0:364 -#' tab_width(year2001, months(1), -#' labels = lbl_discrete(" to ", fmt = "%e %b %y")) -#' } -#' -#' @name brk_width-for-datetime -NULL - - -#' @rdname chop_width -#' @export -#' @order 2 -brk_width <- function (width, start) UseMethod("brk_width") - - -#' @rdname brk_width-for-datetime -#' @export -brk_width.Duration <- function (width, start) { - loadNamespace("lubridate") - width <- lubridate::make_difftime(as.numeric(width)) - NextMethod() -} - - -#' @rdname chop_width -#' @export -#' @order 2 -brk_width.default <- function (width, start) { - assert_that(is.scalar(width)) - - sm <- missing(start) - if (! sm) assert_that(is.scalar(start)) - - function (x, extend, left, close_end) { - # finite if x has any non-NA finite elements: - min_x <- quiet_min(x[is.finite(x)]) - max_x <- quiet_max(x[is.finite(x)]) - - if (sm) { - start <- if (sign(width) > 0) min_x else max_x - } - until <- if (sign(width) > 0) max_x else min_x - - if (is.finite(start) && is.finite(until)) { - breaks <- sequence_width(width, start, until) - } else { - return(empty_breaks()) - } - - if (sign(width) <= 0) breaks <- rev(breaks) - - breaks <- create_extended_breaks(breaks, x, extend, left, close_end) - - breaks - } -} - - -#' @rdname chop_evenly -#' @export -#' @order 2 -brk_evenly <- function(intervals) { - assert_that(is.count(intervals)) - - function (x, extend, left, close_end) { - min_x <- quiet_min(x[is.finite(x)]) - max_x <- quiet_max(x[is.finite(x)]) - if (sign(max_x - min_x) <= 0) return(empty_breaks()) - - breaks <- seq(min_x, max_x, length.out = intervals + 1L) - breaks <- create_extended_breaks(breaks, x, extend, left, close_end) - - breaks - } -} - - -#' @rdname chop_proportions -#' @export -#' @order 2 -brk_proportions <- function(proportions) { - assert_that(is.numeric(proportions), noNA(proportions), - all(proportions >= 0), all(proportions <= 1)) - proportions <- sort(proportions) - - function (x, extend, left, close_end) { - min_x <- quiet_min(x[is.finite(x)]) - max_x <- quiet_max(x[is.finite(x)]) - range_x <- max_x - min_x - if (sign(range_x) <= 0) return(empty_breaks()) - - breaks <- min_x + range_x * proportions - breaks <- create_lr_breaks(breaks, left) - - scaled_endpoints <- proportions - needs <- needs_extend(breaks, x, extend, left, close_end) - if ((needs & LEFT) > 0) scaled_endpoints <- c(0, scaled_endpoints) - if ((needs & RIGHT) > 0) scaled_endpoints <- c(scaled_endpoints, 1) - breaks <- extend_and_close(breaks, x, extend, left, close_end) - - attr(breaks, "scaled_endpoints") <- scaled_endpoints - names(breaks) <- names(scaled_endpoints) - - breaks - } -} - - -#' @rdname chop_n -#' @export -#' @order 2 -brk_n <- function (n, tail = "split") { - assert_that(is.count(n), tail == "split" || tail == "merge") - - function (x, extend, left, close_end) { - xs <- sort(x, decreasing = ! left, na.last = NA) # remove NAs - if (length(xs) < 1L) return(empty_breaks()) - - dupes <- duplicated(xs) - breaks <- xs[0] # ensures breaks has type of xs - last_x <- xs[length(xs)] - - maybe_merge_tail <- function (breaks, tail) { - if (tail == "merge" && length(breaks) > 1) { - breaks <- breaks[-length(breaks)] - } - breaks - } - - # Idea of the algorithm: - # Loop: - # if there are no dupes, just take a sequence of each nth element - # starting at 1, and exit - # if there are remaining dupes, then take the first element - # set m to the (n+1)th element which would normally be next - # if element m is a dupe: - # - we need to go up, otherwise elements to the left will be in the next - # interval, and this interval will be too small - # - so set m to the next non-dupe (i.e. strictly larger) element - # now delete the first m-1 elements - # And repeat - while (TRUE) { - if (! any(dupes)) { - breaks <- c(breaks, xs[seq(1L, length(xs), n)]) - if (length(xs) %% n > 0) { - breaks <- maybe_merge_tail(breaks, tail) - } - break - } - breaks <- c(breaks, xs[1]) - m <- n + 1 - if (length(xs) <= n || all(dupes[-seq_len(m - 1)])) { - if (length(xs) < n) { - breaks <- maybe_merge_tail(breaks, tail) - } - break - } - if (dupes[m]) { - # the first non-dupe will be the next element that is different - # we know there is one, because we checked above - m <- m + match(FALSE, dupes[-(1:m)]) - } - discard <- seq_len(m - 1) - xs <- xs[-discard] - dupes <- dupes[-discard] - } - - breaks <- c(breaks, last_x) - if (! left) breaks <- rev(breaks) - breaks <- create_extended_breaks(breaks, x, extend, left, close_end) - - breaks - } -} - - #' @param breaks A numeric vector. #' @name breaks-doc #' @return A function which returns an object of class `breaks`. @@ -362,87 +24,6 @@ brk_default <- function (breaks) { } -#' Create a `breaks` object manually -#' -#' @param breaks A vector, which must be sorted. -#' @param left_vec A logical vector, the same length as `breaks`. -#' Specifies whether each break is left-closed or right-closed. -#' -#' @inherit breaks-doc return -#' -#' @details -#' -#' All breaks must be closed on exactly one side, like `..., x) [x, ...` -#' (left-closed) or `..., x) [x, ...` (right-closed). -#' -#' For example, if `breaks = 1:3` and `left = c(TRUE, FALSE, TRUE)`, then the -#' resulting intervals are \preformatted{ -#' T F T -#' [ 1, 2 ] ( 2, 3 ) -#' } -#' -#' Singleton breaks are created by repeating a number in `breaks`. Singletons -#' must be closed on both sides, so if there is a repeated number -#' at indices `i`, `i+1`, `left[i]` *must* be `TRUE` and `left[i+1]` must be -#' `FALSE`. -#' -#' @export -#' -#' @examples -#' lbrks <- brk_manual(1:3, rep(TRUE, 3)) -#' chop(1:3, lbrks, extend = FALSE) -#' -#' rbrks <- brk_manual(1:3, rep(FALSE, 3)) -#' chop(1:3, rbrks, extend = FALSE) -#' -#' brks_singleton <- brk_manual( -#' c(1, 2, 2, 3), -#' c(TRUE, TRUE, FALSE, TRUE)) -#' -#' chop(1:3, brks_singleton, extend = FALSE) -#' -brk_manual <- function (breaks, left_vec) { - assert_that( - is.numeric(breaks), - noNA(breaks), - is.logical(left_vec), - noNA(left_vec), - length(left_vec) == length(breaks) - ) - - - function (x, extend, left, close_end) { - if (! left) warning("Ignoring `left` with `brk_manual()`") - if (close_end) warning("Ignoring `close_end` with `brk_manual()`") - breaks <- create_breaks(breaks, left_vec) - breaks <- extend_and_close(breaks, x, extend, left, close_end) - } -} - - -#' @rdname chop_fn -#' @export -#' @order 2 -brk_fn <- function (fn, ...) { - assert_that(is.function(fn)) - - function (x, extend, left, close_end) { - breaks <- fn(x, ...) - # some functions (e.g. quantile()) return a named vector - # which might create surprise labels: - breaks <- unname(breaks) - assert_that(is.numeric(breaks)) - if (length(breaks) == 0) { - return(empty_breaks()) - } - - breaks <- create_extended_breaks(breaks, x, extend, left, close_end) - - breaks - } -} - - #' @rdname chop_spikes #' @export #' @order 2 diff --git a/R/chop-by-group-size.R b/R/chop-by-group-size.R new file mode 100644 index 0000000..f405b77 --- /dev/null +++ b/R/chop-by-group-size.R @@ -0,0 +1,160 @@ + +#' Chop by quantiles +#' +#' `chop_quantiles()` chops data by quantiles. +#' `chop_deciles()` is a convenience function which chops into deciles. +#' +#' @param probs A vector of probabilities for the quantiles. If `probs` has +#' names, these will be used for labels. +#' @param ... For `chop_quantiles`, passed to [chop()]. For `brk_quantiles()`, +#' passed to [stats::quantile()] or [Hmisc::wtd.quantile()]. +#' @param weights `NULL` or numeric vector of same length as `x`. If not +#' `NULL`, [Hmisc::wtd.quantile()] is used to calculate weighted quantiles. +#' +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @details +#' For non-numeric `x`, `left` is set to `FALSE` by default. This works better +#' for calculating "type 1" quantiles, since they round down. See +#' [stats::quantile()]. +#' +#' If `x` contains duplicates, consecutive quantiles may be the same number. If +#' so, quantile labels may be misleading and a warning is emitted. +#' +#' @family chopping functions +#' +#' @export +#' @order 1 +#' +#' @examples +#' chop_quantiles(1:10, 1:3/4) +#' +#' chop_quantiles(1:10, c(Q1 = 0, Q2 = 0.25, Q3 = 0.5, Q4 = 0.75)) +#' +#' chop(1:10, brk_quantiles(1:3/4)) +#' +#' chop_deciles(1:10) +#' +#' # to label by the quantiles themselves: +#' chop_quantiles(1:10, 1:3/4, raw = TRUE) +#' +#' # duplicate quantiles: +#' tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5) +#' +chop_quantiles <- function( + x, + probs, + ..., + labels = if (raw) lbl_intervals() else + lbl_intervals(single = NULL), + left = is.numeric(x), + raw = FALSE, + weights = NULL + ) { + chop(x, brk_quantiles(probs, weights = weights), labels = labels, ..., + left = left, raw = raw) +} + + +#' @rdname chop_quantiles +#' @export +#' @order 1 +chop_deciles <- function(x, ...) { + chop_quantiles(x, 0:10/10, ...) +} + + +#' Chop equal-sized groups +#' +#' `chop_equally()` chops `x` into groups with an equal number of elements. +#' +#' @param groups Number of groups. +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @details +#' `chop_equally()` uses [brk_quantiles()] under the hood. If `x` has duplicate +#' elements, you may get fewer `groups` than requested. If so, a warning will +#' be emitted. See the examples. +#' +#' +#' +#' @family chopping functions +#' +#' @export +#' @order 1 +#' @examples +#' chop_equally(1:10, 5) +#' +#' # You can't always guarantee `groups` groups: +#' dupes <- c(1, 1, 1, 2, 3, 4, 4, 4) +#' quantile(dupes, 0:4/4) +#' chop_equally(dupes, 4) +chop_equally <- function ( + x, + groups, + ..., + labels = lbl_intervals(), + left = is.numeric(x), + close_end = TRUE, + raw = TRUE + ) { + chop(x, brk_equally(groups), ..., labels = labels, left = left, + close_end = close_end, raw = raw) +} + + +#' Chop into fixed-sized groups +#' +#' `chop_n()` creates intervals containing a fixed number of elements. +#' +#' @param n Integer. Number of elements in each interval. +#' @inheritParams chop +#' @param tail String. What to do if the final interval has fewer than `n` elements? +#' `"split"` to keep it separate. `"merge"` to merge it with the neighbouring +#' interval. +#' @inherit chop-doc params return +#' +#' +#' @details +#' +#' The algorithm guarantees that intervals contain no more than `n` elements, so +#' long as there are no duplicates in `x` and `tail = "split"`. It also +#' guarantees that intervals contain no fewer than `n` elements, except possibly +#' the last interval (or first interval if `left` is `FALSE`). +#' +#' To ensure that all intervals contain at least `n` elements (so long as there +#' are at least `n` elements in `x`!) set `tail = "merge"`. +#' +#' If `tail = "split"` and there are intervals containing duplicates with more +#' than `n` elements, a warning is given. +#' +#' @export +#' @order 1 +#' @family chopping functions +#' @examples +#' chop_n(1:10, 5) +#' +#' chop_n(1:5, 2) +#' chop_n(1:5, 2, tail = "merge") +#' +#' # too many duplicates +#' x <- rep(1:2, each = 3) +#' chop_n(x, 2) +#' +chop_n <- function ( + x, + n, + ..., + close_end = TRUE, + tail = "split" + ) { + res <- chop(x, brk_n(n, tail = tail), ..., close_end = close_end) + if (tail == "split" && max(tabulate(res)) > n) { + warning("Some intervals contain more than ", n, " elements") + } + + res +} + diff --git a/R/chop-by-width.R b/R/chop-by-width.R new file mode 100644 index 0000000..88c6bd1 --- /dev/null +++ b/R/chop-by-width.R @@ -0,0 +1,96 @@ + +#' Chop into fixed-width intervals +#' +#' `chop_width()` chops `x` into intervals of fixed `width`. +#' +#' @param width Width of intervals. +#' @param start Starting point for intervals. By default the smallest +#' finite `x` (largest if `width` is negative). +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @details +#' If `width` is negative, `chop_width()` sets `left = FALSE` and intervals will +#' go downwards from `start`. +#' +#' @family chopping functions +#' @seealso [brk_width-for-datetime] +#' +#' @export +#' @order 1 +#' +#' @examples +#' chop_width(1:10, 2) +#' +#' chop_width(1:10, 2, start = 0) +#' +#' chop_width(1:9, -2) +#' +#' chop(1:10, brk_width(2, 0)) +#' +chop_width <- function ( + x, + width, + start, + ..., + left = sign(width) > 0 + ) { + chop(x, brk_width(width, start), ..., left = left) +} + + +#' Chop into equal-width intervals +#' +#' `chop_evenly()` chops `x` into `intervals` intervals of equal width. +#' +#' @param intervals Integer: number of intervals to create. +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @details `chop_evenly()` sets `close_end = TRUE` by default. +#' +#' @family chopping functions +#' +#' @export +#' @order 1 +#' @examples +#' chop_evenly(0:10, 5) +#' +chop_evenly <- function ( + x, + intervals, + ..., + close_end = TRUE + ) { + chop(x, brk_evenly(intervals), ..., close_end = close_end) +} + + +#' Chop into proportions of the range of x +#' +#' `chop_proportions()` chops `x` into `proportions` of its range, excluding +#' infinite values. +#' +#' By default, labels show the raw numeric endpoints. To label intervals by +#' the proportions, use `raw = FALSE`. +#' +#' @param proportions Numeric vector between 0 and 1: proportions of x's range. +#' If `proportions` has names, these will be used for labels. +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @export +#' @order 1 +#' @family chopping functions +#' @examples +#' chop_proportions(0:10, c(0.2, 0.8)) +#' chop_proportions(0:10, c(Low = 0, Mid = 0.2, High = 0.8)) +#' +chop_proportions <- function ( + x, + proportions, + ..., + raw = TRUE + ) { + chop(x, brk_proportions(proportions), ..., raw = raw) +} \ No newline at end of file diff --git a/R/chop-isolates.R b/R/chop-isolates.R new file mode 100644 index 0000000..64ed0cd --- /dev/null +++ b/R/chop-isolates.R @@ -0,0 +1,150 @@ + +#' Chop common values into separate categories +#' +#' `chop_spikes()` lets you isolate common values of `x` in their own +#' singleton intervals. This can help make unusual values visible. +#' +#' This function is `r lifecycle::badge("experimental")`. +#' +#' @param breaks A numeric vector of cut-points or a call to a `brk_*` function. +#' The resulting [`breaks`][breaks-class] object will be modified to add +#' singleton breaks. +#' @param n,prop Scalar. Provide either `n`, a number of values, or `prop`, +#' a proportion of `length(x)`. Values of `x` which occur at least this +#' often will get their own singleton break. +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @export +#' @order 1 +#' @family chopping functions +#' @seealso [isolate_chop()] for a different approach. +#' @examples +#' x <- c(1:4, rep(5, 5), 6:10) +#' chop_spikes(x, c(2, 7), n = 5) +#' chop_spikes(x, c(2, 7), prop = 0.25) +#' chop_spikes(x, brk_width(5), n = 5) +#' +#' set.seed(42) +#' x <- runif(40, 0, 10) +#' x <- sample(x, 200, replace = TRUE) +#' tab_spikes(x, brk_width(2, 0), prop = 0.05) +chop_spikes <- function ( + x, + breaks, + n = NULL, + prop = NULL, + ... +) { + chop(x, brk_spikes(breaks, n = n, prop = prop), ...) +} + + +#' Cut data into intervals, isolating common elements +#' +#' Sometimes it's useful to separate out common elements of `x`. +#' `isolate_chop()` first chops `x`, then puts common elements of `x` ("spikes") +#' into separate categories. +#' +#' Unlike [chop_spikes()], `isolate_chop()` doesn't break up +#' intervals which contain a spike. As a result, unlike other `chop_*` functions, +#' `isolate_chop()` does not typically chop `x` into disjoint intervals. See +#' the examples. +#' +#' If breaks are data-dependent, their labels may be misleading after common +#' elements have been removed. See the example below. +#' +#' Levels of the result are ordered by the minimum element in each level. As +#' a result, if `drop = FALSE`, empty levels will be placed last. +#' +#' This function is `r lifecycle::badge("experimental")`. +#' +#' @param x,breaks,... Passed to [chop()]. +#' @inheritParams chop_spikes +#' @param spike_labels Glue string for spike labels. Use `"{l}"` for the spike +#' value. +#' +#' @return +#' The result of [chop()], but with common values given their own factor levels. +#' +#' @seealso [chop_spikes()] for a different approach. +#' @export +#' +#' @examples +#' x <- c(2, 3, 3, 3, 4) +#' isolate_chop(x, c(2, 4), n = 3) +#' isolate_chop(x, brk_width(2), prop = 0.5) +#' +#' set.seed(42) +#' x <- runif(40, 0, 10) +#' x <- sample(x, 200, replace = TRUE) +#' # Compare: +#' table(isolate_chop(x, brk_width(2, 0), prop = 0.05)) +#' # Versus: +#' tab_spikes(x, brk_width(2, 0), prop = 0.05) +#' +#' # Misleading data-dependent breaks: +#' set.seed(42) +#' x <- rnorm(99) +#' x[1:10] <- x[1] +#' tab_quantiles(x, 1:2/3) +#' table(isolate_chop(x, brk_quantiles(1:2/3), prop = 0.1)) +isolate_chop <- function (x, + breaks, + ..., + n = NULL, + prop = NULL, + spike_labels = "{{{l}}}") { + assert_that( + is.number(n) || is.number(prop), + is.null(n) || is.null(prop), + msg = "exactly one of `n` and `prop` must be specified as a scalar numeric" + ) + + chopped <- chop(x, breaks, ...) + + spikes <- find_spikes(x, n, prop) + + elabels <- endpoint_labels(spikes, raw = TRUE) + glue_env <- new.env() + assign("l", elabels, envir = glue_env) + spike_labels <- glue::glue(spike_labels, .envir = glue_env) + + new_levels <- c(levels(chopped), spike_labels) + levels(chopped) <- new_levels + + x_spikes <- match(x, spikes) + is_spike <- ! is.na(x_spikes) + x_spikes <- x_spikes[is_spike] + chopped[is_spike] <- spike_labels[x_spikes] + + # We reorder the levels of chopped in order of their smallest elements. + # Note that if `drop = FALSE`, empty intervals will be at the end. + # The alternative would be to call `breaks` again and get the left endpoints + # but this is complex. + chopped <- stats::reorder(chopped, x, FUN = quiet_min) + attr(chopped, "scores") <- NULL # remove leftover from reorder() + + chopped +} + + +#' Find common elements in `x` +#' +#' @param x A vector +#' @param n Number of elements that counts as common. Specify exactly one of `n` +#' and `prop`. +#' @param prop Proportion of `length(x)` that counts as common +#' +#' @return The common elements, not necessarily in order. NA values are never +#' considered as common. +#' @noRd +find_spikes <- function (x, n, prop) { + n <- n %||% (length(x) * prop) + unique_x <- unique(x) + x_counts <- tabulate(match(x, unique_x)) + spikes <- unique_x[x_counts >= n] + spikes <- spikes[! is.na(spikes)] + + spikes +} \ No newline at end of file diff --git a/R/chop-misc.R b/R/chop-misc.R new file mode 100644 index 0000000..157f0aa --- /dev/null +++ b/R/chop-misc.R @@ -0,0 +1,106 @@ + + +#' Chop by standard deviations +#' +#' Intervals are measured in standard deviations on either side of the +#' mean. +#' +#' In version 0.7.0, these functions changed to specifying `sds` as a vector. +#' To chop 1, 2 and 3 standard deviations around the mean, write +#' `chop_mean_sd(x, sds = 1:3)` instead of `chop_mean_sd(x, sd = 3)`. +#' +#' @param sds Positive numeric vector of standard deviations. +#' @param sd `r lifecycle::badge("deprecated")` +#' +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @family chopping functions +#' +#' @export +#' @order 1 +#' +#' @examples +#' chop_mean_sd(1:10) +#' +#' chop(1:10, brk_mean_sd()) +#' +#' @importFrom lifecycle deprecated +chop_mean_sd <- function ( + x, + sds = 1:3, + ..., + raw = FALSE, + sd = deprecated() + ) { + chop(x, brk_mean_sd(sds = sds, sd = sd), ..., raw = raw) +} + + +#' Chop using pretty breakpoints +#' +#' `chop_pretty()` uses [base::pretty()] to calculate breakpoints +#' which are 1, 2 or 5 times a power of 10. These look nice in graphs. +#' +#' [base::pretty()] tries to return `n+1` breakpoints, i.e. `n` intervals, but +#' note that this is not guaranteed. There are methods for Date and POSIXct +#' objects. +#' +#' For fine-grained control over [base::pretty()] parameters, use +#' `chop(x, brk_pretty(...))`. +#' +#' @inheritParams chop +#' @inherit chop-doc params return +#' @param n Positive integer passed to [base::pretty()]. How many intervals to chop into? +#' @param ... Passed to [chop()] by `chop_pretty()` and `tab_pretty()`; passed +#' to [base::pretty()] by `brk_pretty()`. +#' +#' @export +#' @order 1 +#' +#' @examples +#' chop_pretty(1:10) +#' +#' chop(1:10, brk_pretty(n = 5, high.u.bias = 0)) +#' +chop_pretty <- function (x, n = 5, ...) { + chop(x, brk_pretty(n = n), ...) +} + + +#' Chop using an existing function +#' +#' `chop_fn()` is a convenience wrapper: `chop_fn(x, foo, ...)` +#' is the same as `chop(x, foo(x, ...))`. +#' +#' @param fn A function which returns a numeric vector of breaks. +#' @param ... Further arguments to `fn` +#' @inheritParams chop +#' @inherit chop-doc params return +#' +#' @export +#' @order 1 +#' @family chopping functions +#' @examples +#' +#' if (requireNamespace("scales")) { +#' chop_fn(rlnorm(10), scales::breaks_log(5)) +#' # same as +#' # x <- rlnorm(10) +#' # chop(x, scales::breaks_log(5)(x)) +#' } +#' +chop_fn <- function ( + x, + fn, + ..., + extend = NULL, + left = TRUE, + close_end = TRUE, + raw = NULL, + drop = TRUE +) { + chop(x, brk_fn(fn, ...), extend = extend, left = left, close_end = close_end, + raw = raw, drop = drop) +} + diff --git a/R/chop.R b/R/chop.R index b92e928..9c14f43 100644 --- a/R/chop.R +++ b/R/chop.R @@ -260,493 +260,3 @@ fillet <- function ( chop(x, breaks, labels, left = left, close_end = close_end, extend = FALSE, raw = raw, drop = FALSE) } - - -#' Chop by quantiles -#' -#' `chop_quantiles()` chops data by quantiles. -#' `chop_deciles()` is a convenience function which chops into deciles. -#' -#' @param probs A vector of probabilities for the quantiles. If `probs` has -#' names, these will be used for labels. -#' @param ... For `chop_quantiles`, passed to [chop()]. For `brk_quantiles()`, -#' passed to [stats::quantile()] or [Hmisc::wtd.quantile()]. -#' @param weights `NULL` or numeric vector of same length as `x`. If not -#' `NULL`, [Hmisc::wtd.quantile()] is used to calculate weighted quantiles. -#' -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @details -#' For non-numeric `x`, `left` is set to `FALSE` by default. This works better -#' for calculating "type 1" quantiles, since they round down. See -#' [stats::quantile()]. -#' -#' If `x` contains duplicates, consecutive quantiles may be the same number. If -#' so, quantile labels may be misleading and a warning is emitted. -#' -#' @family chopping functions -#' -#' @export -#' @order 1 -#' -#' @examples -#' chop_quantiles(1:10, 1:3/4) -#' -#' chop_quantiles(1:10, c(Q1 = 0, Q2 = 0.25, Q3 = 0.5, Q4 = 0.75)) -#' -#' chop(1:10, brk_quantiles(1:3/4)) -#' -#' chop_deciles(1:10) -#' -#' # to label by the quantiles themselves: -#' chop_quantiles(1:10, 1:3/4, raw = TRUE) -#' -#' # duplicate quantiles: -#' tab_quantiles(c(1, 1, 1, 2, 3), 1:5/5) -#' -chop_quantiles <- function( - x, - probs, - ..., - labels = if (raw) lbl_intervals() else - lbl_intervals(single = NULL), - left = is.numeric(x), - raw = FALSE, - weights = NULL - ) { - chop(x, brk_quantiles(probs, weights = weights), labels = labels, ..., - left = left, raw = raw) -} - - -#' @rdname chop_quantiles -#' @export -#' @order 1 -chop_deciles <- function(x, ...) { - chop_quantiles(x, 0:10/10, ...) -} - - -#' Chop equal-sized groups -#' -#' `chop_equally()` chops `x` into groups with an equal number of elements. -#' -#' @param groups Number of groups. -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @details -#' `chop_equally()` uses [brk_quantiles()] under the hood. If `x` has duplicate -#' elements, you may get fewer `groups` than requested. If so, a warning will -#' be emitted. See the examples. -#' -#' -#' -#' @family chopping functions -#' -#' @export -#' @order 1 -#' @examples -#' chop_equally(1:10, 5) -#' -#' # You can't always guarantee `groups` groups: -#' dupes <- c(1, 1, 1, 2, 3, 4, 4, 4) -#' quantile(dupes, 0:4/4) -#' chop_equally(dupes, 4) -chop_equally <- function ( - x, - groups, - ..., - labels = lbl_intervals(), - left = is.numeric(x), - close_end = TRUE, - raw = TRUE - ) { - chop(x, brk_equally(groups), ..., labels = labels, left = left, - close_end = close_end, raw = raw) -} - - -#' Chop by standard deviations -#' -#' Intervals are measured in standard deviations on either side of the -#' mean. -#' -#' In version 0.7.0, these functions changed to specifying `sds` as a vector. -#' To chop 1, 2 and 3 standard deviations around the mean, write -#' `chop_mean_sd(x, sds = 1:3)` instead of `chop_mean_sd(x, sd = 3)`. -#' -#' @param sds Positive numeric vector of standard deviations. -#' @param sd `r lifecycle::badge("deprecated")` -#' -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @family chopping functions -#' -#' @export -#' @order 1 -#' -#' @examples -#' chop_mean_sd(1:10) -#' -#' chop(1:10, brk_mean_sd()) -#' -#' @importFrom lifecycle deprecated -chop_mean_sd <- function ( - x, - sds = 1:3, - ..., - raw = FALSE, - sd = deprecated() - ) { - chop(x, brk_mean_sd(sds = sds, sd = sd), ..., raw = raw) -} - - -#' Chop using pretty breakpoints -#' -#' `chop_pretty()` uses [base::pretty()] to calculate breakpoints -#' which are 1, 2 or 5 times a power of 10. These look nice in graphs. -#' -#' [base::pretty()] tries to return `n+1` breakpoints, i.e. `n` intervals, but -#' note that this is not guaranteed. There are methods for Date and POSIXct -#' objects. -#' -#' For fine-grained control over [base::pretty()] parameters, use -#' `chop(x, brk_pretty(...))`. -#' -#' @inheritParams chop -#' @inherit chop-doc params return -#' @param n Positive integer passed to [base::pretty()]. How many intervals to chop into? -#' @param ... Passed to [chop()] by `chop_pretty()` and `tab_pretty()`; passed -#' to [base::pretty()] by `brk_pretty()`. -#' -#' @export -#' @order 1 -#' -#' @examples -#' chop_pretty(1:10) -#' -#' chop(1:10, brk_pretty(n = 5, high.u.bias = 0)) -#' -chop_pretty <- function (x, n = 5, ...) { - chop(x, brk_pretty(n = n), ...) -} - - -#' Chop into fixed-width intervals -#' -#' `chop_width()` chops `x` into intervals of fixed `width`. -#' -#' @param width Width of intervals. -#' @param start Starting point for intervals. By default the smallest -#' finite `x` (largest if `width` is negative). -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @details -#' If `width` is negative, `chop_width()` sets `left = FALSE` and intervals will -#' go downwards from `start`. -#' -#' @family chopping functions -#' @seealso [brk_width-for-datetime] -#' -#' @export -#' @order 1 -#' -#' @examples -#' chop_width(1:10, 2) -#' -#' chop_width(1:10, 2, start = 0) -#' -#' chop_width(1:9, -2) -#' -#' chop(1:10, brk_width(2, 0)) -#' -chop_width <- function ( - x, - width, - start, - ..., - left = sign(width) > 0 - ) { - chop(x, brk_width(width, start), ..., left = left) -} - - -#' Chop into equal-width intervals -#' -#' `chop_evenly()` chops `x` into `intervals` intervals of equal width. -#' -#' @param intervals Integer: number of intervals to create. -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @details `chop_evenly()` sets `close_end = TRUE` by default. -#' -#' @family chopping functions -#' -#' @export -#' @order 1 -#' @examples -#' chop_evenly(0:10, 5) -#' -chop_evenly <- function ( - x, - intervals, - ..., - close_end = TRUE - ) { - chop(x, brk_evenly(intervals), ..., close_end = close_end) -} - - -#' Chop into proportions of the range of x -#' -#' `chop_proportions()` chops `x` into `proportions` of its range, excluding -#' infinite values. -#' -#' By default, labels show the raw numeric endpoints. To label intervals by -#' the proportions, use `raw = FALSE`. -#' -#' @param proportions Numeric vector between 0 and 1: proportions of x's range. -#' If `proportions` has names, these will be used for labels. -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @export -#' @order 1 -#' @family chopping functions -#' @examples -#' chop_proportions(0:10, c(0.2, 0.8)) -#' chop_proportions(0:10, c(Low = 0, Mid = 0.2, High = 0.8)) -#' -chop_proportions <- function ( - x, - proportions, - ..., - raw = TRUE - ) { - chop(x, brk_proportions(proportions), ..., raw = raw) -} - -#' Chop into fixed-sized groups -#' -#' `chop_n()` creates intervals containing a fixed number of elements. -#' -#' @param n Integer. Number of elements in each interval. -#' @inheritParams chop -#' @param tail String. What to do if the final interval has fewer than `n` elements? -#' `"split"` to keep it separate. `"merge"` to merge it with the neighbouring -#' interval. -#' @inherit chop-doc params return -#' -#' -#' @details -#' -#' The algorithm guarantees that intervals contain no more than `n` elements, so -#' long as there are no duplicates in `x` and `tail = "split"`. It also -#' guarantees that intervals contain no fewer than `n` elements, except possibly -#' the last interval (or first interval if `left` is `FALSE`). -#' -#' To ensure that all intervals contain at least `n` elements (so long as there -#' are at least `n` elements in `x`!) set `tail = "merge"`. -#' -#' If `tail = "split"` and there are intervals containing duplicates with more -#' than `n` elements, a warning is given. -#' -#' @export -#' @order 1 -#' @family chopping functions -#' @examples -#' chop_n(1:10, 5) -#' -#' chop_n(1:5, 2) -#' chop_n(1:5, 2, tail = "merge") -#' -#' # too many duplicates -#' x <- rep(1:2, each = 3) -#' chop_n(x, 2) -#' -chop_n <- function ( - x, - n, - ..., - close_end = TRUE, - tail = "split" - ) { - res <- chop(x, brk_n(n, tail = tail), ..., close_end = close_end) - if (tail == "split" && max(tabulate(res)) > n) { - warning("Some intervals contain more than ", n, " elements") - } - - res -} - -#' Chop using an existing function -#' -#' `chop_fn()` is a convenience wrapper: `chop_fn(x, foo, ...)` -#' is the same as `chop(x, foo(x, ...))`. -#' -#' @param fn A function which returns a numeric vector of breaks. -#' @param ... Further arguments to `fn` -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @export -#' @order 1 -#' @family chopping functions -#' @examples -#' -#' if (requireNamespace("scales")) { -#' chop_fn(rlnorm(10), scales::breaks_log(5)) -#' # same as -#' # x <- rlnorm(10) -#' # chop(x, scales::breaks_log(5)(x)) -#' } -#' -chop_fn <- function ( - x, - fn, - ..., - extend = NULL, - left = TRUE, - close_end = TRUE, - raw = NULL, - drop = TRUE -) { - chop(x, brk_fn(fn, ...), extend = extend, left = left, close_end = close_end, - raw = raw, drop = drop) -} - - -#' Chop common values into separate categories -#' -#' `chop_spikes()` lets you isolate common values of `x` in their own -#' singleton intervals. This can help make unusual values visible. -#' -#' This function is `r lifecycle::badge("experimental")`. -#' -#' @param breaks A numeric vector of cut-points or a call to a `brk_*` function. -#' The resulting [`breaks`][breaks-class] object will be modified to add -#' singleton breaks. -#' @param n,prop Scalar. Provide either `n`, a number of values, or `prop`, -#' a proportion of `length(x)`. Values of `x` which occur at least this -#' often will get their own singleton break. -#' @inheritParams chop -#' @inherit chop-doc params return -#' -#' @export -#' @order 1 -#' @family chopping functions -#' @seealso [isolate_chop()] for a different approach. -#' @examples -#' x <- c(1:4, rep(5, 5), 6:10) -#' chop_spikes(x, c(2, 7), n = 5) -#' chop_spikes(x, c(2, 7), prop = 0.25) -#' chop_spikes(x, brk_width(5), n = 5) -#' -#' set.seed(42) -#' x <- runif(40, 0, 10) -#' x <- sample(x, 200, replace = TRUE) -#' tab_spikes(x, brk_width(2, 0), prop = 0.05) -chop_spikes <- function ( - x, - breaks, - n = NULL, - prop = NULL, - ... -) { - chop(x, brk_spikes(breaks, n = n, prop = prop), ...) -} - - -#' Cut data into intervals, isolating common elements -#' -#' Sometimes it's useful to separate out common elements of `x`. -#' `isolate_chop()` first chops `x`, then puts common elements of `x` ("spikes") -#' into separate categories. -#' -#' Unlike [chop_spikes()], `isolate_chop()` doesn't break up -#' intervals which contain a spike. As a result, unlike other `chop_*` functions, -#' `isolate_chop()` does not typically chop `x` into disjoint intervals. See -#' the examples. -#' -#' If breaks are data-dependent, their labels may be misleading after common -#' elements have been removed. See the example below. -#' -#' Levels of the result are ordered by the minimum element in each level. As -#' a result, if `drop = FALSE`, empty levels will be placed last. -#' -#' This function is `r lifecycle::badge("experimental")`. -#' -#' @param x,breaks,... Passed to [chop()]. -#' @inheritParams chop_spikes -#' @param spike_labels Glue string for spike labels. Use `"{l}"` for the spike -#' value. -#' -#' @return -#' The result of [chop()], but with common values given their own factor levels. -#' -#' @seealso [chop_spikes()] for a different approach. -#' @export -#' -#' @examples -#' x <- c(2, 3, 3, 3, 4) -#' isolate_chop(x, c(2, 4), n = 3) -#' isolate_chop(x, brk_width(2), prop = 0.5) -#' -#' set.seed(42) -#' x <- runif(40, 0, 10) -#' x <- sample(x, 200, replace = TRUE) -#' # Compare: -#' table(isolate_chop(x, brk_width(2, 0), prop = 0.05)) -#' # Versus: -#' tab_spikes(x, brk_width(2, 0), prop = 0.05) -#' -#' # Misleading data-dependent breaks: -#' set.seed(42) -#' x <- rnorm(99) -#' x[1:10] <- x[1] -#' tab_quantiles(x, 1:2/3) -#' table(isolate_chop(x, brk_quantiles(1:2/3), prop = 0.1)) -isolate_chop <- function (x, - breaks, - ..., - n = NULL, - prop = NULL, - spike_labels = "{{{l}}}") { - assert_that( - is.number(n) || is.number(prop), - is.null(n) || is.null(prop), - msg = "exactly one of `n` and `prop` must be specified as a scalar numeric" - ) - - chopped <- chop(x, breaks, ...) - - spikes <- find_spikes(x, n, prop) - - elabels <- endpoint_labels(spikes, raw = TRUE) - glue_env <- new.env() - assign("l", elabels, envir = glue_env) - spike_labels <- glue::glue(spike_labels, .envir = glue_env) - - new_levels <- c(levels(chopped), spike_labels) - levels(chopped) <- new_levels - - x_spikes <- match(x, spikes) - is_spike <- ! is.na(x_spikes) - x_spikes <- x_spikes[is_spike] - chopped[is_spike] <- spike_labels[x_spikes] - - # We reorder the levels of chopped in order of their smallest elements. - # Note that if `drop = FALSE`, empty intervals will be at the end. - # The alternative would be to call `breaks` again and get the left endpoints - # but this is complex. - chopped <- stats::reorder(chopped, x, FUN = quiet_min) - attr(chopped, "scores") <- NULL # remove leftover from reorder() - - chopped -} diff --git a/R/labels-glue.R b/R/labels-glue.R new file mode 100644 index 0000000..23e409a --- /dev/null +++ b/R/labels-glue.R @@ -0,0 +1,136 @@ + +#' Label chopped intervals using the `glue` package +#' +#' Use `"{l}"` and `"{r}"` to show the left and right endpoints of the intervals. +#' +#' @inherit label-doc +#' @inherit first-last-doc params +#' @param label A glue string passed to [glue::glue()]. +#' @param ... Further arguments passed to [glue::glue()]. +#' +#' @details +#' +#' The following variables are available in the glue string: +#' +#' * `l` is a character vector of left endpoints of intervals. +#' * `r` is a character vector of right endpoints of intervals. +#' * `l_closed` is a logical vector. Elements are `TRUE` when the left +#' endpoint is closed. +#' * `r_closed` is a logical vector, `TRUE` when the right endpoint is closed. +#' +#' Endpoints will be formatted by `fmt` before being passed to `glue()`. +#' +#' @family labelling functions +#' +#' @export +#' +#' @examples +#' tab(1:10, c(1, 3, 3, 7), +#' labels = lbl_glue("{l} to {r}", single = "Exactly {l}")) +#' +#' tab(1:10 * 1000, c(1, 3, 5, 7) * 1000, +#' labels = lbl_glue("{l}-{r}", +#' fmt = function(x) prettyNum(x, big.mark=','))) +#' +#' # reproducing lbl_intervals(): +#' interval_left <- "{ifelse(l_closed, '[', '(')}" +#' interval_right <- "{ifelse(r_closed, ']', ')')}" +#' glue_string <- paste0(interval_left, "{l}", ", ", "{r}", interval_right) +#' tab(1:10, c(1, 3, 3, 7), labels = lbl_glue(glue_string, single = "{{{l}}}")) +#' +lbl_glue <- function ( + label, + fmt = NULL, + single = NULL, + first = NULL, + last = NULL, + raw = FALSE, + ... + ) { + assert_that( + is.string(label), + is.null(fmt) || is_format(fmt), + is.string(first) || is.null(first), + is.string(last) || is.null(last), + is.flag(raw) + ) + + if (! isFALSE(raw)) { + lifecycle::deprecate_warn("0.9.0", "lbl_glue(raw)", "chop(raw)") + } + + RAW <- raw # avoid "recursive default argument reference" + function (breaks, raw = RAW) { + assert_that(is.breaks(breaks)) + + len_breaks <- length(breaks) + + labels <- character(len_breaks - 1) + + elabels <- endpoint_labels(breaks, raw = raw, fmt = fmt) + + l <- elabels[-len_breaks] + r <- elabels[-1] + + left <- attr(breaks, "left") + # Breaks like [1, 2) [2, 3] have + # left TRUE, TRUE, FALSE for breaks 1,2,3 + # The first two TRUEs say that the left brackets are closed + # The last two TRUE & FALSE say that the right brackets are open + # and closed respectively. So: + l_closed <- left[-len_breaks] + r_closed <- ! left[-1] + + # check ... for anything not in glue::glue args + # effectively, we move any user-supplied arguments into + # an environment specifically for glue + # this is mostly to make the lbl_midpoints() hack + # of passing in `m` work + dots <- rlang::enexprs(...) + glue_env <- new.env() + not_glue_args <- setdiff(names(dots), names(formals(glue::glue))) + for (nm in not_glue_args) { + assign(deparse(dots[[nm]]), + eval(dots[[nm]], parent.frame()), + glue_env + ) + } + labels <- glue::glue(label, l = l, r = r, l_closed = l_closed, + r_closed = r_closed, ..., .envir = glue_env) + + if (! is.null(single)) { + # which breaks are singletons? + singletons <- singletons(breaks) + + labels[singletons] <- glue::glue(single, + l = l[singletons], + r = r[singletons], + l_closed = l_closed[singletons], + r_closed = r_closed[singletons], + ..., + .envir = glue_env + ) + } + + if (! is.null(first)) { + labels[1] <- glue::glue(first, l = l[1], r = r[1], + l_closed = l_closed[1], + r_closed = r_closed[1], + ..., + .envir = glue_env + ) + } + + if (! is.null(last)) { + ll <- len_breaks - 1 + labels[ll] <- glue::glue(last, l = l[ll], r = r[ll], + l_closed = l_closed[ll], + r_closed = r_closed[ll], + ..., + .envir = glue_env + ) + } + + return(labels) + } +} \ No newline at end of file diff --git a/R/labels-single.R b/R/labels-single.R new file mode 100644 index 0000000..690b67a --- /dev/null +++ b/R/labels-single.R @@ -0,0 +1,207 @@ + +#' Label chopped intervals by their midpoints +#' +#' This uses the midpoint of each interval for +#' its label. +#' +#' @inherit label-doc +#' @inherit first-last-doc +#' +#' @family labelling functions +#' +#' @export +#' +#' @examples +#' chop(1:10, c(2, 5, 8), lbl_midpoints()) +lbl_midpoints <- function ( + fmt = NULL, + single = NULL, + first = NULL, + last = NULL, + raw = FALSE + ) { + if (! isFALSE(raw)) { + lifecycle::deprecate_warn("0.9.0", "lbl_midpoints(raw)", "chop(raw)") + } + + RAW <- raw # avoid "recursive default argument reference" + function (breaks, raw = RAW) { + assert_that(is.breaks(breaks)) + + break_nums <- scaled_endpoints(breaks, raw = raw) + l_nums <- break_nums[-length(break_nums)] + r_nums <- break_nums[-1] + # doing this, rather than (l_nums + r_nums)/2, works for e.g. Date objects: + midpoints <- l_nums + (r_nums - l_nums)/2 + + # we've applied raw already (anyway, midpoints is just a numeric) + midpoints <- endpoint_labels(midpoints, raw = TRUE, fmt = fmt) + + gluer <- lbl_glue(label = "{m}", fmt = fmt, single = single, first = first, + last = last, m = midpoints) + labels <- gluer(breaks, raw = raw) + + labels + } +} + + +#' Label chopped intervals by their left or right endpoints +#' +#' This is useful when the left endpoint unambiguously indicates the +#' interval. In other cases it may give errors due to duplicate labels. +#' +#' `lbl_endpoint()` is `r lifecycle::badge("defunct")` and gives an +#' error since santoku 1.0.0. +#' +#' @inherit label-doc +#' @inherit first-last-doc +#' @param left Flag. Use left endpoint or right endpoint? +#' +#' @family labelling functions +#' +#' @export +#' +#' @examples +#' chop(1:10, c(2, 5, 8), lbl_endpoints(left = TRUE)) +#' chop(1:10, c(2, 5, 8), lbl_endpoints(left = FALSE)) +#' if (requireNamespace("lubridate")) { +#' tab_width( +#' as.Date("2000-01-01") + 0:365, +#' months(1), +#' labels = lbl_endpoints(fmt = "%b") +#' ) +#' } +#' +#' \dontrun{ +#' # This gives breaks `[1, 2) [2, 3) {3}` which lead to +#' # duplicate labels `"2", "3", "3"`: +#' chop(1:3, 1:3, lbl_endpoints(left = FALSE)) +#' } +lbl_endpoints <- function ( + left = TRUE, + fmt = NULL, + single = NULL, + first = NULL, + last = NULL, + raw = FALSE + ) { + assert_that(is.flag(left)) + + if (! isFALSE(raw)) { + lifecycle::deprecate_warn("0.9.0", "lbl_endpoints(raw)", "chop(raw)") + } + + label <- if (left) "{l}" else "{r}" + lbl_glue(label, fmt = fmt, single = single, first = first, last = last, + raw = raw) +} + + +#' @rdname lbl_endpoints +#' @export +lbl_endpoint <- function ( + fmt = NULL, + raw = FALSE, + left = TRUE + ) { + lifecycle::deprecate_stop(when = "0.8.0", what = "lbl_endpoint()", + with = "lbl_endpoints()") +} + + +#' Label chopped intervals in sequence +#' +#' `lbl_seq()` labels intervals sequentially, using numbers or letters. +#' +#' @param start String. A template for the sequence. See below. +#' +#' @details +#'`start` shows the first element of the sequence. It must contain exactly *one* +#' character out of the set "a", "A", "i", "I" or "1". For later elements: +#' +#' * "a" will be replaced by "a", "b", "c", ... +#' * "A" will be replaced by "A", "B", "C", ... +#' * "i" will be replaced by lower-case Roman numerals "i", "ii", "iii", ... +#' * "I" will be replaced by upper-case Roman numerals "I", "II", "III", ... +#' * "1" will be replaced by numbers "1", "2", "3", ... +#' +#' Other characters will be retained as-is. +#' +#' @family labelling functions +#' @inherit label-doc return +#' +#' @export +#' +#' @examples +#' chop(1:10, c(2, 5, 8), lbl_seq()) +#' +#' chop(1:10, c(2, 5, 8), lbl_seq("i.")) +#' +#' chop(1:10, c(2, 5, 8), lbl_seq("(A)")) +lbl_seq <- function(start = "a") { + assert_that(is.string(start)) + # check like contains just one of a, A, i, I, 1 + match <- gregexpr("(a|A|i|I|1)", start)[[1]] + if (length(match) > 1) stop("More than one a/A/i/I/1 found in `start`: ", start) + if (match == -1) stop("No a/A/i/I/1 found in `start`: ", start) + # replace that with the format-string and call lbl_manual appropriately + key <- substr(start, match, match) + fmt <- sub("(a|A|i|I|1)", "%s", start) + + res <- switch(key, + "a" = function (breaks, raw = NULL) { + if (length(breaks) > 27L) { + stop("Can't use more than 26 intervals with lbl_seq(\"a\")") + } + sprintf(fmt, letters[seq_len(length(breaks) - 1L)]) + }, + "A" = function (breaks, raw = NULL) { + if (length(breaks) > 27L) { + stop("Can't use more than 26 intervals with lbl_seq(\"A\")") + } + sprintf(fmt, LETTERS[seq_len(length(breaks) - 1L)]) + }, + "i" = function (breaks, raw = NULL) { + sprintf(fmt, tolower(utils::as.roman(seq_len(length(breaks) - 1L)))) + }, + "I" = function (breaks, raw = NULL) { + sprintf(fmt, utils::as.roman(seq_len(length(breaks) - 1L))) + }, + "1" = function (breaks, raw = NULL) { + sprintf(fmt, seq_len(length(breaks) - 1L)) + } + ) + + return(res) +} + + +#' Defunct: label chopped intervals in a user-defined sequence +#' +#' `r lifecycle::badge("defunct")` +#' +#' `lbl_manual()` is defunct since santoku 1.0.0. It is little used and is not +#' closely related to the rest of the package. It also risks mislabelling +#' intervals, e.g. if intervals are extended. Use of `lbl_manual()` will give +#' an error. +#' +#' @param sequence A character vector of labels. +#' @inherit label-doc +#' +#' @family labelling functions +#' +#' @export +#' +#' @keywords internal +#' +#' @examples +#' \dontrun{ +#' chop(1:10, c(2, 5, 8), lbl_manual(c("w", "x", "y", "z"))) +#' # -> +#' chop(1:10, c(2, 5, 8), labels = c("w", "x", "y", "z")) +#' } +lbl_manual <- function (sequence, fmt = "%s") { + lifecycle::deprecate_stop("0.9.0", "lbl_manual()", + details = "Just specify `labels = sequence` instead.") +} \ No newline at end of file diff --git a/R/labels.R b/R/labels.R index 6c6e8ef..02031e8 100644 --- a/R/labels.R +++ b/R/labels.R @@ -84,296 +84,6 @@ lbl_intervals <- function ( } -#' Label chopped intervals like 1-4, 4-5, ... -#' -#' This label style is user-friendly, but doesn't distinguish between -#' left- and right-closed intervals. It's good for continuous data -#' where you don't expect points to be exactly on the breaks. -#' -#' If you don't want unicode output, use `lbl_dash("-")`. -#' -#' @inherit label-doc -#' @inherit first-last-doc -#' -#' @family labelling functions -#' -#' @export -#' -#' @examples -#' chop(1:10, c(2, 5, 8), lbl_dash()) -#' -#' chop(1:10, c(2, 5, 8), lbl_dash(" to ", fmt = "%.1f")) -#' -#' chop(1:10, c(2, 5, 8), lbl_dash(first = "<{r}")) -#' -#' pretty <- function (x) prettyNum(x, big.mark = ",", digits = 1) -#' chop(runif(10) * 10000, c(3000, 7000), lbl_dash(" to ", fmt = pretty)) -lbl_dash <- function ( - symbol = em_dash(), - fmt = NULL, - single = "{l}", - first = NULL, - last = NULL, - raw = FALSE - ) { - if (! isFALSE(raw)) { - lifecycle::deprecate_warn("0.9.0", "lbl_dash(raw)", "chop(raw)") - } - - label_glue <- paste0("{l}", symbol, "{r}") - lbl_glue(label = label_glue, fmt = fmt, single = single, first = first, - last = last, raw = raw) -} - - -#' Label chopped intervals by their midpoints -#' -#' This uses the midpoint of each interval for -#' its label. -#' -#' @inherit label-doc -#' @inherit first-last-doc -#' -#' @family labelling functions -#' -#' @export -#' -#' @examples -#' chop(1:10, c(2, 5, 8), lbl_midpoints()) -lbl_midpoints <- function ( - fmt = NULL, - single = NULL, - first = NULL, - last = NULL, - raw = FALSE - ) { - if (! isFALSE(raw)) { - lifecycle::deprecate_warn("0.9.0", "lbl_midpoints(raw)", "chop(raw)") - } - - RAW <- raw # avoid "recursive default argument reference" - function (breaks, raw = RAW) { - assert_that(is.breaks(breaks)) - - break_nums <- scaled_endpoints(breaks, raw = raw) - l_nums <- break_nums[-length(break_nums)] - r_nums <- break_nums[-1] - # doing this, rather than (l_nums + r_nums)/2, works for e.g. Date objects: - midpoints <- l_nums + (r_nums - l_nums)/2 - - # we've applied raw already (anyway, midpoints is just a numeric) - midpoints <- endpoint_labels(midpoints, raw = TRUE, fmt = fmt) - - gluer <- lbl_glue(label = "{m}", fmt = fmt, single = single, first = first, - last = last, m = midpoints) - labels <- gluer(breaks, raw = raw) - - labels - } -} - - -#' Label chopped intervals using the `glue` package -#' -#' Use `"{l}"` and `"{r}"` to show the left and right endpoints of the intervals. -#' -#' @inherit label-doc -#' @inherit first-last-doc params -#' @param label A glue string passed to [glue::glue()]. -#' @param ... Further arguments passed to [glue::glue()]. -#' -#' @details -#' -#' The following variables are available in the glue string: -#' -#' * `l` is a character vector of left endpoints of intervals. -#' * `r` is a character vector of right endpoints of intervals. -#' * `l_closed` is a logical vector. Elements are `TRUE` when the left -#' endpoint is closed. -#' * `r_closed` is a logical vector, `TRUE` when the right endpoint is closed. -#' -#' Endpoints will be formatted by `fmt` before being passed to `glue()`. -#' -#' @family labelling functions -#' -#' @export -#' -#' @examples -#' tab(1:10, c(1, 3, 3, 7), -#' labels = lbl_glue("{l} to {r}", single = "Exactly {l}")) -#' -#' tab(1:10 * 1000, c(1, 3, 5, 7) * 1000, -#' labels = lbl_glue("{l}-{r}", -#' fmt = function(x) prettyNum(x, big.mark=','))) -#' -#' # reproducing lbl_intervals(): -#' interval_left <- "{ifelse(l_closed, '[', '(')}" -#' interval_right <- "{ifelse(r_closed, ']', ')')}" -#' glue_string <- paste0(interval_left, "{l}", ", ", "{r}", interval_right) -#' tab(1:10, c(1, 3, 3, 7), labels = lbl_glue(glue_string, single = "{{{l}}}")) -#' -lbl_glue <- function ( - label, - fmt = NULL, - single = NULL, - first = NULL, - last = NULL, - raw = FALSE, - ... - ) { - assert_that( - is.string(label), - is.null(fmt) || is_format(fmt), - is.string(first) || is.null(first), - is.string(last) || is.null(last), - is.flag(raw) - ) - - if (! isFALSE(raw)) { - lifecycle::deprecate_warn("0.9.0", "lbl_glue(raw)", "chop(raw)") - } - - RAW <- raw # avoid "recursive default argument reference" - function (breaks, raw = RAW) { - assert_that(is.breaks(breaks)) - - len_breaks <- length(breaks) - - labels <- character(len_breaks - 1) - - elabels <- endpoint_labels(breaks, raw = raw, fmt = fmt) - - l <- elabels[-len_breaks] - r <- elabels[-1] - - left <- attr(breaks, "left") - # Breaks like [1, 2) [2, 3] have - # left TRUE, TRUE, FALSE for breaks 1,2,3 - # The first two TRUEs say that the left brackets are closed - # The last two TRUE & FALSE say that the right brackets are open - # and closed respectively. So: - l_closed <- left[-len_breaks] - r_closed <- ! left[-1] - - # check ... for anything not in glue::glue args - # effectively, we move any user-supplied arguments into - # an environment specifically for glue - # this is mostly to make the lbl_midpoints() hack - # of passing in `m` work - dots <- rlang::enexprs(...) - glue_env <- new.env() - not_glue_args <- setdiff(names(dots), names(formals(glue::glue))) - for (nm in not_glue_args) { - assign(deparse(dots[[nm]]), - eval(dots[[nm]], parent.frame()), - glue_env - ) - } - labels <- glue::glue(label, l = l, r = r, l_closed = l_closed, - r_closed = r_closed, ..., .envir = glue_env) - - if (! is.null(single)) { - # which breaks are singletons? - singletons <- singletons(breaks) - - labels[singletons] <- glue::glue(single, - l = l[singletons], - r = r[singletons], - l_closed = l_closed[singletons], - r_closed = r_closed[singletons], - ..., - .envir = glue_env - ) - } - - if (! is.null(first)) { - labels[1] <- glue::glue(first, l = l[1], r = r[1], - l_closed = l_closed[1], - r_closed = r_closed[1], - ..., - .envir = glue_env - ) - } - - if (! is.null(last)) { - ll <- len_breaks - 1 - labels[ll] <- glue::glue(last, l = l[ll], r = r[ll], - l_closed = l_closed[ll], - r_closed = r_closed[ll], - ..., - .envir = glue_env - ) - } - - return(labels) - } -} - - -#' Label chopped intervals by their left or right endpoints -#' -#' This is useful when the left endpoint unambiguously indicates the -#' interval. In other cases it may give errors due to duplicate labels. -#' -#' `lbl_endpoint()` is `r lifecycle::badge("defunct")` and gives an -#' error since santoku 1.0.0. -#' -#' @inherit label-doc -#' @inherit first-last-doc -#' @param left Flag. Use left endpoint or right endpoint? -#' -#' @family labelling functions -#' -#' @export -#' -#' @examples -#' chop(1:10, c(2, 5, 8), lbl_endpoints(left = TRUE)) -#' chop(1:10, c(2, 5, 8), lbl_endpoints(left = FALSE)) -#' if (requireNamespace("lubridate")) { -#' tab_width( -#' as.Date("2000-01-01") + 0:365, -#' months(1), -#' labels = lbl_endpoints(fmt = "%b") -#' ) -#' } -#' -#' \dontrun{ -#' # This gives breaks `[1, 2) [2, 3) {3}` which lead to -#' # duplicate labels `"2", "3", "3"`: -#' chop(1:3, 1:3, lbl_endpoints(left = FALSE)) -#' } -lbl_endpoints <- function ( - left = TRUE, - fmt = NULL, - single = NULL, - first = NULL, - last = NULL, - raw = FALSE - ) { - assert_that(is.flag(left)) - - if (! isFALSE(raw)) { - lifecycle::deprecate_warn("0.9.0", "lbl_endpoints(raw)", "chop(raw)") - } - - label <- if (left) "{l}" else "{r}" - lbl_glue(label, fmt = fmt, single = single, first = first, last = last, - raw = raw) -} - - -#' @rdname lbl_endpoints -#' @export -lbl_endpoint <- function ( - fmt = NULL, - raw = FALSE, - left = TRUE - ) { - lifecycle::deprecate_stop(when = "0.8.0", what = "lbl_endpoint()", - with = "lbl_endpoints()") -} - - #' Label discrete data #' #' `lbl_discrete()` creates labels for discrete data, such as integers. @@ -408,7 +118,7 @@ lbl_endpoint <- function ( #' lbl_discrete <- function ( symbol = em_dash(), - unit = 1, + unit = 1L, fmt = NULL, single = NULL, first = NULL, @@ -483,98 +193,43 @@ lbl_discrete <- function ( } -#' Label chopped intervals in sequence -#' -#' `lbl_seq()` labels intervals sequentially, using numbers or letters. -#' -#' @param start String. A template for the sequence. See below. +#' Label chopped intervals like 1-4, 4-5, ... #' -#' @details -#'`start` shows the first element of the sequence. It must contain exactly *one* -#' character out of the set "a", "A", "i", "I" or "1". For later elements: +#' This label style is user-friendly, but doesn't distinguish between +#' left- and right-closed intervals. It's good for continuous data +#' where you don't expect points to be exactly on the breaks. #' -#' * "a" will be replaced by "a", "b", "c", ... -#' * "A" will be replaced by "A", "B", "C", ... -#' * "i" will be replaced by lower-case Roman numerals "i", "ii", "iii", ... -#' * "I" will be replaced by upper-case Roman numerals "I", "II", "III", ... -#' * "1" will be replaced by numbers "1", "2", "3", ... +#' If you don't want unicode output, use `lbl_dash("-")`. #' -#' Other characters will be retained as-is. +#' @inherit label-doc +#' @inherit first-last-doc #' #' @family labelling functions -#' @inherit label-doc return #' #' @export #' #' @examples -#' chop(1:10, c(2, 5, 8), lbl_seq()) -#' -#' chop(1:10, c(2, 5, 8), lbl_seq("i.")) -#' -#' chop(1:10, c(2, 5, 8), lbl_seq("(A)")) -lbl_seq <- function(start = "a") { - assert_that(is.string(start)) - # check like contains just one of a, A, i, I, 1 - match <- gregexpr("(a|A|i|I|1)", start)[[1]] - if (length(match) > 1) stop("More than one a/A/i/I/1 found in `start`: ", start) - if (match == -1) stop("No a/A/i/I/1 found in `start`: ", start) - # replace that with the format-string and call lbl_manual appropriately - key <- substr(start, match, match) - fmt <- sub("(a|A|i|I|1)", "%s", start) - - res <- switch(key, - "a" = function (breaks, raw = NULL) { - if (length(breaks) > 27L) { - stop("Can't use more than 26 intervals with lbl_seq(\"a\")") - } - sprintf(fmt, letters[seq_len(length(breaks) - 1L)]) - }, - "A" = function (breaks, raw = NULL) { - if (length(breaks) > 27L) { - stop("Can't use more than 26 intervals with lbl_seq(\"A\")") - } - sprintf(fmt, LETTERS[seq_len(length(breaks) - 1L)]) - }, - "i" = function (breaks, raw = NULL) { - sprintf(fmt, tolower(utils::as.roman(seq_len(length(breaks) - 1L)))) - }, - "I" = function (breaks, raw = NULL) { - sprintf(fmt, utils::as.roman(seq_len(length(breaks) - 1L))) - }, - "1" = function (breaks, raw = NULL) { - sprintf(fmt, seq_len(length(breaks) - 1L)) - } - ) - - return(res) -} - - -#' Defunct: label chopped intervals in a user-defined sequence -#' -#' `r lifecycle::badge("defunct")` -#' -#' `lbl_manual()` is defunct since santoku 1.0.0. It is little used and is not -#' closely related to the rest of the package. It also risks mislabelling -#' intervals, e.g. if intervals are extended. Use of `lbl_manual()` will give -#' an error. -#' -#' @param sequence A character vector of labels. -#' @inherit label-doc -#' -#' @family labelling functions +#' chop(1:10, c(2, 5, 8), lbl_dash()) #' -#' @export +#' chop(1:10, c(2, 5, 8), lbl_dash(" to ", fmt = "%.1f")) #' -#' @keywords internal +#' chop(1:10, c(2, 5, 8), lbl_dash(first = "<{r}")) #' -#' @examples -#' \dontrun{ -#' chop(1:10, c(2, 5, 8), lbl_manual(c("w", "x", "y", "z"))) -#' # -> -#' chop(1:10, c(2, 5, 8), labels = c("w", "x", "y", "z")) -#' } -lbl_manual <- function (sequence, fmt = "%s") { - lifecycle::deprecate_stop("0.9.0", "lbl_manual()", - details = "Just specify `labels = sequence` instead.") +#' pretty <- function (x) prettyNum(x, big.mark = ",", digits = 1) +#' chop(runif(10) * 10000, c(3000, 7000), lbl_dash(" to ", fmt = pretty)) +lbl_dash <- function ( + symbol = em_dash(), + fmt = NULL, + single = "{l}", + first = NULL, + last = NULL, + raw = FALSE + ) { + if (! isFALSE(raw)) { + lifecycle::deprecate_warn("0.9.0", "lbl_dash(raw)", "chop(raw)") + } + + label_glue <- paste0("{l}", symbol, "{r}") + lbl_glue(label = label_glue, fmt = fmt, single = single, first = first, + last = last, raw = raw) } \ No newline at end of file diff --git a/R/utils.R b/R/utils.R index 303693c..196d237 100644 --- a/R/utils.R +++ b/R/utils.R @@ -114,29 +114,6 @@ strict_as_numeric <- function (x) { } - -#' Find common elements in `x` -#' -#' @param x A vector -#' @param n Number of elements that counts as common. Specify exactly one of `n` -#' and `prop`. -#' @param prop Proportion of `length(x)` that counts as common -#' -#' @return The common elements, not necessarily in order. NA values are never -#' considered as common. -#' @noRd -find_spikes <- function (x, n, prop) { - n <- n %||% (length(x) * prop) - unique_x <- unique(x) - x_counts <- tabulate(match(x, unique_x)) - spikes <- unique_x[x_counts >= n] - spikes <- spikes[! is.na(spikes)] - - spikes -} - - - #' Test a break #' #' @param brk_fun A call to a `brk_` function