Skip to content

Commit

Permalink
Add exclude_spikes parameter to dissect().
Browse files Browse the repository at this point in the history
  • Loading branch information
hughjonesd committed Jun 20, 2024
1 parent 24fc62f commit 326a408
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 27 deletions.
53 changes: 35 additions & 18 deletions R/chop-isolates.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,20 @@ chop_spikes <- function (
}


#' Cut data into intervals, then separate out common values
#' Cut data into intervals, separating out common values
#'
#' Sometimes it's useful to separate out common elements of `x`.
#' `dissect()` first chops `x`, then puts common elements of `x` ("spikes")
#' `dissect()` chops `x`, but puts common elements of `x` ("spikes")
#' into separate categories.
#'
#' Unlike [chop_spikes()], `dissect()` doesn't break up
#' intervals which contain a spike. As a result, unlike other `chop_*` functions,
#' intervals which contain a spike. As a result, unlike `chop_*` functions,
#' `dissect()` does not chop `x` into disjoint intervals. See the examples.
#'
#' If breaks are data-dependent, their labels may be misleading after common
#' elements have been removed. See the example below.
#' elements have been removed. See the example below. To get round this,
#' set `exclude_spikes` to `TRUE`. Then breaks will be calculated after
#' removing spikes from the data.
#'
#' Levels of the result are ordered by the minimum element in each level. As
#' a result, if `drop = FALSE`, empty levels will be placed last.
Expand All @@ -60,8 +62,10 @@ chop_spikes <- function (
#'
#' @param x,breaks,... Passed to [chop()].
#' @inheritParams chop_spikes
#' @param spike_labels Glue string for spike labels. Use `"{l}"` for the spike
#' value.
#' @param spike_labels [Glue][glue::glue()] string for spike labels. Use `"{l}"`
#' for the spike value.
#' @param exclude_spikes Logical. Exclude spikes before chopping `x`? This
#' can affect the location of data-dependent breaks.
#'
#' @return
#' `dissect()` returns the result of [chop()], but with common values put into
Expand Down Expand Up @@ -89,28 +93,44 @@ chop_spikes <- function (
#' # Misleading data-dependent breaks:
#' set.seed(42)
#' x <- rnorm(99)
#' x[1:10] <- x[1]
#' x[1:9] <- x[1]
#' tab_quantiles(x, 1:2/3)
#' tab_dissect(x, brk_quantiles(1:2/3), prop = 0.1)
#' tab_dissect(x, brk_quantiles(1:2/3), n = 9)
#' # Calculate quantiles excluding spikes:
#' tab_dissect(x, brk_quantiles(1:2/3), n = 9, exclude_spikes = TRUE)
dissect <- function (x,
breaks,
...,
n = NULL,
prop = NULL,
spike_labels = "{{{l}}}") {
breaks,
...,
n = NULL,
prop = NULL,
spike_labels = "{{{l}}}",
exclude_spikes = FALSE) {
assert_that(
is.number(n) || is.number(prop),
is.null(n) || is.null(prop),
is.string(spike_labels),
is.flag(exclude_spikes),
msg = "exactly one of `n` and `prop` must be a scalar numeric"
)
assert_that(
# it's ok for one of these to be null
n >= 0 || prop >= 0
)

chopped <- chop(x, breaks, ...)

spikes <- find_spikes(x, n, prop)
x_spikes <- match(x, spikes)
is_spike <- ! is.na(x_spikes)
x_spikes <- x_spikes[is_spike]

if (exclude_spikes) {
x_not_spikes <- x[! is_spike]
chopped_not_spikes <- chop(x_not_spikes, breaks, ...)
chopped <- factor(rep(NA_integer_, length(x)),
levels = levels(chopped_not_spikes))
chopped[! is_spike] <- chopped_not_spikes
} else {
chopped <- chop(x, breaks, ...)
}

elabels <- endpoint_labels(spikes, raw = TRUE)
glue_env <- new.env()
Expand All @@ -120,9 +140,6 @@ dissect <- function (x,
new_levels <- c(levels(chopped), spike_labels)
levels(chopped) <- new_levels

x_spikes <- match(x, spikes)
is_spike <- ! is.na(x_spikes)
x_spikes <- x_spikes[is_spike]
chopped[is_spike] <- spike_labels[x_spikes]

# We reorder the levels of chopped in order of their smallest elements.
Expand Down
33 changes: 24 additions & 9 deletions man/dissect.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test-chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,15 @@ test_that("dissect", {

expect_silent(res2 <- dissect(x, breaks = c(2, 5), prop = 0.25))
expect_equivalent(res, res2)

x <- c(1, 2, 3, 4, 5, 5, 5, 5)
expect_silent(res3 <- dissect(x, breaks = brk_equally(2), n = 2,
exclude_spikes = TRUE))
expect_equivalent(
res3,
factor(c("[0%, 50%)", "[0%, 50%)", "[50%, 100%]", "[50%, 100%]",
rep("{5}", 4)))
)
})


Expand Down

0 comments on commit 326a408

Please sign in to comment.