Skip to content
This repository was archived by the owner on Jan 30, 2025. It is now read-only.

Commit

Permalink
Add credible intervals
Browse files Browse the repository at this point in the history
  • Loading branch information
nfrerebeau committed May 10, 2023
1 parent 04d982f commit 6ba601f
Show file tree
Hide file tree
Showing 12 changed files with 252 additions and 4 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ exportMethods(detect)
exportMethods(discard)
exportMethods(discard_cols)
exportMethods(discard_rows)
exportMethods(interval_credible)
exportMethods(interval_hdr)
exportMethods(jackknife)
exportMethods(keep)
exportMethods(keep_cols)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# arkhe 1.1.0.9000
## New classes and methods
* Add `math_lcm()` and `math_gcd()` to compute the lowest common multiple and the greatest common divisor.
* Add `interval_hdr()` and `interval_credible()` to compute the credible intervals.

# arkhe 1.1.0
## New classes and methods
Expand Down
46 changes: 46 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,52 @@ setGeneric(

# Statistics ===================================================================
## Interval --------------------------------------------------------------------
#' Highest Density Regions
#'
#' @param x A [`numeric`] vector giving the coordinates of the points where
#' the density is estimated.
#' @param y A [`numeric`] vector giving the estimated density values.
#' If `y` is missing and `x` is a `numeric` vector, density estimates will be
#' computed from `x`.
#' @param level A length-one [`numeric`] vector giving the confidence level.
#' @param ... Further arguments to be passed to [stats::density()].
#' @return
#' A three-columns `numeric` [`matrix`] giving the lower and upper boundaries
#' of the HPD interval and associated probabilities.
#' @references
#' Hyndman, R. J. (1996). Computing and graphing highest density regions.
#' *American Statistician*, 50: 120-126. \doi{10.2307/2684423}.
#' @example inst/examples/ex-interval.R
#' @author N. Frerebeau
#' @family summary statistics
#' @docType methods
#' @aliases interval_hdr-method
setGeneric(
name = "interval_hdr",
def = function(x, y, ...) standardGeneric("interval_hdr")
)

#' Bayesian Credible Interval
#'
#' Computes the shortest credible interval within which an unobserved parameter
#' value falls with a particular probability.
#' @param x A [`numeric`] vector.
#' @param level A length-one [`numeric`] vector giving the confidence level.
#' @param ... Currently not used.
#' @return
#' A three-columns `numeric` [`matrix`] giving the lower and upper boundaries
#' of the credible interval and associated probability.
#' @example inst/examples/ex-interval.R
#' @author N. Frerebeau
#' @family summary statistics
#' @docType methods
#' @aliases interval_credible-method
setGeneric(
name = "interval_credible",
def = function(x, ...) standardGeneric("interval_credible")
)

## Confidence ------------------------------------------------------------------
#' Confidence Interval for a Mean
#'
#' Computes a confidence interval for a mean at a desired level of significance.
Expand Down
73 changes: 72 additions & 1 deletion R/statistics.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,78 @@
#' @include AllGenerics.R
NULL

# Interval =====================================================================
# HPDI =========================================================================
#' @export
#' @rdname interval_hdr
#' @aliases interval_hdr,numeric,numeric-method
setMethod(
f = "interval_hdr",
signature = c(x = "numeric", y = "numeric"),
definition = function(x, y, level = 0.954) {
## Compute density
y <- y / sum(y)

## Order the sample (faster sorting with radix method)
sorted <- sort(y, decreasing = TRUE, method = "radix")
i <- min(which(cumsum(sorted) >= sum(y) * level))
h <- sorted[[i]]
idx <- which(y >= h)

gap <- which(diff(idx) > 1)
inf <- idx[c(1, gap + 1)]
sup <- idx[c(gap, length(idx))]

int <- mapply(FUN = seq, from = inf, to = sup,
SIMPLIFY = FALSE, USE.NAMES = FALSE)
p <- vapply(X = int, FUN = function(i, y) { sum(y[i]) },
FUN.VALUE = numeric(1), y = y)

cbind(start = x[inf], end = x[sup], p = round(p, digits = 2))
}
)

#' @export
#' @rdname interval_hdr
#' @aliases interval_hdr,numeric,missing-method
setMethod(
f = "interval_hdr",
signature = c(x = "numeric", y = "missing"),
definition = function(x, level = 0.954, ...) {
## Compute density
d <- stats::density(x, ...)
methods::callGeneric(x = d$x, y = d$y, level = level)
}
)

# Credible interval ============================================================
#' @export
#' @rdname interval_credible
#' @aliases interval_credible,numeric-method
setMethod(
f = "interval_credible",
signature = "numeric",
definition = function(x, level = 0.95) {
## Order the sample
sorted <- sort(x, method = "radix") # Faster sorting with radix method

## Sample size
N <- length(x)

## Number of data to be outside of the interval
outside <- as.integer(N * (1 - level))
inf <- seq(from = 1L, to = outside + 1L, by = 1L)
sup <- seq(from = N - outside, to = N, by = 1L)

## Look for the shortest interval
a <- sorted[sup]
b <- sorted[inf]
ind <- which.min(a - b)

cbind(start = b[[ind]], end = a[[ind]], p = level)
}
)

# Confidence interval ==========================================================
#' @export
#' @rdname confidence_mean
#' @aliases confidence_mean,numeric-method
Expand Down
2 changes: 2 additions & 0 deletions inst/examples/ex-interval.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
## HDR of the Old Faithful eruption times
interval_hdr(faithful$eruptions)
4 changes: 3 additions & 1 deletion man/confidence_binomial.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/confidence_mean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/confidence_multinomial.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions man/interval_credible.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions man/interval_hdr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions tests/testthat/_snaps/statistics.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Highest density regions

Code
interval_hdr(faithful$eruptions)
Output
start end p
[1,] 1.307160 2.837942 0.33
[2,] 3.150567 5.295819 0.62

# Bayesian credible interval

Code
interval_credible(faithful$eruptions)
Output
start end p
[1,] 1.733 4.85 0.95

# Confidence interval for the mean

Code
Expand Down
6 changes: 6 additions & 0 deletions tests/testthat/test-statistics.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
test_that("Highest density regions", {
expect_snapshot(interval_hdr(faithful$eruptions))
})
test_that("Bayesian credible interval", {
expect_snapshot(interval_credible(faithful$eruptions))
})
test_that("Confidence interval for the mean", {
x <- c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,
16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5,
Expand Down

0 comments on commit 6ba601f

Please sign in to comment.