From 6ba601f685c5f9bdf52ceabb290b189d8143e53f Mon Sep 17 00:00:00 2001 From: nfrerebeau Date: Wed, 10 May 2023 10:31:55 +0200 Subject: [PATCH] Add credible intervals --- NAMESPACE | 2 + NEWS.md | 1 + R/AllGenerics.R | 46 ++++++++++++++++++ R/statistics.R | 73 ++++++++++++++++++++++++++++- inst/examples/ex-interval.R | 2 + man/confidence_binomial.Rd | 4 +- man/confidence_mean.Rd | 4 +- man/confidence_multinomial.Rd | 4 +- man/interval_credible.Rd | 43 +++++++++++++++++ man/interval_hdr.Rd | 54 +++++++++++++++++++++ tests/testthat/_snaps/statistics.md | 17 +++++++ tests/testthat/test-statistics.R | 6 +++ 12 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 inst/examples/ex-interval.R create mode 100644 man/interval_credible.Rd create mode 100644 man/interval_hdr.Rd diff --git a/NAMESPACE b/NAMESPACE index 7ec6d6d..7f88de7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -84,6 +84,8 @@ exportMethods(detect) exportMethods(discard) exportMethods(discard_cols) exportMethods(discard_rows) +exportMethods(interval_credible) +exportMethods(interval_hdr) exportMethods(jackknife) exportMethods(keep) exportMethods(keep_cols) diff --git a/NEWS.md b/NEWS.md index 5b9405e..a52ecdf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # arkhe 1.1.0.9000 ## New classes and methods * Add `math_lcm()` and `math_gcd()` to compute the lowest common multiple and the greatest common divisor. +* Add `interval_hdr()` and `interval_credible()` to compute the credible intervals. # arkhe 1.1.0 ## New classes and methods diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 5ee6450..942992b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -365,6 +365,52 @@ setGeneric( # Statistics =================================================================== ## Interval -------------------------------------------------------------------- +#' Highest Density Regions +#' +#' @param x A [`numeric`] vector giving the coordinates of the points where +#' the density is estimated. +#' @param y A [`numeric`] vector giving the estimated density values. +#' If `y` is missing and `x` is a `numeric` vector, density estimates will be +#' computed from `x`. +#' @param level A length-one [`numeric`] vector giving the confidence level. +#' @param ... Further arguments to be passed to [stats::density()]. +#' @return +#' A three-columns `numeric` [`matrix`] giving the lower and upper boundaries +#' of the HPD interval and associated probabilities. +#' @references +#' Hyndman, R. J. (1996). Computing and graphing highest density regions. +#' *American Statistician*, 50: 120-126. \doi{10.2307/2684423}. +#' @example inst/examples/ex-interval.R +#' @author N. Frerebeau +#' @family summary statistics +#' @docType methods +#' @aliases interval_hdr-method +setGeneric( + name = "interval_hdr", + def = function(x, y, ...) standardGeneric("interval_hdr") +) + +#' Bayesian Credible Interval +#' +#' Computes the shortest credible interval within which an unobserved parameter +#' value falls with a particular probability. +#' @param x A [`numeric`] vector. +#' @param level A length-one [`numeric`] vector giving the confidence level. +#' @param ... Currently not used. +#' @return +#' A three-columns `numeric` [`matrix`] giving the lower and upper boundaries +#' of the credible interval and associated probability. +#' @example inst/examples/ex-interval.R +#' @author N. Frerebeau +#' @family summary statistics +#' @docType methods +#' @aliases interval_credible-method +setGeneric( + name = "interval_credible", + def = function(x, ...) standardGeneric("interval_credible") +) + +## Confidence ------------------------------------------------------------------ #' Confidence Interval for a Mean #' #' Computes a confidence interval for a mean at a desired level of significance. diff --git a/R/statistics.R b/R/statistics.R index 1b07f32..4072916 100644 --- a/R/statistics.R +++ b/R/statistics.R @@ -2,7 +2,78 @@ #' @include AllGenerics.R NULL -# Interval ===================================================================== +# HPDI ========================================================================= +#' @export +#' @rdname interval_hdr +#' @aliases interval_hdr,numeric,numeric-method +setMethod( + f = "interval_hdr", + signature = c(x = "numeric", y = "numeric"), + definition = function(x, y, level = 0.954) { + ## Compute density + y <- y / sum(y) + + ## Order the sample (faster sorting with radix method) + sorted <- sort(y, decreasing = TRUE, method = "radix") + i <- min(which(cumsum(sorted) >= sum(y) * level)) + h <- sorted[[i]] + idx <- which(y >= h) + + gap <- which(diff(idx) > 1) + inf <- idx[c(1, gap + 1)] + sup <- idx[c(gap, length(idx))] + + int <- mapply(FUN = seq, from = inf, to = sup, + SIMPLIFY = FALSE, USE.NAMES = FALSE) + p <- vapply(X = int, FUN = function(i, y) { sum(y[i]) }, + FUN.VALUE = numeric(1), y = y) + + cbind(start = x[inf], end = x[sup], p = round(p, digits = 2)) + } +) + +#' @export +#' @rdname interval_hdr +#' @aliases interval_hdr,numeric,missing-method +setMethod( + f = "interval_hdr", + signature = c(x = "numeric", y = "missing"), + definition = function(x, level = 0.954, ...) { + ## Compute density + d <- stats::density(x, ...) + methods::callGeneric(x = d$x, y = d$y, level = level) + } +) + +# Credible interval ============================================================ +#' @export +#' @rdname interval_credible +#' @aliases interval_credible,numeric-method +setMethod( + f = "interval_credible", + signature = "numeric", + definition = function(x, level = 0.95) { + ## Order the sample + sorted <- sort(x, method = "radix") # Faster sorting with radix method + + ## Sample size + N <- length(x) + + ## Number of data to be outside of the interval + outside <- as.integer(N * (1 - level)) + inf <- seq(from = 1L, to = outside + 1L, by = 1L) + sup <- seq(from = N - outside, to = N, by = 1L) + + ## Look for the shortest interval + a <- sorted[sup] + b <- sorted[inf] + ind <- which.min(a - b) + + cbind(start = b[[ind]], end = a[[ind]], p = level) + } +) + +# Confidence interval ========================================================== #' @export #' @rdname confidence_mean #' @aliases confidence_mean,numeric-method diff --git a/inst/examples/ex-interval.R b/inst/examples/ex-interval.R new file mode 100644 index 0000000..9df182b --- /dev/null +++ b/inst/examples/ex-interval.R @@ -0,0 +1,2 @@ +## HDR of the Old Faithful eruption times +interval_hdr(faithful$eruptions) diff --git a/man/confidence_binomial.Rd b/man/confidence_binomial.Rd index 3e2f6b0..9e05547 100644 --- a/man/confidence_binomial.Rd +++ b/man/confidence_binomial.Rd @@ -57,7 +57,9 @@ confidence_multinomial(x) \seealso{ Other summary statistics: \code{\link{confidence_mean}()}, -\code{\link{confidence_multinomial}()} +\code{\link{confidence_multinomial}()}, +\code{\link{interval_credible}()}, +\code{\link{interval_hdr}()} } \author{ N. Frerebeau diff --git a/man/confidence_mean.Rd b/man/confidence_mean.Rd index 5e7c772..caddb0e 100644 --- a/man/confidence_mean.Rd +++ b/man/confidence_mean.Rd @@ -47,7 +47,9 @@ confidence_multinomial(x) \seealso{ Other summary statistics: \code{\link{confidence_binomial}()}, -\code{\link{confidence_multinomial}()} +\code{\link{confidence_multinomial}()}, +\code{\link{interval_credible}()}, +\code{\link{interval_hdr}()} } \author{ N. Frerebeau diff --git a/man/confidence_multinomial.Rd b/man/confidence_multinomial.Rd index f4e4f2e..e88b2e4 100644 --- a/man/confidence_multinomial.Rd +++ b/man/confidence_multinomial.Rd @@ -55,7 +55,9 @@ confidence_multinomial(x) \seealso{ Other summary statistics: \code{\link{confidence_binomial}()}, -\code{\link{confidence_mean}()} +\code{\link{confidence_mean}()}, +\code{\link{interval_credible}()}, +\code{\link{interval_hdr}()} } \author{ N. Frerebeau diff --git a/man/interval_credible.Rd b/man/interval_credible.Rd new file mode 100644 index 0000000..ef26d9b --- /dev/null +++ b/man/interval_credible.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/statistics.R +\docType{methods} +\name{interval_credible} +\alias{interval_credible} +\alias{interval_credible-method} +\alias{interval_credible,numeric-method} +\title{Bayesian Credible Interval} +\usage{ +interval_credible(x, ...) + +\S4method{interval_credible}{numeric}(x, level = 0.95) +} +\arguments{ +\item{x}{A \code{\link{numeric}} vector.} + +\item{...}{Currently not used.} + +\item{level}{A length-one \code{\link{numeric}} vector giving the confidence level.} +} +\value{ +A three-columns \code{numeric} \code{\link{matrix}} giving the lower and upper boundaries +of the credible interval and associated probability. +} +\description{ +Computes the shortest credible interval within which an unobserved parameter +value falls with a particular probability. +} +\examples{ +## HDR of the Old Faithful eruption times +interval_hdr(faithful$eruptions) +} +\seealso{ +Other summary statistics: +\code{\link{confidence_binomial}()}, +\code{\link{confidence_mean}()}, +\code{\link{confidence_multinomial}()}, +\code{\link{interval_hdr}()} +} +\author{ +N. Frerebeau +} +\concept{summary statistics} diff --git a/man/interval_hdr.Rd b/man/interval_hdr.Rd new file mode 100644 index 0000000..7b0e5b3 --- /dev/null +++ b/man/interval_hdr.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/statistics.R +\docType{methods} +\name{interval_hdr} +\alias{interval_hdr} +\alias{interval_hdr-method} +\alias{interval_hdr,numeric,numeric-method} +\alias{interval_hdr,numeric,missing-method} +\title{Highest Density Regions} +\usage{ +interval_hdr(x, y, ...) + +\S4method{interval_hdr}{numeric,numeric}(x, y, level = 0.954) + +\S4method{interval_hdr}{numeric,missing}(x, level = 0.954, ...) +} +\arguments{ +\item{x}{A \code{\link{numeric}} vector giving the coordinates of the points where +the density is estimated.} + +\item{y}{A \code{\link{numeric}} vector giving the estimated density values. +If \code{y} is missing and \code{x} is a \code{numeric} vector, density estimates will be +computed from \code{x}.} + +\item{...}{Further arguments to be passed to \code{\link[stats:density]{stats::density()}}.} + +\item{level}{A length-one \code{\link{numeric}} vector giving the confidence level.} +} +\value{ +A three-columns \code{numeric} \code{\link{matrix}} giving the lower and upper boundaries +of the HPD interval and associated probabilities. +} +\description{ +Highest Density Regions +} +\examples{ +## HDR of the Old Faithful eruption times +interval_hdr(faithful$eruptions) +} +\references{ +Hyndman, R. J. (1996). Computing and graphing highest density regions. +\emph{American Statistician}, 50: 120-126. \doi{10.2307/2684423}. +} +\seealso{ +Other summary statistics: +\code{\link{confidence_binomial}()}, +\code{\link{confidence_mean}()}, +\code{\link{confidence_multinomial}()}, +\code{\link{interval_credible}()} +} +\author{ +N. Frerebeau +} +\concept{summary statistics} diff --git a/tests/testthat/_snaps/statistics.md b/tests/testthat/_snaps/statistics.md index 51e9a1e..e9d14ec 100644 --- a/tests/testthat/_snaps/statistics.md +++ b/tests/testthat/_snaps/statistics.md @@ -1,3 +1,20 @@ +# Highest density regions + + Code + interval_hdr(faithful$eruptions) + Output + start end p + [1,] 1.307160 2.837942 0.33 + [2,] 3.150567 5.295819 0.62 + +# Bayesian credible interval + + Code + interval_credible(faithful$eruptions) + Output + start end p + [1,] 1.733 4.85 0.95 + # Confidence interval for the mean Code diff --git a/tests/testthat/test-statistics.R b/tests/testthat/test-statistics.R index 8308906..d1cd2ec 100644 --- a/tests/testthat/test-statistics.R +++ b/tests/testthat/test-statistics.R @@ -1,3 +1,9 @@ +test_that("Highest density regions", { + expect_snapshot(interval_hdr(faithful$eruptions)) +}) +test_that("Bayesian credible interval", { + expect_snapshot(interval_credible(faithful$eruptions)) +}) test_that("Confidence interval for the mean", { x <- c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, 30.4, 33.9, 21.5, 15.5,