Skip to content

Commit

Permalink
Update documentation of lsmeans
Browse files Browse the repository at this point in the history
  • Loading branch information
gowerc committed Mar 20, 2024
1 parent edbf35f commit aa939cd
Show file tree
Hide file tree
Showing 7 changed files with 140 additions and 69 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ LazyData: true
Roxygen: list(markdown = TRUE)
URL: https://insightsengineering.github.io/rbmi/, https://github.com/insightsengineering/rbmi
BugReports: https://github.com/insightsengineering/rbmi/issues
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
dplyr,
tidyr,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(as.data.frame,pool)
S3method(collapse_values,default)
S3method(collapse_values,factor)
S3method(collapse_values,numeric)
S3method(draws,approxbayes)
S3method(draws,bayes)
S3method(draws,bmlmi)
Expand All @@ -19,6 +22,7 @@ S3method(print,imputation_list_df)
S3method(print,imputation_list_single)
S3method(print,imputation_single)
S3method(print,pool)
S3method(repr,numeric)
S3method(validate,analysis)
S3method(validate,bmlmi)
S3method(validate,bootstrap)
Expand Down
1 change: 1 addition & 0 deletions R/dataclasses.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ repr <- function(x, ...) {
UseMethod("repr")
}

#' @export
repr.numeric <- function(x, ...) {
paste0("c(", paste0(round(x, 2), collapse = ", "), ")")
}
114 changes: 69 additions & 45 deletions R/lsmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,34 @@
#' Least Square Means
#'
#'
#' Estimates the least square means from a linear model. This is done by
#' generating a prediction from the model using an hypothetical observation
#' that is constructed by averaging the data. See details for more information.
#' Estimates the least square means from a linear model. The exact implementation
#' / interpretation depends on the weighting scheme; see details for more information.
#'
#' @details
#'
#' The lsmeans are obtained by calculating hypothetical patients
#' and predicting their expected values. These hypothetical patients
#' are constructed by expanding out all possible combinations of each
#' categorical covariate and by setting any numerical covariates equal
#' to the mean.
#' For `.weights = "proportional"` (the default) the lsmeans are obtained by
#' taking the average of the fitted values for each patient.
#' In terms of the interactions between two continuous covariates
#' this is equivalent to constructing a hypothetical patient whose
#' interaction term is `mean(X * Y)`. This is opposed to the
#' `emmeans` package which calculates the interaction term
#' of the hypothetical patient as `mean(X) * mean(Y)`. The approach
#' outlined here is equivalent to standardization or g-computation.
#'
#' For `.weights = "equal"` the lsmeans are obtained by taking the model fitted
#' value of a hypothetical patient whose covariates are defined as follows:
#' - Continuous covariates are set to `mean(X)`
#' - Dummy categorical variables are set to `1/N` where `N` is the number of levels
#' - Continuous * continuous interactions are set to `mean(X) * mean(Y)`
#' - Continuous * categorical interactions are set to `mean(X) * 1/N`
#'
#' A final lsmean value is calculated by averaging these hypothetical
#' patients. If `.weights` equals `"proportional"` then the values are weighted
#' by the frequency in which they occur in the full dataset. If `.weights`
#' equals `"equal"` then each hypothetical patient is given an equal weight
#' regardless of what actually occurs in the dataset.
#' Regardless of the weighting scheme any named arguments passed via `...` will
#' fix the value of the covariate to the specified value.
#' For example, `lsmeans(model, trt = "A")` will fix the dummy variable `trtA` to 1
#' for all patients (real or hypothetical) when calculating the lsmeans.
#'
#' Use the `...` argument to fix specific variables to specific values.
#'
#' See the references for identical implementations as done in SAS and
#' in R via the `emmeans` package. This function attempts to re-implement the
#' `emmeans` derivation for standard linear models but without having to include
#' all of it's dependencies.
#' See the references for similar implementations as done in SAS and
#' in R via the `emmeans` package.
#'
#' @param model A model created by `lm`.
#' @param ... Fixes specific variables to specific values i.e.
Expand Down Expand Up @@ -67,7 +71,7 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) {
proportional = ls_design_proportional
)

design <- design_fun(data, frm, covars, fix)
design <- design_fun(data, frm, fix)
beta <- matrix(coef(model), nrow = 1)
list(
est = as.vector(beta %*% design),
Expand All @@ -88,41 +92,61 @@ lsmeans <- function(model, ..., .weights = c("proportional", "equal")) {
#'
#' @param data A data.frame
#' @param frm Formula used to fit the original model
#' @param covars a character vector of variables names that exist in
#' `data` which should be extracted (`ls_design_equal` only)
#' @param fix A named list of variables with fixed values
#' @name ls_design
ls_design_equal <- function(data, frm, covars, fix) {
collection <- list()
for (var in covars) {
values <- data[[var]]
if (is.factor(values)) {
lvls <- levels(values)
if (var %in% names(fix)) {
collection[[var]] <- factor(fix[[var]], levels = lvls)
} else {
collection[[var]] <- factor(lvls, levels = lvls)
}
} else if (is.numeric(values)) {
if (var %in% names(fix)) {
collection[[var]] <- fix[[var]]
} else {
collection[[var]] <- mean(values)
}
} else {
stop("invalid data type")
ls_design_equal <- function(data, frm, fix) {
assert_that(
inherits(frm, "formula"),
is.data.frame(data),
is.list(fix),
all(names(fix) %in% colnames(data))
)
frm2 <- update(frm, NULL ~ .)
data2 <- model.frame(frm2, data)
collection <- lapply(as.list(data2), collapse_values)

for (var in names(fix)) {
if (is.numeric(data2[[var]])) {
collection[[var]] <- fix[[var]]
} else if (is.factor(data2[[var]])) {
collection[[var]] <- factor(fix[[var]], levels = levels(data2[[var]]))
}
}
design <- matrix(
apply(model.matrix(frm, expand.grid(collection)), 2, mean),

all_combinations <- expand.grid(collection)

design_matrix <- model.matrix(frm2, all_combinations)

result <- matrix(
apply(design_matrix, 2, mean),
ncol = 1
)
return(design)
return(result)
}


collapse_values <- function(x) {
UseMethod("collapse_values")
}

#' @export
collapse_values.factor <- function(x) {
return(factor(levels(x), levels(x)))
}

#' @export
collapse_values.numeric <- function(x) {
return(mean(x))
}

#' @export
collapse_values.default <- function(x) {
stop("invalid data type")
}


#' @rdname ls_design
ls_design_proportional <- function(data, frm, covars, fix) {
ls_design_proportional <- function(data, frm, fix) {
data2 <- data
for (var in names(fix)) {
value <- data[[var]]
Expand Down
7 changes: 2 additions & 5 deletions man/ls_design.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 24 additions & 18 deletions man/lsmeans.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions tests/testthat/test-lsmeans.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,44 @@ test_that("LSmeans (proportional) from rbmi equals means of average prediction",
})


test_that("LSmeans(proportional) returns equivalent results to 'counterfactual'", {
set.seed(2412)
n <- 10000
# Main goal here is to provide a more involved test of interactions between
# cont * cont, cont * cont * cont model terms
dat <- tibble(
v1 = rnorm(n),
v2 = rnorm(n),
v3 = rnorm(n),
c1 = sample(c("A", "B"), size = n, replace = TRUE, prob = c(0.8, 0.2)),
c2 = sample(c("X", "Y"), size = n, replace = TRUE, prob = c(0.6, 0.4)),
error = rnorm(n, 0, 4),
outcome = 30 +
5 * v1 +
3 * v2 +
2 * v3 +
8 * v1 * v2 +
9 * v1 * v3 +
10 * v2 * v3 +
12 * v1 * v2 * v3 +
4 * (c1 == "B") +
6 * (c2 == "Y") +
7 * (c1 == "B" & c2 == "Y") +
13 * (c1 == "B") * v1 +
error
)
mod <- lm(outcome ~ v1 * v2 * v3 + c1 * c2 + v1 * c1, data = dat)

expect_equal(
lsmeans(mod, c1 = "A", .weights = "proportional")$est,
mean(predict(mod, newdata = dat |> mutate(c1 = "A")))
)

expect_equal(
lsm2 <- lsmeans(mod, c1 = "B", c2 = "Y", .weights = "proportional")$est,
mean(predict(mod, newdata = dat |> mutate(c1 = "B", c2 = "Y")))
)
})


#### Toy code to experiment with
Expand Down Expand Up @@ -263,3 +301,4 @@ test_that("LSmeans (proportional) from rbmi equals means of average prediction",




0 comments on commit aa939cd

Please sign in to comment.