Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable multiple components simultaenously in pool_parameters() #1018

Merged
merged 3 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: parameters
Title: Processing of Model Parameters
Version: 0.22.2.12
Version: 0.22.2.13
Authors@R:
c(person(given = "Daniel",
family = "Lüdecke",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
* `equivalence_test()` and `p_significance()` work with objects returned by
`model_parameters()`.

* `pool_parameters()` now better deals with models with multiple components
(e.g. zero-inflation or dispersion).

* Revision / enhancement of some documentation.

# parameters 0.22.2
Expand Down
156 changes: 88 additions & 68 deletions R/pool_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@
#'
#' @note
#' Models with multiple components, (for instance, models with zero-inflation,
#' where predictors appear in the count and zero-inflation part) may fail in
#' case of identical names for coefficients in the different model components,
#' since the coefficient table is grouped by coefficient names for pooling. In
#' such cases, coefficients of count and zero-inflation model parts would be
#' combined. Therefore, the `component` argument defaults to
#' `"conditional"` to avoid this.
#' where predictors appear in the count and zero-inflation part, or models with
#' dispersion component) may fail in rare situations. In this case, compute
#' the pooled parameters for components separately, using the `component`
#' argument.
#'
#' Some model objects do not return standard errors (e.g. objects of class
#' `htest`). For these models, no pooled confidence intervals nor p-values
Expand Down Expand Up @@ -65,10 +63,10 @@
#' pool_parameters(models, ci_method = "residual")$df_error
#' @return A data frame of indices related to the model's parameters.
#' @export
pool_parameters <- function(x,

Check warning on line 66 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=66,col=1,[cyclocomp_linter] Reduce the cyclomatic complexity of this function from 44 to at most 40.
exponentiate = FALSE,
effects = "fixed",
component = "conditional",
component = "all",
verbose = TRUE,
...) {
# check input, save original model -----
Expand All @@ -93,7 +91,7 @@

if (!all(vapply(x, inherits, TRUE, "parameters_model"))) {
insight::format_error(
"First argument `x` must be a list of `parameters_model` objects, as returned by the `model_parameters()` function."

Check warning on line 94 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=94,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 122 characters.
)
}

Expand All @@ -103,7 +101,7 @@

if (isTRUE(attributes(x[[1]])$exponentiate) && verbose) {
insight::format_alert(
"Pooling on exponentiated parameters is not recommended. Please call `model_parameters()` with 'exponentiate = FALSE', and then call `pool_parameters(..., exponentiate = TRUE)`."

Check warning on line 104 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=104,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 184 characters.
)
}

Expand Down Expand Up @@ -163,75 +161,92 @@
params <- params[params$Effects != "random", ]
parameter_values <- x[[1]]$Parameter[x[[1]]$Effects != "random"]
}
estimates <- split(params, factor(params$Parameter, levels = unique(parameter_values)))

# split by component
if (!is.null(params$Component) && insight::n_unique(params$Component) > 1) {
component_values <- x[[1]]$Component
estimates <- split(
params,
list(
factor(params$Parameter, levels = unique(parameter_values)),
factor(params$Component, levels = unique(component_values))
)
)
} else {
component_values <- NULL
estimates <- split(
params,
factor(params$Parameter, levels = unique(parameter_values))
)
}

# pool estimates etc. -----

pooled_params <- do.call(rbind, lapply(estimates, function(i) {
# pooled estimate
pooled_estimate <- mean(i$Coefficient)
# if we split by "component", some of the data frames might be empty
# in this case, just skip...
if (nrow(i) > 0) {
# pooled estimate
pooled_estimate <- mean(i$Coefficient)

# special models that have no standard errors (like "htest" objects)
if (is.null(i$SE) || all(is.na(i$SE))) {
out <- data.frame(
Coefficient = pooled_estimate,
SE = NA,
CI_low = NA,
CI_high = NA,
Statistic = NA,
df_error = NA,
p = NA,
stringsAsFactors = FALSE
)

if (verbose) {
insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")

Check warning on line 206 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=206,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 129 characters.
}

# special models that have no standard errors (like "htest" objects)
if (is.null(i$SE) || all(is.na(i$SE))) {
out <- data.frame(
Coefficient = pooled_estimate,
SE = NA,
CI_low = NA,
CI_high = NA,
Statistic = NA,
df_error = NA,
p = NA,
stringsAsFactors = FALSE
)
# regular models that have coefficients and standard errors
} else {
# pooled standard error
ubar <- mean(i$SE^2)
tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
pooled_se <- sqrt(tmp)

# pooled degrees of freedom, Barnard-Rubin adjustment for small samples
df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
if (length(df_column)) {
pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))

Check warning on line 219 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=219,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 121 characters.
# validation check length
if (length(pooled_df) > 1 && length(pooled_se) == 1) {
pooled_df <- round(mean(pooled_df, na.rm = TRUE))
}
} else {
pooled_df <- Inf
}

if (verbose) {
insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.")
# pooled statistic
pooled_statistic <- pooled_estimate / pooled_se

# confidence intervals
alpha <- (1 + ci) / 2
fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))

out <- data.frame(
Coefficient = pooled_estimate,
SE = pooled_se,
CI_low = pooled_estimate - pooled_se * fac,
CI_high = pooled_estimate + pooled_se * fac,
Statistic = pooled_statistic,
df_error = pooled_df,
p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
stringsAsFactors = FALSE
)
}

# regular models that have coefficients and standard errors
out
} else {
# pooled standard error
ubar <- mean(i$SE^2)
tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient)
pooled_se <- sqrt(tmp)

# pooled degrees of freedom, Barnard-Rubin adjustment for small samples
df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1]
if (length(df_column)) {
pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]]))
# validation check length
if (length(pooled_df) > 1 && length(pooled_se) == 1) {
pooled_df <- round(mean(pooled_df, na.rm = TRUE))
}
} else {
pooled_df <- Inf
}

# pooled statistic
pooled_statistic <- pooled_estimate / pooled_se

# confidence intervals
alpha <- (1 + ci) / 2
fac <- suppressWarnings(stats::qt(alpha, df = pooled_df))

out <- data.frame(
Coefficient = pooled_estimate,
SE = pooled_se,
CI_low = pooled_estimate - pooled_se * fac,
CI_high = pooled_estimate + pooled_se * fac,
Statistic = pooled_statistic,
df_error = pooled_df,
p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE),
stringsAsFactors = FALSE
)
}

# add component, when pooling for all components
if (identical(component, "all") && "Component" %in% colnames(i)) {
out$Component <- i$Component[1]
NULL
}
out
}))


Expand All @@ -249,13 +264,13 @@
stringsAsFactors = FALSE
)
}))
pooled_params$Effects <- "fixed"
}


# reorder ------

pooled_params$Parameter <- parameter_values
columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Component")
columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Effects", "Component")

Check warning on line 273 in R/pool_parameters.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/pool_parameters.R,line=273,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 123 characters.
pooled_params <- pooled_params[intersect(columns, colnames(pooled_params))]


Expand All @@ -268,6 +283,11 @@
pooled_params <- merge(pooled_params, pooled_random, all = TRUE, sort = FALSE)
}

# add back component column
if (!is.null(component_values)) {
pooled_params$Component <- component_values
}

# this needs to be done extra here, cannot call ".add_model_parameters_attributes()"
pooled_params <- .add_pooled_params_attributes(
pooled_params,
Expand Down
12 changes: 5 additions & 7 deletions man/pool_parameters.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

136 changes: 136 additions & 0 deletions tests/testthat/test-pool_parameters.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,139 @@ test_that("pooled parameters", {
pp3 <- summary(mice::pool(m_mice))
expect_equal(pp2$df_error, pp3$df, tolerance = 1e-3)
})

skip_on_cran()

test_that("pooled parameters, glmmTMB, components", {
skip_if_not_installed("mice")
skip_if_not_installed("glmmTMB")
sim1 <- function(nfac = 4, nt = 10, facsd = 0.1, tsd = 0.15, mu = 0, residsd = 1) {
dat <- expand.grid(fac = factor(letters[1:nfac]), t = 1:nt)
n <- nrow(dat)
dat$REfac <- rnorm(nfac, sd = facsd)[dat$fac]
dat$REt <- rnorm(nt, sd = tsd)[dat$t]
dat$x <- rnorm(n, mean = mu, sd = residsd) + dat$REfac + dat$REt
dat
}

set.seed(101)
d1 <- sim1(mu = 100, residsd = 10)
d2 <- sim1(mu = 200, residsd = 5)
d1$sd <- "ten"
d2$sd <- "five"
dat <- rbind(d1, d2)

set.seed(101)
dat$REfac[sample.int(nrow(dat), 10)] <- NA
dat$x[sample.int(nrow(dat), 10)] <- NA
dat$sd[sample.int(nrow(dat), 10)] <- NA

impdat <- suppressWarnings(mice::mice(dat, printFlag = FALSE))
models <- lapply(1:5, function(i) {
glmmTMB::glmmTMB(
x ~ sd + (1 | t),
dispformula = ~sd,
data = mice::complete(impdat, action = i)
)
})

out <- pool_parameters(models, component = "conditional")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
"df_error", "p"
)
)
expect_equal(out$Coefficient, c(187.280225, -87.838969), tolerance = 1e-3)

out <- pool_parameters(models, component = "all", effects = "all")
expect_named(
out,
c(
"Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(187.280225, -87.838969, 3.51576, -1.032665, 0.610992, NaN),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "all", effects = "fixed")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(187.280225, -87.838969, 3.51576, -1.032665),
tolerance = 1e-3
)
})


test_that("pooled parameters, glmmTMB, zero-inflated", {
skip_if_not_installed("mice")
skip_if_not_installed("glmmTMB")
data(Salamanders, package = "glmmTMB")
set.seed(123)
Salamanders$cover[sample.int(nrow(Salamanders), 50)] <- NA
Salamanders$mined[sample.int(nrow(Salamanders), 10)] <- NA

impdat <- suppressWarnings(mice::mice(Salamanders, printFlag = FALSE))
models <- lapply(1:5, function(i) {
glmmTMB::glmmTMB(
count ~ mined + cover + (1 | site),
ziformula = ~mined,
family = poisson(),
data = mice::complete(impdat, action = i)
)
})

out <- pool_parameters(models)
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic",
"df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "all", effects = "all")
expect_named(
out,
c(
"Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p", "Component"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026, 0.158795),
tolerance = 1e-3
)

out <- pool_parameters(models, component = "conditional", effects = "fixed")
expect_named(
out,
c(
"Parameter", "Coefficient", "SE", "CI_low", "CI_high",
"Statistic", "df_error", "p"
)
)
expect_equal(
out$Coefficient,
c(0.13409, 1.198551, -0.181912),
tolerance = 1e-3
)
})
Loading