From ce1a3b06dfa8ad65f31a4d124286079c02eb1549 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 24 Sep 2024 15:52:11 +0200 Subject: [PATCH 1/3] Enable multiple components simultaenously in `pool_parameters()` --- DESCRIPTION | 2 +- NEWS.md | 3 + R/pool_parameters.R | 156 +++++++++++++++----------- man/pool_parameters.Rd | 12 +- tests/testthat/test-pool_parameters.R | 136 ++++++++++++++++++++++ 5 files changed, 233 insertions(+), 76 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6effb9f70..a46bfe2a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: parameters Title: Processing of Model Parameters -Version: 0.22.2.12 +Version: 0.22.2.13 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index 8b3726113..9122addc4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,9 @@ * `equivalence_test()` and `p_significance()` work with objects returned by `model_parameters()`. +* `pool_parameters()` now better deals with models with multiple components + (e.g. zero-inflation or dispersion). + * Revision / enhancement of some documentation. # parameters 0.22.2 diff --git a/R/pool_parameters.R b/R/pool_parameters.R index f985a84af..cfe7d1c51 100644 --- a/R/pool_parameters.R +++ b/R/pool_parameters.R @@ -17,12 +17,10 @@ #' #' @note #' Models with multiple components, (for instance, models with zero-inflation, -#' where predictors appear in the count and zero-inflation part) may fail in -#' case of identical names for coefficients in the different model components, -#' since the coefficient table is grouped by coefficient names for pooling. In -#' such cases, coefficients of count and zero-inflation model parts would be -#' combined. Therefore, the `component` argument defaults to -#' `"conditional"` to avoid this. +#' where predictors appear in the count and zero-inflation part, or models with +#' dispersion component) may fail in rare situations. In this case, compute +#' the pooled parameters for components seperately, using the `component` +#' argument. #' #' Some model objects do not return standard errors (e.g. objects of class #' `htest`). For these models, no pooled confidence intervals nor p-values @@ -68,7 +66,7 @@ pool_parameters <- function(x, exponentiate = FALSE, effects = "fixed", - component = "conditional", + component = "all", verbose = TRUE, ...) { # check input, save original model ----- @@ -163,75 +161,92 @@ pool_parameters <- function(x, params <- params[params$Effects != "random", ] parameter_values <- x[[1]]$Parameter[x[[1]]$Effects != "random"] } - estimates <- split(params, factor(params$Parameter, levels = unique(parameter_values))) + # split by component + if (!is.null(params$Component) && insight::n_unique(params$Component) > 1) { + component_values <- x[[1]]$Component + estimates <- split( + params, + list( + factor(params$Parameter, levels = unique(parameter_values)), + factor(params$Component, levels = unique(component_values)) + ) + ) + } else { + component_values <- NULL + estimates <- split( + params, + factor(params$Parameter, levels = unique(parameter_values)) + ) + } # pool estimates etc. ----- pooled_params <- do.call(rbind, lapply(estimates, function(i) { - # pooled estimate - pooled_estimate <- mean(i$Coefficient) + # if we split by "component", some of the data frames might be empty + # in this case, just skip... + if (nrow(i) > 0) { + # pooled estimate + pooled_estimate <- mean(i$Coefficient) + + # special models that have no standard errors (like "htest" objects) + if (is.null(i$SE) || all(is.na(i$SE))) { + out <- data.frame( + Coefficient = pooled_estimate, + SE = NA, + CI_low = NA, + CI_high = NA, + Statistic = NA, + df_error = NA, + p = NA, + stringsAsFactors = FALSE + ) + + if (verbose) { + insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.") + } - # special models that have no standard errors (like "htest" objects) - if (is.null(i$SE) || all(is.na(i$SE))) { - out <- data.frame( - Coefficient = pooled_estimate, - SE = NA, - CI_low = NA, - CI_high = NA, - Statistic = NA, - df_error = NA, - p = NA, - stringsAsFactors = FALSE - ) + # regular models that have coefficients and standard errors + } else { + # pooled standard error + ubar <- mean(i$SE^2) + tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient) + pooled_se <- sqrt(tmp) + + # pooled degrees of freedom, Barnard-Rubin adjustment for small samples + df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1] + if (length(df_column)) { + pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]])) + # validation check length + if (length(pooled_df) > 1 && length(pooled_se) == 1) { + pooled_df <- round(mean(pooled_df, na.rm = TRUE)) + } + } else { + pooled_df <- Inf + } - if (verbose) { - insight::format_alert("Model objects had no standard errors. Cannot compute pooled confidence intervals and p-values.") + # pooled statistic + pooled_statistic <- pooled_estimate / pooled_se + + # confidence intervals + alpha <- (1 + ci) / 2 + fac <- suppressWarnings(stats::qt(alpha, df = pooled_df)) + + out <- data.frame( + Coefficient = pooled_estimate, + SE = pooled_se, + CI_low = pooled_estimate - pooled_se * fac, + CI_high = pooled_estimate + pooled_se * fac, + Statistic = pooled_statistic, + df_error = pooled_df, + p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE), + stringsAsFactors = FALSE + ) } - - # regular models that have coefficients and standard errors + out } else { - # pooled standard error - ubar <- mean(i$SE^2) - tmp <- ubar + (1 + 1 / len) * stats::var(i$Coefficient) - pooled_se <- sqrt(tmp) - - # pooled degrees of freedom, Barnard-Rubin adjustment for small samples - df_column <- grep("(\\bdf\\b|\\bdf_error\\b)", colnames(i), value = TRUE)[1] - if (length(df_column)) { - pooled_df <- .barnad_rubin(m = nrow(i), b = stats::var(i$Coefficient), t = tmp, dfcom = unique(i[[df_column]])) - # validation check length - if (length(pooled_df) > 1 && length(pooled_se) == 1) { - pooled_df <- round(mean(pooled_df, na.rm = TRUE)) - } - } else { - pooled_df <- Inf - } - - # pooled statistic - pooled_statistic <- pooled_estimate / pooled_se - - # confidence intervals - alpha <- (1 + ci) / 2 - fac <- suppressWarnings(stats::qt(alpha, df = pooled_df)) - - out <- data.frame( - Coefficient = pooled_estimate, - SE = pooled_se, - CI_low = pooled_estimate - pooled_se * fac, - CI_high = pooled_estimate + pooled_se * fac, - Statistic = pooled_statistic, - df_error = pooled_df, - p = 2 * stats::pt(abs(pooled_statistic), df = pooled_df, lower.tail = FALSE), - stringsAsFactors = FALSE - ) - } - - # add component, when pooling for all components - if (identical(component, "all") && "Component" %in% colnames(i)) { - out$Component <- i$Component[1] + NULL } - out })) @@ -249,13 +264,13 @@ pool_parameters <- function(x, stringsAsFactors = FALSE ) })) + pooled_params$Effects <- "fixed" } - # reorder ------ pooled_params$Parameter <- parameter_values - columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Component") + columns <- c("Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", "df_error", "p", "Effects", "Component") pooled_params <- pooled_params[intersect(columns, colnames(pooled_params))] @@ -268,6 +283,11 @@ pool_parameters <- function(x, pooled_params <- merge(pooled_params, pooled_random, all = TRUE, sort = FALSE) } + # add back component column + if (!is.null(component_values)) { + pooled_params$Component <- component_values + } + # this needs to be done extra here, cannot call ".add_model_parameters_attributes()" pooled_params <- .add_pooled_params_attributes( pooled_params, diff --git a/man/pool_parameters.Rd b/man/pool_parameters.Rd index d677cd968..759b6a609 100644 --- a/man/pool_parameters.Rd +++ b/man/pool_parameters.Rd @@ -8,7 +8,7 @@ pool_parameters( x, exponentiate = FALSE, effects = "fixed", - component = "conditional", + component = "all", verbose = TRUE, ... ) @@ -64,12 +64,10 @@ small samples (\emph{Barnard and Rubin, 1999}). } \note{ Models with multiple components, (for instance, models with zero-inflation, -where predictors appear in the count and zero-inflation part) may fail in -case of identical names for coefficients in the different model components, -since the coefficient table is grouped by coefficient names for pooling. In -such cases, coefficients of count and zero-inflation model parts would be -combined. Therefore, the \code{component} argument defaults to -\code{"conditional"} to avoid this. +where predictors appear in the count and zero-inflation part, or models with +dispersion component) may fail in rare situations. In this case, compute +the pooled parameters for components seperately, using the \code{component} +argument. Some model objects do not return standard errors (e.g. objects of class \code{htest}). For these models, no pooled confidence intervals nor p-values diff --git a/tests/testthat/test-pool_parameters.R b/tests/testthat/test-pool_parameters.R index 0feb24009..fadb1534f 100644 --- a/tests/testthat/test-pool_parameters.R +++ b/tests/testthat/test-pool_parameters.R @@ -28,3 +28,139 @@ test_that("pooled parameters", { pp3 <- summary(mice::pool(m_mice)) expect_equal(pp2$df_error, pp3$df, tolerance = 1e-3) }) + +skip_on_cran() + +test_that("pooled parameters, glmmTMB, components", { + skip_if_not_installed("mice") + skip_if_not_installed("glmmTMB") + sim1 <- function(nfac = 4, nt = 10, facsd = 0.1, tsd = 0.15, mu = 0, residsd = 1) { + dat <- expand.grid(fac = factor(letters[1:nfac]), t = 1:nt) + n <- nrow(dat) + dat$REfac <- rnorm(nfac, sd = facsd)[dat$fac] + dat$REt <- rnorm(nt, sd = tsd)[dat$t] + dat$x <- rnorm(n, mean = mu, sd = residsd) + dat$REfac + dat$REt + dat + } + + set.seed(101) + d1 <- sim1(mu = 100, residsd = 10) + d2 <- sim1(mu = 200, residsd = 5) + d1$sd <- "ten" + d2$sd <- "five" + dat <- rbind(d1, d2) + + set.seed(101) + dat$REfac[sample.int(nrow(dat), 10)] <- NA + dat$x[sample.int(nrow(dat), 10)] <- NA + dat$sd[sample.int(nrow(dat), 10)] <- NA + + impdat <- mice::mice(dat, printFlag = FALSE) + models <- lapply(1:5, function(i) { + glmmTMB::glmmTMB( + x ~ sd + (1 | t), + dispformula = ~sd, + data = mice::complete(impdat, action = i) + ) + }) + + out <- pool_parameters(models, component = "conditional") + expect_named( + out, + c( + "Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", + "df_error", "p" + ) + ) + expect_equal(out$Coefficient, c(187.280225, -87.838969), tolerance = 1e-3) + + out <- pool_parameters(models, component = "all", effects = "all") + expect_named( + out, + c( + "Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high", + "Statistic", "df_error", "p", "Component" + ) + ) + expect_equal( + out$Coefficient, + c(187.280225, -87.838969, 3.51576, -1.032665, 0.610992, NaN), + tolerance = 1e-3 + ) + + out <- pool_parameters(models, component = "all", effects = "fixed") + expect_named( + out, + c( + "Parameter", "Coefficient", "SE", "CI_low", "CI_high", + "Statistic", "df_error", "p", "Component" + ) + ) + expect_equal( + out$Coefficient, + c(187.280225, -87.838969, 3.51576, -1.032665), + tolerance = 1e-3 + ) +}) + + +test_that("pooled parameters, glmmTMB, zero-inflated", { + skip_if_not_installed("mice") + skip_if_not_installed("glmmTMB") + data(Salamanders, package = "glmmTMB") + set.seed(123) + Salamanders$cover[sample.int(nrow(Salamanders), 50)] <- NA + Salamanders$mined[sample.int(nrow(Salamanders), 10)] <- NA + + impdat <- mice::mice(Salamanders, printFlag = FALSE) + models <- lapply(1:5, function(i) { + glmmTMB::glmmTMB( + count ~ mined + cover + (1 | site), + ziformula = ~mined, + family = poisson(), + data = mice::complete(impdat, action = i) + ) + }) + + out <- pool_parameters(models) + expect_named( + out, + c( + "Parameter", "Coefficient", "SE", "CI_low", "CI_high", "Statistic", + "df_error", "p", "Component" + ) + ) + expect_equal( + out$Coefficient, + c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026), + tolerance = 1e-3 + ) + + out <- pool_parameters(models, component = "all", effects = "all") + expect_named( + out, + c( + "Parameter", "Coefficient", "Effects", "SE", "CI_low", "CI_high", + "Statistic", "df_error", "p", "Component" + ) + ) + expect_equal( + out$Coefficient, + c(0.13409, 1.198551, -0.181912, 1.253029, -1.844026, 0.158795), + tolerance = 1e-3 + ) + + out <- pool_parameters(models, component = "conditional", effects = "fixed") + expect_named( + out, + c( + "Parameter", "Coefficient", "SE", "CI_low", "CI_high", + "Statistic", "df_error", "p" + ) + ) + expect_equal( + out$Coefficient, + c(0.13409, 1.198551, -0.181912), + tolerance = 1e-3 + ) +}) From 42d407b6c2fd50ddfe4d73a5388152276792f421 Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 24 Sep 2024 15:55:58 +0200 Subject: [PATCH 2/3] typo --- R/pool_parameters.R | 2 +- man/pool_parameters.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pool_parameters.R b/R/pool_parameters.R index cfe7d1c51..8693641af 100644 --- a/R/pool_parameters.R +++ b/R/pool_parameters.R @@ -19,7 +19,7 @@ #' Models with multiple components, (for instance, models with zero-inflation, #' where predictors appear in the count and zero-inflation part, or models with #' dispersion component) may fail in rare situations. In this case, compute -#' the pooled parameters for components seperately, using the `component` +#' the pooled parameters for components separately, using the `component` #' argument. #' #' Some model objects do not return standard errors (e.g. objects of class diff --git a/man/pool_parameters.Rd b/man/pool_parameters.Rd index 759b6a609..6b02889b0 100644 --- a/man/pool_parameters.Rd +++ b/man/pool_parameters.Rd @@ -66,7 +66,7 @@ small samples (\emph{Barnard and Rubin, 1999}). Models with multiple components, (for instance, models with zero-inflation, where predictors appear in the count and zero-inflation part, or models with dispersion component) may fail in rare situations. In this case, compute -the pooled parameters for components seperately, using the \code{component} +the pooled parameters for components separately, using the \code{component} argument. Some model objects do not return standard errors (e.g. objects of class From 934c29c0018b941c9547e263b8324e3d0166172e Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 24 Sep 2024 16:04:46 +0200 Subject: [PATCH 3/3] suppress Warnings --- tests/testthat/test-pool_parameters.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-pool_parameters.R b/tests/testthat/test-pool_parameters.R index fadb1534f..a11e86db0 100644 --- a/tests/testthat/test-pool_parameters.R +++ b/tests/testthat/test-pool_parameters.R @@ -55,7 +55,7 @@ test_that("pooled parameters, glmmTMB, components", { dat$x[sample.int(nrow(dat), 10)] <- NA dat$sd[sample.int(nrow(dat), 10)] <- NA - impdat <- mice::mice(dat, printFlag = FALSE) + impdat <- suppressWarnings(mice::mice(dat, printFlag = FALSE)) models <- lapply(1:5, function(i) { glmmTMB::glmmTMB( x ~ sd + (1 | t), @@ -112,7 +112,7 @@ test_that("pooled parameters, glmmTMB, zero-inflated", { Salamanders$cover[sample.int(nrow(Salamanders), 50)] <- NA Salamanders$mined[sample.int(nrow(Salamanders), 10)] <- NA - impdat <- mice::mice(Salamanders, printFlag = FALSE) + impdat <- suppressWarnings(mice::mice(Salamanders, printFlag = FALSE)) models <- lapply(1:5, function(i) { glmmTMB::glmmTMB( count ~ mined + cover + (1 | site),