diff --git a/DESCRIPTION b/DESCRIPTION index af364f8c9..53bb65ccb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: modelbased Title: Estimation of Model-Based Predictions, Contrasts and Means -Version: 0.9.0.6 +Version: 0.9.0.8 Authors@R: c(person(given = "Dominique", family = "Makowski", @@ -92,3 +92,4 @@ Roxygen: list(markdown = TRUE) Config/Needs/check: stan-dev/cmdstanr Config/Needs/website: easystats/easystatstemplate LazyData: true +Remotes: easystats/insight diff --git a/NEWS.md b/NEWS.md index ab27f56f1..c107c0e9c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,9 @@ ## Changes +* The `print()` method is now explicitly documented and gets some new options + to customize the output for tables. + * New option `"esarey"` for the `p_adjust` argument. The `"esarey"` option is specifically for the case of Johnson-Neyman intervals, i.e. when calling `estimate_slopes()` with two numeric predictors in an interaction term. diff --git a/R/format.R b/R/format.R index 9e04bb32e..ae5ebef06 100644 --- a/R/format.R +++ b/R/format.R @@ -1,12 +1,38 @@ # Format ------------------------------------------------------------------ #' @export -format.estimate_contrasts <- function(x, format = NULL, ...) { +format.estimate_contrasts <- function(x, + format = NULL, + select = getOption("modelbased_select", NULL), + include_grid = getOption("modelbased_include_grid", FALSE), + ...) { # don't print columns of adjusted_for variables adjusted_for <- attr(x, "adjusted_for", exact = TRUE) - if (!is.null(adjusted_for) && all(adjusted_for %in% colnames(x))) { + if (!is.null(adjusted_for) && all(adjusted_for %in% colnames(x)) && !isTRUE(include_grid)) { # remove non-focal terms from data frame x[adjusted_for] <- NULL + } else if (isTRUE(include_grid)) { + # we include the data grid, so we don't need to add the same information + # to the footer + table_footer <- attributes(x)$table_footer + if (!is.null(table_footer)) { + # (Predictors controlled.*?): This is the first capturing group. + # - `Predictors controlled`: Matches the literal string "Predictors controlled". + # - `.*?`: Matches any character (.) zero or more times (*), but as few + # times as possible (?). This is important to avoid matching across + # multiple lines. This is a non-greedy quantifier. + # `(\n|$)`: This is the second capturing group. + # - \n: Matches a newline character. + # - $: Matches the end of the string. + # - |: Acts as an "OR" operator. So, this part matches either a newline + # or the end of the string. This is necessary to capture the last match + # if it's at the very end of the string and not followed by a newline. + # (powered by Gemini) + pattern <- "(Predictors controlled.*?)(\n|$)" + table_footer[1] <- gsub(pattern, "", table_footer[1]) + # add back modified footer + attr(x, "table_footer") <- table_footer + } } # arrange columns (not for contrast now) @@ -34,10 +60,15 @@ format.estimate_contrasts <- function(x, format = NULL, ...) { # remove all-NA columns x <- datawizard::remove_empty_columns(x) + # add back adjusted_for variables when we have custom column layouts + if (!is.null(select)) { + attr(x, "focal_terms") <- unique(c(attr(x, "focal_terms"), adjusted_for)) + } + if (!is.null(format) && format %in% c("md", "markdown", "html")) { - insight::format_table(x, ci_brackets = c("(", ")"), ...) + insight::format_table(x, ci_brackets = c("(", ")"), select = select, format = "html", ...) } else { - insight::format_table(x, ...) + insight::format_table(x, select = select, ...) } } diff --git a/R/print.R b/R/print.R index 276282aa5..c3cc26cdc 100644 --- a/R/print.R +++ b/R/print.R @@ -1,7 +1,71 @@ +#' Printing modelbased-objects +#' +#' `print()` method for **modelbased** objects. Can be used to tweak the output +#' of tables. +#' +#' @param x An object returned by the different `estimate_*()` functions. +#' @param include_grid Logical, if `TRUE`, the data grid is included in the +#' table output. Only applies to prediction-functions like `estimate_relation()` +#' or `estimate_link()`. +#' @param full_labels Logical, if `TRUE` (default), all labels for focal terms +#' are shown. If `FALSE`, redundant (duplicated) labels are removed from rows. +#' @param ... Arguments passed to `insight::format_table()` or +#' `insight::export_table()`. +#' +#' @inheritParams insight::format_table +#' +#' @return Invisibly returns `x`. +#' +#' @section Global Options to Customize Tables when Printing: +#' Columns and table layout can be customized using `options()`: +#' +#' - `modelbased_select`: `options(modelbased_select = )` will set a +#' default value for the `select` argument and can be used to define a custom +#' default layout for printing. +#' +#' - `modelbased_include_grid`: `options(modelbased_include_grid = TRUE)` will +#' set a default value for the `include_grid` argument and can be used to +#' include data grids in the output by default or not. +#' +#' - `modelbased_full_labels`: `options(modelbased_full_labels = FALSE)` will +#' remove redundant (duplicated) labels from rows. +#' +#' @note Use `print_html()` and `print_md()` to create tables in HTML or +#' markdown format, respectively. +#' +#' @examplesIf insight::check_if_installed("marginaleffects", quietly = TRUE) +#' model <- lm(Petal.Length ~ Species, data = iris) +#' out <- estimate_means(model, "Species") +#' +#' # default +#' print(out) +#' +#' # smaller set of columns +#' print(out, select = "minimal") +#' +#' # remove redundant labels +#' data(efc, package = "modelbased") +#' efc <- datawizard::to_factor(efc, c("c161sex", "c172code", "e16sex")) +#' levels(efc$c172code) <- c("low", "mid", "high") +#' fit <- lm(neg_c_7 ~ c161sex * c172code * e16sex , data = efc) +#' out <- estimate_means(fit, c("c161sex", "c172code", "e16sex")) +#' print(out, full_labels = FALSE, select = "{estimate} ({se})") +#' #' @export -print.estimate_contrasts <- function(x, full_labels = TRUE, ...) { +print.estimate_contrasts <- function(x, + select = getOption("modelbased_select", NULL), + include_grid = getOption("modelbased_include_grid", FALSE), + full_labels = getOption("modelbased_full_labels", TRUE), + ...) { + # copy original + out <- x + # get attributes, but remove some of them - else, matching attribute fails + attr <- attributes(x) + attr <- attr[setdiff(names(attr), c("names", "row.names"))] + # format table - out <- format(x, ...) + out <- format(out, select = select, include_grid = include_grid, ...) + attributes(out) <- utils::modifyList(attributes(out), attr) # remove redundant labels, for "by" variables out <- .remove_redundant_labels(x, out, full_labels) @@ -69,3 +133,42 @@ print.estimate_grouplevel <- print.estimate_contrasts } align } + + +.format_layout <- function(x, select) { + # possible names for estimate column + estimate_names <- unique( + attributes(x)$coef_name, + c("Coefficient", "Slope", "Predicted", "Median", "Mean", "MAP", "Difference", "Ratio") + ) + # define column name for estimates + estimate_col <- estimate_names[which.min(estimate_names == colnames(x))] + # focal terms + focal_terms <- intersect( + c("Level1", "Level2", attributes(x)$focal_terms), + colnames(x) + ) + + # do we have shortcuts for "select"? + select <- switch(select, + # basic output, only focal terms, estimate, CI and p + basic = { + out <- c(focal_terms, estimate_col, "CI_low", "CI_high") + if ("p" %in% colnames(x)) { + out <- c(out, "p") + } + out + }, + select + ) + + # if "select" refers to column names, return selected columns + if (all(select %in% colnames(x))) { + return(x[select]) + } + + # add significant stars, in case it's requested + x$stars <- insight::format_p(x$p, stars = TRUE, stars_only = TRUE) + + x +} diff --git a/R/print_html.R b/R/print_html.R index 9240b503b..9b92da6ee 100644 --- a/R/print_html.R +++ b/R/print_html.R @@ -1,6 +1,22 @@ #' @export -print_html.estimate_contrasts <- function(x, full_labels = TRUE, ...) { - formatted_table <- format(x, format = "html", ...) +print_html.estimate_contrasts <- function(x, + select = getOption("modelbased_select", NULL), + include_grid = getOption("modelbased_include_grid", FALSE), + full_labels = TRUE, + ...) { + # copy original + out <- x + # get attributes, but remove some of them - else, matching attribute fails + attr <- attributes(x) + attr <- attr[setdiff(names(attr), c("names", "row.names"))] + + # select columns to print + if (!is.null(select)) { + out <- .format_layout(out, select) + attributes(out) <- utils::modifyList(attributes(out), attr) + } + + formatted_table <- format(out, format = "html", include_grid = include_grid, ...) # remove redundant labels, for "by" variables formatted_table <- .remove_redundant_labels(x, formatted_table, full_labels) diff --git a/R/print_md.R b/R/print_md.R index 28fe47471..29d7b66af 100644 --- a/R/print_md.R +++ b/R/print_md.R @@ -1,6 +1,22 @@ #' @export -print_md.estimate_contrasts <- function(x, full_labels = TRUE, ...) { - formatted_table <- format(x, format = "markdown", ...) +print_md.estimate_contrasts <- function(x, + select = getOption("modelbased_select", NULL), + include_grid = getOption("modelbased_include_grid", FALSE), + full_labels = TRUE, + ...) { + # copy original + out <- x + # get attributes, but remove some of them - else, matching attribute fails + attr <- attributes(x) + attr <- attr[setdiff(names(attr), c("names", "row.names"))] + + # select columns to print + if (!is.null(select)) { + out <- .format_layout(out, select) + attributes(out) <- utils::modifyList(attributes(out), attr) + } + + formatted_table <- format(out, format = "markdown", include_grid = include_grid, ...) # remove redundant labels, for "by" variables formatted_table <- .remove_redundant_labels(x, formatted_table, full_labels) diff --git a/man/print.estimate_contrasts.Rd b/man/print.estimate_contrasts.Rd new file mode 100644 index 000000000..0abb3aa80 --- /dev/null +++ b/man/print.estimate_contrasts.Rd @@ -0,0 +1,120 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print.R +\name{print.estimate_contrasts} +\alias{print.estimate_contrasts} +\title{Printing modelbased-objects} +\usage{ +\method{print}{estimate_contrasts}( + x, + select = getOption("modelbased_select", NULL), + include_grid = getOption("modelbased_include_grid", FALSE), + full_labels = getOption("modelbased_full_labels", TRUE), + ... +) +} +\arguments{ +\item{x}{An object returned by the different \verb{estimate_*()} functions.} + +\item{select}{Determines which columns are printed and the table layout. +There are two options for this argument: +\itemize{ +\item \strong{A string expression with layout pattern} + +\code{select} is a string with "tokens" enclosed in braces. These tokens will be +replaced by their associated columns, where the selected columns will be +collapsed into one column. Following tokens are replaced by the related +coefficients or statistics: \code{{estimate}}, \code{{se}}, \code{{ci}} (or \code{{ci_low}} and +\code{{ci_high}}), \code{{p}}, \code{{pd}} and \code{{stars}}. The token \code{{ci}} will be replaced +by \verb{{ci_low}, {ci_high}}. Example: \code{select = "{estimate}{stars} ({ci})"} + +It is possible to create multiple columns as well. A \code{|} separates values +into new cells/columns. Example: \code{select = "{estimate} ({ci})|{p}"}. +\item \strong{A string indicating a pre-defined layout} + +\code{select} can be one of the following string values, to create one of the +following pre-defined column layouts: +\itemize{ +\item \code{"minimal"}: Estimates, confidence intervals and numeric p-values, in two +columns. This is equivalent to \code{select = "{estimate} ({ci})|{p}"}. +\item \code{"short"}: Estimate, standard errors and numeric p-values, in two columns. +This is equivalent to \code{select = "{estimate} ({se})|{p}"}. +\item \code{"ci"}: Estimates and confidence intervals, no asterisks for p-values. +This is equivalent to \code{select = "{estimate} ({ci})"}. +\item \code{"se"}: Estimates and standard errors, no asterisks for p-values. This is +equivalent to \code{select = "{estimate} ({se})"}. +\item \code{"ci_p"}: Estimates, confidence intervals and asterisks for p-values. This +is equivalent to \code{select = "{estimate}{stars} ({ci})"}. +\item \code{"se_p"}: Estimates, standard errors and asterisks for p-values. This is +equivalent to \code{select = "{estimate}{stars} ({se})"}.. +} +} + +Using \code{select} to define columns will re-order columns and remove all columns +related to uncertainty (standard errors, confidence intervals), test statistics, +and p-values (and similar, like \code{pd} or \code{BF} for Bayesian models), because +these are assumed to be included or intentionally excluded when using \code{select}. +The new column order will be: Parameter columns first, followed by the "glue" +columns, followed by all remaining columns. If further columns should also be +placed first, add those as \code{focal_terms} attributes to \code{x}. I.e., following +columns are considers as "parameter columns" and placed first: +\code{c(easystats_columns("parameter"), attributes(x)$focal_terms)}. + +\strong{Note:} glue-like syntax is still experimental in the case of more complex models +(like mixed models) and may not return expected results.} + +\item{include_grid}{Logical, if \code{TRUE}, the data grid is included in the +table output. Only applies to prediction-functions like \code{estimate_relation()} +or \code{estimate_link()}.} + +\item{full_labels}{Logical, if \code{TRUE} (default), all labels for focal terms +are shown. If \code{FALSE}, redundant (duplicated) labels are removed from rows.} + +\item{...}{Arguments passed to \code{insight::format_table()} or +\code{insight::export_table()}.} +} +\value{ +Invisibly returns \code{x}. +} +\description{ +\code{print()} method for \strong{modelbased} objects. Can be used to tweak the output +of tables. +} +\note{ +Use \code{print_html()} and \code{print_md()} to create tables in HTML or +markdown format, respectively. +} +\section{Global Options to Customize Tables when Printing}{ + +Columns and table layout can be customized using \code{options()}: +\itemize{ +\item \code{modelbased_select}: \verb{options(modelbased_select = )} will set a +default value for the \code{select} argument and can be used to define a custom +default layout for printing. +\item \code{modelbased_include_grid}: \code{options(modelbased_include_grid = TRUE)} will +set a default value for the \code{include_grid} argument and can be used to +include data grids in the output by default or not. +\item \code{modelbased_full_labels}: \code{options(modelbased_full_labels = FALSE)} will +remove redundant (duplicated) labels from rows. +} +} + +\examples{ +\dontshow{if (insight::check_if_installed("marginaleffects", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +model <- lm(Petal.Length ~ Species, data = iris) +out <- estimate_means(model, "Species") + +# default +print(out) + +# smaller set of columns +print(out, select = "minimal") + +# remove redundant labels +data(efc, package = "modelbased") +efc <- datawizard::to_factor(efc, c("c161sex", "c172code", "e16sex")) +levels(efc$c172code) <- c("low", "mid", "high") +fit <- lm(neg_c_7 ~ c161sex * c172code * e16sex , data = efc) +out <- estimate_means(fit, c("c161sex", "c172code", "e16sex")) +print(out, full_labels = FALSE, select = "{estimate} ({se})") +\dontshow{\}) # examplesIf} +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index a66cfd733..8cd41d8d9 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -16,6 +16,7 @@ reference: - starts_with("pool_") - title: "Miscellaenous" contents: + - print.estimate_contrasts - describe_nonlinear - get_emmeans - get_marginalmeans diff --git a/tests/testthat/_snaps/print.md b/tests/testthat/_snaps/print.md index db6beea19..e1dc60b3b 100644 --- a/tests/testthat/_snaps/print.md +++ b/tests/testthat/_snaps/print.md @@ -193,6 +193,8 @@ Code print(estimate_expectation(m, by = "spp", predict = "conditional"), zap_small = TRUE) Output + Model-based Predictions + spp | Predicted | SE | CI --------------------------------------- GP | 0.73 | 0.21 | [0.32, 1.14] @@ -202,4 +204,123 @@ EC-L | 1.42 | 0.37 | [0.69, 2.14] DES-L | 1.34 | 0.36 | [0.63, 2.04] DF | 0.78 | 0.21 | [0.37, 1.19] + + Variable predicted: count + Predictors modulated: spp + Predictors controlled: mined (yes) + Predictions are on the conditional-scale. + +# print - layouts and include data grid + + Code + print(out) + Output + Estimated Marginal Means + + Species | Mean | SE | 95% CI | t(147) + ------------------------------------------------ + setosa | 1.46 | 0.06 | [1.34, 1.58] | 24.02 + versicolor | 4.26 | 0.06 | [4.14, 4.38] | 70.00 + virginica | 5.55 | 0.06 | [5.43, 5.67] | 91.23 + + Variable predicted: Petal.Length + Predictors modulated: Species + +--- + + Code + print(out, select = "minimal") + Output + Estimated Marginal Means + + Species | Mean (CI) + ------------------------------ + setosa | 1.46 (1.34, 1.58) + versicolor | 4.26 (4.14, 4.38) + virginica | 5.55 (5.43, 5.67) + + Variable predicted: Petal.Length + Predictors modulated: Species + +--- + + Code + print(out, select = "minimal") + Output + Marginal Contrasts Analysis + + Level1 | Level2 | Difference (CI) | p + ---------------------------------------------------- + versicolor | setosa | 2.80 (2.63, 2.97) | <0.001 + virginica | setosa | 4.09 (3.92, 4.26) | <0.001 + virginica | versicolor | 1.29 (1.12, 1.46) | <0.001 + + Variable predicted: Petal.Length + Predictors contrasted: Species + p-values are uncorrected. + +--- + + Code + print(out, select = "{estimate}{stars}|{ci}") + Output + Marginal Contrasts Analysis + + Level1 | Level2 | Difference | CI + ------------------------------------------------- + versicolor | setosa | 2.80*** | 2.63, 2.97 + virginica | setosa | 4.09*** | 3.92, 4.26 + virginica | versicolor | 1.29*** | 1.12, 1.46 + + Variable predicted: Petal.Length + Predictors contrasted: Species + p-values are uncorrected. + +--- + + Code + print(estimate_relation(m, by = "qsec")) + Output + Model-based Predictions + + qsec | Predicted | SE | 95% CI + --------------------------------------- + 14.50 | 2.80 | 0.19 | [2.41, 3.18] + 15.43 | 2.91 | 0.15 | [2.62, 3.21] + 16.37 | 3.03 | 0.11 | [2.81, 3.26] + 17.30 | 3.15 | 0.09 | [2.97, 3.32] + 18.23 | 3.27 | 0.08 | [3.10, 3.44] + 19.17 | 3.38 | 0.10 | [3.17, 3.60] + 20.10 | 3.50 | 0.14 | [3.21, 3.78] + 21.03 | 3.62 | 0.18 | [3.25, 3.98] + 21.97 | 3.73 | 0.22 | [3.28, 4.19] + 22.90 | 3.85 | 0.27 | [3.30, 4.40] + + Variable predicted: wt + Predictors modulated: qsec + Predictors controlled: mpg (20) + +--- + + Code + print(estimate_relation(m, by = "qsec"), include_grid = TRUE) + Output + Model-based Predictions + + qsec | mpg | Predicted | SE | 95% CI + ----------------------------------------------- + 14.50 | 20.09 | 2.80 | 0.19 | [2.41, 3.18] + 15.43 | 20.09 | 2.91 | 0.15 | [2.62, 3.21] + 16.37 | 20.09 | 3.03 | 0.11 | [2.81, 3.26] + 17.30 | 20.09 | 3.15 | 0.09 | [2.97, 3.32] + 18.23 | 20.09 | 3.27 | 0.08 | [3.10, 3.44] + 19.17 | 20.09 | 3.38 | 0.10 | [3.17, 3.60] + 20.10 | 20.09 | 3.50 | 0.14 | [3.21, 3.78] + 21.03 | 20.09 | 3.62 | 0.18 | [3.25, 3.98] + 21.97 | 20.09 | 3.73 | 0.22 | [3.28, 4.19] + 22.90 | 20.09 | 3.85 | 0.27 | [3.30, 4.40] + + Variable predicted: wt + Predictors modulated: qsec + Predictors controlled: mpg (20) diff --git a/tests/testthat/_snaps/windows/print.md b/tests/testthat/_snaps/windows/print.md index 9f9adf58c..6fedba9f5 100644 --- a/tests/testthat/_snaps/windows/print.md +++ b/tests/testthat/_snaps/windows/print.md @@ -24,12 +24,15 @@ Code summary(deriv) + Message + There might be too few data to accurately determine intervals. Consider + setting `length = 100` (or larger) in your call to `estimate_slopes()`. Output - Average Marginal Effects + Johnson-Neymann Intervals - Start | End | x | Slope | SE | 95% CI | t(998) | p - ---------------------------------------------------------------------- - -3.38 | 3.28 | -0.05 | 12.75 | 0.25 | [12.26, 13.24] | 50.97 | < .001 + Start | End | Direction | Confidence + -------------------------------------- + -3.38 | 3.28 | positive | Significant Marginal effects estimated for x @@ -60,12 +63,15 @@ Code summary(deriv2) + Message + There might be too few data to accurately determine intervals. Consider + setting `length = 100` (or larger) in your call to `estimate_slopes()`. Output - Average Marginal Effects + Johnson-Neymann Intervals - Start | End | x | Slope | SE | 95% CI | t | p - --------------------------------------------------------------------- - -3.38 | 3.28 | -0.05 | 12.75 | 0.25 | [12.26, 13.24] | 50.97 | < .001 + Start | End | Direction | Confidence + -------------------------------------- + -3.38 | 3.28 | positive | Significant Marginal effects estimated for x Type of slope was dY/dX diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 828a90f2d..840fc08a0 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -105,3 +105,19 @@ test_that("estimate_epectation - don't print empty RE columns", { ) expect_snapshot(print(estimate_expectation(m, by = "spp", predict = "conditional"), zap_small = TRUE)) }) + + +test_that("print - layouts and include data grid", { + data(iris) + model <- lm(Petal.Length ~ Species, data = iris) + out <- estimate_means(model, "Species") + expect_snapshot(print(out)) + expect_snapshot(print(out, select = "minimal")) + out <- estimate_contrasts(model, "Species") + expect_snapshot(print(out, select = "minimal")) + expect_snapshot(print(out, select = "{estimate}{stars}|{ci}")) + + m <- lm(wt ~ qsec + mpg, dat = mtcars) + expect_snapshot(print(estimate_relation(m, by = "qsec"))) + expect_snapshot(print(estimate_relation(m, by = "qsec"), include_grid = TRUE)) +})