From e905816b085e3ff6191612ae19f5fd195b13a93e Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Sat, 30 Apr 2022 08:45:49 -0500 Subject: [PATCH 1/3] initial commit performance to parameters --- DESCRIPTION | 5 +- NAMESPACE | 2 + R/dominance_analysis.R | 497 +++++++++++++++++++++++ _pkgdown.yml | 1 + inst/WORDLIST | 6 + man/dominance_analysis.Rd | 147 +++++++ tests/testthat/test-dominance_analysis.R | 81 ++++ 7 files changed, 737 insertions(+), 2 deletions(-) create mode 100644 R/dominance_analysis.R create mode 100644 man/dominance_analysis.Rd create mode 100644 tests/testthat/test-dominance_analysis.R diff --git a/DESCRIPTION b/DESCRIPTION index 3e5e3612f..f0594ade5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -75,8 +75,8 @@ Depends: R (>= 3.4) Imports: bayestestR (>= 0.11.5), - datawizard (>= 0.4.0.3), - insight (> 0.17.0), + datawizard (>= 0.4.0), + insight (>= 0.17.0), graphics, methods, stats, @@ -103,6 +103,7 @@ Suggests: cluster, cplm, dbscan, + domir (>= 0.2.0), drc, DRR, effectsize (>= 0.6.0), diff --git a/NAMESPACE b/NAMESPACE index 67e61310b..8e20685b0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -544,6 +544,7 @@ S3method(print,n_clusters_silhouette) S3method(print,n_factors) S3method(print,parameters_brms_meta) S3method(print,parameters_clusters) +S3method(print,parameters_da) S3method(print,parameters_efa) S3method(print,parameters_efa_summary) S3method(print,parameters_loadings) @@ -867,6 +868,7 @@ export(dof_betwithin) export(dof_kenward) export(dof_ml1) export(dof_satterthwaite) +export(dominance_analysis) export(efa_to_cfa) export(equivalence_test) export(factor_analysis) diff --git a/R/dominance_analysis.R b/R/dominance_analysis.R new file mode 100644 index 000000000..492443b57 --- /dev/null +++ b/R/dominance_analysis.R @@ -0,0 +1,497 @@ +#' @title Dominance Analysis +#' @name dominance_analysis +#' @inheritParams domir::domin +#' +#' @description Computes Dominance Analysis Statistics and Designations +#' +#' @param model A model object supported by `performance::r2()`. See 'Details'. +#' @param sets A (named) list of formula objects with no left hand +#' side/response. A named list uses the name provided each element +#' as the label for the set. +#' +#' Predictors in each formula are bound together as a set in the dominance +#' analysis and dominance statistics and designations are computed for +#' the predictors together. Predictors in `sets` must be present in the model +#' submitted to the `model` argument and cannot be in the `all` argument. +#' @param all A formula with no left hand side/response. +#' +#' Predictors in the formula are included in each subset in the dominance +#' analysis and the R2 value associated with them is subtracted from the +#' overall value. Predictors in `all` must be present in the model +#' submitted to the `model` argument and cannot be in the `sets` argument. +#' +#' @param quote_args A character vector of arguments in the model submitted to +#' `model` to `quote()` prior to submitting to the dominance analysis. This +#' is necessary for data masked arguments (e.g., `weights`) to prevent them +#' from being evaluated before being applied to the model and causing an error. +#' +#' @param ... Not used at current. +#' +#' @return Object of class `"parameters_da"`. +#' +#' An object of class `"parameters_da"` is a list of `data.frame`s composed +#' of the following elements: +#' \describe{ +#' \item{`general`}{A `data.frame` which associates dominance statistics with +#' model parameters. The variables in this `data.frame` include: +#' \describe{ +#' \item{`parameter`}{Parameter names.} +#' \item{`general_dominance`}{Vector of general dominance statistics.} +#' \item{`standardized`}{Vector of general dominance statistics normalized +#' to sum to 1.} +#' \item{`ranks`}{Vector of ranks applied to the general dominance +#' statistics.} +#' \item{`subset`}{Names of the subset to which the parameter belongs in +#' the dominance analysis. Each other `data.frame` will refer to these +#' subset names.}}} +#' \item{`conditional_dominance`}{A `data.frame` of conditional dominance +#' statistics. Each observation represents a subset and each variable +#' represents an the average increment to R2 with a specific number of +#' subsets in the model. `NULL` if `conditional` argument is `FALSE`.} +#' \item{`complete_dominance`}{A `data.frame` of complete dominance +#' designations. The subsets in the observations are compared to the +#' subsets referenced in each variable. Whether the subset +#' in each variable dominates the subset in each observation is +#' represented in the logical value. `NULL` if `complete` +#' argument is `FALSE`..} +#' } +#' +#' @details Computes two decompositions of the model's R2 and returns +#' a matrix of designations from which predictor relative importance +#' determinations can be obtained. +#' +#' Note in the output that the "constant" subset is associated with a +#' component of the model that does not directly contribute to the R2 such +#' as an intercept. The "all" subset is apportioned a component of the fit +#' statistic but is not considered a part of the dominance analysis and +#' therefore does not receive a rank, conditional dominance statistics, or +#' complete dominance designations. +#' +#' The input model is parsed using `insight::find_predictors()`, does not +#' yet support interactions, transformations, or offsets applied in the +#' R formula, and will fail with an error if any such terms are detected. +#' +#' The model submitted must accept an formula object as a `formula` +#' argument. In addition, the model object must accept the data on which +#' the model is estimated as a `data` argument. Formulas submitted +#' using object references (i.e., `lm(mtcars$mpg ~ mtcars$vs)`) and +#' functions that accept data as a non-`data` argument +#' (e.g., `survey::svyglm()` uses `design`) will fail with an error. +#' +#' Models that return `TRUE` for the `insight::model_info()` +#' function's values "is_bayesian", "is_mixed", "is_gam", +#' is_multivariate", "is_zero_inflated", +#' or "is_hurdle" are not supported at current. +#' +#' When `performance::r2()` returns multiple values, only the first is used +#' by default. +#' +#' The underlying `domir::domin()` function that implements the dominance +#' statistic and designation computations has only been tested to R version +#' 3.5 and will fail with an error if called in R versions < 3.5. +#' +#' @references +#' - Azen, R., & Budescu, D. V. (2003). The dominance analysis approach +#' for comparing predictors in multiple regression. Psychological Methods, +#' 8(2), 129-148. doi:10.1037/1082-989X.8.2.129 +#' +#' - Budescu, D. V. (1993). Dominance analysis: A new approach to the +#' problem of relative importance of predictors in multiple regression. +#' Psychological Bulletin, 114(3), 542-551. doi:10.1037/0033-2909.114.3.542 +#' +#' - Groemping, U. (2007). Estimators of relative importance in linear +#' regression based on variance decomposition. The American Statistician, +#' 61(2), 139-147. doi:10.1198/000313007X188252 +#' +#' @seealso [domir::domin()] +#' +#' @author Joseph Luchman +#' +#' @examples +#' if (getRversion() >= "3.5.0" && require("domir") && +#' require("performance")) { +#' data(mtcars) +#' model <- glm(vs ~ cyl + carb + mpg, data = mtcars, family = binomial()) +#' +#' performance::r2(model) +#' dominance_analysis(model) +#'} +#' @export +dominance_analysis <- function(model, sets = NULL, all = NULL, + conditional = TRUE, complete = TRUE, + quote_args = NULL, ...) { + + # Exit Conditions ---- + insight::check_if_installed("domir") + insight::check_if_installed("performance") + + if (!insight::is_regression_model(model)) { + stop(insight::format_message( + paste(deparse(substitute(model)), "is not a supported insight model."), + "You may be able to dominance analyze this model using the domir package." + ), call. = FALSE) + } + + if (!any(utils::.S3methods("r2", class = class(model)[[1]], envir = getNamespace("performance")) == + paste0("r2.", class(model)[[1]]))) { + stop(insight::format_message( + paste(deparse(substitute(model)), "does not have a perfomance-supported r2 method."), + "You may be able to dominance analyze this model using the domir package." + ), call. = FALSE) + } + + model_info <- insight::model_info(model) + if (any(unlist(model_info[c("is_bayesian", "is_mixed", "is_gam", "is_multivariate", "is_zero_inflated", "is_hurdle")]))) { + stop(insight::format_message( + paste0("`dominance_analysis()` does not yet support models of class ", class(model), "."), + "You may be able to dominance analyze this model using the domir package." + ), call. = FALSE) + } + + if (!is.null(insight::find_interactions(model))) { + stop("Interactions in the model formula are not allowed.", call. = FALSE) + } + + if (!all(insight::find_predictors(model)$conditional %in% attr(stats::terms(insight::find_formula(model)$conditional), "term.labels"))) { + stop(insight::format_message( + "Predictors do not match terms.", + "This usually occurs when there are in-formula predictor transformations such as log(x) or I(x+z)." + ), call. = FALSE) + } + + if (!is.null(insight::find_offset(model))) { + stop("Offsets in the model formula are not allowed.", call. = FALSE) + } + + if (getRversion() < "3.5") { + stop("R versions < 3.5 not supported.", call. = FALSE) + } + + if (!is.null(sets)) { + if (!is.list(sets)) { + stop("sets argument must be submitted as list.", call. = FALSE) + } + + if (length(sets) != length(unlist(sets))) { + stop("Nested lists are not allowed in sets.", call. = FALSE) + } + + if (!all(sapply(sets, isa, "formula"))) { + stop("Each element of list in sets must be a formula.", call. = FALSE) + } + + if (any(sapply(sets, function(x) attr(stats::terms(x), "response")==1))) { + stop("Formulas in sets argument must not have responses/left hand sides.", call. = FALSE) + } + } + + if (!is.null(all)) { + if (!isa(all, "formula")) { + stop("all argument must be submitted as a formula.", call. = FALSE) + } + + if (attr(stats::terms(all), "response") == 1) { + stop("Formula in all argument must not have a response/left hand side.", call. = FALSE) + } + } + + if (!is.null(quote_args) && !all(is.character(quote_args))) { + stop("All arguments in quote_args must be characters.", call. = FALSE) + } + + # Collect components for arguments ---- + ivs <- insight::find_predictors(model, flatten = TRUE) + + dv <- insight::find_response(model) + + reg <- insight::model_name(model) + + # Process sets ---- + if (!is.null(sets)) { + # gather predictors from each set + sets_processed <- lapply(sets, function(x) attr(stats::terms(x), "term.labels")) + + # remove predictors from `ivs` list if in sets + set_remove_loc <- unlist(lapply(sets_processed, function(x) which(ivs %in% x))) + + if (length(set_remove_loc) != length(unlist(sets_processed))) { + wrong_set_terms <- unlist(sets_processed)[which(!(unlist(sets_processed) %in% ivs))] + + stop( + insight::format_message( + "Terms", + paste(wrong_set_terms, sep = " "), + "in sets argument do not match any predictors in model."), call. = FALSE) + } + + ivs <- ivs[-set_remove_loc] + + # apply names to sets + set_names <- names(sets) + + missing_set_names <- which(set_names == "") + + if (length(missing_set_names) > 0) + set_names[missing_set_names] <- paste0("set", missing_set_names) + + if (any(set_names %in% c("all", "constant"))) { + stop( + insight::format_message( + "Names 'all' and 'constant' are reserved for subset names in the dominance_analysis function.", + "Please rename any sets currently named 'all' or 'constant.'"), call. = FALSE) + } + + if (any(set_names %in% ivs)) { + repeat_names <- set_names[which(set_names %in% ivs)] + + stop(insight::format_message( + "Set names", + paste(repeat_names, sep = " "), "are also the names of invidiual predictors.", + "Please rename these sets."), call. = FALSE) + } + + } + + else sets_processed <- NULL + + # Process all ---- + if (!is.null(all)) { + # gather predictors in all + all_processed <- attr(stats::terms(all), "term.labels") + + # remove predictors in all from `ivs` list + all_remove_loc <- which(ivs %in% all_processed) + + if (any(all_processed %in% unlist(sets_processed))) { + reused_terms <- all_processed[which(all_processed %in% unlist(sets_processed))] + + stop(insight::format_message( + "Terms", + paste(reused_terms, sep = " "), + "in all argument are also used in sets argument."), call. = FALSE) + } + + if (length(all_remove_loc) != length(unlist(all_processed))) { + wrong_all_terms <- all_processed[which(!(all_processed) %in% ivs)] + + stop(insight::format_message( + "Terms", + paste(wrong_all_terms, sep = " "), + "in all argument do not match any predictors in model."), call. = FALSE) + } + + ivs <- ivs[-all_remove_loc] # update IVs + + } + + else all_processed <- NULL + + # name collisions across subsets - exit + if (any(ivs %in% c("all", "constant"))) { + stop( + insight::format_message( + "Names 'all' and 'constant' are reserved for subset names in the dominance_analysis function.", + "Please rename any predictors currently named 'all' or 'constant.'", + "Alternatively, put the predictor in a set by itself."), call. = FALSE) + } + + # big DA warning + if (length(c(ivs, unlist(sets_processed))) > 15) warning( + cat(paste("Total of", 2^length(ivs)-1, "models to be estimated.\n", + "Process may take some time.")) , call. = FALSE) + + # Build non-formula model arguments to `domin` ---- + fml <- stats::reformulate(ivs, response = dv, intercept = insight::has_intercept(model)) + + data <- insight::get_data(model) + + args <- as.list(insight::get_call(model), collapse = "") # extract all arguments from call + + loc <- which(!(names(args) %in% c("formula", "data"))) # find formula and data arguments + + if (length(which(names(args) %in% c("formula", "data"))) != 2) { # exit if formula and data arguments missing + stop("Model submitted does not have a formula and data argument.", call. = FALSE) + } + + args <- args[loc] # remove formula and data arguments + args <- args[-1] # remove function name + + # quote arguments for domin + for (arg in quote_args) { + if (!(arg %in% names(args))) stop(arg, " in quote_args not among arguments in model.", call. = FALSE) + + else args[[arg]] <- str2lang(paste0("quote(", deparse(args[[arg]]), ")", collapse = "")) + + } + + # Internal wrapper to ensure r2 values conform to domin ---- + r2_wrap <- function(model, ...) { + list(fitstat = performance::r2(model, ...)[[1]]) + } + + # Finalize and implement DA + args2domin <- append(list(formula_overall = fml, reg = reg, fitstat = list(r2_wrap, "fitstat"), + data = data, conditional = conditional, complete = complete, + sets = sets_processed, all = all_processed), args) + + utils::capture.output(domir_res <- do.call(domir::domin, args2domin)) + + # Set up returned data.frames ---- + if (!is.null(sets)) { + + names(domir_res$General_Dominance) <- + c(names(domir_res$General_Dominance)[1:(length(domir_res$General_Dominance) - length(set_names))], + set_names) + + if (conditional) + rownames(domir_res$Conditional_Dominance) <- names(domir_res$General_Dominance) + + } + + if (complete) { + + colnames(domir_res$Complete_Dominance) <- paste0("< ", names(domir_res$General_Dominance)) + + dimnames(domir_res$Complete_Dominance) <- list( + colnames(domir_res$Complete_Dominance), + names(domir_res$General_Dominance) + ) + + domir_res$Complete_Dominance <- t(domir_res$Complete_Dominance) + + } + + da_df_res <- da_df_cat <- + data.frame(parameter = insight::find_parameters(model, flatten = TRUE)) + + da_df_cat = data.frame(da_df_cat, subset = NA_character_) + + if (!is.null(sets)) { + + for (set in 1:length(sets)) { + + set_name <- ifelse(!is.null(names(sets)[[set]]), names(sets)[[set]], + paste0("set", set)) + + da_df_cat$subset <- + replace(da_df_cat$subset, + da_df_res$parameter %in% all.vars(sets[[set]]), set_name) + + } + + } + + if (!is.null(all)) { + + da_df_cat$subset <- + replace(da_df_cat$subset, + da_df_res$parameter %in% all.vars(all), "all") + + } + + da_df_cat$subset <- + ifelse((da_df_res$parameter %in% (insight::find_predictors(model, flatten = TRUE))) & + (is.na(da_df_cat$subset)), + da_df_res$parameter, + da_df_cat$subset) + + da_df_cat$subset <- + replace(da_df_cat$subset, + is.na(da_df_cat$subset), "constant") + + da_df_res <- + datawizard::data_merge(da_df_cat, + data.frame(subset = names(domir_res$General_Dominance), + general_dominance = domir_res$General_Dominance)) + + if (!is.null(all)) { + + da_df_res$general_dominance <- + replace(da_df_res$general_dominance, da_df_res$subset == "all", domir_res$Fit_Statistic_All_Subsets) + + } + + da_df_res <- + datawizard::data_merge(da_df_res, + data.frame(subset = names(domir_res$General_Dominance), + standardized = domir_res$Standardized)) + + da_df_res <- + datawizard::data_merge(da_df_res, + data.frame(subset = names(domir_res$General_Dominance), + ranks = domir_res$Ranks)) + + da_df_res <- + datawizard::data_relocate(da_df_res, "subset", after = "ranks") + + if (conditional) { + + da_df_cdl <- + data.frame(subset = names(domir_res$General_Dominance)) + + da_df_cdl <- + datawizard::data_merge(da_df_cdl, + data.frame(subset = names(domir_res$General_Dominance), + domir_res$Conditional_Dominance)) + + da_df_cdl <- + datawizard::data_rename(da_df_cdl, + names(da_df_cdl)[2:length(da_df_cdl)], + colnames(domir_res$Conditional_Dominance)) + } + + else da_df_cdl <- NULL + + if (complete) { + + da_df_cpt <- + data.frame(subset = names(domir_res$General_Dominance)) + + da_df_cpt <- + datawizard::data_merge(da_df_cpt, + data.frame(subset = names(domir_res$General_Dominance), + domir_res$Complete_Dominance)) + + da_df_cpt <- + datawizard::data_rename(da_df_cpt, + names(da_df_cpt)[2:length(da_df_cpt)], + colnames(domir_res$Complete_Dominance)) + + } + + else da_df_cpt <- NULL + + da_list <- list(general = da_df_res, + conditional = da_df_cdl, + complete = da_df_cpt) + + # add attributes and class + attr(da_list, "model_R2") <- domir_res$Fit_Statistic_Overall + attr(da_list$general, "table_title") <- "General Dominance Statistics" + if (conditional) attr(da_list$conditional, "table_title") <- "Conditional Dominance Statistics" + if (complete) attr(da_list$complete, "table_title") <- "Complete Dominance Designations" + + class(da_list) <- c("parameters_da") + + da_list + +} + + + + +# methods ------------------------------ + + +#' @export +print.parameters_da <- function(x, digits = 3, ...) { + insight::print_color("# Dominance Analysis Results", "blue") + cat("\n\n") + + cat("Model R2 Value: ", sprintf("%.*f", digits, attr(x, "model_R2")), "\n\n") + + cat(insight::export_table(x, digits = digits, ...)) + + invisible(x) + +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 10fa74936..c963408f9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,6 +25,7 @@ reference: - title: "Comprehensive Model Parameters" contents: - compare_parameters + - dominance_analysis - model_parameters - model_parameters.aov - model_parameters.befa diff --git a/inst/WORDLIST b/inst/WORDLIST index 95b867c01..ee892714f 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,14 +1,17 @@ Anova +Azen BGGM BLUPs BMC BayesFM BayesFactor +bayesian Bentler Bezdek Biometrics Biometrika Blume +Budescu CFA CNG Cattell @@ -18,6 +21,7 @@ Cramer's CrossValidated D'Agostino DBSCAN +decompositions DOI DRR Davison @@ -40,6 +44,7 @@ Garrido Gelman Golino Gorsuch +Groemping Greevy Gustafson HC @@ -61,6 +66,7 @@ LMMs Lakens Laparra Lawley +Luchman MSA Maechler Malo diff --git a/man/dominance_analysis.Rd b/man/dominance_analysis.Rd new file mode 100644 index 000000000..ed7652121 --- /dev/null +++ b/man/dominance_analysis.Rd @@ -0,0 +1,147 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dominance_analysis.R +\name{dominance_analysis} +\alias{dominance_analysis} +\title{Dominance Analysis} +\usage{ +dominance_analysis( + model, + sets = NULL, + all = NULL, + conditional = TRUE, + complete = TRUE, + quote_args = NULL, + ... +) +} +\arguments{ +\item{model}{A model object supported by \code{performance::r2()}. See 'Details'.} + +\item{sets}{A (named) list of formula objects with no left hand +side/response. A named list uses the name provided each element +as the label for the set. + +Predictors in each formula are bound together as a set in the dominance +analysis and dominance statistics and designations are computed for +the predictors together. Predictors in \code{sets} must be present in the model +submitted to the \code{model} argument and cannot be in the \code{all} argument.} + +\item{all}{A formula with no left hand side/response. + +Predictors in the formula are included in each subset in the dominance +analysis and the R2 value associated with them is subtracted from the +overall value. Predictors in \code{all} must be present in the model +submitted to the \code{model} argument and cannot be in the \code{sets} argument.} + +\item{conditional}{Logical. If \code{FALSE} then conditional dominance matrix is not computed. + +If conditional dominance is not desired as an importance criterion, avoiding computing the conditional dominance matrix can save computation time.} + +\item{complete}{Logical. If \code{FALSE} then complete dominance matrix is not computed. + +If complete dominance is not desired as an importance criterion, avoiding computing complete dominance designations can save computation time.} + +\item{quote_args}{A character vector of arguments in the model submitted to +\code{model} to \code{quote()} prior to submitting to the dominance analysis. This +is necessary for data masked arguments (e.g., \code{weights}) to prevent them +from being evaluated before being applied to the model and causing an error.} + +\item{...}{Not used at current.} +} +\value{ +Object of class \code{"parameters_da"}. + +An object of class \code{"parameters_da"} is a list of \code{data.frame}s composed +of the following elements: +\describe{ +\item{\code{general}}{A \code{data.frame} which associates dominance statistics with +model parameters. The variables in this \code{data.frame} include: +\describe{ +\item{\code{parameter}}{Parameter names.} +\item{\code{general_dominance}}{Vector of general dominance statistics.} +\item{\code{standardized}}{Vector of general dominance statistics normalized +to sum to 1.} +\item{\code{ranks}}{Vector of ranks applied to the general dominance +statistics.} +\item{\code{subset}}{Names of the subset to which the parameter belongs in +the dominance analysis. Each other \code{data.frame} will refer to these +subset names.}}} +\item{\code{conditional_dominance}}{A \code{data.frame} of conditional dominance +statistics. Each observation represents a subset and each variable +represents an the average increment to R2 with a specific number of +subsets in the model. \code{NULL} if \code{conditional} argument is \code{FALSE}.} +\item{\code{complete_dominance}}{A \code{data.frame} of complete dominance +designations. The subsets in the observations are compared to the +subsets referenced in each variable. Whether the subset +in each variable dominates the subset in each observation is +represented in the logical value. \code{NULL} if \code{complete} +argument is \code{FALSE}..} +} +} +\description{ +Computes Dominance Analysis Statistics and Designations +} +\details{ +Computes two decompositions of the model's R2 and returns +a matrix of designations from which predictor relative importance +determinations can be obtained. + +Note in the output that the "constant" subset is associated with a +component of the model that does not directly contribute to the R2 such +as an intercept. The "all" subset is apportioned a component of the fit +statistic but is not considered a part of the dominance analysis and +therefore does not receive a rank, conditional dominance statistics, or +complete dominance designations. + +The input model is parsed using \code{insight::find_predictors()}, does not +yet support interactions, transformations, or offsets applied in the +R formula, and will fail with an error if any such terms are detected. + +The model submitted must accept an formula object as a \code{formula} +argument. In addition, the model object must accept the data on which +the model is estimated as a \code{data} argument. Formulas submitted +using object references (i.e., \code{lm(mtcars$mpg ~ mtcars$vs)}) and +functions that accept data as a non-\code{data} argument +(e.g., \code{survey::svyglm()} uses \code{design}) will fail with an error. + +Models that return \code{TRUE} for the \code{insight::model_info()} +function's values "is_bayesian", "is_mixed", "is_gam", +is_multivariate", "is_zero_inflated", +or "is_hurdle" are not supported at current. + +When \code{performance::r2()} returns multiple values, only the first is used +by default. + +The underlying \code{domir::domin()} function that implements the dominance +statistic and designation computations has only been tested to R version +3.5 and will fail with an error if called in R versions < 3.5. +} +\examples{ +if (getRversion() >= "3.5.0" && require("domir") && +require("performance")) { + data(mtcars) + model <- glm(vs ~ cyl + carb + mpg, data = mtcars, family = binomial()) + + performance::r2(model) + dominance_analysis(model) +} +} +\references{ +\itemize{ +\item Azen, R., & Budescu, D. V. (2003). The dominance analysis approach +for comparing predictors in multiple regression. Psychological Methods, +8(2), 129-148. doi:10.1037/1082-989X.8.2.129 +\item Budescu, D. V. (1993). Dominance analysis: A new approach to the +problem of relative importance of predictors in multiple regression. +Psychological Bulletin, 114(3), 542-551. doi:10.1037/0033-2909.114.3.542 +\item Groemping, U. (2007). Estimators of relative importance in linear +regression based on variance decomposition. The American Statistician, +61(2), 139-147. doi:10.1198/000313007X188252 +} +} +\seealso{ +\code{\link[domir:domin]{domir::domin()}} +} +\author{ +Joseph Luchman +} diff --git a/tests/testthat/test-dominance_analysis.R b/tests/testthat/test-dominance_analysis.R new file mode 100644 index 000000000..d9396ccbe --- /dev/null +++ b/tests/testthat/test-dominance_analysis.R @@ -0,0 +1,81 @@ +if (requiet("testthat") && requiet("performance") && requiet("domir") && + (R.version$major > 3 | (R.version$major == 3 && R.version$minor >= 5))) { + + data(mtcars) + + DA_test_model <- lm(mpg ~ vs + cyl + carb, data = mtcars) + + DA_performance <- dominance_analysis(DA_test_model) + + DA_domir <- domin(mpg ~ vs + cyl + carb, lm, list(performance::r2, "R2"), data = mtcars) + + test_that("dominance_analysis$general_dominance", { + + gnrl_domir <- c(NA, DA_domir$General_Dominance) + names(gnrl_domir) <- NULL + + gnrl_da <- DA_performance$general$general_dominance + + expect_equal(gnrl_domir, + gnrl_da) + }) + + test_that("dominance_analysis$conditional_dominance", { + + cdl_domir <- DA_domir$Conditional_Dominance + dimnames(cdl_domir) <-c(NULL, NULL) + + cdl_da <- as.matrix(DA_performance$conditional[,-1]) + dimnames(cdl_da) <-c(NULL, NULL) + + expect_equal(cdl_domir, + cdl_da) + }) + + test_that("dominance_analysis$complete_dominance", { + + cpt_domir <- DA_domir$Complete_Dominance + dimnames(cpt_domir) <- list(NULL, NULL) + + cpt_da <- t(DA_performance$complete[,-1]) + dimnames(cpt_da) <- list(NULL, NULL) + + expect_equal(cpt_domir, + cpt_da) + }) + + DA_performance2 <- + dominance_analysis(DA_test_model, all = ~ vs, sets = c(~carb), + complete = FALSE, conditional = FALSE) + DA_domir2 <- + domin(mpg ~ cyl, lm, list(performance::r2, "R2"), + all = "vs", sets = list("carb"), data = mtcars, + complete = FALSE, conditional = FALSE) + + test_that("dominance_analysis$general_dominance with sets/all", { + + domir_all_sub_r2 <- DA_domir2$Fit_Statistic_All_Subsets + names(domir_all_sub_r2) <-NULL + + expect_equal(domir_all_sub_r2, + with(DA_performance2$general, general_dominance[subset == "all"])) + + gnrl_domir2 <- DA_domir2$General_Dominance + names(gnrl_domir2) <- NULL + + gnrl_da2 <- aggregate(DA_performance2$general$general_dominance, + list(DA_performance2$general$subset), mean) + + gnrl_da2 <- gnrl_da2[which(gnrl_da2$Group.1 %in% c("cyl", "set1")),] + + gnrl_da2 <- gnrl_da2$x + + names(gnrl_da2) <- NULL + + expect_equal(gnrl_domir2, + gnrl_da2) + + }) + + +} From 9baa6731cda7b26ed468f6b9f38378ec60d65ff4 Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Sun, 1 May 2022 09:57:11 -0500 Subject: [PATCH 2/3] documentation cleanup --- R/dominance_analysis.R | 24 +++++++++++++++++++----- man/dominance_analysis.Rd | 19 ++++++++++++++++--- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/R/dominance_analysis.R b/R/dominance_analysis.R index 492443b57..4630c3aa9 100644 --- a/R/dominance_analysis.R +++ b/R/dominance_analysis.R @@ -5,14 +5,17 @@ #' @description Computes Dominance Analysis Statistics and Designations #' #' @param model A model object supported by `performance::r2()`. See 'Details'. +#' #' @param sets A (named) list of formula objects with no left hand -#' side/response. A named list uses the name provided each element -#' as the label for the set. +#' side/response. If the list has names, the name provided each element +#' will be used as the label for the set. Unnamed list elements will be +#' provided a set number name based on its position among the sets as entered. #' #' Predictors in each formula are bound together as a set in the dominance #' analysis and dominance statistics and designations are computed for #' the predictors together. Predictors in `sets` must be present in the model #' submitted to the `model` argument and cannot be in the `all` argument. +#' #' @param all A formula with no left hand side/response. #' #' Predictors in the formula are included in each subset in the dominance @@ -36,14 +39,16 @@ #' model parameters. The variables in this `data.frame` include: #' \describe{ #' \item{`parameter`}{Parameter names.} -#' \item{`general_dominance`}{Vector of general dominance statistics.} +#' \item{`general_dominance`}{Vector of general dominance statistics. +#' The R2 ascribed to variables in the `all` argument are also reported +#' here though they are not general dominance statistics.} #' \item{`standardized`}{Vector of general dominance statistics normalized #' to sum to 1.} #' \item{`ranks`}{Vector of ranks applied to the general dominance #' statistics.} #' \item{`subset`}{Names of the subset to which the parameter belongs in -#' the dominance analysis. Each other `data.frame` will refer to these -#' subset names.}}} +#' the dominance analysis. Each other `data.frame` returned will refer +#' to these subset names.}}} #' \item{`conditional_dominance`}{A `data.frame` of conditional dominance #' statistics. Each observation represents a subset and each variable #' represents an the average increment to R2 with a specific number of @@ -111,10 +116,19 @@ #' if (getRversion() >= "3.5.0" && require("domir") && #' require("performance")) { #' data(mtcars) +#' +#' # Dominance Analysis with Logit Regression #' model <- glm(vs ~ cyl + carb + mpg, data = mtcars, family = binomial()) #' #' performance::r2(model) #' dominance_analysis(model) +#' +#' # Dominance Analysis with Weighted Logit Regression +#' model_wt <- glm(vs ~ cyl + carb + mpg, data = mtcars, +#' weights = wt, family = binomial()) +#' +#' dominance_analysis(model_wt, quote_args = "weights") +#' #'} #' @export dominance_analysis <- function(model, sets = NULL, all = NULL, diff --git a/man/dominance_analysis.Rd b/man/dominance_analysis.Rd index ed7652121..9960d0b52 100644 --- a/man/dominance_analysis.Rd +++ b/man/dominance_analysis.Rd @@ -18,8 +18,9 @@ dominance_analysis( \item{model}{A model object supported by \code{performance::r2()}. See 'Details'.} \item{sets}{A (named) list of formula objects with no left hand -side/response. A named list uses the name provided each element -as the label for the set. +side/response. If the list has names, the name provided each element +will be used as the label for the set. Unnamed list elements will be +provided a set number name based on its position among the sets as entered. Predictors in each formula are bound together as a set in the dominance analysis and dominance statistics and designations are computed for @@ -58,7 +59,9 @@ of the following elements: model parameters. The variables in this \code{data.frame} include: \describe{ \item{\code{parameter}}{Parameter names.} -\item{\code{general_dominance}}{Vector of general dominance statistics.} +\item{\code{general_dominance}}{Vector of general dominance statistics. +The R2 ascribed to variables in the \code{all} argument are also reported +here though they are not general dominance statistics.} \item{\code{standardized}}{Vector of general dominance statistics normalized to sum to 1.} \item{\code{ranks}}{Vector of ranks applied to the general dominance @@ -120,10 +123,20 @@ statistic and designation computations has only been tested to R version if (getRversion() >= "3.5.0" && require("domir") && require("performance")) { data(mtcars) + + # Dominance Analysis with Logit Regression model <- glm(vs ~ cyl + carb + mpg, data = mtcars, family = binomial()) performance::r2(model) dominance_analysis(model) + + # Dominance Analysis with Weighted Logit Regression + + model_wt <- glm(vs ~ cyl + carb + mpg, data = mtcars, + weights = wt, family = binomial()) + + dominance_analysis(model_wt, quote_args = "weights") + } } \references{ From 5d85481ec0d2c14be51c599c5a3bf7f2f4996d8f Mon Sep 17 00:00:00 2001 From: Joseph Luchman Date: Sat, 25 Jun 2022 10:56:47 -0500 Subject: [PATCH 3/3] stylistic updates to `dominance_analysis` --- DESCRIPTION | 7 +- R/dominance_analysis.R | 88 +++++++++++++++++------- man/dominance_analysis.Rd | 21 +++--- tests/testthat/test-dominance_analysis.R | 12 ++-- 4 files changed, 87 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d1e92ec14..2b17a1f2e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,7 +58,12 @@ Authors@R: family = "Morrison", role = "ctb", email = "dmorrison01@ucla.edu", - comment = c(ORCID = "0000-0002-7195-830X", Twitter = "@demstats1"))) + comment = c(ORCID = "0000-0002-7195-830X", Twitter = "@demstats1")), + person(given = "Joseph", + family = "Luchman", + role = "ctb", + email = "jluchman@gmail.com", + comment = c(ORCID = "0000-0002-8886-9717"))) Maintainer: Daniel Lüdecke Description: Utilities for processing the parameters of various statistical models. Beyond computing p values, CIs, and other indices diff --git a/R/dominance_analysis.R b/R/dominance_analysis.R index 4630c3aa9..c467cadbc 100644 --- a/R/dominance_analysis.R +++ b/R/dominance_analysis.R @@ -35,25 +35,25 @@ #' An object of class `"parameters_da"` is a list of `data.frame`s composed #' of the following elements: #' \describe{ -#' \item{`general`}{A `data.frame` which associates dominance statistics with +#' \item{`General`}{A `data.frame` which associates dominance statistics with #' model parameters. The variables in this `data.frame` include: #' \describe{ -#' \item{`parameter`}{Parameter names.} -#' \item{`general_dominance`}{Vector of general dominance statistics. +#' \item{`Parameter`}{Parameter names.} +#' \item{`General_Dominance`}{Vector of general dominance statistics. #' The R2 ascribed to variables in the `all` argument are also reported #' here though they are not general dominance statistics.} -#' \item{`standardized`}{Vector of general dominance statistics normalized +#' \item{`Percent`}{Vector of general dominance statistics normalized #' to sum to 1.} -#' \item{`ranks`}{Vector of ranks applied to the general dominance +#' \item{`Ranks`}{Vector of ranks applied to the general dominance #' statistics.} -#' \item{`subset`}{Names of the subset to which the parameter belongs in +#' \item{`Subset`}{Names of the subset to which the parameter belongs in #' the dominance analysis. Each other `data.frame` returned will refer #' to these subset names.}}} -#' \item{`conditional_dominance`}{A `data.frame` of conditional dominance +#' \item{`Conditional`}{A `data.frame` of conditional dominance #' statistics. Each observation represents a subset and each variable #' represents an the average increment to R2 with a specific number of #' subsets in the model. `NULL` if `conditional` argument is `FALSE`.} -#' \item{`complete_dominance`}{A `data.frame` of complete dominance +#' \item{`Complete`}{A `data.frame` of complete dominance #' designations. The subsets in the observations are compared to the #' subsets referenced in each variable. Whether the subset #' in each variable dominates the subset in each observation is @@ -339,12 +339,12 @@ dominance_analysis <- function(model, sets = NULL, all = NULL, } # Internal wrapper to ensure r2 values conform to domin ---- - r2_wrap <- function(model, ...) { + .r2_wrap <- function(model, ...) { list(fitstat = performance::r2(model, ...)[[1]]) } # Finalize and implement DA - args2domin <- append(list(formula_overall = fml, reg = reg, fitstat = list(r2_wrap, "fitstat"), + args2domin <- append(list(formula_overall = fml, reg = reg, fitstat = list(.r2_wrap, "fitstat"), data = data, conditional = conditional, complete = complete, sets = sets_processed, all = all_processed), args) @@ -357,14 +357,16 @@ dominance_analysis <- function(model, sets = NULL, all = NULL, c(names(domir_res$General_Dominance)[1:(length(domir_res$General_Dominance) - length(set_names))], set_names) - if (conditional) + if (conditional) { rownames(domir_res$Conditional_Dominance) <- names(domir_res$General_Dominance) + } + } if (complete) { - colnames(domir_res$Complete_Dominance) <- paste0("< ", names(domir_res$General_Dominance)) + colnames(domir_res$Complete_Dominance) <- paste0("dmn_", names(domir_res$General_Dominance)) dimnames(domir_res$Complete_Dominance) <- list( colnames(domir_res$Complete_Dominance), @@ -441,11 +443,11 @@ dominance_analysis <- function(model, sets = NULL, all = NULL, if (conditional) { da_df_cdl <- - data.frame(subset = names(domir_res$General_Dominance)) + data.frame(Subset = names(domir_res$General_Dominance)) da_df_cdl <- datawizard::data_merge(da_df_cdl, - data.frame(subset = names(domir_res$General_Dominance), + data.frame(Subset = names(domir_res$General_Dominance), domir_res$Conditional_Dominance)) da_df_cdl <- @@ -459,11 +461,11 @@ dominance_analysis <- function(model, sets = NULL, all = NULL, if (complete) { da_df_cpt <- - data.frame(subset = names(domir_res$General_Dominance)) + data.frame(Subset = names(domir_res$General_Dominance)) da_df_cpt <- datawizard::data_merge(da_df_cpt, - data.frame(subset = names(domir_res$General_Dominance), + data.frame(Subset = names(domir_res$General_Dominance), domir_res$Complete_Dominance)) da_df_cpt <- @@ -475,15 +477,20 @@ dominance_analysis <- function(model, sets = NULL, all = NULL, else da_df_cpt <- NULL - da_list <- list(general = da_df_res, - conditional = da_df_cdl, - complete = da_df_cpt) + da_df_res <- + datawizard::data_rename(da_df_res, + replacement = c("Parameter", "General_Dominance", + "Percent", "Ranks", "Subset")) + + da_list <- list(General = da_df_res, + Conditional = da_df_cdl, + Complete = da_df_cpt) # add attributes and class attr(da_list, "model_R2") <- domir_res$Fit_Statistic_Overall - attr(da_list$general, "table_title") <- "General Dominance Statistics" - if (conditional) attr(da_list$conditional, "table_title") <- "Conditional Dominance Statistics" - if (complete) attr(da_list$complete, "table_title") <- "Complete Dominance Designations" + attr(da_list$General, "table_title") <- "General Dominance Statistics" + if (conditional) attr(da_list$Conditional, "table_title") <- "Conditional Dominance Statistics" + if (complete) attr(da_list$Complete, "table_title") <- "Complete Dominance Designations" class(da_list) <- c("parameters_da") @@ -504,7 +511,42 @@ print.parameters_da <- function(x, digits = 3, ...) { cat("Model R2 Value: ", sprintf("%.*f", digits, attr(x, "model_R2")), "\n\n") - cat(insight::export_table(x, digits = digits, ...)) + printed_x <- x + + printed_x$General <- datawizard::data_rename(x$General, + pattern = "General_Dominance", + replacement = "General Dominance") + + if (!is.null(x$Conditional)) { + + cdl_col <- ncol(x$Conditional) + + cdl_names<- paste0("IVs_", 1:(cdl_col - 1)) + + cdl_names_rep <- paste("IVs:", 1:(cdl_col - 1)) + + printed_x$Conditional <- + datawizard::data_rename(x$Conditional, + pattern = cdl_names, + replacement = cdl_names_rep) + + } + + if (!is.null(x$Complete)) { + + cpt_names <- names(x$Complete)[-1] + + cpt_names_rep <- gsub("dmn_", "< ", + cpt_names) + + printed_x$Complete <- + datawizard::data_rename(x$Complete, + pattern = cpt_names, + replacement = cpt_names_rep) + + } + + cat(insight::export_table(printed_x, digits = digits, ...)) invisible(x) diff --git a/man/dominance_analysis.Rd b/man/dominance_analysis.Rd index 9960d0b52..2d9272f16 100644 --- a/man/dominance_analysis.Rd +++ b/man/dominance_analysis.Rd @@ -55,25 +55,25 @@ Object of class \code{"parameters_da"}. An object of class \code{"parameters_da"} is a list of \code{data.frame}s composed of the following elements: \describe{ -\item{\code{general}}{A \code{data.frame} which associates dominance statistics with +\item{\code{General}}{A \code{data.frame} which associates dominance statistics with model parameters. The variables in this \code{data.frame} include: \describe{ -\item{\code{parameter}}{Parameter names.} -\item{\code{general_dominance}}{Vector of general dominance statistics. +\item{\code{Parameter}}{Parameter names.} +\item{\code{General_Dominance}}{Vector of general dominance statistics. The R2 ascribed to variables in the \code{all} argument are also reported here though they are not general dominance statistics.} -\item{\code{standardized}}{Vector of general dominance statistics normalized +\item{\code{Percent}}{Vector of general dominance statistics normalized to sum to 1.} -\item{\code{ranks}}{Vector of ranks applied to the general dominance +\item{\code{Ranks}}{Vector of ranks applied to the general dominance statistics.} -\item{\code{subset}}{Names of the subset to which the parameter belongs in -the dominance analysis. Each other \code{data.frame} will refer to these -subset names.}}} -\item{\code{conditional_dominance}}{A \code{data.frame} of conditional dominance +\item{\code{Subset}}{Names of the subset to which the parameter belongs in +the dominance analysis. Each other \code{data.frame} returned will refer +to these subset names.}}} +\item{\code{Conditional}}{A \code{data.frame} of conditional dominance statistics. Each observation represents a subset and each variable represents an the average increment to R2 with a specific number of subsets in the model. \code{NULL} if \code{conditional} argument is \code{FALSE}.} -\item{\code{complete_dominance}}{A \code{data.frame} of complete dominance +\item{\code{Complete}}{A \code{data.frame} of complete dominance designations. The subsets in the observations are compared to the subsets referenced in each variable. Whether the subset in each variable dominates the subset in each observation is @@ -131,7 +131,6 @@ require("performance")) { dominance_analysis(model) # Dominance Analysis with Weighted Logit Regression - model_wt <- glm(vs ~ cyl + carb + mpg, data = mtcars, weights = wt, family = binomial()) diff --git a/tests/testthat/test-dominance_analysis.R b/tests/testthat/test-dominance_analysis.R index d9396ccbe..46fe8230d 100644 --- a/tests/testthat/test-dominance_analysis.R +++ b/tests/testthat/test-dominance_analysis.R @@ -14,7 +14,7 @@ if (requiet("testthat") && requiet("performance") && requiet("domir") && gnrl_domir <- c(NA, DA_domir$General_Dominance) names(gnrl_domir) <- NULL - gnrl_da <- DA_performance$general$general_dominance + gnrl_da <- DA_performance$General$General_Dominance expect_equal(gnrl_domir, gnrl_da) @@ -25,7 +25,7 @@ if (requiet("testthat") && requiet("performance") && requiet("domir") && cdl_domir <- DA_domir$Conditional_Dominance dimnames(cdl_domir) <-c(NULL, NULL) - cdl_da <- as.matrix(DA_performance$conditional[,-1]) + cdl_da <- as.matrix(DA_performance$Conditional[,-1]) dimnames(cdl_da) <-c(NULL, NULL) expect_equal(cdl_domir, @@ -37,7 +37,7 @@ if (requiet("testthat") && requiet("performance") && requiet("domir") && cpt_domir <- DA_domir$Complete_Dominance dimnames(cpt_domir) <- list(NULL, NULL) - cpt_da <- t(DA_performance$complete[,-1]) + cpt_da <- t(DA_performance$Complete[,-1]) dimnames(cpt_da) <- list(NULL, NULL) expect_equal(cpt_domir, @@ -58,13 +58,13 @@ if (requiet("testthat") && requiet("performance") && requiet("domir") && names(domir_all_sub_r2) <-NULL expect_equal(domir_all_sub_r2, - with(DA_performance2$general, general_dominance[subset == "all"])) + with(DA_performance2$General, General_Dominance[Subset == "all"])) gnrl_domir2 <- DA_domir2$General_Dominance names(gnrl_domir2) <- NULL - gnrl_da2 <- aggregate(DA_performance2$general$general_dominance, - list(DA_performance2$general$subset), mean) + gnrl_da2 <- aggregate(DA_performance2$General$General_Dominance, + list(DA_performance2$General$Subset), mean) gnrl_da2 <- gnrl_da2[which(gnrl_da2$Group.1 %in% c("cyl", "set1")),]