diff --git a/NAMESPACE b/NAMESPACE index 21007d8b6..fad90c328 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,11 +65,14 @@ export(create_coalition_table) export(explain) export(explain_forecast) export(finalize_explanation) -export(get_adaptive_arguments_default) +export(finalize_explanation_forecast) export(get_cov_mat) export(get_data_specs) +export(get_extra_est_args_default) +export(get_iterative_args_default) export(get_model_specs) export(get_mu_vec) +export(get_output_args_default) export(get_supported_approaches) export(hat_matrix_cpp) export(mahalanobis_distance_cpp) diff --git a/NEWS.md b/NEWS.md index 20c71de62..f84c87761 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # shapr 1.0.0 * (Just some notes so far) -* Adaptive estimatio/convergence detection +* Iterative estimation/convergence detection * Verbosity * Complete restructuring motivated by introducing the Python wrapper. The restructuring splits the explanation tasks into smaller pieces, which was necessary to allow the Python wrapper to move back and forth between R and Python. * As part of the restructuring, we also did a number of design changes, resulting in a series of breaking changes described below. diff --git a/R/approach_vaeac.R b/R/approach_vaeac.R index 6b2a6da89..da3f17555 100644 --- a/R/approach_vaeac.R +++ b/R/approach_vaeac.R @@ -1664,7 +1664,7 @@ vaeac_check_parameters <- function(x_train, #' each batch when generating the Monte Carlo samples. If `NULL`, then the function generates the Monte Carlo samples #' for the provided coalitions and all explicands sent to [shapr::explain()] at the time. #' The number of coalitions are determined by the `n_batches` used by [shapr::explain()]. We recommend to tweak -#' `adaptive_arguments$max_batch_size` and `adaptive_arguments$min_n_batches` +#' `extra_computation_args$max_batch_size` and `extra_computation_args$min_n_batches` #' rather than `vaeac.batch_size_sampling`. Larger batch sizes are often much faster provided sufficient memory. #' @param vaeac.running_avg_n_values Positive integer (default is `5`). The number of previous IWAE values to include #' when we compute the running means of the IWAE criterion. 
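Since the batching controls referenced in the vaeac docs above now live in `extra_computation_args`, a minimal usage sketch may help reviewers. It assumes `model`, `x_explain`, `x_train` and a baseline prediction `p0` already exist; the batching values are illustrative only, not recommendations:

```r
library(shapr)

# Tune batching via extra_computation_args instead of vaeac.batch_size_sampling.
# A larger max_batch_size is typically faster but uses more memory.
explanation <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "vaeac",
  prediction_zero = p0,
  extra_computation_args = list(
    max_batch_size = 100, # max coalitions estimated simultaneously per batch
    min_n_batches = 10    # lower bound on batches, e.g. to match parallel workers
  )
)
```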
diff --git a/R/check_convergence.R b/R/check_convergence.R index 4b0710666..b260d9a77 100644 --- a/R/check_convergence.R +++ b/R/check_convergence.R @@ -7,9 +7,9 @@ check_convergence <- function(internal) { iter <- length(internal$iter_list) - convergence_tolerance <- internal$parameters$adaptive_arguments$convergence_tolerance - max_iter <- internal$parameters$adaptive_arguments$max_iter - max_n_coalitions <- internal$parameters$adaptive_arguments$max_n_coalitions + convergence_tol <- internal$parameters$iterative_args$convergence_tol + max_iter <- internal$parameters$iterative_args$max_iter + max_n_coalitions <- internal$parameters$iterative_args$max_n_coalitions paired_shap_sampling <- internal$parameters$paired_shap_sampling n_shapley_values <- internal$parameters$n_shapley_values @@ -32,11 +32,11 @@ check_convergence <- function(internal) { converged_sd <- FALSE } else { converged_exact <- FALSE - if (!is.null(convergence_tolerance)) { + if (!is.null(convergence_tol)) { dt_shapley_est0[, maxval := max(.SD, na.rm = TRUE), .SDcols = -c(1, 2), by = .I] dt_shapley_est0[, minval := min(.SD, na.rm = TRUE), .SDcols = -c(1, 2), by = .I] dt_shapley_est0[, max_sd0 := max_sd0] - dt_shapley_est0[, req_samples := (max_sd0 / ((maxval - minval) * convergence_tolerance))^2] + dt_shapley_est0[, req_samples := (max_sd0 / ((maxval - minval) * convergence_tol))^2] dt_shapley_est0[, conv_measure := max_sd0 / ((maxval - minval) * sqrt(n_sampled_coalitions))] dt_shapley_est0[, req_samples := min(req_samples, 2^n_shapley_values - 2)] diff --git a/R/cli.R b/R/cli.R index 251994a65..18af9f4d2 100644 --- a/R/cli.R +++ b/R/cli.R @@ -3,17 +3,17 @@ cli_startup <- function(internal, model, verbose) { is_groupwise <- internal$parameters$is_groupwise approach <- internal$parameters$approach - adaptive <- internal$parameters$adaptive + iterative <- internal$parameters$iterative n_shapley_values <- internal$parameters$n_shapley_values n_explain <- internal$parameters$n_explain - saving_path <- internal$parameters$adaptive_arguments$saving_path + saving_path <- internal$parameters$output_args$saving_path causal_ordering_names_string <- internal$parameters$causal_ordering_names_string max_n_coalitions_causal <- internal$parameters$max_n_coalitions_causal confounding_string <- internal$parameters$confounding_string feat_group_txt <- ifelse(is_groupwise, "group-wise", "feature-wise") - adaptive_txt <- ifelse(adaptive, "adaptive", "non-adaptive") + iterative_txt <- ifelse(iterative, "iterative", "non-iterative") testing <- internal$parameters$testing asymmetric <- internal$parameters$asymmetric @@ -22,7 +22,7 @@ cli_startup <- function(internal, model, verbose) { line_vec <- "Model class: {.cls {class(model)}}" line_vec <- c(line_vec, "Approach: {.emph {approach}}") - line_vec <- c(line_vec, "Adaptive estimation: {.emph {adaptive}}") + line_vec <- c(line_vec, "Iterative estimation: {.emph {iterative}}") line_vec <- c(line_vec, "Number of {.emph {feat_group_txt}} Shapley values: {n_shapley_values}") line_vec <- c(line_vec, "Number of observations to explain: {n_explain}") if (isTRUE(asymmetric)) { @@ -54,8 +54,8 @@ cli_startup <- function(internal, model, verbose) { } if ("basic" %in% verbose) { - if (isTRUE(adaptive)) { - msg <- "Adaptive computation started" + if (isTRUE(iterative)) { + msg <- "Iterative computation started" } else { msg <- "Main computation started" } @@ -65,10 +65,10 @@ cli_iter <- function(verbose, internal, iter) { - adaptive <- 
internal$parameters$adaptive + iterative <- internal$parameters$iterative asymmetric <- internal$parameters$asymmetric - if (!is.null(verbose) && isTRUE(adaptive)) { + if (!is.null(verbose) && isTRUE(iterative)) { cli::cli_h1("Iteration {iter}") } @@ -77,7 +77,7 @@ cli_iter <- function(verbose, internal, iter) { tot_coal <- internal$iter_list[[iter]]$n_coalitions all_coal <- ifelse(asymmetric, internal$parameters$max_n_coalitions, 2^internal$parameters$n_shapley_values) - extra_msg <- ifelse(adaptive, ", {new_coal} new", "") + extra_msg <- ifelse(iterative, ", {new_coal} new", "") msg <- paste0("Using {tot_coal} of {all_coal} coalitions", extra_msg, ". ") diff --git a/R/compute_estimates.R b/R/compute_estimates.R index 46bbb5833..34c14c826 100644 --- a/R/compute_estimates.R +++ b/R/compute_estimates.R @@ -16,7 +16,7 @@ compute_estimates <- function(internal, vS_list) { iter <- length(internal$iter_list) compute_sd <- internal$iter_list[[iter]]$compute_sd - n_boot_samps <- internal$parameters$adaptive_arguments$n_boot_samps + n_boot_samps <- internal$parameters$extra_computation_args$n_boot_samps processed_vS_list <- postprocess_vS_list( vS_list = vS_list, @@ -75,7 +75,7 @@ compute_estimates <- function(internal, vS_list) { #' @keywords internal postprocess_vS_list <- function(vS_list, internal) { - keep_samp_for_vS <- internal$parameters$keep_samp_for_vS + keep_samp_for_vS <- internal$parameters$output_args$keep_samp_for_vS prediction_zero <- internal$parameters$prediction_zero n_explain <- internal$parameters$n_explain @@ -185,7 +185,7 @@ bootstrap_shapley <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { n_features <- internal$parameters$n_features shap_names <- internal$parameters$shap_names paired_shap_sampling <- internal$parameters$paired_shap_sampling - shapley_reweight <- internal$parameters$shapley_reweighting + shapley_reweight <- internal$parameters$kernelSHAP_reweighting boot_sd_array <- array(NA, dim = c(n_explain, n_features + 1, n_boot_samps)) @@ -242,7 +242,7 @@ bootstrap_shapley <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { X_boot <- rbind(X_keep, X_boot0) data.table::setorder(X_boot, id_coalition) - shapley_reweighting(X_boot, reweight = shapley_reweight) # reweights the shapley weights by reference + kernelSHAP_reweighting(X_boot, reweight = shapley_reweight) # reweights the shapley weights by reference W_boot <- shapr::weight_matrix( X = X_boot, @@ -282,7 +282,7 @@ bootstrap_shapley <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { shap_names <- internal$parameters$horizon_features[[i]] } dt_cols <- c(1, seq_len(n_explain) + (i - 1) * n_explain + 1) - dt_vS_this <- dt_vS[, ..dt_cols] + dt_vS_this <- dt_vS[, dt_cols, with = FALSE] result[[i]] <- bootstrap_shapley_inner(X, n_shapley_values, shap_names, internal, dt_vS_this, n_boot_samps, seed) } result <- rbindlist(result, fill = TRUE) @@ -303,7 +303,7 @@ bootstrap_shapley_inner <- function(X, n_shapley_values, shap_names, internal, d n_explain <- internal$parameters$n_explain paired_shap_sampling <- internal$parameters$paired_shap_sampling - shapley_reweight <- internal$parameters$shapley_reweighting + shapley_reweight <- internal$parameters$kernelSHAP_reweighting X_org <- copy(X) @@ -377,7 +377,7 @@ bootstrap_shapley_inner <- function(X, n_shapley_values, shap_names, internal, d for (i in seq_len(n_boot_samps)) { this_X <- X_boot[boot_id == i] # This is highly inefficient, but the best way to deal with the reweighting for now - shapley_reweighting(this_X, reweight = 
shapley_reweight) + kernelSHAP_reweighting(this_X, reweight = shapley_reweight) W_boot <- weight_matrix( X = this_X, diff --git a/R/compute_vS.R b/R/compute_vS.R index b2e89fce4..cb0e4d171 100644 --- a/R/compute_vS.R +++ b/R/compute_vS.R @@ -115,7 +115,8 @@ batch_compute_vS <- function(S, internal, model, predict_model, p = NULL) { if (regression) { dt_vS <- batch_prepare_vS_regression(S = S, internal = internal) } else { - # Here dt_vS is either only dt_vS or a list containing dt_vS and dt if internal$parameters$keep_samp_for_vS = TRUE + # Here dt_vS is either only dt_vS or a list containing dt_vS and dt if + # internal$parameters$output_args$keep_samp_for_vS = TRUE dt_vS <- batch_prepare_vS_MC(S = S, internal = internal, model = model, predict_model = predict_model) } @@ -168,7 +169,7 @@ batch_prepare_vS_MC <- function(S, internal, model, predict_model) { explain_lags <- internal$parameters$explain_lags y <- internal$data$y xreg <- internal$data$xreg - keep_samp_for_vS <- internal$parameters$keep_samp_for_vS + keep_samp_for_vS <- internal$parameters$output_args$keep_samp_for_vS causal_sampling <- internal$parameters$causal_sampling # Make it optional to store and return the dt_list diff --git a/R/documentation.R b/R/documentation.R index 734fe20cd..51e0eb7d2 100644 --- a/R/documentation.R +++ b/R/documentation.R @@ -3,7 +3,7 @@ #' @param internal List. #' Holds all parameters, data, functions and computed objects used within [explain()] #' The list contains one or more of the elements `parameters`, `data`, `objects`, `iter_list`, `timing_list`, -#' `main_timing_list`, `output`, `iter_timing_list` and `iter_results`. +#' `main_timing_list`, `output`, and `iter_timing_list`. #' #' @param model Objects. #' The model object that ought to be explained. diff --git a/R/explain.R b/R/explain.R index 165793f86..c6ad43706 100644 --- a/R/explain.R +++ b/R/explain.R @@ -28,9 +28,9 @@ #' such as the mean of the predictions in the training data are also reasonable. #' #' @param max_n_coalitions Integer. -#' The upper limit on the number of unique feature/group coalitions to use in the adaptive procedure -#' (if `adaptive = TRUE`). -#' If `adaptive = FALSE` it represents the number of feature/group coalitions to use directly. +#' The upper limit on the number of unique feature/group coalitions to use in the iterative procedure +#' (if `iterative = TRUE`). +#' If `iterative = FALSE` it represents the number of feature/group coalitions to use directly. #' The quantity refers to the number of unique feature coalitions if `group = NULL`, #' and group coalitions if `group != NULL`. #' `max_n_coalitions = NULL` corresponds to `max_n_coalitions=2^n_features`. @@ -53,10 +53,6 @@ #' Specifies the seed before any randomness based code is being run. #' If `NULL` no seed is set in the calling environment. #' -#' @param keep_samp_for_vS Logical. -#' Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in `internal$output`). -#' Not used for `approach="regression_separate"` or `approach="regression_surrogate"`. -#' #' @param predict_model Function. #' The prediction function used when `model` is not natively supported. #' (Run [get_supported_models()] for a list of natively supported models.) @@ -79,11 +75,6 @@ #' disabled for unsupported model classes. #' Can also be used to override the default function for natively supported model classes. #' -#' @param MSEv_uniform_comb_weights Logical. 
-#' If `TRUE` (default), then the function weights the coalitions uniformly when computing the MSEv criterion. -#' If `FALSE`, then the function use the Shapley kernel weights to weight the coalitions when computing the MSEv -#' criterion. -#' Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered. #' #' @param verbose String vector or NULL. #' Specifies the verbosity (printout detail level) through one or more of strings `"basic"`, `"progress"`, @@ -91,8 +82,8 @@ #' `"basic"` (default) displays basic information about the computation which is being performed. #' `"progress` displays information about where in the calculation process the function currently is. #' #' `"convergence"` displays information on how close to convergence the Shapley value estimates are -#' (only when `adaptive = TRUE`) . -#' `"shapley"` displays intermediate Shapley value estimates and standard deviations (only when `adaptive = TRUE`) +#' (only when `iterative = TRUE`). +#' `"shapley"` displays intermediate Shapley value estimates and standard deviations (only when `iterative = TRUE`) #' + the final estimates. #' `"vS_details"` displays information about the v_S estimates. #' This is most relevant for `approach %in% c("regression_separate", "regression_surrogate", "vaeac"`). @@ -105,9 +96,9 @@ #' That is, if there are 5 features and e.g. coalitions (1,3,5) are sampled, then also coalition (2,4) is used for #' computing the Shapley values. This is done to reduce the variance of the Shapley value estimates. #' -#' @param adaptive Logical or NULL +#' @param iterative Logical or NULL #' If `NULL` (default), the argument is set to `TRUE` if there are more than 5 features/groups, and `FALSE` otherwise. -#' If eventually `TRUE`, the Shapley values are estimated adaptively in an iterative manner. +#' If eventually `TRUE`, the Shapley values are estimated iteratively. #' This provides sufficiently accurate Shapley value estimates faster. #' First an initial number of coalitions is sampled, then bootsrapping is used to estimate the variance of the Shapley #' values. @@ -115,13 +106,18 @@ #' If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more #' coalitions. #' The process is repeated until the variances are below the threshold. -#' Specifics related to the adaptive process and convergence criterion are set through `adaptive_arguments`. -#' -#' @param adaptive_arguments Named list. -#' Specifices the arguments for the adaptive procedure. -#' See [shapr::get_adaptive_arguments_default()] for description of the arguments and their default values. -#' -#' @param shapley_reweighting String. +#' Specifics related to the iterative process and convergence criterion are set through `iterative_args`. +#' +#' @param iterative_args Named list. +#' Specifies the arguments for the iterative procedure. +#' See [shapr::get_iterative_args_default()] for description of the arguments and their default values. +#' @param output_args Named list. +#' Specifies certain arguments related to the output of the function. +#' See [shapr::get_output_args_default()] for description of the arguments and their default values. +#' @param extra_computation_args Named list. +#' Specifies extra arguments related to the computation of the Shapley values. +#' See [shapr::get_extra_est_args_default()] for description of the arguments and their default values. +#' @param kernelSHAP_reweighting String. 
#' How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing #' the randomness and thereby the variance of the Shapley value estimates. #' One of `'none'`, `'on_N'`, `'on_all'`, `'on_all_cond'` (default). @@ -138,7 +134,7 @@ #' If an object of class `shapr` is provided or string with a path to where intermediate results are strored, #' then the function will use the previous object to continue the computation. #' This is useful if the computation is interrupted or you want higher accuracy than already obtained, and therefore -#' want to continue the adaptive estimation. See the vignette for examples. +#' want to continue the iterative estimation. See the vignette for examples. #' #' @param asymmetric Logical. #' Not applicable for (regular) non-causal or asymmetric explanations. @@ -204,22 +200,22 @@ #' #' The package allows for parallelized computation with progress updates through the tightly connected #' [future::future] and [progressr::progressr] packages. See the examples below. -#' For adaptive estimation (`adaptive=TRUE`), intermediate results may also be printed to the console +#' For iterative estimation (`iterative=TRUE`), intermediate results may also be printed to the console #' (according to the `verbose` argument). #' Moreover, the intermediate results are written to disk. -#' This combined with adaptive estimation with (optional) intermediate results printed to the console (and temporary +#' This combined with iterative estimation with (optional) intermediate results printed to the console (and temporary #' written to disk, and batch computing of the v(S) values, enables fast and accurate estimation of the Shapley values #' in a memory friendly manner. #' #' @return Object of class `c("shapr", "list")`. Contains the following items: #' \describe{ -#' \item{shapley_values}{data.table with the estimated Shapley values with explained observation in the rows and +#' \item{shapley_values_est}{data.table with the estimated Shapley values with the explained observations in the rows and #' features along the columns. #' The column `none` is the prediction not devoted to any of the features (given by the argument `prediction_zero`)} #' \item{shapley_values_sd}{data.table with the standard deviation of the Shapley values reflecting the uncertainty. #' Note that this only reflects the coalition sampling part of the kernelSHAP procedure, and is therefore by #' definition 0 when all coalitions is used. -#' Only present when `adaptive = TRUE` and `adaptive_arguments$compute_sd=TRUE`.} +#' Only present when `extra_computation_args$compute_sd=TRUE`.} #' \item{internal}{List with the different parameters, data, functions and other output used internally.} #' \item{pred_explain}{Numeric vector with the predictions for the explained observations} #' \item{MSEv}{List with the values of the MSEv evaluation criterion for the approach. See the @@ -229,8 +225,8 @@ #' `init_time` and `end_time` gives the time stamps for the start and end of the computation. #' `total_time_secs` gives the total time in seconds for the complete execution of `explain()`. #' `main_timing_secs` gives the time in seconds for the main computations. 
-#' `iter_timing_secs` gives for each iteration of the adaptive estimation, the time spent on the different parts -#' adaptive estimation routine.} +#' `iter_timing_secs` gives for each iteration of the iterative estimation, the time spent on the different parts of the +#' iterative estimation routine.} #' } #' #' @examples @@ -319,7 +315,7 @@ #' ) #' #' # Print the Shapley values -#' print(explain1$shapley_values) +#' print(explain1$shapley_values_est) #' #' # Plot the results #' if (requireNamespace("ggplot2", quietly = TRUE)) { @@ -339,7 +335,7 @@ #' prediction_zero = p, #' n_MC_samples = 1e2 #' ) -#' print(explain_groups$shapley_values) +#' print(explain_groups$shapley_values_est) #' #' # Separate and surrogate regression approaches with linear regression models. #' # More complex regression models can be used, and we can use CV to @@ -365,19 +361,19 @@ #' regression.model = parsnip::linear_reg() #' ) #' -#' ## Adaptive estimation +#' ## Iterative estimation #' # For illustration purposes only. By default not used for such small dimensions as here #' #' # Gaussian approach -#' explain_adaptive <- explain( +#' explain_iterative <- explain( #' model = model, #' x_explain = x_explain, #' x_train = x_train, #' approach = "gaussian", #' prediction_zero = p, #' n_MC_samples = 1e2, -#' adaptive = TRUE, -#' adaptive_arguments = list(initial_n_coalitions = 10) +#' iterative = TRUE, +#' iterative_args = list(initial_n_coalitions = 10) #' ) #' #' @export @@ -399,24 +395,24 @@ explain <- function(model, x_explain, x_train, approach, - paired_shap_sampling = TRUE, prediction_zero, + iterative = NULL, max_n_coalitions = NULL, - adaptive = NULL, group = NULL, + paired_shap_sampling = TRUE, n_MC_samples = 1e3, + kernelSHAP_reweighting = "on_all_cond", seed = 1, - keep_samp_for_vS = FALSE, + verbose = "basic", predict_model = NULL, get_model_specs = NULL, - MSEv_uniform_comb_weights = TRUE, - verbose = "basic", - adaptive_arguments = list(), - shapley_reweighting = "on_all_cond", prev_shapr_object = NULL, asymmetric = FALSE, causal_ordering = NULL, confounding = NULL, + extra_computation_args = list(), + iterative_args = list(), + output_args = list(), ...) { # ... is further arguments passed to specific approaches @@ -442,18 +438,18 @@ explain <- function(model, group = group, n_MC_samples = n_MC_samples, seed = seed, - keep_samp_for_vS = keep_samp_for_vS, feature_specs = feature_specs, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, verbose = verbose, - adaptive = adaptive, - adaptive_arguments = adaptive_arguments, - shapley_reweighting = shapley_reweighting, + iterative = iterative, + iterative_args = iterative_args, + kernelSHAP_reweighting = kernelSHAP_reweighting, init_time = init_time, prev_shapr_object = prev_shapr_object, asymmetric = asymmetric, causal_ordering = causal_ordering, confounding = confounding, + output_args = output_args, + extra_computation_args = extra_computation_args, ... 
) @@ -531,7 +527,7 @@ explain <- function(model, internal$main_timing_list$main_computation <- Sys.time() - # Rerun after convergence to get the same output format as for the non-adaptive approach + # Rerun after convergence to get the same output format as for the non-iterative approach output <- finalize_explanation(internal = internal) internal$main_timing_list$finalize_explanation <- Sys.time() @@ -583,7 +579,8 @@ testing_cleanup <- function(output) { } # Delete the saving_path - output$internal$parameters$adaptive_arguments$saving_path <- NULL + output$internal$parameters$output_args$saving_path <- NULL + output$saving_path <- NULL return(output) } diff --git a/R/explain_forecast.R b/R/explain_forecast.R index 55ab1d45c..f3f8ea5dc 100644 --- a/R/explain_forecast.R +++ b/R/explain_forecast.R @@ -95,14 +95,13 @@ explain_forecast <- function(model, approach, prediction_zero, max_n_coalitions = NULL, - adaptive = NULL, - adaptive_arguments = list(), - shapley_reweighting = "on_all_cond", + iterative = NULL, + iterative_args = list(), + kernelSHAP_reweighting = "on_all_cond", group_lags = TRUE, group = NULL, n_MC_samples = 1e3, seed = 1, - keep_samp_for_vS = FALSE, predict_model = NULL, get_model_specs = NULL, verbose = "basic", @@ -131,13 +130,12 @@ explain_forecast <- function(model, max_n_coalitions = max_n_coalitions, n_MC_samples = n_MC_samples, seed = seed, - keep_samp_for_vS = keep_samp_for_vS, feature_specs = feature_specs, type = "forecast", horizon = horizon, - adaptive = adaptive, - adaptive_arguments = adaptive_arguments, - shapley_reweighting = shapley_reweighting, + iterative = iterative, + iterative_args = iterative_args, + kernelSHAP_reweighting = kernelSHAP_reweighting, init_time = init_time, y = y, xreg = xreg, diff --git a/R/finalize_explanation.R b/R/finalize_explanation.R index 579a39504..b820c4297 100644 --- a/R/finalize_explanation.R +++ b/R/finalize_explanation.R @@ -4,7 +4,7 @@ #' #' @export finalize_explanation <- function(internal) { - MSEv_uniform_comb_weights <- internal$parameters$MSEv_uniform_comb_weights + MSEv_uniform_comb_weights <- internal$parameters$output_args$MSEv_uniform_comb_weights output_size <- internal$parameters$output_size dt_vS <- internal$output$dt_vS @@ -45,14 +45,16 @@ finalize_explanation <- function(internal) { } # Extract iterative results in a simplified format - internal$iter_results <- get_iter_results(internal$iter_list) + iterative_results <- get_iter_results(internal$iter_list) output <- list( - shapley_values = dt_shapley_est, + shapley_values_est = dt_shapley_est, shapley_values_sd = dt_shapley_sd, - internal = internal, pred_explain = p, - MSEv = MSEv + MSEv = MSEv, + iterative_results = iterative_results, + saving_path = internal$parameters$output_args$saving_path, + internal = internal ) attr(output, "class") <- c("shapr", "list") @@ -68,7 +70,7 @@ get_iter_results <- function(iter_list) { } iter_list_to_dt <- function(iter_list, what = c( - "exact", "compute_sd", "reduction_factor", "n_coalitions", "n_batches", + "exact", "compute_sd", "n_coal_next_iter_factor", "n_coalitions", "n_batches", "converged", "converged_exact", "converged_sd", "converged_max_iter", "est_required_coalitions", "est_remaining_coalitions", "overall_conv_measure" )) { @@ -220,3 +222,54 @@ compute_MSEv_eval_crit <- function(internal, MSEv_coalition = MSEv_coalition )) } + + +#' Computes the Shapley values given `v(S)` +#' +#' @inherit explain +#' @inheritParams default_doc +#' @param vS_list List +#' Output from [compute_vS()] +#' +#' @export 
+finalize_explanation_forecast <- function(vS_list, internal) { # Temporarily used for forecast only (the old function) + MSEv_uniform_comb_weights <- internal$parameters$output_args$MSEv_uniform_comb_weights + + processed_vS_list <- postprocess_vS_list( + vS_list = vS_list, + internal = internal + ) + + # Extract the predictions we are explaining + p <- get_p(processed_vS_list$dt_vS, internal) + + # Compute the Shapley values + dt_shapley <- compute_shapley_new(internal, processed_vS_list$dt_vS) + + # Clearing out the timing lists as they are added to the output separately + internal$main_timing_list <- internal$iter_timing_list <- internal$timing_list <- NULL + + # Clearing out the tmp list with model and predict_model (only added for AICc-types of empirical approach) + internal$tmp <- NULL + + internal$output <- processed_vS_list + + output <- list( + shapley_values_est = dt_shapley, + internal = internal, + pred_explain = p + ) + attr(output, "class") <- c("shapr", "list") + + # Compute the MSEv evaluation criterion if the output of the predictive model is a scalar. + # TODO: check if it makes sense for output_size > 1. + if (internal$parameters$output_size == 1) { + output$MSEv <- compute_MSEv_eval_crit( + internal = internal, + dt_vS = processed_vS_list$dt_vS, + MSEv_uniform_comb_weights = MSEv_uniform_comb_weights + ) + } + + return(output) +} diff --git a/R/plot.R b/R/plot.R index c7f8fa363..13eb328a5 100644 --- a/R/plot.R +++ b/R/plot.R @@ -186,7 +186,7 @@ plot.shapr <- function(x, } # Remove the explain_id column - x$shapley_values <- x$shapley_values[, -"explain_id"] + x$shapley_values_est <- x$shapley_values_est[, -"explain_id"] if (is.null(index_x_explain)) index_x_explain <- seq(x$internal$parameters$n_explain) if (is.null(top_k_features)) top_k_features <- x$internal$parameters$n_features + 1 @@ -229,7 +229,7 @@ plot.shapr <- function(x, # melting Kshap shap_names <- x$internal$parameters$shap_names - dt_shap <- round(data.table::copy(x$shapley_values), digits = digits) + dt_shap <- round(data.table::copy(x$shapley_values_est), digits = digits) dt_shap[, id := .I] dt_shap_long <- data.table::melt(dt_shap, id.vars = "id", value.name = "phi") dt_shap_long[, sign := factor(sign(phi), levels = c(1, -1), labels = c("Increases", "Decreases"))] @@ -241,7 +241,7 @@ plot.shapr <- function(x, desc_mat[, i] <- paste0(shap_names[i], " = ", desc_mat[, i]) } } else { - desc_mat <- trimws(format(x$shapley_values[, -c("explain_id", "none")], digits = digits)) + desc_mat <- trimws(format(x$shapley_values_est[, -c("explain_id", "none")], digits = digits)) for (i in seq_len(ncol(desc_mat))) { desc_mat[, i] <- paste0(shap_names[i]) } @@ -1165,7 +1165,7 @@ MSEv_check_explanation_list <- function(explanation_list) { if (any(names(explanation_list) == "")) stop("All the entries in `explanation_list` must be named.") # Check that all explanation objects use the same column names for the Shapley values - if (length(unique(lapply(explanation_list, function(explanation) colnames(explanation$shapley_values)))) != 1) { + if (length(unique(lapply(explanation_list, function(explanation) colnames(explanation$shapley_values_est)))) != 1) { stop("The Shapley value feature names are not identical in all objects in the `explanation_list`.") } @@ -1586,7 +1586,7 @@ plot_SV_several_approaches <- function(explanation_list, if (any(names(explanation_list) == "")) stop("All the entries in `explanation_list` must be named.") # Check that the column names for the Shapley values are the same for all explanations in the 
`explanation_list` - if (length(unique(lapply(explanation_list, function(explanation) colnames(explanation$shapley_values)))) != 1) { + if (length(unique(lapply(explanation_list, function(explanation) colnames(explanation$shapley_values_est)))) != 1) { stop("The Shapley value feature names are not identical in all objects in the `explanation_list`.") } @@ -1708,7 +1708,7 @@ update_only_these_features <- function(explanation_list, # Update the `only_these_features` parameter vector based on `plot_phi0` or in case it is NULL # Get the common feature names for all explanation objects (including `none`) and one without `none` - feature_names_with_none <- colnames(explanation_list[[1]]$shapley_values)[-1] + feature_names_with_none <- colnames(explanation_list[[1]]$shapley_values_est)[-1] feature_names_without_none <- feature_names_with_none[feature_names_with_none != "none"] # Only keep the desired features/columns @@ -1759,7 +1759,7 @@ extract_Shapley_values_dt <- function(explanation_list, lapply( explanation_list, function(explanation) { - data.table::copy(explanation$shapley_values)[, c(".id", ".pred") := list(.I, explanation$pred_explain)] + data.table::copy(explanation$shapley_values_est)[, c(".id", ".pred") := list(.I, explanation$pred_explain)] } ), use.names = TRUE, diff --git a/R/prepare_next_iteration.R b/R/prepare_next_iteration.R index 7800db73f..13bd231bc 100644 --- a/R/prepare_next_iteration.R +++ b/R/prepare_next_iteration.R @@ -1,4 +1,4 @@ -#' Prepares the next iteration of the adaptive sampling algorithm +#' Prepares the next iteration of the iterative sampling algorithm #' #' @inheritParams default_doc_explain #' @@ -14,18 +14,18 @@ prepare_next_iteration <- function(internal) { next_iter_list <- list() n_shapley_values <- internal$parameters$n_shapley_values - reduction_factor_vec <- internal$parameters$adaptive_arguments$reduction_factor_vec - fixed_n_coalitions_per_iter <- internal$parameters$adaptive_arguments$fixed_n_coalitions_per_iter - max_n_coalitions <- internal$parameters$adaptive_arguments$max_n_coalitions + n_coal_next_iter_factor_vec <- internal$parameters$iterative_args$n_coal_next_iter_factor_vec + fixed_n_coalitions_per_iter <- internal$parameters$iterative_args$fixed_n_coalitions_per_iter + max_n_coalitions <- internal$parameters$iterative_args$max_n_coalitions est_remaining_coalitions <- internal$iter_list[[iter]]$est_remaining_coalitions - reduction_factor <- internal$iter_list[[iter]]$reduction_factor + n_coal_next_iter_factor <- internal$iter_list[[iter]]$n_coal_next_iter_factor current_n_coalitions <- internal$iter_list[[iter]]$n_coalitions current_coal_samples <- internal$iter_list[[iter]]$coal_samples if (is.null(fixed_n_coalitions_per_iter)) { - proposal_next_n_coalitions <- current_n_coalitions + ceiling(est_remaining_coalitions * reduction_factor) + proposal_next_n_coalitions <- current_n_coalitions + ceiling(est_remaining_coalitions * n_coal_next_iter_factor) } else { proposal_next_n_coalitions <- current_n_coalitions + fixed_n_coalitions_per_iter } @@ -53,14 +53,14 @@ prepare_next_iteration <- function(internal) { next_iter_list$compute_sd <- TRUE } - if (!is.null(reduction_factor_vec[1])) { - next_iter_list$reduction_factor <- ifelse( - length(reduction_factor_vec) >= iter, - reduction_factor_vec[iter], - reduction_factor_vec[length(reduction_factor_vec)] + if (!is.null(n_coal_next_iter_factor_vec[1])) { + next_iter_list$n_coal_next_iter_factor <- ifelse( + length(n_coal_next_iter_factor_vec) >= iter, + n_coal_next_iter_factor_vec[iter], + 
n_coal_next_iter_factor_vec[length(n_coal_next_iter_factor_vec)] ) } else { - next_iter_list$reduction_factor <- NULL + next_iter_list$n_coal_next_iter_factor <- NULL } next_iter_list$new_n_coalitions <- next_iter_list$n_coalitions - current_n_coalitions diff --git a/R/print.R b/R/print.R index 5937e1b42..573cc36e6 100644 --- a/R/print.R +++ b/R/print.R @@ -1,6 +1,6 @@ #' @export print.shapr <- function(x, digits = 4, ...) { - shap <- copy(x$shapley_values) + shap <- copy(x$shapley_values_est) shap_names <- x$internal$parameters$shap_names cols <- c("none", shap_names) shap[, (cols) := lapply(.SD, round, digits = digits + 2), .SDcols = cols] diff --git a/R/print_iter.R b/R/print_iter.R index 6c1a3491b..174eea7ab 100644 --- a/R/print_iter.R +++ b/R/print_iter.R @@ -14,14 +14,14 @@ print_iter <- function(internal) { converged_max_iter <- internal$iter_list[[iter]]$converged_max_iter converged_max_n_coalitions <- internal$iter_list[[iter]]$converged_max_n_coalitions overall_conv_measure <- internal$iter_list[[iter]]$overall_conv_measure - reduction_factor <- internal$iter_list[[iter]]$reduction_factor + n_coal_next_iter_factor <- internal$iter_list[[iter]]$n_coal_next_iter_factor - saving_path <- internal$parameters$adaptive_arguments$saving_path - convergence_tolerance <- internal$parameters$adaptive_arguments$convergence_tolerance + saving_path <- internal$parameters$output_args$saving_path + convergence_tol <- internal$parameters$iterative_args$convergence_tol testing <- internal$parameters$testing if ("convergence" %in% verbose) { - convergence_tolerance <- internal$parameters$adaptive_arguments$convergence_tolerance + convergence_tol <- internal$parameters$iterative_args$convergence_tol current_n_coalitions <- internal$iter_list[[iter]]$n_coalitions est_remaining_coalitions <- internal$iter_list[[iter]]$est_remaining_coalitions @@ -35,15 +35,15 @@ print_iter <- function(internal) { if (isFALSE(converged)) { msg <- "Not converged after {current_n_coalitions} coalitions:\n" - if (!is.null(convergence_tolerance)) { + if (!is.null(convergence_tol)) { conv_nice <- signif(overall_conv_measure, 2) - tol_nice <- format(signif(convergence_tolerance, 2), scientific = FALSE) - reduction_factor_nice <- format(signif(reduction_factor * 100, 2), scientific = FALSE) + tol_nice <- format(signif(convergence_tol, 2), scientific = FALSE) + n_coal_next_iter_factor_nice <- format(signif(n_coal_next_iter_factor * 100, 2), scientific = FALSE) msg <- paste0( msg, "Current convergence measure: {conv_nice} [needs {tol_nice}]\n", "Estimated remaining coalitions: {est_remaining_coalitions}\n", - "(Concervatively) adding {reduction_factor_nice}% of that ({next_new_n_coalitions} coalitions) ", + "(Conservatively) adding {n_coal_next_iter_factor_nice}% of that ({next_new_n_coalitions} coalitions) ", "in the next iteration." 
) } diff --git a/R/save_results.R b/R/save_results.R index ca48c0098..cef0e97b9 100644 --- a/R/save_results.R +++ b/R/save_results.R @@ -5,7 +5,7 @@ #' @export #' @keywords internal save_results <- function(internal) { - saving_path <- internal$parameters$adaptive_arguments$saving_path + saving_path <- internal$parameters$output_args$saving_path # Modify name for the new file filename <- basename(saving_path) diff --git a/R/setup.R b/R/setup.R index 19d32d563..be00abb95 100644 --- a/R/setup.R +++ b/R/setup.R @@ -33,9 +33,7 @@ setup <- function(x_train, group, n_MC_samples, seed, - keep_samp_for_vS, feature_specs, - MSEv_uniform_comb_weights = TRUE, type = "normal", horizon = NULL, y = NULL, @@ -46,9 +44,9 @@ setup <- function(x_train, explain_xreg_lags = NULL, group_lags = NULL, verbose, - adaptive = NULL, - adaptive_arguments = list(), - shapley_reweighting = "none", + iterative = NULL, + iterative_args = list(), + kernelSHAP_reweighting = "none", is_python = FALSE, testing = FALSE, init_time = NULL, @@ -56,6 +54,8 @@ setup <- function(x_train, asymmetric = FALSE, causal_ordering = NULL, confounding = NULL, + output_args = list(), + extra_computation_args = list(), ...) { internal <- list() @@ -68,7 +68,7 @@ setup <- function(x_train, prev_iter_list <- prev_internal$iter_list # Overwrite the input arguments set in explain() with those from in prev_shapr_object - # except model, x_explain, x_train, max_n_coalitions, adaptive_arguments, seed + # except model, x_explain, x_train, max_n_coalitions, iterative_args, seed list2env(prev_internal$parameters) } @@ -82,7 +82,6 @@ setup <- function(x_train, group = group, n_MC_samples = n_MC_samples, seed = seed, - keep_samp_for_vS = keep_samp_for_vS, type = type, horizon = horizon, train_idx = train_idx, @@ -90,16 +89,17 @@ setup <- function(x_train, explain_y_lags = explain_y_lags, explain_xreg_lags = explain_xreg_lags, group_lags = group_lags, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, verbose = verbose, - adaptive = adaptive, - adaptive_arguments = adaptive_arguments, - shapley_reweighting = shapley_reweighting, + iterative = iterative, + iterative_args = iterative_args, + kernelSHAP_reweighting = kernelSHAP_reweighting, is_python = is_python, testing = testing, asymmetric = asymmetric, causal_ordering = causal_ordering, confounding = confounding, + output_args = output_args, + extra_computation_args = extra_computation_args, ... 
) @@ -126,7 +126,7 @@ setup <- function(x_train, internal <- check_and_set_parameters(internal, type) - internal <- set_adaptive_parameters(internal, prev_iter_list) + internal <- set_iterative_parameters(internal, prev_iter_list) internal$timing_list <- list( init_time = init_time, @@ -137,7 +137,7 @@ } get_prev_internal <- function(prev_shapr_object, - exclude_parameters = c("max_n_coalitions", "adaptive_arguments", "seed")) { + exclude_parameters = c("max_n_coalitions", "iterative_args", "seed")) { cl <- class(prev_shapr_object)[1] if (cl == "character") { @@ -169,7 +169,6 @@ get_parameters <- function(approach, group, n_MC_samples, seed, - keep_samp_for_vS, type, horizon, train_idx, @@ -177,16 +176,17 @@ explain_y_lags, explain_xreg_lags, group_lags = NULL, - MSEv_uniform_comb_weights, verbose = "basic", - adaptive = FALSE, - adaptive_arguments = list(), - shapley_reweighting = "none", - testing, + iterative = FALSE, + iterative_args = list(), + kernelSHAP_reweighting = "none", asymmetric, causal_ordering, confounding, is_python, + output_args = list(), + extra_computation_args = list(), + testing = FALSE, ...) { # Check input type for approach @@ -195,12 +195,19 @@ stop("`paired_shap_sampling` must be a single logical.") } - if (!is.logical(adaptive) && length(adaptive) == 1) { - stop("`adaptive` must be a single logical.") + if (!is.null(iterative) && !(is.logical(iterative) && length(iterative) == 1)) { + stop("`iterative` must be NULL or a single logical.") } - if (!is.list(adaptive_arguments)) { - stop("`adaptive_arguments` must be a list.") + if (!is.list(iterative_args)) { + stop("`iterative_args` must be a list.") } + if (!is.list(output_args)) { + stop("`output_args` must be a list.") + } + if (!is.list(extra_computation_args)) { + stop("`extra_computation_args` must be a list.") + } + # max_n_coalitions @@ -226,11 +233,6 @@ stop("`n_MC_samples` must be a single positive integer.") } - # keep_samp_for_vS - if (!(is.logical(keep_samp_for_vS) && - length(keep_samp_for_vS) == 1)) { - stop("`keep_samp_for_vS` must be single logical.") - } # type if (!(type %in% c("normal", "forecast"))) { @@ -281,10 +283,6 @@ } } - # Parameter used in the MSEv evaluation criterion - if (!(is.logical(MSEv_uniform_comb_weights) && length(MSEv_uniform_comb_weights) == 1)) { - stop("`MSEv_uniform_comb_weights` must be single logical.") - } # Parameter used in asymmetric and causal Shapley values (more in-depth checks later) if (!is.logical(asymmetric) || length(asymmetric) != 1) stop("`asymmetric` must be a single logical.\n") @@ -304,10 +302,10 @@ } # type - if (!(length(shapley_reweighting) == 1 && shapley_reweighting %in% + if (!(length(kernelSHAP_reweighting) == 1 && kernelSHAP_reweighting %in% c("none", "on_N", "on_coal_size", "on_all", "on_N_sum", "on_all_cond", "on_all_cond_paired", "comb"))) { stop( - "`shapley_reweighting` must be one of `none`, `on_N`, `on_coal_size`, `on_N_sum`, ", + "`kernelSHAP_reweighting` must be one of `none`, `on_N`, `on_coal_size`, `on_N_sum`, ", "`on_all`, `on_all_cond`, `on_all_cond_paired` or `comb`.\n" ) } @@ -322,21 +320,21 @@ group = group, n_MC_samples = n_MC_samples, seed = seed, - keep_samp_for_vS = keep_samp_for_vS, is_python = is_python, output_size = output_size, type = type, horizon = horizon, group_lags = group_lags, - 
MSEv_uniform_comb_weights = MSEv_uniform_comb_weights, verbose = verbose, - shapley_reweighting = shapley_reweighting, - adaptive = adaptive, - adaptive_arguments = adaptive_arguments, - testing = testing, + kernelSHAP_reweighting = kernelSHAP_reweighting, + iterative = iterative, + iterative_args = iterative_args, + output_args = output_args, + extra_computation_args = extra_computation_args, asymmetric = asymmetric, causal_ordering = causal_ordering, - confounding = confounding + confounding = confounding, + testing = testing ) # Getting additional parameters from ... @@ -630,12 +628,15 @@ check_and_set_parameters <- function(internal, type) { check_max_n_coalitions_fc(internal) - # Check and set adaptive - internal <- check_and_set_adaptive(internal) # sets the adaptive parameter if it is NULL (default) + internal <- set_output_parameters(internal) + + internal <- check_and_set_iterative(internal) # sets the iterative parameter if it is NULL (default) # Set if we are to do exact Shapley value computations or not internal <- set_exact(internal) + internal <- set_extra_estimation_params(internal) + # Give warnings to the user about long computation times check_computability(internal) @@ -972,29 +973,192 @@ check_max_n_coalitions_fc <- function(internal) { } } -check_and_set_adaptive <- function(internal) { - adaptive <- internal$parameters$adaptive +#' @author Martin Jullum +#' @keywords internal +set_output_parameters <- function(internal) { + output_args <- internal$parameters$output_args + + # Get defaults + output_args <- utils::modifyList(get_output_args_default(), + output_args, + keep.null = TRUE + ) + + check_output_args(output_args) + + internal$parameters$output_args <- output_args + + return(internal) +} + +#' Gets the default values for the output arguments +#' +#' @param keep_samp_for_vS Logical. +#' Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in `internal$output`). +#' Not used for `approach="regression_separate"` or `approach="regression_surrogate"`. +#' @param MSEv_uniform_comb_weights Logical. +#' If `TRUE` (default), then the function weights the coalitions uniformly when computing the MSEv criterion. +#' If `FALSE`, then the function uses the Shapley kernel weights to weight the coalitions when computing the MSEv +#' criterion. +#' Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered. +#' @param saving_path String. +#' The path to the file where the results of the iterative estimation procedure should be saved. +#' Defaults to a temporary file. 
+#' @export +#' @author Martin Jullum +get_output_args_default <- function(keep_samp_for_vS = FALSE, + MSEv_uniform_comb_weights = TRUE, + saving_path = tempfile("shapr_obj_", fileext = ".rds")) { + return(mget(methods::formalArgs(get_output_args_default))) +} + +check_output_args <- function(output_args) { + list2env(output_args, envir = environment()) # Make accessible in the environment + + # Check the output_args elements + + # keep_samp_for_vS + if (!(is.logical(keep_samp_for_vS) && + length(keep_samp_for_vS) == 1)) { + stop("`output_args$keep_samp_for_vS` must be a single logical.") + } + + # Parameter used in the MSEv evaluation criterion + if (!(is.logical(MSEv_uniform_comb_weights) && length(MSEv_uniform_comb_weights) == 1)) { + stop("`output_args$MSEv_uniform_comb_weights` must be a single logical.") + } + + # saving_path + if (!(is.character(saving_path) && + length(saving_path) == 1)) { + stop("`output_args$saving_path` must be a single character.") + } + + # Also check that the directory of the saving_path exists, and abort if not... + if (!dir.exists(dirname(saving_path))) { + stop( + paste0( + "Directory ", dirname(saving_path), " in the output_args$saving_path does not exist.\n", + "Please create the directory with `dir.create('", dirname(saving_path), "')` or use another directory." + ) + ) + } +} + + +#' @author Martin Jullum +#' @keywords internal +set_extra_estimation_params <- function(internal) { + extra_computation_args <- internal$parameters$extra_computation_args + + # Get defaults + extra_computation_args <- utils::modifyList(get_extra_est_args_default(internal), + extra_computation_args, + keep.null = TRUE + ) + + # Check the extra_computation_args elements + check_extra_computation_args(extra_computation_args) + + extra_computation_args <- trans_null_extra_est_args(extra_computation_args) + + internal$parameters$extra_computation_args <- extra_computation_args + + return(internal) +} + +#' Gets the default values for the extra estimation arguments +#' +#' @param compute_sd Logical. Whether to estimate the standard deviations of the Shapley value estimates. This is TRUE +#' whenever sampling-based kernelSHAP is applied (either iteratively or with a fixed number of coalitions). +#' @param n_boot_samps Integer. The number of bootstrapped samples (i.e. samples with replacement) from the set of all +#' coalitions used to estimate the standard deviations of the Shapley value estimates. +#' @param max_batch_size Integer. The maximum number of coalitions to estimate simultaneously within each iteration. +#' A larger number requires more memory, but may have a slight computational advantage. +#' @param min_n_batches Integer. The minimum number of batches to split the computation into within each iteration. +#' Larger numbers give more frequent progress updates. If parallelization is applied, this should be set no smaller +#' than the number of parallel workers. 
+#' @inheritParams default_doc_explain +#' @export +#' @author Martin Jullum +get_extra_est_args_default <- function(internal, # Only used to get the default value of compute_sd + compute_sd = isFALSE(internal$parameters$exact), + n_boot_samps = 100, + max_batch_size = 10, + min_n_batches = 10) { + return(mget(methods::formalArgs(get_extra_est_args_default)[-1])) # [-1] to exclude internal +} + +check_extra_computation_args <- function(extra_computation_args) { + list2env(extra_computation_args, envir = environment()) # Make accessible in the environment + + # compute_sd + if (!(is.logical(compute_sd) && + length(compute_sd) == 1)) { + stop("`extra_computation_args$compute_sd` must be a single logical.") + } + + # n_boot_samps + if (!(is.wholenumber(n_boot_samps) && + length(n_boot_samps) == 1 && + !is.na(n_boot_samps) && + n_boot_samps > 0)) { + stop("`extra_computation_args$n_boot_samps` must be a single positive integer.") + } + + # max_batch_size + if (!is.null(max_batch_size) && + !((is.wholenumber(max_batch_size) || is.infinite(max_batch_size)) && + length(max_batch_size) == 1 && + !is.na(max_batch_size) && + max_batch_size > 0)) { + stop("`extra_computation_args$max_batch_size` must be NULL, Inf or a single positive integer.") + } + + # min_n_batches + if (!is.null(min_n_batches) && + !(is.wholenumber(min_n_batches) && + length(min_n_batches) == 1 && + !is.na(min_n_batches) && + min_n_batches > 0)) { + stop("`extra_computation_args$min_n_batches` must be NULL or a single positive integer.") + } +} + +trans_null_extra_est_args <- function(extra_computation_args) { + list2env(extra_computation_args, envir = environment()) + + # Translating NULL to always return n_batches = 1 (if just one approach) + extra_computation_args$min_n_batches <- ifelse(is.null(min_n_batches), 1, min_n_batches) + extra_computation_args$max_batch_size <- ifelse(is.null(max_batch_size), Inf, max_batch_size) + + return(extra_computation_args) +} + + +check_and_set_iterative <- function(internal) { + iterative <- internal$parameters$iterative approach <- internal$parameters$approach - # Always adaptive = FALSE for vaeac and regression_surrogate + # Always iterative = FALSE for vaeac and regression_surrogate if (any(approach %in% c("vaeac", "regression_surrogate"))) { unsupported <- approach[approach %in% c("vaeac", "regression_surrogate")] - if (isTRUE(adaptive)) { + if (isTRUE(iterative)) { warning( paste0( - "Adaptive estimation of Shapley values are not supported for approach = ", - paste0(unsupported, collapse = ", "), ". Setting adaptive = FALSE." + "Iterative estimation of Shapley values is not supported for approach = ", + paste0(unsupported, collapse = ", "), ". Setting iterative = FALSE." 
) ) } - internal$parameters$adaptive <- FALSE + internal$parameters$iterative <- FALSE } else { - # Sets the default value of adaptive to TRUE if computing more than 5 Shapley values for all other approaches - if (is.null(adaptive)) { + # Sets the default value of iterative to TRUE if computing more than 5 Shapley values for all other approaches + if (is.null(iterative)) { n_shapley_values <- internal$parameters$n_shapley_values # n_features if feature-wise and n_groups if group-wise - internal$parameters$adaptive <- isTRUE(n_shapley_values > 5) + internal$parameters$iterative <- isTRUE(n_shapley_values > 5) } } @@ -1007,11 +1171,11 @@ set_exact <- function(internal) { n_features <- internal$parameters$n_features n_groups <- internal$parameters$n_groups is_groupwise <- internal$parameters$is_groupwise - adaptive <- internal$parameters$adaptive + iterative <- internal$parameters$iterative asymmetric <- internal$parameters$asymmetric max_n_coalitions_causal <- internal$parameters$max_n_coalitions_causal - if (isFALSE(adaptive) && + if (isFALSE(iterative) && ( (isTRUE(asymmetric) && max_n_coalitions == max_n_coalitions_causal) || (isFALSE(is_groupwise) && max_n_coalitions == 2^n_features) || @@ -1047,7 +1211,7 @@ check_computability <- function(internal) { paste0( "Due to computation time, we recommend not computing asymmetric Shapley values exactly \n", "with all valid causal coalitions (", max_n_coalitions_causal, ") when larger than 5000.\n", - "Consider reducing max_n_coalitions and enabling adaptive estimation with adaptive = TRUE.\n" + "Consider reducing max_n_coalitions and enabling iterative estimation with iterative = TRUE.\n" ) ) } @@ -1061,7 +1225,7 @@ check_computability <- function(internal) { paste0( "Due to computation time, we recommend not computing Shapley values exactly \n", "with all 2^n_features (", 2^n_features, ") coalitions for n_features > 13.\n", - "Consider reducing max_n_coalitions and enabling adaptive estimation with adaptive = TRUE.\n" + "Consider reducing max_n_coalitions and enabling iterative estimation with iterative = TRUE.\n" ) ) } @@ -1070,7 +1234,7 @@ check_computability <- function(internal) { paste0( "Due to computation time, we recommend not computing Shapley values exactly \n", "with all 2^n_groups (", 2^n_groups, ") coalitions for n_groups > 13.\n", - "Consider reducing max_n_coalitions and enabling adaptive estimation with adaptive = TRUE.\n" + "Consider reducing max_n_coalitions and enabling iterative estimation with iterative = TRUE.\n" ) ) } @@ -1078,30 +1242,26 @@ check_computability <- function(internal) { paste0( "Due to computation time, we recommend not computing causal Shapley values exactly \n", "with all valid causal coalitions when there are more than 1000 due to the long causal sampling time. 
\n", - "Consider reducing max_n_coalitions and enabling adaptive estimation with adaptive = TRUE.\n" + "Consider reducing max_n_coalitions and enabling iterative estimation with iterative = TRUE.\n" ) } } else { if (isFALSE(is_groupwise) && n_features > 30) { warning( - paste0( - "Due to computation time, we strongly recommend enabling adaptive estimation with adaptive = TRUE", - " when n_features > 30.\n" - ) + "Due to computation time, we strongly recommend enabling iterative estimation with iterative = TRUE", + " when n_features > 30.\n", ) } if (isTRUE(is_groupwise) && n_groups > 30) { warning( - paste0( - "Due to computation time, we strongly recommend enabling adaptive estimation with adaptive = TRUE", - " when n_groups > 30.\n" - ) + "Due to computation time, we strongly recommend enabling iterative estimation with iterative = TRUE", + " when n_groups > 30.\n", ) } if (isTRUE(causal_sampling) && !is.null(max_n_coalitions_causal) && max_n_coalitions_causal > 1000) { warning( paste0( - "Due to computation time, we strongly recommend enabling adaptive estimation with adaptive = TRUE ", + "Due to computation time, we strongly recommend enabling iterative estimation with iterative = TRUE ", "when the number of valid causal coalitions are more than 1000 due to the long causal sampling time. \n" ) ) @@ -1165,7 +1325,7 @@ check_regression <- function(internal) { } # Check that we are not to keep the Monte Carlo samples - if (internal$parameters$keep_samp_for_vS) { + if (internal$parameters$output_args$keep_samp_for_vS) { stop(paste( "`keep_samp_for_vS` must be `FALSE` for the `regression_separate` and `regression_surrogate`", "approaches as there are no Monte Carlo samples to keep for these approaches." @@ -1278,45 +1438,44 @@ check_groups <- function(feature_names, group) { - #' @keywords internal -set_adaptive_parameters <- function(internal, prev_iter_list = NULL) { - adaptive <- internal$parameters$adaptive +set_iterative_parameters <- function(internal, prev_iter_list = NULL) { + iterative <- internal$parameters$iterative - adaptive_arguments <- internal$parameters$adaptive_arguments + iterative_args <- internal$parameters$iterative_args - adaptive_arguments <- utils::modifyList(get_adaptive_arguments_default(internal), - adaptive_arguments, + iterative_args <- utils::modifyList(get_iterative_args_default(internal), + iterative_args, keep.null = TRUE ) - # Force setting the number of coalitions and iterations for non-adaptive method - if (isFALSE(adaptive)) { - adaptive_arguments$max_iter <- 1 - adaptive_arguments$initial_n_coalitions <- adaptive_arguments$max_n_coalitions + # Force setting the number of coalitions and iterations for non-iterative method + if (isFALSE(iterative)) { + iterative_args$max_iter <- 1 + iterative_args$initial_n_coalitions <- iterative_args$max_n_coalitions } - check_adaptive_arguments(adaptive_arguments) + check_iterative_args(iterative_args) # Translate any null input - adaptive_arguments <- trans_null_adaptive_arguments(adaptive_arguments) + iterative_args <- trans_null_iterative_args(iterative_args) - internal$parameters$adaptive_arguments <- adaptive_arguments + internal$parameters$iterative_args <- iterative_args if (!is.null(prev_iter_list)) { # Update internal with the iter_list from prev_shapr_object internal$iter_list <- prev_iter_list - # Conveniently allow running non-adaptive estimation one step further - if (isFALSE(internal$parameters$adaptive)) { - internal$parameters$adaptive_arguments$max_iter <- length(internal$iter_list) + 1 - 
internal$parameters$adaptive_arguments$reduction_factor_vec <- NULL + # Conveniently allow running non-iterative estimation one step further + if (isFALSE(internal$parameters$iterative)) { + internal$parameters$iterative_args$max_iter <- length(internal$iter_list) + 1 + internal$parameters$iterative_args$n_coal_next_iter_factor_vec <- NULL } - # Update convergence data with NEW adaptive arguments + # Update convergence data with NEW iterative arguments internal <- check_convergence(internal) - # Check for convergence based on last iter_list with new adaptive arguments + # Check for convergence based on last iter_list with new iterative arguments check_vs_prev_shapr_object(internal) # Prepare next iteration @@ -1324,20 +1483,20 @@ set_adaptive_parameters <- function(internal, prev_iter_list = NULL) { } else { internal$iter_list <- list() internal$iter_list[[1]] <- list( - n_coalitions = adaptive_arguments$initial_n_coalitions, - new_n_coalitions = adaptive_arguments$initial_n_coalitions, + n_coalitions = iterative_args$initial_n_coalitions, + new_n_coalitions = iterative_args$initial_n_coalitions, exact = internal$parameters$exact, - compute_sd = adaptive_arguments$compute_sd, - reduction_factor = adaptive_arguments$reduction_factor_vec[1], - n_batches = set_n_batches(adaptive_arguments$initial_n_coalitions, internal) + compute_sd = internal$parameters$extra_computation_args$compute_sd, + n_coal_next_iter_factor = iterative_args$n_coal_next_iter_factor_vec[1], + n_batches = set_n_batches(iterative_args$initial_n_coalitions, internal) ) } return(internal) } -check_adaptive_arguments <- function(adaptive_arguments) { - list2env(adaptive_arguments, envir = environment()) +check_iterative_args <- function(iterative_args) { + list2env(iterative_args, envir = environment()) # initial_n_coalitions @@ -1346,7 +1505,7 @@ check_adaptive_arguments <- function(adaptive_arguments) { !is.na(initial_n_coalitions) && initial_n_coalitions <= max_n_coalitions && initial_n_coalitions > 2)) { - stop("`adaptive_arguments$initial_n_coalitions` must be a single integer between 2 and `max_n_coalitions`.") + stop("`iterative_args$initial_n_coalitions` must be a single integer between 2 and `max_n_coalitions`.") } # fixed_n_coalitions @@ -1357,7 +1516,7 @@ fixed_n_coalitions_per_iter <= max_n_coalitions && fixed_n_coalitions_per_iter > 0)) { stop( -      "`adaptive_arguments$fixed_n_coalitions_per_iter` must be NULL or a single positive integer no larger than", +      "`iterative_args$fixed_n_coalitions_per_iter` must be NULL or a single positive integer no larger than ", "`max_n_coalitions`." 
) } @@ -1368,90 +1527,39 @@ check_adaptive_arguments <- function(adaptive_arguments) { length(max_iter) == 1 && !is.na(max_iter) && max_iter > 0)) { - stop("`adaptive_arguments$max_iter` must be NULL, Inf or a single positive integer.") - } - - # convergence_tolerance - if (!is.null(convergence_tolerance) && - !(length(convergence_tolerance) == 1 && - !is.na(convergence_tolerance) && - convergence_tolerance >= 0)) { - stop("`adaptive_arguments$convergence_tolerance` must be NULL, 0, or a positive numeric.") - } - - # reduction_factor_vec - if (!is.null(reduction_factor_vec) && - !(all(!is.na(reduction_factor_vec)) && - all(reduction_factor_vec <= 1) && - all(reduction_factor_vec >= 0))) { - stop("`adaptive_arguments$reduction_factor_vec` must be NULL or a vector or numerics between 0 and 1.") - } - - # n_boot_samps - if (!(is.wholenumber(n_boot_samps) && - length(n_boot_samps) == 1 && - !is.na(n_boot_samps) && - n_boot_samps > 0)) { - stop("`adaptive_arguments$n_boot_samps` must be a single positive integer.") - } - - # compute_sd - if (!(is.logical(compute_sd) && - length(compute_sd) == 1)) { - stop("`adaptive_arguments$compute_sd` must be a single logical.") - } - - - # min_n_batches - if (!is.null(min_n_batches) && - !(is.wholenumber(min_n_batches) && - length(min_n_batches) == 1 && - !is.na(min_n_batches) && - min_n_batches > 0)) { - stop("`adaptive_arguments$min_n_batches` must be NULL or a single positive integer.") - } - - # max_batch_size - if (!is.null(max_batch_size) && - !((is.wholenumber(max_batch_size) || is.infinite(max_batch_size)) && - length(max_batch_size) == 1 && - !is.na(max_batch_size) && - max_batch_size > 0)) { - stop("`adaptive_arguments$max_batch_size` must be NULL, Inf or a single positive integer.") + stop("`iterative_args$max_iter` must be NULL, Inf or a single positive integer.") } - # saving_path - if (!(is.character(saving_path) && - length(saving_path) == 1)) { - stop("`adaptive_arguments$saving_path` must be a single character.") + # convergence_tol + if (!is.null(convergence_tol) && + !(length(convergence_tol) == 1 && + !is.na(convergence_tol) && + convergence_tol >= 0)) { + stop("`iterative_args$convergence_tol` must be NULL, 0, or a positive numeric.") } - # Check that the saving_path exists, and abort if not... - if (!dir.exists(dirname(saving_path))) { - stop( - paste0( - "Directory ", dirname(saving_path), " in the adaptive_arguments$saving_path does not exists.\n", - "Please create the directory with `dir.create('", dirname(saving_path), "')` or use another directory." 
-      )
-    )
+  # n_coal_next_iter_factor_vec
+  if (!is.null(n_coal_next_iter_factor_vec) &&
+    !(all(!is.na(n_coal_next_iter_factor_vec)) &&
+      all(n_coal_next_iter_factor_vec <= 1) &&
+      all(n_coal_next_iter_factor_vec >= 0))) {
+    stop("`iterative_args$n_coal_next_iter_factor_vec` must be NULL or a vector of numerics between 0 and 1.")
  }
}

-trans_null_adaptive_arguments <- function(adaptive_arguments) {
-  list2env(adaptive_arguments, envir = environment())
+trans_null_iterative_args <- function(iterative_args) {
+  list2env(iterative_args, envir = environment())

-  # Translating NULL to always return n_batches = 1 (if just one approach)
-  adaptive_arguments$min_n_batches <- ifelse(is.null(min_n_batches), 1, min_n_batches)
-  adaptive_arguments$max_batch_size <- ifelse(is.null(max_batch_size), Inf, max_batch_size)
-  adaptive_arguments$max_iter <- ifelse(is.null(max_iter), Inf, max_iter)
+  # Translate a NULL max_iter to Inf (i.e. no iteration limit)
+  iterative_args$max_iter <- ifelse(is.null(max_iter), Inf, max_iter)

-  return(adaptive_arguments)
+  return(iterative_args)
}

set_n_batches <- function(n_coalitions, internal) {
-  min_n_batches <- internal$parameters$adaptive_arguments$min_n_batches
-  max_batch_size <- internal$parameters$adaptive_arguments$max_batch_size
+  min_n_batches <- internal$parameters$extra_computation_args$min_n_batches
+  max_batch_size <- internal$parameters$extra_computation_args$max_batch_size
  n_unique_approaches <- internal$parameters$n_unique_approaches
@@ -1484,13 +1592,13 @@ check_vs_prev_shapr_object <- function(internal) {
  if (isTRUE(converged_sd)) {
    message0 <- c(
      message0,
-      "Convergence tolerance reached. Consider decreasing `adaptive_arguments$tolerance`.\n"
+      "Convergence tolerance reached. Consider decreasing `iterative_args$convergence_tol`.\n"
    )
  }
  if (isTRUE(converged_max_iter)) {
    message0 <- c(
      message0,
-      "Maximum number of iterations reached. Consider increasing `adaptive_arguments$max_iter`.\n"
+      "Maximum number of iterations reached. Consider increasing `iterative_args$max_iter`.\n"
    )
  }
  if (isTRUE(converged_max_n_coalitions)) {
@@ -1504,81 +1612,57 @@
}

# Get functions ========================================================================================================
-#' Function to specify arguments of the adaptive estimation procedure
+#' Function to specify arguments of the iterative estimation procedure
#'
-#' @details The functions sets default values for the adaptive estimation procedure, according to the function defaults.
-#' If the argument `adaptive` of [shapr::explain()] is FALSE, it sets parameters corresponding to the use of a
-#' non-adaptive estimation procedure
+#' @details The function sets default values for the iterative estimation procedure, according to the function
+#' defaults.
+#' If the argument `iterative` of [shapr::explain()] is FALSE, it sets parameters corresponding to the use of a
+#' non-iterative estimation procedure.
#'
#' @param max_iter Integer. Maximum number of estimation iterations.
#' @param initial_n_coalitions Integer. Number of coalitions to use in the first estimation iteration.
#' @param fixed_n_coalitions_per_iter Integer. Number of `n_coalitions` to use in each iteration.
#' `NULL` (default) means setting it based on estimates for the set convergence threshold.
-#' @param convergence_tolerance Numeric. The t variable in the convergence threshold formula on page 6 in the paper
+#' @param convergence_tol Numeric. The t variable in the convergence threshold formula on page 6 of the paper
#' Covert and Lee (2021), 'Improving KernelSHAP: Practical Shapley Value Estimation via Linear Regression'
#' https://arxiv.org/pdf/2012.01536. Smaller values require more coalitions before convergence is reached.
-#' @param reduction_factor_vec Numeric vector. The number of `n_coalitions` that must be used to reach convergence
-#' in the next iteration is estimated.
+#' @param n_coal_next_iter_factor_vec Numeric vector. The number of `n_coalitions` required to reach
+#' convergence in the next iteration is estimated.
#' The number of `n_coalitions` actually used in the next iteration is set to this estimate multiplied by
-#' `reduction_factor_vec[i]` for iteration `i`.
+#' `n_coal_next_iter_factor_vec[i]` for iteration `i`.
#' It is wise to start with smaller numbers to avoid using too many `n_coalitions` due to uncertain estimates in
#' the first iterations.
-#' @param n_boot_samps Integer. The number of bootstrapped samples (i.e. samples with replacement) from the set of all
-#' coalitions used to estimate the standard deviations of the Shapley value estimates.
-#' @param compute_sd Logical. Whether to estimate the standard deviations of the Shapley value estimates.
-#' @param max_batch_size Integer. The maximum number of coalitions to estimate simultaneously within each iteration.
-#' A larger numbers requires more memory, but may have a slight computational advantage.
-#' @param min_n_batches Integer. The minimum number of batches to split the computation into within each iteration.
-#' Larger numbers gives more frequent progress updates. If parallelization is applied, this should be set no smaller
-#' than the number of parallel workers.
-#' @param saving_path String.
-#' The path to the directory where the results of the adaptive estimation procedure should be saved.
-#' Defaults to a temporary directory.
#' @inheritParams default_doc_explain #' #' @export #' @author Martin Jullum -get_adaptive_arguments_default <- function(internal, - initial_n_coalitions = ceiling( - min( - 200, - internal$parameters$max_n_coalitions_causal, - internal$parameters$max_n_coalitions, - max( - 5, - internal$parameters$n_features, - (2^internal$parameters$n_features) / 10 - ) - ) - ), - fixed_n_coalitions_per_iter = NULL, - max_iter = 20, - convergence_tolerance = 0.02, - reduction_factor_vec = c(seq(0.1, 1, by = 0.1), rep(1, max_iter - 10)), - n_boot_samps = 100, - compute_sd = isTRUE(internal$parameters$adaptive), - max_batch_size = 10, - min_n_batches = 10, - saving_path = tempfile("shapr_obj_", fileext = ".rds")) { - adaptive <- internal$parameters$adaptive +get_iterative_args_default <- function(internal, + initial_n_coalitions = ceiling( + min( + 200, + max( + 5, + internal$parameters$n_features, + (2^internal$parameters$n_features) / 10 + ) + ) + ), + fixed_n_coalitions_per_iter = NULL, + max_iter = 20, + convergence_tol = 0.02, + n_coal_next_iter_factor_vec = c(seq(0.1, 1, by = 0.1), rep(1, max_iter - 10))) { + iterative <- internal$parameters$iterative max_n_coalitions <- internal$parameters$max_n_coalitions - exact <- internal$parameters$exact - is_groupwise <- internal$parameters$is_groupwise - if (isTRUE(adaptive)) { + if (isTRUE(iterative)) { ret_list <- mget( c( "initial_n_coalitions", "fixed_n_coalitions_per_iter", "max_n_coalitions", "max_iter", - "convergence_tolerance", - "reduction_factor_vec", - "n_boot_samps", - "compute_sd", - "max_batch_size", - "min_n_batches", - "saving_path" + "convergence_tol", + "n_coal_next_iter_factor_vec" ) ) } else { @@ -1587,13 +1671,8 @@ get_adaptive_arguments_default <- function(internal, fixed_n_coalitions_per_iter = NULL, max_n_coalitions = max_n_coalitions, max_iter = 1, - convergence_tolerance = NULL, - reduction_factor_vec = NULL, - n_boot_samps = n_boot_samps, - compute_sd = isFALSE(exact) && isFALSE(is_groupwise), - max_batch_size = max_batch_size, - min_n_batches = min_n_batches, - saving_path = saving_path + convergence_tol = NULL, + n_coal_next_iter_factor_vec = NULL ) } return(ret_list) diff --git a/R/shapley_setup.R b/R/shapley_setup.R index ad8d42f30..bfd6e7c8f 100644 --- a/R/shapley_setup.R +++ b/R/shapley_setup.R @@ -11,7 +11,7 @@ shapley_setup <- function(internal) { approach <- internal$parameters$approach is_groupwise <- internal$parameters$is_groupwise paired_shap_sampling <- internal$parameters$paired_shap_sampling - shapley_reweighting <- internal$parameters$shapley_reweighting + kernelSHAP_reweighting <- internal$parameters$kernelSHAP_reweighting coal_feature_list <- internal$objects$coal_feature_list causal_sampling <- internal$parameters$causal_sampling causal_ordering <- internal$parameters$causal_ordering @@ -42,7 +42,7 @@ shapley_setup <- function(internal) { prev_coal_samples = prev_coal_samples, coal_feature_list = coal_feature_list, approach0 = approach, - shapley_reweighting = shapley_reweighting, + kernelSHAP_reweighting = kernelSHAP_reweighting, dt_valid_causal_coalitions = dt_valid_causal_coalitions ) @@ -182,7 +182,7 @@ create_coalition_table <- function(m, prev_coal_samples = NULL, coal_feature_list = as.list(seq_len(m)), approach0 = "gaussian", - shapley_reweighting = "none", + kernelSHAP_reweighting = "none", dt_valid_causal_coalitions = NULL) { if (exact) { dt <- exact_coalition_table( @@ -197,7 +197,7 @@ create_coalition_table <- function(m, weight_zero_m = weight_zero_m, paired_shap_sampling = paired_shap_sampling, 
prev_coal_samples = prev_coal_samples, - shapley_reweighting = shapley_reweighting, + kernelSHAP_reweighting = kernelSHAP_reweighting, dt_valid_causal_coalitions = dt_valid_causal_coalitions ) stopifnot( @@ -221,7 +221,7 @@ create_coalition_table <- function(m, } #' @keywords internal -shapley_reweighting <- function(X, reweight = "on_N") { +kernelSHAP_reweighting <- function(X, reweight = "on_N") { # Updates the shapley weights in X based on the reweighting strategy BY REFERENCE @@ -293,7 +293,7 @@ sample_coalition_table <- function(m, weight_zero_m = 10^6, paired_shap_sampling = TRUE, prev_coal_samples = NULL, - shapley_reweighting = "none", + kernelSHAP_reweighting, valid_causal_coalitions = NULL, dt_valid_causal_coalitions = NULL) { # Setup @@ -426,7 +426,7 @@ sample_coalition_table <- function(m, nms <- c("id_coalition", "coalitions", "coalition_size", "N", "shapley_weight", "p", "sample_freq") data.table::setcolorder(X, nms) - shapley_reweighting(X, reweight = shapley_reweighting) # Reweights the shapley weights in X by reference + kernelSHAP_reweighting(X, reweight = kernelSHAP_reweighting) # Reweights the shapley weights in X by reference return(X) } @@ -665,7 +665,7 @@ shapley_setup_forecast <- function(internal) { approach <- internal$parameters$approach is_groupwise <- internal$parameters$is_groupwise paired_shap_sampling <- internal$parameters$paired_shap_sampling - shapley_reweighting <- internal$parameters$shapley_reweighting + kernelSHAP_reweighting <- internal$parameters$kernelSHAP_reweighting coal_feature_list <- internal$objects$coal_feature_list horizon <- internal$parameters$horizon @@ -707,7 +707,7 @@ shapley_setup_forecast <- function(internal) { prev_coal_samples = prev_coal_samples, coal_feature_list = this_coal_feature_list, approach0 = approach, - shapley_reweighting = shapley_reweighting + kernelSHAP_reweighting = kernelSHAP_reweighting ) W_list[[i]] <- weight_matrix( diff --git a/R/zzz.R b/R/zzz.R index 83b755d44..a1458be87 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -126,7 +126,7 @@ "initial_n_coalitions", "max_n_coalitions", "fixed_n_coalitions_per_iter", - "reduction_factor_vec", + "n_coal_next_iter_factor_vec", "n_boot_samps", "compute_sd", "min_n_batches", @@ -136,9 +136,11 @@ "cond", "tmp_coalitions", "max_iter", - "convergence_tolerance", + "convergence_tol", "conv_measure", "verbose", + "MSEv_uniform_comb_weights", + "keep_samp_for_vS", "S_original_names_with_id", "Sbar_features", "Sbar_now_names", diff --git a/README.Rmd b/README.Rmd index aff32ad85..d7511c54a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -84,7 +84,7 @@ Current methodological restrictions: Future releases will include: - Computational improvement of the AICc optimization approach, -- Adaptive selection of method to account for the feature dependence. +- iterative selection of method to account for the feature dependence. --> @@ -176,7 +176,7 @@ explanation <- explain( # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. -print(explanation$shapley_values) +print(explanation$shapley_values_est) # Finally we plot the resulting explanations plot(explanation) diff --git a/README.md b/README.md index eb18d9d99..203526d60 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ Current methodological restrictions: Future releases will include: - Computational improvement of the AICc optimization approach, -- Adaptive selection of method to account for the feature dependence. 
+- iterative selection of method to account for the feature dependence. --> Note the prediction outcome must be numeric. All approaches except @@ -236,7 +236,7 @@ explanation <- explain( # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. -print(explanation$shapley_values) +print(explanation$shapley_values_est) #> none Solar.R Wind Temp Month #> 1: 43.08571 13.2117337 4.785645 -25.57222 -5.599230 #> 2: 43.08571 -9.9727747 5.830694 -11.03873 -7.829954 diff --git a/inst/scripts/Beeswarm_illustration.R b/inst/scripts/Beeswarm_illustration.R index a3fdfaab4..83fedf42f 100644 --- a/inst/scripts/Beeswarm_illustration.R +++ b/inst/scripts/Beeswarm_illustration.R @@ -28,8 +28,8 @@ plot_shapr <- function(x, is_groupwise <- x$internal$parameters$is_groupwise # melting Kshap - shap_names <- colnames(x$shapley_values)[-1] - dt_shap <- round(data.table::copy(x$shapley_values), digits = digits) + shap_names <- colnames(x$shapley_values_est)[-1] + dt_shap <- round(data.table::copy(x$shapley_values_est), digits = digits) dt_shap[, id := .I] dt_shap_long <- data.table::melt(dt_shap, id.vars = "id", value.name = "phi") dt_shap_long[, sign := factor(sign(phi), levels = c(1, -1), labels = c("Increases", "Decreases"))] @@ -41,7 +41,7 @@ plot_shapr <- function(x, desc_mat[, i] <- paste0(shap_names[i], " = ", desc_mat[, i]) } } else { - desc_mat <- trimws(format(x$shapley_values[, -1], digits = digits)) + desc_mat <- trimws(format(x$shapley_values_est[, -1], digits = digits)) for (i in seq_len(ncol(desc_mat))) { desc_mat[, i] <- paste0(shap_names[i]) } diff --git a/inst/scripts/Compare_Conditional_and_Causal_Categorical.R b/inst/scripts/Compare_Conditional_and_Causal_Categorical.R index c633c6631..502b16e47 100644 --- a/inst/scripts/Compare_Conditional_and_Causal_Categorical.R +++ b/inst/scripts/Compare_Conditional_and_Causal_Categorical.R @@ -54,7 +54,7 @@ causal_categorical <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) # Warning CTREE is the slowest approach by far @@ -70,7 +70,7 @@ causal_ctree <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) causal_vaeac <- explain( @@ -86,7 +86,7 @@ causal_vaeac <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) shapr::plot_SV_several_approaches(list( @@ -109,7 +109,7 @@ conditional_independence <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) conditional_categorical <- explain( @@ -124,7 +124,7 @@ conditional_categorical <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) # Warning CTREE is the slowest approach by far @@ -140,7 +140,7 @@ conditional_ctree <- explain( n_MC_samples = 50, # Just for speed verbose = c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) conditional_vaeac <- explain( @@ -156,7 +156,7 @@ conditional_vaeac <- explain( n_MC_samples = 50, # Just for speed verbose = 
c("basic", "convergence", "shapley", "vS_details"), keep_samp_for_vS = TRUE, - adaptive = FALSE + iterative = FALSE ) shapr::plot_SV_several_approaches(list( diff --git a/inst/scripts/Heskes_bike_rental_illustration.R b/inst/scripts/Heskes_bike_rental_illustration.R index 2fcc9dbd2..9fb9d2722 100644 --- a/inst/scripts/Heskes_bike_rental_illustration.R +++ b/inst/scripts/Heskes_bike_rental_illustration.R @@ -1,5 +1,5 @@ # This file build on Pull Request https://github.com/NorskRegnesentral/shapr/pull/273 -# This file does not run on the adaptive version. +# This file does not run on the iterative version. # The point of the file was to replicate the plot values that Heskes obtained in their implementation # to validate my implementation. @@ -72,7 +72,7 @@ save_plots <- FALSE sina_plot <- function(explanation, seed = 123) { set.seed(seed) - shapley_values <- explanation$shapley_values[, -"none", drop = FALSE] + shapley_values_est <- explanation$shapley_values_est[, -"none", drop = FALSE] X_values <- explanation$internal$data$x_explain # If we are doing group Shapley, then we compute the mean feature value for each group for each explicand @@ -85,12 +85,12 @@ sina_plot <- function(explanation, seed = 123) { data_long <- X_values %>% tidyr::pivot_longer(everything()) %>% dplyr::bind_cols( - explanation$shapley_values %>% + explanation$shapley_values_est %>% dplyr::select(-none) %>% tidyr::pivot_longer(everything()) %>% dplyr::select(-name) %>% dplyr::rename(shap = value)) %>% - dplyr::mutate(name = factor(name, levels = rev(names(explanation$shapley_values)))) %>% + dplyr::mutate(name = factor(name, levels = rev(names(explanation$shapley_values_est)))) %>% dplyr::group_by(name) %>% dplyr::arrange(name) %>% dplyr::mutate(mean_value = mean(value)) %>% @@ -454,10 +454,10 @@ gridExtra::grid.arrange(save_explanation_asymmetric_causal$plot + ggplot2::ggtit message("3. 
Producing scatter plots comparing marginal and causal Shapley values on the test set") sv_correlation_df <- data.frame( temp = x_explain[, "temp"], - sv_marg_cosyear = explanation_marginal$shapley_values$cosyear, - sv_caus_cosyear = explanation_causal$shapley_values$cosyear, - sv_marg_temp = explanation_marginal$shapley_values$temp, - sv_caus_temp = explanation_causal$shapley_values$temp + sv_marg_cosyear = explanation_marginal$shapley_values_est$cosyear, + sv_caus_cosyear = explanation_causal$shapley_values_est$cosyear, + sv_marg_temp = explanation_marginal$shapley_values_est$temp, + sv_caus_temp = explanation_causal$shapley_values_est$temp ) @@ -547,7 +547,7 @@ dates_idx = sapply(dates, function(data) which(as.integer(row.names(x_explain)) explanations = list("Marginal" = explanation_marginal, "Causal" = explanation_causal) explanations_extracted = data.table::rbindlist(lapply(seq_along(explanations), function(idx) { - explanations[[idx]]$shapley_values[dates_idx, ..features][, `:=` (Date = dates, type = names(explanations)[idx])] + explanations[[idx]]$shapley_values_est[dates_idx, ..features][, `:=` (Date = dates, type = names(explanations)[idx])] })) dt_all = data.table::melt(explanations_extracted, id.vars = c("Date", "type"), variable.name = "feature") @@ -756,7 +756,7 @@ explanation_asymmetric_all_gaussian2 <- ) }) -explanation_asymmetric_all_gaussian$shapley_values - explanation_asymmetric_all_gaussian2$shapley_values +explanation_asymmetric_all_gaussian$shapley_values_est - explanation_asymmetric_all_gaussian2$shapley_values_est explanation_asymmetric_all_gaussian$MSEv @@ -880,7 +880,7 @@ explanation_group_asymmetric_causal_time = system.time({ }) }) -explanation_group_asymmetric_causal$shapley_values +explanation_group_asymmetric_causal$shapley_values_est sina_plot(explanation_group_asymmetric_causal) # Now we compute the group Shapley values based on only half of the coalitions diff --git a/inst/scripts/check_model_workflow.R b/inst/scripts/check_model_workflow.R index 01799eae1..ef76a36ff 100644 --- a/inst/scripts/check_model_workflow.R +++ b/inst/scripts/check_model_workflow.R @@ -64,7 +64,7 @@ explain_xgboost = explain( ) # See that the shapley values are identical -all.equal(explain_workflow$shapley_values, explain_xgboost$shapley_values) +all.equal(explain_workflow$shapley_values_est, explain_xgboost$shapley_values_est) # Other models in workflow --------------------------------------------------------------------------------------------- set.seed(1) diff --git a/inst/scripts/compare_shap_python_new.R b/inst/scripts/compare_shap_python_new.R index c15fed9d6..b8a0a2b33 100644 --- a/inst/scripts/compare_shap_python_new.R +++ b/inst/scripts/compare_shap_python_new.R @@ -56,8 +56,8 @@ time_R_largesigma0 <- proc.time() (time_R_largesigma <- time_R_largesigma0 - time_R_indep0) # Printing the Shapley values for the test data -Kshap_indep <- explanation_independence$shapley_values -Kshap_largesigma <- explanation_largesigma$shapley_values +Kshap_indep <- explanation_independence$shapley_values_est +Kshap_largesigma <- explanation_largesigma$shapley_values_est Kshap_indep Kshap_largesigma diff --git a/inst/scripts/devel/devel_batch_testing.R b/inst/scripts/devel/devel_batch_testing.R index 1ad762b98..8fcb2c807 100644 --- a/inst/scripts/devel/devel_batch_testing.R +++ b/inst/scripts/devel/devel_batch_testing.R @@ -56,7 +56,7 @@ expl <- explain(model = model, prediction_zero = p0, n_batches = 100, n_samples = 1000, - adaptive = TRUE, + iterative = TRUE, print_iter_info = TRUE, 
print_shapleyres = TRUE) diff --git a/inst/scripts/devel/devel_convergence_branch.R b/inst/scripts/devel/devel_convergence_branch.R index 71ee609e3..9d022ba17 100644 --- a/inst/scripts/devel/devel_convergence_branch.R +++ b/inst/scripts/devel/devel_convergence_branch.R @@ -37,56 +37,56 @@ p0 <- mean(y_train) # Computing the actual Shapley values with kernelSHAP accounting for feature dependence using # the empirical (conditional) distribution approach with bandwidth parameter sigma = 0.1 (default) -explanation_adaptive <- explain( +explanation_iterative <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "gaussian", max_n_coalitions = 500, prediction_zero = p0, - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, # tmp print_iter_info = TRUE, # tmp - shapley_reweighting = "on_N" + kernelSHAP_reweighting = "on_N" ) -explanation_adaptive <- explain( +explanation_iterative <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "ctree", n_coalitions = 500, prediction_zero = p0, - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, # tmp print_iter_info = TRUE, # tmp - shapley_reweighting = "on_N" + kernelSHAP_reweighting = "on_N" ) -explanation_nonadaptive <- explain( +explanation_noniterative <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "gaussian", n_coalitions = 400, prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) -explanation_adaptive <- explain( +explanation_iterative <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "gaussian", n_coalitions = 500, prediction_zero = p0, - adaptive = TRUE, - adaptive_arguments = list(initial_n_coalitions=10,convergence_tolerance=0.0001), + iterative = TRUE, + iterative_args = list(initial_n_coalitions=10,convergence_tol=0.0001), print_shapleyres = TRUE, # tmp print_iter_info = TRUE, # tmp - shapley_reweighting = "on_N" + kernelSHAP_reweighting = "on_N" ) @@ -98,43 +98,43 @@ explanation_adaptive <- explain( -plot(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - explanation_adaptive$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1,Solar.R],type="l") -sd_full <- explanation_adaptive$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1][.N,Solar.R] -n_samples_full <- explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res[.N,n_current_samples] +plot(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + explanation_iterative$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1,Solar.R],type="l") +sd_full <- explanation_iterative$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1][.N,Solar.R] +n_samples_full <- explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res[.N,n_current_samples] sd_full0 <- sd_full*sqrt(n_samples_full) -lines(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - sd_full0/sqrt(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples),type="l",col=2) +lines(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + sd_full0/sqrt(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples),type="l",col=2) -plot(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - 
explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$estimated_required_samples,type="l",ylim=c(0,4000),lwd=4) +plot(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$estimated_required_samples,type="l",ylim=c(0,4000),lwd=4) for(i in 1:20){ - lines(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res[[5+i]],type="l",col=1+i) + lines(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res[[5+i]],type="l",col=1+i) } -plot(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - explanation_adaptive$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1,Solar.R],type="l",ylim=c(0,2)) -sd_full <- explanation_adaptive$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1][.N,Solar.R] -n_samples_full <- explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res[.N,n_current_samples] +plot(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + explanation_iterative$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1,Solar.R],type="l",ylim=c(0,2)) +sd_full <- explanation_iterative$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==1][.N,Solar.R] +n_samples_full <- explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res[.N,n_current_samples] sd_full0 <- sd_full*sqrt(n_samples_full) -lines(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - sd_full0/sqrt(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples),type="l",col=2,lwd=3) +lines(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + sd_full0/sqrt(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples),type="l",col=2,lwd=3) for(i in 1:20){ - lines(explanation_adaptive$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, - explanation_adaptive$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==i,Solar.R],type="l",col=1+i) + lines(explanation_iterative$internal$output$iter_objects$dt_iter_convergence_res$n_current_samples, + explanation_iterative$internal$output$iter_objects$dt_iter_shapley_sd[explain_id==i,Solar.R],type="l",col=1+i) } -lines(explanation_adaptive$internal$output$dt_iter_convergence_res$n_current_samples, - sd_full0/sqrt(explanation_adaptive$internal$output$dt_iter_convergence_res$n_current_samples),type="l",col=2) +lines(explanation_iterative$internal$output$dt_iter_convergence_res$n_current_samples, + sd_full0/sqrt(explanation_iterative$internal$output$dt_iter_convergence_res$n_current_samples),type="l",col=2) -plot(explanation_adaptive$internal$output$dt_iter_convergence_res$estimated_required_samples) +plot(explanation_iterative$internal$output$dt_iter_convergence_res$estimated_required_samples) explanation_regular <- explain( model = model, @@ -143,6 +143,6 @@ explanation_regular <- explain( approach = "gaussian", n_coalitions = NULL, prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) diff --git a/inst/scripts/devel/devel_verbose.R b/inst/scripts/devel/devel_verbose.R index f15bf15ec..d9f0a54fc 100644 --- 
a/inst/scripts/devel/devel_verbose.R +++ b/inst/scripts/devel/devel_verbose.R @@ -6,13 +6,13 @@ ex <- explain( approach = "independence", prediction_zero = p0, max_n_coalitions = 30, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 6, - convergence_tolerance = 0.0005, - reduction_factor_vec = rep(10^(-6), 10), + convergence_tol = 0.0005, + n_coal_next_iter_factor_vec = rep(10^(-6), 10), max_iter = 8 ), - adaptive = TRUE,verbose=c("basic","progress") + iterative = TRUE,verbose=c("basic","progress") ) ex <- explain( @@ -23,7 +23,7 @@ ex <- explain( approach = "regression_separate", prediction_zero = p0, max_n_coalitions = 30, - adaptive = TRUE,verbose=c("vS_details") + iterative = TRUE,verbose=c("vS_details") ) ex <- explain( model = model_lm_numeric, @@ -32,7 +32,7 @@ ex <- explain( approach = "regression_separate", prediction_zero = p0, max_n_coalitions = 30, - adaptive = TRUE,verbose=c("basic","progress","vS_details"), + iterative = TRUE,verbose=c("basic","progress","vS_details"), regression.model = parsnip::decision_tree(tree_depth = hardhat::tune(), engine = "rpart", mode = "regression"), regression.tune_values = dials::grid_regular(dials::tree_depth(), levels = 4), regression.vfold_cv_para = list(v = 5) @@ -45,7 +45,7 @@ ex <- explain( approach = "regression_surrogate", prediction_zero = p0, max_n_coalitions = 30, - adaptive = FALSE,verbose=c("basic","vS_details"), + iterative = FALSE,verbose=c("basic","vS_details"), regression.model = parsnip::decision_tree(tree_depth = hardhat::tune(), engine = "rpart", mode = "regression"), regression.tune_values = dials::grid_regular(dials::tree_depth(), levels = 4), regression.vfold_cv_para = list(v = 5) @@ -64,7 +64,7 @@ ex <- explain( approach = "vaeac", prediction_zero = p0, max_n_coalitions = 30, - adaptive = FALSE,verbose=c("basic","progress","vS_details"), + iterative = FALSE,verbose=c("basic","progress","vS_details"), n_MC_samples = 100, vaeac.epochs = 3 ) @@ -77,7 +77,7 @@ ex2 <- explain( approach = "vaeac", prediction_zero = p0, max_n_coalitions = 30, - adaptive = FALSE,verbose=c("basic","progress","vS_details"), + iterative = FALSE,verbose=c("basic","progress","vS_details"), n_MC_samples = 100, vaeac.extra_parameters = list( vaeac.pretrained_vaeac_model = ex$internal$parameters$vaeac @@ -99,7 +99,7 @@ ex <- explain( approach = "regression_separate", prediction_zero = p0, max_n_coalitions = 30, - adaptive = FALSE,verbose=c("basic") + iterative = FALSE,verbose=c("basic") ) @@ -111,13 +111,13 @@ ex <- explain( approach = "empirical", prediction_zero = p0, max_n_coalitions = 30, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 6, - convergence_tolerance = 0.0005, - reduction_factor_vec = rep(10^(-6), 10), + convergence_tol = 0.0005, + n_coal_next_iter_factor_vec = rep(10^(-6), 10), max_iter = 8 ), - adaptive = TRUE,verbose=c("basic","convergence","shapley") + iterative = TRUE,verbose=c("basic","convergence","shapley") ) @@ -128,8 +128,8 @@ explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive = TRUE, - adaptive_arguments <- list(n_initial_) + iterative = TRUE, + iterative_args <- list(n_initial_) verbose = c("basic"), paired_shap_sampling = TRUE ) diff --git a/inst/scripts/devel/real_data_iterative_kernelshap.R b/inst/scripts/devel/real_data_iterative_kernelshap.R index 00447ee0b..9d6404f99 100644 --- a/inst/scripts/devel/real_data_iterative_kernelshap.R +++ b/inst/scripts/devel/real_data_iterative_kernelshap.R @@ -88,7 +88,7 @@ Sigma = 
cov(x_train) ### First run proper shapr call on this sim_results_saving_folder = "/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/gmc_data_v3/"#"../effektiv_shapley_output/" -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" predict_model_xgb <- function(object,newdata){ xgboost:::predict.xgb.Booster(object,as.matrix(newdata)) @@ -109,7 +109,7 @@ expl <- shapr::explain(model = model, approach = "ctree", prediction_zero = p0 ) -fwrite(expl$shapley_values,paste0(sim_results_saving_folder,"exact_shapley_values_", shapley_reweighting_strategy, ".csv")) +fwrite(expl$shapley_values_est,paste0(sim_results_saving_folder,"exact_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) print(Sys.time()) # These are the parameters for for iterative_kshap_func @@ -148,7 +148,7 @@ for(kk in seq_along(testObs_computed_vec)){ shapley_threshold_val = shapley_threshold_val, shapley_threshold_prob = shapley_threshold_prob, approach = approach, - shapley_reweighting_strategy = shapley_reweighting_strategy) + kernelSHAP_reweighting_strategy = kernelSHAP_reweighting_strategy) runres_list[[kk]] <- run$kshap_final runcomps_list[[kk]] <- sum(sapply(run$keep_list,"[[","no_computed_combinations")) @@ -159,7 +159,7 @@ for(kk in seq_along(testObs_computed_vec)){ est <- rbindlist(runres_list) est[,other_features:=NULL] -fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_", shapley_reweighting_strategy, ".csv")) +fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) expl_approx <- matrix(0, nrow = length(inds), ncol = m+1) expl_approx_obj_list <- list() @@ -170,14 +170,14 @@ for (i in seq_along(testObs_computed_vec)){ approach = approach, prediction_zero = p0, n_coalitions = runcomps_list[[i]]) - expl_approx[i,] = unlist(expl_approx_obj$shapley_values) + expl_approx[i,] = unlist(expl_approx_obj$shapley_values_est) expl_approx_obj_list[[i]] <- expl_approx_obj } expl_approx <- as.data.table(expl_approx) -truth <- expl$shapley_values +truth <- expl$shapley_values_est colnames(expl_approx) <- colnames(truth) -fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_", shapley_reweighting_strategy, ".csv")) +fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(est-truth) rmse_vec <- sqrt(colMeans((est-truth)^2)) @@ -187,7 +187,7 @@ bias_vec_approx <- colMeans(expl_approx-truth) rmse_vec_approx <- sqrt(colMeans((expl_approx-truth)^2)) mae_vec_approx <- colMeans(abs(expl_approx-truth)) -save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_lingauss_p12_", shapley_reweighting_strategy, ".RData")) +save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_lingauss_p12_", kernelSHAP_reweighting_strategy, ".RData")) hist(unlist(runcomps_list),breaks = 20) @@ -212,7 +212,7 @@ print(Sys.time()) # run$kshap_final -# expl$shapley_values +# expl$shapley_values_est @@ -223,7 +223,7 @@ print(Sys.time()) # sum(unlist(kshap_final[.N,])) -# sum(unlist(expl$shapley_values[testObs_computed,])) +# sum(unlist(expl$shapley_values_est[testObs_computed,])) @@ -239,7 +239,7 @@ print(Sys.time()) # full_pred <- predict(model,x_explain)[5] # p0 <- mean(y_train) -# pred_not_to_decompose <- sum(expl$shapley_values[5,VV7:VV9]) +# pred_not_to_decompose <- sum(expl$shapley_values_est[5,VV7:VV9]) # run_minor <- iterative_kshap_func(model,x_explain,x_train, diff --git 
a/inst/scripts/devel/real_data_iterative_kernelshap_analyze_results.R b/inst/scripts/devel/real_data_iterative_kernelshap_analyze_results.R index bbacbef75..866d28bf9 100644 --- a/inst/scripts/devel/real_data_iterative_kernelshap_analyze_results.R +++ b/inst/scripts/devel/real_data_iterative_kernelshap_analyze_results.R @@ -1,19 +1,19 @@ library(data.table) -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" shapley_threshold_val <- 0.2 sim_results_folder = "/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/gmc_data_v3/" -load(paste0("/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/gmc_data_v3/iterative_kernelshap_lingauss_p12_", shapley_reweighting_strategy, ".RData")) +load(paste0("/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/gmc_data_v3/iterative_kernelshap_lingauss_p12_", kernelSHAP_reweighting_strategy, ".RData")) -exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", shapley_reweighting_strategy, ".csv")) +exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) # names(exact_vals) <- c("phi0", paste0("VV",1:12)) -iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", shapley_reweighting_strategy, ".csv")) -approx_vals = fread(paste0(sim_results_folder,"approx_shapley_values_", shapley_reweighting_strategy, ".csv")) +iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) +approx_vals = fread(paste0(sim_results_folder,"approx_shapley_values_", kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(exact_vals - iterative_vals) rmse_vec <- sqrt(colMeans((exact_vals - iterative_vals)^2)) diff --git a/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_analyze_results.R b/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_analyze_results.R index 51feb23d2..ac35df40c 100644 --- a/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_analyze_results.R +++ b/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_analyze_results.R @@ -1,18 +1,18 @@ library(data.table) -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" shapley_threshold_val <- 0.2 sim_results_folder = "/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/sim_lingauss_v2/" -load(paste0(sim_results_folder,"iterative_kernelshap_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".RData")) +load(paste0(sim_results_folder,"iterative_kernelshap_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".RData")) -exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) names(exact_vals) <- c("phi0", paste0("VV",1:12)) -iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) -approx_vals = fread(paste0(sim_results_folder,"approx_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) +approx_vals = 
fread(paste0(sim_results_folder,"approx_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(exact_vals - iterative_vals) rmse_vec <- sqrt(colMeans((exact_vals - iterative_vals)^2)) diff --git a/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_v2.R b/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_v2.R index bedcbfcd0..1d88192c5 100644 --- a/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_v2.R +++ b/inst/scripts/devel/simtest_iterative_kernelshap_lingauss_v2.R @@ -71,7 +71,7 @@ p0 <- mean(y_train) ### First run proper shapr call on this sim_results_saving_folder = "/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/sim_lingauss_v2/"#"../effektiv_shapley_output/" -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" set.seed(465132) inds = 1:n_explain @@ -82,7 +82,7 @@ expl <- shapr::explain(model = model, approach = "gaussian", prediction_zero = p0,Sigma=Sigma,mu=mu) -fwrite(expl$shapley_values,paste0(sim_results_saving_folder,"exact_shapley_values_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +fwrite(expl$shapley_values_est,paste0(sim_results_saving_folder,"exact_shapley_values_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) cutoff_feats <- paste0("VV",1:12) @@ -127,7 +127,7 @@ for(kk in testObs_computed_vec){ n_samples = n_samples, gaussian.mu = mu, gaussian.cov_mat = Sigma, - shapley_reweighting_strategy = shapley_reweighting_strategy) + kernelSHAP_reweighting_strategy = kernelSHAP_reweighting_strategy) runres_list[[kk]] <- run$kshap_final runcomps_list[[kk]] <- sum(sapply(run$keep_list,"[[","no_computed_combinations")) print(kk) @@ -135,12 +135,12 @@ for(kk in testObs_computed_vec){ est <- rbindlist(runres_list) est[,other_features:=NULL] -fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) -truth <- expl$shapley_values +truth <- expl$shapley_values_est expl_approx <- matrix(0, nrow = length(inds), ncol = m+1) expl_approx_obj_list <- list() @@ -152,12 +152,12 @@ for (i in testObs_computed_vec){ prediction_zero = p0, n_coalitions = runcomps_list[[i]], Sigma=Sigma,mu=mu) - expl_approx[i,] = unlist(expl_approx_obj$shapley_values) + expl_approx[i,] = unlist(expl_approx_obj$shapley_values_est) expl_approx_obj_list[[i]] <- expl_approx_obj } expl_approx <- as.data.table(expl_approx) colnames(expl_approx) <- colnames(truth) -fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(est-truth) rmse_vec <- sqrt(colMeans((est-truth)^2)) @@ -167,7 +167,7 @@ bias_vec_approx <- colMeans(expl_approx-truth) rmse_vec_approx <- sqrt(colMeans((expl_approx-truth)^2)) mae_vec_approx <- colMeans(abs(expl_approx-truth)) -save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".RData")) +save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".RData")) hist(unlist(runcomps_list),breaks = 20) @@ -197,7 +197,7 @@ 
full_pred # run$kshap_final -# expl$shapley_values +# expl$shapley_values_est @@ -208,7 +208,7 @@ full_pred # sum(unlist(kshap_final[.N,])) -# sum(unlist(expl$shapley_values[testObs_computed,])) +# sum(unlist(expl$shapley_values_est[testObs_computed,])) @@ -224,7 +224,7 @@ full_pred # full_pred <- predict(model,x_explain)[5] # p0 <- mean(y_train) -# pred_not_to_decompose <- sum(expl$shapley_values[5,VV7:VV9]) +# pred_not_to_decompose <- sum(expl$shapley_values_est[5,VV7:VV9]) # run_minor <- iterative_kshap_func(model,x_explain,x_train, diff --git a/inst/scripts/devel/simtest_iterative_kernelshap_nonlingauss_analyze_results.R b/inst/scripts/devel/simtest_iterative_kernelshap_nonlingauss_analyze_results.R index cc994057e..9888f57f1 100644 --- a/inst/scripts/devel/simtest_iterative_kernelshap_nonlingauss_analyze_results.R +++ b/inst/scripts/devel/simtest_iterative_kernelshap_nonlingauss_analyze_results.R @@ -1,18 +1,18 @@ library(data.table) -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" shapley_threshold_val <- 0.2 sim_results_folder = "/nr/project/stat/BigInsight/Projects/Explanations/EffektivShapley/Frida/simuleringsresultater/sim_nonlingauss_v2/" -load(paste0(sim_results_folder,"iterative_kernelshap_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".RData")) +load(paste0(sim_results_folder,"iterative_kernelshap_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".RData")) -exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +exact_vals = fread(paste0(sim_results_folder,"exact_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) names(exact_vals) <- c("phi0", paste0("VV",1:12)) -iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) -approx_vals = fread(paste0(sim_results_folder,"approx_shapley_values_", shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +iterative_vals = fread(paste0(sim_results_folder,"iterative_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) +approx_vals = fread(paste0(sim_results_folder,"approx_shapley_values_", shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(exact_vals - iterative_vals) rmse_vec <- sqrt(colMeans((exact_vals - iterative_vals)^2)) diff --git a/inst/scripts/devel/simtest_reweighting_strategies.R b/inst/scripts/devel/simtest_reweighting_strategies.R index b1356faa6..3a5a24f0c 100644 --- a/inst/scripts/devel/simtest_reweighting_strategies.R +++ b/inst/scripts/devel/simtest_reweighting_strategies.R @@ -52,7 +52,7 @@ p0 <- mean(y_train) ### First run proper shapr call on this -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" set.seed(465132) progressr::handlers(global = TRUE) @@ -66,7 +66,7 @@ expl <- shapr::explain(model = model, dt_vS_map <- merge(expl$internal$iter_list[[1]]$coalition_map,expl$internal$output$dt_vS,by="id_coalition")[,-"id_coalition"] -shapley_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond") +kernelSHAP_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond") n_coalitions_vec <- c(50,100,200,400,800,1200,1600,2000,2400,2800,3200,3600,4000) @@ -97,7 +97,7 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ mu=mu, seed = this_seed, max_n_coalitions = this_n_coalitions, - shapley_reweighting = "none", + 
kernelSHAP_reweighting = "none", paired_shap_sampling = this_paired_shap_sampling) this0_X <- this$internal$objects$X @@ -107,12 +107,12 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ setorder(exact_dt_vS,id_coalition) - for(iii in seq_along(shapley_reweighting_strategy_vec)){ - this_shapley_reweighting_strategy <- shapley_reweighting_strategy_vec[iii] + for(iii in seq_along(kernelSHAP_reweighting_strategy_vec)){ + this_kernelSHAP_reweighting_strategy <- kernelSHAP_reweighting_strategy_vec[iii] this_X <- copy(this0_X) - shapr:::shapley_reweighting(this_X,reweight=this_shapley_reweighting_strategy) + shapr:::kernelSHAP_reweighting(this_X,reweight=this_kernelSHAP_reweighting_strategy) this_W <- weight_matrix( X = this_X, @@ -120,9 +120,9 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ ) shap_dt0 <- as.data.table(cbind(seq_len(n_explain),t(this_W%*%as.matrix(exact_dt_vS[,-c("coalitions_str","id_coalition")])))) - names(shap_dt0) <- names(this$shapley_values) + names(shap_dt0) <- names(this$shapley_values_est) - this_diff <- unlist(shap_dt0[,-c(1,2)]-expl$shapley_values[,-c(1,2)]) + this_diff <- unlist(shap_dt0[,-c(1,2)]-expl$shapley_values_est[,-c(1,2)]) this_bias <- mean(this_diff) this_var <- var(this_diff) this_MAE <- mean(abs(this_diff)) @@ -130,7 +130,7 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ res_vec <- data.table(n_coalitions = this_n_coalitions, paired_shap_sampling = this_paired_shap_sampling, - shapley_reweighting_strategy = this_shapley_reweighting_strategy, + kernelSHAP_reweighting_strategy = this_kernelSHAP_reweighting_strategy, seed = this_seed, bias=this_bias, var = this_var, @@ -154,11 +154,11 @@ res_dt <- rbindlist(res_list) fwrite(res_dt,file = "../../Div/extra_shapr_scripts_etc/res_dt_reweighting_sims_lingaus.csv") -resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] library(ggplot2) -ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line() @@ -204,7 +204,7 @@ for(kk in testObs_computed_vec){ n_samples = n_samples, gaussian.mu = mu, gaussian.cov_mat = Sigma, - shapley_reweighting_strategy = shapley_reweighting_strategy) + kernelSHAP_reweighting_strategy = kernelSHAP_reweighting_strategy) runres_list[[kk]] <- run$kshap_final runcomps_list[[kk]] <- sum(sapply(run$keep_list,"[[","no_computed_combinations")) print(kk) @@ -212,12 +212,12 @@ for(kk in testObs_computed_vec){ est <- rbindlist(runres_list) est[,other_features:=NULL] -fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +fwrite(est,paste0(sim_results_saving_folder,"iterative_shapley_values_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) -truth <- expl$shapley_values +truth <- expl$shapley_values_est expl_approx <- matrix(0, nrow = length(inds), ncol = m+1) expl_approx_obj_list <- list() @@ -229,12 +229,12 @@ for (i in testObs_computed_vec){ prediction_zero = p0, n_combinations = runcomps_list[[i]], Sigma=Sigma,mu=mu) - expl_approx[i,] = unlist(expl_approx_obj$shapley_values) + expl_approx[i,] = unlist(expl_approx_obj$shapley_values_est) 
expl_approx_obj_list[[i]] <- expl_approx_obj } expl_approx <- as.data.table(expl_approx) colnames(expl_approx) <- colnames(truth) -fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".csv")) +fwrite(expl_approx,paste0(sim_results_saving_folder,"approx_shapley_values_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".csv")) bias_vec <- colMeans(est-truth) rmse_vec <- sqrt(colMeans((est-truth)^2)) @@ -244,7 +244,7 @@ bias_vec_approx <- colMeans(expl_approx-truth) rmse_vec_approx <- sqrt(colMeans((expl_approx-truth)^2)) mae_vec_approx <- colMeans(abs(expl_approx-truth)) -save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_",shapley_threshold_val,"_",shapley_reweighting_strategy, ".RData")) +save.image(paste0(sim_results_saving_folder, "iterative_kernelshap_",shapley_threshold_val,"_",kernelSHAP_reweighting_strategy, ".RData")) hist(unlist(runcomps_list),breaks = 20) diff --git a/inst/scripts/devel/simtest_reweighting_strategies_nonlinear.R b/inst/scripts/devel/simtest_reweighting_strategies_nonlinear.R index 5e4d09522..0c1c8b521 100644 --- a/inst/scripts/devel/simtest_reweighting_strategies_nonlinear.R +++ b/inst/scripts/devel/simtest_reweighting_strategies_nonlinear.R @@ -69,7 +69,7 @@ p0 <- mean(y_train) ### First run proper shapr call on this -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" set.seed(465132) progressr::handlers(global = TRUE) @@ -83,7 +83,7 @@ expl <- shapr::explain(model = model, dt_vS_map <- merge(expl$internal$iter_list[[1]]$coalition_map,expl$internal$output$dt_vS,by="id_coalition")[,-"id_coalition"] -shapley_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond") +kernelSHAP_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond") n_coalitions_vec <- c(50,100,200,400,800,1200,1600,2000,2400,2800,3200,3600,4000) @@ -114,7 +114,7 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ mu=mu, seed = this_seed, max_n_coalitions = this_n_coalitions, - shapley_reweighting = "none", + kernelSHAP_reweighting = "none", paired_shap_sampling = this_paired_shap_sampling) this0_X <- this$internal$objects$X @@ -124,12 +124,12 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ setorder(exact_dt_vS,id_coalition) - for(iii in seq_along(shapley_reweighting_strategy_vec)){ - this_shapley_reweighting_strategy <- shapley_reweighting_strategy_vec[iii] + for(iii in seq_along(kernelSHAP_reweighting_strategy_vec)){ + this_kernelSHAP_reweighting_strategy <- kernelSHAP_reweighting_strategy_vec[iii] this_X <- copy(this0_X) - shapr:::shapley_reweighting(this_X,reweight=this_shapley_reweighting_strategy) + shapr:::kernelSHAP_reweighting(this_X,reweight=this_kernelSHAP_reweighting_strategy) this_W <- weight_matrix( X = this_X, @@ -137,9 +137,9 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ ) shap_dt0 <- as.data.table(cbind(seq_len(n_explain),t(this_W%*%as.matrix(exact_dt_vS[,-c("coalitions_str","id_coalition")])))) - names(shap_dt0) <- names(this$shapley_values) + names(shap_dt0) <- names(this$shapley_values_est) - this_diff <- unlist(shap_dt0[,-c(1,2)]-expl$shapley_values[,-c(1,2)]) + this_diff <- unlist(shap_dt0[,-c(1,2)]-expl$shapley_values_est[,-c(1,2)]) this_bias <- mean(this_diff) this_var <- var(this_diff) this_MAE <- mean(abs(this_diff)) @@ -147,7 +147,7 @@ for(i0 in seq_along(paired_shap_sampling_vec)){ res_vec <- data.table(n_coalitions = this_n_coalitions, paired_shap_sampling = 
this_paired_shap_sampling, - shapley_reweighting_strategy = this_shapley_reweighting_strategy, + kernelSHAP_reweighting_strategy = this_kernelSHAP_reweighting_strategy, seed = this_seed, bias=this_bias, var = this_var, @@ -171,12 +171,12 @@ res_dt <- rbindlist(res_list) fwrite(res_dt,file = "../../Div/extra_shapr_scripts_etc/res_dt_reweighting_sims_nonlingaus.csv") -resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] library(ggplot2) -ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line() -ggplot(resres[paired_shap_sampling==FALSE],aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres[paired_shap_sampling==FALSE],aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line() diff --git a/inst/scripts/devel/simtest_reweighting_strategies_nonlinear_nonunique_sampling.R b/inst/scripts/devel/simtest_reweighting_strategies_nonlinear_nonunique_sampling.R index 82c573e90..cc8341f92 100644 --- a/inst/scripts/devel/simtest_reweighting_strategies_nonlinear_nonunique_sampling.R +++ b/inst/scripts/devel/simtest_reweighting_strategies_nonlinear_nonunique_sampling.R @@ -69,7 +69,7 @@ p0 <- mean(y_train) ### First run proper shapr call on this -shapley_reweighting_strategy = "none" +kernelSHAP_reweighting_strategy = "none" set.seed(465132) progressr::handlers(global = TRUE) @@ -83,7 +83,7 @@ expl <- shapr::explain(model = model, dt_vS_map <- merge(expl$internal$iter_list[[1]]$coalition_map,expl$internal$output$dt_vS,by="id_coalition")[,-"id_coalition"] -shapley_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond","on_all_cond_paired","comb") +kernelSHAP_reweighting_strategy_vec <- c("none","on_N","on_coal_size","on_all","on_all_cond","on_all_cond_paired","comb") n_coalitions_vec <- c(50,100,200,400,800,1200,1600,2000,2400,2800,3200,3600,4000) @@ -114,7 +114,7 @@ for(ii in seq_along(n_coalitions_vec)){ mu=mu, seed = this_seed, max_n_coalitions = this_n_coalitions, - shapley_reweighting = "none", + kernelSHAP_reweighting = "none", unique_sampling = TRUE, paired_shap_sampling = this_paired_shap_sampling) @@ -125,12 +125,12 @@ for(ii in seq_along(n_coalitions_vec)){ setorder(exact_dt_vS,id_coalition) - for(iii in seq_along(shapley_reweighting_strategy_vec)){ - this_shapley_reweighting_strategy <- shapley_reweighting_strategy_vec[iii] + for(iii in seq_along(kernelSHAP_reweighting_strategy_vec)){ + this_kernelSHAP_reweighting_strategy <- kernelSHAP_reweighting_strategy_vec[iii] this_X <- copy(this0_X) - shapr:::shapley_reweighting(this_X,reweight=this_shapley_reweighting_strategy) + shapr:::kernelSHAP_reweighting(this_X,reweight=this_kernelSHAP_reweighting_strategy) this_W <- weight_matrix( X = this_X, @@ -138,9 +138,9 @@ for(ii in seq_along(n_coalitions_vec)){ ) shap_dt0 <- as.data.table(cbind(seq_len(n_explain),t(this_W%*%as.matrix(exact_dt_vS[,-c("coalitions_str","id_coalition")])))) - names(shap_dt0) <- names(this$shapley_values) + names(shap_dt0) <- names(this$shapley_values_est) - this_diff <- 
unlist(shap_dt0[,-c(1,2)]-expl$shapley_values[,-c(1,2)]) + this_diff <- unlist(shap_dt0[,-c(1,2)]-expl$shapley_values_est[,-c(1,2)]) this_bias <- mean(this_diff) this_var <- var(this_diff) this_MAE <- mean(abs(this_diff)) @@ -148,7 +148,7 @@ for(ii in seq_along(n_coalitions_vec)){ res_vec <- data.table(n_coalitions = this_n_coalitions, paired_shap_sampling = this_paired_shap_sampling, - shapley_reweighting_strategy = this_shapley_reweighting_strategy, + kernelSHAP_reweighting_strategy = this_kernelSHAP_reweighting_strategy, seed = this_seed, bias=this_bias, var = this_var, @@ -162,7 +162,7 @@ for(ii in seq_along(n_coalitions_vec)){ weight_dt[,n_coalitions:=this_n_coalitions] weight_dt[,paired_shap_sampling:=this_paired_shap_sampling] - weight_dt[,shapley_reweighting_strategy:=this_shapley_reweighting_strategy] + weight_dt[,kernelSHAP_reweighting_strategy:=this_kernelSHAP_reweighting_strategy] weight_dt[,seed:=this_seed] weight_list[[length(weight_list)+1]] <- copy(weight_dt) @@ -184,19 +184,19 @@ res_dt <- rbindlist(res_list) fwrite(res_dt,file = "../../Div/extra_shapr_scripts_etc/res_dt_reweighting_sims_nonlingaus_nonunique_sampling_new.csv") -resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] -resres_sd <- res_dt[,lapply(.SD,sd),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +resres <- res_dt[,lapply(.SD,mean),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] +resres_sd <- res_dt[,lapply(.SD,sd),.SDcols=c("bias","var","MAE","RMSE"),by=.(paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] library(ggplot2) -ggplot(resres,aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres,aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line() -ggplot(resres[paired_shap_sampling==FALSE],aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres[paired_shap_sampling==FALSE],aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line()+scale_y_log10() -ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=shapley_reweighting_strategy,linetype= paired_shap_sampling))+ +ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=kernelSHAP_reweighting_strategy,linetype= paired_shap_sampling))+ geom_line()+scale_y_log10() @@ -205,13 +205,13 @@ ggplot(resres[paired_shap_sampling==TRUE],aes(x=n_coalitions,y=MAE,col=shapley_r weight_dt <- rbindlist(weight_list) -weight_dt[!(coalition_size%in%c(0,12)),sum_shapley_weight:=sum(shapley_weight),by=.(seed,paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +weight_dt[!(coalition_size%in%c(0,12)),sum_shapley_weight:=sum(shapley_weight),by=.(seed,paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] weight_dt[!(coalition_size%in%c(0,12)),shapley_weight:=shapley_weight/sum_shapley_weight] -weight_dt[!(coalition_size%in%c(0,12)),mean(shapley_weight),by=.(seed,paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +weight_dt[!(coalition_size%in%c(0,12)),mean(shapley_weight),by=.(seed,paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] -ww_dt <- 
weight_dt[!(coalition_size%in%c(0,12)),list(mean_weight=mean(shapley_weight)),by=.(coalition_size,paired_shap_sampling,n_coalitions,shapley_reweighting_strategy)] +ww_dt <- weight_dt[!(coalition_size%in%c(0,12)),list(mean_weight=mean(shapley_weight)),by=.(coalition_size,paired_shap_sampling,n_coalitions,kernelSHAP_reweighting_strategy)] -ggplot(ww_dt[paired_shap_sampling==TRUE & shapley_reweighting_strategy %in% c("none","on_all_cond_paired","on_N")],aes(x=coalition_size,y=mean_weight,col=shapley_reweighting_strategy))+ +ggplot(ww_dt[paired_shap_sampling==TRUE & kernelSHAP_reweighting_strategy %in% c("none","on_all_cond_paired","on_N")],aes(x=coalition_size,y=mean_weight,col=kernelSHAP_reweighting_strategy))+ geom_point()+facet_grid(~n_coalitions) diff --git a/inst/scripts/devel/simtest_timing_to_Frida.R b/inst/scripts/devel/simtest_timing_to_Frida.R index d6366ce16..93fc801bf 100644 --- a/inst/scripts/devel/simtest_timing_to_Frida.R +++ b/inst/scripts/devel/simtest_timing_to_Frida.R @@ -85,14 +85,14 @@ expl <- explain( n_coalitions = 100, Sigma=Sigma, mu=mu, - adaptive = TRUE, + iterative = TRUE, unique_sampling = FALSE, - adaptive_arguments = list(initial_n_coalitions = 50, + iterative_args = list(initial_n_coalitions = 50, fixed_n_coalitions_per_iter = 50, max_iter = 10, - convergence_tolerance = 10^(-10), + convergence_tol = 10^(-10), compute_sd = TRUE), - shapley_reweighting = "none", + kernelSHAP_reweighting = "none", print_iter_info = TRUE ) diff --git a/inst/scripts/devel/testing_explain_forevast_n_comb.R b/inst/scripts/devel/testing_explain_forevast_n_comb.R index 23517f2c9..43aeb6ced 100644 --- a/inst/scripts/devel/testing_explain_forevast_n_comb.R +++ b/inst/scripts/devel/testing_explain_forevast_n_comb.R @@ -175,14 +175,14 @@ cols_horizon3 <- h3full$internal$objects$cols_per_horizon[[3]] h1mean1 <- h2mean1 <- h2mean2 <- h3mean1 <- h3mean2 <- h3mean3 <- list() for(i in 1:reps){ - h1mean1[[i]] <- as.matrix(h1list[[i]]$shapley_values[horizon==1, ..cols_horizon1]) + h1mean1[[i]] <- as.matrix(h1list[[i]]$shapley_values_est[horizon==1, ..cols_horizon1]) - h2mean1[[i]] <- as.matrix(h2list[[i]]$shapley_values[horizon==1, ..cols_horizon1]) - h2mean2[[i]] <- as.matrix(h2list[[i]]$shapley_values[horizon==2, ..cols_horizon2]) + h2mean1[[i]] <- as.matrix(h2list[[i]]$shapley_values_est[horizon==1, ..cols_horizon1]) + h2mean2[[i]] <- as.matrix(h2list[[i]]$shapley_values_est[horizon==2, ..cols_horizon2]) - h3mean1[[i]] <- as.matrix(h3list[[i]]$shapley_values[horizon==1, ..cols_horizon1]) - h3mean2[[i]] <- as.matrix(h3list[[i]]$shapley_values[horizon==2, ..cols_horizon2]) - h3mean3[[i]] <- as.matrix(h3list[[i]]$shapley_values[horizon==3, ..cols_horizon3]) + h3mean1[[i]] <- as.matrix(h3list[[i]]$shapley_values_est[horizon==1, ..cols_horizon1]) + h3mean2[[i]] <- as.matrix(h3list[[i]]$shapley_values_est[horizon==2, ..cols_horizon2]) + h3mean3[[i]] <- as.matrix(h3list[[i]]$shapley_values_est[horizon==3, ..cols_horizon3]) } @@ -190,25 +190,25 @@ for(i in 1:reps){ Reduce("+", h1mean1) / reps Reduce("+", h2mean1) / reps Reduce("+", h3mean1) / reps -h3full$shapley_values[horizon==1,..cols_horizon1] +h3full$shapley_values_est[horizon==1,..cols_horizon1] # Horizon 2 Reduce("+", h2mean2) / reps Reduce("+", h3mean2) / reps -h3full$shapley_values[horizon==2,..cols_horizon2] +h3full$shapley_values_est[horizon==2,..cols_horizon2] # Horizon 3 Reduce("+", h3mean3) / reps -h3full$shapley_values[horizon==3,..cols_horizon3] +h3full$shapley_values_est[horizon==3,..cols_horizon3] 
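For reference, the renamed reweighting argument is passed to explain() exactly as the simulation scripts above do; a minimal sketch in the same spirit, assuming a fitted model, the x_train/x_explain data, and the baseline prediction p0 from these scripts:

library(shapr)
# Sample at most 400 coalitions and reweight their kernelSHAP weights with
# "on_all_cond", the default strategy that performed best in the simulations
expl_reweighted <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  max_n_coalitions = 400,
  paired_shap_sampling = TRUE,
  kernelSHAP_reweighting = "on_all_cond"
)
print(expl_reweighted$shapley_values_est)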
-expect_equal(h2$shapley_values[horizon==1, ..cols_horizon1], - h1$shapley_values[horizon==1,..cols_horizon1]) +expect_equal(h2$shapley_values_est[horizon==1, ..cols_horizon1], + h1$shapley_values_est[horizon==1,..cols_horizon1]) -expect_equal(h3$shapley_values[horizon==1, ..cols_horizon1], - h1$shapley_values[horizon==1,..cols_horizon1]) +expect_equal(h3$shapley_values_est[horizon==1, ..cols_horizon1], + h1$shapley_values_est[horizon==1,..cols_horizon1]) cols_horizon2 <- h2$internal$objects$cols_per_horizon[[2]] -expect_equal(h3$shapley_values[horizon==2, ..cols_horizon2], - h2$shapley_values[horizon==2,..cols_horizon2]) +expect_equal(h3$shapley_values_est[horizon==2, ..cols_horizon2], + h2$shapley_values_est[horizon==2,..cols_horizon2]) diff --git a/inst/scripts/devel/testing_intermediate_saving.R b/inst/scripts/devel/testing_intermediate_saving.R index 81bb9149b..7214a2eac 100644 --- a/inst/scripts/devel/testing_intermediate_saving.R +++ b/inst/scripts/devel/testing_intermediate_saving.R @@ -6,15 +6,15 @@ aa = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.01, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.01, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter = 30 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, - print_iter_info = TRUE,shapley_reweighting = "on_N" + print_iter_info = TRUE,kernelSHAP_reweighting = "on_N" ) bb = explain( @@ -23,15 +23,15 @@ bb = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.001, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.001, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter = 30 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, - print_iter_info = TRUE,shapley_reweighting = "on_N",prev_shapr_object = aa + print_iter_info = TRUE,kernelSHAP_reweighting = "on_N",prev_shapr_object = aa ) @@ -48,16 +48,16 @@ full = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.001, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.001, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter = 7 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, print_iter_info = TRUE, - shapley_reweighting = "on_N", + kernelSHAP_reweighting = "on_N", seed=NULL ) @@ -68,16 +68,16 @@ first = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.001, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.001, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter = 4 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, print_iter_info = TRUE, - shapley_reweighting = "on_N", + kernelSHAP_reweighting = "on_N", seed=NULL ) @@ -88,16 +88,16 @@ second = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.001, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.001, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter 
= 7 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, print_iter_info = TRUE, - shapley_reweighting = "on_N", + kernelSHAP_reweighting = "on_N", seed=NULL, prev_shapr_object = first ) @@ -111,22 +111,22 @@ second_path = explain( x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 10, - convergence_tolerance = 0.001, - reduction_factor_vec = rep(10^(-5), 10), + convergence_tol = 0.001, + n_coal_next_iter_factor_vec = rep(10^(-5), 10), max_iter = 5 ), - adaptive = TRUE, + iterative = TRUE, print_shapleyres = TRUE, print_iter_info = TRUE, - shapley_reweighting = "on_N", + kernelSHAP_reweighting = "on_N", seed=NULL, - prev_shapr_object = first$internal$parameters$adaptive_arguments$saving_path + prev_shapr_object = first$internal$parameters$output_args$saving_path ) # Identical results -all.equal(full$shapley_values,second$shapley_values) # TRUE -all.equal(full$shapley_values,second2$shapley_values) # TRUE -all.equal(full$shapley_values,second_path$shapley_values) # TRUE +all.equal(full$shapley_values_est,second$shapley_values_est) # TRUE +all.equal(full$shapley_values_est,second2$shapley_values_est) # TRUE +all.equal(full$shapley_values_est,second_path$shapley_values_est) # TRUE diff --git a/inst/scripts/devel/verifying_arima_model_output.R b/inst/scripts/devel/verifying_arima_model_output.R index 7a63bcbf5..4f27f3c40 100644 --- a/inst/scripts/devel/verifying_arima_model_output.R +++ b/inst/scripts/devel/verifying_arima_model_output.R @@ -51,20 +51,20 @@ exp <- explain_forecast(model = model_arima_temp, # These two should be approximately equal # For y -exp$shapley_values$Y1.1 +exp$shapley_values_est$Y1.1 model_arima_temp$coef[1]*(y[explain_idx]-mean(y)) #[1] -0.13500 0.20643 #[1] -0.079164 0.208118 # for xreg1 -exp$shapley_values$var1.F1 +exp$shapley_values_est$var1.F1 model_arima_temp$coef[3]*(xreg[explain_idx+1,1]-mean(xreg[,1])) #[1] -0.030901 1.179386 #[1] -0.12034 1.19589 # for xreg2 -exp$shapley_values$var2.F1 +exp$shapley_values_est$var2.F1 0 #[1] 0.011555 0.031911 #[1] 0 diff --git a/inst/scripts/example_plot_MSEv.R b/inst/scripts/example_plot_MSEv.R index 05381b580..51fd56ee2 100644 --- a/inst/scripts/example_plot_MSEv.R +++ b/inst/scripts/example_plot_MSEv.R @@ -381,7 +381,7 @@ explanation_gaussian <- explain( ) explanation_gaussian_copy <- copy(explanation_gaussian_all) -colnames(explanation_gaussian_copy$shapley_values) <- rev(colnames(explanation_gaussian_copy$shapley_values)) +colnames(explanation_gaussian_copy$shapley_values_est) <- rev(colnames(explanation_gaussian_copy$shapley_values_est)) # Will give an error due to different feature names plot_MSEv_eval_crit(list( diff --git a/inst/scripts/readme_example.R b/inst/scripts/readme_example.R index 480f599d7..b9e8852a9 100644 --- a/inst/scripts/readme_example.R +++ b/inst/scripts/readme_example.R @@ -39,7 +39,7 @@ explanation <- explain( # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. 
-print(explanation$shapley_values) +print(explanation$shapley_values_est) # Finally we plot the resulting explanations plot(explanation) diff --git a/inst/scripts/vilde/sketch_for_waterfall_plot.R b/inst/scripts/vilde/sketch_for_waterfall_plot.R index dc9e9278f..f8572fb55 100644 --- a/inst/scripts/vilde/sketch_for_waterfall_plot.R +++ b/inst/scripts/vilde/sketch_for_waterfall_plot.R @@ -29,11 +29,11 @@ res <- explain_final(x_train,x_test,model,approach="independence",prediction_zer plot(res) i<- 1 # index for observation we want to plot -dt <- data.table(feat_name = paste0(colnames(res$shapley_values[,-1]), " = ", format(res$internal$data$x_explain[i,], 2) ), - shapley_value = as.numeric(res$shapley_values[i,-1]) +dt <- data.table(feat_name = paste0(colnames(res$shapley_values_est[,-1]), " = ", format(res$internal$data$x_explain[i,], 2) ), + shapley_value = as.numeric(res$shapley_values_est[i,-1]) ) dt -expected <- as.numeric(res$shapley_values[i,])[1] +expected <- as.numeric(res$shapley_values_est[i,])[1] observed <- res$pred_explain[i] dt[, sign := ifelse(shapley_value > 0, "Increases", "Decreases")] diff --git a/man/check_verbose.Rd b/man/check_verbose.Rd index 6300f3210..5af03b591 100644 --- a/man/check_verbose.Rd +++ b/man/check_verbose.Rd @@ -13,8 +13,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress"} displays information about where in the calculation process the function currently is. \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}). +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/compute_MSEv_eval_crit.Rd b/man/compute_MSEv_eval_crit.Rd index 309f50e56..27643a769 100644 --- a/man/compute_MSEv_eval_crit.Rd +++ b/man/compute_MSEv_eval_crit.Rd @@ -15,19 +15,13 @@ compute_MSEv_eval_crit( \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{dt_vS}{Data.table of dimension \code{n_coalitions} times \code{n_explain + 1} containing the contribution function estimates. The first column is assumed to be named \code{id_coalition} and containing the ids of the coalitions. The last row is assumed to be the full coalition, i.e., it contains the predicted responses for the observations which are to be explained.} -\item{MSEv_uniform_comb_weights}{Logical. -If \code{TRUE} (default), then the function weights the coalitions uniformly when computing the MSEv criterion. -If \code{FALSE}, then the function use the Shapley kernel weights to weight the coalitions when computing the MSEv -criterion.
-Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered.} - \item{MSEv_skip_empty_full_comb}{Logical. If \code{TRUE} (default), we exclude the empty and grand coalitions when computing the MSEv evaluation criterion. This is reasonable as they are identical for all methods, i.e., their contribution function is independent of the used method as they are special cases not diff --git a/man/compute_shapley_new.Rd b/man/compute_shapley_new.Rd index 6e3a5ce6f..3c1d249f2 100644 --- a/man/compute_shapley_new.Rd +++ b/man/compute_shapley_new.Rd @@ -10,7 +10,7 @@ compute_shapley_new(internal, dt_vS) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{dt_vS}{The contribution matrix.} } diff --git a/man/create_coalition_table.Rd b/man/create_coalition_table.Rd index 07a902806..1b340f207 100644 --- a/man/create_coalition_table.Rd +++ b/man/create_coalition_table.Rd @@ -13,7 +13,7 @@ create_coalition_table( prev_coal_samples = NULL, coal_feature_list = as.list(seq_len(m)), approach0 = "gaussian", - shapley_reweighting = "none", + kernelSHAP_reweighting = "none", dt_valid_causal_coalitions = NULL ) } @@ -42,7 +42,7 @@ A list mapping each coalition to the features it contains.} \item{approach0}{Character vector. Contains the approach to be used for estimation of each coalition size. Same as \code{approach} in \code{explain()}.} -\item{shapley_reweighting}{String. +\item{kernelSHAP_reweighting}{String. How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing the randomness and thereby the variance of the Shapley value estimates. One of \code{'none'}, \code{'on_N'}, \code{'on_all'}, \code{'on_all_cond'} (default). diff --git a/man/default_doc.Rd b/man/default_doc.Rd index 7ee25632a..1da47ca1e 100644 --- a/man/default_doc.Rd +++ b/man/default_doc.Rd @@ -10,7 +10,7 @@ default_doc(internal, model, predict_model, output_size, extra, ...) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{model}{Objects. The model object that ought to be explained.
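The internal list referenced throughout these help files carries the documented elements parameters, data, objects, iter_list, timing_list, main_timing_list, output, and iter_timing_list; a hedged sketch for inspecting them on a finished explanation object (assuming expl is the result of an explain() call, as in the scripts earlier in this changeset):

# Top-level structure of the internal bookkeeping list
str(expl$internal, max.level = 1)
# One iter_list entry per iteration of the iterative estimation procedure
length(expl$internal$iter_list)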
diff --git a/man/explain.Rd b/man/explain.Rd index f31e26a21..b1e3e2454 100644 --- a/man/explain.Rd +++ b/man/explain.Rd @@ -9,24 +9,24 @@ explain( x_explain, x_train, approach, - paired_shap_sampling = TRUE, prediction_zero, + iterative = NULL, max_n_coalitions = NULL, - adaptive = NULL, group = NULL, + paired_shap_sampling = TRUE, n_MC_samples = 1000, + kernelSHAP_reweighting = "on_all_cond", seed = 1, - keep_samp_for_vS = FALSE, + verbose = "basic", predict_model = NULL, get_model_specs = NULL, - MSEv_uniform_comb_weights = TRUE, - verbose = "basic", - adaptive_arguments = list(), - shapley_reweighting = "on_all_cond", prev_shapr_object = NULL, asymmetric = FALSE, causal_ordering = NULL, confounding = NULL, + extra_computation_args = list(), + iterative_args = list(), + output_args = list(), ... ) } @@ -49,28 +49,15 @@ All elements should, either be \code{"gaussian"}, \code{"copula"}, \code{"empiri \code{"categorical"}, \code{"timeseries"}, \code{"independence"}, \code{"regression_separate"}, or \code{"regression_surrogate"}. The two regression approaches cannot be combined with any other approach. See details for more information.} -\item{paired_shap_sampling}{Logical. -If \code{TRUE} (default), paired versions of all sampled coalitions are also included in the computation. -That is, if there are 5 features and e.g. coalitions (1,3,5) are sampled, then also coalition (2,4) is used for -computing the Shapley values. This is done to reduce the variance of the Shapley value estimates.} - \item{prediction_zero}{Numeric. The prediction value for unseen data, i.e. an estimate of the expected prediction without conditioning on any features. Typically we set this value equal to the mean of the response variable in our training data, but other choices such as the mean of the predictions in the training data are also reasonable.} -\item{max_n_coalitions}{Integer. -The upper limit on the number of unique feature/group coalitions to use in the adaptive procedure -(if \code{adaptive = TRUE}). -If \code{adaptive = FALSE} it represents the number of feature/group coalitions to use directly. -The quantity refers to the number of unique feature coalitions if \code{group = NULL}, -and group coalitions if \code{group != NULL}. -\code{max_n_coalitions = NULL} corresponds to \code{max_n_coalitions=2^n_features}.} - -\item{adaptive}{Logical or NULL +\item{iterative}{Logical or NULL. If \code{NULL} (default), the argument is set to \code{TRUE} if there are more than 5 features/groups, and \code{FALSE} otherwise. -If eventually \code{TRUE}, the Shapley values are estimated adaptively in an iterative manner. +If eventually \code{TRUE}, the Shapley values are estimated iteratively. This provides sufficiently accurate Shapley value estimates faster. First an initial number of coalitions is sampled, then bootstrapping is used to estimate the variance of the Shapley values. @@ -78,7 +65,15 @@ A convergence criterion is used to determine if the variances of the Shapley val If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more coalitions. The process is repeated until the variances are below the threshold. -Specifics related to the adaptive process and convergence criterion are set through \code{adaptive_arguments}.} +Specifics related to the iterative process and convergence criterion are set through \code{iterative_args}.} + +\item{max_n_coalitions}{Integer.
+The upper limit on the number of unique feature/group coalitions to use in the iterative procedure +(if \code{iterative = TRUE}). +If \code{iterative = FALSE} it represents the number of feature/group coalitions to use directly. +The quantity refers to the number of unique feature coalitions if \code{group = NULL}, +and group coalitions if \code{group != NULL}. +\code{max_n_coalitions = NULL} corresponds to \code{max_n_coalitions=2^n_features}.} \item{group}{List. If \code{NULL} regular feature wise Shapley values are computed. @@ -86,6 +81,11 @@ If provided, group wise Shapley values are computed. \code{group} then has lengt the number of groups. The list element contains character vectors with the features included in each of the different groups.} +\item{paired_shap_sampling}{Logical. +If \code{TRUE} (default), paired versions of all sampled coalitions are also included in the computation. +That is, if there are 5 features and e.g. coalitions (1,3,5) are sampled, then also coalition (2,4) is used for +computing the Shapley values. This is done to reduce the variance of the Shapley value estimates.} + \item{n_MC_samples}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation. For \code{approach="ctree"}, \code{n_MC_samples} corresponds to the number of samples @@ -94,13 +94,39 @@ For \code{approach="empirical"}, \code{n_MC_samples} is the \eqn{K} parameter i Aas et al. (2021), i.e. the maximum number of observations (with largest weights) that is used, see also the \code{empirical.eta} argument \code{\link[=setup_approach.empirical]{setup_approach.empirical()}}.} +\item{kernelSHAP_reweighting}{String. +How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing +the randomness and thereby the variance of the Shapley value estimates. +One of \code{'none'}, \code{'on_N'}, \code{'on_coal_size'}, \code{'on_all'}, \code{'on_all_cond'} (default). +\code{'none'} means no reweighting, i.e. the sampling frequency weights are used as is. +\code{'on_coal_size'} means the sampling frequencies are averaged over all coalitions of the same size. +\code{'on_N'} means the sampling frequencies are averaged over all coalitions with the same original sampling +probabilities. +\code{'on_all'} means the original sampling probabilities are used for all coalitions. +\code{'on_all_cond'} means the original sampling probabilities are used for all coalitions, while adjusting for the +probability that they are sampled at least once. +This method is preferred as it has performed the best in simulation studies.} + \item{seed}{Positive integer. Specifies the seed before any randomness based code is being run. If \code{NULL} no seed is set in the calling environment.} -\item{keep_samp_for_vS}{Logical. -Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in \code{internal$output}). -Not used for \code{approach="regression_separate"} or \code{approach="regression_surrogate"}.} +\item{verbose}{String vector or NULL. +Specifies the verbosity (printout detail level) through one or more of strings \code{"basic"}, \code{"progress"}, +\code{"convergence"}, \code{"shapley"} and \code{"vS_details"}. +\code{"basic"} (default) displays basic information about the computation which is being performed. +\verb{"progress"} displays information about where in the calculation process the function currently is.
+#' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) +\itemize{ +\item the final estimates. +\code{"vS_details"} displays information about the v_S estimates. +This is most relevant for \verb{approach \%in\% c("regression_separate", "regression_surrogate", "vaeac"}). +\code{NULL} means no printout. +Note that any combination of four strings can be used. +E.g. \code{verbose = c("basic", "vS_details")} will display basic information + details about the vS estimation process. +}} \item{predict_model}{Function. The prediction function used when \code{model} is not natively supported. @@ -124,51 +150,11 @@ If \code{NULL} (the default) internal functions are used for natively supported disabled for unsupported model classes. Can also be used to override the default function for natively supported model classes.} -\item{MSEv_uniform_comb_weights}{Logical. -If \code{TRUE} (default), then the function weights the coalitions uniformly when computing the MSEv criterion. -If \code{FALSE}, then the function use the Shapley kernel weights to weight the coalitions when computing the MSEv -criterion. -Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered.} - -\item{verbose}{String vector or NULL. -Specifies the verbosity (printout detail level) through one or more of strings \code{"basic"}, \code{"progress"}, -\code{"convergence"}, \code{"shapley"} and \code{"vS_details"}. -\code{"basic"} (default) displays basic information about the computation which is being performed. -\verb{"progress} displays information about where in the calculation process the function currently is. -#' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) -\itemize{ -\item the final estimates. -\code{"vS_details"} displays information about the v_S estimates. -This is most relevant for \verb{approach \%in\% c("regression_separate", "regression_surrogate", "vaeac"}). -\code{NULL} means no printout. -Note that any combination of four strings can be used. -E.g. \code{verbose = c("basic", "vS_details")} will display basic information + details about the vS estimation process. -}} - -\item{adaptive_arguments}{Named list. -Specifices the arguments for the adaptive procedure. -See \code{\link[=get_adaptive_arguments_default]{get_adaptive_arguments_default()}} for description of the arguments and their default values.} - -\item{shapley_reweighting}{String. -How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing -the randomness and thereby the variance of the Shapley value estimates. -One of \code{'none'}, \code{'on_N'}, \code{'on_all'}, \code{'on_all_cond'} (default). -\code{'none'} means no reweighting, i.e. the sampling frequency weights are used as is. -\code{'on_coal_size'} means the sampling frequencies are averaged over all coalitions of the same size. -\code{'on_N'} means the sampling frequencies are averaged over all coalitions with the same original sampling -probabilities. -\code{'on_all'} means the original sampling probabilities are used for all coalitions. 
-\code{'on_all_cond'} means the original sampling probabilities are used for all coalitions, while adjusting for the -probability that they are sampled at least once. -This method is preferred as it has performed the best in simulation studies.} - \item{prev_shapr_object}{\code{shapr} object or string. If an object of class \code{shapr} is provided or string with a path to where intermediate results are stored, then the function will use the previous object to continue the computation. This is useful if the computation is interrupted or you want higher accuracy than already obtained, and therefore -want to continue the adaptive estimation. See the vignette for examples.} +want to continue the iterative estimation. See the vignette for examples.} \item{asymmetric}{Logical. Not applicable for (regular) non-causal or asymmetric explanations. @@ -206,6 +192,18 @@ specified, then \code{explain} computes asymmetric/symmetric causal Shapley valu \code{asymmetric}. The \code{approach} cannot be \code{regression_separate} and \code{regression_surrogate} as the regression-based approaches are not applicable to the causal Shapley value methodology.} +\item{extra_computation_args}{Named list. +Specifies extra arguments related to the computation of the Shapley values. +See \code{\link[=get_extra_est_args_default]{get_extra_est_args_default()}} for description of the arguments and their default values.} + +\item{iterative_args}{Named list. +Specifies the arguments for the iterative procedure. +See \code{\link[=get_iterative_args_default]{get_iterative_args_default()}} for description of the arguments and their default values.} + +\item{output_args}{Named list. +Specifies certain arguments related to the output of the function. +See \code{\link[=get_output_args_default]{get_output_args_default()}} for description of the arguments and their default values.} + \item{...}{ Arguments passed on to \code{\link[=setup_approach.empirical]{setup_approach.empirical}}, \code{\link[=setup_approach.independence]{setup_approach.independence}}, \code{\link[=setup_approach.gaussian]{setup_approach.gaussian}}, \code{\link[=setup_approach.copula]{setup_approach.copula}}, \code{\link[=setup_approach.ctree]{setup_approach.ctree}}, \code{\link[=setup_approach.vaeac]{setup_approach.vaeac}}, \code{\link[=setup_approach.categorical]{setup_approach.categorical}}, \code{\link[=setup_approach.regression_separate]{setup_approach.regression_separate}}, \code{\link[=setup_approach.regression_surrogate]{setup_approach.regression_surrogate}}, \code{\link[=setup_approach.timeseries]{setup_approach.timeseries}} \describe{ @@ -330,13 +328,13 @@ This is useful if the underlying time series are scaled between 0 and 1, for exa \value{ Object of class \code{c("shapr", "list")}. Contains the following items: \describe{ -\item{shapley_values}{data.table with the estimated Shapley values with explained observation in the rows and +\item{shapley_values_est}{data.table with the estimated Shapley values with the explained observations in the rows and features along the columns. The column \code{none} is the prediction not devoted to any of the features (given by the argument \code{prediction_zero})} \item{shapley_values_sd}{data.table with the standard deviation of the Shapley values reflecting the uncertainty. Note that this only reflects the coalition sampling part of the kernelSHAP procedure, and is therefore by definition 0 when all coalitions are used.
-Only present when \code{adaptive = TRUE} and \code{adaptive_arguments$compute_sd=TRUE}.} +Only present when \code{extra_computation_args$compute_sd=TRUE}.} \item{internal}{List with the different parameters, data, functions and other output used internally.} \item{pred_explain}{Numeric vector with the predictions for the explained observations} \item{MSEv}{List with the values of the MSEv evaluation criterion for the approach. See the @@ -346,8 +344,8 @@ Only present when \code{adaptive = TRUE} and \code{adaptive_arguments$compute_sd \code{init_time} and \code{end_time} gives the time stamps for the start and end of the computation. \code{total_time_secs} gives the total time in seconds for the complete execution of \code{explain()}. \code{main_timing_secs} gives the time in seconds for the main computations. -\code{iter_timing_secs} gives for each iteration of the adaptive estimation, the time spent on the different parts -adaptive estimation routine.} +\code{iter_timing_secs} gives, for each iteration of the iterative estimation, the time spent on the different parts of the +iterative estimation routine.} } } \description{ @@ -370,10 +368,10 @@ on the prediction, taking into account their causal relationships, by adapting t The package allows for parallelized computation with progress updates through the tightly connected \link[future:future]{future::future} and \link[progressr:progressr]{progressr::progressr} packages. See the examples below. -For adaptive estimation (\code{adaptive=TRUE}), intermediate results may also be printed to the console +For iterative estimation (\code{iterative=TRUE}), intermediate results may also be printed to the console (according to the \code{verbose} argument). Moreover, the intermediate results are written to disk. -This combined with adaptive estimation with (optional) intermediate results printed to the console (and temporary +This, combined with iterative estimation, (optional) intermediate results printed to the console and temporarily written to disk, and batch computing of the v(S) values, enables fast and accurate estimation of the Shapley values in a memory friendly manner. } @@ -463,7 +461,7 @@ explain5 <- explain( ) # Print the Shapley values -print(explain1$shapley_values) +print(explain1$shapley_values_est) # Plot the results if (requireNamespace("ggplot2", quietly = TRUE)) { @@ -483,7 +481,7 @@ explain_groups <- explain( prediction_zero = p, n_MC_samples = 1e2 ) -print(explain_groups$shapley_values) +print(explain_groups$shapley_values_est) # Separate and surrogate regression approaches with linear regression models. # More complex regression models can be used, and we can use CV to @@ -509,19 +507,19 @@ explain_surrogate_lm <- explain( regression.model = parsnip::linear_reg() ) -## Adaptive estimation +## Iterative estimation # For illustration purposes only.
By default not used for such small dimensions as here # Gaussian approach -explain_adaptive <- explain( +explain_iterative <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "gaussian", prediction_zero = p, n_MC_samples = 1e2, - adaptive = TRUE, - adaptive_arguments = list(initial_n_coalitions = 10) + iterative = TRUE, + iterative_args = list(initial_n_coalitions = 10) ) } diff --git a/man/explain_forecast.Rd b/man/explain_forecast.Rd index c3a07622f..db66aba7a 100644 --- a/man/explain_forecast.Rd +++ b/man/explain_forecast.Rd @@ -16,14 +16,13 @@ explain_forecast( approach, prediction_zero, max_n_coalitions = NULL, - adaptive = NULL, - adaptive_arguments = list(), - shapley_reweighting = "on_all_cond", + iterative = NULL, + iterative_args = list(), + kernelSHAP_reweighting = "on_all_cond", group_lags = TRUE, group = NULL, n_MC_samples = 1000, seed = 1, - keep_samp_for_vS = FALSE, predict_model = NULL, get_model_specs = NULL, verbose = "basic", @@ -78,16 +77,16 @@ Typically we set this value equal to the mean of the response variable in our tr such as the mean of the predictions in the training data are also reasonable.} \item{max_n_coalitions}{Integer. -The upper limit on the number of unique feature/group coalitions to use in the adaptive procedure -(if \code{adaptive = TRUE}). -If \code{adaptive = FALSE} it represents the number of feature/group coalitions to use directly. +The upper limit on the number of unique feature/group coalitions to use in the iterative procedure +(if \code{iterative = TRUE}). +If \code{iterative = FALSE} it represents the number of feature/group coalitions to use directly. The quantity refers to the number of unique feature coalitions if \code{group = NULL}, and group coalitions if \code{group != NULL}. \code{max_n_coalitions = NULL} corresponds to \code{max_n_coalitions=2^n_features}.} -\item{adaptive}{Logical or NULL +\item{iterative}{Logical or NULL. If \code{NULL} (default), the argument is set to \code{TRUE} if there are more than 5 features/groups, and \code{FALSE} otherwise. -If eventually \code{TRUE}, the Shapley values are estimated adaptively in an iterative manner. +If eventually \code{TRUE}, the Shapley values are estimated iteratively. This provides sufficiently accurate Shapley value estimates faster. First an initial number of coalitions is sampled, then bootstrapping is used to estimate the variance of the Shapley values. @@ -95,13 +94,13 @@ A convergence criterion is used to determine if the variances of the Shapley val If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more coalitions. The process is repeated until the variances are below the threshold. -Specifics related to the adaptive process and convergence criterion are set through \code{adaptive_arguments}.} +Specifics related to the iterative process and convergence criterion are set through \code{iterative_args}.} -\item{adaptive_arguments}{Named list. -Specifices the arguments for the adaptive procedure. -See \code{\link[=get_adaptive_arguments_default]{get_adaptive_arguments_default()}} for description of the arguments and their default values.} +\item{iterative_args}{Named list. +Specifies the arguments for the iterative procedure. +See \code{\link[=get_iterative_args_default]{get_iterative_args_default()}} for description of the arguments and their default values.} -\item{shapley_reweighting}{String. +\item{kernelSHAP_reweighting}{String.
How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing the randomness and thereby the variance of the Shapley value estimates. One of \code{'none'}, \code{'on_N'}, \code{'on_coal_size'}, \code{'on_all'}, \code{'on_all_cond'} (default). @@ -136,10 +135,6 @@ Aas et al. (2021), i.e. the maximum number of observations (with largest weights Specifies the seed before any randomness based code is being run. If \code{NULL} no seed is set in the calling environment.} -\item{keep_samp_for_vS}{Logical. -Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in \code{internal$output}). -Not used for \code{approach="regression_separate"} or \code{approach="regression_surrogate"}.} - \item{predict_model}{Function. The prediction function used when \code{model} is not natively supported. (Run \code{\link[=get_supported_models]{get_supported_models()}} for a list of natively supported models.) @@ -168,8 +163,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress"} displays information about where in the calculation process the function currently is. \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}). +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. @@ -265,13 +260,13 @@ This is useful if the underlying time series are scaled between 0 and 1, for exa \value{ Object of class \code{c("shapr", "list")}. Contains the following items: \describe{ -\item{shapley_values}{data.table with the estimated Shapley values with explained observation in the rows and +\item{shapley_values_est}{data.table with the estimated Shapley values with the explained observations in the rows and features along the columns. The column \code{none} is the prediction not devoted to any of the features (given by the argument \code{prediction_zero})} \item{shapley_values_sd}{data.table with the standard deviation of the Shapley values reflecting the uncertainty. Note that this only reflects the coalition sampling part of the kernelSHAP procedure, and is therefore by definition 0 when all coalitions are used. -Only present when \code{adaptive = TRUE} and \code{adaptive_arguments$compute_sd=TRUE}.} +Only present when \code{extra_computation_args$compute_sd=TRUE}.} \item{internal}{List with the different parameters, data, functions and other output used internally.} \item{pred_explain}{Numeric vector with the predictions for the explained observations} \item{MSEv}{List with the values of the MSEv evaluation criterion for the approach. See the @@ -281,8 +276,8 @@ Only present when \code{adaptive = TRUE} and \code{adaptive_arguments$compute_sd \code{init_time} and \code{end_time} gives the time stamps for the start and end of the computation. \code{total_time_secs} gives the total time in seconds for the complete execution of \code{explain()}. \code{main_timing_secs} gives the time in seconds for the main computations.
-\code{iter_timing_secs} gives for each iteration of the adaptive estimation, the time spent on the different parts -adaptive estimation routine.} +\code{iter_timing_secs} gives, for each iteration of the iterative estimation, the time spent on the different parts of the +iterative estimation routine.} } } \description{ diff --git a/man/finalize_explanation_forecast.Rd b/man/finalize_explanation_forecast.Rd new file mode 100644 index 000000000..5ed40b0f4 --- /dev/null +++ b/man/finalize_explanation_forecast.Rd @@ -0,0 +1,232 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/finalize_explanation.R +\name{finalize_explanation_forecast} +\alias{finalize_explanation_forecast} +\title{Computes the Shapley values given \code{v(S)}} +\usage{ +finalize_explanation_forecast(vS_list, internal) +} +\arguments{ +\item{vS_list}{List. +Output from \code{\link[=compute_vS]{compute_vS()}}} + +\item{internal}{List. +Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} +The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} +} +\value{ +Object of class \code{c("shapr", "list")}. Contains the following items: +\describe{ +\item{shapley_values_est}{data.table with the estimated Shapley values with the explained observations in the rows and +features along the columns. +The column \code{none} is the prediction not devoted to any of the features (given by the argument \code{prediction_zero})} +\item{shapley_values_sd}{data.table with the standard deviation of the Shapley values reflecting the uncertainty. +Note that this only reflects the coalition sampling part of the kernelSHAP procedure, and is therefore by +definition 0 when all coalitions are used. +Only present when \code{extra_computation_args$compute_sd=TRUE}.} +\item{internal}{List with the different parameters, data, functions and other output used internally.} +\item{pred_explain}{Numeric vector with the predictions for the explained observations} +\item{MSEv}{List with the values of the MSEv evaluation criterion for the approach. See the +\href{https://norskregnesentral.github.io/shapr/articles/understanding_shapr.html#msev-evaluation-criterion +}{MSEv evaluation section in the vignette for details}.} +\item{timing}{List containing timing information for the different parts of the computation. +\code{init_time} and \code{end_time} gives the time stamps for the start and end of the computation. +\code{total_time_secs} gives the total time in seconds for the complete execution of \code{explain()}. +\code{main_timing_secs} gives the time in seconds for the main computations. +\code{iter_timing_secs} gives, for each iteration of the iterative estimation, the time spent on the different parts of the +iterative estimation routine.} +} +} +\description{ +Computes dependence-aware Shapley values for observations in \code{x_explain} from the specified +\code{model} by using the method specified in \code{approach} to estimate the conditional expectation. +} +\details{ +The \code{shapr} package implements kernelSHAP estimation of dependence-aware Shapley values with +eight different Monte Carlo-based approaches for estimating the conditional distributions of the data, namely +\code{"empirical"}, \code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"vaeac"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}.
+\code{shapr} has also implemented two regression-based approaches \code{"regression_separate"} and \code{"regression_surrogate"}. +It is also possible to combine the different approaches, see the vignettes for more information. + +The package also supports the computation of causal and asymmetric Shapley values as introduced by +Heskes et al. (2020) and Frye et al. (2020). Asymmetric Shapley values were proposed by Frye et al. (2020) +as a way to incorporate causal knowledge in the real world by restricting the possible feature +combinations/coalitions when computing the Shapley values to those consistent with a (partial) causal ordering. +Causal Shapley values were proposed by Heskes et al. (2020) as a way to explain the total effect of features +on the prediction, taking into account their causal relationships, by adapting the sampling procedure in \code{shapr}. + +The package allows for parallelized computation with progress updates through the tightly connected +\link[future:future]{future::future} and \link[progressr:progressr]{progressr::progressr} packages. See the examples below. +For iterative estimation (\code{iterative=TRUE}), intermediate results may also be printed to the console +(according to the \code{verbose} argument). +Moreover, the intermediate results are written to disk. +This, combined with iterative estimation, (optional) intermediate results printed to the console and temporarily +written to disk, and batch computing of the v(S) values, enables fast and accurate estimation of the Shapley values +in a memory friendly manner. +} +\examples{ + +# Load example data +data("airquality") +airquality <- airquality[complete.cases(airquality), ] +x_var <- c("Solar.R", "Wind", "Temp", "Month") +y_var <- "Ozone" + +# Split data into test- and training data +data_train <- head(airquality, -3) +data_explain <- tail(airquality, 3) + +x_train <- data_train[, x_var] +x_explain <- data_explain[, x_var] + +# Fit a linear model +lm_formula <- as.formula(paste0(y_var, " ~ ", paste0(x_var, collapse = " + "))) +model <- lm(lm_formula, data = data_train) + +# Explain predictions +p <- mean(data_train[, y_var]) + +\dontrun{ +# (Optionally) enable parallelization via the future package +if (requireNamespace("future", quietly = TRUE)) { + future::plan("multisession", workers = 2) +} +} + +# (Optionally) enable progress updates within every iteration via the progressr package +if (requireNamespace("progressr", quietly = TRUE)) { + progressr::handlers(global = TRUE) +} + +# Empirical approach +explain1 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "empirical", + prediction_zero = p, + n_MC_samples = 1e2 +) + +# Gaussian approach +explain2 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = p, + n_MC_samples = 1e2 +) + +# Gaussian copula approach +explain3 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "copula", + prediction_zero = p, + n_MC_samples = 1e2 +) + +# ctree approach +explain4 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "ctree", + prediction_zero = p, + n_MC_samples = 1e2 +) + +# Combined approach +approach <- c("gaussian", "gaussian", "empirical") +explain5 <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = approach, + prediction_zero = p, + n_MC_samples = 1e2 +) + +# Print the Shapley values +print(explain1$shapley_values_est) + +# Plot the results
+if (requireNamespace("ggplot2", quietly = TRUE)) { + plot(explain1) + plot(explain1, plot_type = "waterfall") +} + +# Group-wise explanations +group_list <- list(A = c("Temp", "Month"), B = c("Wind", "Solar.R")) + +explain_groups <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + group = group_list, + approach = "empirical", + prediction_zero = p, + n_MC_samples = 1e2 +) +print(explain_groups$shapley_values_est) + +# Separate and surrogate regression approaches with linear regression models. +# More complex regression models can be used, and we can use CV to +# tune the hyperparameters of the regression models and preprocess +# the data before sending it to the model. See the regression vignette +# (Shapley value explanations using the regression paradigm) for more +# details about the `regression_separate` and `regression_surrogate` approaches. +explain_separate_lm <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + prediction_zero = p, + approach = "regression_separate", + regression.model = parsnip::linear_reg() +) + +explain_surrogate_lm <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + prediction_zero = p, + approach = "regression_surrogate", + regression.model = parsnip::linear_reg() +) + +## iterative estimation +# For illustration purposes only. By default not used for such small dimensions as here + +# Gaussian approach +explain_iterative <- explain( + model = model, + x_explain = x_explain, + x_train = x_train, + approach = "gaussian", + prediction_zero = p, + n_MC_samples = 1e2, + iterative = TRUE, + iterative_args = list(initial_n_coalitions = 10) +) + +} +\references{ +\itemize{ +\item Aas, K., Jullum, M., & Lland, A. (2021). Explaining individual predictions when features are dependent: +More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502. +\item Frye, C., Rowat, C., & Feige, I. (2020). Asymmetric Shapley values: +incorporating causal knowledge into model-agnostic explainability. +Advances in neural information processing systems, 33, 1229-1239. +\item Heskes, T., Sijben, E., Bucur, I. G., & Claassen, T. (2020). Causal shapley values: +Exploiting causal knowledge to explain individual predictions of complex models. +Advances in neural information processing systems, 33, 4778-4789. +\item Olsen, L. H. B., Glad, I. K., Jullum, M., & Aas, K. (2024). A comparative study of methods for estimating +model-agnostic Shapley value explanations. Data Mining and Knowledge Discovery, 1-48. 
+} +} +\author{ +Martin Jullum, Lars Henry Berge Olsen +} diff --git a/man/get_adaptive_arguments_default.Rd b/man/get_adaptive_arguments_default.Rd deleted file mode 100644 index 11c655e89..000000000 --- a/man/get_adaptive_arguments_default.Rd +++ /dev/null @@ -1,71 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/setup.R -\name{get_adaptive_arguments_default} -\alias{get_adaptive_arguments_default} -\title{Function to specify arguments of the adaptive estimation procedure} -\usage{ -get_adaptive_arguments_default( - internal, - initial_n_coalitions = ceiling(min(200, internal$parameters$max_n_coalitions_causal, - internal$parameters$max_n_coalitions, max(5, internal$parameters$n_features, - (2^internal$parameters$n_features)/10))), - fixed_n_coalitions_per_iter = NULL, - max_iter = 20, - convergence_tolerance = 0.02, - reduction_factor_vec = c(seq(0.1, 1, by = 0.1), rep(1, max_iter - 10)), - n_boot_samps = 100, - compute_sd = isTRUE(internal$parameters$adaptive), - max_batch_size = 10, - min_n_batches = 10, - saving_path = tempfile("shapr_obj_", fileext = ".rds") -) -} -\arguments{ -\item{internal}{List. -Not used directly, but passed through from \code{\link[=explain]{explain()}}.} - -\item{initial_n_coalitions}{Integer. Number of coalitions to use in the first estimation iteration.} - -\item{fixed_n_coalitions_per_iter}{Integer. Number of \code{n_coalitions} to use in each iteration. -\code{NULL} (default) means setting it based on estimates based on a set convergence threshold.} - -\item{max_iter}{Integer. Maximum number of estimation iterations} - -\item{convergence_tolerance}{Numeric. The t variable in the convergence threshold formula on page 6 in the paper -Covert and Lee (2021), 'Improving KernelSHAP: Practical Shapley Value Estimation via Linear Regression' -https://arxiv.org/pdf/2012.01536. Smaller values requires more coalitions before convergence is reached.} - -\item{reduction_factor_vec}{Numeric vector. The number of \code{n_coalitions} that must be used to reach convergence -in the next iteration is estimated. -The number of \code{n_coalitions} actually used in the next iteration is set to this estimate multiplied by -\code{reduction_factor_vec[i]} for iteration \code{i}. -It is wise to start with smaller numbers to avoid using too many \code{n_coalitions} due to uncertain estimates in -the first iterations.} - -\item{n_boot_samps}{Integer. The number of bootstrapped samples (i.e. samples with replacement) from the set of all -coalitions used to estimate the standard deviations of the Shapley value estimates.} - -\item{compute_sd}{Logical. Whether to estimate the standard deviations of the Shapley value estimates.} - -\item{max_batch_size}{Integer. The maximum number of coalitions to estimate simultaneously within each iteration. -A larger numbers requires more memory, but may have a slight computational advantage.} - -\item{min_n_batches}{Integer. The minimum number of batches to split the computation into within each iteration. -Larger numbers gives more frequent progress updates. If parallelization is applied, this should be set no smaller -than the number of parallel workers.} - -\item{saving_path}{String. -The path to the directory where the results of the adaptive estimation procedure should be saved. 
-Defaults to a temporary directory.} -} -\description{ -Function to specify arguments of the adaptive estimation procedure -} -\details{ -The functions sets default values for the adaptive estimation procedure, according to the function defaults. -If the argument \code{adaptive} of \code{\link[=explain]{explain()}} is FALSE, it sets parameters corresponding to the use of a -non-adaptive estimation procedure -} -\author{ -Martin Jullum -} diff --git a/man/get_extra_est_args_default.Rd b/man/get_extra_est_args_default.Rd new file mode 100644 index 000000000..4f7772532 --- /dev/null +++ b/man/get_extra_est_args_default.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup.R +\name{get_extra_est_args_default} +\alias{get_extra_est_args_default} +\title{Gets the default values for the extra estimation arguments} +\usage{ +get_extra_est_args_default( + internal, + compute_sd = isFALSE(internal$parameters$exact), + n_boot_samps = 100, + max_batch_size = 10, + min_n_batches = 10 +) +} +\arguments{ +\item{internal}{List. +Not used directly, but passed through from \code{\link[=explain]{explain()}}.} + +\item{compute_sd}{Logical. Whether to estimate the standard deviations of the Shapley value estimates. This is TRUE +whenever sampling-based kernelSHAP is applied (either iteratively or with a fixed number of coalitions).} + +\item{n_boot_samps}{Integer. The number of bootstrapped samples (i.e. samples with replacement) from the set of all +coalitions used to estimate the standard deviations of the Shapley value estimates.} + +\item{max_batch_size}{Integer. The maximum number of coalitions to estimate simultaneously within each iteration. +A larger number requires more memory, but may have a slight computational advantage.} + +\item{min_n_batches}{Integer. The minimum number of batches to split the computation into within each iteration. +Larger numbers give more frequent progress updates. If parallelization is applied, this should be set no smaller +than the number of parallel workers.} +} +\description{ +Gets the default values for the extra estimation arguments +} +\author{ +Martin Jullum +} diff --git a/man/get_iterative_args_default.Rd b/man/get_iterative_args_default.Rd new file mode 100644 index 000000000..ca995d454 --- /dev/null +++ b/man/get_iterative_args_default.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup.R +\name{get_iterative_args_default} +\alias{get_iterative_args_default} +\title{Function to specify arguments of the iterative estimation procedure} +\usage{ +get_iterative_args_default( + internal, + initial_n_coalitions = ceiling(min(200, max(5, internal$parameters$n_features, + (2^internal$parameters$n_features)/10))), + fixed_n_coalitions_per_iter = NULL, + max_iter = 20, + convergence_tol = 0.02, + n_coal_next_iter_factor_vec = c(seq(0.1, 1, by = 0.1), rep(1, max_iter - 10)) +) +} +\arguments{ +\item{internal}{List. +Not used directly, but passed through from \code{\link[=explain]{explain()}}.} + +\item{initial_n_coalitions}{Integer. Number of coalitions to use in the first estimation iteration.} + +\item{fixed_n_coalitions_per_iter}{Integer. Number of \code{n_coalitions} to use in each iteration. +\code{NULL} (default) means setting it based on estimates of the number needed to meet a set convergence threshold.} + +\item{max_iter}{Integer. Maximum number of estimation iterations.} + +\item{convergence_tol}{Numeric.
The t variable in the convergence threshold formula on page 6 in the paper +Covert and Lee (2021), 'Improving KernelSHAP: Practical Shapley Value Estimation via Linear Regression' +https://arxiv.org/pdf/2012.01536. Smaller values require more coalitions before convergence is reached.} + +\item{n_coal_next_iter_factor_vec}{Numeric vector. At each iteration, the number of \code{n_coalitions} needed to +reach convergence in the next iteration is estimated. +The number of \code{n_coalitions} actually used in the next iteration is set to this estimate multiplied by +\code{n_coal_next_iter_factor_vec[i]} for iteration \code{i}. +It is wise to start with smaller factors to avoid using too many \code{n_coalitions} due to uncertain estimates in +the first iterations.} +} +\description{ +Function to specify arguments of the iterative estimation procedure +} +\details{ +The function sets default values for the iterative estimation procedure, according to the function +defaults. +If the argument \code{iterative} of \code{\link[=explain]{explain()}} is FALSE, it sets parameters corresponding to the use of a +non-iterative estimation procedure. +} +\author{ +Martin Jullum +} diff --git a/man/get_output_args_default.Rd b/man/get_output_args_default.Rd new file mode 100644 index 000000000..4365118bc --- /dev/null +++ b/man/get_output_args_default.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/setup.R +\name{get_output_args_default} +\alias{get_output_args_default} +\title{Gets the default values for the output arguments} +\usage{ +get_output_args_default( + keep_samp_for_vS = FALSE, + MSEv_uniform_comb_weights = TRUE, + saving_path = tempfile("shapr_obj_", fileext = ".rds") +) +} +\arguments{ +\item{keep_samp_for_vS}{Logical. +Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in \code{internal$output}). +Not used for \code{approach="regression_separate"} or \code{approach="regression_surrogate"}.} + +\item{MSEv_uniform_comb_weights}{Logical. +If \code{TRUE} (default), then the function weights the coalitions uniformly when computing the MSEv criterion. +If \code{FALSE}, then the function uses the Shapley kernel weights to weight the coalitions when computing the MSEv +criterion. +Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered.} + +\item{saving_path}{String. +The path to the directory where the results of the iterative estimation procedure should be saved. +Defaults to a temporary directory.} +} +\description{ +Gets the default values for the output arguments +} +\author{ +Martin Jullum +} diff --git a/man/prepare_data_single_coalition.Rd b/man/prepare_data_single_coalition.Rd index e40830a1c..9bd170b2b 100644 --- a/man/prepare_data_single_coalition.Rd +++ b/man/prepare_data_single_coalition.Rd @@ -10,7 +10,7 @@ prepare_data_single_coalition(internal, index_features) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} } \description{ The \code{\link[=prepare_data.categorical]{prepare_data.categorical()}} function is slow when evaluated for a single coalition.
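Taken together, the three new man pages above document the defaults helpers behind the renamed list arguments of explain(). A minimal usage sketch follows; the model/data objects and the chosen values are placeholders, and any entry left out of the three lists falls back to the documented defaults:

library(shapr)

# Placeholder objects: any supported fitted model, its training data x_train,
# the explicands x_explain, and a baseline prediction p0. Unset entries fall
# back to get_iterative_args_default(), get_output_args_default() and
# get_extra_est_args_default(), respectively.
explanation <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  iterative = TRUE,
  iterative_args = list(initial_n_coalitions = 50, max_iter = 20, convergence_tol = 0.02),
  output_args = list(keep_samp_for_vS = FALSE),
  extra_computation_args = list(n_boot_samps = 100, max_batch_size = 10, min_n_batches = 10)
)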
diff --git a/man/prepare_next_iteration.Rd b/man/prepare_next_iteration.Rd index 9a23bd2d3..996a7330d 100644 --- a/man/prepare_next_iteration.Rd +++ b/man/prepare_next_iteration.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/prepare_next_iteration.R \name{prepare_next_iteration} \alias{prepare_next_iteration} -\title{Prepares the next iteration of the adaptive sampling algorithm} +\title{Prepares the next iteration of the iterative sampling algorithm} \usage{ prepare_next_iteration(internal) } @@ -11,6 +11,6 @@ prepare_next_iteration(internal) Not used directly, but passed through from \code{\link[=explain]{explain()}}.} } \description{ -Prepares the next iteration of the adaptive sampling algorithm +Prepares the next iteration of the iterative sampling algorithm } \keyword{internal} diff --git a/man/regression.check_parameters.Rd b/man/regression.check_parameters.Rd index 73930ae26..55c2f3e22 100644 --- a/man/regression.check_parameters.Rd +++ b/man/regression.check_parameters.Rd @@ -10,7 +10,7 @@ regression.check_parameters(internal) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} } \value{ The same \code{internal} list, but added logical indicator \code{internal$parameters$regression.tune} diff --git a/man/regression.get_y_hat.Rd b/man/regression.get_y_hat.Rd index ff02d4cb4..9eff9cbd5 100644 --- a/man/regression.get_y_hat.Rd +++ b/man/regression.get_y_hat.Rd @@ -10,7 +10,7 @@ regression.get_y_hat(internal, model, predict_model) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{model}{Objects. The model object that ought to be explained. diff --git a/man/regression.surrogate_aug_data.Rd b/man/regression.surrogate_aug_data.Rd index 14c97aabe..2acb6c419 100644 --- a/man/regression.surrogate_aug_data.Rd +++ b/man/regression.surrogate_aug_data.Rd @@ -20,7 +20,7 @@ regression.surrogate_aug_data( \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{x}{Data.table containing the data. Either the training data or the explicands. If \code{x} is the explicands, then \code{index_features} must be provided.} diff --git a/man/regression.train_model.Rd b/man/regression.train_model.Rd index 60f227175..6d5c0807e 100644 --- a/man/regression.train_model.Rd +++ b/man/regression.train_model.Rd @@ -32,8 +32,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. 
\verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/setup.Rd b/man/setup.Rd index 62be520ba..28b96bfb4 100644 --- a/man/setup.Rd +++ b/man/setup.Rd @@ -15,9 +15,7 @@ setup( group, n_MC_samples, seed, - keep_samp_for_vS, feature_specs, - MSEv_uniform_comb_weights = TRUE, type = "normal", horizon = NULL, y = NULL, @@ -28,9 +26,9 @@ setup( explain_xreg_lags = NULL, group_lags = NULL, verbose, - adaptive = NULL, - adaptive_arguments = list(), - shapley_reweighting = "none", + iterative = NULL, + iterative_args = list(), + kernelSHAP_reweighting = "none", is_python = FALSE, testing = FALSE, init_time = NULL, @@ -38,6 +36,8 @@ setup( asymmetric = FALSE, causal_ordering = NULL, confounding = NULL, + output_args = list(), + extra_computation_args = list(), ... ) } @@ -68,9 +68,9 @@ such as the mean of the predictions in the training data are also reasonable.} \item{output_size}{TODO: Document} \item{max_n_coalitions}{Integer. -The upper limit on the number of unique feature/group coalitions to use in the adaptive procedure -(if \code{adaptive = TRUE}). -If \code{adaptive = FALSE} it represents the number of feature/group coalitions to use directly. +The upper limit on the number of unique feature/group coalitions to use in the iterative procedure +(if \code{iterative = TRUE}). +If \code{iterative = FALSE} it represents the number of feature/group coalitions to use directly. The quantity refers to the number of unique feature coalitions if \code{group = NULL}, and group coalitions if \code{group != NULL}. \code{max_n_coalitions = NULL} corresponds to \code{max_n_coalitions=2^n_features}.} @@ -93,10 +93,6 @@ Aas et al. (2021), i.e. the maximum number of observations (with largest weights Specifies the seed before any randomness based code is being run. If \code{NULL} no seed is set in the calling environment.} -\item{keep_samp_for_vS}{Logical. -Indicates whether the samples used in the Monte Carlo estimation of v_S should be returned (in \code{internal$output}). -Not used for \code{approach="regression_separate"} or \code{approach="regression_surrogate"}.} - \item{feature_specs}{List. The output from \code{\link[=get_model_specs]{get_model_specs()}} or \code{\link[=get_data_specs]{get_data_specs()}}. Contains the 3 elements: \describe{ @@ -105,12 +101,6 @@ Contains the 3 elements: \item{factor_levels}{Character vector with the levels for any categorical features.} }} -\item{MSEv_uniform_comb_weights}{Logical. -If \code{TRUE} (default), then the function weights the coalitions uniformly when computing the MSEv criterion. -If \code{FALSE}, then the function use the Shapley kernel weights to weight the coalitions when computing the MSEv -criterion. -Note that the Shapley kernel weights are replaced by the sampling frequency when not all coalitions are considered.} - \item{type}{Character. 
Either "normal" or "forecast" corresponding to function \code{setup()} is called from, correspondingly the type of explanation that should be generated.} @@ -154,8 +144,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. @@ -165,9 +155,9 @@ Note that any combination of four strings can be used. E.g. \code{verbose = c("basic", "vS_details")} will display basic information + details about the vS estimation process. }} -\item{adaptive}{Logical or NULL +\item{iterative}{Logical or NULL If \code{NULL} (default), the argument is set to \code{TRUE} if there are more than 5 features/groups, and \code{FALSE} otherwise. -If eventually \code{TRUE}, the Shapley values are estimated adaptively in an iterative manner. +If eventually \code{TRUE}, the Shapley values are estimated iteratively in an iterative manner. This provides sufficiently accurate Shapley value estimates faster. First an initial number of coalitions is sampled, then bootsrapping is used to estimate the variance of the Shapley values. @@ -175,13 +165,13 @@ A convergence criterion is used to determine if the variances of the Shapley val If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more coalitions. The process is repeated until the variances are below the threshold. -Specifics related to the adaptive process and convergence criterion are set through \code{adaptive_arguments}.} +Specifics related to the iterative process and convergence criterion are set through \code{iterative_args}.} -\item{adaptive_arguments}{Named list. -Specifices the arguments for the adaptive procedure. -See \code{\link[=get_adaptive_arguments_default]{get_adaptive_arguments_default()}} for description of the arguments and their default values.} +\item{iterative_args}{Named list. +Specifices the arguments for the iterative procedure. +See \code{\link[=get_iterative_args_default]{get_iterative_args_default()}} for description of the arguments and their default values.} -\item{shapley_reweighting}{String. +\item{kernelSHAP_reweighting}{String. How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing the randomness and thereby the variance of the Shapley value estimates. One of \code{'none'}, \code{'on_N'}, \code{'on_all'}, \code{'on_all_cond'} (default). @@ -210,7 +200,7 @@ Used to calculate the time it took to run the full \code{explain} call.} If an object of class \code{shapr} is provided or string with a path to where intermediate results are strored, then the function will use the previous object to continue the computation. This is useful if the computation is interrupted or you want higher accuracy than already obtained, and therefore -want to continue the adaptive estimation. 
See the vignette for examples.} +want to continue the iterative estimation. See the vignette for examples.} \item{asymmetric}{Logical. Not applicable for (regular) non-causal or asymmetric explanations. @@ -248,6 +238,14 @@ specified, then \code{explain} computes asymmetric/symmetric causal Shapley valu \code{asymmetric}. The \code{approach} cannot be \code{regression_separate} and \code{regression_surrogate} as the regression-based approaches are not applicable to the causal Shapley value methodology.} +\item{output_args}{Named list. +Specifies certain arguments related to the output of the function. +See \code{\link[=get_output_args_default]{get_output_args_default()}} for a description of the arguments and their default values.} + +\item{extra_computation_args}{Named list. +Specifies extra arguments related to the computation of the Shapley values. +See \code{\link[=get_extra_est_args_default]{get_extra_est_args_default()}} for a description of the arguments and their default values.} + \item{...}{Further arguments passed to specific approaches} } \description{ diff --git a/man/setup_computation.Rd b/man/setup_computation.Rd index ca128d70b..afd255e00 100644 --- a/man/setup_computation.Rd +++ b/man/setup_computation.Rd @@ -10,7 +10,7 @@ setup_computation(internal, model, predict_model) \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} \item{model}{Objects. The model object that ought to be explained. diff --git a/man/test_predict_model.Rd b/man/test_predict_model.Rd index 40adb5b01..b43d1f6ec 100644 --- a/man/test_predict_model.Rd +++ b/man/test_predict_model.Rd @@ -18,7 +18,7 @@ See the documentation of \code{\link[=explain]{explain()}} for details.} \item{internal}{List. Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} +\code{main_timing_list}, \code{output}, and \code{iter_timing_list}.} } \description{ Model testing function diff --git a/man/vaeac_check_parameters.Rd b/man/vaeac_check_parameters.Rd index d49550d7c..70b539ee6 100644 --- a/man/vaeac_check_parameters.Rd +++ b/man/vaeac_check_parameters.Rd @@ -169,8 +169,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}). +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates.
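The continuation mechanism documented above pairs with \code{output_args$saving_path}: a hedged sketch follows, in which the argument name `prev_shapr_object` is an assumption, since the hunk above cuts off the \item tag naming it.

# Sketch of resuming an interrupted or unconverged iterative estimation.
# NOTE: `prev_shapr_object` is an assumed argument name; the hunk above only
# shows its description text. The alternative .rds path is the one set via
# output_args$saving_path in the first call.
explanation_continued <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  iterative = TRUE,
  iterative_args = list(max_iter = 40),           # allow more iterations than before
  verbose = c("basic", "convergence", "shapley"), # the latter two require iterative = TRUE
  prev_shapr_object = explanation                 # or the path to the saved shapr .rds file
)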
diff --git a/man/vaeac_get_extra_para_default.Rd b/man/vaeac_get_extra_para_default.Rd index 27968c11d..8b54f75f9 100644 --- a/man/vaeac_get_extra_para_default.Rd +++ b/man/vaeac_get_extra_para_default.Rd @@ -80,7 +80,7 @@ during the training of the vaeac model. Used in \code{\link[torch:dataloader]{to each batch when generating the Monte Carlo samples. If \code{NULL}, then the function generates the Monte Carlo samples for the provided coalitions and all explicands sent to \code{\link[=explain]{explain()}} at the time. The number of coalitions are determined by the \code{n_batches} used by \code{\link[=explain]{explain()}}. We recommend to tweak -\code{adaptive_arguments$max_batch_size} and \code{adaptive_arguments$min_n_batches} +\code{extra_computation_args$max_batch_size} and \code{extra_computation_args$min_n_batches} rather than \code{vaeac.batch_size_sampling}. Larger batch sizes are often much faster provided sufficient memory.} \item{vaeac.running_avg_n_values}{Positive integer (default is \code{5}). The number of previous IWAE values to include diff --git a/man/vaeac_get_mask_generator_name.Rd b/man/vaeac_get_mask_generator_name.Rd index 4fdfeae2b..00601f6d7 100644 --- a/man/vaeac_get_mask_generator_name.Rd +++ b/man/vaeac_get_mask_generator_name.Rd @@ -33,8 +33,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}). +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/vaeac_impute_missing_entries.Rd b/man/vaeac_impute_missing_entries.Rd index 2cb567060..e1f36ce83 100644 --- a/man/vaeac_impute_missing_entries.Rd +++ b/man/vaeac_impute_missing_entries.Rd @@ -37,8 +37,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}). +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/vaeac_train_model.Rd b/man/vaeac_train_model.Rd index cfa8e88b6..4d1314f5e 100644 --- a/man/vaeac_train_model.Rd +++ b/man/vaeac_train_model.Rd @@ -169,8 +169,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed.
\verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/vaeac_train_model_auxiliary.Rd b/man/vaeac_train_model_auxiliary.Rd index 889b5d5d5..65f1fb617 100644 --- a/man/vaeac_train_model_auxiliary.Rd +++ b/man/vaeac_train_model_auxiliary.Rd @@ -49,8 +49,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. diff --git a/man/vaeac_train_model_continue.Rd b/man/vaeac_train_model_continue.Rd index 645025ea0..552c561c7 100644 --- a/man/vaeac_train_model_continue.Rd +++ b/man/vaeac_train_model_continue.Rd @@ -32,8 +32,8 @@ Specifies the verbosity (printout detail level) through one or more of strings \ \code{"basic"} (default) displays basic information about the computation which is being performed. \verb{"progress} displays information about where in the calculation process the function currently is. #' \code{"convergence"} displays information on how close to convergence the Shapley value estimates are -(only when \code{adaptive = TRUE}) . -\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{adaptive = TRUE}) +(only when \code{iterative = TRUE}) . +\code{"shapley"} displays intermediate Shapley value estimates and standard deviations (only when \code{iterative = TRUE}) \itemize{ \item the final estimates. \code{"vS_details"} displays information about the v_S estimates. 
diff --git a/python/shaprpy/explain.py b/python/shaprpy/explain.py index 1e0642227..ef3f1e147 100644 --- a/python/shaprpy/explain.py +++ b/python/shaprpy/explain.py @@ -195,7 +195,7 @@ def explain( routput = regression_remove_objects(routput) # Convert R objects to Python objects - df_shapley = r2py(base.as_data_frame(routput.rx2('shapley_values'))) + df_shapley = r2py(base.as_data_frame(routput.rx2('shapley_values_est'))) pred_explain = r2py(routput.rx2('pred_explain')) internal = recurse_r_tree(routput.rx2('internal')) MSEv = recurse_r_tree(routput.rx2('MSEv')) @@ -218,7 +218,7 @@ def batch_compute_vS(S, rinternal, model, predict_model): if regression: dt_vS = shapr.batch_prepare_vS_regression(S=S, internal=rinternal) else: - # dt_vS is either only dt_vS or a list containing dt_vS and dt if internal$parameters$keep_samp_for_vS = TRUE + # dt_vS is either only dt_vS or a list containing dt_vS and dt if internal$parameters$output_args$keep_samp_for_vS = TRUE dt_vS = batch_prepare_vS_MC(S=S, rinternal=rinternal, model=model, predict_model=predict_model) return dt_vS @@ -418,4 +418,4 @@ def change_first_underscore_to_dot(kwargs): kwargs_tmp = {} for k, v in kwargs.items(): kwargs_tmp[k.replace('_', '.', 1)] = v - return kwargs_tmp \ No newline at end of file + return kwargs_tmp diff --git a/tests/testthat/_snaps/adaptive-output.md b/tests/testthat/_snaps/adaptive-output.md index 7e483da59..72239e8d0 100644 --- a/tests/testthat/_snaps/adaptive-output.md +++ b/tests/testthat/_snaps/adaptive-output.md @@ -9,11 +9,11 @@ * Model class: * Approach: independence - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. @@ -621,11 +621,11 @@ * Model class: * Approach: gaussian - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. @@ -659,11 +659,11 @@ * Model class: * Approach: gaussian - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. @@ -725,11 +725,11 @@ * Model class: * Approach: gaussian - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. @@ -859,11 +859,11 @@ * Model class: * Approach: gaussian - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. 
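As the shaprpy change above reflects, the Shapley value estimates now live in the `shapley_values_est` element of the returned object (previously `shapley_values`). On the R side, for an explain() result such as the sketches earlier in this diff:

head(explanation$shapley_values_est)  # the Shapley value estimates
explanation$pred_explain              # predictions for the explicands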
diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds index 7ce9cd8c5..7394eaf89 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_indep_conv_max_n_coalitions.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_indep_conv_max_n_coalitions.rds index 43d0a4474..76286933e 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_indep_conv_max_n_coalitions.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_indep_conv_max_n_coalitions.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_object.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_object.rds index c2981af60..96aab6ef9 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_object.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_object.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_path.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_path.rds index c2981af60..96aab6ef9 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_path.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_cont_est_path.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_maxit.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_maxit.rds index ea18b8031..de01585c9 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_maxit.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_maxit.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol.rds index faee30197..b87810bfb 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol_paired.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol_paired.rds index faee30197..b87810bfb 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol_paired.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_converges_tol_paired.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_reach_exact.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_reach_exact.rds index 581e02a88..355c868db 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_reach_exact.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_independence_reach_exact.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_verbose_1.rds 
b/tests/testthat/_snaps/adaptive-output/output_verbose_1.rds index b3ee6c1f7..0697297cc 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_verbose_1.rds and b/tests/testthat/_snaps/adaptive-output/output_verbose_1.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3.rds b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3.rds index e0d2d8bf9..6011838ef 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3.rds and b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4.rds b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4.rds index 66a19b7da..434f78f8f 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4.rds and b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4.rds differ diff --git a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4_5.rds b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4_5.rds index 2b9369fa0..b7ebe066f 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4_5.rds and b/tests/testthat/_snaps/adaptive-output/output_verbose_1_3_4_5.rds differ diff --git a/tests/testthat/_snaps/adaptive-setup.md b/tests/testthat/_snaps/adaptive-setup.md index d8f0d0c82..1b331af40 100644 --- a/tests/testthat/_snaps/adaptive-setup.md +++ b/tests/testthat/_snaps/adaptive-setup.md @@ -4,15 +4,15 @@ n_batches_non_numeric_1 <- "bla" explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_numeric_1)) + extra_computation_args = list(min_n_batches = n_batches_non_numeric_1)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! `adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. --- @@ -20,15 +20,15 @@ n_batches_non_numeric_2 <- TRUE explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_numeric_2)) + extra_computation_args = list(min_n_batches = n_batches_non_numeric_2)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! `adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. --- @@ -36,15 +36,15 @@ n_batches_non_integer <- 10.5 explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_integer)) + extra_computation_args = list(min_n_batches = n_batches_non_integer)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! 
`adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. --- @@ -52,15 +52,15 @@ n_batches_too_long <- c(1, 2) explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_too_long)) + extra_computation_args = list(min_n_batches = n_batches_too_long)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! `adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. --- @@ -68,15 +68,15 @@ n_batches_is_NA <- as.numeric(NA) explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_is_NA)) + extra_computation_args = list(min_n_batches = n_batches_is_NA)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! `adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. --- @@ -84,13 +84,13 @@ n_batches_non_positive <- 0 explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_positive)) + extra_computation_args = list(min_n_batches = n_batches_non_positive)) Message Success with message: max_n_coalitions is NULL or larger than or 2^n_features = 32, and is therefore set to 2^n_features = 32. Condition - Error in `check_adaptive_arguments()`: - ! `adaptive_arguments$min_n_batches` must be NULL or a single positive integer. + Error in `check_extra_computation_args()`: + ! `extra_computation_args$min_n_batches` must be NULL or a single positive integer. 
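The snapshots above exercise the inputs that check_extra_computation_args() rejects; for contrast, a sketch of a call that passes the renamed check, reusing the test fixtures from those snapshots:

explain(
  testing = TRUE,
  model = model_lm_numeric,
  x_explain = x_explain_numeric,
  x_train = x_train_numeric,
  approach = "independence",
  prediction_zero = p0,
  extra_computation_args = list(min_n_batches = 10)  # NULL or a single positive integer
)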
diff --git a/tests/testthat/_snaps/asymmetric-causal-output.md b/tests/testthat/_snaps/asymmetric-causal-output.md index 614566a6a..0177b8b4d 100644 --- a/tests/testthat/_snaps/asymmetric-causal-output.md +++ b/tests/testthat/_snaps/asymmetric-causal-output.md @@ -9,7 +9,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -36,7 +36,7 @@ * Model class: * Approach: regression_separate - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -52,7 +52,7 @@ 2: 2 42.44 5.546 -6.262 -4.518 -6.664 -1.982 3: 3 42.44 9.720 -32.555 7.270 -3.377 1.374 -# output_asym_cond_reg_adaptive +# output_asym_cond_reg_iterative Code (out <- code) @@ -63,13 +63,13 @@ * Model class: * Approach: regression_separate - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 * Causal ordering: {Solar.R, Wind}, {Temp}, {Month, Day} - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 8 coalitions, 5 new. @@ -94,7 +94,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 @@ -119,7 +119,7 @@ * Model class: * Approach: independence - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind, Temp, Month, Day} @@ -146,7 +146,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind, Temp, Month, Day} @@ -173,7 +173,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -201,7 +201,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -229,7 +229,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -253,7 +253,7 @@ Message * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -281,7 +281,7 @@ * Model class: * Approach: empirical - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -309,7 +309,7 @@ * Model class: * Approach: ctree - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of 
asymmetric coalitions: 8 @@ -337,7 +337,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind}, {Temp}, {Month, Day} @@ -364,7 +364,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind}, {Temp}, {Month, Day} @@ -391,7 +391,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind}, {Temp}, {Month, Day} @@ -418,7 +418,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of group-wise Shapley values: 3 * Number of observations to explain: 3 * Causal ordering: {A, B}, {C} @@ -445,7 +445,7 @@ * Model class: * Approach: gaussian - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of group-wise Shapley values: 3 * Number of observations to explain: 3 * Causal ordering: {A}, {B}, {C} @@ -461,7 +461,7 @@ 2: 2 42.44 3.126 -6.343 -10.662 3: 3 42.44 5.310 -17.036 -5.842 -# output_sym_caus_conf_mix_group_adaptive +# output_sym_caus_conf_mix_group_iterative Code (out <- code) @@ -494,7 +494,7 @@ * Model class: * Approach: ctree - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind}, {Temp}, {Day, Month_factor} @@ -510,7 +510,7 @@ 2: 2 42.44 4.729 -11.40 -7.837 1.6971 -2.570 3: 3 42.44 3.010 -23.62 3.218 4.8728 1.922 -# output_mixed_sym_caus_conf_TRUE_adaptive +# output_mixed_sym_caus_conf_TRUE_iterative Code (out <- code) @@ -521,13 +521,13 @@ * Model class: * Approach: ctree - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Causal ordering: {Solar.R, Wind}, {Temp}, {Day, Month_factor} * Components with confounding: {Solar.R, Wind}, {Temp}, {Day, Month_factor} - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 32 coalitions, 5 new. @@ -570,7 +570,7 @@ * Model class: * Approach: ctree - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -598,7 +598,7 @@ * Model class: * Approach: ctree - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 @@ -626,13 +626,13 @@ * Model class: * Approach: regression_separate - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 5 * Number of observations to explain: 3 * Number of asymmetric coalitions: 8 * Causal ordering: {Solar.R, Wind}, {Temp}, {Day, Month_factor} - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 8 coalitions, 5 new. 
@@ -657,7 +657,7 @@ * Model class: * Approach: categorical - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 4 * Number of observations to explain: 2 * Causal ordering: {Solar.R_factor, Wind_factor}, {Ozone_sub30_factor}, @@ -684,14 +684,14 @@ * Model class: * Approach: categorical - * Adaptive estimation: TRUE + * Iterative estimation: TRUE * Number of feature-wise Shapley values: 4 * Number of observations to explain: 3 * Causal ordering: {Solar.R_factor, Wind_factor}, {Ozone_sub30_factor}, {Month_factor} * Components with confounding: {Solar.R_factor, Wind_factor} - -- Adaptive computation started -- + -- iterative computation started -- -- Iteration 1 ----------------------------------------------------------------- i Using 5 of 16 coalitions, 5 new. @@ -725,7 +725,7 @@ * Model class: * Approach: ctree - * Adaptive estimation: FALSE + * Iterative estimation: FALSE * Number of feature-wise Shapley values: 4 * Number of observations to explain: 3 * Causal ordering: {Solar.R_factor, Wind_factor}, {Ozone_sub30_factor}, diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_FALSE.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_FALSE.rds index 5424e6f29..14ba674c5 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_FALSE.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_FALSE.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_TRUE.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_TRUE.rds index 7d11939ca..fb1d734a5 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_TRUE.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_TRUE.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix.rds index bf6eac94c..21cbcdf4f 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_ctree.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_ctree.rds index 9b559712c..2cc8b74d1 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_ctree.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_ctree.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_empirical.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_empirical.rds index 4997871bb..2c48c83d5 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_empirical.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_empirical.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_n_coal.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_n_coal.rds index 628a6a1ac..a9e4afeb0 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_n_coal.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_caus_conf_mix_n_coal.rds differ diff --git 
a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg.rds index fa50b5861..31c97c9e3 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_adaptive.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_adaptive.rds deleted file mode 100644 index ea1f1c439..000000000 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_adaptive.rds and /dev/null differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_iterative.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_iterative.rds new file mode 100644 index 000000000..73b0db6af Binary files /dev/null and b/tests/testthat/_snaps/asymmetric-causal-output/output_asym_cond_reg_iterative.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_asymmetric_conditional.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_asymmetric_conditional.rds index 7f38735d4..bd5a1f75e 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_asymmetric_conditional.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_asymmetric_conditional.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_cat_asym_causal_mixed_cat_ad.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_cat_asym_causal_mixed_cat_ad.rds index 23857f759..10a5efcd2 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_cat_asym_causal_mixed_cat_ad.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_cat_asym_causal_mixed_cat_ad.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_cat.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_cat.rds index adf82daaf..a8bed6265 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_cat.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_cat.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_ctree.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_ctree.rds index 6303a4d08..dd492da81 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_ctree.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_categorical_asym_causal_mixed_ctree.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_asym_cond_reg.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_asym_cond_reg.rds index e09320a8d..a118b3ba1 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_asym_cond_reg.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_asym_cond_reg.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE.rds index 7ef11330a..6bb20c0b4 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE.rds and 
b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_adaptive.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_adaptive.rds
deleted file mode 100644
index 293f57e55..000000000
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_adaptive.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_iterative.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_iterative.rds
new file mode 100644
index 000000000..0b966a372
Binary files /dev/null and b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_TRUE_iterative.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed.rds
index 068dc0233..cbd3a9a98 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed_2.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed_2.rds
index cfcb4c1ff..9449892fe 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed_2.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_mixed_sym_caus_conf_mixed_2.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_FALSE.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_FALSE.rds
index a43c54caf..0b467aaa4 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_FALSE.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_FALSE.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE.rds
index c11185075..09d37d403 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds
index e93b12e6e..17390ddec 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix.rds
index 2221f4481..710b81499 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds
index b45047bb8..7310786e1 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds
deleted file mode 100644
index 7ff387bb2..000000000
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_iterative.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_iterative.rds
new file mode 100644
index 000000000..ab8aad2a1
Binary files /dev/null and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_iterative.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_conditional.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_conditional.rds
index 3ab5bf1bc..80061644b 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_conditional.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_conditional.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_gaussian.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_gaussian.rds
index 9804b8e4c..8bff4fb6e 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_gaussian.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_gaussian.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_independence.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_independence.rds
index e7ec330e4..0fe9f4ca4 100644
Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_independence.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_symmetric_marginal_independence.rds differ
diff --git a/tests/testthat/_snaps/asymmetric-causal-setup.md b/tests/testthat/_snaps/asymmetric-causal-setup.md
index e7372268d..c5468f86b 100644
--- a/tests/testthat/_snaps/asymmetric-causal-setup.md
+++ b/tests/testthat/_snaps/asymmetric-causal-setup.md
@@ -4,7 +4,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(1:6), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -15,7 +15,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(1:5, 5), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -26,7 +26,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(2:5, 5), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -37,7 +37,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(1:2, 4), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -49,7 +49,7 @@
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list("Solar.R", "Wind", "Temp", "Month", "Day",
         "Invalid feature name"), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `convert_feature_name_to_idx()`:
       ! `causal_ordering` contains feature names (`Invalid feature name`) that are not in the data (`Solar.R`, `Wind`, `Temp`, `Month`, `Day`).
@@ -60,7 +60,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list("Solar.R", "Wind", "Temp", "Month", "Day", "Day"),
-        confounding = NULL, approach = "gaussian", adaptive = FALSE)
+        confounding = NULL, approach = "gaussian", iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -71,7 +71,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list("Solar.R", "Wind", "Temp", "Day", "Day"), confounding = NULL,
-        approach = "gaussian", adaptive = FALSE)
+        approach = "gaussian", iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -82,7 +82,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list("Solar.R", "Wind"), confounding = NULL, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all feature names or indices exactly once.
@@ -94,7 +94,7 @@
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(c("Solar.R", "Wind", "Temp", "Month"), "Day"),
         confounding = NULL, approach = "gaussian", group = list(A = c("Solar.R",
-        "Wind"), B = "Temp", C = c("Month", "Day")), adaptive = FALSE)
+        "Wind"), B = "Temp", C = c("Month", "Day")), iterative = FALSE)
     Condition
       Error in `convert_feature_name_to_idx()`:
       ! `causal_ordering` contains group names (`Solar.R`, `Wind`, `Temp`, `Month`, `Day`) that are not in the data (`A`, `B`, `C`).
@@ -106,7 +106,7 @@
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(c("A", "C"), "Wrong name"), confounding = NULL,
         approach = "gaussian", group = list(A = c("Solar.R", "Wind"), B = "Temp", C = c(
-        "Month", "Day")), adaptive = FALSE)
+        "Month", "Day")), iterative = FALSE)
     Condition
       Error in `convert_feature_name_to_idx()`:
       ! `causal_ordering` contains group names (`Wrong name`) that are not in the data (`A`, `B`, `C`).
@@ -118,7 +118,7 @@
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = TRUE,
         causal_ordering = list(c("A"), "B"), confounding = NULL, approach = "gaussian",
         group = list(A = c("Solar.R", "Wind"), B = "Temp", C = c("Month", "Day")),
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_causal_ordering()`:
       ! `causal_ordering` is incomplete/incorrect. It must contain all group names or indices exactly once.
@@ -129,7 +129,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = FALSE,
         causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = c(
-        "gaussian", "independence", "empirical", "gaussian"), adaptive = FALSE)
+        "gaussian", "independence", "empirical", "gaussian"), iterative = FALSE)
     Condition
       Error in `check_and_set_causal_sampling()`:
       ! Causal Shapley values is not applicable for combined approaches.
@@ -140,7 +140,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = c(FALSE, FALSE),
         causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `get_parameters()`:
       ! `asymmetric` must be a single logical.
@@ -151,7 +151,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = "Must be a single logical",
         causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `get_parameters()`:
       ! `asymmetric` must be a single logical.
@@ -162,7 +162,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = 1L,
         causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `get_parameters()`:
       ! `asymmetric` must be a single logical.
@@ -173,7 +173,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = FALSE,
         causal_ordering = list(1:2, 3:4, 5), confounding = c("A", "B", "C"),
-        approach = "gaussian", , adaptive = FALSE)
+        approach = "gaussian", iterative = FALSE)
     Condition
       Error in `get_parameters()`:
       ! `confounding` must be a logical (vector).
@@ -184,7 +184,7 @@
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, asymmetric = FALSE,
         causal_ordering = list(1:2, 3:4, 5), confounding = c(TRUE, FALSE), approach = "gaussian",
-        adaptive = FALSE)
+        iterative = FALSE)
     Condition
       Error in `check_and_set_confounding()`:
       ! `confounding` must either be a single logical or a vector of logicals of the same length as the number of components in `causal_ordering` (3).
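
The asymmetric-causal-setup snapshots above all exercise the validation in `check_and_set_causal_ordering()` and `convert_feature_name_to_idx()`: every feature (or group) name or index must appear exactly once across the components of `causal_ordering`, and each name must exist in the data (or in `group`). The following is an illustrative sketch of a call that passes these checks; it reuses the test objects (`model_lm_numeric`, `x_explain_numeric`, `x_train_numeric`, `p0`) defined elsewhere in the suite, and the particular three-component ordering is invented for illustration:

    library(shapr)

    explain(
      model = model_lm_numeric,
      x_explain = x_explain_numeric,
      x_train = x_train_numeric,
      prediction_zero = p0,
      asymmetric = TRUE,
      # A valid causal ordering: each of the five features appears exactly once
      causal_ordering = list("Solar.R", c("Wind", "Temp"), c("Month", "Day")),
      confounding = NULL,
      approach = "gaussian",
      iterative = FALSE
    )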
diff --git a/tests/testthat/_snaps/forecast-output.md b/tests/testthat/_snaps/forecast-output.md
index 000bc2510..e2fae1c19 100644
--- a/tests/testthat/_snaps/forecast-output.md
+++ b/tests/testthat/_snaps/forecast-output.md
@@ -12,7 +12,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 2
       * Number of observations to explain: 2
@@ -43,7 +43,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 7
       * Number of observations to explain: 2
@@ -68,7 +68,7 @@
       5:  0.5630
       6: -0.7615
 
-# forecast_output_arima_numeric_adaptive
+# forecast_output_arima_numeric_iterative
 
     Code
       (out <- code)
@@ -78,11 +78,11 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: TRUE
+      * Iterative estimation: TRUE
       * Number of feature-wise Shapley values: 9
       * Number of observations to explain: 2
 
-      -- Adaptive computation started --
+      -- iterative computation started --
 
       -- Iteration 1 -----------------------------------------------------------------
       i Using 10 of 512 coalitions, 10 new.
@@ -110,7 +110,7 @@
       5: -1.5436 -0.5418 2.8952
       6: -0.6202 -0.8545 0.4549
 
-# forecast_output_arima_numeric_adaptive_groups
+# forecast_output_arima_numeric_iterative_groups
 
    Code
       (out <- code)
@@ -120,11 +120,11 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: TRUE
+      * Iterative estimation: TRUE
       * Number of group-wise Shapley values: 10
       * Number of observations to explain: 2
 
-      -- Adaptive computation started --
+      -- iterative computation started --
 
       -- Iteration 1 -----------------------------------------------------------------
       i Using 10 of 1024 coalitions, 10 new.
@@ -158,7 +158,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 2
       * Number of observations to explain: 2
@@ -189,7 +189,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of group-wise Shapley values: 4
       * Number of observations to explain: 2
@@ -220,7 +220,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 3
       * Number of observations to explain: 2
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds
index acf02e047..2fcf7aa9b 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds
index a54430ddf..6b5c6c5da 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds
deleted file mode 100644
index bfb36dcf3..000000000
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds
deleted file mode 100644
index 08c6e8c35..000000000
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative.rds
new file mode 100644
index 000000000..26be2ea86
Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative_groups.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative_groups.rds
new file mode 100644
index 000000000..e7ed70acb
Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_iterative_groups.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds
index dcf472133..51b75e049 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds
index 24546311b..e32fdafc5 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds differ
diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds
index 8ed51d8c1..e5db013be 100644
Binary files a/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds and b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds differ
diff --git a/tests/testthat/_snaps/forecast-setup.md b/tests/testthat/_snaps/forecast-setup.md
index f78fa6afa..6f03d5298 100644
--- a/tests/testthat/_snaps/forecast-setup.md
+++ b/tests/testthat/_snaps/forecast-setup.md
@@ -103,33 +103,9 @@
       max_n_coalitions is smaller than max(10, n_features + 1 = 8),which will result
       in unreliable results. It is therefore set to 10.
 
-      * Model class:
-      * Approach: independence
-      * Adaptive estimation: TRUE
-      * Number of feature-wise Shapley values: 7
-      * Number of observations to explain: 2
-
-      -- Adaptive computation started --
-
-      -- Iteration 1 -----------------------------------------------------------------
-      i Using 8 of 128 coalitions, 8 new.
-    Output
-      explain_idx horizon  none  Temp.1  Temp.2 Wind.1 Wind.2 Wind.F1 Wind.F2
-      1:      149       1 77.88 -6.1669 -3.2230 0.5692 0.5692 -2.2557      NA
-      2:      150       1 77.88 -1.8556 -2.2854 0.5822 0.5822 -2.6587      NA
-      3:      149       2 77.88 -3.7482 -0.8043 -0.6402 -0.6402 -1.1390 -3.9124
-      4:      150       2 77.88 -0.3161 -0.7458 -0.1876 -0.1876 -1.1651 -0.8742
-      5:      149       3 77.88 -1.7783 -1.7783 -1.7783 -1.1423 -1.7783  0.0000
-      6:      150       3 77.88 -0.4531 -0.4531 -0.4531 -1.1683 -0.4531  0.0000
-      Wind.F3
-      1:      NA
-      2:      NA
-      3:      NA
-      4:      NA
-      5: -0.4697
-      6: -1.6952
+    Condition
+      Error in `check_iterative_args()`:
+      ! `iterative_args$initial_n_coalitions` must be a single integer between 2 and `max_n_coalitions`.
 
 ---
 
@@ -153,7 +129,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of group-wise Shapley values: 4
       * Number of observations to explain: 2
diff --git a/tests/testthat/_snaps/regression-output.md b/tests/testthat/_snaps/regression-output.md
index f1ced1801..73230c664 100644
--- a/tests/testthat/_snaps/regression-output.md
+++ b/tests/testthat/_snaps/regression-output.md
@@ -1,4 +1,4 @@
-# output_lm_numeric_lm_separate_adaptive
+# output_lm_numeric_lm_separate_iterative
 
     Code
       (out <- code)
@@ -9,11 +9,11 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: TRUE
+      * Iterative estimation: TRUE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
 
-      -- Adaptive computation started --
+      -- iterative computation started --
 
       -- Iteration 1 -----------------------------------------------------------------
       i Using 5 of 32 coalitions, 5 new.
@@ -47,7 +47,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -68,7 +68,7 @@
     Message
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -93,7 +93,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -118,7 +118,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -143,7 +143,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -168,7 +168,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -193,7 +193,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -218,7 +218,7 @@
       * Model class:
       * Approach: regression_separate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -232,7 +232,7 @@
       2:     2 42.44 8.183  -1.463 -16.499  3.63 -9.233
       3:     3 42.44 3.364 -14.946   0.401 -11.32 11.905
 
-# output_lm_numeric_lm_surrogate_adaptive
+# output_lm_numeric_lm_surrogate_iterative
 
     Code
       (out <- code)
@@ -242,12 +242,12 @@
       and is therefore set to 2^n_features = 32.
     Condition
-      Warning in `check_and_set_adaptive()`:
-      Adaptive estimation of Shapley values are not supported for approach = regression_surrogate. Setting adaptive = FALSE.
+      Warning in `check_and_set_iterative()`:
+      Iterative estimation of Shapley values are not supported for approach = regression_surrogate. Setting iterative = FALSE.
     Message
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -272,7 +272,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -293,7 +293,7 @@
     Message
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -314,7 +314,7 @@
     Message
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -339,7 +339,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -364,7 +364,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -389,7 +389,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -414,7 +414,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
diff --git a/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_separate.rds
index 011631e05..51b4a330b 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_surrogate.rds b/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_surrogate.rds
index 246d9994f..9cc5a5799 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_surrogate.rds and b/tests/testthat/_snaps/regression-output/output_lm_categorical_lm_surrogate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate.rds
index 528e9d6e7..b6dd532ee 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate_parallel.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate_parallel.rds
index d0870ca6c..67c49f255 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate_parallel.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_separate_parallel.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_surrogate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_surrogate.rds
index 37ffccc02..791246703 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_surrogate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_decision_tree_cv_surrogate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_separate.rds
index b5f6f2485..e5910370b 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_surrogate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_surrogate.rds
index 3e8ddde7c..95c24fb6a 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_surrogate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_lm_surrogate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_splines_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_splines_separate.rds
index 3ba59f710..6a0b062f9 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_splines_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_splines_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_separate.rds
index 3108d8aa7..781f0619f 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_surrogate.rds b/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_surrogate.rds
index 8ef78c72a..430ec826e 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_surrogate.rds and b/tests/testthat/_snaps/regression-output/output_lm_mixed_xgboost_surrogate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate.rds
index 0a7ea0e98..18bd04305 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate.rds and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_adaptive.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_adaptive.rds
deleted file mode 100644
index 9ae543c41..000000000
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_adaptive.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_iterative.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_iterative.rds
new file mode 100644
index 000000000..0770d63b6
Binary files /dev/null and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_iterative.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_n_comb.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_n_comb.rds
index b1b23373d..e01c8e14d 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_n_comb.rds and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_separate_n_comb.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate.rds
index b06d8b1f3..af6063e9b 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate.rds and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_adaptive.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_adaptive.rds
deleted file mode 100644
index b06d8b1f3..000000000
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_adaptive.rds and /dev/null differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_iterative.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_iterative.rds
new file mode 100644
index 000000000..af6063e9b
Binary files /dev/null and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_iterative.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_n_comb.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_n_comb.rds
index 80a4d3fbc..4496f2af7 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_n_comb.rds and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_n_comb.rds differ
diff --git a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_reg_surr_n_comb.rds b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_reg_surr_n_comb.rds
index 5d7a5f36d..2ec897d61 100644
Binary files a/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_reg_surr_n_comb.rds and b/tests/testthat/_snaps/regression-output/output_lm_numeric_lm_surrogate_reg_surr_n_comb.rds differ
diff --git a/tests/testthat/_snaps/regression-setup.md b/tests/testthat/_snaps/regression-setup.md
index 090a017ca..e63719891 100644
--- a/tests/testthat/_snaps/regression-setup.md
+++ b/tests/testthat/_snaps/regression-setup.md
@@ -3,7 +3,8 @@
     Code
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, approach = c(
-        "regression_surrogate", "gaussian", "independence", "empirical"), adaptive = FALSE)
+        "regression_surrogate", "gaussian", "independence", "empirical"),
+        iterative = FALSE)
     Condition
       Error in `check_approach()`:
       ! The `regression_separate` and `regression_surrogate` approaches cannot be combined with other approaches.
@@ -13,7 +14,7 @@
     Code
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, approach = c(
-        "regression_separate", "gaussian", "independence", "empirical"), adaptive = FALSE)
+        "regression_separate", "gaussian", "independence", "empirical"), iterative = FALSE)
     Condition
       Error in `check_approach()`:
       ! The `regression_separate` and `regression_surrogate` approaches cannot be combined with other approaches.
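
The forecast-setup snapshot above documents the new validation in `check_iterative_args()`: `iterative_args$initial_n_coalitions` must be a single integer between 2 and `max_n_coalitions`. A sketch of a call that satisfies this check, again reusing the suite's test objects; the specific values are illustrative only and mirror those used in the iterative-output tests further below:

    explain(
      model = model_lm_numeric,
      x_explain = x_explain_numeric,
      x_train = x_train_numeric,
      approach = "independence",
      prediction_zero = p0,
      max_n_coalitions = 20,
      iterative = TRUE,
      iterative_args = list(
        initial_n_coalitions = 10,  # single integer in [2, max_n_coalitions]
        convergence_tol = 0.1       # renamed from convergence_tolerance
      )
    )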
@@ -120,7 +121,7 @@
     Code
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, approach = "regression_surrogate",
-        regression.tune_values = data.frame(tree_depth = c(1, 2, 3)), adaptive = FALSE)
+        regression.tune_values = data.frame(tree_depth = c(1, 2, 3)), iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -128,7 +129,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -262,7 +263,7 @@
         x_train = x_train_numeric, prediction_zero = p0, approach = "regression_surrogate",
         regression.recipe_func = function(x) {
           return(2)
-        }, adaptive = FALSE)
+        }, iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -270,7 +271,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -286,7 +287,7 @@
     Code
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, approach = "regression_surrogate",
-        regression.surrogate_n_comb = 2^ncol(x_explain_numeric) - 1, adaptive = FALSE)
+        regression.surrogate_n_comb = 2^ncol(x_explain_numeric) - 1, iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -294,7 +295,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -310,7 +311,7 @@
     Code
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, prediction_zero = p0, approach = "regression_surrogate",
-        regression.surrogate_n_comb = 0, adaptive = FALSE)
+        regression.surrogate_n_comb = 0, iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -318,7 +319,7 @@
       * Model class:
       * Approach: regression_surrogate
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
diff --git a/tests/testthat/_snaps/regular-output.md b/tests/testthat/_snaps/regular-output.md
index 151bd379e..632383c8f 100644
--- a/tests/testthat/_snaps/regular-output.md
+++ b/tests/testthat/_snaps/regular-output.md
@@ -9,7 +9,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -34,7 +34,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -59,7 +59,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -80,7 +80,7 @@
     Message
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -110,7 +110,7 @@
     Message
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -161,7 +161,7 @@
     Message
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -182,7 +182,7 @@
     Message
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -207,7 +207,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -232,7 +232,7 @@
       * Model class:
       * Approach: copula
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -257,7 +257,7 @@
       * Model class:
       * Approach: ctree
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -282,7 +282,7 @@
       * Model class:
       * Approach: vaeac
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -307,7 +307,7 @@
       * Model class:
       * Approach: ctree
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -332,7 +332,7 @@
       * Model class:
       * Approach: vaeac
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -357,7 +357,7 @@
       * Model class:
       * Approach: categorical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -382,7 +382,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 4
       * Number of observations to explain: 3
@@ -407,7 +407,7 @@
       * Model class:
       * Approach: timeseries
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of group-wise Shapley values: 4
       * Number of observations to explain: 2
@@ -431,7 +431,7 @@
       * Model class:
       * Approach: gaussian, empirical, ctree, and independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -456,7 +456,7 @@
       * Model class:
       * Approach: ctree, copula, independence, and copula
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -481,7 +481,7 @@
       * Model class:
       * Approach: independence, empirical, gaussian, and empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -506,7 +506,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -531,7 +531,7 @@
       * Model class:
       * Approach: ctree
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -556,7 +556,7 @@
       * Model class:
       * Approach: vaeac
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -581,7 +581,7 @@
       * Model class:
       * Approach: ctree, independence, ctree, and independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -606,7 +606,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -634,7 +634,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -662,7 +662,7 @@
       * Model class:
       * Approach: ctree
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -687,7 +687,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 2
       * Number of observations to explain: 3
@@ -712,7 +712,7 @@
       * Model class:
       * Approach: ctree
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -737,7 +737,7 @@
       * Model class:
       * Approach: empirical
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -762,7 +762,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
diff --git a/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_1.rds b/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_1.rds
index 59416303f..a82a9d8e5 100644
Binary files a/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_1.rds and b/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_1.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_2.rds b/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_2.rds
index 59416303f..a82a9d8e5 100644
Binary files a/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_2.rds and b/tests/testthat/_snaps/regular-output/output_custom_lm_numeric_independence_2.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_custom_xgboost_mixed_dummy_ctree.rds b/tests/testthat/_snaps/regular-output/output_custom_xgboost_mixed_dummy_ctree.rds
index e1980d562..9abb24e1c 100644
Binary files a/tests/testthat/_snaps/regular-output/output_custom_xgboost_mixed_dummy_ctree.rds and b/tests/testthat/_snaps/regular-output/output_custom_xgboost_mixed_dummy_ctree.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_categorical_ctree.rds b/tests/testthat/_snaps/regular-output/output_lm_categorical_ctree.rds
index 7170013d3..59f5d23ae 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_categorical_ctree.rds and b/tests/testthat/_snaps/regular-output/output_lm_categorical_ctree.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_categorical_independence.rds b/tests/testthat/_snaps/regular-output/output_lm_categorical_independence.rds
index 7a2a3aa27..8516466f5 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_categorical_independence.rds and b/tests/testthat/_snaps/regular-output/output_lm_categorical_independence.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_categorical_method.rds b/tests/testthat/_snaps/regular-output/output_lm_categorical_method.rds
index 2394620fb..f3f171306 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_categorical_method.rds and b/tests/testthat/_snaps/regular-output/output_lm_categorical_method.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_categorical_vaeac.rds b/tests/testthat/_snaps/regular-output/output_lm_categorical_vaeac.rds
index cd28bd6c0..0717162de 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_categorical_vaeac.rds and b/tests/testthat/_snaps/regular-output/output_lm_categorical_vaeac.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_mixed_comb.rds b/tests/testthat/_snaps/regular-output/output_lm_mixed_comb.rds
index fcd742105..931ce50d9 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_mixed_comb.rds and b/tests/testthat/_snaps/regular-output/output_lm_mixed_comb.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_mixed_ctree.rds b/tests/testthat/_snaps/regular-output/output_lm_mixed_ctree.rds
index 953c098d3..e4b60be42 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_mixed_ctree.rds and b/tests/testthat/_snaps/regular-output/output_lm_mixed_ctree.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_mixed_independence.rds b/tests/testthat/_snaps/regular-output/output_lm_mixed_independence.rds
index 2a02cb086..97a0d238f 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_mixed_independence.rds and b/tests/testthat/_snaps/regular-output/output_lm_mixed_independence.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_mixed_vaeac.rds b/tests/testthat/_snaps/regular-output/output_lm_mixed_vaeac.rds
index 5bd02b7c7..a1eb54259 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_mixed_vaeac.rds and b/tests/testthat/_snaps/regular-output/output_lm_mixed_vaeac.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb1.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb1.rds
index 2aefd8011..12b4e5dd7 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb1.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb1.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb2.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb2.rds
index 2fa47952d..8d9abe50f 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb2.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb2.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb3.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb3.rds
index 9617c40d5..dd9b28d3d 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_comb3.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_comb3.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_copula.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_copula.rds
index 471203929..d6928debf 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_copula.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_copula.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree.rds
index 91a4cf111..69f7a61a9 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree_parallelized.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree_parallelized.rds
index 91a4cf111..69f7a61a9 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree_parallelized.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_ctree_parallelized.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical.rds
index db6bb33a8..2316a3acc 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_each.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_each.rds
index 2534ce1ce..367922cc7 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_each.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_each.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_full.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_full.rds
index 4a368749c..a4691aad1 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_full.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_AICc_full.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_independence.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_independence.rds
index 62825af9b..a005d77b7 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_independence.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_independence.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_n_coalitions.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_n_coalitions.rds
index 66c2fb594..e9bd604f4 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_n_coalitions.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_n_coalitions.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_progress.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_progress.rds
index db6bb33a8..2316a3acc 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_progress.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_empirical_progress.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_gaussian.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_gaussian.rds
index e5bd1849c..e2fba82a2 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_gaussian.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_gaussian.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence.rds
index 626584575..dfca2b741 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_MSEv_Shapley_weights.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_MSEv_Shapley_weights.rds
index 9dfd42860..b4b1b1231 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_MSEv_Shapley_weights.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_MSEv_Shapley_weights.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_keep_samp_for_vS.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_keep_samp_for_vS.rds
index 995625cb7..8eed1a51f 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_keep_samp_for_vS.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_independence_keep_samp_for_vS.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_interaction.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_interaction.rds
index bcf7b8808..507e17dfa 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_interaction.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_interaction.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_numeric_vaeac.rds b/tests/testthat/_snaps/regular-output/output_lm_numeric_vaeac.rds
index 106b56bfe..5b5ac43ce 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_numeric_vaeac.rds and b/tests/testthat/_snaps/regular-output/output_lm_numeric_vaeac.rds differ
diff --git a/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds b/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds
index 36dce40a2..6ba6c2942 100644
Binary files a/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds and b/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds differ
diff --git a/tests/testthat/_snaps/regular-setup.md b/tests/testthat/_snaps/regular-setup.md
index d62be3ca7..e689c5b8c 100644
--- a/tests/testthat/_snaps/regular-setup.md
+++ b/tests/testthat/_snaps/regular-setup.md
@@ -34,7 +34,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -67,7 +67,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -100,7 +100,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -134,7 +134,7 @@
       * Model class:
       * Approach: independence
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -381,7 +381,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -410,7 +410,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of group-wise Shapley values: 3
       * Number of observations to explain: 3
@@ -582,10 +582,15 @@
       keep_samp_for_vS_non_logical_1 <- "bla"
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        keep_samp_for_vS = keep_samp_for_vS_non_logical_1)
+        output_args = list(keep_samp_for_vS = keep_samp_for_vS_non_logical_1))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `keep_samp_for_vS` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$keep_samp_for_vS` must be single logical.
 
---
 
@@ -593,10 +598,15 @@
       keep_samp_for_vS_non_logical_2 <- NULL
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        keep_samp_for_vS = keep_samp_for_vS_non_logical_2)
+        output_args = list(keep_samp_for_vS = keep_samp_for_vS_non_logical_2))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `keep_samp_for_vS` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$keep_samp_for_vS` must be single logical.
 
---
 
@@ -604,10 +614,15 @@
       keep_samp_for_vS_too_long <- c(TRUE, FALSE)
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        keep_samp_for_vS = keep_samp_for_vS_too_long)
+        output_args = list(keep_samp_for_vS = keep_samp_for_vS_too_long))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `keep_samp_for_vS` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$keep_samp_for_vS` must be single logical.
 
# erroneous input: `MSEv_uniform_comb_weights`
 
@@ -615,10 +630,15 @@
       MSEv_uniform_comb_weights_nl_1 <- "bla"
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_1)
+        output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_1))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `MSEv_uniform_comb_weights` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$MSEv_uniform_comb_weights` must be single logical.
 
---
 
@@ -626,10 +646,15 @@
       MSEv_uniform_comb_weights_nl_2 <- NULL
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_2)
+        output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_2))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `MSEv_uniform_comb_weights` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$MSEv_uniform_comb_weights` must be single logical.
 
---
 
@@ -637,10 +662,15 @@
       MSEv_uniform_comb_weights_long <- c(TRUE, FALSE)
       explain(testing = TRUE, model = model_lm_numeric, x_explain = x_explain_numeric,
         x_train = x_train_numeric, approach = "independence", prediction_zero = p0,
-        MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_long)
+        output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_long))
+    Message
+      Success with message:
+      max_n_coalitions is NULL or larger than or 2^n_features = 32,
+      and is therefore set to 2^n_features = 32.
+
     Condition
-      Error in `get_parameters()`:
-      ! `MSEv_uniform_comb_weights` must be single logical.
+      Error in `check_output_args()`:
+      ! `output_args$MSEv_uniform_comb_weights` must be single logical.
 
# erroneous input: `predict_model`
 
@@ -895,7 +925,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -922,7 +952,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of group-wise Shapley values: 3
       * Number of observations to explain: 3
@@ -942,7 +972,7 @@
       explanation_exact <- explain(testing = TRUE, model = model_lm_numeric,
         x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "gaussian",
         prediction_zero = p0, n_MC_samples = 2, seed = 123, max_n_coalitions = NULL,
-        adaptive = FALSE)
+        iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -950,7 +980,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -963,13 +993,13 @@
     Code
       explanation_equal <- explain(testing = TRUE, model = model_lm_numeric,
         x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "gaussian",
-        prediction_zero = p0, n_MC_samples = 2, seed = 123, adaptive_arguments = list(
+        prediction_zero = p0, n_MC_samples = 2, seed = 123, extra_computation_args = list(
         compute_sd = FALSE), max_n_coalitions = 2^ncol(x_explain_numeric),
-        adaptive = FALSE)
+        iterative = FALSE)
     Message
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
@@ -982,9 +1012,9 @@
     Code
       explanation_larger <- explain(testing = TRUE, model = model_lm_numeric,
         x_explain = x_explain_numeric, x_train = x_train_numeric, approach = "gaussian",
-        prediction_zero = p0, n_MC_samples = 2, seed = 123, adaptive_arguments = list(
+        prediction_zero = p0, n_MC_samples = 2, seed = 123, extra_computation_args = list(
        compute_sd = FALSE), max_n_coalitions = 2^ncol(x_explain_numeric) + 1,
-        adaptive = FALSE)
+        iterative = FALSE)
     Message
       Success with message:
       max_n_coalitions is NULL or larger than or 2^n_features = 32,
@@ -992,7 +1022,7 @@
       * Model class:
       * Approach: gaussian
-      * Adaptive estimation: FALSE
+      * Iterative estimation: FALSE
       * Number of feature-wise Shapley values: 5
       * Number of observations to explain: 3
diff --git a/tests/testthat/test-adaptive-output.R b/tests/testthat/test-adaptive-output.R
index c4e787f21..20b132075 100644
--- a/tests/testthat/test-adaptive-output.R
+++ b/tests/testthat/test-adaptive-output.R
@@ -9,7 +9,7 @@ test_that("output_lm_numeric_independence_reach_exact", {
       x_train = x_train_numeric,
       approach = "independence",
       prediction_zero = p0,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("basic", "convergence", "shapley"),
       paired_shap_sampling = TRUE
     ),
@@ -26,11 +26,11 @@ test_that("output_lm_numeric_independence_converges_tol", {
       x_train = x_train_numeric,
       approach = "independence",
       prediction_zero = p0,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.1
+        convergence_tol = 0.1
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("convergence", "shapley")
     ),
     "output_lm_numeric_independence_converges_tol"
@@ -46,13 +46,13 @@ test_that("output_lm_numeric_independence_converges_maxit", {
       x_train = x_train_numeric,
       approach = "independence",
       prediction_zero = p0,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 8
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("convergence", "shapley")
     ),
     "output_lm_numeric_independence_converges_maxit"
@@ -69,7 +69,7 @@ test_that("output_lm_numeric_indep_conv_max_n_coalitions", {
       approach = "independence",
       prediction_zero = p0,
       max_n_coalitions = 20,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("convergence", "shapley")
     ),
     "output_lm_numeric_indep_conv_max_n_coalitions"
@@ -93,11 +93,11 @@ test_that("output_lm_numeric_gaussian_group_converges_tol", {
       approach = "gaussian",
       group = groups,
       prediction_zero = p0,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 5,
-        convergence_tolerance = 0.1
+        convergence_tol = 0.1
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("convergence", "shapley")
     ),
     "output_lm_numeric_gaussian_group_converges_tol"
@@ -113,11 +113,11 @@ test_that("output_lm_numeric_independence_converges_tol_paired", {
       x_train = x_train_numeric,
       approach = "independence",
       prediction_zero = p0,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.1
+        convergence_tol = 0.1
      ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("convergence", "shapley"),
       paired_shap_sampling = TRUE
     ),
@@ -137,13 +137,13 @@ test_that("output_lm_numeric_independence_saving_and_cont_est", {
       approach = "independence",
       prediction_zero = p0,
       paired_shap_sampling = FALSE,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 8
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       seed = NULL,
       verbose = NULL
     )
@@ -159,13 +159,13 @@ test_that("output_lm_numeric_independence_saving_and_cont_est", {
       approach = "independence",
       prediction_zero = p0,
       paired_shap_sampling = FALSE,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 5
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       seed = NULL,
       verbose = NULL
     )
@@ -180,13 +180,13 @@ test_that("output_lm_numeric_independence_saving_and_cont_est", {
       approach = "independence",
       prediction_zero = p0,
       paired_shap_sampling = FALSE,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 8
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = NULL,
       prev_shapr_object = e_init_object,
       seed = NULL,
@@ -208,13 +208,13 @@ test_that("output_lm_numeric_independence_saving_and_cont_est", {
       approach = "independence",
       prediction_zero = p0,
       paired_shap_sampling = FALSE,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 5
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       seed = NULL,
       verbose = NULL
     )
@@ -229,15 +229,15 @@ test_that("output_lm_numeric_independence_saving_and_cont_est", {
       approach = "independence",
       prediction_zero = p0,
       paired_shap_sampling = FALSE,
-      adaptive_arguments = list(
+      iterative_args = list(
         initial_n_coalitions = 10,
-        convergence_tolerance = 0.001,
-        reduction_factor_vec = rep(10^(-5), 10),
+        convergence_tol = 0.001,
+        n_coal_next_iter_factor_vec = rep(10^(-5), 10),
         max_iter = 8
       ),
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = NULL,
-      prev_shapr_object = e_init_path$internal$parameters$adaptive_arguments$saving_path,
+      prev_shapr_object = e_init_path$saving_path,
       seed = NULL
     ),
     "output_lm_numeric_independence_cont_est_path"
@@ -256,7 +256,7 @@ test_that("output_verbose_1", {
       x_train = x_train_numeric,
       approach = "gaussian",
       prediction_zero = p0,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("basic")
     ),
     "output_verbose_1"
@@ -272,7 +272,7 @@ test_that("output_verbose_1_3", {
       x_train = x_train_numeric,
       approach = "gaussian",
       prediction_zero = p0,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("basic", "convergence")
     ),
     "output_verbose_1_3"
@@ -288,7 +288,7 @@ test_that("output_verbose_1_3_4", {
       x_train = x_train_numeric,
       approach = "gaussian",
       prediction_zero = p0,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("basic", "convergence", "shapley")
     ),
     "output_verbose_1_3_4"
@@ -304,7 +304,7 @@ test_that("output_verbose_1_3_4_5", {
       x_train = x_train_numeric,
       approach = "gaussian",
       prediction_zero = p0,
-      adaptive = TRUE,
+      iterative = TRUE,
       verbose = c("basic", "convergence", "shapley", "vS_details")
     ),
     "output_verbose_1_3_4_5"
diff --git a/tests/testthat/test-adaptive-setup.R b/tests/testthat/test-adaptive-setup.R
index 32e6236f9..fbc85a9f2 100644
--- a/tests/testthat/test-adaptive-setup.R
+++ b/tests/testthat/test-adaptive-setup.R
@@ -1,4 +1,4 @@
-test_that("adaptive_arguments are respected", {
+test_that("iterative_args are respected", {
   ex <- explain(
     testing = TRUE,
     model = model_lm_numeric,
@@ -7,13 +7,13 @@ test_that("adaptive_arguments are respected", {
     approach = "independence",
     prediction_zero = p0,
     max_n_coalitions = 30,
-    adaptive_arguments = list(
+    iterative_args = list(
       initial_n_coalitions = 6,
-      convergence_tolerance = 0.0005,
-      reduction_factor_vec = rep(10^(-6), 10),
+      convergence_tol = 0.0005,
+      n_coal_next_iter_factor_vec = rep(10^(-6), 10),
       max_iter = 8
     ),
-    adaptive = TRUE
+    iterative = TRUE
   )
 
   # Check that initial_n_coalitions is respected
@@ -21,11 +21,11 @@
   # Check that max_iter is respected
   expect_equal(length(ex$internal$iter_list), 8)
-  expect_true(ex$internal$iter_results$iter_info_dt[.N, converged_max_iter])
+  expect_true(ex$iterative_results$iter_info_dt[.N, converged_max_iter])
 })
 
-test_that("adaptive feature wise and groupwise computations identical", {
+test_that("iterative feature wise and groupwise computations identical", {
groups <- list( Solar.R = "Solar.R", Wind = "Wind", @@ -41,11 +41,11 @@ test_that("adaptive feature wise and groupwise computations identical", { x_train = x_train_numeric, approach = "gaussian", prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 5, - convergence_tolerance = 0.1 + convergence_tol = 0.1 ), - adaptive = TRUE + iterative = TRUE ) @@ -57,16 +57,16 @@ test_that("adaptive feature wise and groupwise computations identical", { approach = "gaussian", group = groups, prediction_zero = p0, - adaptive_arguments = list( + iterative_args = list( initial_n_coalitions = 5, - convergence_tolerance = 0.1 + convergence_tol = 0.1 ), - adaptive = TRUE + iterative = TRUE ) # Checking equality in the list with all final and intermediate results - expect_equal(expl_feat$internal$iter_results, expl_group$internal$iter_results) + expect_equal(expl_feat$iter_results, expl_group$iter_results) }) test_that("erroneous input: `min_n_batches`", { @@ -83,7 +83,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_numeric_1) + extra_computation_args = list(min_n_batches = n_batches_non_numeric_1) ) }, error = TRUE @@ -100,7 +100,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_numeric_2) + extra_computation_args = list(min_n_batches = n_batches_non_numeric_2) ) }, error = TRUE @@ -117,7 +117,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_integer) + extra_computation_args = list(min_n_batches = n_batches_non_integer) ) }, error = TRUE @@ -134,7 +134,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_too_long) + extra_computation_args = list(min_n_batches = n_batches_too_long) ) }, error = TRUE @@ -151,7 +151,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_is_NA) + extra_computation_args = list(min_n_batches = n_batches_is_NA) ) }, error = TRUE @@ -168,7 +168,7 @@ test_that("erroneous input: `min_n_batches`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = n_batches_non_positive) + extra_computation_args = list(min_n_batches = n_batches_non_positive) ) }, error = TRUE @@ -184,7 +184,7 @@ test_that("different n_batches gives same/different shapley values for different x_train = x_train_numeric, approach = "empirical", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = 5, max_batch_size = 10) + extra_computation_args = list(min_n_batches = 5, max_batch_size = 10) ) explain.empirical_n_batches_10 <- explain( @@ -194,7 +194,7 @@ test_that("different n_batches gives same/different shapley values for different x_train = x_train_numeric, approach = "empirical", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = 10, max_batch_size = 10) + extra_computation_args = list(min_n_batches = 10, max_batch_size = 10) ) # Difference in the objects (n_batches and related) @@ -204,8 +204,8 @@ test_that("different 
n_batches gives same/different shapley values for different )) # Same Shapley values expect_equal( - explain.empirical_n_batches_5$shapley_values, - explain.empirical_n_batches_10$shapley_values + explain.empirical_n_batches_5$shapley_values_est, + explain.empirical_n_batches_10$shapley_values_est ) # approach "ctree" is seed dependent @@ -216,7 +216,7 @@ test_that("different n_batches gives same/different shapley values for different x_train = x_train_numeric, approach = "ctree", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = 5, max_batch_size = 10) + extra_computation_args = list(min_n_batches = 5, max_batch_size = 10) ) explain.ctree_n_batches_10 <- explain( @@ -226,7 +226,7 @@ test_that("different n_batches gives same/different shapley values for different x_train = x_train_numeric, approach = "ctree", prediction_zero = p0, - adaptive_arguments = list(min_n_batches = 10, max_batch_size = 10) + extra_computation_args = list(min_n_batches = 10, max_batch_size = 10) ) # Difference in the objects (n_batches and related) @@ -236,7 +236,7 @@ test_that("different n_batches gives same/different shapley values for different )) # NEITHER same Shapley values expect_false(identical( - explain.ctree_n_batches_5$shapley_values, - explain.ctree_n_batches_10$shapley_values + explain.ctree_n_batches_5$shapley_values_est, + explain.ctree_n_batches_10$shapley_values_est )) }) diff --git a/tests/testthat/test-asymmetric-causal-output.R b/tests/testthat/test-asymmetric-causal-output.R index 8c7148239..9fb4668c8 100644 --- a/tests/testthat/test-asymmetric-causal-output.R +++ b/tests/testthat/test-asymmetric-causal-output.R @@ -37,7 +37,7 @@ test_that("output_asym_cond_reg", { ) }) -test_that("output_asym_cond_reg_adaptive", { +test_that("output_asym_cond_reg_iterative", { expect_snapshot_rds( explain( testing = TRUE, @@ -51,9 +51,9 @@ test_that("output_asym_cond_reg_adaptive", { causal_ordering = list(1:2, 3, 4:5), confounding = NULL, paired_shap_sampling = FALSE, - adaptive = TRUE + iterative = TRUE ), - "output_asym_cond_reg_adaptive" + "output_asym_cond_reg_iterative" ) }) @@ -323,7 +323,7 @@ test_that("output_sym_caus_conf_mix_group", { ) }) -test_that("output_sym_caus_conf_mix_group_adaptive", { +test_that("output_sym_caus_conf_mix_group_iterative", { expect_snapshot_rds( explain( testing = TRUE, @@ -338,9 +338,9 @@ test_that("output_sym_caus_conf_mix_group_adaptive", { group = list("A" = c("Solar.R"), B = c("Wind", "Temp"), C = c("Month", "Day")), n_MC_samples = 5, # Just for speed, verbose = c("convergence"), - adaptive = TRUE + iterative = TRUE ), - "output_sym_caus_conf_mix_group_adaptive" + "output_sym_caus_conf_mix_group_iterative" ) }) @@ -367,7 +367,7 @@ test_that("output_mixed_sym_caus_conf_TRUE", { ) }) -test_that("output_mixed_sym_caus_conf_TRUE_adaptive", { +test_that("output_mixed_sym_caus_conf_TRUE_iterative", { expect_snapshot_rds( explain( testing = TRUE, @@ -380,9 +380,9 @@ test_that("output_mixed_sym_caus_conf_TRUE_adaptive", { causal_ordering = list(1:2, 3, 4:5), confounding = TRUE, n_MC_samples = 5, # Just for speed - adaptive = TRUE + iterative = TRUE ), - "output_mixed_sym_caus_conf_TRUE_adaptive" + "output_mixed_sym_caus_conf_TRUE_iterative" ) }) @@ -439,7 +439,7 @@ test_that("output_mixed_asym_cond_reg", { causal_ordering = list(1:2, 3, 4:5), paired_shap_sampling = FALSE, confounding = NULL, - adaptive = TRUE + iterative = TRUE ), "output_mixed_asym_cond_reg" ) @@ -461,7 +461,7 @@ test_that("output_categorical_asym_causal_mixed_cat", { causal_ordering = 
list(3:4, 2, 1), confounding = c(TRUE, FALSE, FALSE), n_MC_samples = 5, # Just for speed - keep_samp_for_vS = TRUE + output_args = list(keep_samp_for_vS = TRUE) ), "output_categorical_asym_causal_mixed_cat" ) @@ -482,7 +482,7 @@ test_that("output_cat_asym_causal_mixed_cat_ad", { causal_ordering = list(3:4, 2, 1), confounding = c(TRUE, FALSE, FALSE), n_MC_samples = 5, # Just for speed - adaptive = TRUE + iterative = TRUE ), "output_cat_asym_causal_mixed_cat_ad" ) diff --git a/tests/testthat/test-asymmetric-causal-setup.R b/tests/testthat/test-asymmetric-causal-setup.R index b535b077b..3a17d9d78 100644 --- a/tests/testthat/test-asymmetric-causal-setup.R +++ b/tests/testthat/test-asymmetric-causal-setup.R @@ -14,7 +14,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list(1:6), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -33,7 +33,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list(1:5, 5), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -52,7 +52,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list(2:5, 5), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -71,7 +71,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list(1:2, 4), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -90,7 +90,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list("Solar.R", "Wind", "Temp", "Month", "Day", "Invalid feature name"), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -109,7 +109,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list("Solar.R", "Wind", "Temp", "Month", "Day", "Day"), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -128,7 +128,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list("Solar.R", "Wind", "Temp", "Day", "Day"), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -147,7 +147,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { causal_ordering = list("Solar.R", "Wind"), confounding = NULL, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -167,7 +167,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { confounding = NULL, approach = "gaussian", group = list("A" = c("Solar.R", "Wind"), B = "Temp", C = c("Month", "Day")), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -187,7 +187,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { confounding = NULL, approach = "gaussian", group = list("A" = c("Solar.R", "Wind"), B = "Temp", C = c("Month", "Day")), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -207,7 +207,7 @@ test_that("asymmetric erroneous input: `causal_ordering`", { confounding = NULL, approach = "gaussian", group = list("A" = c("Solar.R", "Wind"), B = "Temp", C = c("Month", "Day")), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -231,7 +231,7 @@ test_that("asymmetric erroneous input: `approach`", { causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = c("gaussian", "independence", "empirical", "gaussian"), - adaptive = FALSE + iterative = 
FALSE ) }, error = TRUE @@ -254,7 +254,7 @@ test_that("asymmetric erroneous input: `asymmetric`", { causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -273,7 +273,7 @@ test_that("asymmetric erroneous input: `asymmetric`", { causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -292,7 +292,7 @@ test_that("asymmetric erroneous input: `asymmetric`", { causal_ordering = list(1:2, 3:4, 5), confounding = TRUE, approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -315,8 +315,8 @@ test_that("asymmetric erroneous input: `confounding`", { asymmetric = FALSE, causal_ordering = list(1:2, 3:4, 5), confounding = c("A", "B", "C"), - approach = "gaussian", , - adaptive = FALSE + approach = "gaussian", + iterative = FALSE ) }, error = TRUE @@ -335,7 +335,7 @@ test_that("asymmetric erroneous input: `confounding`", { causal_ordering = list(1:2, 3:4, 5), confounding = c(TRUE, FALSE), approach = "gaussian", - adaptive = FALSE + iterative = FALSE ) }, error = TRUE diff --git a/tests/testthat/test-forecast-output.R b/tests/testthat/test-forecast-output.R index 876d09615..1f90c9b3c 100644 --- a/tests/testthat/test-forecast-output.R +++ b/tests/testthat/test-forecast-output.R @@ -33,13 +33,13 @@ test_that("forecast_output_arima_numeric", { prediction_zero = p0_ar, group_lags = FALSE, max_n_coalitions = 150, - adaptive = FALSE + iterative = FALSE ), "forecast_output_arima_numeric" ) }) -test_that("forecast_output_arima_numeric_adaptive", { +test_that("forecast_output_arima_numeric_iterative", { expect_snapshot_rds( explain_forecast( testing = TRUE, @@ -55,14 +55,14 @@ test_that("forecast_output_arima_numeric_adaptive", { prediction_zero = p0_ar, group_lags = FALSE, max_n_coalitions = 150, - adaptive = TRUE, - adaptive_arguments = list(initial_n_coalitions = 10) + iterative = TRUE, + iterative_args = list(initial_n_coalitions = 10) ), - "forecast_output_arima_numeric_adaptive" + "forecast_output_arima_numeric_iterative" ) }) -test_that("forecast_output_arima_numeric_adaptive_groups", { +test_that("forecast_output_arima_numeric_iterative_groups", { expect_snapshot_rds( explain_forecast( testing = TRUE, @@ -78,10 +78,10 @@ test_that("forecast_output_arima_numeric_adaptive_groups", { prediction_zero = p0_ar, group_lags = TRUE, max_n_coalitions = 150, - adaptive = TRUE, - adaptive_arguments = list(initial_n_coalitions = 10, convergence_tolerance = 7e-3) + iterative = TRUE, + iterative_args = list(initial_n_coalitions = 10, convergence_tol = 7e-3) ), - "forecast_output_arima_numeric_adaptive_groups" + "forecast_output_arima_numeric_iterative_groups" ) }) @@ -163,7 +163,7 @@ test_that("ARIMA gives the same output with different horizons", { group_lags = FALSE, n_batches = 1, max_n_coalitions = 200, - adaptive = FALSE + iterative = FALSE ) @@ -182,7 +182,7 @@ test_that("ARIMA gives the same output with different horizons", { group_lags = FALSE, n_batches = 1, max_n_coalitions = 100, - adaptive = FALSE + iterative = FALSE ) h1 <- explain_forecast( @@ -200,24 +200,24 @@ test_that("ARIMA gives the same output with different horizons", { group_lags = FALSE, n_batches = 1, max_n_coalitions = 50, - adaptive = FALSE + iterative = FALSE ) cols_horizon1 <- h2$internal$objects$cols_per_horizon[[1]] expect_equal( - h2$shapley_values[horizon == 1, ..cols_horizon1], - h1$shapley_values[horizon == 1, ..cols_horizon1] + 
h2$shapley_values_est[horizon == 1, ..cols_horizon1], + h1$shapley_values_est[horizon == 1, ..cols_horizon1] ) expect_equal( - h3$shapley_values[horizon == 1, ..cols_horizon1], - h1$shapley_values[horizon == 1, ..cols_horizon1] + h3$shapley_values_est[horizon == 1, ..cols_horizon1], + h1$shapley_values_est[horizon == 1, ..cols_horizon1] ) cols_horizon2 <- h2$internal$objects$cols_per_horizon[[2]] expect_equal( - h3$shapley_values[horizon == 2, ..cols_horizon2], - h2$shapley_values[horizon == 2, ..cols_horizon2] + h3$shapley_values_est[horizon == 2, ..cols_horizon2], + h2$shapley_values_est[horizon == 2, ..cols_horizon2] ) }) @@ -237,7 +237,7 @@ test_that("ARIMA gives the same output with different horizons with grouping", { group_lags = TRUE, n_batches = 1, max_n_coalitions = 50, - adaptive = FALSE + iterative = FALSE ) @@ -256,7 +256,7 @@ test_that("ARIMA gives the same output with different horizons with grouping", { group_lags = TRUE, n_batches = 1, max_n_coalitions = 50, - adaptive = FALSE + iterative = FALSE ) h1 <- explain_forecast( @@ -274,21 +274,21 @@ test_that("ARIMA gives the same output with different horizons with grouping", { group_lags = TRUE, n_batches = 1, max_n_coalitions = 50, - adaptive = FALSE + iterative = FALSE ) expect_equal( - h2$shapley_values[horizon == 1], - h1$shapley_values[horizon == 1] + h2$shapley_values_est[horizon == 1], + h1$shapley_values_est[horizon == 1] ) expect_equal( - h3$shapley_values[horizon == 1], - h1$shapley_values[horizon == 1] + h3$shapley_values_est[horizon == 1], + h1$shapley_values_est[horizon == 1] ) expect_equal( - h3$shapley_values[horizon == 2], - h2$shapley_values[horizon == 2] + h3$shapley_values_est[horizon == 2], + h2$shapley_values_est[horizon == 2] ) }) diff --git a/tests/testthat/test-forecast-setup.R b/tests/testthat/test-forecast-setup.R index a20c55f2c..00da0fe0a 100644 --- a/tests/testthat/test-forecast-setup.R +++ b/tests/testthat/test-forecast-setup.R @@ -151,30 +151,33 @@ test_that("erroneous input: `prediction_zero`", { test_that("erroneous input: `max_n_coalitions`", { set.seed(123) - expect_snapshot({ - # Too low max_n_coalitions (smaller than # features) - horizon <- 3 - explain_y_lags <- 2 - explain_xreg_lags <- 2 + expect_snapshot( + { + # Too low max_n_coalitions (smaller than # features) + horizon <- 3 + explain_y_lags <- 2 + explain_xreg_lags <- 2 - n_coalitions <- horizon + explain_y_lags + explain_xreg_lags - 1 + n_coalitions <- horizon + explain_y_lags + explain_xreg_lags - 1 - explain_forecast( - testing = TRUE, - model = model_arima_temp, - y = data_arima[1:150, "Temp"], - xreg = data_arima[, "Wind"], - train_idx = 2:148, - explain_idx = 149:150, - explain_y_lags = explain_y_lags, - explain_xreg_lags = explain_xreg_lags, - horizon = horizon, - approach = "independence", - prediction_zero = p0_ar, - max_n_coalitions = n_coalitions, - group_lags = FALSE - ) - }) + explain_forecast( + testing = TRUE, + model = model_arima_temp, + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], + train_idx = 2:148, + explain_idx = 149:150, + explain_y_lags = explain_y_lags, + explain_xreg_lags = explain_xreg_lags, + horizon = horizon, + approach = "independence", + prediction_zero = p0_ar, + max_n_coalitions = n_coalitions, + group_lags = FALSE + ) + }, + error = TRUE + ) expect_snapshot({ diff --git a/tests/testthat/test-regression-output.R b/tests/testthat/test-regression-output.R index d2418abb0..d730fdb60 100644 --- a/tests/testthat/test-regression-output.R +++ b/tests/testthat/test-regression-output.R @@ 
-1,5 +1,5 @@ # Separate regression ================================================================================================== -test_that("output_lm_numeric_lm_separate_adaptive", { +test_that("output_lm_numeric_lm_separate_iterative", { expect_snapshot_rds( explain( testing = TRUE, @@ -9,9 +9,9 @@ test_that("output_lm_numeric_lm_separate_adaptive", { approach = "regression_separate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = TRUE + iterative = TRUE ), - "output_lm_numeric_lm_separate_adaptive" + "output_lm_numeric_lm_separate_iterative" ) }) @@ -26,7 +26,7 @@ test_that("output_lm_numeric_lm_separate", { approach = "regression_separate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_lm_separate" ) @@ -43,7 +43,7 @@ test_that("output_lm_numeric_lm_separate_n_comb", { prediction_zero = p0, max_n_coalitions = 10, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_lm_separate_n_comb" ) @@ -59,7 +59,7 @@ test_that("output_lm_categorical_lm_separate", { approach = "regression_separate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_lm_separate" ) @@ -75,7 +75,7 @@ test_that("output_lm_mixed_lm_separate", { approach = "regression_separate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_lm_separate" ) @@ -94,7 +94,7 @@ test_that("output_lm_mixed_splines_separate", { regression.recipe_func = function(regression.recipe) { recipes::step_ns(regression.recipe, recipes::all_numeric_predictors(), deg_free = 2) }, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_splines_separate" ) @@ -112,7 +112,7 @@ test_that("output_lm_mixed_decision_tree_cv_separate", { regression.model = parsnip::decision_tree(tree_depth = hardhat::tune(), engine = "rpart", mode = "regression"), regression.tune_values = data.frame(tree_depth = c(1, 2)), regression.vfold_cv_para = list(v = 2), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_decision_tree_cv_separate" ) @@ -131,7 +131,7 @@ test_that("output_lm_mixed_decision_tree_cv_separate_parallel", { regression.model = parsnip::decision_tree(tree_depth = hardhat::tune(), engine = "rpart", mode = "regression"), regression.tune_values = data.frame(tree_depth = c(1, 2)), regression.vfold_cv_para = list(v = 2), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_decision_tree_cv_separate_parallel" ) @@ -151,14 +151,14 @@ test_that("output_lm_mixed_xgboost_separate", { regression.recipe_func = function(regression.recipe) { return(recipes::step_dummy(regression.recipe, recipes::all_factor_predictors())) }, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_xgboost_separate" ) }) # Surrogate regression ================================================================================================= -test_that("output_lm_numeric_lm_surrogate_adaptive", { +test_that("output_lm_numeric_lm_surrogate_iterative", { expect_snapshot_rds( explain( testing = TRUE, @@ -168,9 +168,9 @@ test_that("output_lm_numeric_lm_surrogate_adaptive", { approach = "regression_surrogate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = TRUE + iterative = TRUE ), - "output_lm_numeric_lm_surrogate_adaptive" + "output_lm_numeric_lm_surrogate_iterative" ) }) @@ -185,7 +185,7 @@ test_that("output_lm_numeric_lm_surrogate", { 
approach = "regression_surrogate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_lm_surrogate" ) @@ -202,7 +202,7 @@ test_that("output_lm_numeric_lm_surrogate_n_comb", { prediction_zero = p0, max_n_coalitions = 10, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_lm_surrogate_n_comb" ) @@ -220,7 +220,7 @@ test_that("output_lm_numeric_lm_surrogate_reg_surr_n_comb", { max_n_coalitions = 10, regression.model = parsnip::linear_reg(), regression.surrogate_n_comb = 8, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_lm_surrogate_reg_surr_n_comb" ) @@ -236,7 +236,7 @@ test_that("output_lm_categorical_lm_surrogate", { approach = "regression_surrogate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_lm_surrogate" ) @@ -252,7 +252,7 @@ test_that("output_lm_mixed_lm_surrogate", { approach = "regression_surrogate", prediction_zero = p0, regression.model = parsnip::linear_reg(), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_lm_surrogate" ) @@ -270,7 +270,7 @@ test_that("output_lm_mixed_decision_tree_cv_surrogate", { regression.model = parsnip::decision_tree(tree_depth = hardhat::tune(), engine = "rpart", mode = "regression"), regression.tune_values = data.frame(tree_depth = c(1, 2)), regression.vfold_cv_para = list(v = 2), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_decision_tree_cv_surrogate" ) @@ -289,7 +289,7 @@ test_that("output_lm_mixed_xgboost_surrogate", { regression.recipe_func = function(regression.recipe) { recipes::step_dummy(regression.recipe, recipes::all_factor_predictors()) }, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_xgboost_surrogate" ) diff --git a/tests/testthat/test-regression-setup.R b/tests/testthat/test-regression-setup.R index 03feedd04..9a8998ae0 100644 --- a/tests/testthat/test-regression-setup.R +++ b/tests/testthat/test-regression-setup.R @@ -11,7 +11,7 @@ test_that("regression erroneous input: `approach`", { x_train = x_train_numeric, prediction_zero = p0, approach = c("regression_surrogate", "gaussian", "independence", "empirical"), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -27,7 +27,7 @@ test_that("regression erroneous input: `approach`", { x_train = x_train_numeric, prediction_zero = p0, approach = c("regression_separate", "gaussian", "independence", "empirical"), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -147,7 +147,7 @@ test_that("regression erroneous input: `regression.model`", { prediction_zero = p0, approach = "regression_surrogate", regression.tune_values = data.frame(tree_depth = c(1, 2, 3)), - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -301,7 +301,7 @@ test_that("regression erroneous input: `regression.recipe_func`", { regression.recipe_func = function(x) { return(2) }, - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -322,7 +322,7 @@ test_that("regression erroneous input: `regression.surrogate_n_comb`", { prediction_zero = p0, approach = "regression_surrogate", regression.surrogate_n_comb = 2^ncol(x_explain_numeric) - 1, - adaptive = FALSE + iterative = FALSE ) }, error = TRUE @@ -339,7 +339,7 @@ test_that("regression erroneous input: `regression.surrogate_n_comb`", { prediction_zero = p0, approach = "regression_surrogate", regression.surrogate_n_comb = 0, - adaptive = FALSE + iterative = FALSE ) }, error = TRUE diff --git 
a/tests/testthat/test-regular-output.R b/tests/testthat/test-regular-output.R index a5bfc02d6..d9c5cba9f 100644 --- a/tests/testthat/test-regular-output.R +++ b/tests/testthat/test-regular-output.R @@ -9,7 +9,7 @@ test_that("output_lm_numeric_independence", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_independence" ) @@ -24,8 +24,8 @@ test_that("output_lm_numeric_independence_MSEv_Shapley_weights", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - MSEv_uniform_comb_weights = FALSE, - adaptive = FALSE + output_args = list(MSEv_uniform_comb_weights = FALSE), + iterative = FALSE ), "output_lm_numeric_independence_MSEv_Shapley_weights" ) @@ -40,7 +40,7 @@ test_that("output_lm_numeric_empirical", { x_train = x_train_numeric, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_empirical" ) @@ -56,7 +56,7 @@ test_that("output_lm_numeric_empirical_n_coalitions", { approach = "empirical", prediction_zero = p0, max_n_coalitions = 20, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_empirical_n_coalitions" ) @@ -73,7 +73,7 @@ test_that("output_lm_numeric_empirical_independence", { approach = "empirical", prediction_zero = p0, empirical.type = "independence", - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_empirical_independence" ) @@ -91,7 +91,7 @@ test_that("output_lm_numeric_empirical_AICc_each", { prediction_zero = p0, max_n_coalitions = 8, empirical.type = "AICc_each_k", - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_empirical_AICc_each" ) @@ -109,7 +109,7 @@ test_that("output_lm_numeric_empirical_AICc_full", { prediction_zero = p0, max_n_coalitions = 8, empirical.type = "AICc_full", - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_empirical_AICc_full" ) @@ -124,7 +124,7 @@ test_that("output_lm_numeric_gaussian", { x_train = x_train_numeric, approach = "gaussian", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_gaussian" ) @@ -139,7 +139,7 @@ test_that("output_lm_numeric_copula", { x_train = x_train_numeric, approach = "copula", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_copula" ) @@ -154,7 +154,7 @@ test_that("output_lm_numeric_ctree", { x_train = x_train_numeric, approach = "ctree", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_ctree" ) @@ -177,7 +177,7 @@ test_that("output_lm_numeric_vaeac", { vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time vaeac.save_model = FALSE # Removes names and objects such as tmpdir and tmpfile ), - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_vaeac" ) @@ -192,7 +192,7 @@ test_that("output_lm_categorical_ctree", { x_train = x_train_categorical, approach = "ctree", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_ctree" ) @@ -215,7 +215,7 @@ test_that("output_lm_categorical_vaeac", { vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time vaeac.save_model = FALSE # Removes tmpdir and tmpfiles ), - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_vaeac" ) @@ -230,7 +230,7 @@ test_that("output_lm_categorical_categorical", { x_train = x_train_categorical, approach = "categorical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_method" ) @@ -245,7 +245,7 @@ 
test_that("output_lm_categorical_independence", { x_train = x_train_categorical, approach = "independence", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_categorical_independence" ) @@ -261,7 +261,7 @@ test_that("output_lm_ts_timeseries", { approach = "timeseries", prediction_zero = p0_ts, group = group_ts, - adaptive = FALSE + iterative = FALSE ), "output_lm_timeseries_method" ) @@ -276,7 +276,7 @@ test_that("output_lm_numeric_comb1", { x_train = x_train_numeric, approach = c("gaussian", "empirical", "ctree", "independence"), prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_comb1" ) @@ -291,7 +291,7 @@ test_that("output_lm_numeric_comb2", { x_train = x_train_numeric, approach = c("ctree", "copula", "independence", "copula"), prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_comb2" ) @@ -306,7 +306,7 @@ test_that("output_lm_numeric_comb3", { x_train = x_train_numeric, approach = c("independence", "empirical", "gaussian", "empirical"), prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_comb3" ) @@ -324,7 +324,7 @@ test_that("output_lm_mixed_independence", { x_train = x_train_mixed, approach = "independence", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_independence" ) @@ -339,7 +339,7 @@ test_that("output_lm_mixed_ctree", { x_train = x_train_mixed, approach = "ctree", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_ctree" ) @@ -362,7 +362,7 @@ test_that("output_lm_mixed_vaeac", { vaeac.epochs_initiation_phase = 2, # Low value here to speed up the time vaeac.save_model = FALSE # Removes tmpdir and tmpfiles ), - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_vaeac" ) @@ -378,7 +378,7 @@ test_that("output_lm_mixed_comb", { x_train = x_train_mixed, approach = c("ctree", "independence", "ctree", "independence"), prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_mixed_comb" ) @@ -406,7 +406,7 @@ test_that("output_custom_lm_numeric_independence_1", { approach = "independence", prediction_zero = p0, predict_model = custom_pred_func, - adaptive = FALSE + iterative = FALSE ), "output_custom_lm_numeric_independence_1" ) @@ -433,7 +433,7 @@ test_that("output_custom_lm_numeric_independence_2", { approach = "independence", prediction_zero = p0, predict_model = custom_pred_func, - adaptive = FALSE + iterative = FALSE )), "output_custom_lm_numeric_independence_2" ) @@ -445,13 +445,13 @@ test_that("output_custom_lm_numeric_independence_2", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) # Check that the printed Shapley values are identical expect_equal( - custom$shapley_values, - native$shapley_values + custom$shapley_values_est, + native$shapley_values_est ) }) @@ -497,7 +497,7 @@ test_that("output_custom_xgboost_mixed_dummy_ctree", { prediction_zero = p0, predict_model = predict_model.xgboost_dummy, get_model_specs = NA, - adaptive = FALSE + iterative = FALSE ) # custom$internal$objects$predict_model <- "Del on purpose" # Avoids issues with xgboost package updates custom @@ -518,7 +518,7 @@ test_that("output_lm_numeric_interaction", { x_train = x_train_interaction, approach = "independence", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ), "output_lm_numeric_interaction" ) @@ -535,7 +535,7 @@ test_that("output_lm_numeric_ctree_parallelized", { x_train = x_train_numeric, approach = "ctree", 
prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) }, "output_lm_numeric_ctree_parallelized" @@ -557,7 +557,7 @@ test_that("output_lm_numeric_empirical_progress", { x_train = x_train_numeric, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) }) }, @@ -566,7 +566,7 @@ test_that("output_lm_numeric_empirical_progress", { }) -# Just checking that internal$output$dt_samp_for_vS keep_samp_for_vS +# Just checking that internal$output$dt_samp_for_vS works test_that("output_lm_numeric_independence_keep_samp_for_vS", { expect_snapshot_rds( (out <- explain( @@ -576,8 +576,8 @@ test_that("output_lm_numeric_independence_keep_samp_for_vS", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - keep_samp_for_vS = TRUE, - adaptive = FALSE + output_args = list(keep_samp_for_vS = TRUE), + iterative = FALSE )), "output_lm_numeric_independence_keep_samp_for_vS" ) diff --git a/tests/testthat/test-regular-setup.R b/tests/testthat/test-regular-setup.R index 50efaa18a..a42314511 100644 --- a/tests/testthat/test-regular-setup.R +++ b/tests/testthat/test-regular-setup.R @@ -782,7 +782,7 @@ test_that("erroneous input: `keep_samp_for_vS`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - keep_samp_for_vS = keep_samp_for_vS_non_logical_1 + output_args = list(keep_samp_for_vS = keep_samp_for_vS_non_logical_1) ) }, error = TRUE @@ -799,7 +799,7 @@ test_that("erroneous input: `keep_samp_for_vS`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - keep_samp_for_vS = keep_samp_for_vS_non_logical_2 + output_args = list(keep_samp_for_vS = keep_samp_for_vS_non_logical_2) ) }, error = TRUE @@ -816,7 +816,7 @@ test_that("erroneous input: `keep_samp_for_vS`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - keep_samp_for_vS = keep_samp_for_vS_too_long + output_args = list(keep_samp_for_vS = keep_samp_for_vS_too_long) ) }, error = TRUE @@ -837,7 +837,7 @@ test_that("erroneous input: `MSEv_uniform_comb_weights`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_1 + output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_1) ) }, error = TRUE @@ -854,7 +854,7 @@ test_that("erroneous input: `MSEv_uniform_comb_weights`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_2 + output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_nl_2) ) }, error = TRUE @@ -871,7 +871,7 @@ test_that("erroneous input: `MSEv_uniform_comb_weights`", { x_train = x_train_numeric, approach = "independence", prediction_zero = p0, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_long + output_args = list(MSEv_uniform_comb_weights = MSEv_uniform_comb_weights_long) ) }, error = TRUE @@ -1211,7 +1211,7 @@ test_that("Shapr with `max_n_coalitions` >= 2^m uses exact Shapley kernel weight n_MC_samples = 2, # Low value for fast computations seed = 123, max_n_coalitions = NULL, - adaptive = FALSE + iterative = FALSE ) ) @@ -1225,9 +1225,9 @@ test_that("Shapr with `max_n_coalitions` >= 2^m uses exact Shapley kernel weight prediction_zero = p0, n_MC_samples = 2, # Low value for fast computations seed = 123, - adaptive_arguments = list(compute_sd = FALSE), + extra_computation_args = list(compute_sd = FALSE), max_n_coalitions = 2^ncol(x_explain_numeric), - adaptive = FALSE + iterative = FALSE ) ) 
@@ -1243,9 +1243,9 @@ test_that("Shapr with `max_n_coalitions` >= 2^m uses exact Shapley kernel weight prediction_zero = p0, n_MC_samples = 2, # Low value for fast computations seed = 123, - adaptive_arguments = list(compute_sd = FALSE), + extra_computation_args = list(compute_sd = FALSE), max_n_coalitions = 2^ncol(x_explain_numeric) + 1, - adaptive = FALSE + iterative = FALSE ) ) @@ -1303,7 +1303,7 @@ test_that("data feature ordering is output_lm_numeric_column_order", { prediction_zero = p0 ) - explain.new_data_feature_order <- explain( + ex.new_data_feature_order <- explain( testing = TRUE, model = model_lm_numeric, x_explain = rev(x_explain_numeric), @@ -1323,12 +1323,12 @@ test_that("data feature ordering is output_lm_numeric_column_order", { # Same Shapley values, but different order expect_false(identical( - explain.original$shapley_values, - explain.new_data_feature_order$shapley_values + explain.original$shapley_values_est, + ex.new_data_feature_order$shapley_values_est )) expect_equal( - explain.original$shapley_values[, mget(sort(names(explain.original$shapley_values)))], - explain.new_data_feature_order$shapley_values[, mget(sort(names(explain.new_data_feature_order$shapley_values)))] + explain.original$shapley_values_est[, mget(sort(names(explain.original$shapley_values_est)))], + ex.new_data_feature_order$shapley_values_est[, mget(sort(names(ex.new_data_feature_order$shapley_values_est)))] ) # Same Shapley values in same order @@ -1558,7 +1558,7 @@ test_that("vaeac_set_seed_works", { vaeac.extra_parameters = list( vaeac.epochs_initiation_phase = 2 ), - adaptive = FALSE + iterative = FALSE ) explanation_vaeac_2 <- explain( @@ -1575,11 +1575,11 @@ test_that("vaeac_set_seed_works", { vaeac.extra_parameters = list( vaeac.epochs_initiation_phase = 2 ), - adaptive = FALSE + iterative = FALSE ) # Check for equal Shapley values - expect_equal(explanation_vaeac_1$shapley_values, explanation_vaeac_2$shapley_values) + expect_equal(explanation_vaeac_1$shapley_values_est, explanation_vaeac_2$shapley_values_est) }) test_that("vaeac_pretreained_vaeac_model", { @@ -1600,7 +1600,7 @@ test_that("vaeac_pretreained_vaeac_model", { vaeac.extra_parameters = list( vaeac.epochs_initiation_phase = 2 ), - adaptive = FALSE + iterative = FALSE ) #### We can do this by reusing the vaeac model OBJECT @@ -1620,11 +1620,11 @@ test_that("vaeac_pretreained_vaeac_model", { vaeac.extra_parameters = list( vaeac.pretrained_vaeac_model = vaeac.pretrained_vaeac_model ), - adaptive = FALSE + iterative = FALSE ) # Check for equal Shapley values - expect_equal(explanation_vaeac_1$shapley_values, explanation_pretrained_vaeac$shapley_values) + expect_equal(explanation_vaeac_1$shapley_values_est, explanation_pretrained_vaeac$shapley_values_est) #### We can also do this by reusing the vaeac model PATH # Get the pre-trained vaeac model path @@ -1643,11 +1643,11 @@ test_that("vaeac_pretreained_vaeac_model", { vaeac.extra_parameters = list( vaeac.pretrained_vaeac_model = vaeac.pretrained_vaeac_path ), - adaptive = FALSE + iterative = FALSE ) # Check for equal Shapley values - expect_equal(explanation_vaeac_1$shapley_values, explanation_pretrained_vaeac$shapley_values) + expect_equal(explanation_vaeac_1$shapley_values_est, explanation_pretrained_vaeac$shapley_values_est) }) @@ -1682,5 +1682,5 @@ test_that("feature wise and groupwise computations are identical", { # Checking equality in the list with all final and intermediate results - expect_equal(expl_feat$shapley_values, expl_group$shapley_values) + 
expect_equal(expl_feat$shapley_values_est, expl_group$shapley_values_est) }) diff --git a/vignettes/understanding_shapr.Rmd b/vignettes/understanding_shapr.Rmd index c66b7e55e..f556d6298 100644 --- a/vignettes/understanding_shapr.Rmd +++ b/vignettes/understanding_shapr.Rmd @@ -24,7 +24,7 @@ editor_options: > [Estimation approaches and plotting functionality](#ex) -> [Adaptive estimation](#adaptive) +> [Iterative estimation](#iterative) > [Parallelization](#para) @@ -84,10 +84,10 @@ on the process of estimating the `v(S)`'s (and training the `"vaeac"` model) is `progressr` package, supporting progress updates also for parallelized computation. See [Verbosity and progress updates](#verbose) for details. -Moreover, the default behavior is to estimate the Shapley values adaptively/iteratively, with increasing number of +Moreover, the default behavior is to estimate the Shapley values iteratively, with an increasing number of feature coalitions being added, and to stop estimation once the estimated Shapley values have achieved a certain level of stability. -More information about this is provided in [Adaptive estimation](#adaptive) +More information about this is provided in [Iterative estimation](#iterative). The above, combined with batch computation of the `v(S)` values, enables fast and accurate estimation of the Shapley values in a memory-friendly manner. @@ -102,18 +102,18 @@ Below we provide brief descriptions of the most important parts of the default b By default `explain` always computes feature-wise Shapley values. Groups of features can be explained by providing the feature groups through the `group` argument. -When there are five or less features (or feature groups), adaptive estimation is by default disabled. +When there are five or fewer features (or feature groups), iterative estimation is by default disabled. The reason for this is that it is usually faster to estimate the Shapley values for all possible coalitions (`v(S)`), than to estimate the uncertainty of the Shapley values, and potentially stop estimation earlier. -While adaptive estimation is the default starting from six features, it is mainly when there are more than ten features, +While iterative estimation is the default starting from six features, it is mainly when there are more than ten features, that it is most beneficial, and can save a lot of computation time. The reason for this is that the number of possible coalitions grows exponentially. -These defaults can be overridden by setting the `adaptive` argument to `TRUE` or `FALSE`. -When using the `adaptive` argument, the estimation for an observation is stopped when all Shapley value +These defaults can be overridden by setting the `iterative` argument to `TRUE` or `FALSE`. +When using the `iterative` argument, the estimation for an observation is stopped when all Shapley value standard deviations are below `t` times the range of the Shapley values. -The `t` value controls the convergence tolerance, defaults to 0.02, and can be set through the `adaptive_arguments$convergence_tolerance` argument, see [Adaptive estimation](#adaptive) for more details. +The `t` value controls the convergence tolerance, defaults to 0.02, and can be set through the `iterative_args$convergence_tol` argument; see [Iterative estimation](#iterative) for more details.
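To make this stopping rule concrete, here it is written out for a single observation $i$, using the notation from the [Iterative estimation](#iterative) section below. This display is our formalization of the sentence above, not a verbatim quote of the package's exact internal criterion:

$$
\text{sd}(\hat{\phi}_{ij}) < t \left( \max_{j'} \hat{\phi}_{ij'} - \min_{j'} \hat{\phi}_{ij'} \right) \quad \text{for all features } j,
$$

where $t = 0.02$ by default. Since the bound must hold for every feature $j$, it is equivalent to requiring $\max_j \text{sd}(\hat{\phi}_{ij})$ to fall below $t$ times the range of the estimated Shapley values.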
-Since the adaptiveness default changes based on the number of features (or feature groups), the default is also to have +Since whether iterative estimation is used by default depends on the number of features (or feature groups), the default is also to have no upper bound on the number of coalitions considered. This can be controlled through the `max_n_coalitions` argument. @@ -472,7 +472,7 @@ explanation <- explain( x_train = x_train, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. @@ -483,7 +483,7 @@ explanation <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:03 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533bee009e.rds' @@ -494,7 +494,7 @@ explanation <- explain( # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. -print(explanation$shapley_values) +print(explanation$shapley_values_est) #> explain_id none Solar.R Wind Temp Month #> #> 1: 1 43.086 13.21173 4.7856 -25.572 -5.5992
explain( x_train = x_train_all_cat, approach = "categorical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) #> Success with message: #> max_n_coalitions is NULL or larger than or 2^n_features = 16, @@ -712,7 +712,7 @@ explanation_cat_method <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:17 ─────────────────────────────────────────────── #> • Model class: #> • Approach: categorical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5536b89b7a3.rds' @@ -781,7 +781,7 @@ explanation_timeseries <- explain( approach = "timeseries", prediction_zero = p0_ts, group = group_ts, - adaptive = FALSE + iterative = FALSE ) #> Success with message: #> max_n_coalitions is NULL or larger than or 2^n_groups = 16, @@ -790,7 +790,7 @@ explanation_timeseries <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:17 ─────────────────────────────────────────────── #> • Model class: #> • Approach: timeseries -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of group-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5536329ede9.rds' @@ -918,7 +918,7 @@ explanation_independence <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:19 ─────────────────────────────────────────────── #> • Model class: #> • Approach: independence -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 25 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553367d7353.rds' @@ -946,7 +946,7 @@ explanation_empirical <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:20 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 25 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553439c5c43.rds' @@ -974,7 +974,7 @@ explanation_gaussian_1e1 <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:24 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 25 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532b2f38e4.rds' @@ -1002,7 +1002,7 @@ explanation_gaussian_1e2 <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:24 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 25 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5535c3c085.rds' @@ -1030,7 +1030,7 @@ explanation_combined <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:25 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian, empirical, and independence -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 25 #> • Computations
(temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532d65f8f0.rds' @@ -1150,20 +1150,20 @@ plot_MSEv_eval_crit(explanation_list_named) + ![](figure_main/unnamed-chunk-14-1.png) - + -# Adaptive estimation +# Iterative estimation -Adaptive estimation is the default when computing Shapley values with six or more features (or feature groups), and -can always be manually overridden by setting `adaptive = FALSE` in the `explain()` function. -The idea behind adaptive estimation is to estimate sufficiently accurate Shapley value estimates faster. +Iterative estimation is the default when computing Shapley values with six or more features (or feature groups), and +can always be manually overridden by setting `iterative = FALSE` in the `explain()` function. +The idea behind iterative estimation is to obtain sufficiently accurate Shapley value estimates faster. First, an initial number of coalitions is sampled; then, bootstrapping is used to estimate the variance of the Shapley values. A convergence criterion is used to determine if the variances of the Shapley values are sufficiently small. If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more coalitions. The process is repeated until the variances are below the threshold. -Specifics related to the adaptive process and convergence criterion are set through `adaptive_arguments` argument. +Specifics related to the iterative process and convergence criterion are set through the `iterative_args` argument. The convergence criterion we use is adopted from @covert2021improving, and slightly modified to work for multiple observations @@ -1172,7 +1172,7 @@ observations where $\hat{\phi}_{ij}$ is the Shapley value of feature $j$ for observation $i$, and $\text{sd}(\phi_{ij})$ is its (bootstrap) estimated standard deviation. The default value of $t$ is 0.02. -Below we provide some examples of how to use the adaptive estimation procedure +Below we provide some examples of how to use the iterative estimation procedure. @@ -1214,8 +1214,8 @@ ex <- explain( x_train = x_train, approach = "gaussian", prediction_zero = p0, - adaptive = TRUE, - adaptive_arguments = list(convergence_tolerance = 0.1) + iterative = TRUE, + iterative_args = list(convergence_tol = 0.1) ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. @@ -1226,12 +1226,12 @@ ex <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:28 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: TRUE +#> • Iterative estimation: TRUE #> • Number of feature-wise Shapley values: 5 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553117f9996.rds' #> -#> ── Adaptive computation started ── +#> ── iterative computation started ── #> #> ── Iteration 1 ────────────────────────────────────────────────────────────────────────────────────── #> ℹ Using 5 of 32 coalitions, 5 new. @@ -1267,8 +1267,8 @@ computations (the computation of v(S)) in sequential batches with different feature subsets $S$, the memory usage can be significantly reduced. The user can control the number of batches by setting the two arguments -`adaptive_arguments$max_batch_size` (defaults to 10) and -`adaptive_arguments$min_n_batches` (defaults to 10). +`extra_computation_args$max_batch_size` (defaults to 10) and +`extra_computation_args$min_n_batches` (defaults to 10).
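As an illustration, the following sketch (ours, reusing the vignette's running `model`, `x_explain`, `x_train`, and `p0` objects) forces the `v(S)` computations into at least 10 batches of at most 5 coalitions each, lowering peak memory usage at the cost of some speed:

``` r
# A minimal sketch: smaller batches mean fewer v(S) evaluations are
# held in memory at once, at the cost of more batch overhead.
ex_batched <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  extra_computation_args = list(
    min_n_batches = 10, # at least 10 batches (the default)
    max_batch_size = 5  # at most 5 coalitions per batch (default is 10)
  )
)
```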
## Parallelized computation @@ -1310,7 +1310,7 @@ explanation_par <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:31 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 5 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5535eb6e09a.rds' @@ -1331,7 +1331,7 @@ and allows one or more of the strings `"basic"`, `"progress"`, `"convergence"`, `"basic"` (default) displays basic information about the computation which is being performed, `"progress"` displays information about where in the calculation process the function currently is, `"convergence"` displays information on how close to convergence the Shapley value estimates are -(for adaptive estimation), +(for iterative estimation), `"shapley"` displays (intermediate) Shapley value estimates and standard deviations + the final estimates, while `"vS_details"` displays information about the `v(S)` estimates for some of the approaches. If the user wants no printout, the argument can be set to `NULL`. @@ -1474,7 +1474,7 @@ explanation_combined <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:35 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical, copula, and gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537d03d824.rds' @@ -1511,7 +1511,7 @@ explanation_combined <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:36 ─────────────────────────────────────────────── #> • Model class: #> • Approach: ctree, ctree, and empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533712cd69.rds' @@ -1545,7 +1545,7 @@ explanation_group <- explain( approach = "empirical", prediction_zero = p0, group = group_list, - adaptive = FALSE + iterative = FALSE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct.
@@ -1556,7 +1556,7 @@ explanation_group <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:38 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of group-wise Shapley values: 2 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553136ac3f0.rds' @@ -1691,7 +1691,7 @@ explanation_custom <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:39 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55372210bb6.rds' @@ -1736,7 +1736,7 @@ explanation_custom_minimal <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:42 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5536b48f19a.rds' @@ -1795,7 +1795,7 @@ explanation_tidymodels <- explain( x_train = x_train, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) #> Success with message: #> max_n_coalitions is NULL or larger than or 2^n_features = 16, #> and is therefore set to 2^n_features = 16. #> #> ── Starting `shapr::explain()` at 2024-10-09 16:08:46 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537331b301.rds' @@ -1814,7 +1814,7 @@ #> ℹ Using 16 of 16 coalitions. # See that the Shapley value explanations are identical too -all.equal(explanation$shapley_values, explanation_tidymodels$shapley_values) +all.equal(explanation$shapley_values_est, explanation_tidymodels$shapley_values_est) #> [1] TRUE ``` @@ -1848,7 +1848,7 @@ explanation_vaeac <- explain( vaeac.depth = 2, vaeac.epochs = 3, vaeac.n_vaeacs_initialize = 2, - adaptive = FALSE + iterative = FALSE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. @@ -1894,7 +1894,7 @@ explanation_vaeac_early_stop <- explain( vaeac.epochs = 1000, # Set it to a large number vaeac.n_vaeacs_initialize = 2, vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2), - adaptive = FALSE + iterative = FALSE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. @@ -1926,16 +1926,16 @@ plot_MSEv_eval_crit(list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stop ## Continued computation {#cont_computation} In this section, we demonstrate how to continue to improve estimation accuracy with additional coalition samples, -from a previous Shapley value computation based on `shapr::explain()` with the adaptive estimation procedure. +from a previous Shapley value computation based on `shapr::explain()` with the iterative estimation procedure.
This can be done either by passing an existing object of class `shapr`, or by passing a string with the path to the intermediately saved results. -The latter is found at `SHAPR_OBJ$internal$parameters$adaptive_arguments$saving_path`, defaults to a temporary folder, +The latter is found at `SHAPR_OBJ$saving_path`, which defaults to a temporary folder, and is updated after each iteration. This can be particularly handy for long-running computations. ``` r -# First we run the computation with the adaptive estimation procedure for a limited number of coalition samples +# First we run the computation with the iterative estimation procedure for a limited number of coalition samples library(xgboost) library(data.table) @@ -1973,7 +1973,7 @@ ex_init <- explain( approach = "gaussian", prediction_zero = p0, max_n_coalitions = 20, - adaptive = TRUE + iterative = TRUE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. #> #> ── Starting `shapr::explain()` at 2024-10-09 16:08:49 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: TRUE +#> • Iterative estimation: TRUE #> • Number of feature-wise Shapley values: 5 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553c15795.rds' #> -#> ── Adaptive computation started ── +#> ── iterative computation started ── #> #> ── Iteration 1 ────────────────────────────────────────────────────────────────────────────────────── #> ℹ Using 5 of 32 coalitions, 5 new. @@ -2011,7 +2011,7 @@ ex_further <- explain( approach = "gaussian", prediction_zero = p0, max_n_coalitions = 25, - adaptive_arguments = list(convergence_tolerance = 0.005), # Decrease the convergence threshold + iterative_args = list(convergence_tol = 0.005), # Decrease the convergence threshold prev_shapr_object = ex_init ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. #> #> ── Starting `shapr::explain()` at 2024-10-09 16:08:53 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 5 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537d1fed73.rds' @@ -2029,8 +2029,8 @@ #> #> ℹ Using 24 of 32 coalitions. -print(ex_further$internal$parameters$adaptive_arguments$saving_path) -#> [1] "/tmp/RtmpRxPm0I/shapr_obj_10e5537d1fed73.rds" +print(ex_further$saving_path) +#> [1] "/tmp/RtmpPIciRY/shapr_obj_c1876481fd8a.rds" # Using the ex_init object to continue the computation for the remaining coalition samples # but this time using the path to the saved intermediate estimation object @@ -2041,7 +2041,7 @@ ex_even_further <- explain( model = model, x_explain = x_explain, x_train = x_train, approach = "gaussian", prediction_zero = p0, max_n_coalitions = NULL, - prev_shapr_object = ex_further$internal$parameters$adaptive_arguments$saving_path + prev_shapr_object = ex_further$saving_path ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct.
@@ -2052,7 +2052,7 @@ ex_even_further <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:08:54 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 5 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533e44ec7f.rds' @@ -2298,7 +2298,7 @@ explanation_forecast <- explain_forecast( #> max_n_coalitions is NULL or larger than or 2^n_features = 32, #> and is therefore set to 2^n_features = 32. -print(explanation_forecast$shapley_values) +print(explanation_forecast$shapley_values_est) #> explain_idx horizon none Temp.1 Temp.2 Wind.1 Wind.F1 Wind.F2 #> #> 1: 151 1 77.96 -0.67793 -0.67340 -1.2688 0.493408 NA diff --git a/vignettes/understanding_shapr.Rmd.orig b/vignettes/understanding_shapr.Rmd.orig index 8730d9c55..30ce83a7c 100644 --- a/vignettes/understanding_shapr.Rmd.orig +++ b/vignettes/understanding_shapr.Rmd.orig @@ -39,7 +39,7 @@ library(shapr) > [Estimation approaches and plotting functionality](#ex) -> [Adaptive estimation](#adaptive) +> [Iterative estimation](#iterative) > [Parallelization](#para) @@ -99,10 +99,10 @@ on the process of estimating the `v(S)`'s (and training the `"vaeac"` model) is `progressr` package, supporting progress updates also for parallelized computation. See [Verbosity and progress updates](#verbose) for details. -Moreover, the default behavior is to estimate the Shapley values adaptively/iteratively, with increasing number of +Moreover, the default behavior is to estimate the Shapley values iteratively, with an increasing number of feature coalitions being added, and to stop estimation as the estimated Shapley values have achieved a certain level of stability. -More information about this is provided in [Adaptive estimation](#adaptive) +More information about this is provided in [Iterative estimation](#iterative). The above, combined with batch computation of the `v(S)` values, enables fast and accurate estimation of the Shapley values in a memory-friendly manner. @@ -117,18 +117,18 @@ Below we provide brief descriptions of the most important parts of the default b By default `explain` always computes feature-wise Shapley values. Groups of features can be explained by providing the feature groups through the `group` argument. -When there are five or less features (or feature groups), adaptive estimation is by default disabled. +When there are five or fewer features (or feature groups), iterative estimation is by default disabled. The reason for this is that it is usually faster to estimate the Shapley values for all possible coalitions (`v(S)`) than to estimate the uncertainty of the Shapley values, and potentially stop estimation earlier. -While adaptive estimation is the default starting from six features, it is mainly when there are more than ten features, +While iterative estimation is the default starting from six features, it is mainly when there are more than ten features that it is most beneficial and can save a lot of computation time. The reason for this is that the number of possible coalitions grows exponentially. -These defaults can be overridden by setting the `adaptive` argument to `TRUE` or `FALSE`. +These defaults can be overridden by setting the `iterative` argument to `TRUE` or `FALSE`.
+When using the `iterative` argument, the estimation for an observation is stopped when all Shapley value standard deviations are below `t` times the range of the Shapley values. -The `t` value controls the convergence tolerance, defaults to 0.02, and can be set through the `adaptive_arguments$convergence_tolerance` argument, see [Adaptive estimation](#adaptive) for more details. +The `t` value controls the convergence tolerance; it defaults to 0.02 and can be set through the `iterative_args$convergence_tol` argument (see [Iterative estimation](#iterative) for more details). -Since the adaptiveness default changes based on the number of features (or feature groups), the default is also to have +Since the default for iterative estimation changes based on the number of features (or feature groups), the default is also to have no upper bound on the number of coalitions considered. This can be controlled through the `max_n_coalitions` argument. @@ -486,12 +486,12 @@ explanation <- explain( x_train = x_train, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) # Printing the Shapley values for the test data. # For more information about the interpretation of the values in the table, see ?shapr::explain. -print(explanation$shapley_values) +print(explanation$shapley_values_est) # Plot the resulting explanations for observations 1 and 6 plot(explanation, bar_plot_phi0 = FALSE, index_x_explain = c(1, 6)) @@ -525,7 +525,7 @@ explanation_plot <- explain( x_train = x_train, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) plot(explanation_plot, plot_type = "beeswarm") ``` @@ -566,7 +566,7 @@ explanation_lm_cat <- explain( x_train = x_train_cat, approach = "ctree", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) # Plot the resulting explanations for observations 1 and 6, excluding @@ -591,7 +591,7 @@ explanation_ctree <- explain( ctree.mincriterion = 0.80, ctree.minsplit = 20, ctree.minbucket = 20, - adaptive = FALSE + iterative = FALSE ) # Default parameters (based on (Hothorn, 2006)) are: # mincriterion = 0.95 @@ -629,7 +629,7 @@ explanation_cat_method <- explain( x_train = x_train_all_cat, approach = "categorical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) ``` @@ -691,7 +691,7 @@ explanation_timeseries <- explain( approach = "timeseries", prediction_zero = p0_ts, group = group_ts, - adaptive = FALSE + iterative = FALSE ) ``` @@ -933,20 +933,20 @@ plot_MSEv_eval_crit(explanation_list_named) + ) ``` - + -# Adaptive estimation +# Iterative estimation -Adaptive estimation is the default when computing Shapley values with six or more features (or feature groups), and -can always be manually overridden by setting `adaptive = FALSE` in the `explain()` function. -The idea behind adaptive estimation is to estimate sufficiently accurate Shapley value estimates faster. +Iterative estimation is the default when computing Shapley values with six or more features (or feature groups), and +can always be manually overridden by setting `iterative = FALSE` in the `explain()` function. +The idea behind iterative estimation is to reach sufficiently accurate Shapley value estimates faster. First, an initial number of coalitions is sampled; then bootstrapping is used to estimate the variance of the Shapley values. A convergence criterion is used to determine if the variances of the Shapley values are sufficiently small.
If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more coalitions. The process is repeated until the variances are below the threshold. -Specifics related to the adaptive process and convergence criterion are set through `adaptive_arguments` argument. +Specifics related to the iterative process and convergence criterion are set through the `iterative_args` argument. The convergence criterion we use is adopted from @covert2021improving, and slightly modified to work for multiple observations @@ -955,7 +955,7 @@ observations where $\hat{\phi}_{ij}$ is the Shapley value of feature $j$ for observation $i$, and $\text{sd}(\phi_{ij})$ is its (bootstrap) estimated standard deviation. The default value of $t$ is 0.02. -Below we provide some examples of how to use the adaptive estimation procedure +Below we provide some examples of how to use the iterative estimation procedure. @@ -996,8 +996,8 @@ ex <- explain( x_train = x_train, approach = "gaussian", prediction_zero = p0, - adaptive = TRUE, - adaptive_arguments = list(convergence_tolerance = 0.1) + iterative = TRUE, + iterative_args = list(convergence_tol = 0.1) ) ``` @@ -1026,8 +1026,8 @@ computations (the computation of v(S)) in sequential batches with different feature subsets $S$, the memory usage can be significantly reduced. The user can control the number of batches by setting the two arguments -`adaptive_arguments$max_batch_size` (defaults to 10) and -`adaptive_arguments$min_n_batches` (defaults to 10). +`extra_computation_args$max_batch_size` (defaults to 10) and +`extra_computation_args$min_n_batches` (defaults to 10). ## Parallelized computation @@ -1072,7 +1072,7 @@ and allows one or more of the strings `"basic"`, `"progress"`, `"convergence"`, `"basic"` (default) displays basic information about the computation that is being performed, `"progress"` displays information about where in the calculation process the function currently is, `"convergence"` displays information on how close to convergence the Shapley value estimates are -(for adaptive estimation), +(for iterative estimation), `"shapley"` displays (intermediate) Shapley value estimates and standard deviations + the final estimates, while `"vS_details"` displays information about the `v(S)` estimates for some of the approaches. If the user wants no printout, the argument can be set to `NULL`.
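As a minimal, assumed sketch of how these options may be combined in practice (reusing `model`, `x_explain`, `x_train`, and `p0` from the examples above), one can pass a character vector with the desired strings, or `NULL` for silence:

```{r}
# Minimal sketch (assumed): print basic information, convergence diagnostics,
# and intermediate Shapley value estimates during the estimation
ex_verbose <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  verbose = c("basic", "convergence", "shapley")
)

# Suppress all printout
ex_silent <- explain(
  model = model,
  x_explain = x_explain,
  x_train = x_train,
  approach = "gaussian",
  prediction_zero = p0,
  verbose = NULL
)
```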
@@ -1246,7 +1246,7 @@ explanation_group <- explain( approach = "empirical", prediction_zero = p0, group = group_list, - adaptive = FALSE + iterative = FALSE ) # Prints the group-wise explanations explanation_group @@ -1425,11 +1425,11 @@ explanation_tidymodels <- explain( x_train = x_train, approach = "empirical", prediction_zero = p0, - adaptive = FALSE + iterative = FALSE ) # See that the Shapley value explanations are identical too -all.equal(explanation$shapley_values, explanation_tidymodels$shapley_values) +all.equal(explanation$shapley_values_est, explanation_tidymodels$shapley_values_est) ``` ## The parameters of the `vaeac` approach @@ -1461,7 +1461,7 @@ explanation_vaeac <- explain( vaeac.depth = 2, vaeac.epochs = 3, vaeac.n_vaeacs_initialize = 2, - adaptive = FALSE + iterative = FALSE ) ``` @@ -1498,7 +1498,7 @@ explanation_vaeac_early_stop <- explain( vaeac.epochs = 1000, # Set it to a large number vaeac.n_vaeacs_initialize = 2, vaeac.extra_parameters = list(vaeac.epochs_early_stopping = 2), - adaptive = FALSE + iterative = FALSE ) ``` @@ -1519,15 +1519,15 @@ plot_MSEv_eval_crit(list("Vaeac 3 epochs" = explanation_vaeac, "Vaeac early stop ## Continued computation {#cont_computation} In this section, we demonstrate how to continue to improve estimation accuracy with additional coalition samples, -from a previous Shapley value computation based on `shapr::explain()` with the adaptive estimation procedure. +from a previous Shapley value computation based on `shapr::explain()` with the iterative estimation procedure. This can be done either by passing an existing object of class `shapr`, or by passing a string with the path to the intermediately saved results. -The latter is found at `SHAPR_OBJ$internal$parameters$adaptive_arguments$saving_path`, defaults to a temporary folder, +The latter is found at `SHAPR_OBJ$saving_path`, which defaults to a temporary folder, and is updated after each iteration. This can be particularly handy for long-running computations.
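Because the intermediate results are stored as an ordinary `.rds` file, they can also be inspected manually, for instance after an interrupted run; a minimal, assumed sketch (here `ex` denotes a hypothetical, previously returned explanation object):

```{r}
# Minimal sketch (assumed): read the intermediately saved results back in,
# e.g. to inspect them after an interrupted long-running computation
intermediate <- readRDS(ex$saving_path)
class(intermediate)
```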
```{r} -# First we run the computation with the adaptive estimation procedure for a limited number of coalition samples +# First we run the computation with the iterative estimation procedure for a limited number of coalition samples library(xgboost) library(data.table) @@ -1565,7 +1565,7 @@ ex_init <- explain( approach = "gaussian", prediction_zero = p0, max_n_coalitions = 20, - adaptive = TRUE + iterative = TRUE ) # Using the ex_init object to continue the computation with 5 more coalition samples @@ -1576,11 +1576,11 @@ ex_further <- explain( approach = "gaussian", prediction_zero = p0, max_n_coalitions = 25, - adaptive_arguments = list(convergence_tolerance = 0.005), # Decrease the convergence threshold + iterative_args = list(convergence_tol = 0.005), # Decrease the convergence threshold prev_shapr_object = ex_init ) -print(ex_further$internal$parameters$adaptive_arguments$saving_path) +print(ex_further$saving_path) # Using the ex_init object to continue the computation for the remaining coalition samples # but this time using the path to the saved intermediate estimation object @@ -1591,7 +1591,7 @@ ex_even_further <- explain( approach = "gaussian", prediction_zero = p0, max_n_coalitions = NULL, - prev_shapr_object = ex_further$internal$parameters$adaptive_arguments$saving_path + prev_shapr_object = ex_further$saving_path ) @@ -1788,7 +1788,7 @@ explanation_forecast <- explain_forecast( group_lags = FALSE ) -print(explanation_forecast$shapley_values) +print(explanation_forecast$shapley_values_est) ``` diff --git a/vignettes/understanding_shapr_asymmetric_causal.Rmd b/vignettes/understanding_shapr_asymmetric_causal.Rmd index 59839defe..711654140 100644 --- a/vignettes/understanding_shapr_asymmetric_causal.Rmd +++ b/vignettes/understanding_shapr_asymmetric_causal.Rmd @@ -613,7 +613,7 @@ explanation_asym_con[["gaussian"]] <- explain( #> ── Iteration 1 ────────────────────────────────────────────────────────────────────────────────────────────────── #> ℹ Using 13 of 20 coalitions, 13 new. -explanation_asym_con[["gaussian_non_adaptive"]] <- explain( +explanation_asym_con[["gaussian_non_iterative"]] <- explain( model = model, x_train = x_train, x_explain = x_explain, @@ -624,7 +624,7 @@ explanation_asym_con[["gaussian_non_adaptive"]] <- explain( asymmetric = TRUE, causal_ordering = causal_ordering, confounding = NULL, # Default value - adaptive = FALSE + iterative = FALSE ) #> Note: Feature classes extracted from the model contains NA. #> Assuming feature classes from the data are correct. @@ -722,7 +722,7 @@ instead of all $128$ coalitions (see code below). print_MSEv_scores_and_time(explanation_asym_con) #> MSEv MSEv_sd Time (secs) #> gaussian 330603.3 36828.70 1.66 -#> gaussian_non_adaptive 306457.7 35411.60 1.52 +#> gaussian_non_iterative 306457.7 35411.60 1.52 #> ctree 260562.1 29428.95 8.75 #> xgboost 307562.1 39362.81 1.60 @@ -1015,7 +1015,7 @@ explanation_sym_cau[["gaussian"]] <- explain( asymmetric = FALSE, causal_ordering = list(1, 2:3, 4:7), confounding = c(FALSE, TRUE, FALSE), - adaptive = FALSE, # Set to FALSE to get a single iteration to illustrate sampling steps below + iterative = FALSE, # Set to FALSE to get a single iteration to illustrate sampling steps below exact = TRUE ) #> Note: Feature classes extracted from the model contains NA. 
@@ -1337,10 +1337,10 @@ sv_framework_2_str <- "Causal SV" # Set up the data.frame we are going to plot sv_correlation_df <- data.frame( color = x_explain[, color], - sv_framework_1_feature_1 = sv_framework_1$shapley_values[[feature_1]], - sv_framework_2_feature_1 = sv_framework_2$shapley_values[[feature_1]], - sv_framework_1_feature_2 = sv_framework_1$shapley_values[[feature_2]], - sv_framework_2_feature_2 = sv_framework_2$shapley_values[[feature_2]] + sv_framework_1_feature_1 = sv_framework_1$shapley_values_est[[feature_1]], + sv_framework_2_feature_1 = sv_framework_2$shapley_values_est[[feature_1]], + sv_framework_1_feature_2 = sv_framework_1$shapley_values_est[[feature_2]], + sv_framework_2_feature_2 = sv_framework_2$shapley_values_est[[feature_2]] ) # Make the plots @@ -1501,7 +1501,7 @@ explanations <- list( # Extract the relevant Shapley values explanations_extracted <- data.table::rbindlist(lapply(seq_along(explanations), function(idx) { - explanations[[idx]]$shapley_values[ + explanations[[idx]]$shapley_values_est[ dates_idx, ..features ][, `:=`(Date = dates, type = names(explanations)[idx])] })) diff --git a/vignettes/understanding_shapr_asymmetric_causal.Rmd.orig b/vignettes/understanding_shapr_asymmetric_causal.Rmd.orig index f9de45bea..66f19c49d 100644 --- a/vignettes/understanding_shapr_asymmetric_causal.Rmd.orig +++ b/vignettes/understanding_shapr_asymmetric_causal.Rmd.orig @@ -490,7 +490,7 @@ explanation_asym_con[["gaussian"]] <- explain( confounding = NULL # Default value ) -explanation_asym_con[["gaussian_non_adaptive"]] <- explain( +explanation_asym_con[["gaussian_non_iterative"]] <- explain( model = model, x_train = x_train, x_explain = x_explain, @@ -501,7 +501,7 @@ explanation_asym_con[["gaussian_non_adaptive"]] <- explain( asymmetric = TRUE, causal_ordering = causal_ordering, confounding = NULL, # Default value - adaptive = FALSE + iterative = FALSE ) explanation_asym_con[["ctree"]] <- explain( @@ -675,7 +675,7 @@ explanation_sym_cau[["gaussian"]] <- explain( asymmetric = FALSE, causal_ordering = list(1, 2:3, 4:7), confounding = c(FALSE, TRUE, FALSE), - adaptive = FALSE, # Set to FALSE to get a single iteration to illustrate sampling steps below + iterative = FALSE, # Set to FALSE to get a single iteration to illustrate sampling steps below exact = TRUE ) @@ -845,10 +845,10 @@ sv_framework_2_str <- "Causal SV" # Set up the data.frame we are going to plot sv_correlation_df <- data.frame( color = x_explain[, color], - sv_framework_1_feature_1 = sv_framework_1$shapley_values[[feature_1]], - sv_framework_2_feature_1 = sv_framework_2$shapley_values[[feature_1]], - sv_framework_1_feature_2 = sv_framework_1$shapley_values[[feature_2]], - sv_framework_2_feature_2 = sv_framework_2$shapley_values[[feature_2]] + sv_framework_1_feature_1 = sv_framework_1$shapley_values_est[[feature_1]], + sv_framework_2_feature_1 = sv_framework_2$shapley_values_est[[feature_1]], + sv_framework_1_feature_2 = sv_framework_1$shapley_values_est[[feature_2]], + sv_framework_2_feature_2 = sv_framework_2$shapley_values_est[[feature_2]] ) # Make the plots @@ -1006,7 +1006,7 @@ explanations <- list( # Extract the relevant Shapley values explanations_extracted <- data.table::rbindlist(lapply(seq_along(explanations), function(idx) { - explanations[[idx]]$shapley_values[ + explanations[[idx]]$shapley_values_est[ dates_idx, ..features ][, `:=`(Date = dates, type = names(explanations)[idx])] })) diff --git a/vignettes/understanding_shapr_regression.Rmd b/vignettes/understanding_shapr_regression.Rmd index 
15624fc8d..788f33526 100644 --- a/vignettes/understanding_shapr_regression.Rmd +++ b/vignettes/understanding_shapr_regression.Rmd @@ -274,7 +274,7 @@ explanation_list$MC_empirical <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:09:54 ─────────────────────────────────────────────── #> • Model class: #> • Approach: empirical -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553378f592d.rds' @@ -308,7 +308,7 @@ explanation_list$sep_lm <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:00 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533c20e191.rds' @@ -384,7 +384,7 @@ explanation_list$sep_pcr <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:01 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55318b105b2.rds' @@ -420,7 +420,7 @@ explanation_list$sep_splines <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:02 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553a209912.rds' @@ -489,7 +489,7 @@ explanation_list$sep_reicpe_example <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:03 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55334f61d01.rds' @@ -567,7 +567,7 @@ explanation_list$sep_tree_stump <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:04 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553108eb1.rds' @@ -594,7 +594,7 @@ explanation_list$sep_tree_default <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:05 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5534d028986.rds' @@ -715,7 +715,7 @@ explanation_list$sep_tree_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:06 ─────────────────────────────────────────────── #> • Model class: #> •
Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553173580dc.rds' @@ -750,7 +750,7 @@ explanation_list$sep_tree_cv_2 <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:19 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5531b0af982.rds' @@ -790,7 +790,7 @@ explanation_list$sep_rf <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:45 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5535c02b48f.rds' @@ -825,7 +825,7 @@ explanation_list$sep_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:10:46 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5534bc71658.rds' @@ -1046,7 +1046,7 @@ explanation_list$sep_xgboost <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:11:21 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553b9eedb1.rds' @@ -1076,7 +1076,7 @@ explanation_list$sep_xgboost_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:11:22 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5536c6c263f.rds' @@ -1107,7 +1107,7 @@ explanation_list$sep_xgboost_cv_par <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:11:37 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55375979516.rds' @@ -1142,7 +1142,7 @@ explanation_list$sep_xgboost_cv_2_par <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:11:50 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553e0b863c.rds' @@ -1242,7 +1242,7 @@
explanation_list$sur_lm <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:05 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533f5d53fc.rds' @@ -1269,7 +1269,7 @@ explanation_list$sur_xgboost <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:05 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553657c3c72.rds' @@ -1303,7 +1303,7 @@ explanation_list$sur_xgboost_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:06 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55349e5a38c.rds' @@ -1330,7 +1330,7 @@ explanation_list$sur_rf <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:08 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553eebc9ea.rds' @@ -1365,7 +1365,7 @@ explanation_list$sur_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:09 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537965b6b3.rds' @@ -1422,7 +1422,7 @@ explanation_list$sur_rf_cv_par <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:37 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533f7681e8.rds' @@ -1434,8 +1434,8 @@ future::plan(future::sequential) # To return to non-parallel computation # Check that we get identical Shapley value explanations all.equal( - explanation_list$sur_rf_cv$shapley_values, - explanation_list$sur_rf_cv_par$shapley_values + explanation_list$sur_rf_cv$shapley_values_est, + explanation_list$sur_rf_cv_par$shapley_values_est ) #> [1] TRUE ``` @@ -1638,7 +1638,7 @@ explanation_list$sep_ppr <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:58 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at:
'/tmp/RtmpRxPm0I/shapr_obj_10e553791592c7.rds' @@ -1667,7 +1667,7 @@ explanation_list$sep_ppr_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:12:58 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5531ac3859d.rds' @@ -1694,7 +1694,7 @@ explanation_list$sur_ppr <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:09 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55339bdd72a.rds' @@ -1723,7 +1723,7 @@ explanation_list$sur_ppr_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:09 ─────────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532987aff5.rds' @@ -1807,7 +1807,7 @@ explanation_list_MC$MC_independence <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:11 ─────────────────────────────────────────────── #> • Model class: #> • Approach: independence -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533cae2265.rds' @@ -1836,7 +1836,7 @@ explanation_list_MC$MC_gaussian <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:12 ─────────────────────────────────────────────── #> • Model class: #> • Approach: gaussian -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532491f5ab.rds' @@ -1862,7 +1862,7 @@ explanation_list_MC$MC_copula <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:13 ─────────────────────────────────────────────── #> • Model class: #> • Approach: copula -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553451ae5c5.rds' @@ -1888,7 +1888,7 @@ explanation_list_MC$MC_ctree <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:13 ─────────────────────────────────────────────── #> • Model class: #> • Approach: ctree -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533d628d5e.rds' @@ -1915,7 +1915,7 @@ explanation_list_MC$MC_vaeac <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:13:15 ─────────────────────────────────────────────── #> • Model class: #> • Approach: vaeac -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of
feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5534050514.rds' @@ -2099,7 +2099,7 @@ explanation_list_mixed$MC_independence <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:15:23 ──────────────────────── #> • Model class: #> • Approach: independence -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: @@ -2123,7 +2123,7 @@ explanation_list_mixed$MC_ctree <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:15:24 ──────────────────────── #> • Model class: #> • Approach: ctree -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: @@ -2147,7 +2147,7 @@ explanation_list_mixed$MC_vaeac <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:15:26 ──────────────────────── #> • Model class: #> • Approach: vaeac -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: @@ -2183,7 +2183,7 @@ explanation_list_mixed$sep_lm <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:18:46 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533131b08d.rds' @@ -2211,7 +2211,7 @@ explanation_list_mixed$sep_splines <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:18:47 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55366111c0d.rds' @@ -2236,7 +2236,7 @@ explanation_list_mixed$sep_tree <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:18:48 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5531ad27dab.rds' @@ -2269,7 +2269,7 @@ explanation_list_mixed$sep_tree_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:18:49 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55348ac2c82.rds' @@ -2294,7 +2294,7 @@ explanation_list_mixed$sep_rf <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:19:18 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain:
20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55369de7bb8.rds' @@ -2326,7 +2326,7 @@ explanation_list_mixed$sep_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:19:20 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537f540ca9.rds' @@ -2354,7 +2354,7 @@ explanation_list_mixed$sep_xgboost <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:13 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533286e2bf.rds' @@ -2389,7 +2389,7 @@ explanation_list_mixed$sep_xgboost_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:14 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5531fa7a245.rds' @@ -2424,7 +2424,7 @@ explanation_list_mixed$sur_lm <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:33 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55365b26da6.rds' @@ -2453,7 +2453,7 @@ explanation_list_mixed$sur_splines <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:34 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5537f7cd475.rds' @@ -2478,7 +2478,7 @@ explanation_list_mixed$sur_tree <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:34 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55342bb266a.rds' @@ -2511,7 +2511,7 @@ explanation_list_mixed$sur_tree_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:35 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553263d5b45.rds' @@ -2536,7 +2536,7 @@ explanation_list_mixed$sur_rf <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:37 ──────────────────────────────────────────── #> • Model class: #> • Approach:
regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5536f402f15.rds' @@ -2565,7 +2565,7 @@ explanation_list_mixed$sur_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:38 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55321ef0397.rds' @@ -2593,7 +2593,7 @@ explanation_list_mixed$sur_xgboost <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:52 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5535b569440.rds' @@ -2628,7 +2628,7 @@ explanation_list_mixed$sur_xgboost_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:52 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532e902f01.rds' @@ -2752,7 +2752,7 @@ explanation_list_str$sep_lm <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:57 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5532789a643.rds' @@ -2781,7 +2781,7 @@ explanation_list_str$sep_pcr <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:58 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e553c707510.rds' @@ -2810,7 +2810,7 @@ explanation_list_str$sep_splines <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:20:59 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5535c2a1de9.rds' @@ -2840,7 +2840,7 @@ explanation_list_str$sep_tree_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:21:00 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5531d5a6c89.rds' @@ -2874,7 +2874,7 @@
explanation_list_str$sep_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:21:12 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_separate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e5533d1c027d.rds' @@ -2909,7 +2909,7 @@ explanation_list_str$sur_rf_cv <- explain( #> ── Starting `shapr::explain()` at 2024-10-09 16:21:47 ──────────────────────────────────────────── #> • Model class: #> • Approach: regression_surrogate -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 20 #> • Computations (temporary) saved at: '/tmp/RtmpRxPm0I/shapr_obj_10e55364f1a477.rds' diff --git a/vignettes/understanding_shapr_regression.Rmd.orig b/vignettes/understanding_shapr_regression.Rmd.orig index 9d9f32ef6..c9f5f53b7 100644 --- a/vignettes/understanding_shapr_regression.Rmd.orig +++ b/vignettes/understanding_shapr_regression.Rmd.orig @@ -892,8 +892,8 @@ future::plan(future::sequential) # To return to non-parallel computation # Check that we get identical Shapley value explanations all.equal( - explanation_list$sur_rf_cv$shapley_values, - explanation_list$sur_rf_cv_par$shapley_values + explanation_list$sur_rf_cv$shapley_values_est, + explanation_list$sur_rf_cv_par$shapley_values_est ) ``` diff --git a/vignettes/understanding_shapr_vaeac.Rmd b/vignettes/understanding_shapr_vaeac.Rmd index 2f3a79377..dcf30355e 100644 --- a/vignettes/understanding_shapr_vaeac.Rmd +++ b/vignettes/understanding_shapr_vaeac.Rmd @@ -172,7 +172,7 @@ We can look at the Shapley values. ``` r # Printing and plotting the Shapley values. # See ?shapr::explain for interpretation of the values. -print(explanation$shapley_values) +print(explanation$shapley_values_est) #> explain_id none Solar.R Wind Temp Month #> #> 1: 1 43.086 4.35827 -0.49487 -16.7173 0.55352 @@ -219,7 +219,7 @@ expl_pretrained_vaeac <- explain( #> and is therefore set to 2^n_features = 16. # Check that this version provides the same Shapley values -all.equal(explanation$shapley_values, expl_pretrained_vaeac$shapley_values) +all.equal(explanation$shapley_values_est, expl_pretrained_vaeac$shapley_values_est) #> [1] TRUE ``` @@ -251,7 +251,7 @@ expl_pretrained_vaeac_path <- explain( #> and is therefore set to 2^n_features = 16.
# Check that this version provides the same Shapley values -all.equal(explanation$shapley_values, expl_pretrained_vaeac_path$shapley_values) +all.equal(explanation$shapley_values_est, expl_pretrained_vaeac_path$shapley_values_est) #> [1] TRUE ``` @@ -442,7 +442,7 @@ rbind( "Paired" = expl_paired_sampling_TRUE$timing$main_timing_secs, "Regular" = expl_paired_sampling_FALSE$timing$main_timing_secs ) -#> setup test_prediction adaptive_estimation finalize_explanation +#> setup test_prediction iterative_estimation finalize_explanation #> Paired 0.048088 0.036740 11.721 0.0049973 #> Regular 0.047131 0.036345 11.517 0.0049357 ``` @@ -482,7 +482,7 @@ expl_with_messages <- explain( #> ── Starting `shapr::explain()` at 2024-10-04 14:57:22 ───────────────────────────────────────────────────────────────────────────────────────────────────────── #> • Model class: #> • Approach: vaeac -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpIQRVZ2/shapr_obj_acefb1be76dcf.rds' @@ -555,7 +555,7 @@ progressr::with_progress({ #> '/tmp/RtmpIQRVZ2/X2024.10.04.14.57.33.088772_n_features_4_n_train_105_depth_3_width_32_latent_8_lr_0.001_epoch_best.pt' #> '/tmp/RtmpIQRVZ2/X2024.10.04.14.57.33.088772_n_features_4_n_train_105_depth_3_width_32_latent_8_lr_0.001_epoch_best_running.pt' #> '/tmp/RtmpIQRVZ2/X2024.10.04.14.57.33.088772_n_features_4_n_train_105_depth_3_width_32_latent_8_lr_0.001_epoch_last.pt' -all.equal(expl_with_messages$shapley_values, expl_with_progressr$shapley_values) +all.equal(expl_with_messages$shapley_values_est, expl_with_progressr$shapley_values_est) #> [1] TRUE ``` @@ -741,7 +741,7 @@ expl_early_stopping <- explain( #> ── Starting `shapr::explain()` at 2024-10-04 14:57:44 ───────────────────────────────────────────────────────────────────────────────────────────────────────── #> • Model class: #> • Approach: vaeac -#> • Adaptive estimation: FALSE +#> • Iterative estimation: FALSE #> • Number of feature-wise Shapley values: 4 #> • Number of observations to explain: 6 #> • Computations (temporary) saved at: '/tmp/RtmpIQRVZ2/shapr_obj_acefb6c654eee.rds' diff --git a/vignettes/understanding_shapr_vaeac.Rmd.orig b/vignettes/understanding_shapr_vaeac.Rmd.orig index 63ee5cc63..20499c9b7 100644 --- a/vignettes/understanding_shapr_vaeac.Rmd.orig +++ b/vignettes/understanding_shapr_vaeac.Rmd.orig @@ -177,7 +177,7 @@ We can look at the Shapley values. ```{r first-vaeac-plots, cache = TRUE} # Printing and plotting the Shapley values. # See ?shapr::explain for interpretation of the values.
-print(explanation$shapley_values) +print(explanation$shapley_values_est) plot(explanation) ``` @@ -208,7 +208,7 @@ expl_pretrained_vaeac <- explain( ) # Check that this version provides the same Shapley values -all.equal(explanation$shapley_values, expl_pretrained_vaeac$shapley_values) +all.equal(explanation$shapley_values_est, expl_pretrained_vaeac$shapley_values_est) ``` ## Pre-trained vaeac (path) {#pretrained_vaeac_path} @@ -233,7 +233,7 @@ expl_pretrained_vaeac_path <- explain( ) # Check that this version provides the same Shapley values -all.equal(explanation$shapley_values, expl_pretrained_vaeac_path$shapley_values) +all.equal(explanation$shapley_values_est, expl_pretrained_vaeac_path$shapley_values_est) ``` @@ -397,7 +397,7 @@ progressr::with_progress({ vaeac.n_vaeacs_initialize = 2 ) }) -all.equal(expl_with_messages$shapley_values, expl_with_progressr$shapley_values) +all.equal(expl_with_messages$shapley_values_est, expl_with_progressr$shapley_values_est) ``` ## Continue the training of the vaeac approach {#continue_training}