Skip to content

Commit

Permalink
Function and documentation cleanup (#427)
Browse files Browse the repository at this point in the history
  • Loading branch information
martinju authored Dec 19, 2024
1 parent f89ead4 commit d353276
Show file tree
Hide file tree
Showing 233 changed files with 2,039 additions and 2,786 deletions.
7 changes: 5 additions & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ inst/compare_lundberg\.xgb\.obj
^CRAN-SUBMISSION$
^.Rprofile
^python$
^rebuild-long-running-vignette\.R$
^vignettes/understanding_shapr_vaeac\.Rmd\.orig$
^rebuild_long_running_vignette\.R$
^vignettes/understanding_shapr\.Rmd\.orig$
^vignettes/understanding_shapr_vaeac\.Rmd\.orig$
^vignettes/understanding_shapr_regression\.Rmd\.orig$
^vignettes/understanding_shapr_asymmetric_causal\.Rmd\.orig$
^vignettes/figure_main/*$
^vignettes/cache_main/*$
^vignettes/figure_vaeac/*$
^vignettes/cache_vaeac/*$
^vignettes/figure_regression/*$
^vignettes/cache_regression/*$
^vignettes/figure_asymmetric_causal/*$
^vignettes/cache_asymmetric_causal/*$
22 changes: 3 additions & 19 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,51 +53,36 @@ S3method(setup_approach,regression_surrogate)
S3method(setup_approach,timeseries)
S3method(setup_approach,vaeac)
export(additional_regression_setup)
export(aicc_full_single_cpp)
export(append_vS_list)
export(check_convergence)
export(cli_compute_vS)
export(cli_iter)
export(cli_startup)
export(coalition_matrix_cpp)
export(compute_estimates)
export(compute_shapley_new)
export(compute_shapley)
export(compute_time)
export(compute_vS)
export(correction_matrix_cpp)
export(create_coalition_table)
export(explain)
export(explain_forecast)
export(finalize_explanation)
export(finalize_explanation_forecast)
export(get_cov_mat)
export(get_data_specs)
export(get_extra_est_args_default)
export(get_extra_comp_args_default)
export(get_iterative_args_default)
export(get_model_specs)
export(get_mu_vec)
export(get_output_args_default)
export(get_supported_approaches)
export(hat_matrix_cpp)
export(mahalanobis_distance_cpp)
export(observation_impute_cpp)
export(get_supported_models)
export(plot_MSEv_eval_crit)
export(plot_SV_several_approaches)
export(predict_model)
export(prepare_data)
export(prepare_data_causal)
export(prepare_data_copula_cpp)
export(prepare_data_copula_cpp_caus)
export(prepare_data_gaussian_cpp)
export(prepare_data_gaussian_cpp_caus)
export(prepare_next_iteration)
export(print_iter)
export(regression.train_model)
export(rss_cpp)
export(save_results)
export(setup)
export(setup_approach)
export(setup_computation)
export(shapley_setup)
export(testing_cleanup)
export(vaeac_get_evaluation_criteria)
Expand All @@ -107,7 +92,6 @@ export(vaeac_plot_imputed_ggpairs)
export(vaeac_train_model)
export(vaeac_train_model_continue)
export(weight_matrix)
export(weight_matrix_cpp)
importFrom(Rcpp,sourceCpp)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
Expand Down
240 changes: 105 additions & 135 deletions R/RcppExports.R

Large diffs are not rendered by default.

25 changes: 15 additions & 10 deletions R/approach.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#' Set up the framework for the chosen approach
#'
#' The different choices of `approach` takes different (optional) parameters,
#' The different choices of `approach` take different (optional) parameters,
#' which are forwarded from [explain()].
#'
#' @param ... `approach`-specific arguments. See below.
#' @param ... Arguments passed to specific classes. See below.
#'
#' @inheritParams default_doc_explain
#' @inheritParams default_doc_export
#'
#' @export
#' @keywords internal
#' @author Martin Jullum
setup_approach <- function(internal, ...) {
verbose <- internal$parameters$verbose

Expand All @@ -16,8 +18,6 @@ setup_approach <- function(internal, ...) {
iter <- length(internal$iter_list)
X <- internal$iter_list[[iter]]$X



needs_X <- c("regression_surrogate", "vaeac")

run_now <- (isFALSE(any(needs_X %in% approach)) && isTRUE(is.null(X))) ||
Expand Down Expand Up @@ -54,7 +54,8 @@ setup_approach <- function(internal, ...) {
}
}

#' @inheritParams default_doc
#' @inheritParams default_doc_internal
#' @rdname setup_approach
#' @export
setup_approach.combined <- function(internal, ...) {
org_approach <- internal$parameters$approach
Expand All @@ -76,10 +77,11 @@ setup_approach.combined <- function(internal, ...) {
#' @return A data.table containing simulated data used to estimate
#' the contribution function by Monte Carlo integration.
#'
#' @inheritParams default_doc_explain
#' @inheritParams default_doc_export
#'
#' @export
#' @keywords internal
#' @author Martin Jullum
prepare_data <- function(internal, index_features = NULL, ...) {
iter <- length(internal$iter_list)

Expand All @@ -93,8 +95,8 @@ prepare_data <- function(internal, index_features = NULL, ...) {

# Check if the user provided one or several approaches.
if (length(approach) > 1) {
# Picks the relevant approach from the X table which list the unique approach of the batch
# matches by index_features
# Pick the relevant approach from the X table which lists the unique approach of the batch
# matched by index_features
class(this_class) <- X[id_coalition == index_features[1], approach]
} else {
# Only one approach for all coalitions sizes
Expand All @@ -118,5 +120,8 @@ insert_defaults <- function(internal, defaults) {

#' @keywords internal
get_factor_approaches <- function() {
c("'independence' (not recommended)", "'ctree'", "'vaeac'", "'categorical'")
c(
"'independence' (not recommended)", "'ctree'", "'vaeac'",
"'categorical'", "'regression_separate'", "'regression_surrogate'"
)
}
10 changes: 5 additions & 5 deletions R/approach_categorical.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
#' `NULL` means it is estimated from the `x_train` and `x_explain`.
#'
#' @param categorical.epsilon Numeric value. (Optional)
#' If \code{joint_probability_dt} is not supplied, probabilities/frequencies are
#' If `categorical.joint_probability_dt` is not supplied, probabilities/frequencies are
#' estimated using `x_train`. If certain observations occur in `x_explain` and NOT in `x_train`,
#' then epsilon is used as the proportion of times that these observations occur in the training data.
#' In theory, this proportion should be zero, but this causes an error later in the Shapley computation.
#'
#' @inheritParams default_doc_explain
#' @inheritParams default_doc_export
#'
#' @export
setup_approach.categorical <- function(internal,
Expand Down Expand Up @@ -96,7 +96,7 @@ setup_approach.categorical <- function(internal,
}


#' @inheritParams default_doc
#' @inheritParams default_doc_internal
#'
#' @rdname prepare_data
#' @export
Expand Down Expand Up @@ -197,10 +197,10 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {

#' Compute the conditional probabilities for a single coalition for the categorical approach
#'
#' The [shapr::prepare_data.categorical()] function is slow when evaluated for a single coalition.
#' The [prepare_data.categorical()] function is slow when evaluated for a single coalition.
#' This is a bottleneck for Causal Shapley values which call said function a lot with single coalitions.
#'
#' @inheritParams default_doc
#' @inheritParams default_doc_internal
#'
#' @keywords internal
#' @author Lars Henry Berge Olsen
Expand Down
5 changes: 2 additions & 3 deletions R/approach_copula.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @rdname setup_approach
#' @inheritParams default_doc_explain
#' @inheritParams default_doc_export
#' @export
#' @author Martin Jullum
setup_approach.copula <- function(internal, ...) {
Expand Down Expand Up @@ -41,7 +41,7 @@ setup_approach.copula <- function(internal, ...) {
return(internal)
}

#' @inheritParams default_doc
#' @inheritParams default_doc_internal
#' @rdname prepare_data
#' @export
#' @author Lars Henry Berge Olsen
Expand All @@ -63,7 +63,6 @@ prepare_data.copula <- function(internal, index_features, ...) {

S <- internal$iter_list[[iter]]$S[index_features, , drop = FALSE]


if (causal_sampling) {
# Causal Shapley values (either symmetric or asymmetric)

Expand Down
74 changes: 33 additions & 41 deletions R/approach_ctree.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
#' @rdname setup_approach
#'
#' @param ctree.mincriterion Numeric scalar or vector. (default = 0.95)
#' @param ctree.mincriterion Numeric scalar or vector.
#' Either a scalar or vector of length equal to the number of features in the model.
#' Value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
#' The value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
#' If it is a vector, this indicates which value to use when conditioning on various numbers of features.
#' The default value is 0.95.
#'
#' @param ctree.minsplit Numeric scalar. (default = 20)
#' @param ctree.minsplit Numeric scalar.
#' Determines the minimum value that the sum of the left and right daughter nodes must have for a split.
#' The default value is 20.
#'
#' @param ctree.minbucket Numeric scalar. (default = 7)
#' @param ctree.minbucket Numeric scalar.
#' Determines the minimum sum of weights in a terminal node required for a split.
#' The default value is 7.
#'
#' @param ctree.sample Boolean. (default = TRUE)
#' If TRUE, then the method always samples `n_MC_samples` observations from the leaf nodes (with replacement).
#' If FALSE and the number of observations in the leaf node is less than `n_MC_samples`,
#' @param ctree.sample Boolean.
#' If `TRUE` (default), then the method always samples `n_MC_samples` observations from the leaf nodes
#' (with replacement).
#' If `FALSE` and the number of observations in the leaf node is less than `n_MC_samples`,
#' the method will take all observations in the leaf.
#' If FALSE and the number of observations in the leaf node is more than `n_MC_samples`,
#' If `FALSE` and the number of observations in the leaf node is more than `n_MC_samples`,
#' the method will sample `n_MC_samples` observations (with replacement).
#' This means that there will always be sampling in the leaf unless
#' `sample` = FALSE AND the number of obs in the node is less than `n_MC_samples`.
#' `sample = FALSE` *and* the number of obs in the node is less than `n_MC_samples`.
#'
#' @inheritParams default_doc_explain
#' @inheritParams default_doc_export
#'
#' @export
setup_approach.ctree <- function(internal,
Expand All @@ -37,7 +41,7 @@ setup_approach.ctree <- function(internal,
}


#' @inheritParams default_doc
#' @inheritParams default_doc_internal
#'
#' @rdname prepare_data
#' @export
Expand Down Expand Up @@ -106,32 +110,24 @@ prepare_data.ctree <- function(internal, index_features = NULL, ...) {
return(dt2)
}

#' Make all conditional inference trees
#' Build all the conditional inference trees
#'
#' @param given_ind Numeric value. Indicates which features are conditioned on.
#' @param given_ind Integer vector.
#' Indicates which features are conditioned on.
#'
#' @inheritParams default_doc
#'
#' @param mincriterion Numeric scalar or vector. (default = 0.95)
#' Either a scalar or vector of length equal to the number of features in the model.
#' Value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
#' If it is a vector, this indicates which value to use when conditioning on various numbers of features.
#'
#' @param minsplit Numeric scalar. (default = 20)
#' Determines minimum value that the sum of the left and right daughter nodes required for a split.
#'
#' @param minbucket Numeric scalar. (default = 7)
#' Determines the minimum sum of weights in a terminal node required for a split
#'
#' @param use_partykit String. In some semi-rare cases `partykit::ctree` runs into an error related to the LINPACK
#' used by R. To get around this problem, one may fall back to using the newer (but slower) `partykit::ctree`
#' @param use_partykit String. In some semi-rare cases [party::ctree()] runs into an error related to the LINPACK
#' used by R. To get around this problem, one may fall back to using the newer (but slower) [partykit::ctree()]
#' function, which is a reimplementation of the same method. Setting this parameter to `"on_error"` (default)
#' falls back to `partykit::ctree`, if `party::ctree` fails. Other options are `"never"`, which always
#' uses `party::ctree`, and `"always"`, which always uses `partykit::ctree`. A warning message is
#' created whenever `partykit::ctree` is used.
#' falls back to [partykit::ctree()], if [party::ctree()] fails. Other options are `"never"`, which always
#' uses [party::ctree()], and `"always"`, which always uses [partykit::ctree()]. A warning message is
#' created whenever [partykit::ctree()] is used.
#'
#' @inheritParams default_doc_internal
#'
#' @return List with conditional inference tree and the variables conditioned/not conditioned on.
#'
#' @details See the documentation of the [setup_approach.ctree()] function for undocumented parameters.
#'
#' @keywords internal
#' @author Annabelle Redelmeier, Martin Jullum
create_ctree <- function(given_ind,
Expand Down Expand Up @@ -201,21 +197,17 @@ create_ctree <- function(given_ind,

#' Sample ctree variables from a given conditional inference tree
#'
#'
#' @param tree List. Contains tree which is an object of type ctree built from the party package.
#' Also contains given_ind, the features to condition upon.
#'
#' @param n_MC_samples Numeric. Indicates how many samples to use for MCMC.
#'
#' @param x_explain Matrix, data.frame or data.table with the features of the observation whose
#' predictions ought to be explained (test data). Dimension `1\timesp` or `p\times1`.
#'
#' @param x_train Matrix, data.frame or data.table with training data.
#' @param n_MC_samples Scalar integer.
#' Corresponds to the number of samples from the leaf node.
#' See the exception for `sample = FALSE` in [setup_approach.ctree()].
#'
#' @param n_features Positive integer. The number of features.
#' @inheritParams default_doc_internal
#'
#' @param sample Boolean. True indicates that the method samples from the terminal node
#' of the tree whereas False indicates that the method takes all the observations if it is
#' less than n_MC_samples.
#' @details See the documentation of the [setup_approach.ctree()] function for undocumented parameters.
#'
#' @return data.table with `n_MC_samples` (conditional) Gaussian samples
#'
Expand Down
Loading

0 comments on commit d353276

Please sign in to comment.