Function and documentation cleanup (#427)

NorskRegnesentral · Dec 19, 2024 · d353276 · d353276
1 parent f89ead4
commit d353276
Show file tree

Hide file tree

Showing 233 changed files with 2,039 additions and 2,786 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -22,13 +22,16 @@ inst/compare_lundberg\.xgb\.obj
 ^CRAN-SUBMISSION$
 ^.Rprofile
 ^python$
-^rebuild-long-running-vignette\.R$
-^vignettes/understanding_shapr_vaeac\.Rmd\.orig$
+^rebuild_long_running_vignette\.R$
 ^vignettes/understanding_shapr\.Rmd\.orig$
+^vignettes/understanding_shapr_vaeac\.Rmd\.orig$
 ^vignettes/understanding_shapr_regression\.Rmd\.orig$
+^vignettes/understanding_shapr_asymmetric_causal\.Rmd\.orig$
 ^vignettes/figure_main/*$
 ^vignettes/cache_main/*$
 ^vignettes/figure_vaeac/*$
 ^vignettes/cache_vaeac/*$
 ^vignettes/figure_regression/*$
 ^vignettes/cache_regression/*$
+^vignettes/figure_asymmetric_causal/*$
+^vignettes/cache_asymmetric_causal/*$
diff --git a/NAMESPACE b/NAMESPACE
@@ -53,51 +53,36 @@ S3method(setup_approach,regression_surrogate)
 S3method(setup_approach,timeseries)
 S3method(setup_approach,vaeac)
 export(additional_regression_setup)
-export(aicc_full_single_cpp)
 export(append_vS_list)
 export(check_convergence)
 export(cli_compute_vS)
 export(cli_iter)
 export(cli_startup)
 export(coalition_matrix_cpp)
 export(compute_estimates)
-export(compute_shapley_new)
+export(compute_shapley)
 export(compute_time)
 export(compute_vS)
-export(correction_matrix_cpp)
-export(create_coalition_table)
 export(explain)
 export(explain_forecast)
 export(finalize_explanation)
-export(finalize_explanation_forecast)
-export(get_cov_mat)
-export(get_data_specs)
-export(get_extra_est_args_default)
+export(get_extra_comp_args_default)
 export(get_iterative_args_default)
 export(get_model_specs)
-export(get_mu_vec)
 export(get_output_args_default)
 export(get_supported_approaches)
-export(hat_matrix_cpp)
-export(mahalanobis_distance_cpp)
-export(observation_impute_cpp)
+export(get_supported_models)
 export(plot_MSEv_eval_crit)
 export(plot_SV_several_approaches)
 export(predict_model)
 export(prepare_data)
 export(prepare_data_causal)
-export(prepare_data_copula_cpp)
-export(prepare_data_copula_cpp_caus)
-export(prepare_data_gaussian_cpp)
-export(prepare_data_gaussian_cpp_caus)
 export(prepare_next_iteration)
 export(print_iter)
 export(regression.train_model)
-export(rss_cpp)
 export(save_results)
 export(setup)
 export(setup_approach)
-export(setup_computation)
 export(shapley_setup)
 export(testing_cleanup)
 export(vaeac_get_evaluation_criteria)
@@ -107,7 +92,6 @@ export(vaeac_plot_imputed_ggpairs)
 export(vaeac_train_model)
 export(vaeac_train_model_continue)
 export(weight_matrix)
-export(weight_matrix_cpp)
 importFrom(Rcpp,sourceCpp)
 importFrom(data.table,":=")
 importFrom(data.table,as.data.table)

diff --git a/R/RcppExports.R b/R/RcppExports.R
diff --git a/R/approach.R b/R/approach.R
@@ -1,13 +1,15 @@
 #' Set up the framework for the chosen approach
 #'
-#' The different choices of `approach` takes different (optional) parameters,
+#' The different choices of `approach` take different (optional) parameters,
 #' which are forwarded from [explain()].
 #'
-#' @param ... `approach`-specific arguments. See below.
+#' @param ... Arguments passed to specific classes. See below.
 #'
-#' @inheritParams default_doc_explain
+#' @inheritParams default_doc_export
 #'
 #' @export
+#' @keywords internal
+#' @author Martin Jullum
 setup_approach <- function(internal, ...) {
   verbose <- internal$parameters$verbose
 
@@ -16,8 +18,6 @@ setup_approach <- function(internal, ...) {
   iter <- length(internal$iter_list)
   X <- internal$iter_list[[iter]]$X
 
-
-
   needs_X <- c("regression_surrogate", "vaeac")
 
   run_now <- (isFALSE(any(needs_X %in% approach)) && isTRUE(is.null(X))) ||
@@ -54,7 +54,8 @@ setup_approach <- function(internal, ...) {
   }
 }
 
-#' @inheritParams default_doc
+#' @inheritParams default_doc_internal
+#' @rdname setup_approach
 #' @export
 setup_approach.combined <- function(internal, ...) {
   org_approach <- internal$parameters$approach
@@ -76,10 +77,11 @@ setup_approach.combined <- function(internal, ...) {
 #' @return A data.table containing simulated data used to estimate
 #' the contribution function by Monte Carlo integration.
 #'
-#' @inheritParams default_doc_explain
+#' @inheritParams default_doc_export
 #'
 #' @export
 #' @keywords internal
+#' @author Martin Jullum
 prepare_data <- function(internal, index_features = NULL, ...) {
   iter <- length(internal$iter_list)
 
@@ -93,8 +95,8 @@ prepare_data <- function(internal, index_features = NULL, ...) {
 
   # Check if the user provided one or several approaches.
   if (length(approach) > 1) {
-    # Picks the relevant approach from the X table which list the unique approach of the batch
-    # matches by index_features
+    # Pick the relevant approach from the X table which lists the unique approach of the batch
+    # matched by index_features
     class(this_class) <- X[id_coalition == index_features[1], approach]
   } else {
     # Only one approach for all coalition sizes
@@ -118,5 +120,8 @@ insert_defaults <- function(internal, defaults) {
 
 #' @keywords internal
 get_factor_approaches <- function() {
-  c("'independence' (not recommended)", "'ctree'", "'vaeac'", "'categorical'")
+  c(
+    "'independence' (not recommended)", "'ctree'", "'vaeac'",
+    "'categorical'", "'regression_separate'", "'regression_surrogate'"
+  )
 }
diff --git a/R/approach_categorical.R b/R/approach_categorical.R
@@ -6,12 +6,12 @@
 #' `NULL` means it is estimated from the `x_train` and `x_explain`.
 #'
 #' @param categorical.epsilon Numeric value. (Optional)
-#' If \code{joint_probability_dt} is not supplied, probabilities/frequencies are
+#' If `categorical.joint_probability_dt` is not supplied, probabilities/frequencies are
 #' estimated using `x_train`. If certain observations occur in `x_explain` and NOT in `x_train`,
 #' then epsilon is used as the proportion of times that these observations occur in the training data.
 #' In theory, this proportion should be zero, but this causes an error later in the Shapley computation.
 #'
-#' @inheritParams default_doc_explain
+#' @inheritParams default_doc_export
 #'
 #' @export
 setup_approach.categorical <- function(internal,
@@ -96,7 +96,7 @@ setup_approach.categorical <- function(internal,
 }
 
 
-#' @inheritParams default_doc
+#' @inheritParams default_doc_internal
 #'
 #' @rdname prepare_data
 #' @export
@@ -197,10 +197,10 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
 
 #' Compute the conditional probabilities for a single coalition for the categorical approach
 #'
-#' The [shapr::prepare_data.categorical()] function is slow when evaluated for a single coalition.
+#' The [prepare_data.categorical()] function is slow when evaluated for a single coalition.
 #' This is a bottleneck for Causal Shapley values which call said function a lot with single coalitions.
 #'
-#' @inheritParams default_doc
+#' @inheritParams default_doc_internal
 #'
 #' @keywords internal
 #' @author Lars Henry Berge Olsen

diff --git a/R/approach_copula.R b/R/approach_copula.R
@@ -1,5 +1,5 @@
 #' @rdname setup_approach
-#' @inheritParams default_doc_explain
+#' @inheritParams default_doc_export
 #' @export
 #' @author Martin Jullum
 setup_approach.copula <- function(internal, ...) {
@@ -41,7 +41,7 @@ setup_approach.copula <- function(internal, ...) {
   return(internal)
 }
 
-#' @inheritParams default_doc
+#' @inheritParams default_doc_internal
 #' @rdname prepare_data
 #' @export
 #' @author Lars Henry Berge Olsen
@@ -63,7 +63,6 @@ prepare_data.copula <- function(internal, index_features, ...) {
 
   S <- internal$iter_list[[iter]]$S[index_features, , drop = FALSE]
 
-
   if (causal_sampling) {
     # Causal Shapley values (either symmetric or asymmetric)
 

diff --git a/R/approach_ctree.R b/R/approach_ctree.R
@@ -1,26 +1,30 @@
 #' @rdname setup_approach
 #'
-#' @param ctree.mincriterion Numeric scalar or vector. (default = 0.95)
+#' @param ctree.mincriterion Numeric scalar or vector.
 #' Either a scalar or vector of length equal to the number of features in the model.
-#' Value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
+#' The value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
 #' If it is a vector, this indicates which value to use when conditioning on various numbers of features.
+#' The default value is 0.95.
 #'
-#' @param ctree.minsplit Numeric scalar. (default = 20)
+#' @param ctree.minsplit Numeric scalar.
 #' Determines the minimum value that the sum of the left and right daughter nodes must reach for a split.
+#' The default value is 20.
 #'
-#' @param ctree.minbucket Numeric scalar. (default = 7)
+#' @param ctree.minbucket Numeric scalar.
 #' Determines the minimum sum of weights in a terminal node required for a split.
+#' The default value is 7.
 #'
-#' @param ctree.sample Boolean. (default = TRUE)
-#' If TRUE, then the method always samples `n_MC_samples` observations from the leaf nodes (with replacement).
-#' If FALSE and the number of observations in the leaf node is less than `n_MC_samples`,
+#' @param ctree.sample Boolean.
+#' If `TRUE` (default), then the method always samples `n_MC_samples` observations from the leaf nodes
+#' (with replacement).
+#' If `FALSE` and the number of observations in the leaf node is less than `n_MC_samples`,
 #' the method will take all observations in the leaf.
-#' If FALSE and the number of observations in the leaf node is more than `n_MC_samples`,
+#' If `FALSE` and the number of observations in the leaf node is more than `n_MC_samples`,
 #' the method will sample `n_MC_samples` observations (with replacement).
 #' This means that there will always be sampling in the leaf unless
-#' `sample` = FALSE AND the number of obs in the node is less than `n_MC_samples`.
+#' `ctree.sample = FALSE` *and* the number of observations in the node is less than `n_MC_samples`.
 #'
-#' @inheritParams default_doc_explain
+#' @inheritParams default_doc_export
 #'
 #' @export
 setup_approach.ctree <- function(internal,
@@ -37,7 +41,7 @@ setup_approach.ctree <- function(internal,
 }
 
 
-#' @inheritParams default_doc
+#' @inheritParams default_doc_internal
 #'
 #' @rdname prepare_data
 #' @export
@@ -106,32 +110,24 @@ prepare_data.ctree <- function(internal, index_features = NULL, ...) {
   return(dt2)
 }
 
-#' Make all conditional inference trees
+#' Build all the conditional inference trees
 #'
-#' @param given_ind Numeric value. Indicates which features are conditioned on.
+#' @param given_ind Integer vector.
+#' Indicates which features are conditioned on.
 #'
-#' @inheritParams default_doc
-#'
-#' @param mincriterion Numeric scalar or vector. (default = 0.95)
-#' Either a scalar or vector of length equal to the number of features in the model.
-#' Value is equal to 1 - \eqn{\alpha} where \eqn{\alpha} is the nominal level of the conditional independence tests.
-#' If it is a vector, this indicates which value to use when conditioning on various numbers of features.
-#'
-#' @param minsplit Numeric scalar. (default = 20)
-#' Determines minimum value that the sum of the left and right daughter nodes required for a split.
-#'
-#' @param minbucket Numeric scalar. (default = 7)
-#' Determines the minimum sum of weights in a terminal node required for a split
-#'
-#' @param use_partykit String. In some semi-rare cases `partykit::ctree` runs into an error related to the LINPACK
-#' used by R. To get around this problem, one may fall back to using the newer (but slower) `partykit::ctree`
+#' @param use_partykit String. In some semi-rare cases [party::ctree()] runs into an error related to the LINPACK
+#' used by R. To get around this problem, one may fall back to using the newer (but slower) [partykit::ctree()]
 #' function, which is a reimplementation of the same method. Setting this parameter to `"on_error"` (default)
-#' falls back to  `partykit::ctree`, if `party::ctree` fails. Other options are `"never"`, which always
-#' uses `party::ctree`, and `"always"`, which always uses `partykit::ctree`. A warning message is
-#' created whenever `partykit::ctree` is used.
+#' falls back to [partykit::ctree()] if [party::ctree()] fails. Other options are `"never"`, which always
+#' uses [party::ctree()], and `"always"`, which always uses [partykit::ctree()]. A warning message is
+#' created whenever [partykit::ctree()] is used.
+#'
+#' @inheritParams default_doc_internal
 #'
 #' @return List with conditional inference tree and the variables conditioned/not conditioned on.
 #'
+#' @details See the documentation of the [setup_approach.ctree()] function for undocumented parameters.
+#'
 #' @keywords internal
 #' @author Annabelle Redelmeier, Martin Jullum
 create_ctree <- function(given_ind,
@@ -201,21 +197,17 @@ create_ctree <- function(given_ind,
 
 #' Sample ctree variables from a given conditional inference tree
 #'
+#'
 #' @param tree List. Contains tree which is an object of type ctree built from the party package.
 #' Also contains given_ind, the features to condition upon.
 #'
-#' @param n_MC_samples Numeric. Indicates how many samples to use for MCMC.
-#'
-#' @param x_explain Matrix, data.frame or data.table with the features of the observation whose
-#' predictions ought to be explained (test data). Dimension `1\timesp` or `p\times1`.
-#'
-#' @param x_train Matrix, data.frame or data.table with training data.
+#' @param n_MC_samples Scalar integer.
+#' Corresponds to the number of samples from the leaf node.
+#' See [setup_approach.ctree()] for the exception that applies when `sample = FALSE`.
 #'
-#' @param n_features Positive integer. The number of features.
+#' @inheritParams default_doc_internal
 #'
-#' @param sample Boolean. True indicates that the method samples from the terminal node
-#' of the tree whereas False indicates that the method takes all the observations if it is
-#' less than n_MC_samples.
+#' @details See the documentation of the [setup_approach.ctree()] function for undocumented parameters.
 #'
 #' @return data.table with `n_MC_samples` (conditional) samples drawn from the leaf node of the tree
 #'