From dd7f0113a185aa7b676ca6e06b4b01f7a633a9ab Mon Sep 17 00:00:00 2001 From: Nikolai Sellereite Date: Thu, 4 Jul 2019 14:56:11 +0200 Subject: [PATCH] Regenerates documentation (#60) --- man/compute_kshap.Rd | 30 ++++++++++++++------ man/feature_combinations.Rd | 9 ++++-- man/global_arguments.Rd | 53 ++++++++++++++++++++++------------- man/observation_impute.Rd | 9 ++++-- man/observation_impute_cpp.Rd | 3 +- man/predictions.Rd | 19 ++++++++----- man/prepare_kshap.Rd | 11 ++++++-- man/sample_combinations.Rd | 15 +++++----- man/sample_copula.Rd | 12 +++++--- man/sample_gaussian.Rd | 12 +++++--- man/scale_data.Rd | 3 +- 11 files changed, 114 insertions(+), 62 deletions(-) diff --git a/man/compute_kshap.Rd b/man/compute_kshap.Rd index 602993500..960bb3a3f 100644 --- a/man/compute_kshap.Rd +++ b/man/compute_kshap.Rd @@ -15,29 +15,41 @@ compute_kshap(model, l, noSamp_MC = 1000, verbose = FALSE, \item{l}{List. The output from the \code{prepare_kshap} function} -\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} \item{verbose}{Integer. How much information to print during function execution (in development)} -\item{cond_approach}{String or list. When being a list, the elements in the list refers to the rows in l$X that ought to be included in each of the approaches!} +\item{cond_approach}{String or list. When being a list, the elements in the list refers to the +rows in l$X that ought to be included in each of the approaches!} -\item{empirical_settings}{List. Specifying the settings when using the empirical method to compute the conditional expectations.} +\item{empirical_settings}{List. 
Specifying the settings when using the empirical method to +compute the conditional expectations.} -\item{pred_zero}{The prediction value for unseen data, typically equal to the mean of the response} +\item{pred_zero}{The prediction value for unseen data, typically equal to the mean of the +response} -\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. +NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating +distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{ensure_condcov_symmetry}{Logical. Whether to ensure that the conditional covariance matrices in the Gaussian and copula approaches are symmetric. -Typically only needed if the original covariance is just barely positive definite.} +\item{ensure_condcov_symmetry}{Logical. Whether to ensure that the conditional covariance +matrices in the Gaussian and copula approaches are symmetric. Typically only needed if the +original covariance is just barely positive definite.} } \value{ -List with kernel SHAP values (\code{Kshap}) and other object used to perform the computation (helpful for debugging etc.) +List with kernel SHAP values (\code{Kshap}) and other objects used to perform +the computation (helpful for debugging etc.) } \description{ Computes kernel SHAP values for test data } +\details{ +If \code{cond_approach} is a list, the elements in the list refer to the rows in +\code{l$X} that ought to be included in each of the approaches! 
+} \author{ Martin Jullum } diff --git a/man/feature_combinations.Rd b/man/feature_combinations.Rd index dccc6c5c8..e0c5837e1 100644 --- a/man/feature_combinations.Rd +++ b/man/feature_combinations.Rd @@ -10,14 +10,17 @@ feature_combinations(m, exact = TRUE, noSamp = 200, \arguments{ \item{m}{Integer. Total number of features} -\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum} +\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a +sampling approach to approximate the sum} \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula (previously called \code{nrows})} -\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.} +\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full +conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.} -\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least squares problem by merging identical columns and adjusting their weights.} +\item{reduce_dim}{Logical. 
Indicating whether to reduce the dimension of the weighted least +squares problem by merging identical columns and adjusting their weights.} } \value{ data.table diff --git a/man/global_arguments.Rd b/man/global_arguments.Rd index a3281c3ac..05430e11c 100644 --- a/man/global_arguments.Rd +++ b/man/global_arguments.Rd @@ -18,13 +18,15 @@ global_arguments(m, N, s, Xtrain, Xtest, nsamples, features, exact, sigma, \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} \item{nsamples}{Integer. Number of samples} \item{features}{List.} -\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum} +\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a +sampling approach to approximate the sum} \item{sigma}{Numeric} @@ -42,17 +44,20 @@ global_arguments(m, N, s, Xtrain, Xtest, nsamples, features, exact, sigma, \item{I}{Matrix} -\item{cond_approach}{String or list. When being a list, the elements in the list refers to the rows in l$X that ought to be included in each of the approaches!} +\item{cond_approach}{String or list. When being a list, the elements in the list refers to the +rows in l$X that ought to be included in each of the approaches!} \item{p_default}{Numeric} -\item{distance_metric}{String indicating which distance metric should be used in the empirical conditional -distribution. Defaults to "Euclidean", "Mahalanobis" and "Mahalanobis_scaled" being the other options. 
"Mahalanobis_scaled" includes -the 1/|S| factor in the paper is preferred for a consistent \eqn{\sigma}.} +\item{distance_metric}{String indicating which distance metric should be used in the empirical +conditional distribution. Defaults to "Euclidean", "Mahalanobis" and "Mahalanobis_scaled" being +the other options. "Mahalanobis_scaled" includes the 1/|S| factor in the paper is preferred for +a consistent \eqn{\sigma}.} -\item{kernel_metric}{String indicating which kernel metric should be used in the empirical conditional distribution. -Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing independently, ignoring any distance) being the second option -"Gaussian_old" [\eqn{\sqrt(\exp(-D/2\sigma))}] is also kept for reproducibility.} +\item{kernel_metric}{String indicating which kernel metric should be used in the empirical +conditional distribution. Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" +(imputing independently, ignoring any distance) being the second option "Gaussian_old" +[\eqn{\sqrt(\exp(-D/2\sigma))}] is also kept for reproducibility.} \item{Xtrain_mat}{Matrix with the features from the training data} @@ -61,27 +66,35 @@ Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing i \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula (previously called \code{nrows})} -\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.} +\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full +conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.} -\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least squares problem by merging identical columns and adjusting their weights.} +\item{reduce_dim}{Logical. 
Indicating whether to reduce the dimension of the weighted least +squares problem by merging identical columns and adjusting their weights.} \item{l}{List. The output from the \code{prepare_kshap} function} -\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} -\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. +NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating +distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} \item{verbose}{Logical} -\item{cond_approach}{Either a string indicating which method should be used to estimate all conditional expectations. -Defaults to "empirical_fixed_sigma", with "empirical_AICc_full", "empirical_AICc_each_k","Gaussian" and "copula" being other alternatives. One can also supply a named list where the names -are one or more of the implemented methods, and the named lists contains one vector each, each containing the row numbers of the S-matrix -computed using \code{prepare_kshap} that whose corresponding conditional expectations should be computed with that method. 
Any number not -specified is computed with the default empirical method.} +\item{cond_approach}{Either a string indicating which method should be used to estimate all +conditional expectations. Defaults to "empirical_fixed_sigma", with "empirical_AICc_full", +"empirical_AICc_each_k", "Gaussian" and "copula" being other alternatives. One can also supply a +named list where the names are one or more of the implemented methods, and the named list +contains one vector each, each containing the row numbers of the S-matrix computed using +\code{prepare_kshap} whose corresponding conditional expectations should be computed with +that method. Any number not specified is computed with the default empirical method.} -\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations for all combinations.} +\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations +for all combinations.} \item{Xtest_Gauss_trans}{Vector with the Gaussian transformed test observations} diff --git a/man/observation_impute.Rd b/man/observation_impute.Rd index d2d15ef73..2cfbb1ce6 100644 --- a/man/observation_impute.Rd +++ b/man/observation_impute.Rd @@ -8,15 +8,18 @@ observation_impute(W_kernel, S, Xtrain, Xtest, w_threshold = 0.7, noSamp_MC = 1000) } \arguments{ -\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations for all combinations.} +\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations +for all combinations.} \item{S}{Matrix} \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} -\item{noSamp_MC}{Positive integer. 
Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} } \value{ List diff --git a/man/observation_impute_cpp.Rd b/man/observation_impute_cpp.Rd index c44a50888..8c38b0b16 100644 --- a/man/observation_impute_cpp.Rd +++ b/man/observation_impute_cpp.Rd @@ -13,7 +13,8 @@ observation_impute_cpp(ID, Comb, Xtrain, Xtest, S) \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} \item{S}{Matrix} } diff --git a/man/predictions.Rd b/man/predictions.Rd index 6834803af..9065c22cd 100644 --- a/man/predictions.Rd +++ b/man/predictions.Rd @@ -16,17 +16,20 @@ predictions(model, D, h_optim_vec, kernel_metric, S, Xtrain, Xtest, \item{D}{Matrix} -\item{kernel_metric}{String indicating which kernel metric should be used in the empirical conditional distribution. -Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing independently, ignoring any distance) being the second option -"Gaussian_old" [\eqn{\sqrt(\exp(-D/2\sigma))}] is also kept for reproducibility.} +\item{kernel_metric}{String indicating which kernel metric should be used in the empirical +conditional distribution. 
Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" +(imputing independently, ignoring any distance) being the second option. "Gaussian_old" +[\eqn{\sqrt{\exp(-D/2\sigma)}}] is also kept for reproducibility.} \item{S}{Matrix} \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} -\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} \item{verbose}{Integer. How much information to print during function execution (in development)} @@ -34,9 +37,11 @@ Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing i \item{pred_zero}{Numeric} -\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. +NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating +distribution. 
NULL means it is estimated from the data if needed (in the Gaussian approach).} \item{Xtest_Gauss_trans}{Vector with the Gaussian transformed test observations} } diff --git a/man/prepare_kshap.Rd b/man/prepare_kshap.Rd index 182138e80..47d44a41d 100644 --- a/man/prepare_kshap.Rd +++ b/man/prepare_kshap.Rd @@ -11,14 +11,19 @@ prepare_kshap(Xtrain, Xtest, exact = TRUE, noSamp = NULL, \arguments{ \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} -\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum} +\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a +sampling approach to approximate the sum} \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula (previously called \code{nrows})} -\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.} +\item{shapley_weight_inf_replacement}{Numeric. 
Indicating which weight to use for the full +conditional and unconditional expectations in kernel SHAP's weighted least squares formulation.} + +\item{compute_distances_for_no_var}{If equal to \code{NULL}, no distances are computed} } \value{ Matrix diff --git a/man/sample_combinations.Rd b/man/sample_combinations.Rd index cb1a32794..e41b344c7 100644 --- a/man/sample_combinations.Rd +++ b/man/sample_combinations.Rd @@ -2,23 +2,24 @@ % Please edit documentation in R/extra.R \name{sample_combinations} \alias{sample_combinations} -\title{Helper function to sample a combination of training and testing rows, which does not risk getting the same observation twice. -Need to improve this help file.} +\title{Helper function to sample a combination of training and testing rows, which does not risk +getting the same observation twice. Need to improve this help file.} \usage{ sample_combinations(nTrain, nTest, nosamp, separate = F) } \arguments{ -\item{separate}{Logical indicating whether the train and test data should be sampled separately or in a joint sampling space. -If they are sampled separately (which typically would be used when optimizing more than one distribution at once) we sample with -replacement if more samples than training data. Not optimal, but for now fine if careful when using more samples than the number +\item{separate}{Logical indicating whether the train and test data should be sampled separately +or in a joint sampling space. If they are sampled separately (which typically would be used when +optimizing more than one distribution at once) we sample with replacement if more samples than +training data. Not optimal, but for now fine if careful when using more samples than the number of training observations while at the same time doing optimization over every test observation.} } \value{ Numeric } \description{ -Helper function to sample a combination of training and testing rows, which does not risk getting the same observation twice. 
-Need to improve this help file. +Helper function to sample a combination of training and testing rows, which does not risk +getting the same observation twice. Need to improve this help file. } \author{ Martin Jullum diff --git a/man/sample_copula.Rd b/man/sample_copula.Rd index aa2273233..fce50fe42 100644 --- a/man/sample_copula.Rd +++ b/man/sample_copula.Rd @@ -10,11 +10,14 @@ sample_copula(given_ind, noSamp_MC, mu, Sigma, p, Xtest_Gauss_trans, \arguments{ \item{given_ind}{Vector} -\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} -\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. +NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating +distribution. 
NULL means it is estimated from the data if needed (in the Gaussian approach).} \item{p}{Positive integer} @@ -22,7 +25,8 @@ sample_copula(given_ind, noSamp_MC, mu, Sigma, p, Xtest_Gauss_trans, \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} } \value{ data.table with \code{noSamp_MC} (conditional) Gaussian samples diff --git a/man/sample_gaussian.Rd b/man/sample_gaussian.Rd index 1cd5dc186..2add2ad5a 100644 --- a/man/sample_gaussian.Rd +++ b/man/sample_gaussian.Rd @@ -10,15 +10,19 @@ sample_gaussian(given_ind, noSamp_MC, mu, Sigma, p, Xtest, \arguments{ \item{given_ind}{Vector} -\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} +\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the +Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})} -\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. +NULL means it is estimated from the data if needed (in the Gaussian approach).} -\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).} +\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating +distribution. 
NULL means it is estimated from the data if needed (in the Gaussian approach).} \item{p}{Positive integer} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} } \value{ data.table with \code{noSamp_MC} (conditional) Gaussian samples diff --git a/man/scale_data.Rd b/man/scale_data.Rd index 3580a58cd..949f8aa51 100644 --- a/man/scale_data.Rd +++ b/man/scale_data.Rd @@ -9,7 +9,8 @@ scale_data(Xtrain, Xtest, scale = TRUE) \arguments{ \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data} -\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)} +\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to +be explained (test data)} \item{scale}{Logical} }