From dd7f0113a185aa7b676ca6e06b4b01f7a633a9ab Mon Sep 17 00:00:00 2001
From: Nikolai Sellereite <n_sellereite@hotmail.com>
Date: Thu, 4 Jul 2019 14:56:11 +0200
Subject: [PATCH] Regenerates documentation (#60)

---
 man/compute_kshap.Rd          | 30 ++++++++++++++------
 man/feature_combinations.Rd   |  9 ++++--
 man/global_arguments.Rd       | 53 ++++++++++++++++++++++-------------
 man/observation_impute.Rd     |  9 ++++--
 man/observation_impute_cpp.Rd |  3 +-
 man/predictions.Rd            | 19 ++++++++-----
 man/prepare_kshap.Rd          | 11 ++++++--
 man/sample_combinations.Rd    | 15 +++++-----
 man/sample_copula.Rd          | 12 +++++---
 man/sample_gaussian.Rd        | 12 +++++---
 man/scale_data.Rd             |  3 +-
 11 files changed, 114 insertions(+), 62 deletions(-)

diff --git a/man/compute_kshap.Rd b/man/compute_kshap.Rd
index 602993500..960bb3a3f 100644
--- a/man/compute_kshap.Rd
+++ b/man/compute_kshap.Rd
@@ -15,29 +15,41 @@ compute_kshap(model, l, noSamp_MC = 1000, verbose = FALSE,
 
 \item{l}{List. The output from the \code{prepare_kshap} function}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 
 \item{verbose}{Integer. How much information to print during function execution (in development)}
 
-\item{cond_approach}{String or list. When being a list, the elements in the list refers to the rows in l$X that ought to be included in each of the approaches!}
+\item{cond_approach}{String or list. When being a list, the elements in the list refer to the
+rows in l$X that ought to be included in each of the approaches!}
 
-\item{empirical_settings}{List. Specifying the settings when using the empirical method to compute the conditional expectations.}
+\item{empirical_settings}{List. Specifying the settings when using the empirical method to
+compute the conditional expectations.}
 
-\item{pred_zero}{The prediction value for unseen data, typically equal to the mean of the response}
+\item{pred_zero}{The prediction value for unseen data, typically equal to the mean of the
+response}
 
-\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
+NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating
+distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{ensure_condcov_symmetry}{Logical. Whether to ensure that the conditional covariance matrices in the Gaussian and copula approaches are symmetric.
-Typically only needed if the original covariance is just barely positive definite.}
+\item{ensure_condcov_symmetry}{Logical. Whether to ensure that the conditional covariance
+matrices in the Gaussian and copula approaches are symmetric. Typically only needed if the
+original covariance is just barely positive definite.}
 }
 \value{
-List with kernel SHAP values (\code{Kshap}) and other object used to perform the computation (helpful for debugging etc.)
+List with kernel SHAP values (\code{Kshap}) and other objects used to perform
+the computation (helpful for debugging etc.)
 }
 \description{
 Computes kernel SHAP values for test data
 }
+\details{
+If \code{cond_approach} is a list, the elements in the list refer to the rows in
+\code{l$X} that ought to be included in each of the approaches!
+}
 \author{
 Martin Jullum
 }
diff --git a/man/feature_combinations.Rd b/man/feature_combinations.Rd
index dccc6c5c8..e0c5837e1 100644
--- a/man/feature_combinations.Rd
+++ b/man/feature_combinations.Rd
@@ -10,14 +10,17 @@ feature_combinations(m, exact = TRUE, noSamp = 200,
 \arguments{
 \item{m}{Integer. Total number of features}
 
-\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum}
+\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a
+sampling approach to approximate the sum}
 
 \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula
 (previously called \code{nrows})}
 
-\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.}
+\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full
+conditional and unconditional expectations in kernel SHAP's weighted least squares formulation.}
 
-\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least squares problem by merging identical columns and adjusting their weights.}
+\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least
+squares problem by merging identical columns and adjusting their weights.}
 }
 \value{
 data.table
diff --git a/man/global_arguments.Rd b/man/global_arguments.Rd
index a3281c3ac..05430e11c 100644
--- a/man/global_arguments.Rd
+++ b/man/global_arguments.Rd
@@ -18,13 +18,15 @@ global_arguments(m, N, s, Xtrain, Xtest, nsamples, features, exact, sigma,
 
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
 \item{nsamples}{Integer. Number of samples}
 
 \item{features}{List.}
 
-\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum}
+\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a
+sampling approach to approximate the sum}
 
 \item{sigma}{Numeric}
 
@@ -42,17 +44,20 @@ global_arguments(m, N, s, Xtrain, Xtest, nsamples, features, exact, sigma,
 
 \item{I}{Matrix}
 
-\item{cond_approach}{String or list. When being a list, the elements in the list refers to the rows in l$X that ought to be included in each of the approaches!}
+\item{cond_approach}{String or list. When being a list, the elements in the list refer to the
+rows in l$X that ought to be included in each of the approaches!}
 
 \item{p_default}{Numeric}
 
-\item{distance_metric}{String indicating which distance metric should be used in the empirical conditional
-distribution. Defaults to "Euclidean", "Mahalanobis" and "Mahalanobis_scaled" being the other options. "Mahalanobis_scaled" includes
-the 1/|S| factor in the paper is preferred for a consistent \eqn{\sigma}.}
+\item{distance_metric}{String indicating which distance metric should be used in the empirical
+conditional distribution. Defaults to "Euclidean", with "Mahalanobis" and "Mahalanobis_scaled"
+being the other options. "Mahalanobis_scaled" includes the 1/|S| factor in the paper and is
+preferred for a consistent \eqn{\sigma}.}
 
-\item{kernel_metric}{String indicating which kernel metric should be used in the empirical conditional distribution.
-Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing independently, ignoring any distance) being the second option
-"Gaussian_old" [\eqn{\sqrt(\exp(-D/2\sigma))}] is also kept for reproducibility.}
+\item{kernel_metric}{String indicating which kernel metric should be used in the empirical
+conditional distribution. Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence"
+(imputing independently, ignoring any distance) being the second option. "Gaussian_old"
+[\eqn{\sqrt{\exp(-D/2\sigma)}}] is also kept for reproducibility.}
 
 \item{Xtrain_mat}{Matrix with the features from the training data}
 
@@ -61,27 +66,35 @@ Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing i
 \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula
 (previously called \code{nrows})}
 
-\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.}
+\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full
+conditional and unconditional expectations in kernel SHAP's weighted least squares formulation.}
 
-\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least squares problem by merging identical columns and adjusting their weights.}
+\item{reduce_dim}{Logical. Indicating whether to reduce the dimension of the weighted least
+squares problem by merging identical columns and adjusting their weights.}
 
 \item{l}{List. The output from the \code{prepare_kshap} function}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 
-\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
+NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating
+distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
 \item{verbose}{Logical}
 
-\item{cond_approach}{Either a string indicating which method should be used to estimate all conditional expectations.
-Defaults to "empirical_fixed_sigma", with "empirical_AICc_full", "empirical_AICc_each_k","Gaussian" and "copula" being other alternatives. One can also supply a named list where the names
-are one or more of the implemented methods, and the named lists contains one vector each, each containing the row numbers of the S-matrix
-computed using \code{prepare_kshap} that whose corresponding conditional expectations should be computed with that method. Any number not
-specified is computed with the default empirical method.}
+\item{cond_approach}{A string indicating which method should be used to estimate all
+conditional expectations. Defaults to "empirical_fixed_sigma", with "empirical_AICc_full",
+"empirical_AICc_each_k", "Gaussian" and "copula" being other alternatives. One can also supply a
+named list where the names are one or more of the implemented methods, and each list element
+is a vector containing the row numbers of the S-matrix computed using
+\code{prepare_kshap} whose corresponding conditional expectations should be computed with
+that method. Any number not specified is computed with the default empirical method.}
 
-\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations for all combinations.}
+\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations
+for all combinations.}
 
 \item{Xtest_Gauss_trans}{Vector with the Gaussian transformed test observations}
 
diff --git a/man/observation_impute.Rd b/man/observation_impute.Rd
index d2d15ef73..2cfbb1ce6 100644
--- a/man/observation_impute.Rd
+++ b/man/observation_impute.Rd
@@ -8,15 +8,18 @@ observation_impute(W_kernel, S, Xtrain, Xtest, w_threshold = 0.7,
   noSamp_MC = 1000)
 }
 \arguments{
-\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations for all combinations.}
+\item{W_kernel}{Array. Contains all nonscaled weights between training and testing observations
+for all combinations.}
 
 \item{S}{Matrix}
 
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 }
 \value{
 List
diff --git a/man/observation_impute_cpp.Rd b/man/observation_impute_cpp.Rd
index c44a50888..8c38b0b16 100644
--- a/man/observation_impute_cpp.Rd
+++ b/man/observation_impute_cpp.Rd
@@ -13,7 +13,8 @@ observation_impute_cpp(ID, Comb, Xtrain, Xtest, S)
 
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
 \item{S}{Matrix}
 }
diff --git a/man/predictions.Rd b/man/predictions.Rd
index 6834803af..9065c22cd 100644
--- a/man/predictions.Rd
+++ b/man/predictions.Rd
@@ -16,17 +16,20 @@ predictions(model, D, h_optim_vec, kernel_metric, S, Xtrain, Xtest,
 
 \item{D}{Matrix}
 
-\item{kernel_metric}{String indicating which kernel metric should be used in the empirical conditional distribution.
-Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing independently, ignoring any distance) being the second option
-"Gaussian_old" [\eqn{\sqrt(\exp(-D/2\sigma))}] is also kept for reproducibility.}
+\item{kernel_metric}{String indicating which kernel metric should be used in the empirical
+conditional distribution. Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence"
+(imputing independently, ignoring any distance) being the second option. "Gaussian_old"
+[\eqn{\sqrt{\exp(-D/2\sigma)}}] is also kept for reproducibility.}
 
 \item{S}{Matrix}
 
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 
 \item{verbose}{Integer. How much information to print during function execution (in development)}
 
@@ -34,9 +37,11 @@ Defaults to "Gaussian" [\eqn{\exp(-D/2\sigma)}], with "independence" (imputing i
 
 \item{pred_zero}{Numeric}
 
-\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
+NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating
+distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
 \item{Xtest_Gauss_trans}{Vector with the Gaussian transformed test observations}
 }
diff --git a/man/prepare_kshap.Rd b/man/prepare_kshap.Rd
index 182138e80..47d44a41d 100644
--- a/man/prepare_kshap.Rd
+++ b/man/prepare_kshap.Rd
@@ -11,14 +11,19 @@ prepare_kshap(Xtrain, Xtest, exact = TRUE, noSamp = NULL,
 \arguments{
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
-\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a sampling approach to approximate the sum}
+\item{exact}{Logical. If TRUE, uses the full sum in the Shapley formula, if FALSE, uses a
+sampling approach to approximate the sum}
 
 \item{noSamp}{Integer. How many samples to use when approximating the sum in the Shapley formula
 (previously called \code{nrows})}
 
-\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full conditional and unconditional expectations in kernel SHAPs weighted least squares formulation.}
+\item{shapley_weight_inf_replacement}{Numeric. Indicating which weight to use for the full
+conditional and unconditional expectations in kernel SHAP's weighted least squares formulation.}
+
+\item{compute_distances_for_no_var}{If equal to \code{NULL} no distances are computed}
 }
 \value{
 Matrix
diff --git a/man/sample_combinations.Rd b/man/sample_combinations.Rd
index cb1a32794..e41b344c7 100644
--- a/man/sample_combinations.Rd
+++ b/man/sample_combinations.Rd
@@ -2,23 +2,24 @@
 % Please edit documentation in R/extra.R
 \name{sample_combinations}
 \alias{sample_combinations}
-\title{Helper function to sample a combination of training and testing rows, which does not risk getting the same observation twice.
-Need to improve this help file.}
+\title{Helper function to sample a combination of training and testing rows, which does not risk
+getting the same observation twice. Need to improve this help file.}
 \usage{
 sample_combinations(nTrain, nTest, nosamp, separate = F)
 }
 \arguments{
-\item{separate}{Logical indicating whether the train and test data should be sampled separately or in a joint sampling space.
-If they are sampled separately (which typically would be used when optimizing more than one distribution at once) we sample with
-replacement if more samples than training data. Not optimal, but for now fine if careful when using more samples than the number
+\item{separate}{Logical indicating whether the train and test data should be sampled separately
+or in a joint sampling space. If they are sampled separately (which typically would be used when
+optimizing more than one distribution at once) we sample with replacement if more samples than
+training data. Not optimal, but for now fine if careful when using more samples than the number of
 training observations while at the same time doing optimization over every test observation.}
 }
 \value{
 Numeric
 }
 \description{
-Helper function to sample a combination of training and testing rows, which does not risk getting the same observation twice.
-Need to improve this help file.
+Helper function to sample a combination of training and testing rows, which does not risk
+getting the same observation twice. Need to improve this help file.
 }
 \author{
 Martin Jullum
diff --git a/man/sample_copula.Rd b/man/sample_copula.Rd
index aa2273233..fce50fe42 100644
--- a/man/sample_copula.Rd
+++ b/man/sample_copula.Rd
@@ -10,11 +10,14 @@ sample_copula(given_ind, noSamp_MC, mu, Sigma, p, Xtest_Gauss_trans,
 \arguments{
 \item{given_ind}{Vector}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 
-\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
+NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating
+distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
 \item{p}{Positive integer}
 
@@ -22,7 +25,8 @@ sample_copula(given_ind, noSamp_MC, mu, Sigma, p, Xtest_Gauss_trans,
 
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 }
 \value{
 data.table with \code{noSamp_MC} (conditional) Gaussian samples
diff --git a/man/sample_gaussian.Rd b/man/sample_gaussian.Rd
index 1cd5dc186..2add2ad5a 100644
--- a/man/sample_gaussian.Rd
+++ b/man/sample_gaussian.Rd
@@ -10,15 +10,19 @@ sample_gaussian(given_ind, noSamp_MC, mu, Sigma, p, Xtest,
 \arguments{
 \item{given_ind}{Vector}
 
-\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
+\item{noSamp_MC}{Positive integer. Indicating the maximum number of samples to use in the
+Monte Carlo integration for every conditional expectation (previously called \code{n_threshold})}
 
-\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{mu}{Numeric vector. (Optional) Containing the mean of the data generating distribution.
+NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
-\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
+\item{Sigma}{Numeric matrix. (Optional) Containing the covariance matrix of the data generating
+distribution. NULL means it is estimated from the data if needed (in the Gaussian approach).}
 
 \item{p}{Positive integer}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 }
 \value{
 data.table with \code{noSamp_MC} (conditional) Gaussian samples
diff --git a/man/scale_data.Rd b/man/scale_data.Rd
index 3580a58cd..949f8aa51 100644
--- a/man/scale_data.Rd
+++ b/man/scale_data.Rd
@@ -9,7 +9,8 @@ scale_data(Xtrain, Xtest, scale = TRUE)
 \arguments{
 \item{Xtrain}{Matrix, data.frame or data.table with the features from the training data}
 
-\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to be explained (test data)}
+\item{Xtest}{Matrix, data.frame or data.table with the features, whose predictions ought to
+be explained (test data)}
 
 \item{scale}{Logical}
 }