Skip to content

Commit

Permalink
Need to clean code. Push in case something happens with my computer.
Browse files Browse the repository at this point in the history
  • Loading branch information
LHBO committed Jan 5, 2024
1 parent df9777d commit 11819e8
Show file tree
Hide file tree
Showing 2 changed files with 301 additions and 7 deletions.
110 changes: 103 additions & 7 deletions R/approach_copula.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,93 @@ setup_approach.copula <- function(internal, ...) {
return(internal)
}



#' @inheritParams default_doc
#' @rdname prepare_data
#' @export
prepare_data.copula <- function(internal, index_features = NULL, ...) {
#' @author Lars Henry Berge Olsen
prepare_data.copula2 <- function(internal, index_features, ...) {

# Extract used variables
X <- internal$objects$X
x_train <- internal$data$x_train
x_train_mat <- as.matrix(internal$data$x_train)
x_explain <- internal$data$x_explain
x_explain_mat <- as.matrix(internal$data$x_explain)
n_explain <- internal$parameters$n_explain
copula.cov_mat <- internal$parameters$copula.cov_mat
n_samples <- internal$parameters$n_samples
copula.mu <- internal$parameters$copula.mu
n_samples = 1000000
n_features <- internal$parameters$n_features
n_combinations <- internal$parameters$n_combinations
n_combinations_now <- length(index_features)
copula.mu <- internal$parameters$copula.mu
copula.cov_mat <- internal$parameters$copula.cov_mat
copula.x_explain_gaussian_mat <- as.matrix(internal$data$copula.x_explain_gaussian) # CAN SKIP as.matrix as it is a matrix allready
feature_names <- internal$parameters$feature_names

copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian








S <- internal$objects$S[index_features, , drop = FALSE]


# Generate the MC samples from N(0, 1)
MC_samples_mat <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features)

# Use Cpp to convert the MC samples to N(mu_{Sbar|S}, Sigma_{Sbar|S}) for all coalitions and explicands.
# The object `dt` is here a 3D array of dimension (n_samples, n_explain*n_coalitions, n_features).
# INCLUDE THE TRANSFORMATION


dt <- prepare_data_copula_cpp(MC_samples_mat = MC_samples_mat,
x_explain_mat = x_explain_mat,
x_explain_gaussian_mat = copula.x_explain_gaussian_mat,
x_train_mat = x_train_mat,
S = S,
mu = copula.mu,
cov_mat = copula.cov_mat)

# Reshape `dt` to a 2D array of dimension (n_samples*n_explain*n_coalitions, n_features).
dim(dt) = c(n_combinations_now*n_explain*n_samples, n_features)

# Convert to a data.table
dt = as.data.table(dt)
setnames(dt, feature_names)
dt[, "id_combination" := rep(seq(nrow(S)), each = n_samples * n_explain)]
dt[, "id" := rep(seq(n_explain), each = n_samples, times = nrow(S))]
dt[, "w" := 1 / n_samples]
data.table::setcolorder(dt, c("id_combination", "id", feature_names))
dt[, id_combination := index_features[id_combination]]
dt

dt[id_combination == 9 & id == 1,]

dt_agr = dt[, lapply(.SD, mean), by = list(id, id_combination)]
data.table::setorderv(dt_agr, c("id", "id_combination"))
dt_agr

return(dt)
}

#' @inheritParams default_doc
#' @rdname prepare_data
#' @export
prepare_data.copula <- function(internal, index_features = NULL, ...) {
X <- internal$objects$X
x_train <- internal$data$x_train
x_explain <- internal$data$x_explain
n_explain <- internal$parameters$n_explain
n_samples <- internal$parameters$n_samples
n_features <- internal$parameters$n_features
copula.mu <- internal$parameters$copula.mu
copula.cov_mat <- internal$parameters$copula.cov_mat
copula.x_explain_gaussian <- internal$data$copula.x_explain_gaussian


x_explain0 <- as.matrix(x_explain)
Expand All @@ -74,7 +147,9 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) {
}


n_samples = 1000000
for (i in seq_len(n_explain)) {
print(i)
l <- lapply(
X = features,
FUN = sample_copula,
Expand All @@ -86,14 +161,25 @@ prepare_data.copula <- function(internal, index_features = NULL, ...) {
x_train = as.matrix(x_train),
x_explain_gaussian = copula.x_explain_gaussian[i, , drop = FALSE]
)
# x_explain_gaussian2 = x_explain_gaussian
# x_train2 = x_train
# x_explain2 = x_explain

dt_l[[i]] <- data.table::rbindlist(l, idcol = "id_combination")
dt_l[[i]][, w := 1 / n_samples]
dt_l[[i]][, id := i]
if (!is.null(index_features)) dt_l[[i]][, id_combination := index_features[id_combination]]
}
dt <- data.table::rbindlist(dt_l, use.names = TRUE, fill = TRUE)
dt
dt9 = dt
dt9_agr = dt9[, lapply(.SD, mean), by = list(id, id_combination)]
dt9_agr - dt_agr
return(dt)




}

#' Sample conditional variables using the Gaussian copula approach
Expand All @@ -119,6 +205,7 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus
} else {
dependent_ind <- (seq_len(length(mu)))[-index_given]

# Dette har jeg kode til
tmp <- condMVNorm::condMVN(
mean = mu,
sigma = cov_mat,
Expand All @@ -127,15 +214,24 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus
X.given = x_explain_gaussian[index_given]
)

# Dette har jeg kode til. Bruker cholensky + simulert data fra før
ret0_z <- mvnfast::rmvn(n = n_samples, mu = tmp$condMean, sigma = tmp$condVar)

# Dette må jeg skrive selv
ret0_x <- apply(
X = rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]),
MARGIN = 2,
FUN = inv_gaussian_transform,
n_z = n_samples
n_z = n_samples,
type = 5
)
ret0_x_cpp = inv_gaussian_transform_cpp(z = ret0_z, x = x_train[, dependent_ind, drop = FALSE])

ret0_x_cpp = inv_gaussian_transform_cpp_armamat(rbind(ret0_z, x_train[, dependent_ind, drop = FALSE]), n_samples = n_samples)
colnames(ret0_x_cpp) = feature_names[dependent_ind]
all.equal(ret0_x, ret0_x_cpp)

# Dette har jeg kode til
ret <- matrix(NA, ncol = m, nrow = n_samples)
ret[, index_given] <- rep(x_explain[index_given], each = n_samples)
ret[, dependent_ind] <- ret0_x
Expand All @@ -156,13 +252,13 @@ sample_copula <- function(index_given, n_samples, mu, cov_mat, m, x_explain_gaus
#' @keywords internal
#'
#' @author Martin Jullum
inv_gaussian_transform <- function(zx, n_z) {
inv_gaussian_transform <- function(zx, n_z, type) {
if (n_z >= length(zx)) stop("n_z should be less than length(zx)")
ind <- 1:n_z
z <- zx[ind]
x <- zx[-ind]
u <- stats::pnorm(z)
x_new <- stats::quantile(x, probs = u)
x_new <- stats::quantile(x, probs = u, type = type)
return(as.double(x_new))
}

Expand Down
198 changes: 198 additions & 0 deletions src/Copula.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#include <RcppArmadillo.h>
#include <iostream>

// [[Rcpp::depends(RcppArmadillo)]]

//' Transforms new data to a standardized normal distribution
//'
//' @param zx Numeric vector. The first `n_samples` items are the Gaussian data, and the last part is
//' the data with the original transformation.
//' @param n_samples Positive integer. Number of elements of `zx` that belongs to new data.
//'
//' @return Numeric matrix of length `n_samples`
//'
//' @keywords internal
//'
//' @author Lars Henry Berge Olsen
// [[Rcpp::export]]
Rcpp::NumericVector inv_gaussian_transform_cpp_Rcpp(const Rcpp::NumericVector& zx, const int n_samples) {

// Extract z and x
Rcpp::NumericVector z = zx[Rcpp::Range(0, n_samples - 1)];
Rcpp::NumericVector x = zx[Rcpp::Range(n_samples, zx.size() - 1)];

// Calculate u
Rcpp::NumericVector u = Rcpp::pnorm(z);

// Calculate x_new using Armadillo's quantile function
arma::vec x_arma = Rcpp::as<arma::vec>(x);
arma::vec u_arma = Rcpp::as<arma::vec>(u);

arma::vec x_new_arma = arma::quantile(x_arma, u_arma);

// Convert back to Rcpp::NumericMatrix
Rcpp::NumericVector x_new = Rcpp::wrap(x_new_arma);

return x_new;
}


// [[Rcpp::export]]
arma::mat inv_gaussian_transform_cpp_mat(Rcpp::NumericMatrix zx, const int n_samples) {

int n_features = zx.ncol();

// Extract z and x
Rcpp::NumericMatrix z = zx(Rcpp::Range(0, n_samples - 1), Rcpp::_ );
Rcpp::NumericMatrix x = zx(Rcpp::Range(n_samples, zx.nrow() - 1), Rcpp::_ );

// Rcpp::NumericMatrix u = Rcpp::pnorm(z);

// Convert Rcpp::NumericMatrix to arma::mat
arma::mat z_arma = Rcpp::as<arma::mat>(z);
arma::mat x_arma = Rcpp::as<arma::mat>(x);

// Calculate u
arma::mat u_arma = arma::normcdf(z_arma);

// Calculate x_new using Armadillo's quantile function
arma::mat x_new_arma(n_samples, n_features);
for (int feature_idx = 0; feature_idx < n_features; feature_idx++) {
x_new_arma.col(feature_idx) = arma::quantile(x_arma.col(feature_idx), u_arma.col(feature_idx));
}

// // Convert back to Rcpp::NumericVector
// Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma);

return x_new_arma;
}

// [[Rcpp::export]]
arma::mat inv_gaussian_transform_cpp_armamat(arma::mat zx, const int n_samples) {

int n_features = zx.n_cols;

// WHAT IS THE POINT TO FIRST ADD THEM TOGETHER AND THEN SPLIT THEM?
// Extract z and x
arma::mat z = zx.rows(0, n_samples - 1);
arma::mat x = zx.rows(n_samples, zx.n_rows - 1);

// Calculate u
arma::mat u = arma::normcdf(z);

// Calculate x_new using Armadillo's quantile function
arma::mat x_new(n_samples, n_features);
for (int feature_idx = 0; feature_idx < n_features; feature_idx++) {
x_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx));
}

// // Convert back to Rcpp::NumericVector
// Rcpp::NumericMatrix x_new = Rcpp::wrap(x_new_arma);

return x_new;
}



//' Transforms new data to a standardized normal distribution
//'
//' @details The function uses `arma::quantile(...)` which corresponds to R's `stats::quantile(..., type = 5)`.
//'
//' @param z arma::mat. The data are the Gaussian Monte Carlos samples to transform.
//' @param x arma::mat. The data with the original transformation. Used to conduct the transformation of `z`.
//'
//' @return arma::mat of same dimension as `z`
//'
//' @keywords internal
//' @author Lars Henry Berge Olsen
// [[Rcpp::export]]
arma::mat inv_gaussian_transform_cpp(arma::mat z, arma::mat x) {
int n_features = z.n_cols;
int n_samples = z.n_rows;
arma::mat u = arma::normcdf(z);
arma::mat z_new(n_samples, n_features);
for (int feature_idx = 0; feature_idx < n_features; feature_idx++) {
z_new.col(feature_idx) = arma::quantile(x.col(feature_idx), u.col(feature_idx));
}
return z_new;
}



// [[Rcpp::export]]
arma::cube prepare_data_copula_cpp(arma::mat MC_samples_mat,
arma::mat x_explain_mat,
arma::mat x_explain_gaussian_mat,
arma::mat x_train_mat,
arma::mat S,
arma::vec mu,
arma::mat cov_mat) {

int n_explain = x_explain_mat.n_rows;
int n_samples = MC_samples_mat.n_rows;
int n_features = MC_samples_mat.n_cols;
int n_coalitions = S.n_rows;

// Initialize auxiliary matrix and result cube
arma::mat aux_mat(n_samples, n_features);
arma::cube result_cube(n_samples, n_explain*n_coalitions, n_features);

// Iterate over the coalitions
for (int S_ind = 0; S_ind < n_coalitions; S_ind++) {

// Get current coalition S and the indices of the features in coalition S and mask Sbar
arma::mat S_now = S.row(S_ind);
arma::uvec S_now_idx = arma::find(S_now > 0.5);
arma::uvec Sbar_now_idx = arma::find(S_now < 0.5);

// Extract the features we condition on
arma::mat x_S_star = x_explain_mat.cols(S_now_idx);
arma::mat x_S_star_gaussian = x_explain_gaussian_mat.cols(S_now_idx);

// // Does that we do not conditioning on
// arma::mat x_Sbar_star = x_train_mat.cols(Sbar_now_idx);

// Extract the mean values for the features in the two sets
arma::vec mu_S = mu.elem(S_now_idx);
arma::vec mu_Sbar = mu.elem(Sbar_now_idx);

std::cout << mu_S << std::endl;

// Extract the relevant parts of the covariance matrix
arma::mat cov_mat_SS = cov_mat.submat(S_now_idx, S_now_idx);
arma::mat cov_mat_SSbar = cov_mat.submat(S_now_idx, Sbar_now_idx);
arma::mat cov_mat_SbarS = cov_mat.submat(Sbar_now_idx, S_now_idx);
arma::mat cov_mat_SbarSbar = cov_mat.submat(Sbar_now_idx, Sbar_now_idx);

// Compute the covariance matrix multiplication factors/terms and the conditional covariance matrix
arma::mat cov_mat_SbarS_cov_mat_SS_inv = cov_mat_SbarS * inv(cov_mat_SS);
arma::mat cond_cov_mat_Sbar_given_S = cov_mat_SbarSbar - cov_mat_SbarS_cov_mat_SS_inv * cov_mat_SSbar;

// Ensure that the conditional covariance matrix is symmetric
if (!cond_cov_mat_Sbar_given_S.is_symmetric()) {
cond_cov_mat_Sbar_given_S = arma::symmatl(cond_cov_mat_Sbar_given_S);
}

// Compute the conditional mean of Xsbar given Xs = Xs_star
arma::mat x_Sbar_mean = cov_mat_SbarS_cov_mat_SS_inv * (x_S_star_gaussian.each_row() - mu_S.t()).t();
x_Sbar_mean.each_col() += mu_Sbar;

// Transform the samples to be from N(O, Sigma_Sbar|S)
arma::mat MC_samples_mat_now = MC_samples_mat.cols(Sbar_now_idx) * arma::chol(cond_cov_mat_Sbar_given_S);

// Loop over the different test observations and combine the generated values with the values we conditioned on
for (int idx_now = 0; idx_now < n_explain; idx_now++) {

arma::mat MC_samples_mat_now_now = MC_samples_mat_now + repmat(trans(x_Sbar_mean.col(idx_now)), n_samples, 1);
arma::mat MC_samples_mat_now_now_trans =
inv_gaussian_transform_cpp(MC_samples_mat_now_now, x_train_mat.cols(Sbar_now_idx));

aux_mat.cols(S_now_idx) = repmat(x_S_star.row(idx_now), n_samples, 1);
aux_mat.cols(Sbar_now_idx) = MC_samples_mat_now_now_trans;

result_cube.col(S_ind*n_explain + idx_now) = aux_mat;
}
}

return result_cube;
}

0 comments on commit 11819e8

Please sign in to comment.