Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

To include option to personalise adjustment set for each candidate variable #25

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions R/varimpact.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
#' @param verbose_reduction Boolean - if TRUE, will display more detail during
#' variable reduction step (clustering).
#' @param digits Number of digits to round the value labels.
#' @param adjustment_exclusions List of variables to be removed from the adjustment set
#' of each variable for which we want to estimate importance.
#'
#' @return Results object. TODO: add more detail here.
#'
Expand Down Expand Up @@ -209,7 +211,8 @@ varimpact =
verbose_tmle = FALSE,
verbose_reduction = FALSE,
parallel = TRUE,
digits = 4L) {
digits = 4L,
adjustment_exclusions = list()) {

# Time the full function execution.
time_start = proc.time()
Expand Down Expand Up @@ -254,7 +257,7 @@ varimpact =
}

########
# Applied to Explanatory (X) data frame
# Applied to Explanatory (X) data frame.
sna = sapply(data, sum_na)

n = nrow(data)
Expand Down Expand Up @@ -330,7 +333,8 @@ varimpact =
adjust_cutoff = adjust_cutoff,
verbose = verbose,
verbose_tmle = verbose_tmle,
verbose_reduction = verbose_reduction)
verbose_reduction = verbose_reduction,
adjustment_exclusions= adjustment_exclusions)

# Combine the separate continuous and factor results.
results =
Expand Down Expand Up @@ -365,6 +369,6 @@ varimpact =

# Set a custom class so that we can override print and summary.
class(results) = "varimpact"

invisible(results)
}
28 changes: 22 additions & 6 deletions R/vim-numerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ vim_numerics =
Qbounds,
corthres,
adjust_cutoff,
adjustment_exclusions,
verbose = FALSE,
verbose_tmle = FALSE,
verbose_reduction = FALSE) {
Expand Down Expand Up @@ -230,12 +231,26 @@ vim_numerics =
numerics$miss.cont,
factors$datafac.dumW,
factors$miss.fac)


#print(nameA)
#print(var_i)
#print(colnames(W))

# Remove any columns in which all values are NA.
# CK: but we're using imputed data, so there should be no NAs actually.
# (With the exception of the NA vectors possibly added above.
W = W[, !apply(is.na(W), 2, all), drop = FALSE]

#print(nameA)

# Remove variables from the adjustment set of each variable for which we want to measure importance
if (nameA %in% names(adjustment_exclusions) && length(adjustment_exclusions[[nameA]]) > 0) {
W = W[, setdiff(colnames(W), adjustment_exclusions[[nameA]]), drop = FALSE]
}

#print(colnames(W))
#print(class(W))

# Separate adjustment matrix into the training and test folds.
Wt = W[folds != fold_k, , drop = FALSE]
Wv = W[folds == fold_k, , drop = FALSE]
Expand Down Expand Up @@ -344,6 +359,7 @@ vim_numerics =
# Create a list to hold the results for this level.
bin_result = list(
name = nameA,
W_names = colnames(W),
cv_fold = fold_k,
level = bin_j,
level_label = At_bin_labels[bin_j],
Expand Down Expand Up @@ -386,7 +402,7 @@ vim_numerics =

# Save how many obs have this level/bin in this training fold.
bin_result$train_cell_size = sum(IA)

###############################################
# TODO: send to calculate_estimates first, which would calculate
# TMLE, IPTW, G-Comp, and Unadj estimates.
Expand Down Expand Up @@ -466,18 +482,18 @@ vim_numerics =
}

bin_result$test_msg = "success"

# Save to the main list.
bin_results[[bin_j]] = bin_result
}

# Finished looping over each level of the assignment variable
# (primarily training fold, but now also some val fold work).
if (verbose) cat(" done.\n")

# Save individual bin results.
fold_result$bin_results = bin_results

# Create a dataframe version of the bin results.
fold_result$bin_df =
do.call(rbind, lapply(bin_results, function(result) {
Expand Down Expand Up @@ -578,7 +594,7 @@ vim_numerics =
"label = ", training_estimates[[minj]]$label, ")")
}
fold_result$message = message

if (verbose) {
cat(message, "\n")
}
Expand Down
44 changes: 44 additions & 0 deletions tests/testthat/test-vim-numerics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
## Test

test_that("adjustment set functionality works appropriately", {

library(varimpact)
library(SuperLearner)
library(devtools)
library(testthat)

# Data setup taken from Chris Kennedy Github page (https://github.com/ck37/varimpact)
set.seed(1, "L'Ecuyer-CMRG")
N <- 300
num_normal <- 5
X <- as.data.frame(matrix(rnorm(N * num_normal), N, num_normal))
Y <- rbinom(N, 1, plogis(.2*X[, 1] + .1*X[, 2] - .2*X[, 3] + .1*X[, 3]*X[, 4] - .2*abs(X[, 4])))
# Add some missing data to X so we can test imputation.
for (i in 1:10) X[sample(nrow(X), 1), sample(ncol(X), 1)] <- NA

Q_lib = c("SL.mean", "SL.glmnet", "SL.ranger", "SL.rpartPrune")
g_lib = c("SL.mean", "SL.glmnet")

#vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib)

vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib, adjustment_exclusions = list("V1" = c("V2","V3"), "V3" = c("V1"), "V5" = c("V1","V2","V3")))

# V1
expect_equal(vim$all_vims$V1$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))

# V2
expect_equal(vim$all_vims$V2$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V3","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))

# V3
expect_equal(vim$all_vims$V3$fold_results[[1]]$bin_results[[1]]$W_names, c("V2","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))

# V4
expect_equal(vim$all_vims$V4$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V2","V3","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))

# V5
expect_equal(vim$all_vims$V5$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))

})