ck37 · DS-Rodrigues · Mar 24, 2023 · Mar 24, 2023 · Mar 24, 2023
diff --git a/R/varimpact.R b/R/varimpact.R
@@ -71,6 +71,8 @@
 #' @param verbose_reduction Boolean - if TRUE, will display more detail during
 #'   variable reduction step (clustering).
 #' @param digits Number of digits to round the value labels.
+#' @param adjustment_exclusions List of variables to be removed from the adjustment set
+#' of each variable for which we want to estimate importance.
 #'
 #' @return Results object. TODO: add more detail here.
 #'
@@ -209,7 +211,8 @@ varimpact =
            verbose_tmle = FALSE,
            verbose_reduction = FALSE,
            parallel = TRUE,
-           digits = 4L) {
+           digits = 4L,
+           adjustment_exclusions = list()) {
 
   # Time the full function execution.
   time_start = proc.time()
@@ -254,7 +257,7 @@ varimpact =
   }
 
   ########
-  # Applied to Explanatory (X) data frame
+  # Applied to Explanatory (X) data frame.
   sna = sapply(data, sum_na)
 
   n = nrow(data)
@@ -330,7 +333,8 @@ varimpact =
                  adjust_cutoff = adjust_cutoff,
                  verbose = verbose,
                  verbose_tmle = verbose_tmle,
-                 verbose_reduction = verbose_reduction)
+                 verbose_reduction = verbose_reduction,
+                 adjustment_exclusions= adjustment_exclusions)
 
   # Combine the separate continuous and factor results.
   results =
@@ -365,6 +369,6 @@ varimpact =
 
   # Set a custom class so that we can override print and summary.
   class(results) = "varimpact"
-
+  
   invisible(results)
 }
diff --git a/R/vim-numerics.R b/R/vim-numerics.R
@@ -13,6 +13,7 @@ vim_numerics =
            Qbounds,
            corthres,
            adjust_cutoff,
+           adjustment_exclusions,
            verbose = FALSE,
            verbose_tmle = FALSE,
            verbose_reduction = FALSE) {
@@ -230,12 +231,26 @@ vim_numerics =
                          numerics$miss.cont,
                          factors$datafac.dumW,
                          factors$miss.fac)
-
+
+          #print(nameA)
+          #print(var_i)
+          #print(colnames(W))
+
           # Remove any columns in which all values are NA.
           # CK: but we're using imputed data, so there should be no NAs actually.
           # (With the exception of the NA vectors possibly added above.
           W = W[, !apply(is.na(W), 2, all), drop = FALSE]
 
+          #print(nameA)
+
+          # Remove variables from the adjustment set of each variable for which we want to measure importance
+          if (nameA %in% names(adjustment_exclusions) && length(adjustment_exclusions[[nameA]]) > 0) {
+            W = W[, setdiff(colnames(W), adjustment_exclusions[[nameA]]), drop = FALSE]
+          }
+
+          #print(colnames(W))
+          #print(class(W))
+
           # Separate adjustment matrix into the training and test folds.
           Wt = W[folds != fold_k, , drop = FALSE]
           Wv = W[folds == fold_k, , drop = FALSE]
@@ -344,6 +359,7 @@ vim_numerics =
               # Create a list to hold the results for this level.
               bin_result = list(
                 name = nameA,
+                W_names = colnames(W),
                 cv_fold = fold_k,
                 level = bin_j,
                 level_label = At_bin_labels[bin_j],
@@ -386,7 +402,7 @@ vim_numerics =
 
               # Save how many obs have this level/bin in this training fold.
               bin_result$train_cell_size = sum(IA)
-
+              
               ###############################################
               # TODO: send to calculate_estimates first, which would calculate
               # TMLE, IPTW, G-Comp, and Unadj estimates.
@@ -466,18 +482,18 @@ vim_numerics =
               }
 
               bin_result$test_msg = "success"
-
+              
               # Save to the main list.
               bin_results[[bin_j]] = bin_result
             }
-
+            
             # Finished looping over each level of the assignment variable
             # (primarily training fold, but now also some val fold work).
             if (verbose) cat(" done.\n")
 
             # Save individual bin results.
             fold_result$bin_results = bin_results
-
+            
             # Create a dataframe version of the bin results.
             fold_result$bin_df =
               do.call(rbind, lapply(bin_results, function(result) {
@@ -578,7 +594,7 @@ vim_numerics =
                                 "label = ", training_estimates[[minj]]$label, ")")
               }
               fold_result$message = message
-
+              
               if (verbose) {
                 cat(message, "\n")
               }

diff --git a/tests/testthat/test-vim-numerics.R b/tests/testthat/test-vim-numerics.R
@@ -0,0 +1,44 @@
+## Test 
+
+test_that("adjustment set functionality works appropriately", {
+
+          library(varimpact)
+          library(SuperLearner)
+          library(devtools)
+          library(testthat)
+
+          # Data setup taken from Chris Kennedy Github page (https://github.com/ck37/varimpact)
+          set.seed(1, "L'Ecuyer-CMRG")
+          N <- 300
+          num_normal <- 5
+          X <- as.data.frame(matrix(rnorm(N * num_normal), N, num_normal))
+          Y <- rbinom(N, 1, plogis(.2*X[, 1] + .1*X[, 2] - .2*X[, 3] + .1*X[, 3]*X[, 4] - .2*abs(X[, 4])))
+          # Add some missing data to X so we can test imputation.
+          for (i in 1:10) X[sample(nrow(X), 1), sample(ncol(X), 1)] <- NA
+
+          Q_lib = c("SL.mean", "SL.glmnet", "SL.ranger", "SL.rpartPrune")
+          g_lib = c("SL.mean", "SL.glmnet")
+
+          #vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib)
+
+          vim = varimpact(Y = Y, data = X, Q.library = Q_lib, g.library = g_lib, adjustment_exclusions = list("V1" = c("V2","V3"), "V3" = c("V1"), "V5" = c("V1","V2","V3")))
+
+          # V1
+          expect_equal(vim$all_vims$V1$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+
+          # V2
+          expect_equal(vim$all_vims$V2$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V3","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+
+          # V3
+          expect_equal(vim$all_vims$V3$fold_results[[1]]$bin_results[[1]]$W_names, c("V2","V4","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+
+          # V4
+          expect_equal(vim$all_vims$V4$fold_results[[1]]$bin_results[[1]]$W_names, c("V1","V2","V3","V5","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+
+          # V5
+          expect_equal(vim$all_vims$V5$fold_results[[1]]$bin_results[[1]]$W_names, c("V4","Imiss_V1","Imiss_V2","Imiss_V3","Imiss_V5"))
+
+          })
+
+
+