Skip to content

Commit

Permalink
Adapt missing data sims (#443)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielinteractive authored Jun 3, 2024
1 parent 239fe10 commit 7c478d1
Show file tree
Hide file tree
Showing 36 changed files with 1,676 additions and 311 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ src/*.so
src/*.dll
testthat-problems.rds
README.md
**/sascfg_personal.py
.Rprofile
/doc/
/Meta/
Expand Down
1 change: 1 addition & 0 deletions simulations/missing-data-benchmarks/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
**/fit_results.[Rr]ds
slurm.out
.future
108 changes: 108 additions & 0 deletions simulations/missing-data-benchmarks/R/customizations/experiment.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
stopifnot(any(grepl("simChef", search())))

# Custom new method which only kicks off the future jobs.
# This is modified from the fit() method, see
# https://github.com/Yu-Group/simChef/blob/main/R/experiment.R#L976C3-L980C62
Experiment$set(
"public",
"onlystart",
function(n_reps = 1,
future.globals = NULL,
future.packages = NULL,
future.seed = TRUE,
verbose = 1, ...) {
parallel_strategy <- "reps"

dgp_list <- private$.get_obj_list("dgp")
method_list <- private$.get_obj_list("method")

if (length(dgp_list) == 0) {
private$.throw_empty_list_error("dgp", "generate data from")
}

if (length(method_list) == 0) {
private$.throw_empty_list_error("method", "fit methods in")
}

private$.update_fit_params()

checkpoint <- FALSE
n_reps_cached <- 0
n_reps_total <- n_reps
fit_results <- data.frame()

if (verbose >= 1) {
inform(sprintf("Fitting %s...", self$name))
start_time <- Sys.time()
}

if (is.null(future.packages)) {
future.packages <- private$.future.packages
}

if (is.null(future.globals)) {
future.globals <- private$.future.globals
}

dgp_params_list <- private$.combine_vary_params("dgp")
method_params_list <- private$.combine_vary_params("method")

# if new_fit_params is not NULL after the if statement below, then not all
# combos of (dgp_params_list, method_params_list) need to be rerun so need
# to check cache ids when fitting
new_fit_params <- NULL

duplicate_param_names <- private$.get_duplicate_param_names()

# simulations
n_reps <- min(n_reps, n_reps_total - n_reps_cached)

new_fit_results <- local({

# create an env with objs/funcs that the future workers need
workenv <- rlang::new_environment(
data = list(
verbose = verbose,
dgp_list = dgp_list,
method_list = method_list,
new_fit_params = new_fit_params,
dgp_params_list = dgp_params_list,
method_params_list = method_params_list,
duplicate_param_names = duplicate_param_names,
do_call_wrapper = function(name,
fun,
params,
verbose,
call) {
tryCatch(
do_call_handler(
name, fun, params, verbose, call
),
error = identity
)
}
),
parent = rlang::ns_env()
)

# get the experiment compute fun
compute_fun <- compute_rep

environment(compute_fun) <- workenv

# compute the experiment
compute_fun(n_reps,
future.globals,
future.packages,
future.seed)
})

gc()

new_fit_results %>%
dplyr::mutate(
.rep = as.character(as.numeric(.rep) + n_reps_cached)
) %>%
simplify_tibble()
}
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# format the fit results
format_fit_results <- function(fit_results) {
fit_results %>%
transmute(
effect_size = str_extract(.dgp_name, "^([^_])+"),
rep = .rep,
dgp_name = .dgp_name,
method_name = .method_name,
converged = pmap(
.l = list(fit, .method_name, converged),
.f = function(f, method_name, conv_status) {
get_convergence(method_name, f, conv_status)
}
),
converged = unlist(converged),
fit_time = fit_time,
emmeans_output = pmap(
.l = list(fit, .dgp_name, .method_name, data, converged),
.f = function(f, dgp_name, method_name, dt, conv_status) {
get_emmeans_output(method_name, f, dt, conv_status)
}
),
covmat_estimates = pmap(
.l = list(fit, .method_name, converged),
.f = function(f, method_name, conv_status) {
get_cov_mat_estimate(method_name, f, conv_status)
}
)
)
}

# The whole process
format_fit_and_save <- function(experiment) {
gc()
fit_results <- experiment$get_cached_results("fit")
formatted_fit_results <- format_fit_results(fit_results)
formatted_fit_results_file <- file.path(experiment$get_save_dir(), "formatted_fit_results.rds")
saveRDS(formatted_fit_results, file = formatted_fit_results_file)
formatted_fit_results
}




75 changes: 64 additions & 11 deletions simulations/missing-data-benchmarks/R/eval/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,19 +242,26 @@ format_emmeans_df <- function(emmeans_df) {

# extract model fits output at each visit from mmrm fit
get_mmrm_emmeans_output <- function(fit) {
# extract emmeans output
marginal_means <- emmeans(
fit,
spec = trt.vs.ctrl ~ trt | visit_num,
weights = "proportional"
)
emmeans_df <- as.data.frame(marginal_means$contrasts)
result <- try({
# extract emmeans output
marginal_means <- emmeans(
fit,
spec = trt.vs.ctrl ~ trt | visit_num,
weights = "proportional"
)
emmeans_df <- as.data.frame(marginal_means$contrasts)

# compute lower and upper 95% CI
emmeans_df <- get_95_ci(emmeans_df)
# compute lower and upper 95% CI
emmeans_df <- get_95_ci(emmeans_df)

# format to resemble SAS output
format_emmeans_df(emmeans_df)
# format to resemble SAS output
format_emmeans_df(emmeans_df)
})
if (!inherits(result, "try-error")) {
result
} else {
NA
}
}

# extract model fit outputs at each visit from glmmTMB fit
Expand Down Expand Up @@ -323,3 +330,49 @@ get_emmeans_output <- function(method, fit, dt, converged) {
NA
}
}

# get_cov_mat_estimate ----

# extract covariance matrix estimate from mmrm fit
get_mmrm_cov_mat_estimate <- function(fit) {
mmrm::VarCorr(fit)
}

# extract covariance matrix estimate from glmmTMB fit
get_glmm_cov_mat_estimate <- function(fit) {
mat_with_attrs <- glmmTMB::VarCorr(fit)[[c("cond", "participant")]]
dim <- nrow(mat_with_attrs)
ind <- seq_len(dim)
mat_with_attrs[ind, ind]
}

# extract covariance matrix estimate from nlme fit
get_nlme_cov_mat_estimate <- function(fit) {
mat_with_attrs <- nlme::getVarCov(fit)
dim <- nrow(mat_with_attrs)
ind <- seq_len(dim)
mat_with_attrs[ind, ind]
}

# extract covariance matrix estimate from proc mixed fit
get_mixed_cov_mat_estimate <- function(fit) {
attr(fit, "covmat")
}

# general function for covariance matrix estimate
get_cov_mat_estimate <- function(method, fit, converged) {
if (get_convergence(method, fit, converged)) {
if (str_detect(method, "mmrm")) {
get_mmrm_cov_mat_estimate(fit)
} else if (str_detect(method, "glmmtmb")) {
get_glmm_cov_mat_estimate(fit)
} else if (str_detect(method, "nlme")) {
get_nlme_cov_mat_estimate(fit)
} else if (str_detect(method, "proc_mixed")) {
get_mixed_cov_mat_estimate(fit)
}
} else {
NA
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,26 @@ library(tidyr)
library(emmeans)

# load the required helper functions
source("R/format-replicate-results/helpers.R")
# This is just an example how to use these functions.

source("R/customizations/format-fit-results.R")
source("R/eval/helpers.R")

# load a fit_results.rds file
fit_results <- readRDS("R/format-replicate-results/fit_results_100.rds")
fit_results <- readRDS("R/format-replicate-results/fit_results_test.rds")

# format the results
formatted_fit_results_df <- fit_results %>%
format_fit_results(missingness = "none", sample_size = 200)
formatted_fit_results_df <- format_fit_results(fit_results)
saveRDS(formatted_fit_results_df, file = "R/format-replicate-results/df_test.rds")

# formatted_fit_results_df <- readRDS("df_test.rds")
head(formatted_fit_results_df)

# to obtain the estimated covariance matrices:
formatted_fit_results_df$covmat_estimates[1:2]

# to obtain the emmeans by visit:
formatted_fit_results_df |>
select(- covmat_estimates, - fit_time, - converged) |>
unnest(cols = emmeans_output)

Loading

0 comments on commit 7c478d1

Please sign in to comment.