From 800dafe4022f6e0d8869d8422799452b37b57785 Mon Sep 17 00:00:00 2001 From: gowerc Date: Fri, 27 Sep 2024 11:37:10 +0100 Subject: [PATCH 1/3] initial --- R/draws.R | 4 - R/mcmc.R | 9 +- R/methods.R | 17 +- data-raw/create_print_test_data.R | 4 +- man/method.Rd | 7 +- tests/testthat/_snaps/print.md | 1 - tests/testthat/test-mcmc.R | 46 ++---- tests/testthat/test-print.R | 3 +- tests/testthat/test-reproducibility.R | 9 +- vignettes/advanced.html | 2 +- vignettes/quickstart.Rmd | 6 +- vignettes/quickstart.html | 220 +++++++++++++------------- 12 files changed, 146 insertions(+), 182 deletions(-) diff --git a/R/draws.R b/R/draws.R index 9c8f8afb..b0064997 100644 --- a/R/draws.R +++ b/R/draws.R @@ -478,10 +478,6 @@ extract_data_nmar_as_na <- function(longdata) { #' @export draws.bayes <- function(data, data_ice = NULL, vars, method, ncores = 1, quiet = FALSE) { - if (!is.na(method$seed)) { - set.seed(method$seed) - } - longdata <- longDataConstructor$new(data, vars) longdata$set_strategies(data_ice) diff --git a/R/mcmc.R b/R/mcmc.R index be884869..d84d3547 100644 --- a/R/mcmc.R +++ b/R/mcmc.R @@ -61,7 +61,6 @@ fit_mcmc <- function( n_imputations <- method$n_samples burn_in <- method$burn_in - seed <- method$seed burn_between <- method$burn_between same_cov <- method$same_cov @@ -114,13 +113,7 @@ fit_mcmc <- function( ) ) - assert_that( - !is.na(seed), - !is.null(seed), - is.numeric(seed), - msg = "mcmc seed is invalid" - ) - sampling_args$seed <- seed + sampling_args$seed <- sample.int(.Machine$integer.max, 1) stan_fit <- record({ do.call(sampling, sampling_args) diff --git a/R/methods.R b/R/methods.R index c652cdf9..406f2282 100644 --- a/R/methods.R +++ b/R/methods.R @@ -44,10 +44,7 @@ #' @param type a character string that specifies the resampling method used to perform inference #' when a conditional mean imputation approach (set via `method_condmean()`) is used. Must be one of `"bootstrap"` or `"jackknife"`. #' -#' @param seed a numeric that specifies the seed to be used in the call to Stan. This -#' argument is passed onto the `seed` argument of [rstan::sampling()]. Note that -#' this is only required for `method_bayes()`, for all other methods you can achieve -#' reproducible results by setting the seed via `set.seed()`. See details. +#' @param seed deprecated. Please use `set.seed()` instead. #' #' @details #' @@ -93,14 +90,20 @@ method_bayes <- function( burn_between = 50, same_cov = TRUE, n_samples = 20, - seed = sample.int(.Machine$integer.max, 1) + seed = NULL ) { + if (!is.null(seed)) { + warning(paste0( + "The `seed` argument to `method_bayes()` has been deprecated", + " please use `set.seed()` instead" + )) + } + x <- list( burn_in = burn_in, burn_between = burn_between, same_cov = same_cov, - n_samples = n_samples, - seed = seed + n_samples = n_samples ) return(as_class(x, c("method", "bayes"))) } diff --git a/data-raw/create_print_test_data.R b/data-raw/create_print_test_data.R index 9c324d62..71c2c120 100644 --- a/data-raw/create_print_test_data.R +++ b/data-raw/create_print_test_data.R @@ -107,14 +107,14 @@ set.seed(413) dobj <- get_data(40) suppressWarnings({ + set.seet(859) drawobj_b <- draws( data = dobj$dat, data_ice = dobj$dat_ice, vars = dobj$vars, method = method_bayes( n_samples = 50, - burn_between = 1, - seed = 859 + burn_between = 1 ) ) }) diff --git a/man/method.Rd b/man/method.Rd index 0ba22c87..e8077e0b 100644 --- a/man/method.Rd +++ b/man/method.Rd @@ -13,7 +13,7 @@ method_bayes( burn_between = 50, same_cov = TRUE, n_samples = 20, - seed = sample.int(.Machine$integer.max, 1) + seed = NULL ) method_approxbayes( @@ -61,10 +61,7 @@ matrix will be fit for each group as determined by the \code{group} argument of In the case of \code{method_condmean(type = "jackknife")} this argument must be set to \code{NULL}. See details.} -\item{seed}{a numeric that specifies the seed to be used in the call to Stan. This -argument is passed onto the \code{seed} argument of \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}. Note that -this is only required for \code{method_bayes()}, for all other methods you can achieve -reproducible results by setting the seed via \code{set.seed()}. See details.} +\item{seed}{deprecated. Please use \code{set.seed()} instead.} \item{covariance}{a character string that specifies the structure of the covariance matrix to be used in the imputation model. Must be one of \code{"us"} (default), \code{"toep"}, diff --git a/tests/testthat/_snaps/print.md b/tests/testthat/_snaps/print.md index 6f549fb3..989ca93a 100644 --- a/tests/testthat/_snaps/print.md +++ b/tests/testthat/_snaps/print.md @@ -255,7 +255,6 @@ burn_between: 1 same_cov: TRUE n_samples: 50 - seed: 859 --- diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R index 7fb72148..422f4d54 100644 --- a/tests/testthat/test-mcmc.R +++ b/tests/testthat/test-mcmc.R @@ -529,8 +529,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", { n_samples = 250, burn_in = 100, burn_between = 3, - same_cov = FALSE, - seed = 8931 + same_cov = FALSE ) ### No missingness @@ -604,36 +603,17 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", { }) -test_that("invalid seed throws an error", { - - set.seed(301) - sigma <- as_vcov(c(6, 4, 4), c(0.5, 0.2, 0.3)) - dat <- get_sim_data(50, sigma) - - dat_ice <- dat %>% - group_by(id) %>% - arrange(desc(visit)) %>% - slice(1) %>% - ungroup() %>% - mutate(strategy = "MAR") - - vars <- set_vars( - visit = "visit", - subjid = "id", - group = "group", - covariates = "sex", - strategy = "strategy", - outcome = "outcome" - ) - - expect_error( - draws( - dat, - dat_ice, - vars, - method_bayes(n_samples = 2, seed = NA), - quiet = TRUE - ), - regexp = "mcmc seed is invalid" +test_that("seed argument to method_bayes is depreciated", { + expect_warning( + { + method <- method_bayes( + n_samples = 250, + burn_in = 100, + burn_between = 3, + same_cov = FALSE, + seed = 1234 + ) + }, + regexp = "seed.*deprecated" ) }) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index f661573e..fa592370 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -104,8 +104,7 @@ test_that("print - bayesian", { vars = dobj$vars, method = method_bayes( n_samples = 50, - burn_between = 1, - seed = 859 + burn_between = 1 ), quiet = TRUE ) diff --git a/tests/testthat/test-reproducibility.R b/tests/testthat/test-reproducibility.R index fdc8e70f..04809a9f 100644 --- a/tests/testthat/test-reproducibility.R +++ b/tests/testthat/test-reproducibility.R @@ -86,7 +86,7 @@ test_that("Results are Reproducible", { -test_that("bayes - seed argument works without set.seed", { +test_that("bayes - set.seed produces identical results", { sigma <- as_vcov(c(2, 1, 0.7), c(0.5, 0.3, 0.2)) dat <- get_sim_data(200, sigma, trt = 8) %>% @@ -111,17 +111,16 @@ test_that("bayes - seed argument works without set.seed", { ) meth <- method_bayes( - seed = 1482, burn_between = 5, burn_in = 200, - n_samples = 2 + n_samples = 6 ) - set.seed(49812) + set.seed(1234) x <- suppressWarnings({ draws(dat, dat_ice, vars, meth, quiet = TRUE) }) - set.seed(2414) + set.seed(1234) y <- suppressWarnings({ draws(dat, dat_ice, vars, meth, quiet = TRUE) }) diff --git a/vignettes/advanced.html b/vignettes/advanced.html index d0a958b4..84a8d0ae 100644 --- a/vignettes/advanced.html +++ b/vignettes/advanced.html @@ -714,7 +714,7 @@

6 Custom imputation strategies#> pars <- list(mu = mu, sigma = sigma) #> return(pars) #> } -#> <bytecode: 0x7ff37e6af218> +#> <bytecode: 0x7f86686ebac0> #> <environment: namespace:rbmi>

To further illustrate this for a simple example, assume that a new strategy is to be implemented as follows: - The marginal mean of the imputation distribution is equal to the marginal mean trajectory for the subject according to their assigned group and covariates up to the ICE. diff --git a/vignettes/quickstart.Rmd b/vignettes/quickstart.Rmd index ac8a64f3..56b8cee8 100644 --- a/vignettes/quickstart.Rmd +++ b/vignettes/quickstart.Rmd @@ -117,8 +117,7 @@ vars <- set_vars( method <- method_bayes( burn_in = 200, burn_between = 5, - n_samples = 150, - seed = 675442751 + n_samples = 150 ) # Create samples for the imputation parameters by running the draws() function @@ -347,8 +346,7 @@ vars <- set_vars( method <- method_bayes( burn_in = 200, burn_between = 5, - n_samples = 150, - seed = 675442751 + n_samples = 150 ) diff --git a/vignettes/quickstart.html b/vignettes/quickstart.html index 58495d20..d22421ae 100644 --- a/vignettes/quickstart.html +++ b/vignettes/quickstart.html @@ -441,34 +441,35 @@

3 Draws

method <- method_bayes( burn_in = 200, burn_between = 5, - n_samples = 150, - seed = 675442751 -) - -# Create samples for the imputation parameters by running the draws() function -set.seed(987) -drawObj <- draws( - data = dat, - data_ice = dat_ice, - vars = vars, - method = method, - quiet = TRUE -) -drawObj -#> -#> Draws Object -#> ------------ -#> Number of Samples: 150 -#> Number of Failed Samples: 0 -#> Model Formula: CHANGE ~ 1 + THERAPY + VISIT + BASVAL * VISIT + THERAPY * VISIT -#> Imputation Type: random -#> Method: -#> name: Bayes -#> burn_in: 200 -#> burn_between: 5 -#> same_cov: TRUE -#> n_samples: 150 -#> seed: 675442751 + n_samples = 150 +) + +# Create samples for the imputation parameters by running the draws() function +set.seed(987) +drawObj <- draws( + data = dat, + data_ice = dat_ice, + vars = vars, + method = method, + quiet = TRUE +) +#> Warning in fit_mcmc(designmat = model_df_scaled[, -1, drop = FALSE], outcome = model_df_scaled[, : The largest R-hat is 1.06, indicating chains have not mixed. +#> Running the chains for more iterations may help. See +#> https://mc-stan.org/misc/warnings.html#r-hat +drawObj +#> +#> Draws Object +#> ------------ +#> Number of Samples: 150 +#> Number of Failed Samples: 0 +#> Model Formula: CHANGE ~ 1 + THERAPY + VISIT + BASVAL * VISIT + THERAPY * VISIT +#> Imputation Type: random +#> Method: +#> name: Bayes +#> burn_in: 200 +#> burn_between: 5 +#> same_cov: TRUE +#> n_samples: 150

Note the use of set_vars() which specifies the names of the key variables within the dataset and the imputation model. Additionally, note that whilst vars$group and vars$visit are added as terms to the imputation model by default, their interaction is not, @@ -693,15 +694,15 @@

6 Pool

#> trt_4 -0.092 0.683 -1.439 1.256 0.893 #> lsm_ref_4 -1.616 0.486 -2.576 -0.656 0.001 #> lsm_alt_4 -1.708 0.475 -2.645 -0.77 <0.001 -#> trt_5 1.286 0.928 -0.546 3.119 0.167 -#> lsm_ref_5 -4.114 0.662 -5.421 -2.807 <0.001 -#> lsm_alt_5 -2.828 0.645 -4.103 -1.553 <0.001 -#> trt_6 1.909 0.996 -0.059 3.877 0.057 -#> lsm_ref_6 -6.082 0.712 -7.488 -4.676 <0.001 -#> lsm_alt_6 -4.173 0.693 -5.543 -2.803 <0.001 -#> trt_7 2.065 1.125 -0.158 4.288 0.068 -#> lsm_ref_7 -6.937 0.817 -8.554 -5.32 <0.001 -#> lsm_alt_7 -4.872 0.787 -6.427 -3.317 <0.001 +#> trt_5 1.328 0.924 -0.497 3.153 0.153 +#> lsm_ref_5 -4.14 0.66 -5.443 -2.838 <0.001 +#> lsm_alt_5 -2.812 0.646 -4.088 -1.537 <0.001 +#> trt_6 1.939 1 -0.037 3.915 0.054 +#> lsm_ref_6 -6.085 0.718 -7.503 -4.667 <0.001 +#> lsm_alt_6 -4.146 0.699 -5.527 -2.766 <0.001 +#> trt_7 2.136 1.12 -0.078 4.35 0.058 +#> lsm_ref_7 -6.982 0.812 -8.587 -5.377 <0.001 +#> lsm_alt_7 -4.846 0.789 -6.405 -3.287 <0.001 #> --------------------------------------------------

The table of values shown in the print message for poolObj can also be extracted using the as.data.frame() function:

as.data.frame(poolObj)
@@ -709,18 +710,18 @@ 

6 Pool

#> 1 trt_4 -0.09180645 0.6826279 -1.43949684 1.2558839 8.931772e-01 #> 2 lsm_ref_4 -1.61581996 0.4862316 -2.57577141 -0.6558685 1.093708e-03 #> 3 lsm_alt_4 -1.70762640 0.4749573 -2.64531931 -0.7699335 4.262148e-04 -#> 4 trt_5 1.28644500 0.9277209 -0.54590272 3.1187927 1.674969e-01 -#> 5 lsm_ref_5 -4.11444658 0.6617013 -5.42140736 -2.8074858 4.341299e-09 -#> 6 lsm_alt_5 -2.82800159 0.6453115 -4.10255468 -1.5534485 2.131897e-05 -#> 7 trt_6 1.90919233 0.9961837 -0.05886381 3.8772485 5.716563e-02 -#> 8 lsm_ref_6 -6.08225789 0.7116785 -7.48832324 -4.6761925 1.254942e-14 -#> 9 lsm_alt_6 -4.17306556 0.6934318 -5.54301764 -2.8031135 1.257113e-08 -#> 10 trt_7 2.06527821 1.1246338 -0.15780585 4.2883623 6.837825e-02 -#> 11 lsm_ref_7 -6.93702520 0.8174548 -8.55359983 -5.3204506 3.240396e-14 -#> 12 lsm_alt_7 -4.87174699 0.7866796 -6.42696291 -3.3165311 6.097291e-09
+#> 4 trt_5 1.32800107 0.9239991 -0.49687491 3.1528770 1.526144e-01 +#> 5 lsm_ref_5 -4.14031255 0.6595847 -5.44302381 -2.8376013 3.163421e-09 +#> 6 lsm_alt_5 -2.81231148 0.6459122 -4.08807336 -1.5365496 2.396574e-05 +#> 7 trt_6 1.93891419 1.0001460 -0.03694571 3.9147741 5.438468e-02 +#> 8 lsm_ref_6 -6.08530002 0.7176967 -7.50335519 -4.6672448 1.946811e-14 +#> 9 lsm_alt_6 -4.14638583 0.6985434 -5.52650481 -2.7662668 1.911219e-08 +#> 10 trt_7 2.13609482 1.1201125 -0.07781920 4.3500088 5.849971e-02 +#> 11 lsm_ref_7 -6.98181990 0.8117268 -8.58678186 -5.3768579 1.511791e-14 +#> 12 lsm_alt_7 -4.84572508 0.7885999 -6.40477980 -3.2866704 7.793320e-09

These outputs gives an estimated difference of -2.065 (95% CI -0.158 to 4.288) -between the two groups at the last visit with an associated p-value of 0.068.

+2.136 (95% CI -0.078 to 4.350) +between the two groups at the last visit with an associated p-value of 0.058.

7 Code

@@ -772,69 +773,68 @@

7 Code

method <- method_bayes( burn_in = 200, burn_between = 5, - n_samples = 150, - seed = 675442751 -) + n_samples = 150 +) + - -# Create samples for the imputation parameters by running the draws() function -set.seed(987) -drawObj <- draws( - data = dat, - data_ice = dat_ice, - vars = vars, - method = method, - quiet = TRUE -) - -# Impute the data -imputeObj <- impute( - drawObj, - references = c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO") -) - -# Fit the analysis model on each imputed dataset -anaObj <- analyse( - imputeObj, - ancova, - vars = set_vars( - subjid = "PATIENT", - outcome = "CHANGE", - visit = "VISIT", - group = "THERAPY", - covariates = c("BASVAL") - ) -) - -# Apply a delta adjustment - -# Add a delta-value of 5 to all imputed values (i.e. those values -# which were missing in the original dataset) in the drug arm. -delta_df <- delta_template(imputeObj) %>% - as_tibble() %>% - mutate(delta = if_else(THERAPY == "DRUG" & is_missing , 5, 0)) %>% - select(PATIENT, VISIT, delta) - -# Repeat the analyses with the adjusted values -anaObj_delta <- analyse( - imputeObj, - ancova, - delta = delta_df, - vars = set_vars( - subjid = "PATIENT", - outcome = "CHANGE", - visit = "VISIT", - group = "THERAPY", - covariates = c("BASVAL") - ) -) - -# Pool the results -poolObj <- pool( - anaObj, - conf.level = 0.95, - alternative = "two.sided" -)
+# Create samples for the imputation parameters by running the draws() function +set.seed(987) +drawObj <- draws( + data = dat, + data_ice = dat_ice, + vars = vars, + method = method, + quiet = TRUE +) + +# Impute the data +imputeObj <- impute( + drawObj, + references = c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO") +) + +# Fit the analysis model on each imputed dataset +anaObj <- analyse( + imputeObj, + ancova, + vars = set_vars( + subjid = "PATIENT", + outcome = "CHANGE", + visit = "VISIT", + group = "THERAPY", + covariates = c("BASVAL") + ) +) + +# Apply a delta adjustment + +# Add a delta-value of 5 to all imputed values (i.e. those values +# which were missing in the original dataset) in the drug arm. +delta_df <- delta_template(imputeObj) %>% + as_tibble() %>% + mutate(delta = if_else(THERAPY == "DRUG" & is_missing , 5, 0)) %>% + select(PATIENT, VISIT, delta) + +# Repeat the analyses with the adjusted values +anaObj_delta <- analyse( + imputeObj, + ancova, + delta = delta_df, + vars = set_vars( + subjid = "PATIENT", + outcome = "CHANGE", + visit = "VISIT", + group = "THERAPY", + covariates = c("BASVAL") + ) +) + +# Pool the results +poolObj <- pool( + anaObj, + conf.level = 0.95, + alternative = "two.sided" +) From 33c4e963ee04cf3bef9ec8436fa8f8d5dec59bf4 Mon Sep 17 00:00:00 2001 From: Craig Gower-Page Date: Mon, 30 Sep 2024 09:52:54 +0100 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Isaac Gravestock <83659704+gravesti@users.noreply.github.com> Signed-off-by: Craig Gower-Page --- R/methods.R | 4 ++-- data-raw/create_print_test_data.R | 2 +- tests/testthat/test-mcmc.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/methods.R b/R/methods.R index 406f2282..b9f7f9f4 100644 --- a/R/methods.R +++ b/R/methods.R @@ -93,10 +93,10 @@ method_bayes <- function( seed = NULL ) { if (!is.null(seed)) { - warning(paste0( + warning( "The `seed` argument to `method_bayes()` has been deprecated", " please use `set.seed()` instead" - )) + ) } x <- list( diff --git a/data-raw/create_print_test_data.R b/data-raw/create_print_test_data.R index 71c2c120..501c1bcf 100644 --- a/data-raw/create_print_test_data.R +++ b/data-raw/create_print_test_data.R @@ -107,7 +107,7 @@ set.seed(413) dobj <- get_data(40) suppressWarnings({ - set.seet(859) + set.seed(859) drawobj_b <- draws( data = dobj$dat, data_ice = dobj$dat_ice, diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R index 422f4d54..aa77c88d 100644 --- a/tests/testthat/test-mcmc.R +++ b/tests/testthat/test-mcmc.R @@ -603,7 +603,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", { }) -test_that("seed argument to method_bayes is depreciated", { +test_that("seed argument to method_bayes is deprecated", { expect_warning( { method <- method_bayes( From 52acd6127ebdc5e028a1ca4dfaee834a09be1be6 Mon Sep 17 00:00:00 2001 From: gowerc Date: Fri, 4 Oct 2024 15:13:07 +0100 Subject: [PATCH 3/3] upgrade warning -> error --- R/methods.R | 12 +++++++----- tests/testthat/test-mcmc.R | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/methods.R b/R/methods.R index b9f7f9f4..8d091323 100644 --- a/R/methods.R +++ b/R/methods.R @@ -92,12 +92,14 @@ method_bayes <- function( n_samples = 20, seed = NULL ) { - if (!is.null(seed)) { - warning( - "The `seed` argument to `method_bayes()` has been deprecated", - " please use `set.seed()` instead" + assertthat::assert_that( + is.null(seed), + msg = paste( + "The `seed` argument to `method_bayes()` has been deprecated;", + "please use `set.seed()` instead.", + collapse = " " ) - } + ) x <- list( burn_in = burn_in, diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R index aa77c88d..5550e458 100644 --- a/tests/testthat/test-mcmc.R +++ b/tests/testthat/test-mcmc.R @@ -604,7 +604,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", { test_that("seed argument to method_bayes is deprecated", { - expect_warning( + expect_error( { method <- method_bayes( n_samples = 250,