From 800dafe4022f6e0d8869d8422799452b37b57785 Mon Sep 17 00:00:00 2001
From: gowerc
Date: Fri, 27 Sep 2024 11:37:10 +0100
Subject: [PATCH 1/3] initial
---
R/draws.R | 4 -
R/mcmc.R | 9 +-
R/methods.R | 17 +-
data-raw/create_print_test_data.R | 4 +-
man/method.Rd | 7 +-
tests/testthat/_snaps/print.md | 1 -
tests/testthat/test-mcmc.R | 46 ++----
tests/testthat/test-print.R | 3 +-
tests/testthat/test-reproducibility.R | 9 +-
vignettes/advanced.html | 2 +-
vignettes/quickstart.Rmd | 6 +-
vignettes/quickstart.html | 220 +++++++++++++-------------
12 files changed, 146 insertions(+), 182 deletions(-)
diff --git a/R/draws.R b/R/draws.R
index 9c8f8afb..b0064997 100644
--- a/R/draws.R
+++ b/R/draws.R
@@ -478,10 +478,6 @@ extract_data_nmar_as_na <- function(longdata) {
#' @export
draws.bayes <- function(data, data_ice = NULL, vars, method, ncores = 1, quiet = FALSE) {
- if (!is.na(method$seed)) {
- set.seed(method$seed)
- }
-
longdata <- longDataConstructor$new(data, vars)
longdata$set_strategies(data_ice)
diff --git a/R/mcmc.R b/R/mcmc.R
index be884869..d84d3547 100644
--- a/R/mcmc.R
+++ b/R/mcmc.R
@@ -61,7 +61,6 @@ fit_mcmc <- function(
n_imputations <- method$n_samples
burn_in <- method$burn_in
- seed <- method$seed
burn_between <- method$burn_between
same_cov <- method$same_cov
@@ -114,13 +113,7 @@ fit_mcmc <- function(
)
)
- assert_that(
- !is.na(seed),
- !is.null(seed),
- is.numeric(seed),
- msg = "mcmc seed is invalid"
- )
- sampling_args$seed <- seed
+ sampling_args$seed <- sample.int(.Machine$integer.max, 1)
stan_fit <- record({
do.call(sampling, sampling_args)
diff --git a/R/methods.R b/R/methods.R
index c652cdf9..406f2282 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -44,10 +44,7 @@
#' @param type a character string that specifies the resampling method used to perform inference
#' when a conditional mean imputation approach (set via `method_condmean()`) is used. Must be one of `"bootstrap"` or `"jackknife"`.
#'
-#' @param seed a numeric that specifies the seed to be used in the call to Stan. This
-#' argument is passed onto the `seed` argument of [rstan::sampling()]. Note that
-#' this is only required for `method_bayes()`, for all other methods you can achieve
-#' reproducible results by setting the seed via `set.seed()`. See details.
+#' @param seed deprecated. Please use `set.seed()` instead.
#'
#' @details
#'
@@ -93,14 +90,20 @@ method_bayes <- function(
burn_between = 50,
same_cov = TRUE,
n_samples = 20,
- seed = sample.int(.Machine$integer.max, 1)
+ seed = NULL
) {
+ if (!is.null(seed)) {
+ warning(paste0(
+ "The `seed` argument to `method_bayes()` has been deprecated",
+ " please use `set.seed()` instead"
+ ))
+ }
+
x <- list(
burn_in = burn_in,
burn_between = burn_between,
same_cov = same_cov,
- n_samples = n_samples,
- seed = seed
+ n_samples = n_samples
)
return(as_class(x, c("method", "bayes")))
}
diff --git a/data-raw/create_print_test_data.R b/data-raw/create_print_test_data.R
index 9c324d62..71c2c120 100644
--- a/data-raw/create_print_test_data.R
+++ b/data-raw/create_print_test_data.R
@@ -107,14 +107,14 @@ set.seed(413)
dobj <- get_data(40)
suppressWarnings({
+ set.seet(859)
drawobj_b <- draws(
data = dobj$dat,
data_ice = dobj$dat_ice,
vars = dobj$vars,
method = method_bayes(
n_samples = 50,
- burn_between = 1,
- seed = 859
+ burn_between = 1
)
)
})
diff --git a/man/method.Rd b/man/method.Rd
index 0ba22c87..e8077e0b 100644
--- a/man/method.Rd
+++ b/man/method.Rd
@@ -13,7 +13,7 @@ method_bayes(
burn_between = 50,
same_cov = TRUE,
n_samples = 20,
- seed = sample.int(.Machine$integer.max, 1)
+ seed = NULL
)
method_approxbayes(
@@ -61,10 +61,7 @@ matrix will be fit for each group as determined by the \code{group} argument of
In the case of \code{method_condmean(type = "jackknife")} this argument
must be set to \code{NULL}. See details.}
-\item{seed}{a numeric that specifies the seed to be used in the call to Stan. This
-argument is passed onto the \code{seed} argument of \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}}. Note that
-this is only required for \code{method_bayes()}, for all other methods you can achieve
-reproducible results by setting the seed via \code{set.seed()}. See details.}
+\item{seed}{deprecated. Please use \code{set.seed()} instead.}
\item{covariance}{a character string that specifies the structure of the covariance
matrix to be used in the imputation model. Must be one of \code{"us"} (default), \code{"toep"},
diff --git a/tests/testthat/_snaps/print.md b/tests/testthat/_snaps/print.md
index 6f549fb3..989ca93a 100644
--- a/tests/testthat/_snaps/print.md
+++ b/tests/testthat/_snaps/print.md
@@ -255,7 +255,6 @@
burn_between: 1
same_cov: TRUE
n_samples: 50
- seed: 859
---
diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R
index 7fb72148..422f4d54 100644
--- a/tests/testthat/test-mcmc.R
+++ b/tests/testthat/test-mcmc.R
@@ -529,8 +529,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", {
n_samples = 250,
burn_in = 100,
burn_between = 3,
- same_cov = FALSE,
- seed = 8931
+ same_cov = FALSE
)
### No missingness
@@ -604,36 +603,17 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", {
})
-test_that("invalid seed throws an error", {
-
- set.seed(301)
- sigma <- as_vcov(c(6, 4, 4), c(0.5, 0.2, 0.3))
- dat <- get_sim_data(50, sigma)
-
- dat_ice <- dat %>%
- group_by(id) %>%
- arrange(desc(visit)) %>%
- slice(1) %>%
- ungroup() %>%
- mutate(strategy = "MAR")
-
- vars <- set_vars(
- visit = "visit",
- subjid = "id",
- group = "group",
- covariates = "sex",
- strategy = "strategy",
- outcome = "outcome"
- )
-
- expect_error(
- draws(
- dat,
- dat_ice,
- vars,
- method_bayes(n_samples = 2, seed = NA),
- quiet = TRUE
- ),
- regexp = "mcmc seed is invalid"
+test_that("seed argument to method_bayes is depreciated", {
+ expect_warning(
+ {
+ method <- method_bayes(
+ n_samples = 250,
+ burn_in = 100,
+ burn_between = 3,
+ same_cov = FALSE,
+ seed = 1234
+ )
+ },
+ regexp = "seed.*deprecated"
)
})
diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R
index f661573e..fa592370 100644
--- a/tests/testthat/test-print.R
+++ b/tests/testthat/test-print.R
@@ -104,8 +104,7 @@ test_that("print - bayesian", {
vars = dobj$vars,
method = method_bayes(
n_samples = 50,
- burn_between = 1,
- seed = 859
+ burn_between = 1
),
quiet = TRUE
)
diff --git a/tests/testthat/test-reproducibility.R b/tests/testthat/test-reproducibility.R
index fdc8e70f..04809a9f 100644
--- a/tests/testthat/test-reproducibility.R
+++ b/tests/testthat/test-reproducibility.R
@@ -86,7 +86,7 @@ test_that("Results are Reproducible", {
-test_that("bayes - seed argument works without set.seed", {
+test_that("bayes - set.seed produces identical results", {
sigma <- as_vcov(c(2, 1, 0.7), c(0.5, 0.3, 0.2))
dat <- get_sim_data(200, sigma, trt = 8) %>%
@@ -111,17 +111,16 @@ test_that("bayes - seed argument works without set.seed", {
)
meth <- method_bayes(
- seed = 1482,
burn_between = 5,
burn_in = 200,
- n_samples = 2
+ n_samples = 6
)
- set.seed(49812)
+ set.seed(1234)
x <- suppressWarnings({
draws(dat, dat_ice, vars, meth, quiet = TRUE)
})
- set.seed(2414)
+ set.seed(1234)
y <- suppressWarnings({
draws(dat, dat_ice, vars, meth, quiet = TRUE)
})
diff --git a/vignettes/advanced.html b/vignettes/advanced.html
index d0a958b4..84a8d0ae 100644
--- a/vignettes/advanced.html
+++ b/vignettes/advanced.html
@@ -714,7 +714,7 @@ Custom imputation strategies#> pars <- list(mu = mu, sigma = sigma)
#> return(pars)
#> }
-#> <bytecode: 0x7ff37e6af218>
+#> <bytecode: 0x7f86686ebac0>
#> <environment: namespace:rbmi>
To further illustrate this for a simple example, assume that a new strategy is to be implemented as follows:
- The marginal mean of the imputation distribution is equal to the marginal mean trajectory for the subject according to their assigned group and covariates up to the ICE.
diff --git a/vignettes/quickstart.Rmd b/vignettes/quickstart.Rmd
index ac8a64f3..56b8cee8 100644
--- a/vignettes/quickstart.Rmd
+++ b/vignettes/quickstart.Rmd
@@ -117,8 +117,7 @@ vars <- set_vars(
method <- method_bayes(
burn_in = 200,
burn_between = 5,
- n_samples = 150,
- seed = 675442751
+ n_samples = 150
)
# Create samples for the imputation parameters by running the draws() function
@@ -347,8 +346,7 @@ vars <- set_vars(
method <- method_bayes(
burn_in = 200,
burn_between = 5,
- n_samples = 150,
- seed = 675442751
+ n_samples = 150
)
diff --git a/vignettes/quickstart.html b/vignettes/quickstart.html
index 58495d20..d22421ae 100644
--- a/vignettes/quickstart.html
+++ b/vignettes/quickstart.html
@@ -441,34 +441,35 @@
Draws
method <- method_bayes(
burn_in = 200,
burn_between = 5,
- n_samples = 150,
- seed = 675442751
-)
-
-# Create samples for the imputation parameters by running the draws() function
-set.seed(987)
-drawObj <- draws(
- data = dat,
- data_ice = dat_ice,
- vars = vars,
- method = method,
- quiet = TRUE
-)
-drawObj
-#>
-#> Draws Object
-#> ------------
-#> Number of Samples: 150
-#> Number of Failed Samples: 0
-#> Model Formula: CHANGE ~ 1 + THERAPY + VISIT + BASVAL * VISIT + THERAPY * VISIT
-#> Imputation Type: random
-#> Method:
-#> name: Bayes
-#> burn_in: 200
-#> burn_between: 5
-#> same_cov: TRUE
-#> n_samples: 150
-#> seed: 675442751
+ n_samples = 150
+)
+
+# Create samples for the imputation parameters by running the draws() function
+set.seed(987)
+drawObj <- draws(
+ data = dat,
+ data_ice = dat_ice,
+ vars = vars,
+ method = method,
+ quiet = TRUE
+)
+#> Warning in fit_mcmc(designmat = model_df_scaled[, -1, drop = FALSE], outcome = model_df_scaled[, : The largest R-hat is 1.06, indicating chains have not mixed.
+#> Running the chains for more iterations may help. See
+#> https://mc-stan.org/misc/warnings.html#r-hat
+drawObj
+#>
+#> Draws Object
+#> ------------
+#> Number of Samples: 150
+#> Number of Failed Samples: 0
+#> Model Formula: CHANGE ~ 1 + THERAPY + VISIT + BASVAL * VISIT + THERAPY * VISIT
+#> Imputation Type: random
+#> Method:
+#> name: Bayes
+#> burn_in: 200
+#> burn_between: 5
+#> same_cov: TRUE
+#> n_samples: 150
Note the use of set_vars()
which specifies the names of the key variables
within the dataset and the imputation model. Additionally, note that whilst vars$group
and vars$visit
are added as terms to the imputation model by default, their interaction is not,
@@ -693,15 +694,15 @@
Pool
#> trt_4 -0.092 0.683 -1.439 1.256 0.893
#> lsm_ref_4 -1.616 0.486 -2.576 -0.656 0.001
#> lsm_alt_4 -1.708 0.475 -2.645 -0.77 <0.001
-#> trt_5 1.286 0.928 -0.546 3.119 0.167
-#> lsm_ref_5 -4.114 0.662 -5.421 -2.807 <0.001
-#> lsm_alt_5 -2.828 0.645 -4.103 -1.553 <0.001
-#> trt_6 1.909 0.996 -0.059 3.877 0.057
-#> lsm_ref_6 -6.082 0.712 -7.488 -4.676 <0.001
-#> lsm_alt_6 -4.173 0.693 -5.543 -2.803 <0.001
-#> trt_7 2.065 1.125 -0.158 4.288 0.068
-#> lsm_ref_7 -6.937 0.817 -8.554 -5.32 <0.001
-#> lsm_alt_7 -4.872 0.787 -6.427 -3.317 <0.001
+#> trt_5 1.328 0.924 -0.497 3.153 0.153
+#> lsm_ref_5 -4.14 0.66 -5.443 -2.838 <0.001
+#> lsm_alt_5 -2.812 0.646 -4.088 -1.537 <0.001
+#> trt_6 1.939 1 -0.037 3.915 0.054
+#> lsm_ref_6 -6.085 0.718 -7.503 -4.667 <0.001
+#> lsm_alt_6 -4.146 0.699 -5.527 -2.766 <0.001
+#> trt_7 2.136 1.12 -0.078 4.35 0.058
+#> lsm_ref_7 -6.982 0.812 -8.587 -5.377 <0.001
+#> lsm_alt_7 -4.846 0.789 -6.405 -3.287 <0.001
#> --------------------------------------------------
The table of values shown in the print message for poolObj
can also be extracted using the as.data.frame()
function:
as.data.frame(poolObj)
@@ -709,18 +710,18 @@ Pool
#> 1 trt_4 -0.09180645 0.6826279 -1.43949684 1.2558839 8.931772e-01
#> 2 lsm_ref_4 -1.61581996 0.4862316 -2.57577141 -0.6558685 1.093708e-03
#> 3 lsm_alt_4 -1.70762640 0.4749573 -2.64531931 -0.7699335 4.262148e-04
-#> 4 trt_5 1.28644500 0.9277209 -0.54590272 3.1187927 1.674969e-01
-#> 5 lsm_ref_5 -4.11444658 0.6617013 -5.42140736 -2.8074858 4.341299e-09
-#> 6 lsm_alt_5 -2.82800159 0.6453115 -4.10255468 -1.5534485 2.131897e-05
-#> 7 trt_6 1.90919233 0.9961837 -0.05886381 3.8772485 5.716563e-02
-#> 8 lsm_ref_6 -6.08225789 0.7116785 -7.48832324 -4.6761925 1.254942e-14
-#> 9 lsm_alt_6 -4.17306556 0.6934318 -5.54301764 -2.8031135 1.257113e-08
-#> 10 trt_7 2.06527821 1.1246338 -0.15780585 4.2883623 6.837825e-02
-#> 11 lsm_ref_7 -6.93702520 0.8174548 -8.55359983 -5.3204506 3.240396e-14
-#> 12 lsm_alt_7 -4.87174699 0.7866796 -6.42696291 -3.3165311 6.097291e-09
+#> 4 trt_5 1.32800107 0.9239991 -0.49687491 3.1528770 1.526144e-01
+#> 5 lsm_ref_5 -4.14031255 0.6595847 -5.44302381 -2.8376013 3.163421e-09
+#> 6 lsm_alt_5 -2.81231148 0.6459122 -4.08807336 -1.5365496 2.396574e-05
+#> 7 trt_6 1.93891419 1.0001460 -0.03694571 3.9147741 5.438468e-02
+#> 8 lsm_ref_6 -6.08530002 0.7176967 -7.50335519 -4.6672448 1.946811e-14
+#> 9 lsm_alt_6 -4.14638583 0.6985434 -5.52650481 -2.7662668 1.911219e-08
+#> 10 trt_7 2.13609482 1.1201125 -0.07781920 4.3500088 5.849971e-02
+#> 11 lsm_ref_7 -6.98181990 0.8117268 -8.58678186 -5.3768579 1.511791e-14
+#> 12 lsm_alt_7 -4.84572508 0.7885999 -6.40477980 -3.2866704 7.793320e-09
These outputs gives an estimated difference of
-2.065 (95% CI -0.158 to 4.288)
-between the two groups at the last visit with an associated p-value of 0.068.
+2.136 (95% CI -0.078 to 4.350)
+between the two groups at the last visit with an associated p-value of 0.058.
Code
@@ -772,69 +773,68 @@
Code
method <- method_bayes(
burn_in = 200,
burn_between = 5,
-
n_samples = 150,
-
seed = 675442751
-
)
+
n_samples = 150
+
)
+
-
-
# Create samples for the imputation parameters by running the draws() function
-
set.seed(987)
-
drawObj <- draws(
-
data = dat,
-
data_ice = dat_ice,
-
vars = vars,
-
method = method,
-
quiet = TRUE
-
)
-
-
# Impute the data
-
imputeObj <- impute(
-
drawObj,
-
references = c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO")
-
)
-
-
# Fit the analysis model on each imputed dataset
-
anaObj <- analyse(
-
imputeObj,
-
ancova,
-
vars = set_vars(
-
subjid = "PATIENT",
-
outcome = "CHANGE",
-
visit = "VISIT",
-
group = "THERAPY",
-
covariates = c("BASVAL")
-
)
-
)
-
-
# Apply a delta adjustment
-
-
# Add a delta-value of 5 to all imputed values (i.e. those values
-
# which were missing in the original dataset) in the drug arm.
-
delta_df <- delta_template(imputeObj) %>%
-
as_tibble() %>%
-
mutate(delta = if_else(THERAPY == "DRUG" & is_missing , 5, 0)) %>%
-
select(PATIENT, VISIT, delta)
-
-
# Repeat the analyses with the adjusted values
-
anaObj_delta <- analyse(
-
imputeObj,
-
ancova,
-
delta = delta_df,
-
vars = set_vars(
-
subjid = "PATIENT",
-
outcome = "CHANGE",
-
visit = "VISIT",
-
group = "THERAPY",
-
covariates = c("BASVAL")
-
)
-
)
-
-
# Pool the results
-
poolObj <- pool(
-
anaObj,
-
conf.level = 0.95,
-
alternative = "two.sided"
-
)
+# Create samples for the imputation parameters by running the draws() function
+set.seed(987)
+drawObj <- draws(
+ data = dat,
+ data_ice = dat_ice,
+ vars = vars,
+ method = method,
+ quiet = TRUE
+)
+
+# Impute the data
+imputeObj <- impute(
+ drawObj,
+ references = c("DRUG" = "PLACEBO", "PLACEBO" = "PLACEBO")
+)
+
+# Fit the analysis model on each imputed dataset
+anaObj <- analyse(
+ imputeObj,
+ ancova,
+ vars = set_vars(
+ subjid = "PATIENT",
+ outcome = "CHANGE",
+ visit = "VISIT",
+ group = "THERAPY",
+ covariates = c("BASVAL")
+ )
+)
+
+# Apply a delta adjustment
+
+# Add a delta-value of 5 to all imputed values (i.e. those values
+# which were missing in the original dataset) in the drug arm.
+delta_df <- delta_template(imputeObj) %>%
+ as_tibble() %>%
+ mutate(delta = if_else(THERAPY == "DRUG" & is_missing , 5, 0)) %>%
+ select(PATIENT, VISIT, delta)
+
+# Repeat the analyses with the adjusted values
+anaObj_delta <- analyse(
+ imputeObj,
+ ancova,
+ delta = delta_df,
+ vars = set_vars(
+ subjid = "PATIENT",
+ outcome = "CHANGE",
+ visit = "VISIT",
+ group = "THERAPY",
+ covariates = c("BASVAL")
+ )
+)
+
+# Pool the results
+poolObj <- pool(
+ anaObj,
+ conf.level = 0.95,
+ alternative = "two.sided"
+)
From 33c4e963ee04cf3bef9ec8436fa8f8d5dec59bf4 Mon Sep 17 00:00:00 2001
From: Craig Gower-Page
Date: Mon, 30 Sep 2024 09:52:54 +0100
Subject: [PATCH 2/3] Apply suggestions from code review
Co-authored-by: Isaac Gravestock <83659704+gravesti@users.noreply.github.com>
Signed-off-by: Craig Gower-Page
---
R/methods.R | 4 ++--
data-raw/create_print_test_data.R | 2 +-
tests/testthat/test-mcmc.R | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/R/methods.R b/R/methods.R
index 406f2282..b9f7f9f4 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -93,10 +93,10 @@ method_bayes <- function(
seed = NULL
) {
if (!is.null(seed)) {
- warning(paste0(
+ warning(
"The `seed` argument to `method_bayes()` has been deprecated",
" please use `set.seed()` instead"
- ))
+ )
}
x <- list(
diff --git a/data-raw/create_print_test_data.R b/data-raw/create_print_test_data.R
index 71c2c120..501c1bcf 100644
--- a/data-raw/create_print_test_data.R
+++ b/data-raw/create_print_test_data.R
@@ -107,7 +107,7 @@ set.seed(413)
dobj <- get_data(40)
suppressWarnings({
- set.seet(859)
+ set.seed(859)
drawobj_b <- draws(
data = dobj$dat,
data_ice = dobj$dat_ice,
diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R
index 422f4d54..aa77c88d 100644
--- a/tests/testthat/test-mcmc.R
+++ b/tests/testthat/test-mcmc.R
@@ -603,7 +603,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", {
})
-test_that("seed argument to method_bayes is depreciated", {
+test_that("seed argument to method_bayes is deprecated", {
expect_warning(
{
method <- method_bayes(
From 52acd6127ebdc5e028a1ca4dfaee834a09be1be6 Mon Sep 17 00:00:00 2001
From: gowerc
Date: Fri, 4 Oct 2024 15:13:07 +0100
Subject: [PATCH 3/3] upgrade warning -> error
---
R/methods.R | 12 +++++++-----
tests/testthat/test-mcmc.R | 2 +-
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/R/methods.R b/R/methods.R
index b9f7f9f4..8d091323 100644
--- a/R/methods.R
+++ b/R/methods.R
@@ -92,12 +92,14 @@ method_bayes <- function(
n_samples = 20,
seed = NULL
) {
- if (!is.null(seed)) {
- warning(
- "The `seed` argument to `method_bayes()` has been deprecated",
- " please use `set.seed()` instead"
+ assertthat::assert_that(
+ is.null(seed),
+ msg = paste(
+ "The `seed` argument to `method_bayes()` has been deprecated;",
+ "please use `set.seed()` instead.",
+ collapse = " "
)
- }
+ )
x <- list(
burn_in = burn_in,
diff --git a/tests/testthat/test-mcmc.R b/tests/testthat/test-mcmc.R
index aa77c88d..5550e458 100644
--- a/tests/testthat/test-mcmc.R
+++ b/tests/testthat/test-mcmc.R
@@ -604,7 +604,7 @@ test_that("fit_mcmc can recover known values with same_cov = FALSE", {
test_that("seed argument to method_bayes is deprecated", {
- expect_warning(
+ expect_error(
{
method <- method_bayes(
n_samples = 250,