From 8c99a217cda0e180e6973af9846d5b2567ac4513 Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 29 Jul 2023 12:04:53 -0600 Subject: [PATCH 1/5] enable jacobian argument for optimization closes #761 --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ R/args.R | 10 +++++++++- R/model.R | 8 ++++++++ man/model-method-optimize.Rd | 8 ++++++++ tests/testthat/test-model-optimize.R | 14 ++++++++++++-- 6 files changed, 42 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b656d0d98..9c9cd3da2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cmdstanr Title: R Interface to 'CmdStan' -Version: 0.6.0 +Version: 0.6.0.9000 Date: 2023-07-25 Authors@R: c(person(given = "Jonah", family = "Gabry", role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 863988233..f45d97323 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# cmdstanr 0.6.0.9000 + +Items for next release + # cmdstanr 0.6.0 ### Major new features diff --git a/R/args.R b/R/args.R index 74a864661..412d477d0 100644 --- a/R/args.R +++ b/R/args.R @@ -374,6 +374,7 @@ OptimizeArgs <- R6::R6Class( public = list( method = "optimize", initialize = function(iter = NULL, + jacobian = NULL, algorithm = NULL, init_alpha = NULL, tol_obj = NULL, @@ -382,8 +383,9 @@ OptimizeArgs <- R6::R6Class( tol_rel_grad = NULL, tol_param = NULL, history_size = NULL) { - self$algorithm <- algorithm self$iter <- iter + self$jacobian <- jacobian + self$algorithm <- algorithm self$init_alpha <- init_alpha self$tol_obj <- tol_obj self$tol_rel_obj <- tol_rel_obj @@ -407,6 +409,7 @@ OptimizeArgs <- R6::R6Class( } new_args <- list( "method=optimize", + .make_arg("jacobian"), .make_arg("iter"), .make_arg("algorithm"), .make_arg("init_alpha"), @@ -669,6 +672,11 @@ validate_sample_args <- function(self, num_procs) { validate_optimize_args <- function(self) { checkmate::assert_subset(self$algorithm, empty.ok = TRUE, choices = c("bfgs", "lbfgs", "newton")) + checkmate::assert_flag(self$jacobian, null.ok = TRUE) + if (!is.null(self$jacobian)) { + self$jacobian <- as.integer(self$jacobian) + } + checkmate::assert_integerish(self$iter, lower = 1, null.ok = TRUE, len = 1) if (!is.null(self$iter)) { self$iter <- as.integer(self$iter) diff --git a/R/model.R b/R/model.R index f32f7e424..dad878bf2 100644 --- a/R/model.R +++ b/R/model.R @@ -1374,6 +1374,12 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' for `"lbfgs"` and `"bfgs`. For their default values and more details see #' the CmdStan User's Guide. The default values can also be obtained by #' running `cmdstanr_example(method="optimize")$metadata()`. +#' @param jacobian (logical) Whether or not the Jacobian adjustment should be +#' included in the gradient. The default is whatever is the default in the +#' installed version of CmdStan (historically this has been `FALSE`). If the +#' goal of running optimization is to later produce a sample from a normal +#' approximation centered at the mode in the unconstrained space then +#' `jacobian` must be set to `TRUE`. #' @param init_alpha (positive real) The initial step size parameter. #' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. #' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. @@ -1399,6 +1405,7 @@ optimize <- function(data = NULL, threads = NULL, opencl_ids = NULL, algorithm = NULL, + jacobian = NULL, init_alpha = NULL, iter = NULL, tol_obj = NULL, @@ -1418,6 +1425,7 @@ optimize <- function(data = NULL, } optimize_args <- OptimizeArgs$new( algorithm = algorithm, + jacobian = jacobian, init_alpha = init_alpha, iter = iter, tol_obj = tol_obj, diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index 42f81d07a..f9634dbae 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -17,6 +17,7 @@ optimize( threads = NULL, opencl_ids = NULL, algorithm = NULL, + jacobian = NULL, init_alpha = NULL, iter = NULL, tol_obj = NULL, @@ -132,6 +133,13 @@ for \code{"lbfgs"} and \verb{"bfgs}. For their default values and more details s the CmdStan User's Guide. The default values can also be obtained by running \code{cmdstanr_example(method="optimize")$metadata()}.} +\item{jacobian}{(logical) Whether or not the Jacobian adjustment should be +included in the gradient. The default is whatever is the default in the +installed version of CmdStan (historically this has been \code{FALSE}). If the +goal of running optimization is to later produce a sample from a normal +approximation centered at the mode in the unconstrained space then +\code{jacobian} must be set to \code{TRUE}.} + \item{init_alpha}{(positive real) The initial step size parameter.} \item{iter}{(positive integer) The maximum number of iterations.} diff --git a/tests/testthat/test-model-optimize.R b/tests/testthat/test-model-optimize.R index ce28c2106..66fe5eac0 100644 --- a/tests/testthat/test-model-optimize.R +++ b/tests/testthat/test-model-optimize.R @@ -13,7 +13,8 @@ ok_arg_values <- list( algorithm = "lbfgs", iter = 100, init_alpha = 0.002, - save_latent_dynamics = FALSE + save_latent_dynamics = FALSE, + jacobian = TRUE ) # using any of these should cause optimize() to error @@ -25,7 +26,8 @@ bad_arg_values <- list( algorithm = "NOT_AN_ALGORITHM", iter = -20, init_alpha = -20, - save_latent_dynamics = "NOT_LOGICAL" + save_latent_dynamics = "NOT_LOGICAL", + jacobian = 30 ) ok_arg_sci_nota_values <- list( @@ -142,3 +144,11 @@ test_that("optimize() method runs when the stan file is removed", { mod_tmp$optimize(data = data_list) ) }) + +test_that("optimize() recognizes new jacobian argument", { + fit <- mod$optimize(data = data_list, jacobian = FALSE) + expect_equal(fit$metadata()$jacobian, 0) + + fit2 <- mod$optimize(data = data_list, jacobian = TRUE) + expect_equal(fit$metadata()$jacobian, 1) +}) From d4d98c75197332dc103d2f91acc71b7788f00126 Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 29 Jul 2023 12:39:06 -0600 Subject: [PATCH 2/5] warning if cmdstan < 2.32 installed --- R/args.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/args.R b/R/args.R index 412d477d0..7794510e4 100644 --- a/R/args.R +++ b/R/args.R @@ -551,7 +551,7 @@ validate_cmdstan_args <- function(self) { checkmate::assert_integerish(self$refresh, lower = 0, null.ok = TRUE) checkmate::assert_integerish(self$sig_figs, lower = 1, upper = 18, null.ok = TRUE) if (!is.null(self$sig_figs) && cmdstan_version() < "2.25") { - warning("The 'sig_figs' argument is only supported with cmdstan 2.25+ and will be ignored!") + warning("The 'sig_figs' argument is only supported with cmdstan 2.25+ and will be ignored!", call. = FALSE) } if (!is.null(self$refresh)) { self$refresh <- as.integer(self$refresh) @@ -674,6 +674,9 @@ validate_optimize_args <- function(self) { choices = c("bfgs", "lbfgs", "newton")) checkmate::assert_flag(self$jacobian, null.ok = TRUE) if (!is.null(self$jacobian)) { + if (cmdstan_version() < "2.32") { + warning("The 'jacobian' argument is only supported with cmdstan 2.32+ and will be ignored!", call. = FALSE) + } self$jacobian <- as.integer(self$jacobian) } From 61a40efb15fae025709ec6181765e41057da75be Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 29 Jul 2023 12:55:10 -0600 Subject: [PATCH 3/5] improve jacobian doc --- R/model.R | 14 ++++++++------ man/model-method-optimize.Rd | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/R/model.R b/R/model.R index dad878bf2..4b01c43db 100644 --- a/R/model.R +++ b/R/model.R @@ -1374,12 +1374,14 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' for `"lbfgs"` and `"bfgs`. For their default values and more details see #' the CmdStan User's Guide. The default values can also be obtained by #' running `cmdstanr_example(method="optimize")$metadata()`. -#' @param jacobian (logical) Whether or not the Jacobian adjustment should be -#' included in the gradient. The default is whatever is the default in the -#' installed version of CmdStan (historically this has been `FALSE`). If the -#' goal of running optimization is to later produce a sample from a normal -#' approximation centered at the mode in the unconstrained space then -#' `jacobian` must be set to `TRUE`. +#' @param jacobian (logical) Whether or not the model’s log probability +#' function should include the log absolute Jacobian determinant of inverse +#' parameter transforms. If `jacobian=FALSE` (historically this has always +#' been the default) optimization returns the regularized maximum likelihood +#' estimate (MLE). If `jacobian=TRUE` optimization produces the maximum a +#' posteriori estimate (MAP). See the +#' [Maximum Likelihood Estimation](https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html) +#' section of the CmdStan User's Guide for more details on the Jacobian adjustments. #' @param init_alpha (positive real) The initial step size parameter. #' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. #' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index f9634dbae..305cc06ee 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -133,12 +133,14 @@ for \code{"lbfgs"} and \verb{"bfgs}. For their default values and more details s the CmdStan User's Guide. The default values can also be obtained by running \code{cmdstanr_example(method="optimize")$metadata()}.} -\item{jacobian}{(logical) Whether or not the Jacobian adjustment should be -included in the gradient. The default is whatever is the default in the -installed version of CmdStan (historically this has been \code{FALSE}). If the -goal of running optimization is to later produce a sample from a normal -approximation centered at the mode in the unconstrained space then -\code{jacobian} must be set to \code{TRUE}.} +\item{jacobian}{(logical) Whether or not the model’s log probability +function should include the log absolute Jacobian determinant of inverse +parameter transforms. If \code{jacobian=FALSE} (historically this has always +been the default) optimization returns the regularized maximum likelihood +estimate (MLE). If \code{jacobian=TRUE} optimization produces the maximum a +posteriori estimate (MAP). See the +\href{https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html}{Maximum Likelihood Estimation} +section of the CmdStan User's Guide for more details on the Jacobian adjustments.} \item{init_alpha}{(positive real) The initial step size parameter.} From c998ace3f9745859a061ec8fef4f87a66bb28f2b Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 29 Jul 2023 13:57:17 -0600 Subject: [PATCH 4/5] fix test --- tests/testthat/test-model-optimize.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-model-optimize.R b/tests/testthat/test-model-optimize.R index 66fe5eac0..9be5b6431 100644 --- a/tests/testthat/test-model-optimize.R +++ b/tests/testthat/test-model-optimize.R @@ -150,5 +150,5 @@ test_that("optimize() recognizes new jacobian argument", { expect_equal(fit$metadata()$jacobian, 0) fit2 <- mod$optimize(data = data_list, jacobian = TRUE) - expect_equal(fit$metadata()$jacobian, 1) + expect_equal(fit2$metadata()$jacobian, 1) }) From b47a32715be9e611b297127695ffe0bffc8683d9 Mon Sep 17 00:00:00 2001 From: jgabry Date: Sat, 29 Jul 2023 15:07:11 -0600 Subject: [PATCH 5/5] default jacobian=FALSE and improve doc --- R/model.R | 38 ++++++++++++++---------------------- man/model-method-optimize.Rd | 37 +++++++++++++---------------------- 2 files changed, 29 insertions(+), 46 deletions(-) diff --git a/R/model.R b/R/model.R index 4b01c43db..a545e95c1 100644 --- a/R/model.R +++ b/R/model.R @@ -1346,23 +1346,17 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' @family CmdStanModel methods #' #' @description The `$optimize()` method of a [`CmdStanModel`] object runs -#' Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) -#' estimate. +#' Stan's optimizer to obtain a (penalized) maximum likelihood estimate or a +#' maximum a posteriori estimate (if `jacobian=TRUE`). See the +#' [Maximum Likelihood Estimation](https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html) +#' section of the CmdStan User's Guide for more details. #' #' Any argument left as `NULL` will default to the default value used by the -#' installed version of CmdStan. See the -#' [CmdStan User’s Guide](https://mc-stan.org/docs/cmdstan-guide/) -#' for more details. -#' -#' @details CmdStan can find the posterior mode (assuming there is one). If the -#' posterior is not convex, there is no guarantee Stan will be able to find -#' the global mode as opposed to a local optimum of log probability. For -#' optimization, the mode is calculated without the Jacobian adjustment for -#' constrained variables, which shifts the mode due to the change of -#' variables. Thus modes correspond to modes of the model as written. -#' -#' -- [*CmdStan User's Guide*](https://mc-stan.org/docs/cmdstan-guide/) -#' +#' installed version of CmdStan. See the [CmdStan User’s +#' Guide](https://mc-stan.org/docs/cmdstan-guide/) for more details on the +#' default arguments. The default values can also be obtained by checking the +#' metadata of an example model, e.g., +#' `cmdstanr_example(method="optimize")$metadata()`. #' @template model-common-args #' @param threads (positive integer) If the model was #' [compiled][model-method-compile] with threading support, the number of @@ -1374,14 +1368,12 @@ CmdStanModel$set("public", name = "sample_mpi", value = sample_mpi) #' for `"lbfgs"` and `"bfgs`. For their default values and more details see #' the CmdStan User's Guide. The default values can also be obtained by #' running `cmdstanr_example(method="optimize")$metadata()`. -#' @param jacobian (logical) Whether or not the model’s log probability -#' function should include the log absolute Jacobian determinant of inverse -#' parameter transforms. If `jacobian=FALSE` (historically this has always -#' been the default) optimization returns the regularized maximum likelihood -#' estimate (MLE). If `jacobian=TRUE` optimization produces the maximum a -#' posteriori estimate (MAP). See the +#' @param jacobian (logical) Whether or not to use the Jacobian adjustment for +#' constrained variables. By default this is `FALSE`, meaning optimization +#' yields the (regularized) maximum likelihood estimate. Setting it to `TRUE` +#' yields the maximum a posteriori estimate. See the #' [Maximum Likelihood Estimation](https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html) -#' section of the CmdStan User's Guide for more details on the Jacobian adjustments. +#' section of the CmdStan User's Guide for more details. #' @param init_alpha (positive real) The initial step size parameter. #' @param tol_obj (positive real) Convergence tolerance on changes in objective function value. #' @param tol_rel_obj (positive real) Convergence tolerance on relative changes in objective function value. @@ -1407,7 +1399,7 @@ optimize <- function(data = NULL, threads = NULL, opencl_ids = NULL, algorithm = NULL, - jacobian = NULL, + jacobian = FALSE, init_alpha = NULL, iter = NULL, tol_obj = NULL, diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index 305cc06ee..e2444673b 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -17,7 +17,7 @@ optimize( threads = NULL, opencl_ids = NULL, algorithm = NULL, - jacobian = NULL, + jacobian = FALSE, init_alpha = NULL, iter = NULL, tol_obj = NULL, @@ -133,14 +133,12 @@ for \code{"lbfgs"} and \verb{"bfgs}. For their default values and more details s the CmdStan User's Guide. The default values can also be obtained by running \code{cmdstanr_example(method="optimize")$metadata()}.} -\item{jacobian}{(logical) Whether or not the model’s log probability -function should include the log absolute Jacobian determinant of inverse -parameter transforms. If \code{jacobian=FALSE} (historically this has always -been the default) optimization returns the regularized maximum likelihood -estimate (MLE). If \code{jacobian=TRUE} optimization produces the maximum a -posteriori estimate (MAP). See the +\item{jacobian}{(logical) Whether or not to use the Jacobian adjustment for +constrained variables. By default this is \code{FALSE}, meaning optimization +yields the (regularized) maximum likelihood estimate. Setting it to \code{TRUE} +yields the maximum a posteriori estimate. See the \href{https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html}{Maximum Likelihood Estimation} -section of the CmdStan User's Guide for more details on the Jacobian adjustments.} +section of the CmdStan User's Guide for more details.} \item{init_alpha}{(positive real) The initial step size parameter.} @@ -164,23 +162,16 @@ A \code{\link{CmdStanMLE}} object. } \description{ The \verb{$optimize()} method of a \code{\link{CmdStanModel}} object runs -Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) -estimate. +Stan's optimizer to obtain a (penalized) maximum likelihood estimate or a +maximum a posteriori estimate (if \code{jacobian=TRUE}). See the +\href{https://mc-stan.org/docs/cmdstan-guide/maximum-likelihood-estimation.html}{Maximum Likelihood Estimation} +section of the CmdStan User's Guide for more details. Any argument left as \code{NULL} will default to the default value used by the -installed version of CmdStan. See the -\href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} -for more details. -} -\details{ -CmdStan can find the posterior mode (assuming there is one). If the -posterior is not convex, there is no guarantee Stan will be able to find -the global mode as opposed to a local optimum of log probability. For -optimization, the mode is calculated without the Jacobian adjustment for -constrained variables, which shifts the mode due to the change of -variables. Thus modes correspond to modes of the model as written. - --- \href{https://mc-stan.org/docs/cmdstan-guide/}{\emph{CmdStan User's Guide}} +installed version of CmdStan. See the \href{https://mc-stan.org/docs/cmdstan-guide/}{CmdStan User’s Guide} for more details on the +default arguments. The default values can also be obtained by checking the +metadata of an example model, e.g., +\code{cmdstanr_example(method="optimize")$metadata()}. } \examples{ \dontrun{