v2.20 candidate

- cmdstanr default when installed - added log_lik argument to compare - cpp_fast option for cstan and ulam, turns off range checks for compiled stan models
rmcelreath · Jul 16, 2021 · e14e2c6 · e14e2c6
1 parent 2acf2fd
commit e14e2c6
Show file tree

Hide file tree

Showing 7 changed files with 105 additions and 45 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 2.13
-Date: 2020-08-26
+Version: 2.20
+Date: 2021-07-01
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
 Imports: coda, MASS, mvtnorm, loo, shape

diff --git a/R/cmdstan_support.r b/R/cmdstan_support.r
@@ -14,10 +14,16 @@ cmdstanr_model_write <- function( the_model ) {
     }
 
 # wrapper to fit stan model using cmdstanr and return rstan stanfit object
-cstan <- function( file , model_code , data=list() , chains=1 , cores=1 , iter=1000 , warmup , threads=1 , control=list(adapt_delta=0.95) , cpp_options=list() , save_warmup=TRUE , ... ) {
+cstan <- function( file , model_code , data=list() , chains=1 , cores=1 , iter=1000 , warmup , threads=1 , control=list(adapt_delta=0.95) , cpp_options=list() , save_warmup=TRUE , cpp_fast=FALSE , rstan_out=TRUE , ... ) {
 
     if ( threads>1 ) cpp_options[['stan_threads']] <- TRUE
 
+    # dangerous compile settings that can improve run times
+    if ( cpp_fast==TRUE ) {
+        cpp_options[['STAN_NO_RANGE_CHECKS']] <- TRUE
+        cpp_options[['STAN_CPP_OPTIMS']] <- TRUE
+    }
+
     if ( missing(file) & !missing(model_code) ) {
         file <- cmdstanr_model_write( model_code )
     }
@@ -61,8 +67,35 @@ cstan <- function( file , model_code , data=list() , chains=1 , cores=1 , iter=1
             save_warmup=save_warmup , ... )
 
     # coerce to stanfit object
-    stanfit <- rstan::read_stan_csv(cmdstanfit$output_files())
-
-    return(stanfit)
+    if ( rstan_out==TRUE )
+        return( rstan::read_stan_csv(cmdstanfit$output_files()) )
+    else
+        return(cmdstanfit)
 }
 
+# in place tests
+if ( FALSE ) {
+
+    library(rethinking)
+
+    the_code <- "
+    data{
+        int N;
+        real y[N];
+    }
+    parameters{
+        real mu;
+    }
+    model{
+        mu ~ normal(0,1);
+        y ~ normal(mu,1);
+    }
+    "
+
+    m <- cstan( model_code=the_code , data=list(N=2,y=rnorm(2)) , chains=4 , rstan_out=FALSE )
+
+    m$summary( variables="mu" , "mean" , "sd" , ~quantile(.x, probs = c(0.4, 0.6)) , "rhat" , "ess_bulk" )
+
+    precis(m)
+
+}
diff --git a/R/compare.r b/R/compare.r
@@ -14,7 +14,7 @@ setMethod( "show" , "compareIC" , function(object) {
 } )
 
 # new compare function, defaulting to WAIC
-compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 , warn=TRUE , result_order=c(1,5,3,6,2,4) ) {
+compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 , warn=TRUE , result_order=c(1,5,3,6,2,4) , log_lik="log_lik" ) {
 
     # retrieve list of models
     L <- list(...)
@@ -59,7 +59,7 @@ compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh
     }
 
     if ( the_func %in% c("WAIC","PSIS","LOO") ) {
-        IC.list.pw <- lapply( L , function(z) do.call( the_func , list( z , n=n , refresh=refresh , pointwise=TRUE , warn=warn ) ) )
+        IC.list.pw <- lapply( L , function(z) do.call( the_func , list( z , n=n , refresh=refresh , pointwise=TRUE , warn=warn , log_lik=log_lik ) ) )
         p.list <- sapply( IC.list.pw , function(x) sum( x$penalty ) )
         se.list <- sapply( IC.list.pw , function(x) x$std_err[1] )
         IC.list <- sapply( IC.list.pw , function(x) sum( x[[1]] ) )
@@ -332,42 +332,36 @@ m3 <- map(
 
 plot(x)
 
-# now map2stan
+# now ulam
 
-m0 <- map2stan(
+m0 <- ulam(
     alist(
         pulled_left ~ dbinom(1,theta),
         logit(theta) <- a,
         a ~ dnorm(0,1)
     ) ,
-    data=d,
-    start=list(a=0)
-)
+    data=d , log_lik=TRUE )
 
-m1 <- map2stan(
+m1 <- ulam(
     alist(
         pulled_left ~ dbinom(1,theta),
         logit(theta) <- a + bp*prosoc_left,
         a ~ dnorm(0,1),
         bp ~ dnorm(0,1)
     ) ,
-    data=d,
-    start=list(a=0,bp=0)
-)
+    data=d , log_lik=TRUE )
 
-m2 <- map2stan(
+m2 <- ulam(
     alist(
         pulled_left ~ dbinom(1,theta),
         logit(theta) <- a + bp*prosoc_left + bpc*condition*prosoc_left,
         a ~ dnorm(0,1),
         bp ~ dnorm(0,1),
         bpc ~ dnorm(0,1)
     ) ,
-    data=d,
-    start=list(a=0,bp=0,bpc=0)
-)
+    data=d , log_lik=TRUE )
 
-m3 <- map2stan(
+m3 <- ulam(
     alist(
         pulled_left ~ dbinom(1,theta),
         logit(theta) <- a + bp*prosoc_left + bc*condition + bpc*condition*prosoc_left,
@@ -376,32 +370,24 @@ m3 <- map2stan(
         bc ~ dnorm(0,1),
         bpc ~ dnorm(0,1)
     ) ,
-    data=d,
-    start=list(a=0,bp=0,bc=0,bpc=0)
-)
+    data=d , log_lik=TRUE )
 
-m4 <- map2stan(
+m4 <- ulam(
     alist(
         pulled_left ~ dbinom(1,theta),
-        logit(theta) <- a + aj + bp*prosoc_left + bc*condition + bpc*condition*prosoc_left,
+        logit(theta) <- a + aj[actor] + bp*prosoc_left + bc*condition + bpc*condition*prosoc_left,
         a ~ dnorm(0,1),
         aj[actor] ~ dnorm(0,sigma_actor),
         bp ~ dnorm(0,1),
         bc ~ dnorm(0,1),
         bpc ~ dnorm(0,1),
         sigma_actor ~ dcauchy(0,1)
     ) ,
-    data=d,
-    start=list(a=0,bp=0,bc=0,bpc=0,sigma_actor=1,aj=rep(0,7))
-)
+    data=d , log_lik=TRUE )
 
-( x1 <- compare(m0,m1,m2,m3,m4,WAIC=FALSE) )
-
-( x2 <- compare(m0,m1,m2,m3,m4,WAIC=TRUE) )
+( x1 <- compare(m0,m1,m2,m3,m4,log_lik="log_lik") )
 
 plot(x1)
 
-plot(x2)
-
 
 }
diff --git a/R/precis.r b/R/precis.r
@@ -119,8 +119,12 @@ postlistprecis <- function( post , prob=0.95 , spark=FALSE ) {
 }
 
 setGeneric("precis",
-function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasing=FALSE , ... ) {
-    new( "precis" , as.data.frame(object) , digits=digits )
+function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasing=FALSE , ... ) {   
+    if ( "CmdStanFit" %in% class(object) ) {
+        if ( missing(pars) ) pars <- NULL
+        precis_cmdstanfit( object , depth=depth , pars=pars , prob=prob , digits=digits , sort=sort, decreasing=decreasing , ... )
+    } else
+        new( "precis" , as.data.frame(object) , digits=digits )
 }
 )
 
@@ -334,6 +338,32 @@ function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasin
 })
 # precis2(mfit,pars=parlist)
 
+# cmdstanr doesn't export CmdStanFit class, so need to do this through default method
+#setMethod("precis", "CmdStanFit",
+precis_cmdstanfit <- function( object , depth=1 , pars , prob=0.89 , digits=2 , sort=NULL , decreasing=FALSE , ... ) {
+    low <- (1-prob)/2
+    upp <- 1-low
+    # result <- summary(object,variables=pars,probs=c(low,upp))$summary[,c(1,3:7)]
+    result <- object$summary( variables=pars , "mean" , "sd" , ~quantile(.x, probs = c(low, upp)) , "rhat" , "ess_bulk" )
+    result <- as.data.frame( result )
+
+    # take variable column and convert to row names
+    rownames(result) <- result$variable
+    result$variable <- NULL
+
+    idx <- which( rownames(result) %in% c("dev","lp__") )
+    idx2 <- grep( "log_lik[" , rownames(result) , fixed=TRUE )
+    if ( length(idx2)>0 ) idx <- c( idx , idx2 )
+    if ( length(idx)>0 ) {
+        # remove dev and lp__ and log_lik from table
+        result <- result[ -idx , ]
+    }
+
+    result <- precis_format( result , depth , sort , decreasing )
+
+    return( new( "precis" , result , digits=digits ) )
+}
+
 # old version 1 method
 precisx <- function( model , depth=1 , pars , ci=TRUE , prob=0.89 , corr=FALSE , digits=2 , warn=TRUE , spark=FALSE ) {
     the.class <- class(model)[1]

diff --git a/R/ulam-function.R b/R/ulam-function.R
@@ -17,9 +17,10 @@
 
 ulam_options <- new.env(parent=emptyenv())
 ulam_options$use_cmdstan <- FALSE
+if ( require(cmdstanr) ) ulam_options$use_cmdstan <- TRUE
 set_ulam_cmdstan <- function(x=TRUE) assign( "use_cmdstan" , x , env=ulam_options )
 
-ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , warmup , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , ... ) {
+ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , warmup , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , constraints , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , coerce_int=TRUE , sample_prior=FALSE , file=NULL , cmdstan=ulam_options$use_cmdstan , threads=1 , grain=1 , cpp_options=list() , cpp_fast=FALSE , ... ) {
 
     if ( !is.null(file) ) {
         rds_file_name <- concat( file , ".rds" )
@@ -1398,6 +1399,15 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
         return(list(file_stan,file_exe,do_compile))
     }
 
+    #if ( threads>1 ) 
+    cpp_options[['stan_threads']] <- TRUE
+
+    # dangerous compile settings that can improve run times
+    if ( cpp_fast==TRUE ) {
+        cpp_options[['STAN_NO_RANGE_CHECKS']] <- TRUE
+        cpp_options[['STAN_CPP_OPTIMS']] <- TRUE
+    }
+
     # fire lasers pew pew
     if ( sample==TRUE ) {
         if ( length(start)==0 ) {
@@ -1414,7 +1424,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
                         stan_file=filex[[1]],
                       # exe_file=filex[[2]],
                         compile=filex[[3]],
-                        cpp_options=list(stan_threads=TRUE) )
+                        cpp_options=cpp_options )
                     # set_num_threads( threads )
                     # iter means only post-warmup samples for cmdstanr
                     # so need to compute iter explicitly
@@ -1441,7 +1451,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
                         stan_file=filex[[1]],
                       # exe_file=filex[[2]],
                         compile=filex[[3]],
-                        cpp_options=list(stan_threads=TRUE) )
+                        cpp_options=cpp_options )
                     # set_num_threads( threads )
                     # iter means only post-warmup samples for cmdstanr
                     # so need to compute iter explicitly

diff --git a/README.md b/README.md
@@ -21,17 +21,17 @@ The first formula in the list is the probability of the outcome (likelihood); th
 
 You can find a manual with expanded installation and usage instructions here: ``http://xcelab.net/rm/software/``
 
-Here's the brief verison. 
+Here's the brief version. 
 
 You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. The biggest challenge is getting a C++ compiler configured to work with your installation of R. The instructions at ``https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started`` are quite thorough. Obey them, and you'll likely succeed.
 
-There are some advantages to accessing Stan through ``cmdstanr`` rather than rstan. These advantages include faster updates and therefore quicker access to new features. If you want to access Stan using the ``cmdstanr`` package instead, then you may install that as well with
+There are advantages to accessing Stan through ``cmdstanr`` rather than rstan. These advantages include faster updates and therefore quicker access to new features. Most users find cmdstanr models to run substantially faster than the same models compiled with rstan. So if you want to access Stan using the ``cmdstanr`` package, then you should install that as well with
 ```
 devtools::install_github("stan-dev/cmdstanr")
 ```
-If you haven't installed cmdstan previously, you will also need to do that with ``cmdstanr::install_cmdstan()``. Then you need to add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan. To use cmdstan as the default interface, do ``set_ulam_cmdstan(TRUE)``.
+If you haven't installed cmdstan previously, you will also need to do that with ``cmdstanr::install_cmdstan()``. Then you can add ``cmdstan=TRUE`` to any ``ulam`` code to use cmdstan instead of rstan. To use cmdstan as the default interface, do ``set_ulam_cmdstan(TRUE)``. By default, when rethinking detects cmdstanr on your system, it will default to using it instead of rstan. If you really want to use rstan to compile and sample models, then use ``set_ulam_cmdstan(FALSE)``.
 
-Once rstan and cmdstan are installed (almost there), then you can install ``rethinking`` from within R using:
+Once rstan and cmdstanr are installed (almost there), then you can install ``rethinking`` from within R using:
 ```
 install.packages(c("coda","mvtnorm","devtools","loo","dagitty"))
 devtools::install_github("rmcelreath/rethinking")

diff --git a/man/compare.Rd b/man/compare.Rd
@@ -6,7 +6,7 @@
   Returns a table of model comparison statistics, by default focused on WAIC.
 }
 \usage{
-compare( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 )
+compare( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 , log_lik="log_lik" )
 ICweights( dev )
 }
 %- maybe also 'usage' for other objects documented here.
@@ -18,6 +18,7 @@ ICweights( dev )
   \item{WAIC}{Deprecated: If \code{TRUE}, uses \code{func} for comparison. If \code{FALSE}, uses DIC.}
   \item{refresh}{Progress display update interval. 0 suppresses display.}
   \item{dev}{Vector of values to use in computing model weights}
+  \item{log_lik}{Name of log likelihood list in posterior}
 }
 \details{
   This function computes WAIC and optionally DIC values for fit models and returns a table sorted by ascending values. Each row in this table is a model, and the various columns provide WAIC, effective numbers of parameters, model weights, and standard errors.