v1.52 candidate

- lots of man page updates - compare() checks nobs and model classes now; warns when not all equal - added R versions of dmvnorm2 and rmvnorm2 - map2stan() understands cores argument now and automatically parallelizes chains. not yet tested on Windows. - precis(), PI(), HPDI() default to 89% intervals now -
rmcelreath · Mar 24, 2015 · b8bc18b · b8bc18b
1 parent ec401c7
commit b8bc18b
Show file tree

Hide file tree

Showing 39 changed files with 405 additions and 192 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 1.51
-Date: 2015-03-14
+Version: 1.52
+Date: 2015-03-24
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
-Imports: coda, MASS
-Depends: rstan, parallel
+Imports: coda, MASS, mvtnorm
+Depends: rstan, parallel, methods
 Description: Utilities for fitting and comparing models
 License: GPL (>= 3)
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,9 +3,19 @@ importFrom(coda, HPDinterval)
 importFrom(coda, as.mcmc)
 importFrom(MASS, mvrnorm)
 importFrom(rstan, stan)
+importFrom(rstan, sampling)
+#importFrom(rstan, stan_model)
 importFrom(rstan, extract)
 importFrom(rstan, sflist2stanfit)
 importFrom(rstan, summary)
-importClassesFrom(rstan , stanfit)
+importClassesFrom(rstan)
+#importClassesFrom(rstan , stanfit)
+#importClassesFrom(rstan , stanmodel)
 importFrom(parallel, mclapply)
-importFrom(parallel, parLapply)
+importFrom(parallel, parLapply)
+importFrom(parallel, makeCluster)
+importFrom(parallel, clusterExport)
+importFrom(mvtnorm, dmvnorm)
+importFrom(mvtnorm, rmvnorm)
+exportClasses(map2stan)
+exportClasses(map)
diff --git a/R/coeftab.r b/R/coeftab.r
@@ -3,7 +3,7 @@
 # coeftab class definition and show method
 setClass( "coeftab" , representation( coefs="matrix" , se="matrix" , nobs="numeric" , AIC="numeric" , digits="numeric" , width="numeric" ) )
 
-coeftab.show <- function( object ) {
+coeftab_show <- function( object ) {
     result <- object@coefs
     if ( !is.null(object@nobs) ) {
         result <- rbind( result , object@nobs )
@@ -12,9 +12,9 @@ coeftab.show <- function( object ) {
     coefs <- rrformat( result , digits=object@digits , width=object@width )
     print( coefs , quote=FALSE , justify="right" )
 }
-setMethod( "show" , "coeftab" , function(object) coeftab.show(object) )
+setMethod( "show" , "coeftab" , function(object) coeftab_show(object) )
 
-coeftab.plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , ... ) {
+coeftab_plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , ... ) {
     x.orig <- x
     xse <- x@se
     x <- x@coefs
@@ -59,9 +59,9 @@ coeftab.plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=
     }
     abline( v=0 , lty=1 , col=col.alpha("black",0.15) )
 }
-setMethod( "plot" , "coeftab" , function(x,y,...) coeftab.plot(x,y,...) )
+setMethod( "plot" , "coeftab" , function(x,y,...) coeftab_plot(x,y,...) )
 
-coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , width=7 , rotate=FALSE , compare=FALSE ) {
+coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , width=7 , rotate=FALSE ) {
 
     # se=TRUE outputs standard errors
     # se.inside=TRUE prints standard errors in parentheses in same column as estimates
@@ -118,7 +118,7 @@ coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , w
                     d[i,][ paste(names(kse)[j],".se",sep="") ] <- as.numeric( round(kse[j],digits) )
                 else
                     # combine with estimate
-                    d[i,][ names(kse)[j] ] <- paste( formatC( as.real(d[i,][ names(kse)[j] ]) , digits=digits ) , " (" , formatC( as.real( kse[j] ) , digits=digits ) , ")" , sep="" )
+                    d[i,][ names(kse)[j] ] <- paste( formatC( (d[i,][ names(kse)[j] ]) , digits=digits ) , " (" , formatC( as.real( kse[j] ) , digits=digits ) , ")" , sep="" )
             }
         }
     }
@@ -144,15 +144,6 @@ coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , w
         nobs <- 0
     }
 
-    # add AICc and weights
-    if ( compare==TRUE ) {
-        require(bbmle)
-        d$AICc <- sapply( 1:length(L) , function(z) AICc( L[[z]] , nobs=d$nobs[z] ) )
-        deltAICc <- d$AICc - min(d$AICc) 
-        wAIC <- exp( -0.5*deltAICc )
-        d$weight <- wAIC / sum( wAIC )
-    }
-
     # return table
     if ( rotate==FALSE ) {
         d <- t(d) # models along top is default

diff --git a/R/compare.r b/R/compare.r
@@ -3,11 +3,15 @@
 # compare class definition and show method
 setClass( "compareIC" , representation( output="data.frame" , dSE="matrix" ) )
 
-compare.show <- function( object ) {
-    r <- format_show( object@output , digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
+#compare.show <- function( object ) {
+#    r <- format_show( object@output , #digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
+#    print( r )
+#}
+setMethod( "show" , "compareIC" , function(object) {
+    r <- format_show( object@output , 
+                      digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
     print( r )
-}
-setMethod( "show" , "compareIC" , function(object) compare.show(object) )
+} )
 
 # new compare function, defaulting to WAIC
 compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
@@ -20,6 +24,22 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
     mnames <- match.call()
     mnames <- as.character(mnames)[2:(length(L)+1)]
 
+    # check class of fit models and warn when more than one class represented
+    classes <- as.character(sapply( L , class ))
+    if ( any(classes!=classes[1]) ) {
+        warning("Not all model fits of same class.\nThis is usually a bad idea, because it implies they were fit by different algorithms.\nCheck yourself, before you wreck yourself.")
+    }
+
+    # check nobs for all models
+    # if different, warn
+    nobs_list <- sapply( L , nobs )
+    if ( any(nobs_list != nobs_list[1]) ) {
+        nobs_out <- paste( mnames , nobs_list , "\n" )
+        nobs_out <- concat(nobs_out)
+        warning(concat(
+            "Different numbers of observations found for at least two models.\nInformation criteria only valid for comparing models fit to exactly same observations.\nNumber of observations for each model:\n",nobs_out))
+    }
+
     dSE.matrix <- matrix( NA , nrow=length(L) , ncol=length(L) )
     if ( WAIC==FALSE ) {
         DIC.list <- lapply( L , function(z) DIC( z , n=n ) )
@@ -119,6 +139,7 @@ setMethod("plot" , "compareIC" , function(x,y,xlim,SE=TRUE,dSE=TRUE,weights=FALS
     }
 })
 
+if ( FALSE ) {
 # AICc/BIC model comparison table
 compare_old <- function( ... , nobs=NULL , sort="AICc" , BIC=FALSE , DIC=FALSE , delta=TRUE , DICsamples=1e4 ) {
     require(bbmle)
@@ -206,6 +227,7 @@ compare_old <- function( ... , nobs=NULL , sort="AICc" , BIC=FALSE , DIC=FALSE ,
 
     new( "compareIC" , output=result )
 }
+}#FALSE
 
 # convert estimated D_test values to weights
 ICweights <- function( dev ) {

diff --git a/R/distributions.r b/R/distributions.r
@@ -265,7 +265,7 @@ dcategorical <- function( x , prob , log=TRUE ) {
 
 rcategorical <- function( n , prob ) {
     k <- length(prob)
-    y <- sample( 1:k , size=n , prob=p , replace=TRUE )
+    y <- sample( 1:k , size=n , prob=prob , replace=TRUE )
     return(y)
 }
 
@@ -300,3 +300,19 @@ softmax <- function( ... ) {
         return(p)
     }
 }
+
+
+# dmvnorm2 - SRS form of multi_normal
+# uses package mvtnorm
+dmvnorm2 <- function( x , Mu , sigma , Rho , log=FALSE ) {
+    DS <- diag(sigma)
+    SIGMA <- DS %*% Rho %*% DS
+    dmvnorm(x, Mu, SIGMA, log=log )
+}
+
+rmvnorm2 <- function( n , Mu=rep(0,length(sigma)) , sigma=rep(1,length(Mu)) , Rho=diag(length(Mu)) , method="chol" ) {
+    DS <- diag(sigma)
+    SIGMA <- DS %*% Rho %*% DS
+    rmvnorm( n=n , mean=Mu , sigma=SIGMA , method=method )
+}
+
diff --git a/R/map2stan-class.r b/R/map2stan-class.r
@@ -15,7 +15,7 @@ setMethod("coef", "map2stan", function(object) {
 
 setMethod("extract.samples","map2stan",
 function(object) {
-    require(rstan)
+    #require(rstan)
     p <- rstan::extract(object@stanfit)
     # get rid of dev and lp__
     p[['dev']] <- NULL
@@ -30,7 +30,7 @@ function(object) {
 
 setMethod("extract.samples","stanfit",
 function(object) {
-    require(rstan)
+    #require(rstan)
     p <- rstan::extract(object)
     # get rid of dev and lp__
     #p[['dev']] <- NULL
@@ -235,7 +235,7 @@ setMethod("plot" , "map2stan" , function(x,y,...) {
 })
 
 setMethod("pairs" , "map2stan" , function(x, n=500 , alpha=0.7 , cex=0.7 , pch=16 , adj=1 , pars , ...) {
-    require(rstan)
+    #require(rstan)
     posterior <- extract.samples(x)
     if ( !missing(pars) ) {
         # select out named parameters
@@ -270,7 +270,9 @@ setMethod("pairs" , "map2stan" , function(x, n=500 , alpha=0.7 , cex=0.7 , pch=1
 })
 
 # my trace plot function
-tracerplot <- function( object , col=c("slateblue","orange","red","green") , alpha=0.7 , bg=gray(0.6,0.5) , ask=TRUE , ... ) {
+#rethink_palette <- c("#5BBCD6","#F98400","#F2AD00","#00A08A","#FF0000")
+rethink_palette <- c("#8080FF","#F98400","#F2AD00","#00A08A","#FF0000")
+tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5) , ask=TRUE , ... ) {
     chain.cols <- col
 
     if ( class(object)!="map2stan" ) stop( "requires map2stan fit" )

diff --git a/R/map2stan.r b/R/map2stan.r
@@ -7,15 +7,14 @@
 
 # to-do:
 # (*) different chains get different random start values
-# (*) support cores argument -> compile -> resample
 # (*) need to do p*theta and (1-p)*theta multiplication outside likelihood in betabinomial (similar story for gammapoisson) --- or is there an operator for pairwise vector multiplication?
 # (-) handle improper input more gracefully
 # (-) add "as is" formula type, with quoted text on RHS to dump into Stan code
 
 ##################
 # map2stan itself
 
-map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , cores=1 , ... ) {
+map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , warmup=floor(iter/2) , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , cores=1 , rng_seed , ... ) {
 
     ########################################
     # empty Stan code
@@ -71,6 +70,13 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
     if ( missing(start) ) start <- list()
     start.orig <- start
 
+    if ( iter <= warmup ) {
+        # user probably meant iter as number of samples
+        # so warn and convert
+        warning(concat("'iter' less than or equal to 'warmup'. Setting 'iter' to sum of 'iter' and 'warmup' instead (",iter+warmup,")."))
+        iter <- iter + warmup
+    }
+
     # check data for scale attributes and remove
     for ( i in 1:length(data) ) {
         if ( !is.null( attr(data[[i]],"scaled:center") ) | !is.null( attr(data[[i]],"scaled:scale") ) ) {
@@ -1226,19 +1232,65 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
     }
 
     if ( sample==TRUE ) {
-        require(rstan)
+        #require(rstan) # don't need anymore
 
         # sample
         modname <- deparse( flist[[1]] )
         initlist <- list()
         for ( achain in 1:chains ) initlist[[achain]] <- start
 
         if ( flag_refit==FALSE ) {
-            fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )
+
+            if ( chains==1 | cores==1 ) {
+
+                # single core
+                # so just run the model
+                fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )
+
+            } else {
+
+                # multi-core, multi-chain
+                # so pre-compile then run again
+                message("Precompiling model...")
+                fit_prep <- stan( model_code=model_code , model_name=modname , data=d , init=list(start) , iter=1 , chains=0 , pars=pars , test_grad=FALSE , refresh=-1 , ... )
+                #fit_prep <- stan_model( model_code=model_code , model_name=modname )
+
+                # if no seed provided, make one
+                if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )
+
+                message(concat("Dispatching ",chains," chains to ",cores," cores."))
+                sys <- .Platform$OS.type
+                if ( sys=='unix' ) {
+                    # Mac or Linux
+                    # hand off to mclapply
+                    sflist <- mclapply( 1:chains , mc.cores=cores ,
+                        function(chainid)
+                            stan( fit=fit_prep , data=d , init=list(start) , pars=pars , iter=iter , warmup=warmup , chains=1 , seed=rng_seed , chain_id=chainid , ... )
+                    )
+                } else {
+                    # Windows
+                    # so use parLapply instead
+                    CL = makeCluster(cores)
+                    #data <- object@data
+                    env0 <- list( fit_prep=fit_prep, data=d, pars=pars, rng_seed=rng_seed, iter=iter, warmup=warmup , initlist=list(start) )
+                    clusterExport(cl = CL, c("iter","warmup","data", "fit_prep", "pars", "rng_seed","initlist"), as.environment(env0))
+                    sflist <- parLapply(CL, 1:chains, fun = function(cid) {
+                        stan( fit=fit_prep , data = data, pars = pars, chains = 1, 
+                          iter = iter, warmup = warmup, seed = rng_seed, 
+                          chain_id = cid, init=initlist )
+                    })
+                }
+                # merge result
+                fit <- sflist2stanfit(sflist)
+
+            }#multicore
+
         } else {
+
+            # reuse previous compiled model
             message(concat("Reusing previously compiled model ",oldfit@stanfit@model_name))
             fit <- stan( fit=oldfit@stanfit , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )
-        }
+        }# reuse
 
     } else {
         fit <- NULL
@@ -1288,7 +1340,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
 
         # push expected values back through model and fetch deviance
         #message("Taking one more sample now, at expected values of parameters, in order to compute DIC")
-        fit2 <- stan( fit=fit , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
+        #fit2 <- stan( fit=fit , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
+        fit2 <- sampling( fit@stanmodel , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
         dhat <- as.numeric( extract(fit2,"dev") )
         pD <- dbar - dhat
         dic <- dbar + pD

diff --git a/R/plotting.r b/R/plotting.r
@@ -46,7 +46,7 @@ dens <- function( x , adj=0.5 , norm.comp=FALSE , main="" , show.HPDI=FALSE , sh
 }
 
 # just converts x,y,z lists of same length to a matrix for contour to plot
-contour.xyz <- function( x , y , z , ... ) {
+contour_xyz <- function( x , y , z , ... ) {
     ux <- unique(x)
     uy <- unique(y)
     n <- length(ux)
@@ -55,7 +55,7 @@ contour.xyz <- function( x , y , z , ... ) {
 }
 
 # just converts inputs to form expected by image()
-image.xyz <- function( x , y , z , ... ) {
+image_xyz <- function( x , y , z , ... ) {
     image( unique(x) , unique(y) , matrix(z, length(unique(x)), length(unique(y)) ) , ... )
 }
 

diff --git a/R/plp_plots.r b/R/plp_plots.r
@@ -70,7 +70,7 @@ plp <- function( mapfit , prob=0.9 , xlim , postcol="black" , priorcol="darkgray
     left <- as.numeric( post2[1,n:1] )
     right <- as.numeric( post2[3,n:1] )
     if ( missing(xlim) ) xlim <- c(min(left),max(right))
-    dotchart( mu , labels=colnames(post2)[n:1] , xlab="Value" , xlim=xlim , col=postcol , ... )
+    dotchart( mu , labels=colnames(post2)[n:1] , xlab="Value" , xlim=xlim , color=postcol , ... )
     for ( i in 1:length(mu) ) {
         lines( c(left[i],right[i]) , c(i,i) , lwd=2 , col=postcol )
         # and prior