Skip to content

Commit

Permalink
v1.52 candidate
Browse files Browse the repository at this point in the history
- lots of man page updates
- compare() checks nobs and model classes now; warns when not all equal
- added R versions of dmvnorm2 and rmvnorm2
- map2stan() understands cores argument now and automatically
parallelizes chains. not yet tested on Windows.
- precis(), PI(), HPDI() default to 89% intervals now
-
  • Loading branch information
Richard McElreath committed Mar 24, 2015
1 parent ec401c7 commit b8bc18b
Show file tree
Hide file tree
Showing 39 changed files with 405 additions and 192 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.51
Date: 2015-03-14
Version: 1.52
Date: 2015-03-24
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS
Depends: rstan, parallel
Imports: coda, MASS, mvtnorm
Depends: rstan, parallel, methods
Description: Utilities for fitting and comparing models
License: GPL (>= 3)
14 changes: 12 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,19 @@ importFrom(coda, HPDinterval)
importFrom(coda, as.mcmc)
importFrom(MASS, mvrnorm)
importFrom(rstan, stan)
importFrom(rstan, sampling)
#importFrom(rstan, stan_model)
importFrom(rstan, extract)
importFrom(rstan, sflist2stanfit)
importFrom(rstan, summary)
importClassesFrom(rstan , stanfit)
importClassesFrom(rstan)
#importClassesFrom(rstan , stanfit)
#importClassesFrom(rstan , stanmodel)
importFrom(parallel, mclapply)
importFrom(parallel, parLapply)
importFrom(parallel, parLapply)
importFrom(parallel, makeCluster)
importFrom(parallel, clusterExport)
importFrom(mvtnorm, dmvnorm)
importFrom(mvtnorm, rmvnorm)
exportClasses(map2stan)
exportClasses(map)
21 changes: 6 additions & 15 deletions R/coeftab.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# coeftab class definition and show method
setClass( "coeftab" , representation( coefs="matrix" , se="matrix" , nobs="numeric" , AIC="numeric" , digits="numeric" , width="numeric" ) )

coeftab.show <- function( object ) {
coeftab_show <- function( object ) {
result <- object@coefs
if ( !is.null(object@nobs) ) {
result <- rbind( result , object@nobs )
Expand All @@ -12,9 +12,9 @@ coeftab.show <- function( object ) {
coefs <- rrformat( result , digits=object@digits , width=object@width )
print( coefs , quote=FALSE , justify="right" )
}
setMethod( "show" , "coeftab" , function(object) coeftab.show(object) )
setMethod( "show" , "coeftab" , function(object) coeftab_show(object) )

coeftab.plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , ... ) {
coeftab_plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , ... ) {
x.orig <- x
xse <- x@se
x <- x@coefs
Expand Down Expand Up @@ -59,9 +59,9 @@ coeftab.plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=
}
abline( v=0 , lty=1 , col=col.alpha("black",0.15) )
}
setMethod( "plot" , "coeftab" , function(x,y,...) coeftab.plot(x,y,...) )
setMethod( "plot" , "coeftab" , function(x,y,...) coeftab_plot(x,y,...) )

coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , width=7 , rotate=FALSE , compare=FALSE ) {
coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , width=7 , rotate=FALSE ) {

# se=TRUE outputs standard errors
# se.inside=TRUE prints standard errors in parentheses in same column as estimates
Expand Down Expand Up @@ -118,7 +118,7 @@ coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , w
d[i,][ paste(names(kse)[j],".se",sep="") ] <- as.numeric( round(kse[j],digits) )
else
# combine with estimate
d[i,][ names(kse)[j] ] <- paste( formatC( as.real(d[i,][ names(kse)[j] ]) , digits=digits ) , " (" , formatC( as.real( kse[j] ) , digits=digits ) , ")" , sep="" )
d[i,][ names(kse)[j] ] <- paste( formatC( (d[i,][ names(kse)[j] ]) , digits=digits ) , " (" , formatC( as.real( kse[j] ) , digits=digits ) , ")" , sep="" )
}
}
}
Expand All @@ -144,15 +144,6 @@ coeftab <- function( ... , se=FALSE , se.inside=FALSE , nobs=TRUE , digits=2 , w
nobs <- 0
}

# add AICc and weights
if ( compare==TRUE ) {
require(bbmle)
d$AICc <- sapply( 1:length(L) , function(z) AICc( L[[z]] , nobs=d$nobs[z] ) )
deltAICc <- d$AICc - min(d$AICc)
wAIC <- exp( -0.5*deltAICc )
d$weight <- wAIC / sum( wAIC )
}

# return table
if ( rotate==FALSE ) {
d <- t(d) # models along top is default
Expand Down
30 changes: 26 additions & 4 deletions R/compare.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
# compare class definition and show method
setClass( "compareIC" , representation( output="data.frame" , dSE="matrix" ) )

compare.show <- function( object ) {
r <- format_show( object@output , digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
#compare.show <- function( object ) {
# r <- format_show( object@output , #digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
# print( r )
#}
setMethod( "show" , "compareIC" , function(object) {
r <- format_show( object@output ,
digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
print( r )
}
setMethod( "show" , "compareIC" , function(object) compare.show(object) )
} )

# new compare function, defaulting to WAIC
compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
Expand All @@ -20,6 +24,22 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
mnames <- match.call()
mnames <- as.character(mnames)[2:(length(L)+1)]

# check class of fit models and warn when more than one class represented
classes <- as.character(sapply( L , class ))
if ( any(classes!=classes[1]) ) {
warning("Not all model fits of same class.\nThis is usually a bad idea, because it implies they were fit by different algorithms.\nCheck yourself, before you wreck yourself.")
}

# check nobs for all models
# if different, warn
nobs_list <- sapply( L , nobs )
if ( any(nobs_list != nobs_list[1]) ) {
nobs_out <- paste( mnames , nobs_list , "\n" )
nobs_out <- concat(nobs_out)
warning(concat(
"Different numbers of observations found for at least two models.\nInformation criteria only valid for comparing models fit to exactly same observations.\nNumber of observations for each model:\n",nobs_out))
}

dSE.matrix <- matrix( NA , nrow=length(L) , ncol=length(L) )
if ( WAIC==FALSE ) {
DIC.list <- lapply( L , function(z) DIC( z , n=n ) )
Expand Down Expand Up @@ -119,6 +139,7 @@ setMethod("plot" , "compareIC" , function(x,y,xlim,SE=TRUE,dSE=TRUE,weights=FALS
}
})

if ( FALSE ) {
# AICc/BIC model comparison table
compare_old <- function( ... , nobs=NULL , sort="AICc" , BIC=FALSE , DIC=FALSE , delta=TRUE , DICsamples=1e4 ) {
require(bbmle)
Expand Down Expand Up @@ -206,6 +227,7 @@ compare_old <- function( ... , nobs=NULL , sort="AICc" , BIC=FALSE , DIC=FALSE ,

new( "compareIC" , output=result )
}
}#FALSE

# convert estimated D_test values to weights
ICweights <- function( dev ) {
Expand Down
18 changes: 17 additions & 1 deletion R/distributions.r
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ dcategorical <- function( x , prob , log=TRUE ) {

rcategorical <- function( n , prob ) {
k <- length(prob)
y <- sample( 1:k , size=n , prob=p , replace=TRUE )
y <- sample( 1:k , size=n , prob=prob , replace=TRUE )
return(y)
}

Expand Down Expand Up @@ -300,3 +300,19 @@ softmax <- function( ... ) {
return(p)
}
}


# dmvnorm2 - SRS form of multi_normal
# uses package mvtnorm
dmvnorm2 <- function( x , Mu , sigma , Rho , log=FALSE ) {
DS <- diag(sigma)
SIGMA <- DS %*% Rho %*% DS
dmvnorm(x, Mu, SIGMA, log=log )
}

rmvnorm2 <- function( n , Mu=rep(0,length(sigma)) , sigma=rep(1,length(Mu)) , Rho=diag(length(Mu)) , method="chol" ) {
DS <- diag(sigma)
SIGMA <- DS %*% Rho %*% DS
rmvnorm( n=n , mean=Mu , sigma=SIGMA , method=method )
}

10 changes: 6 additions & 4 deletions R/map2stan-class.r
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ setMethod("coef", "map2stan", function(object) {

setMethod("extract.samples","map2stan",
function(object) {
require(rstan)
#require(rstan)
p <- rstan::extract(object@stanfit)
# get rid of dev and lp__
p[['dev']] <- NULL
Expand All @@ -30,7 +30,7 @@ function(object) {

setMethod("extract.samples","stanfit",
function(object) {
require(rstan)
#require(rstan)
p <- rstan::extract(object)
# get rid of dev and lp__
#p[['dev']] <- NULL
Expand Down Expand Up @@ -235,7 +235,7 @@ setMethod("plot" , "map2stan" , function(x,y,...) {
})

setMethod("pairs" , "map2stan" , function(x, n=500 , alpha=0.7 , cex=0.7 , pch=16 , adj=1 , pars , ...) {
require(rstan)
#require(rstan)
posterior <- extract.samples(x)
if ( !missing(pars) ) {
# select out named parameters
Expand Down Expand Up @@ -270,7 +270,9 @@ setMethod("pairs" , "map2stan" , function(x, n=500 , alpha=0.7 , cex=0.7 , pch=1
})

# my trace plot function
tracerplot <- function( object , col=c("slateblue","orange","red","green") , alpha=0.7 , bg=gray(0.6,0.5) , ask=TRUE , ... ) {
#rethink_palette <- c("#5BBCD6","#F98400","#F2AD00","#00A08A","#FF0000")
rethink_palette <- c("#8080FF","#F98400","#F2AD00","#00A08A","#FF0000")
tracerplot <- function( object , col=rethink_palette , alpha=1 , bg=gray(0.6,0.5) , ask=TRUE , ... ) {
chain.cols <- col

if ( class(object)!="map2stan" ) stop( "requires map2stan fit" )
Expand Down
65 changes: 59 additions & 6 deletions R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@

# to-do:
# (*) different chains get different random start values
# (*) support cores argument -> compile -> resample
# (*) need to do p*theta and (1-p)*theta multiplication outside likelihood in betabinomial (similar story for gammapoisson) --- or is there an operator for pairwise vector multiplication?
# (-) handle improper input more gracefully
# (-) add "as is" formula type, with quoted text on RHS to dump into Stan code

##################
# map2stan itself

map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , cores=1 , ... ) {
map2stan <- function( flist , data , start , pars , constraints=list() , types=list() , sample=TRUE , iter=2000 , warmup=floor(iter/2) , chains=1 , debug=FALSE , verbose=FALSE , WAIC=TRUE , cores=1 , rng_seed , ... ) {

########################################
# empty Stan code
Expand Down Expand Up @@ -71,6 +70,13 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
if ( missing(start) ) start <- list()
start.orig <- start

if ( iter <= warmup ) {
# user probably meant iter as number of samples
# so warn and convert
warning(concat("'iter' less than or equal to 'warmup'. Setting 'iter' to sum of 'iter' and 'warmup' instead (",iter+warmup,")."))
iter <- iter + warmup
}

# check data for scale attributes and remove
for ( i in 1:length(data) ) {
if ( !is.null( attr(data[[i]],"scaled:center") ) | !is.null( attr(data[[i]],"scaled:scale") ) ) {
Expand Down Expand Up @@ -1226,19 +1232,65 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
}

if ( sample==TRUE ) {
require(rstan)
#require(rstan) # don't need anymore

# sample
modname <- deparse( flist[[1]] )
initlist <- list()
for ( achain in 1:chains ) initlist[[achain]] <- start

if ( flag_refit==FALSE ) {
fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )

if ( chains==1 | cores==1 ) {

# single core
# so just run the model
fit <- stan( model_code=model_code , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )

} else {

# multi-core, multi-chain
# so pre-compile then run again
message("Precompiling model...")
fit_prep <- stan( model_code=model_code , model_name=modname , data=d , init=list(start) , iter=1 , chains=0 , pars=pars , test_grad=FALSE , refresh=-1 , ... )
#fit_prep <- stan_model( model_code=model_code , model_name=modname )

# if no seed provided, make one
if ( missing(rng_seed) ) rng_seed <- sample( 1:1e5 , 1 )

message(concat("Dispatching ",chains," chains to ",cores," cores."))
sys <- .Platform$OS.type
if ( sys=='unix' ) {
# Mac or Linux
# hand off to mclapply
sflist <- mclapply( 1:chains , mc.cores=cores ,
function(chainid)
stan( fit=fit_prep , data=d , init=list(start) , pars=pars , iter=iter , warmup=warmup , chains=1 , seed=rng_seed , chain_id=chainid , ... )
)
} else {
# Windows
# so use parLapply instead
CL = makeCluster(cores)
#data <- object@data
env0 <- list( fit_prep=fit_prep, data=d, pars=pars, rng_seed=rng_seed, iter=iter, warmup=warmup , initlist=list(start) )
clusterExport(cl = CL, c("iter","warmup","data", "fit_prep", "pars", "rng_seed","initlist"), as.environment(env0))
sflist <- parLapply(CL, 1:chains, fun = function(cid) {
stan( fit=fit_prep , data = data, pars = pars, chains = 1,
iter = iter, warmup = warmup, seed = rng_seed,
chain_id = cid, init=initlist )
})
}
# merge result
fit <- sflist2stanfit(sflist)

}#multicore

} else {

# reuse previous compiled model
message(concat("Reusing previously compiled model ",oldfit@stanfit@model_name))
fit <- stan( fit=oldfit@stanfit , model_name=modname , data=d , init=initlist , iter=iter , chains=chains , pars=pars , ... )
}
}# reuse

} else {
fit <- NULL
Expand Down Expand Up @@ -1288,7 +1340,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l

# push expected values back through model and fetch deviance
#message("Taking one more sample now, at expected values of parameters, in order to compute DIC")
fit2 <- stan( fit=fit , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
#fit2 <- stan( fit=fit , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
fit2 <- sampling( fit@stanmodel , init=list(Epost) , data=d , pars="dev" , chains=1 , iter=1 , refresh=-1 )
dhat <- as.numeric( extract(fit2,"dev") )
pD <- dbar - dhat
dic <- dbar + pD
Expand Down
4 changes: 2 additions & 2 deletions R/plotting.r
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ dens <- function( x , adj=0.5 , norm.comp=FALSE , main="" , show.HPDI=FALSE , sh
}

# just converts x,y,z lists of same length to a matrix for contour to plot
contour.xyz <- function( x , y , z , ... ) {
contour_xyz <- function( x , y , z , ... ) {
ux <- unique(x)
uy <- unique(y)
n <- length(ux)
Expand All @@ -55,7 +55,7 @@ contour.xyz <- function( x , y , z , ... ) {
}

# just converts inputs to form expected by image()
image.xyz <- function( x , y , z , ... ) {
image_xyz <- function( x , y , z , ... ) {
image( unique(x) , unique(y) , matrix(z, length(unique(x)), length(unique(y)) ) , ... )
}

Expand Down
2 changes: 1 addition & 1 deletion R/plp_plots.r
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ plp <- function( mapfit , prob=0.9 , xlim , postcol="black" , priorcol="darkgray
left <- as.numeric( post2[1,n:1] )
right <- as.numeric( post2[3,n:1] )
if ( missing(xlim) ) xlim <- c(min(left),max(right))
dotchart( mu , labels=colnames(post2)[n:1] , xlab="Value" , xlim=xlim , col=postcol , ... )
dotchart( mu , labels=colnames(post2)[n:1] , xlab="Value" , xlim=xlim , color=postcol , ... )
for ( i in 1:length(mu) ) {
lines( c(left[i],right[i]) , c(i,i) , lwd=2 , col=postcol )
# and prior
Expand Down
Loading

0 comments on commit b8bc18b

Please sign in to comment.