Skip to content

Commit

Permalink
1.42 - fixes and Chapter 11 revisions
Browse files Browse the repository at this point in the history
- replaced . with _ in variable names in several data examples
- man page for glimmer()
- improvements to formatting for compare and precis output
- various bug fixes for pointwise WAIC reporting and calculations
- added sd2 and var2 to compute stddev and variance using n denominator
- finally got map's DIC method to handle vector parameters
correctly...should refactor the abstraction layer using what I learned
in the process
- WAIC methods can return p_WAIC as pointwise as well---useful for
measuring flexibility of individual points
  • Loading branch information
Richard McElreath committed Sep 22, 2014
1 parent 100b125 commit b6d8d5a
Show file tree
Hide file tree
Showing 18 changed files with 482 additions and 108 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.41
Version: 1.42
Date: 2014-09-02
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Expand Down
9 changes: 5 additions & 4 deletions R/compare.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
setClass( "compareIC" , representation( output="data.frame" , dSE="matrix" ) )

compare.show <- function( object ) {
print( round( object@output , 2 ) )
r <- format_show( object@output , digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
print( r )
}
setMethod( "show" , "compareIC" , function(object) compare.show(object) )

Expand All @@ -27,9 +28,9 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
# use WAIC instead of DIC
# WAIC is processed pointwise, so can compute SE of differences, and summed later
WAIC.list <- lapply( L , function(z) WAIC( z , n=n , refresh=refresh , pointwise=TRUE ) )
pD.list <- sapply( WAIC.list , function(x) attr(x,"pWAIC") )
pD.list <- sapply( WAIC.list , function(x) sum(attr(x,"pWAIC")) )
se.list <- sapply( WAIC.list , function(x) attr(x,"se") )
DIC.list <- (-2)*sapply( WAIC.list , sum )
DIC.list <- sapply( WAIC.list , sum )
# compute SE of differences between adjacent models from top to bottom in ranking
colnames(dSE.matrix) <- mnames
rownames(dSE.matrix) <- mnames
Expand Down Expand Up @@ -72,7 +73,7 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {

# plot method for compareIC results shows deviance in and expected deviance out of sample, for each model, ordered top-to-bottom by rank
setMethod("plot" , "compareIC" , function(x,y,xlim,SE=TRUE,dSE=TRUE,weights=FALSE,...) {
dev_in <- x@output[[1]] - x@output[[2]]
dev_in <- x@output[[1]] - x@output[[2]]*2
dev_out <- x@output[[1]]
if ( !is.null(x@output[['SE']]) ) devSE <- x@output[['SE']]
dev_out_lower <- dev_out - devSE
Expand Down
234 changes: 207 additions & 27 deletions R/glimmer.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,6 @@
# translate glmer style model formulas into equivalent formula alist suitable for map2stan
# just a stub for now

glimmer <- function( formula , family="Gaussian" , ... ) {

# templates
family_liks <- list(
Gaussian = "dnorm( mu , sigma )",
Binomial = "dbinom( size , p )",
Poisson = "dpois( lambda )"
)

# check input
if ( class(formula)!="formula" ) stop( "Input must be a glmer-style formula." )

f <- formula
flist <- alist()

# get outcome
outcome <- f[[2]]

# build likelihood
flist[[1]] <- as.formula( concat( as.character(outcome) , " ~ " , family_liks[[family]] ) )

# build linear model


}


parse_glimmer_formula <- function( formula , data ) {
## take a formula and parse into fixed effect and varying effect lists
nobars <- function(term) {
Expand Down Expand Up @@ -144,3 +117,210 @@ parse_glimmer_formula <- function( formula , data ) {
list( y=outcome , yname=outcome_name , fixef=fixef , ranef=ranef , dat=as.data.frame(mdat) )
}


glimmer <- function( formula , data , family=gaussian , prefix=c("b_","v_") , default_prior="dnorm(0,10)" , ... ) {

undot <- function( astring ) {
astring <- gsub( "." , "_" , astring , fixed=TRUE )
astring <- gsub( ":" , "_X_" , astring , fixed=TRUE )
astring <- gsub( "(" , "" , astring , fixed=TRUE )
astring <- gsub( ")" , "" , astring , fixed=TRUE )
astring
}

# convert family to text
family.orig <- family
if ( class(family)=="function" ) {
family <- do.call(family,args=list())
}
link <- family$link
family <- family$family

# templates
family_liks <- list(
gaussian = "dnorm( mu , sigma )",
binomial = "dbinom( size , p )",
poisson = "dpois( lambda )"
)
lm_names <- list(
gaussian = "mu",
binomial = "p",
poisson = "lambda"
)
link_names <- list(
gaussian = "identity",
binomial = "logit",
poisson = "log"
)

# check input
if ( class(formula)!="formula" ) stop( "Input must be a glmer-style formula." )
if ( missing(data) ) stop( "Need data" )

f <- formula
flist <- alist()
prior_list <- alist()

# parse
pf <- parse_glimmer_formula( formula , data )
pf$yname <- undot(pf$yname)

# build likelihood
# check for size variable in Binomial
dtext <- family_liks[[family]]
if ( family=="binomial" ) {
if ( class(pf$y)=="matrix" ) {
# cbind input
pf$dat[[pf$yname]] <- pf$y[,1]
pf$dat[[concat(pf$yname,"_size")]] <- apply(pf$y,1,sum)
dtext <- concat("dbinom( ",concat(pf$yname,"_size")," , p )")
} else {
# bernoulli
pf$dat[[pf$yname]] <- pf$y
dtext <- concat("dbinom( 1 , p )")
}
} else {
pf$dat[[pf$yname]] <- pf$y
}
flist[[1]] <- concat( as.character(pf$yname) , " ~ " , dtext )

# build fixed linear model
flm <- ""
for ( i in 1:length(pf$fixef) ) {
# for each term, add corresponding term to linear model text
aterm <- undot(pf$fixef[i])
newterm <- ""
if ( aterm=="Intercept" ) {
newterm <- aterm
prior_list[[newterm]] <- default_prior
} else {
par_name <- concat( prefix[1] , aterm )
newterm <- concat( par_name , "*" , aterm )
prior_list[[par_name]] <- default_prior
}
if ( i > 1 ) flm <- concat( flm , " +\n " )
flm <- concat( flm , newterm )
}

vlm <- ""
num_group_vars <- length(pf$ranef)
if ( num_group_vars > 0 ) {
for ( i in 1:num_group_vars ) {
group_var <- undot(names(pf$ranef)[i])
members <- list()
for ( j in 1:length(pf$ranef[[i]]) ) {
aterm <- undot(pf$ranef[[i]][j])
newterm <- ""
var_prefix <- prefix[2]
if ( num_group_vars>1 ) var_prefix <- concat( var_prefix , group_var , "_" )
if ( aterm=="Intercept" ) {
par_name <- concat( var_prefix , aterm )
newterm <- concat( par_name , "[" , group_var , "]" )
} else {
par_name <- concat( var_prefix , aterm )
newterm <- concat( par_name , "[" , group_var , "]" , "*" , aterm )
}
members[[par_name]] <- par_name
if ( i > 1 | j > 1 ) vlm <- concat( vlm , " +\n " )
vlm <- concat( vlm , newterm )
}#j
# add group prior
if ( length(members)>1 ) {
# multi_normal
gvar_name <- concat( "c(" , paste(members,collapse=",") , ")" , "[" , group_var , "]" )
prior_list[[gvar_name]] <- concat( "dmvnorm2(0,sigma_" , group_var , ",Rho_" , group_var , ")" )
prior_list[[concat("sigma_",group_var)]] <- concat( "dcauchy(0,2)" )
prior_list[[concat("Rho_",group_var)]] <- concat( "dlkjcorr(2)" )
} else {
# normal
gvar_name <- concat( members[[1]] , "[" , group_var , "]" )
prior_list[[gvar_name]] <- concat( "dnorm(0,sigma_" , group_var , ")" )
prior_list[[concat("sigma_",group_var)]] <- concat( "dcauchy(0,2)" )
}
# make sure grouping variables in dat
# also ensure is an integer index
pf$dat[[group_var]] <- coerce_index( data[[group_var]] )
}#i
}# ranef processing

# any special priors for likelihood function
if ( family=="gaussian" ) {
prior_list[["sigma"]] <- "dcauchy(0,2)"
}

# insert linear model
lm_name <- lm_names[[family]]
#link_func <- link_names[[family]]
if ( vlm=="" )
lm_txt <- concat( flm )
else
lm_txt <- concat( flm , " +\n " , vlm )
lm_left <- concat( link , "(" , lm_name , ")" )
if ( link=="identity" ) lm_left <- lm_name
flist[[2]] <- concat( lm_left , " <- " , lm_txt )

# build priors
for ( i in 1:length(prior_list) ) {
pname <- names(prior_list)[i]
p_txt <- prior_list[[i]]
flist[[i+2]] <- concat( pname , " ~ " , p_txt )
}

# build formula text and parse into alist object
flist_txt <- "alist(\n"
for ( i in 1:length(flist) ) {
flist_txt <- concat( flist_txt , " " , flist[[i]] )
if ( i < length(flist) ) flist_txt <- concat( flist_txt , ",\n" )
}
flist_txt <- concat( flist_txt , "\n)" )
flist2 <- eval(parse(text=flist_txt))

# clean variable names
names(pf$dat) <- sapply( names(pf$dat) , undot )
# remove Intercept from dat
pf$dat[['Intercept']] <- NULL

# result
cat(flist_txt)
cat("\n")
invisible(list(f=flist2,d=pf$dat))

}


####### TEST CODE ########
if ( FALSE ) {

library(rethinking)

data(chimpanzees)
f0 <- pulled.left ~ prosoc.left*condition - condition
m0 <- glimmer( f0 , chimpanzees , family=binomial )

f1 <- pulled.left ~ (1|actor) + prosoc.left*condition - condition
m1 <- glimmer( f1 , chimpanzees , family=binomial )
# m1s <- map2stan( m1$f , data=m1$d , sample=TRUE )

f2 <- pulled.left ~ (1+prosoc.left|actor) + prosoc.left*condition - condition
m2 <- glimmer( f2 , chimpanzees , family=binomial )

data(UCBadmit)
f3 <- cbind(admit,reject) ~ (1|dept) + applicant.gender
m3 <- glimmer( f3 , UCBadmit , binomial )
m3s <- map2stan( m3$f , data=m3$d )

f4 <- cbind(admit,reject) ~ (1+applicant.gender|dept) + applicant.gender
m4 <- glimmer( f4 , UCBadmit , binomial )
m4s <- map2stan( m4$f , data=m4$d )

data(Trolley)
f5 <- response ~ (1|id) + (1|story) + action + intention + contact
m5 <- glimmer( f5 , Trolley )
m5s <- map2stan( m5$f , m5$d , sample=FALSE )

f6 <- response ~ (1+action+intention|id) + (1+action+intention|story) + action + intention + contact
m6 <- glimmer( f6 , Trolley )
m6s <- map2stan( m6$f , m6$d , sample=TRUE )

}

12 changes: 7 additions & 5 deletions R/map-predict.r
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,22 @@ function( fit , data , n=1000 , ... ) {

setMethod("link", "map",
function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE , ... ) {

if ( class(fit)!="map" ) stop("Requires map fit")
if ( missing(data) ) {
data <- fit@data
} else {
# make sure all variables same length
# weird vectorization errors otherwise
data <- as.data.frame(data)
#data <- as.data.frame(data)
}

if ( missing(post) )
post <- extract.samples(fit,n=n)
else
n <- length(post[[1]])
else {
n <- dim(post[[1]])[1]
if ( is.null(n) ) n <- length(post[[1]])
}

nlm <- length(fit@links)
f_do_lm <- TRUE
Expand Down Expand Up @@ -74,7 +76,7 @@ function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE ,
par_name <- names(post)[ j ]
dims <- dim( post[[par_name]] )
# scalar
if ( length(dims)<2 ) init[[par_name]] <- post[[par_name]][s]
if ( is.null(dims) ) init[[par_name]] <- post[[par_name]][s]
# vector
if ( length(dims)==2 ) init[[par_name]] <- post[[par_name]][s,]
# matrix
Expand Down
6 changes: 3 additions & 3 deletions R/map2stan-class.r
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ setMethod("show", "map2stan", function(object){

if ( !is.null(attr(object,"WAIC")) ) {
waic <- attr(object,"WAIC")
use_waic <- waic
if ( length(waic)>1 ) use_waic <- (-2)*sum(waic)
use_waic <- sum(waic)
cat("\nWAIC (SE): ")
cat( concat(round(as.numeric(use_waic),2) , " (" , round(as.numeric(attr(waic,"se")),1) , ")" , "\n" ) )

cat("pWAIC: ")
cat( round(as.numeric(attr(waic,"pWAIC")),2) , "\n" )
use_pWAIC <- sum( unlist(attr(waic,"pWAIC")) )
cat( round(as.numeric(use_pWAIC),2) , "\n" )
}

})
Expand Down
11 changes: 9 additions & 2 deletions R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
########################################
# check arguments
if ( missing(flist) ) stop( "Formula required." )
if ( class(flist) == "map" ) {
# previous map fit, so pull out components
if ( verbose==TRUE ) message( "Using formula from map fit" )
ftemp <- flist@formula
if ( missing(data) ) data <- flist@data
flist <- ftemp
}
if ( class(flist) != "list" ) {
if ( class(flist)=="formula" ) {
flist <- list(flist)
Expand Down Expand Up @@ -449,8 +456,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
fp[['used_predictors']][[undot(lik$outcome)]] <- list( var=undot(lik$outcome) , N='N' , type=lik$out_type )

# build info needed to perform imputation
var_missingness <- which(is.na(d[[lik$outcome]]))
var_temp <- ifelse( is.na(d[[lik$outcome]]) , -999 , d[[lik$outcome]] )
var_missingness <- which(is.na(d[[undot(lik$outcome)]]))
var_temp <- ifelse( is.na(d[[undot(lik$outcome)]]) , -999 , d[[undot(lik$outcome)]] )
# add to impute bank
impute_bank[[ undot(lik$outcome) ]] <- list(
N_miss = length(var_missingness),
Expand Down
16 changes: 15 additions & 1 deletion R/precis.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ precis.whitelist <- data.frame(
# precis class definition and show method
setClass( "precis" , representation( output="data.frame" , digits="numeric" ) )
precis.show <- function( object ) {
print( round( object@output , object@digits ) )
#print( round( object@output , object@digits ) )
r <- format_show( object@output , digits=c('default__'=object@digits,'n_eff'=0) )
print(r)
}
setMethod( "show" , "precis" , function(object) precis.show(object) )

Expand Down Expand Up @@ -113,6 +115,18 @@ precis <- function( model , depth=1 , pars , ci=TRUE , level=0.95 , corr=FALSE ,
result <- postlistprecis( post , prob=level )
}
}
if ( the.class=="map2stan" | the.class=="stanfit" ) {
# add n_eff to result
require(rstan)
if ( the.class=="map2stan" )
n_eff <- summary( model@stanfit )$summary[,'n_eff']
else
n_eff <- summary( model )$summary[,'n_eff']
n_eff <- n_eff[ -which(names(n_eff)=="lp__") ]
if ( the.class=="map2stan" )
n_eff <- n_eff[ -which(names(n_eff)=="dev") ]
result <- cbind( result , n_eff )
}
if ( corr==TRUE ) {
result <- cbind( result , Rho )
}
Expand Down
Loading

0 comments on commit b6d8d5a

Please sign in to comment.