Skip to content

Commit

Permalink
1.48 - crawling towards completeness
Browse files Browse the repository at this point in the history
- importing classes from rstan
- various man page updates
- fixed rare ensemble bug that occurred when some models has zero weight
- fix (again) for map models with single-parameter linear models like
mu <- a
- fix for duplicate index variable in unusual map2stan models
- map2stan better with matrix data inputs
- added WAIC, sim, and link methods for lm objects. These are still
experimental. link in particular known to behave oddly at times. Still
not sure why. So use with caution, if at all.
  • Loading branch information
Richard McElreath committed Feb 15, 2015
1 parent 0481862 commit c7a624e
Show file tree
Hide file tree
Showing 17 changed files with 408 additions and 81 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.47
Date: 2014-12-23
Version: 1.48
Date: 2015-02-15
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
exportPattern("^[^x]")
importFrom(coda, HPDinterval)
importFrom(coda, as.mcmc)
importFrom(MASS, mvrnorm)
importFrom(MASS, mvrnorm)
importClassesFrom(rstan)
50 changes: 0 additions & 50 deletions R/compare.r
Original file line number Diff line number Diff line change
Expand Up @@ -215,56 +215,6 @@ ICweights <- function( dev ) {
return(w)
}

# build ensemble of samples using DIC/WAIC weights
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 ) {
# retrieve list of models
L <- list(...)
if ( is.list(L[[1]]) && length(L)==1 )
L <- L[[1]]
# retrieve model names from function call
mnames <- match.call()
mnames <- as.character(mnames)[2:(length(L)+1)]
ictab <- compare( ... , WAIC=WAIC , refresh=refresh , n=n , sort=FALSE )
rownames(ictab@output) <- mnames
weights <- ictab@output$weight
# generate simulated predictions for each model
# then compose ensemble using weights
if ( missing(data) ) {
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh ) )
} else {
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh ) )
}
#print(str(sim.list))
# compose weighted predictions
# calculate indices corresponding to each model
idx <- round( weights * n )
idx_start <- rep(1,length(idx))
idx_end <- idx
for ( i in 2:length(idx) ) {
idx_start[i] <- idx_start[i-1] + idx[i-1]
idx_end[i] <- min( idx_end[i-1] + idx[i] , n )
}
#print(idx_start)
#print(idx_end)
link_out <- link.list[[1]]
sim_out <- sim.list[[1]]
for ( i in 1:length(idx) ) {
idxrange <- idx_start[i]:idx_end[i]
link_out[idxrange,] <- link.list[[i]][idxrange,]
sim_out[idxrange,] <- sim.list[[i]][idxrange,]
}
result <- list( link=link_out , sim=sim_out )
names(weights) <- mnames
idxtab <- cbind( idx_start , idx_end )
rownames(idxtab) <- mnames
attr(result,"weights") <- weights
attr(result,"indices") <- idxtab
attr(result,"ictab") <- ictab
result
}

# tests
if (FALSE) {

Expand Down
63 changes: 63 additions & 0 deletions R/ensemble.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# build ensemble of samples using DIC/WAIC weights
ensemble <- function( ... , data , n=1e3 , WAIC=TRUE , refresh=0 ) {
# retrieve list of models
L <- list(...)
if ( is.list(L[[1]]) && length(L)==1 )
L <- L[[1]]
# retrieve model names from function call
mnames <- match.call()
mnames <- as.character(mnames)[2:(length(L)+1)]
if ( length(L)>1 ) {
ictab <- compare( ... , WAIC=WAIC , refresh=refresh , n=n , sort=FALSE )
rownames(ictab@output) <- mnames
weights <- ictab@output$weight
} else {
ictab <- NA
weights <- 1
}

# compute number of predictions per model
idx <- round( weights * n )

# generate simulated predictions for each model
# then compose ensemble using weights
if ( missing(data) ) {
link.list <- lapply( L , function(m) link(m , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , n=n , refresh=refresh ) )
} else {
link.list <- lapply( L , function(m) link(m , data=data , n=n , refresh=refresh ) )
sim.list <- lapply( L , function(m) sim(m , data=data , n=n , refresh=refresh ) )
}
#print(str(sim.list))

# compose weighted predictions
# calculate indices corresponding to each model
idx_start <- rep(1,length(idx))
idx_end <- idx
if ( length(L)>1 )
for ( i in 2:length(idx) ) {
idx_start[i] <- min( idx_start[i-1] + idx[i-1] , n )
idx_end[i] <- min( idx_end[i-1] + idx[i] , n )
}
#print(idx)
#print(idx_start)
#print(idx_end)
link_out <- link.list[[1]]
sim_out <- sim.list[[1]]
for ( i in 1:length(idx) ) {
if ( idx[i]>0 ) {
idxrange <- idx_start[i]:idx_end[i]
link_out[idxrange,] <- link.list[[i]][idxrange,]
sim_out[idxrange,] <- sim.list[[i]][idxrange,]
}
}
result <- list( link=link_out , sim=sim_out )
names(weights) <- mnames
idxtab <- cbind( idx_start , idx_end )
rownames(idxtab) <- mnames
attr(result,"weights") <- weights
attr(result,"indices") <- idxtab
attr(result,"ictab") <- ictab
result
}

2 changes: 1 addition & 1 deletion R/map-class.r
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ setMethod("summary", "map", function(object){

setGeneric("extract.samples",
function( object , n=10000 , clean.names=TRUE , ... ) {
require(MASS)
#require(MASS)
mu <- 0
if ( class(object)[1] %in% c("mer","bmer","glmerMod","lmerMod") ) {
mu <- fixef(object)
Expand Down
9 changes: 8 additions & 1 deletion R/map-predict.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function( fit , data , n=1000 , ... ) {
)

setMethod("link", "map",
function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE , ... ) {
function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TRUE , ... ) {

if ( class(fit)!="map" ) stop("Requires map fit")
if ( missing(data) ) {
Expand All @@ -25,6 +25,13 @@ function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE ,
if ( is.null(n) ) n <- length(post[[1]])
}

# replace with any elements of replace list
if ( length( replace ) > 0 ) {
for ( i in 1:length(replace) ) {
post[[ names(replace)[i] ]] <- replace[[i]]
}
}

nlm <- length(fit@links)
f_do_lm <- TRUE
if ( nlm==0 ) {
Expand Down
10 changes: 7 additions & 3 deletions R/map.r
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,14 @@ map <- function( flist , data , start , method="BFGS" , hessian=TRUE , debug=FAL
# so user can pass arguments out of order to density function
RHS <- f[[3]]
LHS <- f[[2]]
fname <- as.character( RHS[[1]] )
flag_monad_linear_model <- FALSE
if ( length(RHS)==1 & class(RHS[[1]])=="numeric" )
flag_monad_linear_model <- TRUE
if ( length(RHS)==1 ) {
if ( class(RHS)=="numeric" | class(RHS)=="name" )
flag_monad_linear_model <- TRUE
fname <- ""
} else {
fname <- as.character( RHS[[1]] )
}
if ( fname=="+" | fname=="*" | fname=="-" | fname=="/" | fname=="%*%" | fname %in% invlink.names | flag_monad_linear_model==TRUE ) {
# linear model formula with no density (but maybe invlink) function
# return a list with parameter name in [[1]] and text of RHS in [[2]]
Expand Down
11 changes: 8 additions & 3 deletions R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
}

# add data declaration for grouping variable number of unique values
m_data <- concat( m_data , indent , "int<lower=1> " , N_txt , ";\n" )
#m_data <- concat( m_data , indent , "int<lower=1> " , N_txt , ";\n" )
fp[['used_predictors']][[N_txt]] <- list( var=N_txt , type="index" )

# add N count to data
N <- length( unique( d[[ vprior$group ]] ) )
Expand Down Expand Up @@ -1011,11 +1012,15 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
# coerce outcome type
if ( !is.null(var$type) ) type <- var$type
# build
if ( type=="matrix" )
if ( type=="matrix" ) {
# rely on var$N being vector of matrix dimensions
m_data <- concat( m_data , indent , type , "[",var$N[1],",",var$N[2],"] " , var$var , ";\n" )
else
}
if ( type=="index" ) {
m_data <- concat( m_data , indent , "int<lower=1> " , var$var , ";\n" )
} else {
m_data <- concat( m_data , indent , type , " " , var$var , "[" , var$N , "];\n" )
}
}#i
}

Expand Down
18 changes: 11 additions & 7 deletions R/map2stan_predict.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@

# to do:
# (*) fix dzipois vectorization hack --- need to fix dzipois itself
# (*) fix dordlogit in WAIC calculation -- not sure what issue is

# new link function that doesn't invoke Stan
setMethod("link", "map2stan",
function( fit , data , n=1000 , probs=NULL , refresh=0.1 , replace=list() , flatten=TRUE , ... ) {
function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TRUE , ... ) {

if ( class(fit)!="map2stan" ) stop("Requires map2stan fit")
if ( missing(data) ) {
Expand All @@ -18,7 +17,12 @@ function( fit , data , n=1000 , probs=NULL , refresh=0.1 , replace=list() , flat
#data <- as.data.frame(data)
}

post <- extract.samples(fit)
# get samples from Stan fit
if ( missing(post) )
post <- extract.samples(fit)

# check n and truncate accordingly
# if ( n==0 )

# replace with any elements of replace list
if ( length( replace ) > 0 ) {
Expand Down Expand Up @@ -89,7 +93,10 @@ function( fit , data , n=1000 , probs=NULL , refresh=0.1 , replace=list() , flat

init <- fit@start

###################
# loop over samples and compute each case for each linear model

# initialize refresh counter
ref_inc <- floor(n*refresh)
ref_next <- ref_inc

Expand Down Expand Up @@ -134,6 +141,7 @@ function( fit , data , n=1000 , probs=NULL , refresh=0.1 , replace=list() , flat
rhs[[k]] <- rhs0
}#k

# loop over samples
for ( i in 1:n ) {

# refresh progress display
Expand Down Expand Up @@ -198,10 +206,6 @@ function( fit , data , n=1000 , probs=NULL , refresh=0.1 , replace=list() , flat

}#i

if ( !is.null(probs) ) {
#NYI
}

if ( refresh>0 ) cat("\n")

if ( flatten==TRUE )
Expand Down
57 changes: 57 additions & 0 deletions R/zWAIC-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -312,3 +312,60 @@ function( object , n=1000 , refresh=0.1 , pointwise=FALSE , ... ) {

}
)

# lm
setMethod("WAIC", "lm",
function( object , n=1000 , refresh=0.1 , pointwise=FALSE , ... ) {

if ( refresh > 0 ) message("Constructing posterior predictions")

# extract samples --- will need for inline parameters e.g. sigma in likelihood
post <- extract.samples( object , n=n )
# get sigma
sigma <- sd(resid(object))

# compute log-lik at each sample
# can hijack predict.lm() to do this by inserting samples into object$coefficients
m_temp <- object
out_var <- object$model[[1]] # should be outcome variable
n_coefs <- length(coef(object))
s <- sapply( 1:n ,
function(i) {
for ( j in 1:n_coefs ) m_temp$coefficients[[j]] <- post[i,j]
mu <- as.numeric(predict(m_temp)) # as.numeric strips names
dnorm( out_var , mu , sigma , log=TRUE )
} )

pD <- 0
lppd <- 0
n_cases <- length(out_var)
n_samples <- n
lppd_vec <- rep(NA,n_cases)
pD_vec <- rep(NA,n_cases)
for ( i in 1:n_cases ) {# for each case

vll <- var2(s[i,])
pD <- pD + vll
lpd <- log_sum_exp(s[i,]) - log(n_samples)
lppd <- lppd + lpd
lppd_vec[i] <- lpd
pD_vec[i] <- vll
}#i - cases

waic_vec <- (-2)*( lppd_vec - pD_vec )
if ( pointwise==TRUE ) {
# return decomposed as WAIC for each observation i --- can sum to get total WAIC
# this is useful for computing standard errors of differences with compare()
waic <- (-2)*( lppd_vec - pD_vec )
lppd <- lppd_vec
pD <- pD_vec
} else {
waic <- (-2)*( lppd - pD )
}
attr(waic,"lppd") = lppd
attr(waic,"pWAIC") = pD
attr(waic,"se") = try(sqrt( n_cases*var2(waic_vec) ))

return(waic)
} )

Loading

0 comments on commit c7a624e

Please sign in to comment.