1.42 - fixes and Chapter 11 revisions

- replaced . with _ in variable names in several data examples - man page for glimmer() - improvements to formatting for compare and precis output - various bug fixes for pointwise WAIC reporting and calculations - added sd2 and var2 to compute stddev and variance using n denominator - finally got map's DIC method to handle vector parameters correctly...should refactor the abstraction layer using what I learned in the process - WAIC methods can return p_WAIC as pointwise as well---useful for measuring flexibility of individual points
rmcelreath · Sep 22, 2014 · b6d8d5a · b6d8d5a
1 parent 100b125
commit b6d8d5a
Show file tree

Hide file tree

Showing 18 changed files with 482 additions and 108 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 1.41
+Version: 1.42
 Date: 2014-09-02
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>

diff --git a/R/compare.r b/R/compare.r
@@ -4,7 +4,8 @@
 setClass( "compareIC" , representation( output="data.frame" , dSE="matrix" ) )
 
 compare.show <- function( object ) {
-    print( round( object@output , 2 ) )
+    r <- format_show( object@output , digits=c('default__'=1,'weight'=2,'SE'=2,'dSE'=2) )
+    print( r )
 }
 setMethod( "show" , "compareIC" , function(object) compare.show(object) )
 
@@ -27,9 +28,9 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
         # use WAIC instead of DIC
         # WAIC is processed pointwise, so can compute SE of differences, and summed later
         WAIC.list <- lapply( L , function(z) WAIC( z , n=n , refresh=refresh , pointwise=TRUE ) )
-        pD.list <- sapply( WAIC.list , function(x) attr(x,"pWAIC") )
+        pD.list <- sapply( WAIC.list , function(x) sum(attr(x,"pWAIC")) )
         se.list <- sapply( WAIC.list , function(x) attr(x,"se") )
-        DIC.list <- (-2)*sapply( WAIC.list , sum )
+        DIC.list <- sapply( WAIC.list , sum )
         # compute SE of differences between adjacent models from top to bottom in ranking
         colnames(dSE.matrix) <- mnames
         rownames(dSE.matrix) <- mnames
@@ -72,7 +73,7 @@ compare <- function( ... , n=1e3 , sort="WAIC" , WAIC=TRUE , refresh=0 ) {
 
 # plot method for compareIC results shows deviance in and expected deviance out of sample, for each model, ordered top-to-bottom by rank
 setMethod("plot" , "compareIC" , function(x,y,xlim,SE=TRUE,dSE=TRUE,weights=FALSE,...) {
-    dev_in <- x@output[[1]] - x@output[[2]]
+    dev_in <- x@output[[1]] - x@output[[2]]*2
     dev_out <- x@output[[1]]
     if ( !is.null(x@output[['SE']]) ) devSE <- x@output[['SE']]
     dev_out_lower <- dev_out - devSE

diff --git a/R/glimmer.R b/R/glimmer.R
@@ -2,33 +2,6 @@
 # translate glmer style model formulas into equivalent formula alist suitable for map2stan
 # just a stub for now
 
-glimmer <- function( formula , family="Gaussian" , ... ) {
-
-    # templates
-    family_liks <- list(
-        Gaussian = "dnorm( mu , sigma )",
-        Binomial = "dbinom( size , p )",
-        Poisson = "dpois( lambda )"
-    )
-
-    # check input
-    if ( class(formula)!="formula" ) stop( "Input must be a glmer-style formula." )
-
-    f <- formula
-    flist <- alist()
-
-    # get outcome
-    outcome <- f[[2]]
-
-    # build likelihood
-    flist[[1]] <- as.formula( concat( as.character(outcome) , " ~ " , family_liks[[family]] ) )
-
-    # build linear model
-
-
-}
-
-
 parse_glimmer_formula <- function( formula , data ) {
     ## take a formula and parse into fixed effect and varying effect lists
     nobars <- function(term) {
@@ -144,3 +117,210 @@ parse_glimmer_formula <- function( formula , data ) {
     list( y=outcome , yname=outcome_name , fixef=fixef , ranef=ranef , dat=as.data.frame(mdat) )
 }
 
+
+glimmer <- function( formula , data , family=gaussian , prefix=c("b_","v_") , default_prior="dnorm(0,10)" , ... ) {
+
+    undot <- function( astring ) {
+        astring <- gsub( "." , "_" , astring , fixed=TRUE )
+        astring <- gsub( ":" , "_X_" , astring , fixed=TRUE )
+        astring <- gsub( "(" , "" , astring , fixed=TRUE )
+        astring <- gsub( ")" , "" , astring , fixed=TRUE )
+        astring
+    }
+
+    # convert family to text
+    family.orig <- family
+    if ( class(family)=="function" ) {
+        family <- do.call(family,args=list())
+    }
+    link <- family$link
+    family <- family$family
+
+    # templates
+    family_liks <- list(
+        gaussian = "dnorm( mu , sigma )",
+        binomial = "dbinom( size , p )",
+        poisson = "dpois( lambda )"
+    )
+    lm_names <- list(
+        gaussian = "mu",
+        binomial = "p",
+        poisson = "lambda"
+    )
+    link_names <- list(
+        gaussian = "identity",
+        binomial = "logit",
+        poisson = "log"
+    )
+
+    # check input
+    if ( class(formula)!="formula" ) stop( "Input must be a glmer-style formula." )
+    if ( missing(data) ) stop( "Need data" )
+
+    f <- formula
+    flist <- alist()
+    prior_list <- alist()
+
+    # parse
+    pf <- parse_glimmer_formula( formula , data )
+    pf$yname <- undot(pf$yname)
+
+    # build likelihood
+    # check for size variable in Binomial
+    dtext <- family_liks[[family]]
+    if ( family=="binomial" ) {
+        if ( class(pf$y)=="matrix" ) {
+            # cbind input
+            pf$dat[[pf$yname]] <- pf$y[,1]
+            pf$dat[[concat(pf$yname,"_size")]] <- apply(pf$y,1,sum)
+            dtext <- concat("dbinom( ",concat(pf$yname,"_size")," , p )")
+        } else {
+            # bernoulli
+            pf$dat[[pf$yname]] <- pf$y
+            dtext <- concat("dbinom( 1 , p )")
+        }
+    } else {
+        pf$dat[[pf$yname]] <- pf$y
+    }
+    flist[[1]] <- concat( as.character(pf$yname) , " ~ " , dtext )
+
+    # build fixed linear model
+    flm <- ""
+    for ( i in 1:length(pf$fixef) ) {
+        # for each term, add corresponding term to linear model text
+        aterm <- undot(pf$fixef[i])
+        newterm <- ""
+        if ( aterm=="Intercept" ) {
+            newterm <- aterm
+            prior_list[[newterm]] <- default_prior
+        } else {
+            par_name <- concat( prefix[1] , aterm )
+            newterm <- concat( par_name , "*" , aterm )
+            prior_list[[par_name]] <- default_prior
+        }
+        if ( i > 1 ) flm <- concat( flm , " +\n        " )
+        flm <- concat( flm , newterm )
+    }
+
+    vlm <- ""
+    num_group_vars <- length(pf$ranef)
+    if ( num_group_vars > 0 ) {
+    for ( i in 1:num_group_vars ) {
+        group_var <- undot(names(pf$ranef)[i])
+        members <- list()
+        for ( j in 1:length(pf$ranef[[i]]) ) {
+            aterm <- undot(pf$ranef[[i]][j])
+            newterm <- ""
+            var_prefix <- prefix[2]
+            if ( num_group_vars>1 ) var_prefix <- concat( var_prefix , group_var , "_" )
+            if ( aterm=="Intercept" ) {
+                par_name <- concat( var_prefix , aterm )
+                newterm <- concat( par_name , "[" , group_var , "]" )
+            } else {
+                par_name <- concat( var_prefix , aterm )
+                newterm <- concat( par_name , "[" , group_var , "]" , "*" , aterm )
+            }
+            members[[par_name]] <- par_name
+            if ( i > 1 | j > 1 ) vlm <- concat( vlm , " +\n        " )
+            vlm <- concat( vlm , newterm )
+        }#j
+        # add group prior
+        if ( length(members)>1 ) {
+            # multi_normal
+            gvar_name <- concat( "c(" , paste(members,collapse=",") , ")" , "[" , group_var , "]" )
+            prior_list[[gvar_name]] <- concat( "dmvnorm2(0,sigma_" , group_var , ",Rho_" , group_var , ")" )
+            prior_list[[concat("sigma_",group_var)]] <- concat( "dcauchy(0,2)" )
+            prior_list[[concat("Rho_",group_var)]] <- concat( "dlkjcorr(2)" )
+        } else {
+            # normal
+            gvar_name <- concat( members[[1]] , "[" , group_var , "]" )
+            prior_list[[gvar_name]] <- concat( "dnorm(0,sigma_" , group_var , ")" )
+            prior_list[[concat("sigma_",group_var)]] <- concat( "dcauchy(0,2)" )
+        }
+        # make sure grouping variables in dat
+        # also ensure is an integer index
+        pf$dat[[group_var]] <- coerce_index( data[[group_var]] )
+    }#i
+    }# ranef processing
+
+    # any special priors for likelihood function
+    if ( family=="gaussian" ) {
+        prior_list[["sigma"]] <- "dcauchy(0,2)"
+    }
+
+    # insert linear model
+    lm_name <- lm_names[[family]]
+    #link_func <- link_names[[family]]
+    if ( vlm=="" )
+        lm_txt <- concat( flm )
+    else
+        lm_txt <- concat( flm , " +\n        " , vlm )
+    lm_left <- concat( link , "(" , lm_name , ")" )
+    if ( link=="identity" ) lm_left <- lm_name
+    flist[[2]] <- concat( lm_left , " <- " , lm_txt )
+
+    # build priors
+    for ( i in 1:length(prior_list) ) {
+        pname <- names(prior_list)[i]
+        p_txt <- prior_list[[i]]
+        flist[[i+2]] <- concat( pname , " ~ " , p_txt )
+    }
+
+    # build formula text and parse into alist object
+    flist_txt <- "alist(\n"
+    for ( i in 1:length(flist) ) {
+        flist_txt <- concat( flist_txt , "    " , flist[[i]] )
+        if ( i < length(flist) ) flist_txt <- concat( flist_txt , ",\n" )
+    }
+    flist_txt <- concat( flist_txt , "\n)" )
+    flist2 <- eval(parse(text=flist_txt))
+
+    # clean variable names
+    names(pf$dat) <- sapply( names(pf$dat) , undot )
+    # remove Intercept from dat
+    pf$dat[['Intercept']] <- NULL
+
+    # result
+    cat(flist_txt)
+    cat("\n")
+    invisible(list(f=flist2,d=pf$dat))
+
+}
+
+
+####### TEST CODE ########
+if ( FALSE ) {
+
+library(rethinking)
+
+data(chimpanzees)
+f0 <- pulled.left ~ prosoc.left*condition - condition
+m0 <- glimmer( f0 , chimpanzees , family=binomial )
+
+f1 <- pulled.left ~ (1|actor) + prosoc.left*condition - condition
+m1 <- glimmer( f1 , chimpanzees , family=binomial )
+# m1s <- map2stan( m1$f , data=m1$d , sample=TRUE )
+
+f2 <- pulled.left ~ (1+prosoc.left|actor) + prosoc.left*condition - condition
+m2 <- glimmer( f2 , chimpanzees , family=binomial )
+
+data(UCBadmit)
+f3 <- cbind(admit,reject) ~ (1|dept) + applicant.gender
+m3 <- glimmer( f3 , UCBadmit , binomial )
+m3s <- map2stan( m3$f , data=m3$d )
+
+f4 <- cbind(admit,reject) ~ (1+applicant.gender|dept) + applicant.gender
+m4 <- glimmer( f4 , UCBadmit , binomial )
+m4s <- map2stan( m4$f , data=m4$d )
+
+data(Trolley)
+f5 <- response ~ (1|id) + (1|story) + action + intention + contact
+m5 <- glimmer( f5 , Trolley )
+m5s <- map2stan( m5$f , m5$d , sample=FALSE )
+
+f6 <- response ~ (1+action+intention|id) + (1+action+intention|story) + action + intention + contact
+m6 <- glimmer( f6 , Trolley )
+m6s <- map2stan( m6$f , m6$d , sample=TRUE )
+
+}
+
diff --git a/R/map-predict.r b/R/map-predict.r
@@ -8,20 +8,22 @@ function( fit , data , n=1000 , ... ) {
 
 setMethod("link", "map",
 function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE , ... ) {
-
+    
     if ( class(fit)!="map" ) stop("Requires map fit")
     if ( missing(data) ) {
         data <- fit@data
     } else {
         # make sure all variables same length
         # weird vectorization errors otherwise
-        data <- as.data.frame(data)
+        #data <- as.data.frame(data)
     }
 
     if ( missing(post) ) 
         post <- extract.samples(fit,n=n)
-    else
-        n <- length(post[[1]])
+    else {
+        n <- dim(post[[1]])[1]
+        if ( is.null(n) ) n <- length(post[[1]])
+    }
 
     nlm <- length(fit@links)
     f_do_lm <- TRUE
@@ -74,7 +76,7 @@ function( fit , data , n=1000 , post , probs=NULL , refresh=0.1 , flatten=TRUE ,
                 par_name <- names(post)[ j ]
                 dims <- dim( post[[par_name]] )
                 # scalar
-                if ( length(dims)<2 ) init[[par_name]] <- post[[par_name]][s]
+                if ( is.null(dims) ) init[[par_name]] <- post[[par_name]][s]
                 # vector
                 if ( length(dims)==2 ) init[[par_name]] <- post[[par_name]][s,]
                 # matrix

diff --git a/R/map2stan-class.r b/R/map2stan-class.r
@@ -121,13 +121,13 @@ setMethod("show", "map2stan", function(object){
 
     if ( !is.null(attr(object,"WAIC")) ) {
         waic <- attr(object,"WAIC")
-        use_waic <- waic
-        if ( length(waic)>1 ) use_waic <- (-2)*sum(waic)
+        use_waic <- sum(waic)
         cat("\nWAIC (SE): ")
         cat( concat(round(as.numeric(use_waic),2) , " (" , round(as.numeric(attr(waic,"se")),1) , ")" , "\n" ) )
 
         cat("pWAIC: ")
-        cat( round(as.numeric(attr(waic,"pWAIC")),2) , "\n" )
+        use_pWAIC <- sum( unlist(attr(waic,"pWAIC")) )
+        cat( round(as.numeric(use_pWAIC),2) , "\n" )
     }
 
   })

diff --git a/R/map2stan.r b/R/map2stan.r
@@ -45,6 +45,13 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
     ########################################
     # check arguments
     if ( missing(flist) ) stop( "Formula required." )
+    if ( class(flist) == "map" ) {
+        # previous map fit, so pull out components
+        if ( verbose==TRUE ) message( "Using formula from map fit" )
+        ftemp <- flist@formula
+        if ( missing(data) ) data <- flist@data
+        flist <- ftemp
+    }
     if ( class(flist) != "list" ) {
         if ( class(flist)=="formula" ) {
             flist <- list(flist)
@@ -449,8 +456,8 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
                 fp[['used_predictors']][[undot(lik$outcome)]] <- list( var=undot(lik$outcome) , N='N' , type=lik$out_type )
 
                 # build info needed to perform imputation
-                var_missingness <- which(is.na(d[[lik$outcome]]))
-                var_temp <- ifelse( is.na(d[[lik$outcome]]) , -999 , d[[lik$outcome]] )
+                var_missingness <- which(is.na(d[[undot(lik$outcome)]]))
+                var_temp <- ifelse( is.na(d[[undot(lik$outcome)]]) , -999 , d[[undot(lik$outcome)]] )
                 # add to impute bank
                 impute_bank[[ undot(lik$outcome) ]] <- list(
                     N_miss = length(var_missingness),

diff --git a/R/precis.r b/R/precis.r
@@ -10,7 +10,9 @@ precis.whitelist <- data.frame(
 # precis class definition and show method
 setClass( "precis" , representation( output="data.frame" , digits="numeric" ) )
 precis.show <- function( object ) {
-    print( round( object@output , object@digits ) )
+    #print( round( object@output , object@digits ) )
+    r <- format_show( object@output , digits=c('default__'=object@digits,'n_eff'=0) )
+    print(r)
 }
 setMethod( "show" , "precis" , function(object) precis.show(object) )
 
@@ -113,6 +115,18 @@ precis <- function( model , depth=1 , pars , ci=TRUE , level=0.95 , corr=FALSE ,
             result <- postlistprecis( post , prob=level )
         }
     }
+    if ( the.class=="map2stan" | the.class=="stanfit" ) {
+        # add n_eff to result
+        require(rstan)
+        if ( the.class=="map2stan" )
+            n_eff <- summary( model@stanfit )$summary[,'n_eff']
+        else
+            n_eff <- summary( model )$summary[,'n_eff']
+        n_eff <- n_eff[ -which(names(n_eff)=="lp__") ]
+        if ( the.class=="map2stan" )
+            n_eff <- n_eff[ -which(names(n_eff)=="dev") ]
+        result <- cbind( result , n_eff )
+    }
     if ( corr==TRUE ) {
         result <- cbind( result , Rho )
     }