Version 1.56

- a bunch of edits to man pages - added a logit function for convenience - uniform priors in map2stan work via constaints now and are commented out in the model block, where they would be redundant - added automatic positive constraints for scale paramters in dbetabinom, dgampois, dgamma2 - fixed a subtle bug with WAIC calculations for map2stan models using imputation - Added quick installation section to README
rmcelreath · Jul 20, 2015 · a6631ad · a6631ad
1 parent 9cbaa47
commit a6631ad
Show file tree

Hide file tree

Showing 28 changed files with 234 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 1.55
-Date: 2015-05-12
+Version: 1.56
+Date: 2015-07-20
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
 Imports: coda, MASS, mvtnorm

diff --git a/R/distributions.r b/R/distributions.r
@@ -13,6 +13,10 @@ inv_logit <- function( x ) {
     p
 }
 
+logit <- function( x ) {
+    log( x ) - log( 1-x )
+}
+
 pordlogit <- function( x , phi , a , log=FALSE ) {
     a <- c( as.numeric(a) , Inf )
     if ( length(phi) == 1 ) {

diff --git a/R/map2stan-templates.r b/R/map2stan-templates.r
@@ -323,7 +323,7 @@ map2stan.templates <- list(
 
             kout <- list('0','1') # z ~ normal(0,1)
             indent <- "    "
-                        
+
             ###########
             # Sigma and Rho
 
@@ -603,10 +603,23 @@ map2stan.templates <- list(
         R_name = "dunif",
         stan_name = "uniform",
         num_pars = 2,
-        par_names = c("alpha","beta"),
-        par_bounds = c("<lower=0>","<lower=0>"),
+        par_names = c("min","max"),
+        par_bounds = c("",""),
         par_types = c("real","real"),
         out_type = "real",
+        out_map = function(kout,kin,...) {
+            # need to coerce bounded constraints on the parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            kout <- as.character(kout)
+            if ( is.null(constr_list[[kout]]) ) {
+                constr_list[[kout]] <- concat( "lower=" , as.character(kin[[1]]) , ",upper=" , as.character(kin[[2]]) )
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+            # add comment tag in front, so that uniform dist isn't computed during sims
+            # the bounded contraints effectively establish uniform prior in Stan
+            kout <- concat( "// " , kout )
+            return(kout)
+        },
         par_map = function(k,...) {
             return(k);
         },
@@ -670,9 +683,17 @@ map2stan.templates <- list(
         out_type = "real",
         par_map = function(k,...) {
             p_name <- k[[1]];
-            theta_name <- k[[2]];
+            theta_name <- as.character(k[[2]]);
             k[[1]] <- concat(p_name,"*",theta_name);
             k[[2]] <- concat("(1-",p_name,")*",theta_name);
+
+            # need to coerce bounded constraints on theta parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            if ( is.null(constr_list[[theta_name]]) ) {
+                constr_list[[theta_name]] <- "lower=0"
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+
             return(k);
         },
         vectorized = TRUE
@@ -688,9 +709,17 @@ map2stan.templates <- list(
         out_type = "int",
         par_map = function(k,...) {
             p_name <- k[[2]];
-            theta_name <- k[[3]];
+            theta_name <- as.character(k[[3]]);
             k[[2]] <- concat(p_name,"*",theta_name);
             k[[3]] <- concat("(1-",p_name,")*",theta_name);
+
+            # need to coerce bounded constraints on theta parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            if ( is.null(constr_list[[theta_name]]) ) {
+                constr_list[[theta_name]] <- "lower=0"
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+
             return(k);
         },
         vectorized = TRUE
@@ -758,6 +787,14 @@ map2stan.templates <- list(
             scale <- as.character(k[[2]])
             k[[1]] <- concat( mu , "/" , scale )
             k[[2]] <- concat( "1/" , scale )
+
+            # need to coerce bounded constraints on scale parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            if ( is.null(constr_list[[scale]]) ) {
+                constr_list[[scale]] <- "lower=0"
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+
             return(k);
         },
         vectorized = TRUE
@@ -773,9 +810,17 @@ map2stan.templates <- list(
         out_type = "int",
         par_map = function(k,...) {
             mu_name <- k[[1]];
-            scale_name <- k[[2]];
+            scale_name <- as.character(k[[2]]);
             k[[1]] <- concat(mu_name);
             k[[2]] <- concat(scale_name);
+
+            # need to coerce bounded constraints on scale parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            if ( is.null(constr_list[[scale_name]]) ) {
+                constr_list[[scale_name]] <- "lower=0"
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+
             return(k);
         },
         vectorized = TRUE
@@ -808,6 +853,14 @@ dev <- dev + (-2)*(bernoulli_log(0,PAR1) + gamma_log(OUTCOME,PAR2,PAR3));",
             scale <- as.character(k[[3]])
             k[[2]] <- concat( mu , "[i]/" , scale )
             k[[3]] <- concat( "1/" , scale )
+
+            # need to coerce bounded constraints on scale parameter
+            constr_list <- get( "constraints" , envir=parent.frame() )
+            if ( is.null(constr_list[[scale]]) ) {
+                constr_list[[scale]] <- "lower=0"
+                assign( "constraints" , constr_list , envir=parent.frame() )
+            }
+
             return(k);
         },
         vectorized = FALSE

diff --git a/R/map2stan.r b/R/map2stan.r
@@ -529,6 +529,9 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
                 d[[ missingness_name ]] <- var_missingness
                 # flag as used
                 fp[['used_predictors']][[missingness_name]] <- list( var=missingness_name , N=N_missing_name , type="int" )
+                # trap for only 1 missing value and vectorization issues
+                if ( length(var_missingness)==1 )
+                    fp[['used_predictors']][[missingness_name]] <- list( var=missingness_name , type="index" )
                 # replace variable with missing values
                 d[[undot(lik$outcome)]] <- var_temp
 
@@ -614,7 +617,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
                 flag_mvprior <- FALSE
                 if ( length(RHS)>1 ) {
                     fname <- as.character( RHS[[1]] )
-                    if ( fname %in% c("dmvnorm","dmvnorm2","normal","multi_normal") )
+                    if ( fname %in% c("dmvnorm","dmvnorm2","multi_normal") )
                         flag_mvprior <- TRUE
                 }
                 if ( flag_mvprior==FALSE ) {
@@ -775,7 +778,16 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
             if ( class(prior$par_out)!="character" )
                 prior$par_out <- deparse(prior$par_out)
             parstxt <- paste( klist , collapse=" , " )
-            txt <- concat( indent , prior$par_out , " ~ " , prior$density , "( " , parstxt , " )" , prior$T_text , ";" )
+
+            # check for special formatting of left hand side
+            # also handle some constraints here
+            lhstxt <- prior$par_out
+            if ( !is.null(tmplt$out_map) ) {
+                lhstxt <- tmplt$out_map( prior$par_out , prior$pars_in , environment() )
+            }
+
+            # build text
+            txt <- concat( indent , lhstxt , " ~ " , prior$density , "( " , parstxt , " )" , prior$T_text , ";" )
 
             #m_model_priors <- concat( m_model_priors , txt , "\n" )
             m_model_txt <- concat( m_model_txt , txt , "\n" )
@@ -1054,15 +1066,25 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
 
                 # imputation stuff
                 if ( outcome %in% names(impute_bank) ) {
+                    N_miss <- impute_bank[[lik$outcome]]$N_miss
                     # add _merge suffix
                     outcome <- concat(outcome,suffix_merge)
                     # build code in transformed parameters that constructs x_merge
                     m_tpars1 <- concat( m_tpars1 , indent , "real " , outcome , "[N];\n" )
                     m_tpars2 <- concat( m_tpars2 , indent , outcome , " <- " , lik$outcome , ";\n" )
-                    m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:" , lik$N_name , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
+                    # trap for single missing value and vectorization issues
+                    if ( N_miss>1 )
+                        m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:" , lik$N_name , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
+                    else
+                        m_tpars2 <- concat( m_tpars2 , indent , outcome , "[" , lik$outcome , "_missingness] <- " , lik$outcome , "_impute" , ";\n" )
                     # add x_impute to start list
                     imputer_name <- concat(lik$outcome,"_impute")
                     start[[ imputer_name ]] <- rep( impute_bank[[lik$outcome]]$init , impute_bank[[lik$outcome]]$N_miss )
+                    if ( N_miss>1 )
+                        types[[ imputer_name ]] <- c("vector", concat("[",lik$N_name,"_missing]") )
+                    else
+                        # only one missing value, so cannot use vector
+                        types[[ imputer_name ]] <- "real"
                     # make sure x_impute is in pars
                     #pars[[ imputer_name ]] <- imputer_name
                 }

diff --git a/R/zDIC-methods.R b/R/zDIC-methods.R
@@ -86,3 +86,20 @@ function( object , ... ) {
     return(val)
 }
 )
+
+# function that takes mean of each parameter in a named list of samples
+postmeans <- function(post) {
+    Epost <- list()
+    for ( i in 1:length(post) ) {
+        dims <- length( dim( post[[i]] ) )
+        name <- names(post)[i]
+        if ( name!="lp__" & name!="dev" ) {
+            if ( dims==1 ) {
+                Epost[[ name ]] <- mean( post[[i]] )
+            } else {
+                Epost[[ name ]] <- apply( post[[i]] , 2:dims , mean )
+            }
+        }
+    }#i
+    return(Epost)
+}
diff --git a/R/zWAIC-methods.R b/R/zWAIC-methods.R
@@ -34,18 +34,18 @@ function( object , n=0 , refresh=0.1 , pointwise=FALSE , ... ) {
             if ( pointwise==FALSE ) return( old_waic )
         }
     }
-
-    # compute linear model values at each sample
-    if ( refresh > 0 ) message("Constructing posterior predictions")
-    lm_vals <- link( object , n=n , refresh=refresh , flatten=FALSE )
-
+
     # extract samples --- will need for inline parameters e.g. sigma in likelihood
     post <- extract.samples( object )
 
     n_samples <- dim( post[[1]] )[1]
     if ( n == 0 ) n <- n_samples # special flag for all samples in fit
     #if ( n_samples < n ) n <- n_samples
 
+    # compute linear model values at each sample
+    if ( refresh > 0 ) message("Constructing posterior predictions")
+    lm_vals <- link( object , n=n , refresh=refresh , flatten=FALSE )
+
     # compute log-lik at each sample
     liks <- object@formula_parsed$likelihood
     n_lik <- length( liks )

diff --git a/R/z_link-map2stan.r b/R/z_link-map2stan.r
@@ -19,7 +19,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
 
     # get samples from Stan fit
     if ( missing(post) )
-        post <- extract.samples(fit)
+        post <- extract.samples(fit,n=n)
 
     # check n and truncate accordingly
     # if ( n==0 )
@@ -134,9 +134,10 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
                 name_original <- names(fit@formula_parsed$impute_bank)[kk]
                 name_merge <- concat( name_original , "_merge" )
                 # _merge name should already be in linear model
+                #rhs0 <- gsub( name_merge , concat(name_merge,"[i,]") , rhs0 , fixed=TRUE )
                 #rhs0 <- gsub( name_original , name_merge , rhs0 , fixed=TRUE )
                 # construct merged matrix in posterior
-                #   this "parameter" in constant in columns for observed
+                #   this "parameter" is constant in columns for observed
                 #   but has samples in columns for imputed
                 n_cases <- length( data[[ name_original ]] )
                 var_merged <- matrix( NA , nrow=n , ncol=n_cases )
@@ -176,8 +177,8 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
         }
 
         # build inits
-        for ( j in 1:length(fit@pars) ) {
-            par_name <- fit@pars[ j ]
+        for ( j in 1:length(post) ) {
+            par_name <- names(post)[ j ]
             dims <- dim( post[[par_name]] )
             # scalar
             if ( length(dims)==1 ) init[[par_name]] <- post[[par_name]][i]
@@ -194,7 +195,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
             # ready environment
             e <- list( as.list(data) , as.list(init) )
             e <- unlist( e , recursive=FALSE )
-            
+
             # evaluate
             r <- eval(parse(text=rhs[[k]]),envir=e)
             flink <- fit@formula_parsed$lm[[k]]$link

diff --git a/R/zzz.R b/R/zzz.R
@@ -4,5 +4,6 @@
   theLib <- dirname(system.file(package = "rethinking"))
   pkgdesc <- packageDescription("rethinking", lib.loc = theLib)
   builddate <- gsub(';.*$', '', pkgdesc$Packaged)
-  packageStartupMessage(paste("rethinking (Version ", pkgdesc$Version, ", packaged: ", builddate, ")", sep = ""))
-} 
+  msg <- paste("rethinking (Version ", pkgdesc$Version, ")", sep = "")
+  packageStartupMessage(msg)
+}
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 rethinking
 ==========
 
-This R package accompanies a course and book on Bayesian data analysis. It contains tools for conducting both MAP estimation and Hamiltonian Monte Carlo (through RStan - mc-stan.org). These tools force the user to specify the model as a list of explicit distributional assumptions. This is more tedious than typical formula-based tools, but it is also much more flexible and powerful.
+This R package accompanies a course and book on Bayesian data analysis (McElreath 2016. Statistical Rethinking. CRC Press.). It contains tools for conducting both MAP estimation and Hamiltonian Monte Carlo (through RStan - mc-stan.org). These tools force the user to specify the model as a list of explicit distributional assumptions. This is more tedious than typical formula-based tools, but it is also much more flexible and powerful.
 
 For example, a simple Gaussian model could be specified with this list of formulas:
 
@@ -15,11 +15,32 @@ f <- alist(
 
 The first formula in the list is the likelihood; the second is the prior for ``mu``; the third is the prior for ``sigma`` (implicitly a half-Cauchy, due to positive constraint on ``sigma``).
 
+## Quick Installation
+
+You can find a manual with expanded installation and usage instructions here: ``http://xcelab.net/rm/software/``
+
+Here's the brief verison. 
+
+You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. Then you can install ``rethinking`` from within R using:
+```
+install.packages(c("coda","mvtnorm","devtools"))
+library(devtools)
+devtools::install_github("rmcelreath/rethinking")
+```
+If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has nothing to do with it. See the manual linked above for some hints about getting ``rstan`` installed.
+
 ## MAP estimation
 
-Then to use maximum a posteriori (MAP) fitting:
+To use maximum a posteriori (MAP) fitting:
 ```
 library(rethinking)
+
+f <- alist(
+    y ~ dnorm( mu , sigma ),
+    mu ~ dnorm( 0 , 10 ),
+    sigma ~ dcauchy( 0 , 1 )
+)
+
 fit <- map( 
     f , 
     data=list(y=c(-1,1)) , 
@@ -120,7 +141,8 @@ f3 <- alist(
 
 ## Nice covariance priors
 
-And ``map2stan`` supports decomposition of covariance matrices into vectors of standard deviations and a correlation matrix, such that priors can be specified independently for each:
+The ``inv_wishart`` prior in the model just above is conventional, but not appealing. Since Stan does not use Gibbs sampling, there is no advantage to the ``inv_wishart`` prior. 
+To escape these conventional priors, ``map2stan`` supports decomposition of covariance matrices into vectors of standard deviations and a correlation matrix, such that priors can be specified independently for each:
 ```
 f4 <- alist(
     y ~ dnorm( mu , sigma ),
@@ -134,6 +156,7 @@ f4 <- alist(
 )
 ```
 
+
 # Non-centered parameterization
 Here is a non-centered parameterization that moves the scale parameters in the varying effects prior to the linear model, which is often more efficient for sampling:
 ```
@@ -164,7 +187,7 @@ f4nc <- alist(
     Rho_group ~ dlkjcorr(2)
 )
 ```
-Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling.
+Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling. It will appear in the returned samples, in addition to ``Rho_group``, which is constructed from it.
 
 ## Semi-automated Bayesian imputation
 
@@ -190,7 +213,7 @@ f5 <- alist(
 )
 m5 <- map2stan( f5 , data=list(y=y,x=x) )
 ```
-What ``map2stan`` does is notice the missing values, see the distribution assigned to the variable with the missing values, build the Stan code that uses a mix of observed and estimated ``x`` values in the regression. See the ``stancode(m)`` for details of the implementation.
+What ``map2stan`` does is notice the missing values, see the distribution assigned to the variable with the missing values, build the Stan code that uses a mix of observed and estimated ``x`` values in the regression. See the ``stancode(m5)`` for details of the implementation.
 
 ## Gaussian process
 

diff --git a/man/HPDI.Rd b/man/HPDI.Rd
@@ -23,7 +23,7 @@ PI( samples , prob=0.95 )
 }
 \value{
 }
-\references{McElreath 2011, Statistical Rethinking.}
+\references{}
 \author{Richard McElreath}
 \seealso{\code{\link{HPDinterval}}}
 \examples{