Skip to content

Commit

Permalink
Version 1.56
Browse files Browse the repository at this point in the history
- a bunch of edits to man pages
- added a logit function for convenience
- uniform priors in map2stan work via constaints now and are commented
out in the model block, where they would be redundant
- added automatic positive constraints for scale paramters in
dbetabinom, dgampois, dgamma2
- fixed a subtle bug with WAIC calculations for map2stan models using
imputation
- Added quick installation section to README
  • Loading branch information
Richard McElreath committed Jul 20, 2015
1 parent 9cbaa47 commit a6631ad
Show file tree
Hide file tree
Showing 28 changed files with 234 additions and 47 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.55
Date: 2015-05-12
Version: 1.56
Date: 2015-07-20
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS, mvtnorm
Expand Down
4 changes: 4 additions & 0 deletions R/distributions.r
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ inv_logit <- function( x ) {
p
}

logit <- function( x ) {
log( x ) - log( 1-x )
}

pordlogit <- function( x , phi , a , log=FALSE ) {
a <- c( as.numeric(a) , Inf )
if ( length(phi) == 1 ) {
Expand Down
65 changes: 59 additions & 6 deletions R/map2stan-templates.r
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ map2stan.templates <- list(

kout <- list('0','1') # z ~ normal(0,1)
indent <- " "

###########
# Sigma and Rho

Expand Down Expand Up @@ -603,10 +603,23 @@ map2stan.templates <- list(
R_name = "dunif",
stan_name = "uniform",
num_pars = 2,
par_names = c("alpha","beta"),
par_bounds = c("<lower=0>","<lower=0>"),
par_names = c("min","max"),
par_bounds = c("",""),
par_types = c("real","real"),
out_type = "real",
out_map = function(kout,kin,...) {
# need to coerce bounded constraints on the parameter
constr_list <- get( "constraints" , envir=parent.frame() )
kout <- as.character(kout)
if ( is.null(constr_list[[kout]]) ) {
constr_list[[kout]] <- concat( "lower=" , as.character(kin[[1]]) , ",upper=" , as.character(kin[[2]]) )
assign( "constraints" , constr_list , envir=parent.frame() )
}
# add comment tag in front, so that uniform dist isn't computed during sims
# the bounded contraints effectively establish uniform prior in Stan
kout <- concat( "// " , kout )
return(kout)
},
par_map = function(k,...) {
return(k);
},
Expand Down Expand Up @@ -670,9 +683,17 @@ map2stan.templates <- list(
out_type = "real",
par_map = function(k,...) {
p_name <- k[[1]];
theta_name <- k[[2]];
theta_name <- as.character(k[[2]]);
k[[1]] <- concat(p_name,"*",theta_name);
k[[2]] <- concat("(1-",p_name,")*",theta_name);

# need to coerce bounded constraints on theta parameter
constr_list <- get( "constraints" , envir=parent.frame() )
if ( is.null(constr_list[[theta_name]]) ) {
constr_list[[theta_name]] <- "lower=0"
assign( "constraints" , constr_list , envir=parent.frame() )
}

return(k);
},
vectorized = TRUE
Expand All @@ -688,9 +709,17 @@ map2stan.templates <- list(
out_type = "int",
par_map = function(k,...) {
p_name <- k[[2]];
theta_name <- k[[3]];
theta_name <- as.character(k[[3]]);
k[[2]] <- concat(p_name,"*",theta_name);
k[[3]] <- concat("(1-",p_name,")*",theta_name);

# need to coerce bounded constraints on theta parameter
constr_list <- get( "constraints" , envir=parent.frame() )
if ( is.null(constr_list[[theta_name]]) ) {
constr_list[[theta_name]] <- "lower=0"
assign( "constraints" , constr_list , envir=parent.frame() )
}

return(k);
},
vectorized = TRUE
Expand Down Expand Up @@ -758,6 +787,14 @@ map2stan.templates <- list(
scale <- as.character(k[[2]])
k[[1]] <- concat( mu , "/" , scale )
k[[2]] <- concat( "1/" , scale )

# need to coerce bounded constraints on scale parameter
constr_list <- get( "constraints" , envir=parent.frame() )
if ( is.null(constr_list[[scale]]) ) {
constr_list[[scale]] <- "lower=0"
assign( "constraints" , constr_list , envir=parent.frame() )
}

return(k);
},
vectorized = TRUE
Expand All @@ -773,9 +810,17 @@ map2stan.templates <- list(
out_type = "int",
par_map = function(k,...) {
mu_name <- k[[1]];
scale_name <- k[[2]];
scale_name <- as.character(k[[2]]);
k[[1]] <- concat(mu_name);
k[[2]] <- concat(scale_name);

# need to coerce bounded constraints on scale parameter
constr_list <- get( "constraints" , envir=parent.frame() )
if ( is.null(constr_list[[scale_name]]) ) {
constr_list[[scale_name]] <- "lower=0"
assign( "constraints" , constr_list , envir=parent.frame() )
}

return(k);
},
vectorized = TRUE
Expand Down Expand Up @@ -808,6 +853,14 @@ dev <- dev + (-2)*(bernoulli_log(0,PAR1) + gamma_log(OUTCOME,PAR2,PAR3));",
scale <- as.character(k[[3]])
k[[2]] <- concat( mu , "[i]/" , scale )
k[[3]] <- concat( "1/" , scale )

# need to coerce bounded constraints on scale parameter
constr_list <- get( "constraints" , envir=parent.frame() )
if ( is.null(constr_list[[scale]]) ) {
constr_list[[scale]] <- "lower=0"
assign( "constraints" , constr_list , envir=parent.frame() )
}

return(k);
},
vectorized = FALSE
Expand Down
28 changes: 25 additions & 3 deletions R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,9 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
d[[ missingness_name ]] <- var_missingness
# flag as used
fp[['used_predictors']][[missingness_name]] <- list( var=missingness_name , N=N_missing_name , type="int" )
# trap for only 1 missing value and vectorization issues
if ( length(var_missingness)==1 )
fp[['used_predictors']][[missingness_name]] <- list( var=missingness_name , type="index" )
# replace variable with missing values
d[[undot(lik$outcome)]] <- var_temp

Expand Down Expand Up @@ -614,7 +617,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
flag_mvprior <- FALSE
if ( length(RHS)>1 ) {
fname <- as.character( RHS[[1]] )
if ( fname %in% c("dmvnorm","dmvnorm2","normal","multi_normal") )
if ( fname %in% c("dmvnorm","dmvnorm2","multi_normal") )
flag_mvprior <- TRUE
}
if ( flag_mvprior==FALSE ) {
Expand Down Expand Up @@ -775,7 +778,16 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
if ( class(prior$par_out)!="character" )
prior$par_out <- deparse(prior$par_out)
parstxt <- paste( klist , collapse=" , " )
txt <- concat( indent , prior$par_out , " ~ " , prior$density , "( " , parstxt , " )" , prior$T_text , ";" )

# check for special formatting of left hand side
# also handle some constraints here
lhstxt <- prior$par_out
if ( !is.null(tmplt$out_map) ) {
lhstxt <- tmplt$out_map( prior$par_out , prior$pars_in , environment() )
}

# build text
txt <- concat( indent , lhstxt , " ~ " , prior$density , "( " , parstxt , " )" , prior$T_text , ";" )

#m_model_priors <- concat( m_model_priors , txt , "\n" )
m_model_txt <- concat( m_model_txt , txt , "\n" )
Expand Down Expand Up @@ -1054,15 +1066,25 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l

# imputation stuff
if ( outcome %in% names(impute_bank) ) {
N_miss <- impute_bank[[lik$outcome]]$N_miss
# add _merge suffix
outcome <- concat(outcome,suffix_merge)
# build code in transformed parameters that constructs x_merge
m_tpars1 <- concat( m_tpars1 , indent , "real " , outcome , "[N];\n" )
m_tpars2 <- concat( m_tpars2 , indent , outcome , " <- " , lik$outcome , ";\n" )
m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:" , lik$N_name , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
# trap for single missing value and vectorization issues
if ( N_miss>1 )
m_tpars2 <- concat( m_tpars2 , indent , "for ( u in 1:" , lik$N_name , "_missing ) " , outcome , "[" , lik$outcome , "_missingness[u]] <- " , lik$outcome , "_impute[u]" , ";\n" )
else
m_tpars2 <- concat( m_tpars2 , indent , outcome , "[" , lik$outcome , "_missingness] <- " , lik$outcome , "_impute" , ";\n" )
# add x_impute to start list
imputer_name <- concat(lik$outcome,"_impute")
start[[ imputer_name ]] <- rep( impute_bank[[lik$outcome]]$init , impute_bank[[lik$outcome]]$N_miss )
if ( N_miss>1 )
types[[ imputer_name ]] <- c("vector", concat("[",lik$N_name,"_missing]") )
else
# only one missing value, so cannot use vector
types[[ imputer_name ]] <- "real"
# make sure x_impute is in pars
#pars[[ imputer_name ]] <- imputer_name
}
Expand Down
17 changes: 17 additions & 0 deletions R/zDIC-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,20 @@ function( object , ... ) {
return(val)
}
)

# function that takes mean of each parameter in a named list of samples
postmeans <- function(post) {
Epost <- list()
for ( i in 1:length(post) ) {
dims <- length( dim( post[[i]] ) )
name <- names(post)[i]
if ( name!="lp__" & name!="dev" ) {
if ( dims==1 ) {
Epost[[ name ]] <- mean( post[[i]] )
} else {
Epost[[ name ]] <- apply( post[[i]] , 2:dims , mean )
}
}
}#i
return(Epost)
}
10 changes: 5 additions & 5 deletions R/zWAIC-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@ function( object , n=0 , refresh=0.1 , pointwise=FALSE , ... ) {
if ( pointwise==FALSE ) return( old_waic )
}
}

# compute linear model values at each sample
if ( refresh > 0 ) message("Constructing posterior predictions")
lm_vals <- link( object , n=n , refresh=refresh , flatten=FALSE )


# extract samples --- will need for inline parameters e.g. sigma in likelihood
post <- extract.samples( object )

n_samples <- dim( post[[1]] )[1]
if ( n == 0 ) n <- n_samples # special flag for all samples in fit
#if ( n_samples < n ) n <- n_samples

# compute linear model values at each sample
if ( refresh > 0 ) message("Constructing posterior predictions")
lm_vals <- link( object , n=n , refresh=refresh , flatten=FALSE )

# compute log-lik at each sample
liks <- object@formula_parsed$likelihood
n_lik <- length( liks )
Expand Down
11 changes: 6 additions & 5 deletions R/z_link-map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR

# get samples from Stan fit
if ( missing(post) )
post <- extract.samples(fit)
post <- extract.samples(fit,n=n)

# check n and truncate accordingly
# if ( n==0 )
Expand Down Expand Up @@ -134,9 +134,10 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
name_original <- names(fit@formula_parsed$impute_bank)[kk]
name_merge <- concat( name_original , "_merge" )
# _merge name should already be in linear model
#rhs0 <- gsub( name_merge , concat(name_merge,"[i,]") , rhs0 , fixed=TRUE )
#rhs0 <- gsub( name_original , name_merge , rhs0 , fixed=TRUE )
# construct merged matrix in posterior
# this "parameter" in constant in columns for observed
# this "parameter" is constant in columns for observed
# but has samples in columns for imputed
n_cases <- length( data[[ name_original ]] )
var_merged <- matrix( NA , nrow=n , ncol=n_cases )
Expand Down Expand Up @@ -176,8 +177,8 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
}

# build inits
for ( j in 1:length(fit@pars) ) {
par_name <- fit@pars[ j ]
for ( j in 1:length(post) ) {
par_name <- names(post)[ j ]
dims <- dim( post[[par_name]] )
# scalar
if ( length(dims)==1 ) init[[par_name]] <- post[[par_name]][i]
Expand All @@ -194,7 +195,7 @@ function( fit , data , n=1000 , post , refresh=0.1 , replace=list() , flatten=TR
# ready environment
e <- list( as.list(data) , as.list(init) )
e <- unlist( e , recursive=FALSE )

# evaluate
r <- eval(parse(text=rhs[[k]]),envir=e)
flink <- fit@formula_parsed$lm[[k]]$link
Expand Down
5 changes: 3 additions & 2 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
theLib <- dirname(system.file(package = "rethinking"))
pkgdesc <- packageDescription("rethinking", lib.loc = theLib)
builddate <- gsub(';.*$', '', pkgdesc$Packaged)
packageStartupMessage(paste("rethinking (Version ", pkgdesc$Version, ", packaged: ", builddate, ")", sep = ""))
}
msg <- paste("rethinking (Version ", pkgdesc$Version, ")", sep = "")
packageStartupMessage(msg)
}
33 changes: 28 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
rethinking
==========

This R package accompanies a course and book on Bayesian data analysis. It contains tools for conducting both MAP estimation and Hamiltonian Monte Carlo (through RStan - mc-stan.org). These tools force the user to specify the model as a list of explicit distributional assumptions. This is more tedious than typical formula-based tools, but it is also much more flexible and powerful.
This R package accompanies a course and book on Bayesian data analysis (McElreath 2016. Statistical Rethinking. CRC Press.). It contains tools for conducting both MAP estimation and Hamiltonian Monte Carlo (through RStan - mc-stan.org). These tools force the user to specify the model as a list of explicit distributional assumptions. This is more tedious than typical formula-based tools, but it is also much more flexible and powerful.

For example, a simple Gaussian model could be specified with this list of formulas:

Expand All @@ -15,11 +15,32 @@ f <- alist(

The first formula in the list is the likelihood; the second is the prior for ``mu``; the third is the prior for ``sigma`` (implicitly a half-Cauchy, due to positive constraint on ``sigma``).

## Quick Installation

You can find a manual with expanded installation and usage instructions here: ``http://xcelab.net/rm/software/``

Here's the brief verison.

You'll need to install ``rstan`` first. Go to ``http://mc-stan.org`` and follow the instructions for your platform. Then you can install ``rethinking`` from within R using:
```
install.packages(c("coda","mvtnorm","devtools"))
library(devtools)
devtools::install_github("rmcelreath/rethinking")
```
If there are any problems, they likely arise when trying to install ``rstan``, so the ``rethinking`` package has nothing to do with it. See the manual linked above for some hints about getting ``rstan`` installed.

## MAP estimation

Then to use maximum a posteriori (MAP) fitting:
To use maximum a posteriori (MAP) fitting:
```
library(rethinking)
f <- alist(
y ~ dnorm( mu , sigma ),
mu ~ dnorm( 0 , 10 ),
sigma ~ dcauchy( 0 , 1 )
)
fit <- map(
f ,
data=list(y=c(-1,1)) ,
Expand Down Expand Up @@ -120,7 +141,8 @@ f3 <- alist(

## Nice covariance priors

And ``map2stan`` supports decomposition of covariance matrices into vectors of standard deviations and a correlation matrix, such that priors can be specified independently for each:
The ``inv_wishart`` prior in the model just above is conventional, but not appealing. Since Stan does not use Gibbs sampling, there is no advantage to the ``inv_wishart`` prior.
To escape these conventional priors, ``map2stan`` supports decomposition of covariance matrices into vectors of standard deviations and a correlation matrix, such that priors can be specified independently for each:
```
f4 <- alist(
y ~ dnorm( mu , sigma ),
Expand All @@ -134,6 +156,7 @@ f4 <- alist(
)
```


# Non-centered parameterization
Here is a non-centered parameterization that moves the scale parameters in the varying effects prior to the linear model, which is often more efficient for sampling:
```
Expand Down Expand Up @@ -164,7 +187,7 @@ f4nc <- alist(
Rho_group ~ dlkjcorr(2)
)
```
Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling.
Internally, a Cholesky factor ``L_Rho_group`` is used to perform sampling. It will appear in the returned samples, in addition to ``Rho_group``, which is constructed from it.

## Semi-automated Bayesian imputation

Expand All @@ -190,7 +213,7 @@ f5 <- alist(
)
m5 <- map2stan( f5 , data=list(y=y,x=x) )
```
What ``map2stan`` does is notice the missing values, see the distribution assigned to the variable with the missing values, build the Stan code that uses a mix of observed and estimated ``x`` values in the regression. See the ``stancode(m)`` for details of the implementation.
What ``map2stan`` does is notice the missing values, see the distribution assigned to the variable with the missing values, build the Stan code that uses a mix of observed and estimated ``x`` values in the regression. See the ``stancode(m5)`` for details of the implementation.

## Gaussian process

Expand Down
2 changes: 1 addition & 1 deletion man/HPDI.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ PI( samples , prob=0.95 )
}
\value{
}
\references{McElreath 2011, Statistical Rethinking.}
\references{}
\author{Richard McElreath}
\seealso{\code{\link{HPDinterval}}}
\examples{
Expand Down
Loading

0 comments on commit a6631ad

Please sign in to comment.