Skip to content

Commit

Permalink
v1.81 - minor notes
Browse files Browse the repository at this point in the history
- Some updates in progress of revising LOO/WAIC methods.
- Added a cross-validation function for quap fits. see cv_quap
- helper functions for ulam growing closer to feature complete
  • Loading branch information
Richard McElreath committed Jan 1, 2019
1 parent e627f5d commit ff4c6f1
Show file tree
Hide file tree
Showing 13 changed files with 307 additions and 22 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.80
Date: 2018-10-23
Version: 1.81
Date: 2019-01-01
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS, mvtnorm, loo
Expand Down
82 changes: 82 additions & 0 deletions R/HMC2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# SIMPLE IMPLEMENTATION OF HAMILTONIAN MONTE CARLO.
#
# Modified slightly based on code by
# Radford M. Neal, 2010.
#
# This original appears in Figure 2 of "MCMC using Hamiltonian dynamics",
# the Handbook of Markov Chain Monte Carlo.
#
# The arguments to the HMC function are as follows:
#
# U A function to evaluate minus the log of the density of the
# distribution to be sampled, plus any constant - ie, the
# "potential energy".
#
# grad_U A function to evaluate the gradient of U.
#
# epsilon The stepsize to use for the leapfrog steps.
#
# L The number of leapfrog steps to do to propose a new state.
#
# current_q The current state (position variables only).
#
# Momentum variables are sampled from independent standard normal
# distributions within this function. The value return is the vector
# of new position variables (equal to current_q if the endpoint of the
# trajectory was rejected).

# this modified version returns trajectories for q,p
# ... for arguments to pass to U and grad_U
# returns H for monitoring divergences
HMC2 <- function (U, grad_U, epsilon, L, current_q , ... ) {
q = current_q
p = rnorm(length(q),0,1) # independent standard normal variates
current_p = p
# Make a half step for momentum at the beginning
p = p - epsilon * grad_U( q , ... ) / 2
# Alternate full steps for position and momentum
qtraj <- matrix(NA,nrow=L+1,ncol=length(q))
ptraj <- qtraj
qtraj[1,] <- current_q
ptraj[1,] <- p
for (i in 1:L)
{
# Make a full step for the position
q = q + epsilon * p
# Make a full step for the momentum, except at end of trajectory
if (i!=L) {
p = p - epsilon * grad_U(q,...)
ptraj[i+1,] <- p
}
qtraj[i+1,] <- q
}
# Make a half step for momentum at the end.
p = p - epsilon * grad_U(q,...) / 2
ptraj[L+1,] <- p
# Negate momentum at end of trajectory to make the proposal symmetric
p = -p
# Evaluate potential and kinetic energies at start and end of trajectory
current_U = U(current_q,...)
current_K = sum(current_p^2) / 2
proposed_U = U(q,...)
proposed_K = sum(p^2) / 2
H0 <- current_U + current_K
H1 <- proposed_U + proposed_K
# Accept or reject the state at end of trajectory, returning either
# the position at the end of the trajectory or the initial position
new_q <- q
accept <- 0
if (runif(1) < exp(current_U-proposed_U+current_K-proposed_K)) {
new_q <- q # accept
accept <- 1
} else
new_q <- current_q # reject

return(
list(
q = new_q,
traj = qtraj,
ptraj = ptraj,
accept = accept ,
dH = H1 - H0 ) )
}
4 changes: 2 additions & 2 deletions R/coeftab.r
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ coeftab_show <- function( object ) {
}
setMethod( "show" , "coeftab" , function(object) coeftab_show(object) )

coeftab_plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , ... ) {
coeftab_plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=0.95 , xlab="Value" , ... ) {
x.orig <- x
xse <- x@se
x <- x@coefs
Expand All @@ -41,7 +41,7 @@ coeftab_plot <- function( x , y , pars , col.ci="black" , by.model=FALSE , prob=

llim <- min(left,na.rm=TRUE)
rlim <- max(right,na.rm=TRUE)
dotchart( x , xlab="Estimate" , xlim=c(llim,rlim) , ... )
dotchart( x , xlab=xlab , xlim=c(llim,rlim) , ... )

for ( k in 1:nrow(x) ) {
for ( m in 1:ncol(x) ) {
Expand Down
2 changes: 1 addition & 1 deletion R/map2stan-templates.r
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ dev <- dev + (-2)*(log_sum_exp(bernoulli_lpmf(1|PAR1),
bernoulli_lpmf(0|PAR1) + binomial_lpmf(OUTCOME|PAR2,PAR3)));
else
dev <- dev + (-2)*(bernoulli_lpmf(0|PAR1) + binomial_lpmf(OUTCOME|PAR2,PAR3));",
num_pars = 2,
num_pars = 3,
par_names = c("prob1","size","prob2"),
par_bounds = c("",""),
par_types = c("real","int","real"),
Expand Down
7 changes: 6 additions & 1 deletion R/map2stan.r
Original file line number Diff line number Diff line change
Expand Up @@ -1339,13 +1339,18 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
code_gq <- gsub( "OUTCOME" , outcome , code_gq , fixed=TRUE )
for ( j in 1:length(parstxt_L) ) {
parname <- as.character(parstxt_L[[j]])
if ( parname %in% names(d) ) {
# add [i]
parname <- concat( parname , "[i]" )
}
parpat <- concat( "PAR" , j )
code_model <- gsub( parpat , parname , code_model , fixed=TRUE )
code_gq <- gsub( parpat , parname , code_gq , fixed=TRUE )
}
# insert into Stan code
m_model_txt <- concat( m_model_txt , indent , code_model , "\n" )
m_gq <- concat( m_gq , indent , code_gq , "\n" )
if ( DIC==TRUE )
m_gq <- concat( m_gq , indent , code_gq , "\n" )

} else {

Expand Down
66 changes: 66 additions & 0 deletions R/sim_train_test.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,70 @@

# new version --- computes LOOIC, LOOCV
sim_train_test <- function (N = 20, k = 3, rho = c(0.15, -0.4), b_sigma = 100, WAIC = FALSE, LOOCV=FALSE , LOOIC=FALSE , cv.cores=1 , return_model=FALSE )
{
n_dim <- 1 + length(rho)
if (n_dim < k)
n_dim <- k
Rho <- diag(n_dim)
for (i in 1:length(rho)) {
Rho[1, i + 1] <- rho[i]
}
Rho[lower.tri(Rho)] <- Rho[upper.tri(Rho)]
X.train <- mvrnorm(n = N, mu = rep(0, n_dim), Sigma = Rho)
X.test <- mvrnorm(n = N, mu = rep(0, n_dim), Sigma = Rho)
mm.train <- matrix(1, nrow = N, ncol = 1)
bnames <- "a"
if (k > 1) {
mm.train <- cbind(mm.train, X.train[, 2:k])
bnames <- c("a", paste("b", 1:(k - 1), sep = ""))
pnames <- paste("b", 1:(k - 1), sep = "")
P <- paste("c(", paste(pnames, collapse = ","), ")",
sep = "", collapse = "")
Pf <- paste(P, "~ dnorm(0,", b_sigma, ")")
}
B <- paste("c(", paste(bnames, collapse = ","), ")", sep = "",
collapse = "")
d <- list(y = X.train[, 1], mm = mm.train, Bvec = B)
flist <- list(y ~ dnorm(mu, 1), mu ~ 0 + mm %*% eval(parse(text = Bvec)))
start.list <- list(a = 0)
if (k > 1) {
flist[[3]] <- eval(parse(text = Pf))
for (i in 2:k) {
ptext <- paste("b", i - 1, sep = "", collapse = "")
start.list[[i]] <- 0
names(start.list)[i] <- ptext
}
}
m <- quap(flist, data = d, start = start.list)
dev.train <- (-2)*sum(lppd(m))
mm.test <- matrix(1, nrow = N, ncol = 1)
if (k > 1)
mm.test <- cbind(mm.test, X.test[, 2:k])
d_test <- list( y=X.test[, 1] , mm=mm.test , Bvec = B )
dev.test <- (-2)*sum(lppd(m,data=d_test))
#dev.test <- sum(dnorm(X.test[, 1], mm.test %*% coef(m), 1, TRUE))
result <- c(dev.train, dev.test)
if (WAIC == TRUE) {
wx <- WAIC(m)
result <- c( result , wx , abs(wx-dev.test) )
}
if (LOOIC == TRUE) {
lx <- LOO(m)
result <- c( result , lx , abs(lx-dev.test) )
}
if (LOOCV == TRUE) {
# compute explicit leave-one-out cross-validation - expensive
cvx <- (-2)*cv_quap( m , start=start.list , cores=cv.cores )
result <- c( result , cvx , abs(cvx-dev.test) )
}
if ( return_model==TRUE ) {
result <- list( model=m , result=result )
}
return(result)
}


# old version from first edition
sim.train.test <- function( N=20 , k=3 , rho=c(0.15,-0.4) , b_sigma=100 , DIC=FALSE , WAIC=FALSE, devbar=FALSE , devbarout=FALSE ) {
#require(MASS)
n_dim <- 1+length(rho)
Expand Down
55 changes: 49 additions & 6 deletions R/ulam.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
# allow explicit declarations
# see tests at end of this file for examples

ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , ... ) {
ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 , iter=1000 , control=list(adapt_delta=0.95) , distribution_library=ulam_dists , macro_library=ulam_macros , custom , declare_all_data=TRUE , log_lik=FALSE , sample=TRUE , messages=TRUE , pre_scan_data=TRUE , ... ) {

data <- as.list(data)

if ( pre_scan_data==TRUE ) {
# pre-scan for index variables (integer) that are numeric by accident
for ( i in 1:length(data) ) {
if ( all( as.integer(data[[i]])==data[[i]] ) )
data[[i]] <- as.integer(data[[i]])
}
}

########################################
# empty Stan code
m_funcs <- "" # functions
Expand Down Expand Up @@ -212,6 +220,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
declare_templates <- list(
real = list( patt="real<CONSTRAINT> NAME[DIM1]" , dims=1 ),
vector = list( patt="vector<CONSTRAINT>[DIM1] NAME[DIM2]" , dims=2 ),
row_vector = list( patt="row_vector<CONSTRAINT>[DIM1] NAME[DIM2]" , dims=2 ),
matrix = list( patt="matrix<CONSTRAINT>[DIM1,DIM2] NAME[DIM3]" , dims=3 ),
int = list( patt="int<CONSTRAINT> NAME[DIM1]" , dims=1 ),
corr_matrix = list( patt="corr_matrix<CONSTRAINT>[DIM1] NAME[DIM2]" , dims=2 ),
Expand All @@ -227,7 +236,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
symbol_list$dims <- list( symbol_list$dims , 1 )

if ( class( symbol_list$dims ) == "name" ) {
# a grouping variable, so need to fetch its UNIQUE length
# has a grouping variable, so need to fetch its UNIQUE length
ul <- length( unique( data[[ as.character(symbol_list$dims) ]] ) )
symbol_list$dims <- list( "vector" , ul )
}
Expand All @@ -251,9 +260,10 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
d <- symbol_list$dims[[i+1]]
if ( !is.null( data[[ as.character(d) ]] ) ) {
# either a grouping variable OR an index length (scalar)
if ( length( data[[ as.character(d) ]] ) > 1 )
if ( length( data[[ as.character(d) ]] ) > 1 ) {
# a grouping variable, so count number of unique values
d <- length(unique( data[[ as.character(d) ]] ))
}
# if an index, don't need to do anything, converted to text later
}
dd <- as.character( d )
Expand Down Expand Up @@ -344,6 +354,9 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
# compose local assignment (usually a linear model) for model block
# need to observe need for loop over assignment and insert [i] after the data symbols on right-hand side
# when assignment is <<- instead then no loop (vectorized assignment)
# need to check also for some operator conversions R -> Stan:
# %*% -> *
# can do this in text as last step? not elegant.
the_dims <- symbols[[symbol]]$dims
N <- NA
if ( class(the_dims)=="character" ) {
Expand Down Expand Up @@ -429,6 +442,14 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
out <- concat( loop_txt , out , indent , "}\n" )
}

# finally check for any function conversions R -> Stan
R_to_Stan_f <- list(
" %*% " = " * " ,
" as.vector(" = " to_vector("
)
for ( j in 1:length(R_to_Stan_f) )
out <- gsub( names(R_to_Stan_f)[j] , R_to_Stan_f[[j]] , out , fixed=TRUE )

# all done
return( out )
}
Expand Down Expand Up @@ -917,6 +938,10 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
if ( symbols[[i]]$type=="data" ) {
Z <- compose_declaration( names(symbols)[i] , symbols[[i]] )
m_data <- concat( m_data , indent , Z , ";\n" )
# check for type compatibility on R and Stan sides
stan_type <- symbols[[i]]$dims
if ( stan_type[[1]]=="int" )
data[[ names(symbols)[i] ]] <- as.integer( data[[ names(symbols)[i] ]] )
}
if ( symbols[[i]]$type=="par" ) {
Z <- compose_declaration( names(symbols)[i] , symbols[[i]] )
Expand Down Expand Up @@ -1102,7 +1127,11 @@ link_ulam <- function( fit , data , post , simplify=TRUE , symbols , ... ) {
return( f )
}

if ( missing(data) ) data <- fit@data
use_orig_data <- FALSE
if ( missing(data) ) {
data <- fit@data
use_orig_data <- TRUE
}
if ( missing(post) ) post <- extract.samples(fit@stanfit)

# how many link functions?
Expand All @@ -1124,7 +1153,11 @@ link_ulam <- function( fit , data , post , simplify=TRUE , symbols , ... ) {
if ( !(names( fit@formula_parsed$link_funcs )[j] %in% symbols) ) next

# get number of cases for this symbol
n_cases <- fit@formula_parsed$link_funcs[[ j ]]$N
if ( use_orig_data==TRUE )
n_cases <- fit@formula_parsed$link_funcs[[ j ]]$N
else
# guess from length of data
n_cases <- length(data[[1]])

# check whether this function contains symbols that need some dimensions added for samples
symbols_so_far <- names(out)
Expand Down Expand Up @@ -1283,5 +1316,15 @@ sim_ulam <- function( fit , data , post , variable , n=1000 , replace=list() , .
return(sim_out)
}


setMethod("nobs", "ulam", function (object, ...) {
z <- attr(object,"nobs")
if ( is.null(z) ) {
# try to get nobs from a link function
link_funcs <- object@formula_parsed$link_funcs
if ( length(link_funcs)>0 ) {
z <- link_funcs[[1]]$N
}
}
return(z)
} )

Loading

0 comments on commit ff4c6f1

Please sign in to comment.