Skip to content

Commit

Permalink
1.92 test test
Browse files Browse the repository at this point in the history
- adding tests for book code
- WAIC/PSIS refactored to return matrices
- added Laffer data example
- sim() method for ulam() refactored - still needs some tests
  • Loading branch information
Richard McElreath committed Nov 11, 2019
1 parent 2e01a81 commit f1a47a3
Show file tree
Hide file tree
Showing 27 changed files with 1,705 additions and 158 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
Package: rethinking
Type: Package
Title: Statistical Rethinking book package
Version: 1.91
Version: 1.92
Date: 2019-10-25
Author: Richard McElreath
Maintainer: Richard McElreath <[email protected]>
Imports: coda, MASS, mvtnorm, loo, shape
Depends: rstan (>= 2.10.0), parallel, methods, stats, graphics, dagitty
Suggests: testthat
Description: Utilities for fitting and comparing models
License: GPL (>= 3)
11 changes: 11 additions & 0 deletions R/ICnumeric_class.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# IC class and show method

setClass( "ICnumeric" , contains="numeric" )

setMethod( "show" , "ICnumeric" , function(object) {
cat( round( object , 1 ) )
if ( !is.null( attr(object,"se") ) ) {
cat( concat( " ±" , round( attr(object,"se") , 1 ) ) )
}
cat("\n")
} )
49 changes: 17 additions & 32 deletions R/compare.r
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ setMethod( "show" , "compareIC" , function(object) {
} )

# new compare function, defaulting to WAIC
compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 ) {
compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh=0 , warn=TRUE ) {

# retrieve list of models
L <- list(...)
Expand All @@ -32,65 +32,48 @@ compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh

# check class of fit models and warn when more than one class represented
classes <- as.character(sapply( L , class ))
if ( any(classes!=classes[1]) ) {
if ( any(classes!=classes[1]) & warn==TRUE ) {
warning("Not all model fits of same class.\nThis is usually a bad idea, because it implies they were fit by different algorithms.\nCheck yourself, before you wreck yourself.")
}

# check nobs for all models
# if different, warn
nobs_list <- try( sapply( L , nobs ) )
if ( any(nobs_list != nobs_list[1]) ) {
if ( any(nobs_list != nobs_list[1]) & warn==TRUE ) {
nobs_out <- paste( mnames , nobs_list , "\n" )
nobs_out <- concat(nobs_out)
warning(concat(
"Different numbers of observations found for at least two models.\nInformation criteria only valid for comparing models fit to exactly same observations.\nNumber of observations for each model:\n",nobs_out))
"Different numbers of observations found for at least two models.\nModel comparison is valid only for models fit to exactly the same observations.\nNumber of observations for each model:\n",nobs_out))
}

dSE.matrix <- matrix( NA , nrow=length(L) , ncol=length(L) )
# deprecate WAIC==TRUE/FALSE flag
# catch it and convert to func intent
if ( WAIC==FALSE ) func <- DIC # assume old code that wants DIC

if ( the_func=="DIC" ) {
IC.list <- lapply( L , function(z) DIC( z , n=n ) )
p.list <- sapply( IC.list , function(x) attr(x,"pD") )
}
if ( the_func=="WAIC" ) {
# use WAIC
# WAIC is processed pointwise, so can compute SE of differences, and summed later
WAIC.list <- lapply( L , function(z) WAIC( z , n=n , refresh=refresh , pointwise=TRUE ) )
p.list <- sapply( WAIC.list , function(x) sum(attr(x,"pWAIC")) )
se.list <- sapply( WAIC.list , function(x) attr(x,"se") )
IC.list <- sapply( WAIC.list , sum )
# compute SE of differences between adjacent models from top to bottom in ranking
colnames(dSE.matrix) <- mnames
rownames(dSE.matrix) <- mnames
for ( i in 1:(length(L)-1) ) {
for ( j in (i+1):length(L) ) {
waic_ptw1 <- WAIC.list[[i]]
waic_ptw2 <- WAIC.list[[j]]
dSE.matrix[i,j] <- as.numeric( sqrt( length(waic_ptw1)*var( waic_ptw1 - waic_ptw2 ) ) )
dSE.matrix[j,i] <- dSE.matrix[i,j]
}#j
}#i
}
if ( the_func=="LOO" | the_func=="PSIS" ) {
# use LOO
LOO.list <- lapply( L , function(z) PSIS( z , n=n , refresh=refresh , pointwise=TRUE ) )
p.list <- sapply( LOO.list , function(x) sum(attr(x,"pPSIS")) )
se.list <- sapply( LOO.list , function(x) attr(x,"se") )
IC.list <- sapply( LOO.list , sum )

if ( the_func %in% c("WAIC","PSIS","LOO") ) {
IC.list.pw <- lapply( L , function(z) do.call( the_func , list( z , n=n , refresh=refresh , pointwise=TRUE , warn=warn ) ) )
p.list <- sapply( IC.list.pw , function(x) sum( x$penalty ) )
se.list <- sapply( IC.list.pw , function(x) x$std_err[1] )
IC.list <- sapply( IC.list.pw , function(x) sum( x[[1]] ) )
# compute SE of differences between adjacent models from top to bottom in ranking
colnames(dSE.matrix) <- mnames
rownames(dSE.matrix) <- mnames
for ( i in 1:(length(L)-1) ) {
for ( j in (i+1):length(L) ) {
loo_ptw1 <- LOO.list[[i]]
loo_ptw2 <- LOO.list[[j]]
dSE.matrix[i,j] <- as.numeric( sqrt( length(loo_ptw1)*var( loo_ptw1 - loo_ptw2 ) ) )
ic_ptw1 <- IC.list.pw[[i]][[1]]
ic_ptw2 <- IC.list.pw[[j]][[1]]
dSE.matrix[i,j] <- as.numeric( sqrt( length(ic_ptw1)*var( ic_ptw1 - ic_ptw2 ) ) )
dSE.matrix[j,i] <- dSE.matrix[i,j]
}#j
}#i
}

if ( !(the_func %in% c("DIC","WAIC","LOO","PSIS")) ) {
# unrecognized IC function; just wing it
IC.list <- lapply( L , function(z) func( z ) )
Expand All @@ -103,6 +86,7 @@ compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh

if ( the_func=="DIC" )
result <- data.frame( DIC=IC.list , pD=p.list , dDIC=dIC , weight=w.IC )

if ( the_func=="WAIC" ) {
# find out which model has dWAIC==0
topm <- which( dIC==0 )
Expand All @@ -116,6 +100,7 @@ compare <- function( ... , n=1e3 , sort="WAIC" , func=WAIC , WAIC=TRUE , refresh
result <- data.frame( PSIS=IC.list , pPSIS=p.list , dPSIS=dIC ,
weight=w.IC , SE=se.list , dSE=dSEcol )
}

if ( !(the_func %in% c("DIC","WAIC","LOO","PSIS")) ) {
result <- data.frame( IC=IC.list , dIC=dIC , weight=w.IC )
}
Expand Down
104 changes: 57 additions & 47 deletions R/sim.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,11 @@ function( fit , data , n=1000 , ... ) {
}
)

setMethod("sim", "map",
function( fit , data , n=1000 , post , vars , ll=FALSE , refresh=0 , replace=list() , debug=FALSE , ... ) {
# when ll=FALSE, simulates sampling, one sample for each sample in posterior
# when ll=TRUE, computes loglik of each observation, for each sample in posterior

# vars argument optional
# when missing, simulations only first observed variable in formula
# when present, should be a character vector naming variables to simulate, in order to simulate them
# order matters, because some may be needed for the others to simulate right

########################################
# check arguments
if ( class(fit)!="map" ) stop("Requires map/quap fit")
if ( missing(data) ) {
data <- fit@data
} else {
# make sure it's a list, otherwise can't hold sample matrices from sims
data <- as.list(data)
# check for vars to sim that are not in data list
if ( !missing(vars) ) {
for ( i in 1:length(vars) ) {
if ( !(vars[i] %in% names(data)) ) {
# insert dummy for var - will get simulated over later
data[[ vars[i] ]] <- rep( 0 , length(data[[1]]) )
}
}#i
}
}

if ( missing(post) ) {
post <- extract.samples(fit,n=n)
} else {
n <- dim(post[[1]])[1]
if ( is.null(n) ) n <- length(post[[1]])
}

# variables to sim
if ( missing(vars) ) {
# extract likelihood, assuming it is first element of formula
lik <- flist_untag(fit@formula)[[1]]
# discover outcome
vars <- as.character(lik[[2]])
} else {
# vars listed
if ( debug==TRUE ) print(vars)
}
# function at heart of all sim methods
sim_core <- function( fit , data , post , vars , n , refresh=0 , replace=list() , debug=FALSE , ll=FALSE , ... ) {

# loop over vars
sim_vars <- list()

for ( var in vars ) {

# find a distribtional assignment with var on left
Expand Down Expand Up @@ -206,6 +162,60 @@ function( fit , data , n=1000 , post , vars , ll=FALSE , refresh=0 , replace=lis
data[[ var ]] <- sim_out # make available to subsequent vars

}#var

return(sim_vars)

}

setMethod("sim", "map",
function( fit , data , n=1000 , post , vars , ll=FALSE , refresh=0 , replace=list() , debug=FALSE , ... ) {
# when ll=FALSE, simulates sampling, one sample for each sample in posterior
# when ll=TRUE, computes loglik of each observation, for each sample in posterior

# vars argument optional
# when missing, simulations only first observed variable in formula
# when present, should be a character vector naming variables to simulate, in order to simulate them
# order matters, because some may be needed for the others to simulate right

########################################
# check arguments
if ( class(fit)!="map" ) stop("Requires map/quap fit")
if ( missing(data) ) {
data <- fit@data
} else {
# make sure it's a list, otherwise can't hold sample matrices from sims
data <- as.list(data)
# check for vars to sim that are not in data list
if ( !missing(vars) ) {
for ( i in 1:length(vars) ) {
if ( !(vars[i] %in% names(data)) ) {
# insert dummy for var - will get simulated over later
data[[ vars[i] ]] <- rep( 0 , length(data[[1]]) )
}
}#i
}
}

if ( missing(post) ) {
post <- extract.samples(fit,n=n)
} else {
n <- dim(post[[1]])[1]
if ( is.null(n) ) n <- length(post[[1]])
}

# variables to sim
if ( missing(vars) ) {
# extract likelihood, assuming it is first element of formula
lik <- flist_untag(fit@formula)[[1]]
# discover outcome
vars <- as.character(lik[[2]])
} else {
# vars listed
if ( debug==TRUE ) print(vars)
}

# loop over vars
sim_vars <- sim_core( fit=fit , data=data , post=post , vars=vars , n=n , refresh=refresh , replace=replace , debug=debug , ll=ll , ... )

# result
if ( length(sim_vars)==1 ) sim_vars <- sim_vars[[1]]
Expand Down
12 changes: 10 additions & 2 deletions R/ulam-function.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,17 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
# pre-scan for character variables and remove
x_to_remove <- c()
for ( i in 1:length(data) ) {
if ( class(data[[i]]) %in% c("character","factor") ) {
if ( class(data[[i]]) =="character" ) {
x_to_remove <- c( x_to_remove , names(data)[i] )
}
if ( class(data[[i]]) =="factor" ) {
if ( coerce_int==FALSE )
x_to_remove <- c( x_to_remove , names(data)[i] )
else {
# coerce factor to index variable
data[[i]] <- as.integer( data[[i]] )
}
}
}#i
if ( length(x_to_remove)>0 ) {
if ( messages==TRUE ) {
Expand All @@ -43,7 +51,7 @@ ulam <- function( flist , data , pars , pars_omit , start , chains=1 , cores=1 ,
# pre-scan for index variables (integer) that are numeric by accident
for ( i in 1:length(data) ) {
if ( class(data[[i]])!="character" ) {
if ( all( as.integer(data[[i]])==data[[i]] ) ) {
if ( all( as.integer(data[[i]])==data[[i]] , na.rm=TRUE ) ) {
#data[[i]] <- as.integer(data[[i]])
if ( class(data[[i]])!="integer" & class(data[[i]])!="matrix" ) {
if ( coerce_int==TRUE ) {
Expand Down
59 changes: 57 additions & 2 deletions R/ulam-link-sim.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ setMethod( "link" , "ulam" , function(fit ,data ,...) link_ulam(fit,data,...) )

# sim method assumes outcome var is first var on left in formula line 1
# can use variable argument to specify any other variable
sim_ulam <- function( fit , data , post , variable , n=1000 , replace=list() , ... ) {
sim_ulam <- function( fit , data , post , vars , variable , n=1000 , replace=list() , ... ) {

########################################
# check arguments
Expand Down Expand Up @@ -211,6 +211,61 @@ sim_ulam <- function( fit , data , post , variable , n=1000 , replace=list() , .

return(sim_out)
}
setMethod( "sim" , "ulam" , function(fit,data,...) sim_ulam(fit,data,...) )

sim_ulam_new <- function( fit , data , post , vars , variable , n=1000 , replace=list() , debug=FALSE , ll=FALSE , refresh=0 , ... ) {

########################################
# check arguments
if ( missing(data) ) {
data <- fit@data
} else {
# make sure it's a list, otherwise can't hold sample matrices from sims
data <- as.list(data)
# check for vars to sim that are not in data list
if ( !missing(vars) ) {
for ( i in 1:length(vars) ) {
if ( !(vars[i] %in% names(data)) ) {
# insert dummy for var - will get simulated over later
data[[ vars[i] ]] <- rep( 0 , length(data[[1]]) )
}
}#i
}
}

if ( n==0 ) {
n <- stan_total_samples(fit@stanfit)
} else {
tot_samples <- stan_total_samples(fit@stanfit)
n <- min(n,tot_samples)
}

if ( missing(post) ) {
post <- extract.samples(fit,n=n)
} else {
n <- dim(post[[1]])[1]
if ( is.null(n) ) n <- length(post[[1]])
}

# variables to sim
if ( missing(vars) ) {
# extract likelihood, assuming it is first element of formula
lik <- flist_untag(fit@formula)[[1]]
# discover outcome
vars <- as.character(lik[[2]])
} else {
# vars listed
if ( debug==TRUE ) print(vars)
}

# loop over vars
sim_vars <- sim_core( fit=fit , data=data , post=post , vars=vars , n=n , refresh=refresh , replace=replace , debug=debug , ll=ll , ... )

# result
if ( length(sim_vars)==1 ) sim_vars <- sim_vars[[1]]
return( sim_vars )

}

setMethod( "sim" , "ulam" , function(fit,data,...) sim_ulam_new(fit,data,...) )


Loading

0 comments on commit f1a47a3

Please sign in to comment.