v1.47 - Gaussian processes and housekeeping

- added data(Crofoot) - data(islands) renamed data(Kline) - various man page updates - cleaning up namespace (in progress) - resample accepts optional data list now - Gaussian process L2 norm added to map2stan as GPL2 distribution. Still needs a man page, but example added to README - map2stan coerces index variables to integer now -
rmcelreath · Dec 24, 2014 · 0481862 · 0481862
1 parent 0f6fcc9
commit 0481862
Show file tree

Hide file tree

Showing 20 changed files with 349 additions and 78 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,12 +1,11 @@
 Package: rethinking
 Type: Package
 Title: Statistical Rethinking book package
-Version: 1.46
-Date: 2014-11-3
+Version: 1.47
+Date: 2014-12-23
 Author: Richard McElreath
 Maintainer: Richard McElreath <[email protected]>
-Depends:
-    MASS
-Imports: coda
+Imports: coda, MASS
+Extends: rstan
 Description: Utilities for fitting and comparing models
-License: GPLv3
+License: GPL (>= 3)
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,3 +1,4 @@
-exportPattern(".")
+exportPattern("^[^x]")
 importFrom(coda, HPDinterval)
-importFrom(coda, as.mcmc)
+importFrom(coda, as.mcmc)
+importFrom(MASS, mvrnorm)
diff --git a/R/distributions.r b/R/distributions.r
@@ -142,16 +142,8 @@ rlaplace <- function(n,location=0,lambda=1) {
     y
 }
 
-### softmax rule for multinomial
-multilogistic <- function( x , lambda=1 , diff=TRUE , log=FALSE ) {
-    if ( diff==TRUE ) x <- x - min(x)
-    f <- exp( lambda * x )
-    if ( log==FALSE ) result <- f / sum(f)
-    if ( log==TRUE ) result <- log(f) - log(sum(f))
-    result
-}
-
 
+# onion method correlation matrix
 dlkjcorr <- function( x , eta=1 , log=TRUE ) {
     det(x)^(eta-1)
 }
@@ -277,6 +269,15 @@ rcategorical <- function( n , prob ) {
     return(y)
 }
 
+### softmax rule for multinomial
+multilogistic <- function( x , lambda=1 , diff=TRUE , log=FALSE ) {
+    if ( diff==TRUE ) x <- x - min(x)
+    f <- exp( lambda * x )
+    if ( log==FALSE ) result <- f / sum(f)
+    if ( log==TRUE ) result <- log(f) - log(sum(f))
+    result
+}
+
 # multinomial logit link
 # needs to recognize vectors, as well
 softmax <- function( ... ) {

diff --git a/R/glimmer.R b/R/glimmer.R
@@ -2,7 +2,7 @@
 # translate glmer style model formulas into equivalent formula alist suitable for map2stan
 # just a stub for now
 
-parse_glimmer_formula <- function( formula , data ) {
+xparse_glimmer_formula <- function( formula , data ) {
     ## take a formula and parse into fixed effect and varying effect lists
     nobars <- function(term) {
         if (!('|' %in% all.names(term))) return(term)
@@ -162,7 +162,7 @@ glimmer <- function( formula , data , family=gaussian , prefix=c("b_","v_") , de
     prior_list <- alist()
 
     # parse
-    pf <- parse_glimmer_formula( formula , data )
+    pf <- xparse_glimmer_formula( formula , data )
     pf$yname <- undot(pf$yname)
 
     # build likelihood

diff --git a/R/map-class.r b/R/map-class.r
@@ -106,7 +106,7 @@ function( object , n=10000 , clean.names=TRUE , ... ) {
 
 setMethod("extract.samples", "map",
 function(object,n=1e4,...){
-    require(MASS)
+    # require(MASS) # import now, so no need to require?
     mu <- object@coef
     result <- as.data.frame( mvrnorm( n=n , mu=mu , Sigma=vcov(object) ) )
     # convert vector parameters to vectors in list

diff --git a/R/map2stan-class.r b/R/map2stan-class.r
@@ -140,14 +140,14 @@ setMethod("summary", "map2stan", function(object){
 
 # resample from compiled map2stan fit
 # can also run on multiple cores
-resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , ... ) {
+resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC=TRUE , WAIC=TRUE , rng_seed , data , ... ) {
     if ( !(class(object)%in%(c("map2stan"))) )
         stop( "Requires map2stan fit" )
-
+    if ( missing(data) ) data <- object@data
     init <- list()
     if ( cores==1 | chains==1 ) {
         for ( i in 1:chains ) init[[i]] <- object@start
-        fit <- stan( fit=object@stanfit , data=object@data , init=init , pars=object@pars , iter=iter , warmup=warmup , chains=chains , ... )
+        fit <- stan( fit=object@stanfit , data=data , init=init , pars=object@pars , iter=iter , warmup=warmup , chains=chains , ... )
     } else {
         init[[1]] <- object@start
         require(parallel)
@@ -158,14 +158,14 @@ resample <- function( object , iter=1e4 , warmup=1000 , chains=1 , cores=1 , DIC
             # hand off to mclapply
             sflist <- mclapply( 1:chains , mc.cores=cores ,
                 function(chainid)
-                    stan( fit=object@stanfit , data=object@data , init=init , pars=object@pars , iter=iter , warmup=warmup , chains=1 , seed=rng_seed, chain_id=chainid , ... )
+                    stan( fit=object@stanfit , data=data , init=init , pars=object@pars , iter=iter , warmup=warmup , chains=1 , seed=rng_seed, chain_id=chainid , ... )
             )
         } else {
             # Windows
             # so use parLapply instead
             CL = makeCluster(cores)
             fit <- object@stanfit
-            data <- object@data
+            #data <- object@data
             pars <- object@pars
             env0 <- list( fit=fit, data=data, pars=pars, rng_seed=rng_seed, iter=iter, warmup=warmup )
             clusterExport(cl = CL, c("iter","warmup","data", "fit", "pars", "rng_seed"), as.environment(env0))

diff --git a/R/map2stan-templates.r b/R/map2stan-templates.r
@@ -193,10 +193,15 @@ map2stan.templates <- list(
 
             # get constraints and add <lower=0> for sigma vector
             constr_list <- get( "constraints" , envir=e )
-            if ( is.null(constr_list[[Sigma_name]]) ) {
-                constr_list[[Sigma_name]] <- "lower=0"
-                assign( "constraints" , constr_list , envir=e )
+            isnum <- function(x) {
+                xn <- suppressWarnings(as.numeric(x))
+                return( ifelse(is.na(xn),FALSE,TRUE) ) 
             }
+            if ( !isnum(Sigma_name) )
+                if ( is.null(constr_list[[Sigma_name]]) ) {
+                    constr_list[[Sigma_name]] <- "lower=0"
+                    assign( "constraints" , constr_list , envir=e )
+                }
 
             # result
             return(kout);
@@ -205,15 +210,75 @@ map2stan.templates <- list(
     ),
     GaussianProcess = list(
         name = "GaussianProcess",
-        R_name = "dGP",
+        R_name = "GPL2",
         stan_name = "multi_normal",
-        num_pars = 3,
-        par_names = c("Dmat","scale","rate"),
-        par_bounds = c("","<lower=0,upper=1>","<lower=0>"),
-        par_types = c("matrix","real","real"),
+        num_pars = 4,
+        par_names = c("d","eta_sq","rho_sq","sig_sq"),
+        par_bounds = c("","<lower=0>","<lower=0>","<lower=0>"),
+        par_types = c("matrix","real","real","real"),
         out_type = "vector",
-        par_map = function(k,...) {
-            return(k);
+        par_map = function(k,e,n,N_txt,...) {
+            indent <- "    "
+            kout <- list()
+            # need to fill Mu with zeros
+            # make sure Mu matches length
+            kout[[1]] <- concat( "rep_vector(0," , N_txt , ")" )
+
+            # need to construct Sigma
+            Cov_name <- concat( "SIGMA_" , k[[1]] )
+            kout[[2]] <- Cov_name
+
+            ## handle temp cov matrix declaration
+            # get model declarations from parent
+            m_tmp <- get( "m_model_declare" , envir=e )
+            # add local covariance matrix variable
+            # will then build it from 'd' matrix in model block
+            m_tmp <- concat( m_tmp , indent , "matrix[",N_txt,",",N_txt,"] " , Cov_name , ";\n" )
+            # assign declarations text to parent environment
+            assign( "m_model_declare" , m_tmp , envir=e )
+
+            ## handle loop to construct cov matrix in model block
+            # do this in 'm_model_txt' so is just before the ~ statement
+            m_tmp <- get( "m_model_txt" , envir=e )
+            m_tmp <- concat( m_tmp , indent , "for ( i in 1:(" , N_txt , "-1) )\n" )
+            m_tmp <- concat( m_tmp , indent , indent , "for ( j in (i+1):" , N_txt , " ) {\n" )
+            m_tmp <- concat( m_tmp , indent , indent , indent , Cov_name , "[i,j] <- " , k[[2]] , "*exp(-" , k[[3]] , "*pow(" , k[[1]] , "[i,j],2));\n" )
+            m_tmp <- concat( m_tmp , indent , indent , indent , Cov_name , "[j,i] <- " , Cov_name , "[i,j];\n" )
+            m_tmp <- concat( m_tmp , indent , indent , "}\n" )
+            m_tmp <- concat( m_tmp , indent , "for ( k in 1:" , N_txt , " )\n" )
+            m_tmp <- concat( m_tmp , indent , indent , Cov_name ,"[k,k] <- " , k[[2]] , " + " , k[[4]] , ";\n" )
+            assign( "m_model_txt" , m_tmp , envir=e )
+
+            # have to make sure the distance matrix is declared in the data block
+            # so add it to used_predictors master list
+            # this also let's us keep text dimension declaration
+            fp_tmp <- get( "fp" , envir=e )
+            var_name <- as.character(k[[1]])
+            fp_tmp[['used_predictors']][[ var_name ]] <- list( var=var_name , N=c(N_txt,N_txt) , type="matrix" )
+            assign( "fp" , fp_tmp , envir=e )
+
+            # make sure parameters have positive contraint
+            constr_list <- get( "constraints" , envir=e )
+            isnum <- function(x) {
+                xn <- suppressWarnings(as.numeric(x))
+                return( ifelse(is.na(xn),FALSE,TRUE) ) 
+            }
+            eta_name <- as.character( k[[2]] )
+            rho_name <- as.character( k[[3]] )
+            sig_name <- as.character( k[[4]] )
+            if ( !isnum(eta_name) )
+                if ( is.null(constr_list[[ eta_name ]]) ) 
+                    constr_list[[eta_name]] <- "lower=0"
+            if ( !isnum(rho_name) )
+                if ( is.null(constr_list[[ rho_name ]]) ) 
+                    constr_list[[rho_name]] <- "lower=0"
+            if ( !isnum(sig_name) )
+                if ( is.null(constr_list[[ sig_name ]]) ) 
+                    constr_list[[sig_name]] <- "lower=0"
+            assign( "constraints" , constr_list , envir=e )
+
+            # return 2 element parameter vector for multi_normal
+            return(kout);
         },
         vectorized = FALSE
     ),
@@ -369,6 +434,7 @@ map2stan.templates <- list(
         name = "Binomial",
         R_name = "dbinom",
         stan_name = "binomial",
+        nat_link = "logit",
         num_pars = 2,
         par_names = c("size","prob"),
         par_bounds = c("<lower=1>","<lower=0,upper=1>"),
@@ -451,6 +517,7 @@ map2stan.templates <- list(
         name = "Poisson",
         R_name = "dpois",
         stan_name = "poisson",
+        nat_link = "log",
         num_pars = 1,
         par_names = c("lambda"),
         par_bounds = c("<lower=0>"),

diff --git a/R/map2stan.r b/R/map2stan.r
@@ -531,6 +531,12 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
                 xvp$T_text <- T_text
                 fp[['vprior']][[n+1]] <- xvp
                 fp_order <- listappend( fp_order , list(type="vprior",i=n+1) )
+                # make sure index variabe is class 'integer'
+                # if not, will be coerced and index automatically generated
+                if ( !is.null(d[[ xvp$group ]]) )
+                    if ( class(d[[ xvp$group ]])!="integer" )
+                        d[[ xvp$group ]] <- coerce_index( d[[ xvp$group ]] )
+                # next!
                 next
             }
         }
@@ -769,7 +775,7 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
 
             # format parmater inputs
             # use par_map function in template, so ordering etc can change
-            klist <- tmplt$par_map( vprior$pars_in , environment() , npars )
+            klist <- tmplt$par_map( vprior$pars_in , environment() , npars , N_txt )
             for ( j in 1:length(klist) ) {
                 l <- tag_var(klist[[j]])
                 if ( !is.null(l) ) {
@@ -998,10 +1004,18 @@ map2stan <- function( flist , data , start , pars , constraints=list() , types=l
             type <- "real"
             # integer check
             if ( class(d[[var$var]])=="integer" ) type <- "int"
+            if ( class(d[[var$var]])=="matrix" ) {
+                type <- "matrix"
+                if ( is.null(var$N) ) var$N <- dim(d[[var$var]])
+            }
             # coerce outcome type
             if ( !is.null(var$type) ) type <- var$type
             # build
-            m_data <- concat( m_data , indent , type , " " , var$var , "[" , var$N , "];\n" )
+            if ( type=="matrix" )
+                # rely on var$N being vector of matrix dimensions
+                m_data <- concat( m_data , indent , type , "[",var$N[1],",",var$N[2],"] " , var$var , ";\n" )
+            else
+                m_data <- concat( m_data , indent , type , " " , var$var , "[" , var$N , "];\n" )
         }#i
     }
 

diff --git a/R/precis.r b/R/precis.r
@@ -98,7 +98,7 @@ precis <- function( model , depth=1 , pars , ci=TRUE , level=0.95 , corr=FALSE ,
     result <- data.frame( est=est , se=se )
     colnames(result) <- c("Mean","StdDev")
     if ( ci==TRUE ) {
-        ci <- confint.quad( est=est , se=se , level=level )
+        ci <- confint_quad( est=est , se=se , level=level )
         if ( the.class=="data.frame" ) {
             # HPDI from samples
             ci <- t( apply( model , 2 , HPDI , prob=level ) )

diff --git a/R/utilities.r b/R/utilities.r
@@ -125,7 +125,7 @@ se <- function( model ) {
 }
 
 # quadratic estimate confidence intervals from means and standard errors
-confint.quad <- function( model=NULL , est , se , level=0.95 ) {
+confint_quad <- function( model=NULL , est , se , level=0.95 ) {
     if ( !is.null(model) ) {
         found.class <- FALSE
         if ( class(model)=="lm" ) {