-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
512 lines (512 loc) · 15 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
#traditional model, catchability coefficient
A <- list()
if(all(!is.null(initial_values))){
for(i in 1:n.chain){
A[[i]] <- list(
if('mu' %in% names(initial_values[[i]])){
mu <- initial_values[[i]]$mu
} else {
mu <- stats::runif(length(unique(count_all$L_ind)), 0.01, 5)
},
if('q' %in% names(initial_values[[i]])){
q <- as.data.frame(initial_values[[i]]$q)
} else {
q <- as.data.frame(stats::runif(length(q_names),0.01,1))
}
)
names(A[[i]]) <- c('mu','q')
}
} else {
for(i in 1:n.chain){
A[[i]] <- list(
mu <- stats::runif(length(unique(count_all$L_ind)), 0.01, 5),
q <- as.data.frame(stats::runif(length(q_names),0.01,1))
)
names(A[[i]]) <- c('mu','q')
}
}
return(A)
}
#' Build per-chain initial values for the traditional model
#'
#' Helper for the traditional model without catchability coefficients.
#' Produces one list per chain, each holding a single element named `mu`.
#' A user-supplied `mu` is used when present for that chain; otherwise `mu`
#' is drawn from a uniform distribution on [0.01, 5].
#'
#' @param n.chain Number of chains; one set of initial values is built per
#'   chain.
#' @param count_all Object with an `L_ind` element; the number of unique
#'   `L_ind` values sets the length of a randomly drawn `mu`.
#' @param initial_values `NULL`, or a list indexed by chain whose elements
#'   may contain `mu`.
#' @return A list of length `n.chain`; element `i` is `list(mu = ...)`.
#' @noRd
init_trad <- function(n.chain, count_all, initial_values) {
  # helper function
  # traditional model
  # seq_len() (instead of 1:n.chain) gives an empty list when n.chain is 0
  # rather than iterating over c(1, 0).
  lapply(seq_len(n.chain), function(i) {
    supplied <- if (is.null(initial_values)) NULL else initial_values[[i]]
    if ('mu' %in% names(supplied)) {
      mu <- supplied$mu
    } else {
      mu <- stats::runif(length(unique(count_all$L_ind)), 0.01, 5)
    }
    list(mu = mu)
  })
}
# function for input checks
#' Validate the inputs of the traditional (count-only) model
#'
#' Runs each check in a fixed order and calls `stop()` with a check-specific
#' message at the first failure. Nothing is modified; the function is called
#' only for its errors.
#'
#' @param data List holding `count` and, when `q` is `TRUE`, `count.type`.
#'   Both are inspected with `dim()`, `apply()` and `is.numeric()`, so they
#'   need to be numeric matrices.
#' @param family Character; 'poisson', 'negbin' or 'gamma' (case-insensitive).
#' @param q Logical; `TRUE` switches on the `count.type` checks.
#' @param phipriors Checked only when `family` is 'negbin': a numeric vector
#'   of two positive values.
#' @param n.chain,n.iter.burn,n.iter.sample,thin Length-1 values >= 1.
#' @param adapt_delta Length-1 value; rejected when < 0 or > 1.
#' @param seed `NULL`, or a length-1 value.
#' @return `NULL`, invisibly.
#' @srrstats {G5.2a} Pre-processing routines to check inputs have unique
#' messages
#' @noRd
traditionalModel_input_checks <- function(data, family, q, phipriors, n.chain,
                                          n.iter.burn, n.iter.sample,
                                          thin, adapt_delta, seed) {

  ## make sure all data tags are valid -- if q == TRUE
  #' @srrstats {G2.13} Pre-processing routines to check for missing data
  if (q == TRUE && !all(c('count.type', 'count') %in% names(data))) {
    errMsg <- "Data should include 'count' and 'count.type'."
    stop(errMsg)
  }

  ## make sure all data tags are valid -- if q == FALSE
  #' @srrstats {G2.13} Pre-processing routines to check for missing data
  if (q == FALSE && !all(c('count') %in% names(data))) {
    errMsg <- "Data should include 'count'."
    stop(errMsg)
  }

  ## make sure count is not zero-length
  #' @srrstats {G5.8,G5.8a} Pre-processing routines to check for
  #' zero-length data
  if (dim(data$count)[1] == 0) {
    errMsg <- "count contains zero-length data."
    stop(errMsg)
  }

  ## make sure no column is entirely NA in count
  #' @srrstats {G5.8,G5.8c} Pre-processing routines to check for column
  #' with all NA
  if (any(apply(data$count, 2, function(col) all(is.na(col))))) {
    errMsg <- "count contains a column with all NA."
    stop(errMsg)
  }

  ## make sure dimensions of count and count.type are equal, if count.type is
  ## present
  #' @srrstats {BS2.1,G2.13} Pre-processing routines to ensure all input data
  #' is dimensionally commensurate
  if (q == TRUE) {
    if (dim(data$count)[1] != dim(data$count.type)[1] ||
        dim(data$count)[2] != dim(data$count.type)[2]) {
      errMsg <- "Dimensions of count and count.type do not match."
      stop(errMsg)
    }
  }

  ## make sure no data are undefined
  #' @srrstats {G2.16} Pre-processing routines to check for undefined data
  if (any(data$count == Inf, na.rm = TRUE) ||
      any(data$count == -Inf, na.rm = TRUE)) {
    errMsg <- "count contains undefined values (i.e., Inf or -Inf)"
    stop(errMsg)
  }

  ## make sure all data is numeric -- if q == TRUE
  #' @srrstats {BS2.5} Checks of appropriateness of numeric values submitted
  #' for distributional parameters (i.e., count data must numeric),
  #' implemented prior to analytic routines
  #' @srrstats {G5.8,G5.8b} Pre-processing routines to check for data of
  #' unsupported type
  if (q == TRUE) {
    if (!is.numeric(data$count) || !is.numeric(data$count.type)) {
      errMsg <- "Data should be numeric."
      stop(errMsg)
    }
  }

  ## make sure all data is numeric -- if q == FALSE
  #' @srrstats {BS2.5} Checks of appropriateness of numeric values submitted
  #' for distributional parameters (i.e., count data must positive and
  #' numeric), implemented prior to analytic routines
  #' @srrstats {G5.8,G5.8b} Pre-processing routines to check for data of
  #' unsupported type
  # NOTE(review): negative counts are rejected here only when q == FALSE; with
  # q == TRUE they are caught later only for 'poisson'/'negbin' -- confirm
  # whether 'gamma' with q == TRUE should also reject them.
  if (q == FALSE) {
    if (!is.numeric(data$count) || any(data$count < 0, na.rm = TRUE)) {
      errMsg <- "Data should be numeric."
      stop(errMsg)
    }
  }

  if (q == TRUE) {
    ## make sure locations of NAs in count data match locations of NAs in
    ## count.type data
    #' @srrstats {BS2.1,G2.13} Pre-processing routines to ensure all input
    #' data is dimensionally commensurate
    # Compare the NA masks cell by cell (dimensions were checked above).
    # Comparing which() index vectors with `==` recycles unequal lengths and
    # yields logical(0) -- i.e. no error -- when one side has no NA at all.
    if (any(is.na(data$count.type) != is.na(data$count))) {
      errMsg <- paste0("Empty data cells (NA) in count data should match ",
                       "empty data cells (NA) in count.type data.")
      stop(errMsg)
    }

    ## make sure count.type is not zero-length
    #' @srrstats {G5.8,G5.8a} Pre-processing routines to check for
    #' zero-length data
    if (dim(data$count.type)[1] == 0) {
      errMsg <- "count.type contains zero-length data."
      stop(errMsg)
    }

    ## make sure no column is entirely NA in count.type
    #' @srrstats {G5.8,G5.8c} Pre-processing routines to check for column
    #' with all NA
    if (any(apply(data$count.type, 2, function(col) all(is.na(col))))) {
      errMsg <- "count.type contains a column with all NA."
      stop(errMsg)
    }
  }

  ## make sure family is either 'poisson', 'negbin', or 'gamma'
  #' @srrstats {G2.3,G2.3a,G2.3b} Permit only expected univariate
  #' (case-insensitive) parameter values
  # Lower-case once so every family comparison below is case-insensitive; the
  # length guard turns a zero-length or vector `family` into this error
  # instead of an opaque failure of `if`.
  family_lc <- tolower(family)
  if (length(family_lc) != 1L ||
      !(family_lc %in% c('poisson', 'negbin', 'gamma'))) {
    errMsg <- "Invalid family. Options include 'poisson', 'negbin', or 'gamma'."
    stop(errMsg)
  }

  ## the smallest count.type is 1
  if (q == TRUE && min(data$count.type, na.rm = TRUE) != 1) {
    errMsg <- paste0("The first gear type should be referenced as 1 in ",
                     "count.type. Subsequent gear types should be ",
                     "referenced 2, 3, 4, etc.")
    stop(errMsg)
  }

  ## count are integers, if family is poisson or negbin
  #' @srrstats {BS2.5} Checks of appropriateness of numeric values submitted
  #' for distributional parameters (i.e., count data must be non-negative
  #' integers if a poisson or negative binomial distribution is used),
  #' implemented prior to analytic routines
  if (family_lc %in% c('poisson', 'negbin')) {
    # `x %% 1` is 0 for whole numbers and NA for missing cells
    if (!all((data$count %% 1) %in% c(0, NA)) ||
        any(data$count < 0, na.rm = TRUE)) {
      errMsg <- paste0("All values in count should be non-negative integers. ",
                       "Use family = 'gamma' if count is continuous.")
      stop(errMsg)
    }
  }

  ## count.type are integers
  #' @srrstats {G5.8,G5.8b} Pre-processing routines to check for data of
  #' unsupported type
  if (q == TRUE && !all((data$count.type %% 1) %in% c(0, NA))) {
    errMsg <- "All values in count.type should be integers."
    stop(errMsg)
  }

  ## phipriors is a vector of two numeric values
  #' @srrstats {G2.0,BS2.2,BS2.3,BS2.4,BS2.5} Checks of vector length and
  #' appropriateness of distributional parameters (i.e., vector of length 2,
  #' numeric values > 0), implemented prior to analytic routines
  if (family_lc == 'negbin') {
    # `||` short-circuits, so the value comparison only runs on a numeric
    # vector of length 2 without NA.
    if (!is.numeric(phipriors) || length(phipriors) != 2 ||
        anyNA(phipriors) || any(phipriors <= 0)) {
      errMsg <- paste0("phipriors should be a vector of two positive ",
                       "numeric values. ex. c(0.25,0.25)")
      stop(errMsg)
    }
  }

  ## check length and range of n.chain
  if (any(length(as.integer(n.chain)) > 1 | n.chain < 1)) {
    errMsg <- "n.chain should be an integer > 0 and of length 1."
    stop(errMsg)
  }

  ## check length and range of n.iter.sample
  if (any(length(as.integer(n.iter.sample)) > 1 | n.iter.sample < 1)) {
    errMsg <- "n.iter.sample should be an integer > 0 and of length 1."
    stop(errMsg)
  }

  ## check length and range of n.iter.burn
  if (any(length(as.integer(n.iter.burn)) > 1 | n.iter.burn < 1)) {
    errMsg <- "n.iter.burn should be an integer > 0 and of length 1."
    stop(errMsg)
  }

  ## check length and range of thin
  if (any(length(as.integer(thin)) > 1 | thin < 1)) {
    errMsg <- "thin should be an integer > 0 and of length 1."
    stop(errMsg)
  }

  ## check length and range of adapt_delta
  if (any(length(adapt_delta) > 1 | adapt_delta < 0 | adapt_delta > 1)) {
    errMsg <- paste0("adapt_delta should be a numeric value > 0 and < 1 and ",
                     "of length 1.")
    stop(errMsg)
  }

  ## check length of seed
  if (!is.null(seed)) {
    if (length(as.integer(seed)) > 1) {
      errMsg <- "seed should be an integer of length 1."
      stop(errMsg)
    }
  }

  invisible(NULL)
}
# checks if initial values are provided
#' Validate user-supplied initial values for the traditional model
#'
#' Stops with a check-specific message at the first failure. Only the `mu`
#' and `q` elements of each chain's list are inspected, and each only when
#' present.
#'
#' @param initial_values List with one element per chain; each element is a
#'   list that may contain `mu` and/or `q`.
#' @param data List holding `count` (its first dimension is the number of
#'   sites) and, when `q` is supplied, `count.type`.
#' @param n.chain Number of chains; must equal `length(initial_values)`.
#' @return `NULL`, invisibly.
#' @noRd
initial_values_checks_trad <- function(initial_values, data, n.chain) {

  ## length of initial values is equal to the number of chains
  if (length(initial_values) != n.chain) {
    errMsg <- paste0("The length of the list of initial values should equal ",
                     "the number of chains (n.chain, default is 4).")
    stop(errMsg)
  }

  for (i in seq_len(n.chain)) {
    chain_inits <- initial_values[[i]]

    ## check mu input
    if ('mu' %in% names(chain_inits)) {
      mu_init <- chain_inits$mu
      ## if mu is numeric
      # `||` short-circuits so the comparison is never run on non-numeric
      # input; anyNA() makes a missing value fail with this message instead
      # of "missing value where TRUE/FALSE needed".
      if (!is.numeric(mu_init) || anyNA(mu_init) || any(mu_init < 0)) {
        errMsg <- "Initial values for 'mu' should be numeric values > 0."
        stop(errMsg)
      }
      ## check mu length
      if (length(mu_init) != dim(data$count)[1]) {
        errMsg <- paste0("The length of initial values for 'mu' should ",
                         "equal the number of sites.")
        stop(errMsg)
      }
    }

    ## check q input
    if ('q' %in% names(chain_inits)) {
      q_init <- chain_inits$q
      ## if q is numeric
      if (!is.numeric(q_init) || anyNA(q_init) || any(q_init < 0)) {
        errMsg <- "Initial values for 'q' should be numeric."
        stop(errMsg)
      }
      ## check q length
      # table() counts the distinct non-NA gear types; type 1 is the
      # reference, so one q is expected for each remaining type.
      if (length(q_init) != (length(table(data$count.type)) - 1)) {
        errMsg <- paste0("The length of initial values for 'q' should equal: ",
                         "# unique gear types - 1 (i.e., q for reference ",
                         "type = 1).")
        stop(errMsg)
      }
    }
  }
}
# ---- Interactive run: traditional model, family = 'gamma' ----
# Session-history fragment. `data`, `inits` and `model` are read below but are
# not assigned anywhere in the visible lines above this point; presumably they
# were left in the workspace by earlier commands -- confirm before re-running.
# Helper files are sourced by relative path, so the result depends on the
# working directory at the time.
source('../get_model_data.R')
source('../jointModel_helper.R')
# argument values for this run
cov = NULL
family = 'gamma'
p10priors = c(1,20)
q = FALSE
phipriors = NULL
multicore = FALSE
initial_values = inits
n.chain = 1
n.iter.burn = 100
n.iter.sample = 75
thin = 1
adapt_delta = 0.9
verbose = TRUE
seed = NULL
# disabled alternative call to get_model_data(), kept for reference
# moddat <- get_model_data(data,cov,family,p10priors,q,phipriors,
# multicore,initial_values,n.chain,n.iter.burn,
# n.iter.sample,thin,adapt_delta,verbose,seed)
# get_model_data_trad() is not defined in the visible lines; presumably it
# comes from one of the files sourced above -- TODO confirm.
moddat <- get_model_data_trad(data, family,q,phipriors,multicore,
initial_values,n.chain,n.iter.burn,
n.iter.sample,thin,adapt_delta,
verbose, seed)
# first element is used as the Stan data, second as the initial values
# (this overwrites the `inits` that was passed in as initial_values)
model_data <- moddat[[1]]
inits <- moddat[[2]]
# run model
out <- rstan::sampling(
model,
data = model_data,
cores = 1,
#' @srrstats {G2.4,G2.4a} explicit conversion to
#' integers for sampling arguments
chains = n.chain,
thin = as.integer(thin),
warmup = as.integer(n.iter.burn),
# iter is passed as warmup plus the number of sampling iterations
iter = (
as.integer(n.iter.burn) + as.integer(n.iter.sample)
),
init = inits,
# refresh is 500 when verbose is TRUE and 0 otherwise
refresh = ifelse(verbose == TRUE,500,0)
)
# summary table for the 'mu' parameters
rstan::summary(out,pars=c('mu'))$summary
# `model` is (re)assigned here from the Stan file, after the sampling call
# above has already used a `model` object
model <- rstan::stan_model('inst/stan/traditional_count.stan')
## 17.
# model, pois (traditional model)
# Simulates Poisson counts for nsite sites, then runs the traditional model
# with family = 'poisson' and the simulated `mu` as initial values.
# No seed is set before the random draws, so each run gives different data.
# constants
nsite <- 20
nobs_count <- 100
# params
# site-level means drawn from a log-normal (meanlog = 0, sdlog = 1)
mu <- rlnorm(nsite, meanlog = log(1), sdlog = 1)
# count
# nsite x nobs_count matrix; row i holds Poisson draws with mean mu[i]
count <- matrix(NA, nrow = nsite, ncol = nobs_count)
for(i in 1:nsite){
count[i,] <- rpois(n = nobs_count, mu[i])
}
# collect data
data <- list(
count = count
)
# initial values
# one chain, initialised at the simulated mu
inits <- list()
inits[[1]] <- list(
mu = mu
)
# the list element is already named 'mu'; this re-applies the same name
names(inits[[1]]) <- c('mu')
# helper files are sourced by relative path (depends on the working directory)
source('../get_model_data.R')
source('../jointModel_helper.R')
# argument values for this run
cov = NULL
family = 'poisson'
p10priors = c(1,20)
q = FALSE
phipriors = NULL
multicore = FALSE
initial_values = inits
n.chain = 1
n.iter.burn = 100
n.iter.sample = 75
thin = 1
adapt_delta = 0.9
verbose = TRUE
seed = NULL
# disabled alternative call to get_model_data(), kept for reference
# moddat <- get_model_data(data,cov,family,p10priors,q,phipriors,
# multicore,initial_values,n.chain,n.iter.burn,
# n.iter.sample,thin,adapt_delta,verbose,seed)
# get_model_data_trad() is not defined in the visible lines; presumably it
# comes from one of the files sourced above -- TODO confirm.
moddat <- get_model_data_trad(data, family,q,phipriors,multicore,
initial_values,n.chain,n.iter.burn,
n.iter.sample,thin,adapt_delta,
verbose, seed)
# first element is used as the Stan data, second as the initial values
model_data <- moddat[[1]]
inits <- moddat[[2]]
# run model
# `model` is not assigned in this fragment; it must already exist in the
# workspace
out <- rstan::sampling(
model,
data = model_data,
cores = 1,
#' @srrstats {G2.4,G2.4a} explicit conversion to
#' integers for sampling arguments
chains = n.chain,
thin = as.integer(thin),
warmup = as.integer(n.iter.burn),
# iter is passed as warmup plus the number of sampling iterations
iter = (
as.integer(n.iter.burn) + as.integer(n.iter.sample)
),
init = inits,
# refresh is 500 when verbose is TRUE and 0 otherwise
refresh = ifelse(verbose == TRUE,500,0)
)
# summary table for the 'mu' parameters
rstan::summary(out,pars=c('mu'))$summary
# ---- Simulate data: negative-binomial counts with two gear types + qPCR ----
# No seed is set before the random draws, so each run gives different data.
# constants
nsite <- 20
nobs_count <- 100
nobs_pcr <- 8
# params
# site-level means drawn from a log-normal (meanlog = 0, sdlog = 1)
mu <- rlnorm(nsite, meanlog = log(1), sdlog = 1)
beta <- 0.5
log_p10 <- -4.5
# q multiplies the mean of counts taken with gear type 2 (see the loop below)
q <- 2
# phi is passed as the `size` argument of rnbinom()
phi <- 1.2
# traditional type
# first half of the columns are gear type 1, second half gear type 2
count_type <- cbind(matrix(1, nrow = nsite, ncol = nobs_count/2),
matrix(2, nrow = nsite, ncol = nobs_count/2))
# count
# each cell is one negative-binomial draw; mean mu[i] for type 1 and mu[i]*q
# for type 2
count <- matrix(NA, nrow = nsite, ncol = nobs_count)
for(i in 1:nsite){
for(j in 1:nobs_count){
if(count_type[i,j]==1){
count[i,j] <- rnbinom(n = 1, mu = mu[i], size = phi)
} else {
count[i,j] <- rnbinom(n = 1, mu = mu[i]*q, size = phi)
}
}
}
# p11 (probability of true positive eDNA detection) and p (probability
# of eDNA detection)
# p11 = mu / (mu + exp(beta)); p = p11 + exp(log_p10), capped at 1
p11 <- rep(NA,nsite)
p <- rep(NA,nsite)
for (i in 1:nsite){
p11[i] <- mu[i] / (mu[i] + exp(beta))
p[i] <- min(p11[i] + exp(log_p10),1)
}
# qPCR.N (# qPCR observations)
# every cell is set to 3
qPCR.N <- matrix(NA, nrow = nsite, ncol = nobs_pcr)
for(i in 1:nsite){
qPCR.N[i,] <- rep(3,nobs_pcr)
}
# qPCR.K (# positive qPCR detections)
# binomial draws with size qPCR.N[i,] and probability p[i]
qPCR.K <- matrix(NA, nrow = nsite, ncol = nobs_pcr)
for (i in 1:nsite){
qPCR.K[i,] <- rbinom(nobs_pcr, qPCR.N[i,], rep(p[i],nobs_pcr))
}
# collect data
data <- list(
qPCR.N = qPCR.N,
qPCR.K = qPCR.K,
count = count,
count.type = count_type
)
# initial values
# one chain, initialised at the simulating values; the element named 'alpha'
# takes the value of `beta`
inits <- list()
inits[[1]] <- list(
mu = mu,
p10 = exp(log_p10),
alpha = beta,
phi = phi,
q = q
)
# the elements already carry these names; this re-applies them in the same
# order
names(inits[[1]]) <- c('mu','p10','alpha','phi','q')
# ---- Step through input checks and qPCR reshaping by hand ----
# Session-history fragment. `data` and `inits` are read below but not assigned
# in this fragment; they must already exist in the workspace.
# catchability_checks(), no_catchability_checks(), covariate_checks(),
# all_checks(), initial_values_checks() and get_family_index() are not defined
# in the visible lines; presumably they come from the file sourced here --
# TODO confirm.
source('R/utils-joint.R')
# argument values for this run
cov = NULL
family = 'negbin'
p10priors = c(1,20)
# q is a logical flag here (catchability coefficients on)
q = TRUE
phipriors = NULL
multicore = FALSE
initial_values = inits
n.chain = 1
n.iter.burn = 100
n.iter.sample = 75
thin = 1
adapt_delta = 0.9
verbose = TRUE
seed = NULL
# make character inputs case-insensitive
#' @srrstats {G2.3b} Allow case-insensitive character parameter values
family <- tolower(family)
# get phipriors
# NULL unless family is 'negbin'; for 'negbin' a missing value defaults to
# c(0.25,0.25) and a supplied value is left unchanged
if(family != 'negbin'){
phipriors <- NULL
} else if(family == 'negbin' && is.null(phipriors)){
phipriors <- c(0.25,0.25)
} else if(family == 'negbin' && !is.null(phipriors)){
phipriors <- phipriors
}
if (q == TRUE) {
# model with catchability coefficients
catchability_checks(data,cov)
} else {
# model without catchability coefficients
no_catchability_checks(data,cov)
}
# model with covariates
if (all(!is.null(cov))) {
covariate_checks(data,cov)
}
# all models
all_checks(data,cov,family,p10priors,phipriors,n.chain,n.iter.burn,
n.iter.sample,thin,adapt_delta,seed)
# initial value checks
if(all(!is.null(initial_values))){
initial_values_checks(initial_values,data,cov,n.chain)
}
# stop early when the rstan namespace cannot be loaded
if (!requireNamespace("rstan", quietly = TRUE)){
stop ("The 'rstan' package is not installed.", call. = FALSE)
}
# convert qPCR data to long format
# This first version uses `%>%` for the first step and `|>` for the rest;
# `%>%` is not part of base R, so it only works when a package providing it
# is attached. The same statement is repeated below with `|>` throughout.
#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
#' form (i.e., matrix, etc.)
qPCR_all <- as.data.frame(data$qPCR.N) %>%
dplyr::mutate(L_ind = 1:dim(data$qPCR.N)[1]) |>
tidyr::pivot_longer(cols =! L_ind, values_to = 'n_N') |>
#' @srrstats {G2.15} Software does not assume non-missingness and actually
#' expects it if the number of observations across sites is unequal
tidyr::drop_na()
# convert qPCR data to long format
# Adds a row index L_ind, pivots every other column into long format with the
# values in 'n_N', then drops rows containing NA.
#' @srrstats {G2.7} Use as.data.frame() to allow input list of any tabular
#' form (i.e., matrix, etc.)
qPCR_all <- as.data.frame(data$qPCR.N) |>
dplyr::mutate(L_ind = 1:dim(data$qPCR.N)[1]) |>
tidyr::pivot_longer(cols =! L_ind, values_to = 'n_N') |>
#' @srrstats {G2.15} Software does not assume non-missingness and actually
#' expects it if the number of observations across sites is unequal
tidyr::drop_na()
# ad-hoc call of a helper with the 'gamma' family
get_family_index('gamma')
# regenerate package documentation; roxygenise() is called once before and
# once after installing roxygen2
roxygen2::roxygenise()
install.packages('roxygen2')
roxygen2::roxygenise()