diff --git a/.DS_Store b/.DS_Store index 3536493..5dfa5cf 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index a7331dd..2684339 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,47 +1,34 @@ -Package: zoidtmb -Title: Zero-and-One Inflated Dirichlet Regression Modelling in TMB -Version: 1.3.0 -Authors@R: - c(person(given = "Eric J.", - family = "Ward", - role = c("aut", "cre"), - email = "eric.ward@noaa.gov", - comment = c(ORCID = "0000-0002-4359-0296")), - person(given = "Alexander J.", - family = "Jensen", - role = c("aut"), - email = "jensena1@uw.edu", - comment = c(ORCID = "0000-0002-2911-8884")), - person(given = "Ryan P.", - family = "Kelly", - role = c("aut"), - email = "rpkelly@uw.edu", - comment = c(ORCID = "0000-0001-5037-2441")), - person(given = "Andrew O.", - family = "Shelton", - role = c("aut"), - email = "ole.shelton@noaa.gov", - comment = c(ORCID = "0000-0002-8045-6141")), - person(given = "William H.", - family = "Satterthwaite", - role = c("aut"), - email = "will.satterthwaite@noaa.gov", - comment = c(ORCID = "0000-0002-0436-7390")), - person(given = "Eric C.", - family = "Anderson", - role = c("aut"), - email = "eric.anderson@noaa.gov", - comment = c(ORCID = "0000-0003-1326-0840"))) -Description: Fits Dirichlet regression and zero-and-one inflated Dirichlet regression with Bayesian methods implemented in Stan. These models are sometimes referred to as trinomial mixture models; covariates and overdispersion can optionally be included. +Type: Package +Package: phenomix +Title: Fit Density Curves to Peak Timing Data that Varies over Time +Version: 1.0.4 +Authors@R: c(person(given = c("Eric", "J."), + family = "Ward", + role = c("aut", "cre"), + email = "eric.ward@noaa.gov"), + person(given = c("Samantha", "M."), + family = "Wilson", + role = c("ctb")), + person(given = c("Joseph", "H."), + family = "Anderson", + role = c("ctb"))) +Description: The 'phenomix' package fits time-varying density curves to run + timing type data commonly encountered in fisheries and ecology. Example + applications include peak run timing curves collected for juvenile or + adult Pacific salmon, though the package could also be applied to other + kinds of data, such as hydrographs or plant phenology (flowering, leaf out).
License: GPL (>=3) -URL: https://nwfsc-cb.github.io/zoidtmb/, https://github.com/nwfsc-cb/zoidtmb/issues +URL: https://ericward-noaa.github.io/phenomix, https://github.com/ericward-noaa/phenomix Depends: R (>= 4.0.0) Imports: - gtools, + dplyr, + ggplot2, + gnorm, + methods, stats, TMB (>= 1.7.20), - Rcpp + nlme Suggests: testthat, knitr, diff --git a/NAMESPACE b/NAMESPACE index 0d2d948..df593ff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,34 @@ # Generated by roxygen2: do not edit by hand -export(broken_stick) -export(fit_zoidTMB) -import(Rcpp) -importFrom(gtools,rdirichlet) -importFrom(stats,as.formula) -importFrom(stats,model.frame) +S3method(extractAIC,phenomix) +S3method(fitted,phenomix) +S3method(fixef,phenomix) +S3method(logLik,phenomix) +S3method(nobs,phenomix) +S3method(predict,phenomix) +S3method(ranef,phenomix) +export(create_data) +export(extract_all) +export(extract_annual) +export(extract_lower) +export(extract_means) +export(extract_sigma) +export(extract_theta) +export(extract_upper) +export(fit) +export(pars) +export(plot_diagnostics) +import(ggplot2) +importFrom(TMB,MakeADFun) +importFrom(TMB,sdreport) +importFrom(dplyr,left_join) +importFrom(methods,is) +importFrom(nlme,fixef) +importFrom(nlme,ranef) +importFrom(stats,logLik) importFrom(stats,model.matrix) -importFrom(stats,rbeta) -importFrom(stats,rbinom) -useDynLib(zoidtmb, .registration = TRUE) +importFrom(stats,nobs) +importFrom(stats,predict) +importFrom(stats,rnorm) +importFrom(stats,runif) +useDynLib(phenomix, .registration = TRUE) diff --git a/R/broken_stick.R b/R/broken_stick.R deleted file mode 100644 index 3211414..0000000 --- a/R/broken_stick.R +++ /dev/null @@ -1,103 +0,0 @@ -# Random generation of datasets using the dirichlet broken stick method -#' -#' Random generation of datasets using the dirichlet broken stick method -#' -#' @param n_obs Number of observations (rows of data matrix to simulate). Defaults to 10 -#' @param n_groups Number of categories for each observation (columns of data matrix). Defaults to 10 -#' @param ess_fraction The effective sample size fraction, defaults to 1 -#' @param tot_n The total sample size to simulate for each observation. This is approximate and the actual -#' simulated sample size will be slightly smaller. Defaults to 100 -#' @param p The stock proportions to simulate from, as a vector. 
Optional, and when not included, -#' random draws from the dirichlet are used -#' @return A 2-element list, whose 1st element `X_obs` is the simulated dataset, and whose -#' 2nd element is the underlying vector of proportions `p` used to generate the data -#' @export -#' @importFrom gtools rdirichlet -#' @importFrom stats rbeta rbinom -#' -#' @examples -#' \donttest{ -#' y <- broken_stick(n_obs = 3, n_groups = 5, tot_n = 100) -#' -#' # add custom proportions -#' y <- broken_stick( -#' n_obs = 3, n_groups = 5, tot_n = 100, -#' p = c(0.1, 0.2, 0.3, 0.2, 0.2) -#' ) -#' } -broken_stick <- function(n_obs = 1000, - n_groups = 10, - ess_fraction = 1, - tot_n = 100, - p = NULL) { - if (is.null(p)) { - p <- gtools::rdirichlet(1, rep(1, n_groups)) - } - - ess <- tot_n * ess_fraction - # first, determine the presence of zeros using the stick breaking algorithm - # second, for instances where zeros and tot_n are not observed, rescale the parameters and - # use the stick breaking algorithm for the Dirichlet process a second time - - X_mean_prob <- matrix(p, n_obs, n_groups, byrow = T) - X_var_prob <- matrix((p * (1 - p)) / (ess + 1), - n_obs, n_groups, - byrow = T - ) - X_mean <- X_mean_prob * tot_n - X_var <- X_var_prob * tot_n^2 - - # Simulate occurrence of 0s, 1s - X_obs_0 <- matrix(NA, n_obs, n_groups) #1 indicates that the observation is a zero - X_obs_1 <- matrix(NA, n_obs, n_groups) - for(i in 1:n_obs){ - BREAK = "FALSE" - repeat{ - for(j in 1:n_groups){ - X_obs_0[i,j] <- rbinom(1,1,(1-p[j])^ess) - } - if(sum(X_obs_0[i,])0){ - mu_vals[i,j] <- rbeta(1,X_alpha_mod[i,j],sum(X_alpha_mod[i,(j+1):n_groups])) - q_vals[i,j] = mu_vals[i,j] - } - }else if(j>1 ){ - if(X_alpha_mod[i,j]>0){ - mu_vals[i,j] <- rbeta(1,X_alpha_mod[i,j],sum(X_alpha_mod[i,(j+1):n_groups])) - q_vals[i,j] <- prod(1 - mu_vals[i,(1:j-1)]) * mu_vals[i,j] - } - } - } - if(X_alpha_mod[i,n_groups]>0){ - q_vals[i,n_groups] <- 1 - sum(q_vals[i,(1:n_groups-1)]) - } - X_obs[i,] <- q_vals[i,] * (tot_n) - } - } - - return(list(X_obs = X_obs, p = p)) -} diff --git a/R/create_data.R b/R/create_data.R new file mode 100644 index 0000000..8d751d4 --- /dev/null +++ b/R/create_data.R @@ -0,0 +1,140 @@ +#' Create data file for fitting time varying run timing distributions with TMB +#' +#' Does minimal processing of data to use as argument to fitting function +#' +#' @param data A data frame +#' @param min_number A minimum threshold to use, defaults to 0 +#' @param variable A character string of the name of the variable in 'data' that contains the response (e.g. counts) +#' @param time A character string of the name of the variable in 'data' that contains the time variable (e.g. year) +#' @param date A character string of the name of the variable in 'data' that contains the date (e.g. day of year). The actual +#' column should contain a numeric value -- for example, the result from using lubridate::yday(x) +#' @param mu An optional formula allowing the mean to be a function of covariates. Random effects are not included in the formula +#' but specified with the `est_mu_re` argument +#' @param sigma An optional formula allowing the standard deviation to be a function of covariates. For asymmetric models, +#' each side of the distribution is allowed a different set of covariates. Random effects are not included in the formula +#' but specified with the `est_sigma_re` argument +#' @param covar_data a data frame containing covariates specific to each time step.
These are used in the formulas `mu` and `sigma` +#' @param asymmetric_model Boolean, whether or not to let model be asymmetric (e.g. run timing before peak has a +#' different shape than run timing after peak) +#' @param est_sigma_re Whether to estimate random effects by year in sigma parameter controlling tail of distribution. Defaults to TRUE +#' @param est_mu_re Whether to estimate random effects by year in mu parameter controlling location of distribution. Defaults to TRUE +#' @param tail_model Whether to fit Gaussian ("gaussian"), Student-t ("student_t") or generalized normal ("gnorm"). Defaults to Student-t +#' @param family Response for observation model, options are "gaussian", "poisson", "negbin", "binomial", "lognormal". The default ("lognormal") is +#' not a true lognormal distribution, but a normal-log in that it assumes log(y) ~ Normal() +#' @param max_theta Maximum value of log(pred) when `limits=TRUE`. Defaults to 10 +#' @param share_shape Boolean argument for whether asymmetric student-t and generalized normal distributions should share the shape parameter (nu for the student-t; +#' beta for the generalized normal). Defaults to TRUE +#' @param nu_prior Two element vector (optional) for penalized prior on student t df, defaults to a Gamma(shape=2, scale=10) distribution +#' @param beta_prior Two element vector (optional) for penalized prior on generalized normal beta, defaults to a Normal(2, 1) distribution +#' @export +#' @importFrom stats model.matrix +#' @examples +#' data(fishdist) +#' datalist <- create_data(fishdist, +#' min_number = 0, variable = "number", time = "year", +#' date = "doy", asymmetric_model = TRUE, family = "gaussian" +#' ) +create_data <- function(data, + min_number = 0, + variable = "number", + time = "year", + date = "doy", + asymmetric_model = TRUE, + mu = ~1, + sigma = ~1, + covar_data = NULL, + est_sigma_re = TRUE, + est_mu_re = TRUE, + tail_model = "student_t", + family = "lognormal", + max_theta = 10, + share_shape = TRUE, + nu_prior = c(2,10), + beta_prior = c(2,1)) { + + dist <- c("gaussian", "poisson", "negbin", "binomial", "lognormal") + fam <- match(family, dist) + if (is.na(fam)) { + stop("Make sure the entered family is in the list of accepted distributions") + } + + tail <- c("gaussian", "student_t", "gnorm") + tailmod <- match(tail_model, tail) + if (is.na(tailmod)) { + stop("Make sure the entered tail model is in the list of accepted distributions") + } + + # check to make sure year and date are numeric + if (!is.numeric(data[, time])) { + stop("The time variable in the data frame (e.g. year) needs to be numeric") + } + if (is.numeric(data[, date])) { + if (max(data[, date], na.rm = T) > 365) stop("The date variable in the data frame contains values greater than 365") + if (min(data[, date], na.rm = T) < 1) stop("The date variable in the data frame contains values less than 1") + } else { + stop("The date variable in the data frame (e.g. 
day_of_year) needs to be numeric") + } + + # optional priors + use_t_prior = TRUE + if (length(nu_prior) != 2) { + if(is.na(nu_prior)) { + use_t_prior = FALSE + } else { + stop("The nu prior must be a numeric 2-element vector or NA") + } + } + use_beta_prior = TRUE + if (length(beta_prior) != 2) { + if(is.na(beta_prior)) { + use_beta_prior = FALSE + } else { + stop("The beta prior must be a numeric 2-element vector or NA") + } + } + + # if 1 level, turn off trend and random effect estimation + if (length(unique(as.numeric(data[, time]))) == 1) { + est_sigma_re <- FALSE + est_mu_re <- FALSE + } + + # drop rows below threshold or NAs + drop_rows <- which(is.na(data[, variable]) | data[, variable] < min_number) + if (length(drop_rows) > 0) data <- data[-drop_rows, ] + + # rescale year variable to start at 1 for indexing + data$year <- data[, time] - min(data[, time]) + 1 + + # parse formulas. covar_data contains covariates specific to each time step + if (is.null(covar_data)) { + covar_data <- data.frame(year = unique(data$year)) + } + mu_mat <- model.matrix(mu, data = covar_data) + sig_mat <- model.matrix(sigma, data = covar_data) + + data_list <- list( + y = data[, variable], + yint = round(data[, variable]), + years = as.numeric(as.factor(data$year)), + x = data[, date], + year_levels = as.numeric(as.factor(unique(data$year))), + unique_years = unique(data$year), + nLevels = length(unique(data$year)), + asymmetric = as.numeric(asymmetric_model), + family = fam, + mu_mat = mu_mat, + sig_mat = sig_mat, + tail_model = as.numeric(tailmod) - 1, + est_sigma_re = as.numeric(est_sigma_re), + est_mu_re = as.numeric(est_mu_re), + max_theta = max_theta, + share_shape = as.numeric(share_shape), + use_t_prior = as.numeric(use_t_prior), + use_beta_prior = as.numeric(use_beta_prior), + beta_prior = beta_prior, + nu_prior = nu_prior + ) + + return(data_list) +} diff --git a/R/data.R b/R/data.R index 65d7931..f27bda1 100644 --- a/R/data.R +++ b/R/data.R @@ -1,19 +1,14 @@ -#' Data from Satterthwaite, W.H., Ciancio, J., Crandall, E., Palmer-Zwahlen, -#' M.L., Grover, A.M., O’Farrell, M.R., Anson, E.C., Mohr, M.S. & Garza, -#' J.C. (2015). Stock composition and ocean spatial distribution from -#' California recreational chinook salmon fisheries using genetic stock -#' identification. Fisheries Research, 170, 166–178. The data -#' genetic data collected from port-based sampling of recreationally-landed -#' Chinook salmon in California from 1998-2002. +#' Example simulate data for fish distributions from multiple years #' -#' @format A data frame. -"chinook" +#' @format A data frame containing simulated data. +#' @keywords internal +"fishdist" -#' Data from Magnussen, E. 2011. Food and feeding habits of cod (Gadus morhua) -#' on the Faroe Bank. – ICES Journal of Marine Science, 68: 1909–1917. The data -#' here are Table 3 from the paper, with sample proportions (columns w) multiplied -#' by total weight to yield total grams (g) for each sample-diet item combination. Dashes -#' have been replaced with 0s. +#' Count data collected by Washington Department of Fish and Wildlife on +#' chum salmon from the Skagit River (Washington state). Each row of the +#' dataframe contains an observation ("number") on a given date ("date"). +#' The year ("year") and calendar day ("doy") are also included. #' #' @format A data frame. 
-"coddiet" +#' @keywords internal +"chum" diff --git a/R/extract.R b/R/extract.R new file mode 100644 index 0000000..750f8b2 --- /dev/null +++ b/R/extract.R @@ -0,0 +1,137 @@ +#' Output processing function to be called by user +#' +#' This function extracts the annual totals +#' +#' @param fit A fitted object returned from fit() +#' @param log Whether to return estimates in log space, defaults to TRUE +#' @export +#' +extract_annual <- function(fit, log = TRUE) { + all_name <- names(fit$sdreport$value) + if(log==TRUE) { + idx <- grep("year_log_tot", all_name) + } else { + idx <- grep("year_tot", all_name) + } + + par_name <- "year_tot" + if(log==TRUE) par_name <- "year_log_tot" + + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = par_name) + return(df) +} + + +#' Output processing function to be called by user +#' +#' This function extracts the parameter means and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_means <- function(fit) { + all_name <- names(fit$sdreport$value) + idx <- which(all_name == "mu") + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "mu") + return(df) +} + +#' Output processing function to be called by user +#' +#' This function extracts the parameter sigma and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_sigma <- function(fit) { + all_name <- names(fit$sdreport$value) + idx <- which(all_name == "sigma1") + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "sigma1") + # attempt to add in 2nd sigma + idx <- which(all_name == "sigma2") + if(length(idx) > 0) { + df2 <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "sigma2") + df <- rbind(df,df2) + } + return(df) +} + +#' Output processing function to be called by user +#' +#' This function extracts the parameter theta and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_theta <- function(fit) { + all_name <- names(fit$sdreport$value) + idx <- which(all_name == "theta") + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "theta") + return(df) +} + +#' Output processing function to be called by user +#' +#' This function extracts the lower quartiles (25%) and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_lower <- function(fit) { + all_name <- names(fit$sdreport$value) + idx <- which(all_name == "lower25") + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "lower25") + return(df) +} + +#' Output processing function to be called by user +#' +#' This function extracts the upper quartiles (25%) and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_upper <- function(fit) { + all_name <- names(fit$sdreport$value) + idx <- which(all_name == "upper75") + df <- data.frame("value" = fit$sdreport$value[idx], + "sd" = fit$sdreport$sd[idx], + "par" = "upper75") + return(df) +} + +#' Output processing function to be called by user +#' +#' This function extracts the means, sigmas, thetas, +#' lower (25%) and upper (75%) quartiles, and respective sds +#' +#' @param fit A fitted object returned from fit() +#' @export +#' +extract_all <- function(fit) { + lower <- extract_lower(fit) + upper <- extract_upper(fit) + m <- extract_means(fit) + sig <- 
extract_sigma(fit) + theta <- extract_theta(fit) + df <- rbind(m, sig, theta, + lower, + upper) + return(df) +} + + + + diff --git a/R/fit.R b/R/fit.R new file mode 100644 index 0000000..df50e4c --- /dev/null +++ b/R/fit.R @@ -0,0 +1,252 @@ +#' @useDynLib phenomix, .registration = TRUE +NULL + +#' Fitting function to be called by user +#' +#' This function creates a list of parameters, sets up TMB object and attempts to +#' do fitting / estimation +#' +#' @param data_list A list of data, as output from create_data +#' @param silent Boolean passed to TMB::MakeADFun, whether to be verbose or not (defaults to FALSE) +#' @param inits Optional named list of parameters for starting values, defaults to NULL +#' @param control Optional control list for stats::nlminb. For arguments see ?nlminb. Defaults to eval.max=2000, iter.max=1000, rel.tol=1e-10. For final model runs, the rel.tol should be even smaller +#' @param limits Whether to include limits for stats::nlminb. Can be a list of (lower, upper), or TRUE to use suggested hardcoded limits. Defaults to NULL, +#' where no limits used. +#' @param fit_model Whether to fit the model. If not, returns a list including the data, parameters, and initial values. Defaults to TRUE +#' @importFrom stats runif rnorm +#' @importFrom methods is +#' @export +#' @examples +#' data(fishdist) +#' +#' # example of fitting fixed effects, no trends, no random effects +#' set.seed(1) +#' datalist <- create_data(fishdist[which(fishdist$year > 1970), ], +#' asymmetric_model = FALSE, +#' est_mu_re = FALSE, est_sigma_re = FALSE +#' ) +#' fit <- fit(datalist) +#' # +#' # # example of model with random effects in means only, and symmetric distribution +#' # set.seed(1) +#' # datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE, +#' # est_sigma_re = FALSE) +#' # fit <- fit(datalist) +#' # # example of model with random effects in variances +#' # set.seed(1) +#' # datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = TRUE, +#' # est_mu_re = TRUE) +#' # fit <- fit(datalist) +#' # +#' # # example of model with poisson response +#' # set.seed(1) +#' # datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE, +#' # est_sigma_trend=FALSE, est_mu_trend=FALSE, est_mu_re = TRUE, +#' # family="poisson") +#' # fit <- fit(datalist) +fit <- function(data_list, + silent = FALSE, + inits = NULL, + control = list(eval.max = 2000, iter.max = 1000, rel.tol = 1e-10), + limits = NULL, # can also be a list, or TRUE + fit_model = TRUE) { + + # create list of parameter starting values -- used in both + # asymmetric and symmetric models + parameters <- list( + theta = rnorm(n = data_list$nLevels, log(mean(data_list$y[which(!is.na(data_list$y))]))), + b_mu = rep(0, ncol(data_list$mu_mat)),#runif(ncol(data_list$mu_mat),0,10), #rep(0, ncol(data_list$mu_mat)), + log_sigma_mu_devs = 0, + mu_devs = rep(0, data_list$nLevels), + b_sig1 = rep(1, ncol(data_list$sig_mat)), + b_sig2 = rep(1, ncol(data_list$sig_mat)), + log_sigma1_sd = 0, + sigma1_devs = rep(0, data_list$nLevels), + log_sigma2_sd = 0, + sigma2_devs = rep(0, data_list$nLevels), + log_obs_sigma = 0.0, + log_tdf_1 = 0, + log_tdf_2 = 0, + log_beta_1 = 0.1, + log_beta_2 = 0.1 + ) + parameters$b_mu[1] <- mean(data_list$x[which(!is.na(data_list$y))]) + #CV <- 0.1 + #parameters$b_sig1[1] <- 1# CV * parameters$b_mu[1] + #parameters$b_sig2[1] <- parameters$b_sig1[1] + if (data_list$family == 1) { + # for gaussian, don't log-transform theta + parameters$theta 
<- rep(0, data_list$nLevels) # rnorm(n = data_list$nLevels, mean(data_list$y)) + } + + # If inits is included, use that instead of parameters + # if (!is.null(inits)) parameters <- inits + + # Mapping off params as needed: + tmb_map <- list() + if (data_list$family %in% c(2, 4)) { + # don't include obs_sigma for poisson or binomial + tmb_map <- c(tmb_map, list(log_obs_sigma = as.factor(NA))) + } + + if (data_list$asymmetric == 0) { + # map off pars not needed + tmb_map <- c(tmb_map, list( + b_sig2 = rep(as.factor(NA), ncol(data_list$sig_mat)), + log_sigma2_sd = as.factor(NA), + sigma2_devs = rep(as.factor(NA), data_list$nLevels) + )) + } + + if (data_list$tail_model == 0) { + # then fit gaussian model and map off all tail parameters + tmb_map <- c(tmb_map, list( + log_tdf_1 = as.factor(NA), + log_tdf_2 = as.factor(NA), + log_beta_1 = as.factor(NA), + log_beta_2 = as.factor(NA) + )) + } + if (data_list$tail_model == 1) { + # fit the t-model + if (data_list$asymmetric == 0) { + # then map off the tdf2, because model is symmetric + tmb_map <- c(tmb_map, list( + log_tdf_2 = as.factor(NA), + log_beta_1 = as.factor(NA), + log_beta_2 = as.factor(NA) + )) + } else { + tmb_map <- c(tmb_map, list( + log_beta_1 = as.factor(NA), + log_beta_2 = as.factor(NA) + )) + } + } + if (data_list$tail_model == 2) { + # then fit generalized normal model and map off the student-t parameters + if (data_list$asymmetric == 0) { + # then map off the beta2, because model is symmetric + tmb_map <- c(tmb_map, list( + log_beta_2 = as.factor(NA), + log_tdf_1 = as.factor(NA), + log_tdf_2 = as.factor(NA) + )) + } else { + tmb_map <- c(tmb_map, list( + log_tdf_1 = as.factor(NA), + log_tdf_2 = as.factor(NA) + )) + } + } + if (data_list$asymmetric == 1) { + if (data_list$share_shape == 1) { + if (data_list$tail_model == 1) { + # map off 2nd nu parameter + tmb_map <- c(tmb_map, list( + log_tdf_2 = as.factor(NA) + )) + } + if (data_list$tail_model == 2) { + tmb_map <- c(tmb_map, list( + log_beta_2 = as.factor(NA) + )) + } + } + } + + if (data_list$nLevels == 1) { + data_list$est_mu_re <- 0 + data_list$est_sigma_re <- 0 + } + + # if don't estimate mean random effects map them off + if (data_list$est_mu_re == 0) { + tmb_map <- c(tmb_map, list( + log_sigma_mu_devs = as.factor(NA), + mu_devs = rep(as.factor(NA), data_list$nLevels) + )) + } + + if (data_list$est_sigma_re == 0) { + tmb_map <- c(tmb_map, list( + log_sigma1_sd = as.factor(NA), + log_sigma2_sd = as.factor(NA), + sigma1_devs = rep(as.factor(NA), data_list$nLevels), + sigma2_devs = rep(as.factor(NA), data_list$nLevels) + )) + } else { + if (data_list$asymmetric == 0) { + tmb_map <- c(tmb_map, list( + log_sigma2_sd = as.factor(NA), + sigma2_devs = rep(as.factor(NA), data_list$nLevels) + )) + } + } + + random <- "" + if (data_list$est_mu_re == 1) { + random <- c(random, "mu_devs") + } + if (data_list$est_sigma_re == 1) { + random <- c(random, "sigma1_devs") + if (data_list$asymmetric == TRUE) random <- c(random, "sigma2_devs") + } + random <- random[-1] + if (length(random) == 0) random <- NULL + + obj <- TMB::MakeADFun( + data = data_list, + parameters = parameters, + map = tmb_map, + DLL = "phenomix", + random = random, + silent = silent + ) + + mod_list <- list( + obj = obj, + init_vals = obj$par, + # pars = pars, + # sdreport = sdreport, + data_list = data_list + ) + + if (fit_model == TRUE) { + # Attempt to do estimation + if (!is.null(inits)) { + init <- inits + } else { + init <- obj$par + } + if (is.null(limits)) { + pars <- stats::nlminb( + start = init, + objective = obj$fn, + gradient =
obj$gr, + control = control + ) + } else { + if (is(limits,"list")) { + lower_limits <- limits$lower + upper_limits <- limits$upper + } else { + lim <- limits(parnames = names(obj$par), max_theta = data_list$max_theta) + lower_limits <- lim$lower + upper_limits <- lim$upper + } + + pars <- stats::nlminb( + start = init, objective = obj$fn, + gradient = obj$gr, control = control, + lower = lower_limits, + upper = upper_limits + ) + } + + sdreport <- TMB::sdreport(obj) + mod_list <- c(mod_list, list(pars = pars, sdreport = sdreport, tmb_map = tmb_map, tmb_random = random, tmb_parameters = parameters)) + } + + return(structure(mod_list, class = "phenomix")) +} diff --git a/R/fitting_tmb.R b/R/fitting_tmb.R deleted file mode 100644 index 113214f..0000000 --- a/R/fitting_tmb.R +++ /dev/null @@ -1,211 +0,0 @@ -#' Fit a trinomial mixture model with TMB -#' -#' Fit a trinomial mixture model that optionally includes covariates to estimate -#' effects of factor or continuous variables on proportions. -#' -#' @param formula The model formula for the design matrix. Does not need to have a response specified. If =NULL, then -#' the design matrix is ignored and all rows are treated as replicates -#' @param design_matrix A data frame, dimensioned as number of observations, and covariates in columns -#' @param data_matrix A matrix, with observations on rows and number of groups across columns -#' @param overdispersion Whether or not to include overdispersion parameter, defaults to FALSE -#' @param overdispersion_sd Prior standard deviation on 1/overdispersion parameter, Defaults to inv-Cauchy(0,5) -#' @param prior_sd Optional prior sd / penalty for fixed effects -#' @export -#' @import Rcpp -#' @importFrom stats model.frame -#' -#' @examples -#' \donttest{ -# #y <- matrix(c(3.77, 6.63, 2.60, 0.9, 1.44, 0.66, 2.10, 3.57, 1.33), -# # nrow = 3, byrow = TRUE -# #) -# #fit a model with no covariates -# #fit <- fit_zoidTMB(data_matrix = y) -#' -#' # fit a model with 1 factor -#' #design <- data.frame("fac" = c("spring", "spring", "fall")) -#' #fit <- fit_zoidTMB(formula = ~fac, design_matrix = design, data_matrix = y) -#' -#' } -#' -fit_zoidTMB <- function(formula = NULL, - design_matrix, - data_matrix, - overdispersion = FALSE, - overdispersion_sd = 5, - prior_sd = NA) { - - # if a single observation - if (class(data_matrix)[1] != "matrix") { - data_matrix <- matrix(data_matrix, nrow = 1) - } - - # fill with dummy values - parsed_res <- list(design_matrix = matrix(0, nrow(data_matrix),ncol=1), - var_indx = 1, - n_re_by_group = 1, - tot_re = 1, - n_groups = 1) - est_re <- FALSE - re_group_names <- NA - if (!is.null(formula)) { - model_frame <- model.frame(formula, design_matrix) - model_matrix <- model.matrix(formula, model_frame) - # extract the random effects - res <- parse_re_formula(formula, design_matrix) - if(length(res$var_indx) > 0) { - parsed_res <- res # only update if REs are in formula - est_re <- TRUE - model_matrix <- res$fixed_design_matrix - re_group_names <- res$random_effect_group_names - } - } else { - model_matrix <- matrix(1, nrow = nrow(data_matrix)) - colnames(model_matrix) <- "(Intercept)" - } - - sd_prior <- 1 / ncol(data_matrix) # default if no covariates - if (ncol(model_matrix) > 1) sd_prior <- 1 - use_prior_sd = 0L - if (!is.na(prior_sd)) { - sd_prior <- prior_sd - use_prior_sd = 1L - } else { - sd_prior <- 0 - } - - par_names <- colnames(model_matrix) - - prod_idx <- matrix(0, ncol(data_matrix), ncol(data_matrix)-1) - for(j in 1:ncol(data_matrix)){ - prod_idx[j,] <- 
seq(1,ncol(data_matrix),1)[-j] - } - - N_bins = ncol(data_matrix) - N_covar = ncol(model_matrix) - n_groups = parsed_res$n_group - tot_re = parsed_res$tot_re - - tmb_data <- list( - X = data_matrix, - prod_idx = prod_idx - 1L, # -1 for indexing to 0 - design_X = model_matrix, - overdisp = ifelse(overdispersion == TRUE, 1, 0), - overdispersion_sd = overdispersion_sd, - use_prior_sd = use_prior_sd, - prior_sd = sd_prior, - #design_Z = parsed_res$design_matrix, # design matrix for Z (random int) - re_var_indx = c(parsed_res$var_indx, 1) - 1L, # index of the group for each re - #n_re_by_group = c(parsed_res$n_re_by_group, 1), # number of random ints per group - #tot_re = tot_re, # total number of random ints, across all groups - n_groups = n_groups, - est_re = as.numeric(est_re) - ) - - # estimated parameters from TMB. - # beta is included in all models - # zeta_sds and zeta_vec are only in random effects models - # phi_inv is only in overdispersion model - tmb_pars <- list(beta_raw = matrix(0, N_bins-1, N_covar), - log_phi_inv = 0) - #log_zeta_sds = rep(0, n_groups), - #zeta_vec = rep(0, (N_bins - 1) * tot_re)) - - tmb_map <- c() - if(overdispersion == FALSE) { - tmb_map <- c(tmb_map, list(log_phi_inv = as.factor(NA))) - } - - #random <- "zeta_vec" - #if(est_re == FALSE) { - # random <- NULL - #tmb_map <- c(tmb_map, - # list(z = rep(as.factor(NA), (N_bins - 1) * tot_re))) - #tmb_map <- c(tmb_map, list(log_zeta_sds = as.factor(rep(NA, n_groups)),#log_zeta_sds = as.factor(rep(NA, n_groups)), - # zetavec = as.factor(rep(NA, (N_bins - 1) * tot_re)))) - #} - - obj <- TMB::MakeADFun( - data = tmb_data, - parameters = tmb_pars, - map = tmb_map, - DLL = "zoidtmb", - random = NULL, - silent = TRUE - ) - - pars <- stats::nlminb( - start = obj$par, - objective = obj$fn, - gradient = obj$gr - ) - sdreport <- TMB::sdreport(obj) - - mod_list <- list(pars = pars, sdreport = sdreport, - tmb_data = tmb_data, tmb_map = tmb_map, - #tmb_random = random, - tmb_parameters = tmb_pars) - - # tidy the mu and beta vectors - idx <- grep("mu", names(sdreport$value)) - mod_list$mu <- matrix(sdreport$value[idx], nrow=nrow(data_matrix)) - mod_list$mu_se <- matrix(sdreport$sd[idx], nrow=nrow(data_matrix)) - - return(mod_list) -} - -#' Fit a trinomial mixture model that optionally includes covariates to estimate -#' effects of factor or continuous variables on proportions. -#' -#' @param formula The model formula for the design matrix. -#' @param data The data matrix used to construct RE design matrix -#' @importFrom stats model.matrix as.formula -parse_re_formula <- function(formula, data) { - # Convert the formula to a character string - formula_str <- as.character(formula) - # Split the formula into parts based on '+' and '-' symbols - formula_parts <- unlist(strsplit(formula_str, split = "[-+]", perl = TRUE)) - # Trim whitespace from each part - formula_parts <- trimws(formula_parts) - # Identify parts containing a bar '|' - random_effects <- grep("\\|", formula_parts, value = TRUE) - fixed_effects <- setdiff(formula_parts, random_effects) - - # Create design matrix for fixed effects. 
Catch the cases where no fixed - # effects are included, or intercept-only models used - if (length(fixed_effects) > 1 || (length(fixed_effects) == 1 && fixed_effects != "~")) { - fixed_formula_str <- paste("~", paste(fixed_effects, collapse = "+")) - } else { - fixed_formula_str <- "~ 1" # Only intercept - } - fixed_design_matrix <- model.matrix(as.formula(fixed_formula_str), data) - - random_effect_group_names <- sapply(random_effects, function(part) { - # Extract the part after the '|' - split_part <- strsplit(part, "\\|", perl = TRUE)[[1]] - # Remove the closing parenthesis and trim - group_name <- gsub("\\)", "", split_part[2]) - trimws(group_name) - }) - - # create design matrices by group - for(i in 1:length(random_effects)) { - new_formula <- as.formula(paste("~", random_effect_group_names[i], "-1")) - if(i ==1) { - design_matrix <- model.matrix(new_formula, data) - var_indx <- rep(1, ncol(design_matrix)) - n_re <- length(var_indx) - } else { - design_matrix <- cbind(design_matrix, model.matrix(new_formula, data)) - var_indx <- c(var_indx, rep(i, ncol(design_matrix))) - n_re <- c(n_re, length(ncol(design_matrix))) - } - } - n_groups <- 0 - if(length(var_indx) > 0) n_groups <- max(var_indx) - return(list(design_matrix = design_matrix, var_indx = var_indx, n_re_by_group = n_re, - tot_re = sum(n_re), n_groups = n_groups, - fixed_design_matrix = fixed_design_matrix, - random_effect_group_names = random_effect_group_names)) -} diff --git a/R/limits.R b/R/limits.R new file mode 100644 index 0000000..d96d1ba --- /dev/null +++ b/R/limits.R @@ -0,0 +1,21 @@ +#' Internal function to assign upper and lower bounds to parameters, based on their names +#' +#' Arguments can be adjusted as needed +#' +#' @param parnames A vector of character strings or names used for estimation +#' @param max_theta A scalar (optional) giving the maximum value of theta, log(pred) +#' +limits <- function(parnames, max_theta) { + df <- data.frame(name = parnames, lower = -1000, upper = 1000) + + for (i in 1:length(parnames)) { + if (length(grep("log_sigma1", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(-Inf, 10) + if (length(grep("log_sigma2", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(-Inf, 10) + if (length(grep("theta", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(0, max_theta) + if (length(grep("log_sigma_mu_devs", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(-Inf, 6) + if (length(grep("log_obs_sigma", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(-Inf, 5) + if (length(grep("log_tdf", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(-Inf, 10) + if (length(grep("log_beta", parnames[i])) > 0) df[i, c("lower", "upper")] <- c(log(0.01), log(1e5)) + } + return(df) +} diff --git a/R/methods.R b/R/methods.R new file mode 100644 index 0000000..cc74944 --- /dev/null +++ b/R/methods.R @@ -0,0 +1,165 @@ +#' Extract the number of observations of a phenomix model. Modified from sdmTMB implementation +#' +#' @param object The fitted phenomix model object +#' @importFrom stats nobs +#' @export +#' @noRd +nobs.phenomix <- function(object, ...) { + length(object$data_list$y) +} + + +#' Extract the log likelihood of a phenomix model. Modified from sdmTMB implementation +#' +#' @param object The fitted phenomix model object +#' @importFrom stats logLik +#' @export +#' @noRd +logLik.phenomix <- function(object, ...)
{ + val <- -object$pars$objective # nlminb stores the negative log-likelihood in 'objective' + + nobs <- nobs.phenomix(object) + df <- length(object$pars$par) # fixed effects only + structure(val, + nobs = nobs, nall = nobs, df = df, + class = "logLik" + ) +} + +#' Extract the AIC of a phenomix model. Modified from sdmTMB implementation +#' +#' @param fit The fitted phenomix model +#' @param scale The scale (not used) +#' @param k Penalization parameter, defaults to 2 +#' @param ... Anything else +#' @noRd +#' +#' @export +extractAIC.phenomix <- function(fit, scale, k = 2, ...) { + L <- logLik(fit) + edf <- attr(L, "df") + return(data.frame("df" = edf, "AIC" = -2 * L + k * edf)) +} + +#' @importFrom stats predict +#' @export +fitted.phenomix <- function(object, ...) { + predict(object) +} + +#' Get predicted values from model object, copying glmmTMB 'fast' implementation +#' +#' @param object The fitted phenomix model +#' @param se.fit Boolean, whether to produce SEs, defaults to FALSE +#' @param ... Extra parameters +#' @importFrom TMB MakeADFun sdreport +#' @noRd +#' +#' @export +predict.phenomix <- function (object, se.fit=FALSE, ...) { + ee <- environment(object$obj$fn) + + new_sdreport <- get_sdreport(object) + + indx <- which(names(new_sdreport$value)=="pred") + pred <- new_sdreport$value[indx] + + dat <- data.frame(y = ee$data$y, + yint = ee$data$yint, + x = ee$data$x, + years = ee$data$years, + pred = pred) + + if(se.fit==TRUE) { + dat$`se.fit` <- new_sdreport$sd[indx] + } + + return(dat) +} + +#' Get parameters from model object, copying glmmTMB 'fast' implementation +#' +#' @param object The fitted phenomix model +#' @importFrom TMB MakeADFun sdreport +#' @export +#' @examples +#' data(fishdist) +#' +#' # example of fitting fixed effects, no trends, no random effects +#' set.seed(1) +#' datalist <- create_data(fishdist[which(fishdist$year > 1970), ], +#' asymmetric_model = FALSE, +#' est_mu_re = FALSE, est_sigma_re = FALSE +#' ) +#' fit <- fit(datalist) +#' p <- pars(fit) +#' names(p) +#' @export +#' @keywords internal +pars <- function (object) { + new_sdreport <- get_sdreport(object) + #new_sdreport[["pdHess"]] = NULL + new_sdreport[["env"]] = NULL + #new_sdreport[["gradient.fixed"]] = NULL + return(new_sdreport) +} + + +#' Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation +#' +#' @param object The fitted phenomix model +#' @param ... Additional arguments +#' @importFrom TMB MakeADFun sdreport +#' @importFrom nlme fixef +#' @export +fixef.phenomix <- function (object, ...) { + new_sdreport <- get_sdreport(object) + p <- data.frame("par" = names(new_sdreport$par.fixed), + "estimate" = new_sdreport$par.fixed, + "se" = sqrt(diag(new_sdreport$cov.fixed))) + return(p) +} + + +#' Get random effects parameters from model object, copying glmmTMB 'fast' implementation +#' +#' @param object The fitted phenomix model +#' @param ... Additional arguments +#' @importFrom TMB MakeADFun sdreport +#' @importFrom nlme ranef +#' @export +ranef.phenomix <- function (object, ...)
{ + new_sdreport <- get_sdreport(object) + p <- data.frame("par" = names(new_sdreport$par.random), + "estimate" = new_sdreport$par.random, + "se" = sqrt(new_sdreport$diag.cov.random)) + return(p) +} + + +#' Get sdreport for predictions / coefficients, copying glmmTMB 'fast' implementation +#' +#' @param object The fitted phenomix model +#' @keywords internal +get_sdreport = function(object) { + ee <- environment(object$obj$fn) + lp <- ee$last.par.best # best maximum likelihood estimate, similar to what glmmTMB uses + + names <- unique(names(lp)) + par_list <- ee$parList() + for(i in 1:length(names)) { + par_indx <- which(names(par_list) == names[i]) + par_list[[par_indx]] <- as.numeric(lp[which(names(lp) == names[i])]) + } + + new_tmb_obj <- MakeADFun( + data = object$data_list, + parameters = par_list, + map = object$tmb_map, + random = object$tmb_random, + DLL = "phenomix", + silent = TRUE + ) + new_sdreport <- sdreport(new_tmb_obj) + return(new_sdreport) +} diff --git a/R/zoidtmb-package.R b/R/phenomix-package.R similarity index 50% rename from R/zoidtmb-package.R rename to R/phenomix-package.R index ee7f281..df04248 100644 --- a/R/zoidtmb-package.R +++ b/R/phenomix-package.R @@ -1,9 +1,9 @@ -#' The 'zoidtmb' package. +#' The 'phenomix' package. #' #' @description A DESCRIPTION OF THE PACKAGE #' #' @docType package -#' @name zoidtmb-package +#' @name phenomix-package #' @keywords internal -#' @useDynLib zoidtmb, .registration = TRUE +#' @useDynLib phenomix, .registration = TRUE NULL diff --git a/R/plot_diagnostics.R b/R/plot_diagnostics.R new file mode 100644 index 0000000..0ca1356 --- /dev/null +++ b/R/plot_diagnostics.R @@ -0,0 +1,99 @@ +#' Plotting function to be called by user +#' +#' These functions make some basic plots for the user +#' +#' @param fitted A fitted model object +#' @param type A plot type for ggplot, either "timing" or "scatter" +#' @param logspace Whether to plot the response in log space, defaults to TRUE +#' @import ggplot2 +#' @importFrom dplyr left_join +#' @export +plot_diagnostics <- function(fitted, type = "timing", logspace = TRUE) { + + # rebuild data frame + df <- predict(fitted) + + # join in mean + mus <- data.frame( + years = unique(df$years), + mu = fitted$sdreport$value[which(names(fitted$sdreport$value) == "mu")] + ) + df <- left_join(df, mus) + df$timing <- as.factor(ifelse(df$x < df$mu, "pre", "post")) + + if (type == "scatter") { + if (logspace == TRUE) { + if(fitted$data_list$family %in% c(2,3,5)) { + g <- ggplot(df, aes(pred, log(y), fill = timing, col = timing)) + + geom_point(alpha = 0.5) + + facet_wrap(~years, scales = "free") + + geom_abline(intercept = 0, slope = 1) + + xlab("Ln predicted") + + ylab("Ln obs") + } + if(fitted$data_list$family %in% c(1)) { + g <- ggplot(df, aes(log(pred), log(y), fill = timing, col = timing)) + + geom_point(alpha = 0.5) + + facet_wrap(~years, scales = "free") + + geom_abline(intercept = 0, slope = 1) + + xlab("Ln predicted") + + ylab("Ln obs") + } + } else { + if(fitted$data_list$family %in% c(2,3,5)) { + g <- ggplot(df, aes(exp(pred), y, fill = timing, col = timing)) + + geom_point(alpha = 0.5) + + facet_wrap(~years, scales = "free") + + geom_abline(intercept = 0, slope = 1) + + xlab("Predicted") + + ylab("Obs") + } + if(fitted$data_list$family %in% c(1)) { + g <- ggplot(df, aes(pred, y, fill = timing, col = timing)) + + geom_point(alpha = 0.5) + + facet_wrap(~years, scales = "free") + + geom_abline(intercept = 0, slope = 1) + + xlab("Predicted") + + ylab("Obs") + } + } + } + if (type == "timing") { + if
(logspace == TRUE) { + if(fitted$data_list$family %in% c(2,3,5)) { + g <- ggplot(df, aes(x, pred, fill = timing, col = timing)) + + facet_wrap(~years, scales = "free") + + xlab("Calendar day") + + ylab("Ln pred and obs") + + geom_point(aes(x, log(y), fill = timing, col = timing), size = 1, alpha = 0.5) + + geom_line(col = "black") + } + if(fitted$data_list$family %in% c(1)) { + g <- ggplot(df, aes(x, log(pred), fill = timing, col = timing)) + + facet_wrap(~years, scales = "free") + + xlab("Calendar day") + + ylab("Ln pred and obs") + + geom_point(aes(x, log(y), fill = timing, col = timing), size = 1, alpha = 0.5) + + geom_line(col = "black") + } + } else { + if(fitted$data_list$family %in% c(2,3,5)) { + g <- ggplot(df, aes(x, exp(pred), fill = timing, col = timing)) + + facet_wrap(~years, scales = "free") + + xlab("Calendar day") + + ylab("Pred and obs") + + geom_point(aes(x, y, fill = timing, col = timing), size = 1, alpha = 0.5) + + geom_line(col = "black") + } + if(fitted$data_list$family %in% c(1)) { + g <- ggplot(df, aes(x, pred, fill = timing, col = timing)) + + facet_wrap(~years, scales = "free") + + xlab("Calendar day") + + ylab("Pred and obs") + + geom_point(aes(x, y, fill = timing, col = timing), size = 1, alpha = 0.5) + + geom_line(col = "black") + } + } + } + return(g) +} diff --git a/README.Rmd b/README.Rmd index 7ba9a0b..f4f7cec 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,34 +14,73 @@ knitr::opts_chunk$set( ) ``` -## zoidtmb +# phenomix +R package for fitting distributions to run timing data via maximum likelihood -`zoidtmb` is an implementation of the `zoid` package in TMB, using marginal maximum likelihood instead of Bayesian estimation. This may be particularly useful for datasets with large dimensions. +[![R build status](https://github.com/ericward-noaa/phenomix/workflows/R-CMD-check/badge.svg)](https://github.com/ericward-noaa/phenomix/actions) -The syntax of fitting models matches `zoid`, and can be done with +pkgdown site: [https://ericward-noaa.github.io/phenomix/](https://ericward-noaa.github.io/phenomix/) -```{r eval=FALSE} -fit <- fit_zoidTMB(formula = formula, # required formula - design_matrix = design_matrix, # optional covariates - data_matrix = data.matrix, # required data matrix - overdispersion = FALSE, # optional estimated overdispersion - overdispersion_sd = 5, # optional penalty/prior on overdispersion - prior_sd = NA) # optional prior/penalty on fixed effects +## Installation + +You can install phenomix with: + +```{r, eval=FALSE} +remotes::install_github("ericward-noaa/phenomix", build_vignettes = TRUE) +``` + +Load libraries +```{r} +library(phenomix) +library(ggplot2) +``` + +## Functions + +The package phenomix provides a suite of curve-fitting tools to describe data generated from a process concentrated in time (in fisheries, this occurs with counts over time of salmon returning from the ocean to spawn, or juvenile fish emigrating from streams to the ocean). + +```{r echo=FALSE} +set.seed(123) +df = expand.grid(year = 1:10, doy=100:250) +mus = rnorm(10, 175, 10) +thetas = runif(10, 5, 8) +sigmas = runif(10, 7, 10) +df$y = dnorm(df$doy, mus[df$year], sigmas[df$year], log=TRUE) + thetas[df$year] +df$year = as.factor(df$year) +df$data = rnorm(nrow(df), df$y, 0.2) +``` + +```{r fig.cap="Predicted (black line) and observed counts (red dots) for hypothetical dataset.
Multiple observations may exist for some days, or no observations on others.", echo=FALSE} +ggplot(dplyr::filter(df,year==1), aes(doy,exp(y))) + + geom_line() + xlab("Calendar day") + ylab("Count") + + geom_point(aes(doy,exp(data)),col="red") + theme_bw() ``` -Parameter estimates and uncertainty can be extracted from -```{r eval=FALSE} -fit$sdreport +In a given year, the curve might be described by a symmetric or asymmetric Gaussian or Student-t distribution (shown here in log-scale on the y-axis). Questions of interest might be +- are the means (x-axis) shifting through time? +- are the variances shifting through time? +- does the model support a symmetric or asymmetric distribution? + +```{r echo=FALSE} +ggplot(df, aes(doy,exp(y),col=year,group=year)) + + geom_line() + xlab("Calendar day") + ylab("Count") + theme_bw() ``` -Derived quantities (means) are provided with `fit$mu` and uncertainties `fit$mu_se`. +## Examples + +The main functions are `create_data()` and `fit()`. See `?create_data` and `?fit` for additional details and examples. A vignette includes additional detail, examples of several models, and the available function arguments: [https://ericward-noaa.github.io/phenomix/](https://ericward-noaa.github.io/phenomix/). + +## References + +For description of fisheries applications of asymmetric models: -For additional details, see the documentation for `zoid`, (https://github.com/nwfsc-cb/zoid/tree/main) +Methot, R.D. 2000. Technical description of the stock synthesis assessment program. U.S. Dept. Commer., NOAA Tech. Memo. NMFS-NWFSC-43, 46 p. [link](https://repository.library.noaa.gov/view/noaa/3172/noaa_3172_DS1.pdf) -## How to cite +For statistical background of asymmetric models: -Jensen, Alexander J., Ryan P. Kelly, Eric C. Anderson, William H. Satterthwaite, Andrew Olaf Shelton, and Eric J. Ward. 2022. “Introducing Zoid: A Mixture Model and R Package for Modeling Proportional Data with Zeros and Ones in Ecology.” Ecology 103(11): e3804. https://doi.org/10.1002/ecy.3804 +Rubio, F.J. and Steel, M.F.J. 2020. The family of two-piece distributions. Significance, 17(1), 12--13. [link](https://rss.onlinelibrary.wiley.com/doi/full/10.1111/j.1740-9713.2020.01352.x) +Wallis, K.F. 2014. The two-piece normal, binormal, or double Gaussian distribution: Its origin and rediscoveries. Statistical Science, 29(1), 106--112. [link](https://arxiv.org/abs/1405.4995) ## NOAA Disclaimer diff --git a/data/chinook.rda b/data/chinook.rda deleted file mode 100644 index 60bfee6..0000000 Binary files a/data/chinook.rda and /dev/null differ diff --git a/data/chum.rda b/data/chum.rda new file mode 100644 index 0000000..e79bc61 Binary files /dev/null and b/data/chum.rda differ diff --git a/data/coddiet.rda b/data/coddiet.rda deleted file mode 100644 index 37d0010..0000000 Binary files a/data/coddiet.rda and /dev/null differ diff --git a/data/fishdist.rda b/data/fishdist.rda new file mode 100644 index 0000000..2f1ad31 Binary files /dev/null and b/data/fishdist.rda differ diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..f58d70c --- /dev/null +++ b/docs/404.html @@ -0,0 +1,122 @@ +Page not found (404) • phenomix
+Content not found. Please use links in the navbar.
diff --git a/docs/articles/a1_examples.html b/docs/articles/a1_examples.html new file mode 100644 index 0000000..735c19a --- /dev/null +++ b/docs/articles/a1_examples.html @@ -0,0 +1,447 @@ +Fitting time varying phenology models with the phenomix package • phenomix
+library(ggplot2)
+library(phenomix)
+#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
+#> TMB was built with Matrix version 1.5.4
+#> Current Matrix version is 1.5.4.1
+#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
+library(dplyr)
+library(TMB)
+
+

Overview +

+

The phenomix R package is designed to be a robust and +flexible tool for modeling phenological change. The name of the package +is in reference to modeling run timing data for salmon, but more +generally this framework can be applied to any kind of phenology data – +timing of leaf-out or flowering in plants, breeding bird surveys, etc. +Observations may be collected across multiple years, or for a single +year, and may be discrete or continuous. For a given time step, these +data may look like this:

+
+set.seed(123)
+df = data.frame(x = seq(110,150,1))
+df$y = dnorm(df$x, mean = 130, 10) * 10000
+df$obs = rnorm(nrow(df), df$y, 30)
+ggplot(df, aes(x, obs)) + geom_point() + 
+  geom_smooth() + 
+  theme_bw() + 
+  xlab("Day of year") + 
+  ylab("Observation")
+

+

For demonstration purposes, we included an example dataset, based on run timing data for Pacific salmon.

+
+glimpse(fishdist)
+#> Rows: 1,005
+#> Columns: 3
+#> $ year   <dbl> 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 196…
+#> $ number <int> 5, 5, 5, 3, 9, 4, 3, 3, 5, 3, 3, 7, 2, 9, 21, 11, 75, 368, 385,…
+#> $ doy    <int> 95, 113, 120, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132,…
+
+

Manipulating data for estimation +

+

The main data processing function in the package is called +create_data, which builds the data and model arguments to +be used for fitting. Note the names of the variables in our dataset,

+
+names(fishdist)
+#> [1] "year"   "number" "doy"
+

We’ll start with the default arguments for the function, before going +into detail about what they all mean. The only argument that we’ve +initially changed from the default is using +asymmetric_model = FALSE to fit the symmetric model.

+

First, if we’re fitting a model with covariates affecting the mean +and standard deviation of the phenological response, we need to create a +data frame indexed by time step.

+
+cov_dat = data.frame(nyear = unique(fishdist$year))
+# rescale year -- could also standardize with scale()
+cov_dat$nyear = cov_dat$nyear - min(cov_dat$nyear) 
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "gaussian")
+

The min_number argument represents an optional threshold +below which data is ignored. The variable argument is a +character identifying the name of the response variable in the data +frame. Similarly, the time and date arguments +specify the labels of the temporal (e.g. year) and seasonal variables +(day of year).

+

The remaining arguments to the function concern model fitting. The phenomix package can fit asymmetric or symmetric models to the distribution data (whether the left side of the curve is the same shape as the right); this is controlled with the asymmetric_model argument, which defaults to TRUE. The means and standard deviations that control the distributions are allowed to vary over time (as fixed or random effects), and we can also include covariates on the means and standard deviations (via formulas). Mathematically, the mean location for a model with random effects is \(u_y \sim Normal(u_0, u_{\sigma})\), where \(u_0\) is the global mean and \(u_{\sigma}\) is the standard deviation. Adding a trend in normal space, this becomes \(u_y \sim Normal(u_0 + b_u*y, u_{\sigma})\), where the parameter \(b_u\) controls the trend. To keep the variance parameters controlling the tails of the run timing distribution positive, we estimate those random effects in log-space. This is specified as \(\sigma_y = exp(\sigma_0 + b_{\sigma}*y + \delta_{y})\), with \(\delta_{y} \sim Normal(0, \sigma_{\sigma})\). Trends in both the mean and standard deviation are specified through the mu and sigma formula arguments (here, mu = ~ nyear and sigma = ~ nyear). As the equations above describe, the trends are log-linear for the standard deviation parameters, but in normal space for the mean parameters.

+

Finally, the tails of the response curves may be estimated via +fitting a Gaussian (normal) distribution, Student-t distribution, or generalized +normal distribution. By default, the Student-t tails are estimated, +and this parameter is controlled with the tail_model +argument (“gaussian”, “student_t”, “gnorm”).

+

Last, we can model the observed count data with a number of different distributions, and set this with the family argument. Currently supported distributions include the lognormal (default, "lognormal"), Poisson ("poisson"), Negative Binomial ("negbin"), Binomial ("binomial"), and Gaussian ("gaussian"). The lognormal, Poisson, and Negative Binomial models include a log link; the Gaussian model includes an identity link, and the Binomial model a logit link.

+
+
+
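For example, a minimal sketch (assuming the same fishdist dataset and otherwise default arguments) that swaps in a Poisson observation model with its log link:

+# sketch: same data, but a Poisson response instead of the default lognormal
+datalist_pois = create_data(fishdist,
+  variable = "number",
+  time = "year",
+  date = "doy",
+  asymmetric_model = FALSE,
+  family = "poisson")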

Fitting the model +

+

Next, we’ll use the fit function to do maximum +likelihood estimation in TMB. Additional arguments can be found in the +help file, but the most important one is the list of data created above +with create_data.

+
+set.seed(1)
+fitted = fit(datalist)
+

We don’t get any warnings out of the estimation, but let’s look at +the sdreport in more detail. fitted is of the +phenomix class and contains the following

+
+names(fitted)
+#> [1] "obj"            "init_vals"      "data_list"      "pars"          
+#> [5] "sdreport"       "tmb_map"        "tmb_random"     "tmb_parameters"
+

The init_vals element contains the initial parameter values from which optimization was started, and data_list the raw data. The pars component contains information about convergence and iterations, including the convergence code (0 = successful convergence),

+
+fitted$pars$convergence
+#> [1] 0
+

This looks like the model has converged. Note that relative convergence (code = 4) sometimes won't throw warnings. We can also check the standard errors, which were estimated successfully (another good sign of convergence). These are all included in the sdreport – the parameters and their standard errors are accessible with

+
+sdrep_df = data.frame("par"=names(fitted$sdreport$value),
+  "value"=fitted$sdreport$value, "sd"=fitted$sdreport$sd)
+head(sdrep_df)
+#>     par    value        sd
+#> 1 theta 8.399995 0.2234016
+#> 2 theta 8.638915 0.2258454
+#> 3 theta 8.467173 0.2068679
+#> 4 theta 8.154405 0.1577986
+#> 5 theta 8.111876 0.1897451
+#> 6 theta 8.243943 0.2206906
+

If, for some reason, these need to be re-generated, the obj object can be passed to

+
+TMB::sdreport(fitted$obj)
+
+
+

Getting coefficients +

+

There are three ways to get coefficients and uncertainty estimates out of model objects. First, we can use the standard fixef and ranef methods to return the fixed and random effect coefficients, respectively

+
+fixef(fitted)
+
+ranef(fitted)
+

Calling the pars function is another way; this returns a list of objects, including the full variance-covariance matrix of the fixed effects and the gradient.

+
+pars(fitted)
+#> sdreport(.) result
+#>                       Estimate Std. Error
+#> log_sigma1_sd       1.06408861 0.17047802
+#> theta               8.39999518 0.22340156
+#> theta               8.63891509 0.22584537
+#> theta               8.46717349 0.20686792
+#> theta               8.15440479 0.15779864
+#> theta               8.11187622 0.18974509
+#> theta               8.24394282 0.22069056
+#> theta               8.16297796 0.20627479
+#> theta               8.18093911 0.18399646
+#> theta               8.44583428 0.18735588
+#> theta               8.11022595 0.15356321
+#> theta               7.82421118 0.16969770
+#> theta               8.19871109 0.19105389
+#> theta               8.24277550 0.20384630
+#> theta               8.29030015 0.20386679
+#> theta               8.40639953 0.18200414
+#> theta               8.14420040 0.19208359
+#> theta               7.87879972 0.17770564
+#> theta               8.08659425 0.18277170
+#> theta               8.10094697 0.16879708
+#> theta               8.10021210 0.19721592
+#> theta               7.88135896 0.17350336
+#> log_sigma_mu_devs   1.54231647 0.17917533
+#> log_obs_sigma       0.11308948 0.02285357
+#> b_mu              144.09324082 2.11794216
+#> b_mu               -0.12661799 0.18018190
+#> b_sig1             10.63160617 1.27793188
+#> b_sig1             -0.02649122 0.10913118
+#> Maximum gradient component: 0.000472572
+
+
+

Getting predicted values +

+

Predicted values can be returned with a call to predict. +The se.fit argument is optional,

+
+predict(fitted, se.fit=TRUE)
+
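If the returned object follows the usual stats::predict se.fit convention (an assumption here; check str() on the result to confirm), approximate 95% intervals could be sketched as

+p = predict(fitted, se.fit=TRUE)
+# assumes p$fit and p$se.fit components, per stats::predict conventions
+lower = p$fit - 1.96 * p$se.fit
+upper = p$fit + 1.96 * p$se.fit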
+
+

Plotting results +

+

Using our fitted object, there are some basic plotting functions +included for diagnostics. Let’s plot run timing over time, by year:

+
+g = plot_diagnostics(fitted, type="timing", logspace=TRUE)
+#> Joining with `by = join_by(years)`
+g
+

+Fitted symmetric model with tails from a Gaussian distribution +

+
+

The object returned by plot_diagnostics is just a ggplot object, so additional arguments or themes can be added with + statements. The plot can be shown in normal space by setting logspace=FALSE. These run timing plots are the default, but additional scatterplots of observed vs predicted values can be shown by setting type="scatter".
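For example (a minimal sketch; the theme and plot type here are illustrative):

+g = plot_diagnostics(fitted, type="timing", logspace=FALSE)
+g + ggplot2::theme_classic()
+
+# scatterplot of observed vs predicted values
+plot_diagnostics(fitted, type="scatter")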

+
+
+

Additional examples +

+

First, we can try to fit the same model, but using a Student-t distribution for the tails.

+
+set.seed(2)
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "student_t")
+
+fitted_t = fit(datalist)
+
+plot_diagnostics(fitted_t)
+#> Joining with `by = join_by(years)`
+

+Fitted asymmetric model with heavy tails from a t-distribution +

+
+

We can also compare the two models using AIC. Note we’re not comparing random effects structures (where we’d need REML), and that this is an approximation (because only fixed effects parameters are included). This comparison shows that, maybe not surprisingly, the Student-t model is more parsimonious than the Gaussian tailed model (aic_2 < aic_1).

+
+aic_1 = extractAIC(fitted)$AIC
+aic_1
+#> [1] 3226.133
+
+aic_2 = extractAIC(fitted_t)$AIC
+aic_2
+#> [1] 2448.533
+

Second, we can try to fit the same model, but using a generalized normal distribution. This distribution has a plateau or ‘flat top’. For examples, see the gnorm package on CRAN or the Wikipedia page.

+
+set.seed(5)
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  tail_model = "gnorm")
+fitted = fit(datalist, limits = TRUE)
+
+
+

Diagnosing lack of convergence +

+

In addition to the warning message about the Hessian not being +positive definite, the NaNs in the sd column are a sure +sign that things aren’t converging.

+

Fixes to improve convergence include starting from a different set of starting values (try passing inits into the fit function) or placing stricter bounds on convergence statistics. Or it may be that the model isn’t a good fit to the data. Specifically, the convergence criterion can be modified with the control argument – which is passed into stats::nlminb. One parameter in this list that can be changed is rel.tol, e.g.

+
+fitted = fit(..., control=list(rel.tol = 1.0e-12,
+  eval.max=4000, iter.max=4000))
+
+
+
+ + + +
+ + + +
+ +
+

+


+
+ +
+
+ + + + + + + + diff --git a/docs/articles/a1_examples_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/a1_examples_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/a1_examples_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/a1_examples_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-1-1.png new file mode 100644 index 0000000..319912f Binary files /dev/null and b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/a1_examples_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..03134ce Binary files /dev/null and b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/articles/a1_examples_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..bb2384b Binary files /dev/null and b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/a1_examples_files/figure-html/unnamed-chunk-14-1.png b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-14-1.png new file mode 100644 index 0000000..03134ce Binary files /dev/null and b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-14-1.png differ diff --git a/docs/articles/a1_examples_files/figure-html/unnamed-chunk-16-1.png b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-16-1.png new file mode 100644 index 0000000..fe86385 Binary files /dev/null and b/docs/articles/a1_examples_files/figure-html/unnamed-chunk-16-1.png differ diff --git a/docs/articles/a2_covariates.html b/docs/articles/a2_covariates.html new file mode 100644 index 0000000..01a2ac8 --- /dev/null +++ b/docs/articles/a2_covariates.html @@ -0,0 +1,206 @@ + + + + + + + +Including covariates • phenomix + + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+library(ggplot2)
+library(phenomix)
+#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
+#> TMB was built with Matrix version 1.5.4
+#> Current Matrix version is 1.5.4.1
+#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
+library(dplyr)
+library(TMB)
+
+

Covariates +

+

Covariates may be included in phenomix models in several +ways. First, we allow them to affect the mean,

+

\[\mu = \mathbf{X}\mathbf{B} + \mathbf{Z}\mathbf{\alpha}\]

+

where fixed effect covariates \(\mathbf{X}\) are linked to the mean via coefficients \(\mathbf{B}\) and random effects \(\mathbf{\alpha}\) are linked to the mean via design matrix \(\mathbf{Z}\). For simplicity, we only assume that the random effects are IID in time, e.g. \(\alpha_{y} \sim Normal(0,\sigma_{\mu})\). The random effects are optional of course, and may be turned on / off with the est_mu_re argument.

+

Fixed effects for the mean at minimum include a global intercept, but may include any other covariates via the formula interface. The formula is included as an argument to the create_data() function, and defaults to mu ~ 1, or an intercept-only model. Including temperature could be done as mu ~ temp, which also includes an intercept.

+

Importantly, if covariates are to be included in the mean or standard +deviation, a second data frame covar_data must be included +that has a unique covariate value for each time slice of data +(e.g. year). For example, in the example above with temperature, covar +data would have to look something like this:

+
+# temp is a dummy variable here representing annual deviations in temperature, 
+# but you could replace it here with your own data
+covar_data = data.frame(year = 2000:2020, 
+                        temp = rnorm(21,mean=0,sd=1))
+
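Putting this together, a minimal sketch of a model with temperature affecting the mean might look like the following (df here is a hypothetical dataset with columns number, year, and doy, whose years match covar_data$year):

+datalist = create_data(df,
+  variable = "number",
+  time = "year",
+  date = "doy",
+  mu = ~ temp,
+  covar_data = covar_data)
+fitted = fit(datalist)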

Similarly, we could include a linear trend in the mean +mu ~ nyear, where nyear is a numeric version of year. One +way to pass in the data would be simply making nyear +redundant with year,

+
+covar_data = data.frame(year = 2000:2020)
+covar_data$nyear <- covar_data$year
+

However, we’ve found that approach can be slightly unstable. Instead, +we can center or scale the numeric year variable. Here, we’ll scale it +to start at 1.

+
+covar_data = data.frame(year = 2000:2020)
+covar_data$nyear <- covar_data$year - min(covar_data$year) + 1
+

In addition to the mean, we allow covariates to affect the standard deviation. The overall approach is exactly the same as for the mean, however a couple points are worth highlighting:

  • With the mean, we did not use a link function. With the standard deviation(s), the covariate effects are estimated in log-space, to keep standard deviations positive:

\[log(\sigma_{1}) = \mathbf{X}\mathbf{B} + \mathbf{Z}\mathbf{\alpha}\]

  • For asymmetric models, we assume that the same covariates act on both the left and right sides of the peak (in other words, we do not allow separate formulas for each side)
  • The relationship between covariates and the variances is specified through a separate formula, sigma ~ 1 (see the sketch after this list)
  • For asymmetric models, we estimate different coefficients on each side, so for a model with sigma ~ temp we would estimate a separate temperature effect for each of the left and right standard deviations
  • Random effects in the standard deviation are turned on/off with the est_sigma_re argument in create_data()
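As a minimal sketch (reusing the hypothetical df and covar_data from the sketch above), covariate effects on the standard deviation, with random effects, could be specified as

+datalist = create_data(df,
+  variable = "number",
+  time = "year",
+  date = "doy",
+  mu = ~ temp,
+  sigma = ~ temp,
+  covar_data = covar_data,
+  est_sigma_re = TRUE)
+fitted = fit(datalist)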
+
+ + + +
+ + + +
+ +
+

+


+
+ +
+
+ + + + + + + + diff --git a/docs/articles/a2_covariates_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/a2_covariates_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/a2_covariates_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/a3_troubleshooting.html b/docs/articles/a3_troubleshooting.html new file mode 100644 index 0000000..dabf058 --- /dev/null +++ b/docs/articles/a3_troubleshooting.html @@ -0,0 +1,289 @@ + + + + + + + +Troubleshooting • phenomix + + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+library(ggplot2)
+library(phenomix)
+#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
+#> TMB was built with Matrix version 1.5.4
+#> Current Matrix version is 1.5.4.1
+#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
+library(dplyr)
+library(TMB)
+
+

Troubleshooting +

+

There are a number of reasons why phenomix models may not converge. First, it’s likely that not all models will converge for a single dataset. As an example, we can create some data representing an asymmetric distribution, with Gaussian tails (no extremes) and random variability by year (both in the mean and standard deviations).

+
+# create 20 years of data
+set.seed(123)
+df <- expand.grid("doy" = 100:200, "year" = 1:20)
+df$mu <- rnorm(unique(df$year), 150, 5)[df$year]
+df$sig1 <- rnorm(unique(df$year), 30, 5)[df$year]
+df$sig2 <- rnorm(unique(df$year), 30, 5)[df$year]
+df$sig <- ifelse(df$doy < df$mu, df$sig1, df$sig2)
+df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE)
+df$pred <- exp(df$pred + 8)
+df$number <- round(rnorm(nrow(df), df$pred, 0.1))
+

The model with Student-t errors can be fit to these data without problems,

+
+set.seed(1)
+fit_t <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "student_t"),
+   silent = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+

But the model with the generalized normal tails struggles.

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm"),
+   silent = TRUE,limits=TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+

This is an example where simplifying helps greatly. The model with +generalized normal tails will converge when the random effects in the +mean and variance are turned off, but has a hard time estimating the +shape parameters with them on.

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   silent = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+
+

Using initial values +

+

Sometimes, it helps to specify the initial values for a model. We can +do that in a couple ways, but it may be easiest to first construct a +model to see what initial values are needed.

+

Using the above example, we can specify +fit_model = FALSE.

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   silent = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+   fit_model = FALSE
+)
+

The $init_vals contains the initial values that would be +used to fit the model.

+
+fit_gnorm$init_vals
+#>         theta         theta         theta         theta         theta 
+#>     2.0308397     2.2015325     3.5295172     3.1596658     2.9785872 
+#>         theta         theta         theta         theta         theta 
+#>     0.9586761     1.9551192     3.1481328     2.4014047     2.4826709 
+#>         theta         theta         theta         theta         theta 
+#>     4.3289806     3.4550282     2.2355953     2.8107439     2.9945364 
+#>         theta         theta         theta         theta         theta 
+#>     2.5991447     2.3153863     3.2276358     4.0206346     3.5062919 
+#>    log_beta_1 log_obs_sigma          b_mu        b_sig1        b_sig2 
+#>     0.1000000     0.0000000   150.0000000     1.0000000     1.0000000
+

Suppose we wanted to start at a higher value for +log_beta_1. We could change that with

+
+inits = fit_gnorm$init_vals
+inits["log_beta_1"] = 2
+

Now we can try re-fitting the model,

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   inits = inits,
+   silent = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+   fit_model = TRUE
+)
+
+
+

Specifying limits +

+

There are two ways to specify limits on estimated parameters. First, +we include hard coded limits that may be turned on with +limits = TRUE, e.g.

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   silent = TRUE, 
+   limits = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+   fit_model = TRUE
+)
+

However these limits may not be reasonable for every situation. We can also set these manually, by including a list of limits based on the parameters being estimated. We do this using the same approach as above with specifying initial values. First we construct the model but don’t do estimation,

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   silent = TRUE,
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+   fit_model = FALSE
+)
+

The parameters need to be in the same order as +init_vals, so we can try something like this:

+
+lower = c(rep(0, 20), # lower for theta
+          0.05, # lower log_beta_1
+          -3, # lower log_obs_sigma
+          140, # lower on mu
+          15,# lower on b_sig1
+          15)# lower on b_sig2
+upper = c(rep(10, 20),# upper for theta
+          log(10),# upper log_beta_1
+          0, # upper log_obs_sigma
+          160,# upper on mu
+          40,# upper on b_sig1
+          40)# upper on b_sig2
+

Then we can pass these in as a list,

+
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+                          tail_model = "gnorm",
+                          est_mu_re = FALSE,
+                          est_sigma_re = FALSE),
+   silent = TRUE, 
+   limits = list(lower = lower, upper = upper),
+   control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+
+
+
+ + + +
+ + + +
+ +
+

+


+
+ +
+
+ + + + + + + + diff --git a/docs/articles/a3_troubleshooting_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/a3_troubleshooting_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/a3_troubleshooting_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/a4_derived.html b/docs/articles/a4_derived.html new file mode 100644 index 0000000..b5e41c0 --- /dev/null +++ b/docs/articles/a4_derived.html @@ -0,0 +1,218 @@ + + + + + + + +Extracting estimated and derived parameters • phenomix + + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+library(ggplot2)
+library(phenomix)
+#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
+#> TMB was built with Matrix version 1.5.4
+#> Current Matrix version is 1.5.4.1
+#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
+library(dplyr)
+library(TMB)
+

We will start with the original simple model fit to the +fishdist data,

+
+data("fishdist")
+cov_dat = data.frame(nyear = unique(fishdist$year))
+# rescale year -- could also standardize with scale()
+cov_dat$nyear = cov_dat$nyear - min(cov_dat$nyear) 
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "gaussian")
+
+set.seed(1)
+fitted = fit(datalist)
+
+

Extracting parameters manually +

+

The first option for extracting parameters manually is via the +sdreport of TMB objects. This includes both estimated and +derived quantities. You can see the names of what’s available by looking +at

+
+fitted$sdreport$value
+

or perhaps look at these in tabular form,

+
+table(names(fitted$sdreport$value))
+#> 
+#>         b_mu       b_sig1      lower25           mu    obs_sigma         pred 
+#>            2            2           21           21            1         1005 
+#>        range       sigma1        theta      upper75 year_log_tot     year_tot 
+#>           21           21           21           21           21           21
+

So if you wanted to extract the mean phenological peak in each time +step (named mu), you could look at the indices of the +names

+
+idx <- which(names(fitted$sdreport$value) == "mu")
+

And then use the respective means and sds corresponding to those +indices,

+
+m <- fitted$sdreport$value[idx]
+s <- fitted$sdreport$sd[idx]
+
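As a short usage sketch, the means and standard errors could be combined into approximate 95% intervals and plotted (the year index here is a stand-in, assuming the elements of m are ordered by year):

+df_mu = data.frame(year = seq_along(m), mu = m,
+  lo = m - 1.96 * s, hi = m + 1.96 * s)
+ggplot(df_mu, aes(year, mu)) +
+  geom_ribbon(aes(ymin = lo, ymax = hi), alpha = 0.3) +
+  geom_line()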
+
+

Helper functions +

+

We’ve also included some helper functions to extract these more +quickly yourself. These all take in a fitted model object, and can be +called as

+
+m <- extract_means(fitted) # means
+s <- extract_sigma(fitted) # sigmas, describing tails
+t <- extract_theta(fitted) # theta parameters (scaling)
+u <- extract_upper(fitted) # upper quartile
+l <- extract_lower(fitted) # lower quartile
+

Or if you want to extract all of the above, you can use

+
+e <- extract_all(fitted)
+
+
+

Annual summaries +

+

In addition to the parameters above, we can extract the derived annual totals (predicted across all days 1-365). These are extracted with the extract_annual function. Note that the second argument controls whether the estimates are in normal or log space.

+
+est_log <- extract_annual(fitted, log=TRUE)
+est_normal <- extract_annual(fitted, log=FALSE)
+
+
+ + + +
+ + + +
+ +
+

+


+
+ +
+
+ + + + + + + + diff --git a/docs/articles/examples.html b/docs/articles/examples.html new file mode 100644 index 0000000..ed437db --- /dev/null +++ b/docs/articles/examples.html @@ -0,0 +1,291 @@ + + + + + + + +Fitting time varying phenology models with the phenomix package • phenomix + + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+library(ggplot2)
+library(phenomix)
+library(dplyr)
+library(TMB)
+#> Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
+#> TMB was built with Matrix version 1.3.4
+#> Current Matrix version is 1.4.0
+#> Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
+
+

Overview +

+

The phenomix R package is designed to be a robust and flexible tool for modeling phenological change. The name of the package is in reference to modeling run timing data for salmon, but more generally this framework can be applied to any kind of phenology data – timing of leaf-out or flowering in plants, breeding bird surveys, etc. Observations may be collected across multiple years, or for a single year, and may be discrete or continuous. For a given time step, these data may look like this:

+
+set.seed(123)
+df = data.frame(x = seq(110,150,1))
+df$y = dnorm(df$x, mean = 130, 10) * 10000
+df$obs = rnorm(nrow(df), df$y, 30)
+ggplot(df, aes(x, obs)) + geom_point() + 
+  geom_smooth() + 
+  theme_bw() + 
+  xlab("Day of year") + 
+  ylab("Observation")
+

+

For demonstration purposes, we included an example dataset, based on run timing data for Pacific salmon.

+
+glimpse(fishdist)
+#> Rows: 1,005
+#> Columns: 3
+#> $ year   <dbl> 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 1960, 196…
+#> $ number <int> 5, 5, 5, 3, 9, 4, 3, 3, 5, 3, 3, 7, 2, 9, 21, 11, 75, 368, 385,…
+#> $ doy    <int> 95, 113, 120, 121, 122, 123, 124, 126, 127, 128, 130, 131, 132,…
+
+

Manipulating data for estimation +

+

The main data processing function in the package is called create_data, which builds the data and model arguments to be used for fitting. Note the names of the variables in our dataset,

+
+names(fishdist)
+#> [1] "year"   "number" "doy"
+

We’ll start with the default arguments for the function, before going into detail about what they all mean. The only argument that we’ve initially changed from the default is using asymmetric_model = FALSE to fit the symmetric model.

+

First, if we’re fitting a model with covariates affecting the mean and standard deviation of the phenological response, we need to create a data frame indexed by time step.

+
+cov_dat = data.frame(nyear = unique(fishdist$year))
+# rescale year -- could also standardize with scale()
+cov_dat$nyear = cov_dat$nyear - min(cov_dat$nyear) 
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "gaussian", 
+  family = "gaussian")
+

The min_number argument represents an optional threshold below which data is ignored. The variable argument is a character identifying the name of the response variable in the data frame. Similarly, the time and date arguments specify the labels of the temporal (e.g. year) and seasonal variables (day of year).

+

The remaining arguments to the function concern model fitting. The phenomix package can fit asymmetric or symmetric models to the distribution data (whether the left side of the curve is the same shape as the right); the asymmetric_model argument defaults to FALSE. The mean and standard deviations that control the distributions are allowed to vary over time (as fixed or random effects), but we can also include covariates in the mean and standard deviations (via formulas). Mathematically, the mean location for a model with random effects is \(u_y \sim Normal(u_0, u_{\sigma})\), where \(u_0\) is the global mean and \(u_{\sigma}\) is the standard deviation. Adding a trend in normal space, this becomes \(u_y \sim Normal(u_0 + b_u*y, u_{\sigma})\), where the parameter \(b_u\) controls the trend. To keep the variance parameters controlling the tails of the run timing distribution positive, we estimate those random effects in log-space. This is specified as \(\sigma_y = exp(\sigma_0 + b_{\sigma}*y + \delta_{y})\), with \(\delta_{y} \sim Normal(0, \sigma_{\sigma})\). Trends in both the mean and variance are estimated by default, and controlled with the est_sigma_trend and est_mu_trend arguments. As described by the equations above, the trends are log-linear for the standard deviation parameters, but in normal space for the mean parameters.

+

Finally, the tails of the response curves may be estimated via fitting a Gaussian (normal) distribution, Student-t distribution, or generalized normal distribution. By default, the Gaussian tails are estimated, and this parameter is controlled with the tail_model argument (“gaussian”, “student_t”, “gnorm”).

+

Last, we can model the observed count data with a number of different distributions, and set this with the family argument. Currently supported distributions include the Gaussian (default, “gaussian”), Poisson (“poisson”), and Negative Binomial (“negbin”), and all include a log-link.

+
+
+

Fitting the model +

+

Next, we’ll use the fit function to do maximum likelihood estimation in TMB. Additional arguments can be found in the help file, but the most important one is the list of data created above with create_data.

+
+set.seed(1)
+fitted = fit(datalist)
+

We don’t get any warnings out of the estimation, but let’s look at the sdreport in more detail. fitted is of the phenomix class and contains the following

+
+names(fitted)
+#> [1] "obj"         "pars"        "sdreport"    "init_values" "data_list"
+

The init_values are the initial parameter values where optimization was started from, and the data_list represents the raw data. The pars component contains information relative to convergence and iterations, including the convergence code (0 = successful convergence),

+
+fitted$pars$convergence
+#> [1] 0
+

This looks like things are converging. But sometimes relative convergence (code = 4) won’t throw warnings. We can also look at the variance estimates, which were successfully estimated (a good sign the model has converged!). These are all included in the sdreport – the parameters and their standard errors are accessible with

+
+sdrep_df = data.frame("par"=names(fitted$sdreport$value),
+  "value"=fitted$sdreport$value, "sd"=fitted$sdreport$sd)
+head(sdrep_df)
+#>     par    value       sd
+#> 1 theta 307.7776 32.32788
+#> 2 theta 176.9818 29.38491
+#> 3 theta 162.7978 28.89745
+#> 4 theta 133.6147 25.56292
+#> 5 theta 173.8464 28.45921
+#> 6 theta 150.5007 29.56181
+

If for some reason these need to be re-generated, the obj object can be fed into

+
+TMB::sdreport(fitted$obj)
+
+
+

Plotting results +

+

Using our fitted object, there are some basic plotting functions included for diagnostics. Let’s plot run timing over time, by year:

+
+g = plot_diagnostics(fitted, type="timing", logspace=TRUE)
+#> Joining, by = "years"
+g
+

+Fitted symmetric model with tails from a Gaussian distribution +

+
+

The object returned by plot_diagnostics is just a ggplot object, so additional arguments or themes can be added with + statements. The plot can be shown in normal space by setting logspace=FALSE. These run timing plots are the default, but additional scatterplots of observed vs predicted values can be shown by setting type="scatter".

+
+
+

Additional examples +

+

First, we can try to fit the same model, but using an asymmetric t-distribution.

+
+set.seed(2)
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = TRUE, 
+  tail_model = "student_t")
+fitted_t = fit(datalist)
+
+plot_diagnostics(fitted_t)
+#> Joining, by = "years"
+

+Fitted asymmetric model with heavy tails from a t-distribution +

+
+

We can also compare the two models using AIC. Note we’re not comparing random effects structures (where we’d need REML), and that this is an approximation (because only fixed effects parameters are included). This comparison shows that, maybe not surprisingly, the Student-t model is more parsimonious than the Gaussian tailed model (aic_2 < aic_1).

+
+aic_1 = extractAIC(fitted)$AIC
+aic_1
+#> [1] 13318.64
+
+aic_2 = extractAIC(fitted_t)$AIC
+aic_2
+#> [1] 2525.229
+

Second, we can try to fit the same model, but using a generalized normal distribution. This distribution has a plateau or ‘flat top’. For examples, see the gnorm package on CRAN or the Wikipedia page.

+
+set.seed(5)
+
+datalist = create_data(fishdist, 
+  min_number=0, 
+  variable = "number", 
+  time="year", 
+  date = "doy",
+  asymmetric_model = FALSE, 
+  tail_model = "gnorm")
+fitted = fit(datalist, limits = TRUE)
+
+
+

Diagnosing lack of convergence +

+

In addition to the warning message about the Hessian not being positive definite, the NaNs in the sd column are a sure sign that things aren’t converging.

+

Fixes to improve convergence include starting from a different set of starting values (try passing inits into the fit function) or placing stricter bounds on convergence statistics. Or it may be that the model isn’t a good fit to the data. Specifically, the convergence criterion can be modified with the control argument – which is passed into stats::nlminb. One parameter in this list that can be changed is rel.tol, e.g.

+
+fitted = fit(..., control=list(rel.tol = 1.0e-12,
+  eval.max=4000, iter.max=4000))
+
+
+

Adding covariates +

+

Finally, it’s possible to include covariates instead of inter-annual trends – either in the mean or standard deviation parameters. These arguments are mu_covariate and sigma_covariate, which are strings declaring the name of the covariate in the data frame passed to create_data. The two may be the same or different variables, and for asymmetric models the trend variables are required to be the same on the left and right hand sides of the distribution.
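A hedged sketch of this interface, assuming the arguments behave as described above (the temp column and the fishdist_temp data frame here are hypothetical):

+# hypothetical: fishdist_temp is fishdist with a 'temp' column added
+datalist = create_data(fishdist_temp,
+  variable = "number",
+  time = "year",
+  date = "doy",
+  mu_covariate = "temp",
+  sigma_covariate = "temp")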

+
+
+
+ + + +
+ + + +
+ +
+

+


+
+ +
+
+ + + + + + + + diff --git a/docs/articles/examples_files/accessible-code-block-0.0.1/empty-anchor.js b/docs/articles/examples_files/accessible-code-block-0.0.1/empty-anchor.js new file mode 100644 index 0000000..ca349fd --- /dev/null +++ b/docs/articles/examples_files/accessible-code-block-0.0.1/empty-anchor.js @@ -0,0 +1,15 @@ +// Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> +// v0.0.1 +// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. + +document.addEventListener('DOMContentLoaded', function() { + const codeList = document.getElementsByClassName("sourceCode"); + for (var i = 0; i < codeList.length; i++) { + var linkList = codeList[i].getElementsByTagName('a'); + for (var j = 0; j < linkList.length; j++) { + if (linkList[j].innerHTML === "") { + linkList[j].setAttribute('aria-hidden', 'true'); + } + } + } +}); diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-1-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-1-1.png new file mode 100644 index 0000000..319912f Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-1-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-10-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-10-1.png new file mode 100644 index 0000000..6789bd4 Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-10-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-11-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-11-1.png new file mode 100644 index 0000000..9c581db Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-11-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-12-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-12-1.png new file mode 100644 index 0000000..541be79 Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-12-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-13-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-13-1.png new file mode 100644 index 0000000..b9ca9ed Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-13-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 0000000..580f359 Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/examples_files/figure-html/unnamed-chunk-8-1.png b/docs/articles/examples_files/figure-html/unnamed-chunk-8-1.png new file mode 100644 index 0000000..e36a204 Binary files /dev/null and b/docs/articles/examples_files/figure-html/unnamed-chunk-8-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html new file mode 100644 index 0000000..2b84b08 --- /dev/null +++ b/docs/articles/index.html @@ -0,0 +1,98 @@ + +Articles • phenomix + + +
+
+ + + +
+ + +
+ +
+


+
+ +
+ + + + + + + + diff --git a/docs/authors.html b/docs/authors.html new file mode 100644 index 0000000..cb879a5 --- /dev/null +++ b/docs/authors.html @@ -0,0 +1,121 @@ + +Authors and Citation • phenomix + + +
+
+ + + +
+
+
+ + + +
  • Eric J. Ward. Author, maintainer.
  • Samantha M. Wilson. Contributor.
  • Joseph H. Anderson. Contributor.
+
+

Citation

+ Source: DESCRIPTION +
+
+ + +

Ward EJ (2023). +phenomix: Fit Density Curves to Peak Timing Data that Varies over Time. +https://ericward-noaa.github.io/phenomix, https://github.com/ericward-noaa/phenomix. +

+
@Manual{,
+  title = {phenomix: Fit Density Curves to Peak Timing Data that Varies over Time},
+  author = {Eric J. Ward},
+  year = {2023},
+  note = {https://ericward-noaa.github.io/phenomix, https://github.com/ericward-noaa/phenomix},
+}
+ +
+ +
+ + + +
+ +
+


+
+ +
+ + + + + + + + diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css new file mode 100644 index 0000000..5a85941 --- /dev/null +++ b/docs/bootstrap-toc.css @@ -0,0 +1,60 @@ +/*! + * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ + +/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ + +/* All levels of nav */ +nav[data-toggle='toc'] .nav > li > a { + display: block; + padding: 4px 20px; + font-size: 13px; + font-weight: 500; + color: #767676; +} +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 19px; + color: #563d7c; + text-decoration: none; + background-color: transparent; + border-left: 1px solid #563d7c; +} +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 18px; + font-weight: bold; + color: #563d7c; + background-color: transparent; + border-left: 2px solid #563d7c; +} + +/* Nav: second level (shown on .active) */ +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} +nav[data-toggle='toc'] .nav .nav > li > a { + padding-top: 1px; + padding-bottom: 1px; + padding-left: 30px; + font-size: 12px; + font-weight: normal; +} +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 29px; +} +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 28px; + font-weight: 500; +} + +/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ +nav[data-toggle='toc'] .nav > .active > ul { + display: block; +} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js new file mode 100644 index 0000000..1cdd573 --- /dev/null +++ b/docs/bootstrap-toc.js @@ -0,0 +1,159 @@ +/*! 
+ * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) + * Copyright 2015 Aidan Feldman + * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ +(function() { + 'use strict'; + + window.Toc = { + helpers: { + // return all matching elements in the set, or their descendants + findOrFilter: function($el, selector) { + // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ + // http://stackoverflow.com/a/12731439/358804 + var $descendants = $el.find(selector); + return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); + }, + + generateUniqueIdBase: function(el) { + var text = $(el).text(); + var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); + return anchor || el.tagName.toLowerCase(); + }, + + generateUniqueId: function(el) { + var anchorBase = this.generateUniqueIdBase(el); + for (var i = 0; ; i++) { + var anchor = anchorBase; + if (i > 0) { + // add suffix + anchor += '-' + i; + } + // check if ID already exists + if (!document.getElementById(anchor)) { + return anchor; + } + } + }, + + generateAnchor: function(el) { + if (el.id) { + return el.id; + } else { + var anchor = this.generateUniqueId(el); + el.id = anchor; + return anchor; + } + }, + + createNavList: function() { + return $(''); + }, + + createChildNavList: function($parent) { + var $childList = this.createNavList(); + $parent.append($childList); + return $childList; + }, + + generateNavEl: function(anchor, text) { + var $a = $(''); + $a.attr('href', '#' + anchor); + $a.text(text); + var $li = $('
  • '); + $li.append($a); + return $li; + }, + + generateNavItem: function(headingEl) { + var anchor = this.generateAnchor(headingEl); + var $heading = $(headingEl); + var text = $heading.data('toc-text') || $heading.text(); + return this.generateNavEl(anchor, text); + }, + + // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). + getTopLevel: function($scope) { + for (var i = 1; i <= 6; i++) { + var $headings = this.findOrFilter($scope, 'h' + i); + if ($headings.length > 1) { + return i; + } + } + + return 1; + }, + + // returns the elements for the top level, and the next below it + getHeadings: function($scope, topLevel) { + var topSelector = 'h' + topLevel; + + var secondaryLevel = topLevel + 1; + var secondarySelector = 'h' + secondaryLevel; + + return this.findOrFilter($scope, topSelector + ',' + secondarySelector); + }, + + getNavLevel: function(el) { + return parseInt(el.tagName.charAt(1), 10); + }, + + populateNav: function($topContext, topLevel, $headings) { + var $context = $topContext; + var $prevNav; + + var helpers = this; + $headings.each(function(i, el) { + var $newNav = helpers.generateNavItem(el); + var navLevel = helpers.getNavLevel(el); + + // determine the proper $context + if (navLevel === topLevel) { + // use top level + $context = $topContext; + } else if ($prevNav && $context === $topContext) { + // create a new level of the tree and switch to it + $context = helpers.createChildNavList($prevNav); + } // else use the current $context + + $context.append($newNav); + + $prevNav = $newNav; + }); + }, + + parseOps: function(arg) { + var opts; + if (arg.jquery) { + opts = { + $nav: arg + }; + } else { + opts = arg; + } + opts.$scope = opts.$scope || $(document.body); + return opts; + } + }, + + // accepts a jQuery object, or an options object + init: function(opts) { + opts = this.helpers.parseOps(opts); + + // ensure that the data attribute is in place for styling + opts.$nav.attr('data-toggle', 'toc'); + + var $topContext = this.helpers.createChildNavList(opts.$nav); + var topLevel = this.helpers.getTopLevel(opts.$scope); + var $headings = this.helpers.getHeadings(opts.$scope, topLevel); + this.helpers.populateNav($topContext, topLevel, $headings); + } + }; + + $(function() { + $('nav[data-toggle="toc"]').each(function(i, el) { + var $nav = $(el); + Toc.init($nav); + }); + }); +})(); diff --git a/docs/docsearch.css b/docs/docsearch.css new file mode 100644 index 0000000..e5f1fe1 --- /dev/null +++ b/docs/docsearch.css @@ -0,0 +1,148 @@ +/* Docsearch -------------------------------------------------------------- */ +/* + Source: https://github.com/algolia/docsearch/ + License: MIT +*/ + +.algolia-autocomplete { + display: block; + -webkit-box-flex: 1; + -ms-flex: 1; + flex: 1 +} + +.algolia-autocomplete .ds-dropdown-menu { + width: 100%; + min-width: none; + max-width: none; + padding: .75rem 0; + background-color: #fff; + background-clip: padding-box; + border: 1px solid rgba(0, 0, 0, .1); + box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); +} + +@media (min-width:768px) { + .algolia-autocomplete .ds-dropdown-menu { + width: 175% + } +} + +.algolia-autocomplete .ds-dropdown-menu::before { + display: none +} + +.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { + padding: 0; + background-color: rgb(255,255,255); + border: 0; + max-height: 80vh; +} + +.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { + margin-top: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion { + padding: 0; + overflow: visible +} + +.algolia-autocomplete .algolia-docsearch-suggestion--category-header { + padding: .125rem 1rem; + margin-top: 0; + font-size: 1.3em; + font-weight: 500; + color: #00008B; + border-bottom: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { + float: none; + padding-top: 0 +} + +.algolia-autocomplete 
.algolia-docsearch-suggestion--subcategory-column { + float: none; + width: auto; + padding: 0; + text-align: left +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content { + float: none; + width: auto; + padding: 0 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--content::before { + display: none +} + +.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { + padding-top: .75rem; + margin-top: .75rem; + border-top: 1px solid rgba(0, 0, 0, .1) +} + +.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { + display: block; + padding: .1rem 1rem; + margin-bottom: 0.1; + font-size: 1.0em; + font-weight: 400 + /* display: none */ +} + +.algolia-autocomplete .algolia-docsearch-suggestion--title { + display: block; + padding: .25rem 1rem; + margin-bottom: 0; + font-size: 0.9em; + font-weight: 400 +} + +.algolia-autocomplete .algolia-docsearch-suggestion--text { + padding: 0 1rem .5rem; + margin-top: -.25rem; + font-size: 0.8em; + font-weight: 400; + line-height: 1.25 +} + +.algolia-autocomplete .algolia-docsearch-footer { + width: 110px; + height: 20px; + z-index: 3; + margin-top: 10.66667px; + float: right; + font-size: 0; + line-height: 0; +} + +.algolia-autocomplete .algolia-docsearch-footer--logo { + background-image: url("data:image/svg+xml;utf8,"); + background-repeat: no-repeat; + background-position: 50%; + background-size: 100%; + overflow: hidden; + text-indent: -9000px; + width: 100%; + height: 100%; + display: block; + transform: translate(-8px); +} + +.algolia-autocomplete .algolia-docsearch-suggestion--highlight { + color: #FF8C00; + background: rgba(232, 189, 54, 0.1) +} + + +.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { + box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) +} + +.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { + background-color: rgba(192, 192, 192, .15) +} diff --git a/docs/docsearch.js b/docs/docsearch.js new file mode 100644 index 0000000..b35504c --- /dev/null +++ b/docs/docsearch.js @@ -0,0 +1,85 @@ +$(function() { + + // register a handler to move the focus to the search bar + // upon pressing shift + "/" (i.e. "?") + $(document).on('keydown', function(e) { + if (e.shiftKey && e.keyCode == 191) { + e.preventDefault(); + $("#search-input").focus(); + } + }); + + $(document).ready(function() { + // do keyword highlighting + /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ + var mark = function() { + + var referrer = document.URL ; + var paramKey = "q" ; + + if (referrer.indexOf("?") !== -1) { + var qs = referrer.substr(referrer.indexOf('?') + 1); + var qs_noanchor = qs.split('#')[0]; + var qsa = qs_noanchor.split('&'); + var keyword = ""; + + for (var i = 0; i < qsa.length; i++) { + var currentParam = qsa[i].split('='); + + if (currentParam.length !== 2) { + continue; + } + + if (currentParam[0] == paramKey) { + keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); + } + } + + if (keyword !== "") { + $(".contents").unmark({ + done: function() { + $(".contents").mark(keyword); + } + }); + } + } + }; + + mark(); + }); +}); + +/* Search term highlighting ------------------------------*/ + +function matchedWords(hit) { + var words = []; + + var hierarchy = hit._highlightResult.hierarchy; + // loop to fetch from lvl0, lvl1, etc. 
+ for (var idx in hierarchy) { + words = words.concat(hierarchy[idx].matchedWords); + } + + var content = hit._highlightResult.content; + if (content) { + words = words.concat(content.matchedWords); + } + + // return unique words + var words_uniq = [...new Set(words)]; + return words_uniq; +} + +function updateHitURL(hit) { + + var words = matchedWords(hit); + var url = ""; + + if (hit.anchor) { + url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; + } else { + url = hit.url + '?q=' + escape(words.join(" ")); + } + + return url; +} diff --git a/docs/extra.css b/docs/extra.css new file mode 100644 index 0000000..ed01162 --- /dev/null +++ b/docs/extra.css @@ -0,0 +1 @@ +@import url("https://nmfs-general-modeling-tools.github.io/nmfspalette/extra.css"); diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..94c673f --- /dev/null +++ b/docs/index.html @@ -0,0 +1,198 @@ + + + + + + + +Fit Density Curves to Peak Timing Data that Varies over Time • phenomix + + + + + + + + + + + + + +
    +
    + + + + +
    +
    +
    + +

    R package for fitting distributions to run timing data via maximum likelihood

    +

    R build status

    +

    DOI pkgdown site: https://ericward-noaa.github.io/phenomix/

    +
    +

    Installation +

    +

    You can install phenomix with:

    +
    +remotes::install_github("ericward-noaa/phenomix",build_vignettes = TRUE)
    +

    Load libraries

    + +
    +
    +

    Functions +

    +

The package phenomix provides a suite of curve-fitting functions to describe data that may be generated from a process where distributions in time might be concentrated (from fisheries, this occurs with counts over time of salmon returning from the ocean to spawn or juvenile fish emigrating from streams to the ocean).

    +
    Predicted (black line) and observed counts (red dots) for hypothetical dataset. Multiple observations may exist for some days, or no observations on others.
    +
    +

In a given year, the curve might be described by a symmetric or asymmetric Gaussian or Student-t distribution (shown here in log-scale on the y-axis). Questions of interest might be:
  • are the means (x-axis) shifting through time?
  • are the variances shifting through time?
  • does the model support a symmetric or asymmetric distribution?

    +

    +
    +
    +

    Examples +

    +

    The main functions are create_data() and fit(). See ?create_data and ?fit for additional details and examples. A vignette includes additional detail, and examples of several models as well as function arguments available https://ericward-noaa.github.io/phenomix/.
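A minimal sketch of the basic workflow, using the built-in fishdist dataset (arguments mirror the vignette examples):

+library(phenomix)
+datalist <- create_data(fishdist,
+  variable = "number",
+  time = "year",
+  date = "doy")
+fitted <- fit(datalist)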

    +
    +
    +

    References +

    +

    For description of fisheries applications of asymmetric models:

    +

    Methot, R.D. 2000. Technical description of the stock synthesis assessment program. U.S. Dept. Commer., NOAA Tech. Memo. NMFS-NWFSC-43, 46 p. link

    +

    For statistical background of asymmetric models:

    +

Rubio, F.J. and Steel, M.F.J. 2020. The family of two-piece distributions. Significance, 17(1), 12–13. link

    +

    Wallis, K.F. 2014. The two-piece normal, binormal, or double Gaussian distribution: Its origin and rediscoveries. Statistical Science, 29(1), 106–112. link

    +
    +
    +

    NOAA Disclaimer +

    +

    This repository is a scientific product and is not official communication of the National Oceanic and Atmospheric Administration, or the United States Department of Commerce. All NOAA GitHub project code is provided on an ‘as is’ basis and the user assumes responsibility for its use. Any claims against the Department of Commerce or Department of Commerce bureaus stemming from the use of this GitHub project will be governed by all applicable Federal law. Any reference to specific commercial products, processes, or services by service mark, trademark, manufacturer, or otherwise, does not constitute or imply their endorsement, recommendation or favoring by the Department of Commerce. The Department of Commerce seal and logo, or the seal and logo of a DOC bureau, shall not be used in any manner to imply endorsement of any commercial product or activity by DOC or the United States Government.

    +

    NOAA Fisheries

    +

    U.S. Department of Commerce | National Oceanographic and Atmospheric Administration | NOAA Fisheries

    +
    +
    +
    + + +
    + + +
    + +
    +

    +


    +
    + +
    +
    + + + + + + + + diff --git a/docs/link.svg b/docs/link.svg new file mode 100644 index 0000000..88ad827 --- /dev/null +++ b/docs/link.svg @@ -0,0 +1,12 @@ + + + + + + diff --git a/docs/pkgdown.css b/docs/pkgdown.css new file mode 100644 index 0000000..80ea5b8 --- /dev/null +++ b/docs/pkgdown.css @@ -0,0 +1,384 @@ +/* Sticky footer */ + +/** + * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ + * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css + * + * .Site -> body > .container + * .Site-content -> body > .container .row + * .footer -> footer + * + * Key idea seems to be to ensure that .container and __all its parents__ + * have height set to 100% + * + */ + +html, body { + height: 100%; +} + +body { + position: relative; +} + +body > .container { + display: flex; + height: 100%; + flex-direction: column; +} + +body > .container .row { + flex: 1 0 auto; +} + +footer { + margin-top: 45px; + padding: 35px 0 36px; + border-top: 1px solid #e5e5e5; + color: #666; + display: flex; + flex-shrink: 0; +} +footer p { + margin-bottom: 0; +} +footer div { + flex: 1; +} +footer .pkgdown { + text-align: right; +} +footer p { + margin-bottom: 0; +} + +img.icon { + float: right; +} + +/* Ensure in-page images don't run outside their container */ +.contents img { + max-width: 100%; + height: auto; +} + +/* Fix bug in bootstrap (only seen in firefox) */ +summary { + display: list-item; +} + +/* Typographic tweaking ---------------------------------*/ + +.contents .page-header { + margin-top: calc(-60px + 1em); +} + +dd { + margin-left: 3em; +} + +/* Section anchors ---------------------------------*/ + +a.anchor { + display: none; + margin-left: 5px; + width: 20px; + height: 20px; + + background-image: url(./link.svg); + background-repeat: no-repeat; + background-size: 20px 20px; + background-position: center center; +} + +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; +} + +/* Fixes for fixed navbar --------------------------*/ + +.contents h1, .contents h2, .contents h3, .contents h4 { + padding-top: 60px; + margin-top: -40px; +} + +/* Navbar submenu --------------------------*/ + +.dropdown-submenu { + position: relative; +} + +.dropdown-submenu>.dropdown-menu { + top: 0; + left: 100%; + margin-top: -6px; + margin-left: -1px; + border-radius: 0 6px 6px 6px; +} + +.dropdown-submenu:hover>.dropdown-menu { + display: block; +} + +.dropdown-submenu>a:after { + display: block; + content: " "; + float: right; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; + border-width: 5px 0 5px 5px; + border-left-color: #cccccc; + margin-top: 5px; + margin-right: -10px; +} + +.dropdown-submenu:hover>a:after { + border-left-color: #ffffff; +} + +.dropdown-submenu.pull-left { + float: none; +} + +.dropdown-submenu.pull-left>.dropdown-menu { + left: -100%; + margin-left: 10px; + border-radius: 6px 0 6px 6px; +} + +/* Sidebar --------------------------*/ + +#pkgdown-sidebar { + margin-top: 30px; + position: -webkit-sticky; + position: sticky; + top: 70px; +} + +#pkgdown-sidebar h2 { + font-size: 1.5em; + margin-top: 1em; +} + +#pkgdown-sidebar h2:first-child { + margin-top: 0; +} + +#pkgdown-sidebar .list-unstyled li { + margin-bottom: 0.5em; +} + +/* bootstrap-toc tweaks ------------------------------------------------------*/ + +/* All levels of nav */ + +nav[data-toggle='toc'] .nav > li > a { + padding: 4px 20px 4px 
6px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; +} + +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 5px; + color: inherit; + border-left: 1px solid #878787; +} + +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 5px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; + border-left: 2px solid #878787; +} + +/* Nav: second level (shown on .active) */ + +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} + +nav[data-toggle='toc'] .nav .nav > li > a { + padding-left: 16px; + font-size: 1.35rem; +} + +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 15px; +} + +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 15px; + font-weight: 500; + font-size: 1.35rem; +} + +/* orcid ------------------------------------------------------------------- */ + +.orcid { + font-size: 16px; + color: #A6CE39; + /* margins are required by official ORCID trademark and display guidelines */ + margin-left:4px; + margin-right:4px; + vertical-align: middle; +} + +/* Reference index & topics ----------------------------------------------- */ + +.ref-index th {font-weight: normal;} + +.ref-index td {vertical-align: top; min-width: 100px} +.ref-index .icon {width: 40px;} +.ref-index .alias {width: 40%;} +.ref-index-icons .alias {width: calc(40% - 40px);} +.ref-index .title {width: 60%;} + +.ref-arguments th {text-align: right; padding-right: 10px;} +.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} +.ref-arguments .name {width: 20%;} +.ref-arguments .desc {width: 80%;} + +/* Nice scrolling for wide elements --------------------------------------- */ + +table { + display: block; + overflow: auto; +} + +/* Syntax highlighting ---------------------------------------------------- */ + +pre, code, pre code { + background-color: #f8f8f8; + color: #333; +} +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} + +pre { + border: 1px solid #eee; +} + +pre .img, pre .r-plt { + margin: 5px 0; +} + +pre .img img, pre .r-plt img { + background-color: #fff; +} + +code a, pre a { + color: #375f84; +} + +a.sourceLine:hover { + text-decoration: none; +} + +.fl {color: #1514b5;} +.fu {color: #000000;} /* function */ +.ch,.st {color: #036a07;} /* string */ +.kw {color: #264D66;} /* keyword */ +.co {color: #888888;} /* comment */ + +.error {font-weight: bolder;} +.warning {font-weight: bolder;} + +/* Clipboard --------------------------*/ + +.hasCopyButton { + position: relative; +} + +.btn-copy-ex { + position: absolute; + right: 0; + top: 0; + visibility: hidden; +} + +.hasCopyButton:hover button.btn-copy-ex { + visibility: visible; +} + +/* headroom.js ------------------------ */ + +.headroom { + will-change: transform; + transition: transform 200ms linear; +} +.headroom--pinned { + transform: translateY(0%); +} +.headroom--unpinned { + transform: translateY(-100%); +} + +/* mark.js ----------------------------*/ + +mark { + background-color: rgba(255, 255, 51, 0.5); + border-bottom: 2px solid rgba(255, 153, 51, 0.3); + padding: 1px; +} + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 
10px; +} + +/* fontawesome ------------------------ */ + +.fab { + font-family: "Font Awesome 5 Brands" !important; +} + +/* don't display links in code chunks when printing */ +/* source: https://stackoverflow.com/a/10781533 */ +@media print { + code a:link:after, code a:visited:after { + content: ""; + } +} + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js new file mode 100644 index 0000000..6f0eee4 --- /dev/null +++ b/docs/pkgdown.js @@ -0,0 +1,108 @@ +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { + + $('.navbar-fixed-top').headroom(); + + $('body').css('padding-top', $('.navbar').height() + 10); + $(window).resize(function(){ + $('body').css('padding-top', $('.navbar').height() + 10); + }); + + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + // Ignore external links + if (links[i].host !== location.host) + continue; + + var nav_path = paths(links[i].pathname); + + var length = prefix_length(nav_path, cur_path); + if (length > max_length) { + max_length = length; + pos = i; + } + } + + // Add class to parent
<li>, and enclosing
<li> if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); + menu_anchor.parent().addClass("active"); + menu_anchor.closest("li.dropdown").addClass("active"); + } + }); + + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / + + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } + + // Returns -1 if not found + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(-1); + + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 0 : -1); + } + + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } + + return(haystack.length); + } + + /* Clipboard --------------------------*/ + + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } + + if(ClipboardJS.isSupported()) { + $(document).ready(function() { + var copyButton = ""; + + $("div.sourceCode").addClass("hasCopyButton"); + + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); + + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); + + // Initialize clipboard: + var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); + } + }); + + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); + + clipboardBtnCopies.on('error', function(e) { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); + }); + } +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml new file mode 100644 index 0000000..8ad33c2 --- /dev/null +++ b/docs/pkgdown.yml @@ -0,0 +1,10 @@ +pandoc: 3.1.1 +pkgdown: 2.0.7 +pkgdown_sha: ~ +articles: + a1_examples: a1_examples.html + a2_covariates: a2_covariates.html + a3_troubleshooting: a3_troubleshooting.html + a4_derived: a4_derived.html +last_built: 2023-07-10T17:03Z + diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000..17a3580 Binary files /dev/null and b/docs/reference/Rplot001.png differ diff --git a/docs/reference/chum.html b/docs/reference/chum.html new file mode 100644 index 0000000..7f93e21 --- /dev/null +++ b/docs/reference/chum.html @@ -0,0 +1,117 @@ + +Count data collected by Washington Department of Fish and Wildlife on +chum salmon from the Skagit River (Washington state). Each row of the +dataframe contains an observation ("number") on a given date ("date"). +The year ("year") and calendar day ("doy") are also included. — chum • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Count data collected by Washington Department of Fish and Wildlife on +chum salmon from the Skagit River (Washington state). Each row of the +dataframe contains an observation ("number") on a given date ("date"). +The year ("year") and calendar day ("doy") are also included.

    +
    + +
    +
    chum
    +
    + +
    +

    Format

    +

    A data frame.
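A minimal sketch of loading and inspecting the dataset (base R only, nothing package-specific):
+# load the chum dataset shipped with phenomix and preview its columns
+data(chum)
+head(chum)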

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/create_data.html b/docs/reference/create_data.html new file mode 100644 index 0000000..27f41fe --- /dev/null +++ b/docs/reference/create_data.html @@ -0,0 +1,203 @@ + +Create data file for fitting time varying run timing distributions with TMB — create_data • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Does minimal processing of data to use as argument to fitting function

    +
    + +
    +
    create_data(
    +  data,
    +  min_number = 0,
    +  variable = "number",
    +  time = "year",
    +  date = "doy",
    +  asymmetric_model = TRUE,
    +  mu = ~1,
    +  sigma = ~1,
    +  covar_data = NULL,
    +  est_sigma_re = TRUE,
    +  est_mu_re = TRUE,
    +  tail_model = "student_t",
    +  family = "lognormal",
    +  max_theta = 10,
    +  share_shape = TRUE,
    +  nu_prior = c(2, 10),
    +  beta_prior = c(2, 1)
    +)
    +
    + +
    +

    Arguments

    +
    data
    +

    A data frame

    + + +
    min_number
    +

    A minimum threshold to use, defaults to 0

    + + +
    variable
    +

    A character string of the name of the variable in 'data' that contains the response (e.g. counts)

    + + +
    time
    +

    A character string of the name of the variable in 'data' that contains the time variable (e.g. year)

    + + +
    date
    +

A character string of the name of the variable in 'data' that contains the date (e.g. day of year). The actual +column should contain a numeric value -- for example, the result from using lubridate::yday(x)

    + + +
    asymmetric_model
    +

    Boolean, whether or not to let model be asymmetric (e.g. run timing before peak has a +different shape than run timing after peak)

    + + +
    mu
    +

    An optional formula allowing the mean to be a function of covariates. Random effects are not included in the formula +but specified with the est_mu_re argument

    + + +
    sigma
    +

    An optional formula allowing the standard deviation to be a function of covariates. For asymmetric models, +each side of the distribution is allowed a different set of covariates. Random effects are not included in the formula +but specified with the est_sigma_re argument

    + + +
    covar_data
    +

    a data frame containing covariates specific to each time step. These are used in the formulas mu and sigma

    + + +
    est_sigma_re
    +

    Whether to estimate random effects by year in sigma parameter controlling tail of distribution. Defaults to TRUE

    + + +
    est_mu_re
    +

    Whether to estimate random effects by year in mu parameter controlling location of distribution. Defaults to TRUE

    + + +
    tail_model
    +

    Whether to fit Gaussian ("gaussian"), Student-t ("student_t") or generalized normal ("gnorm"). Defaults to Student-t

    + + +
    family
    +

    Response for observation model, options are "gaussian", "poisson", "negbin", "binomial", "lognormal". The default ("lognormal") is +not a true lognormal distribution, but a normal-log in that it assumes log(y) ~ Normal()

    + + +
    max_theta
    +

    Maximum value of log(pred) when limits=TRUE. Defaults to 10

    + + +
    share_shape
    +

    Boolean argument for whether asymmetric student-t and generalized normal distributions should share the shape parameter (nu for the student-t; +beta for the generalized normal). Defaults to TRUE

    + + +
    nu_prior
    +

    Two element vector (optional) for penalized prior on student t df, defaults to a Gamma(shape=2, scale=10) distribution

    + + +
    beta_prior
    +

    Two element vector (optional) for penalized prior on generalized normal beta, defaults to a Normal(2, 1) distribution

    + +
    + +
    +

    Examples

    +
    data(fishdist)
    +datalist <- create_data(fishdist,
    +  min_number = 0, variable = "number", time = "year",
    +  date = "doy", asymmetric_model = TRUE, family = "gaussian"
    +)
    +
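+# A sketch of the covariate interface: the covariate data frame and its
+# "temp" column are hypothetical, but the pattern follows the mu, sigma,
+# and covar_data arguments documented above.
+covar_df <- data.frame(year = unique(fishdist$year),
+  temp = rnorm(length(unique(fishdist$year))))
+datalist_cov <- create_data(fishdist,
+  variable = "number", time = "year", date = "doy",
+  mu = ~temp, covar_data = covar_df
+)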
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_all.html b/docs/reference/extract_all.html new file mode 100644 index 0000000..1d52774 --- /dev/null +++ b/docs/reference/extract_all.html @@ -0,0 +1,107 @@ + +Output processing function to be called by user — extract_all • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the means, sigmas, thetas, +lower (25%) and upper (75%) quartiles, and respective sds

    +
    + +
    +
    extract_all(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_annual.html b/docs/reference/extract_annual.html new file mode 100644 index 0000000..daaa1f6 --- /dev/null +++ b/docs/reference/extract_annual.html @@ -0,0 +1,109 @@ + +Output processing function to be called by user — extract_annual • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the annual totals

    +
    + +
    +
    extract_annual(fit, log = TRUE)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + + +
    log
    +

    Whether to return estimates in log space, defaults to TRUE

    + +
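A short usage sketch, assuming a converged model object fit returned by fit():
+# annual totals in log space (the default) and in natural space
+# index_log <- extract_annual(fit, log = TRUE)
+# index <- extract_annual(fit, log = FALSE)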
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_lower.html b/docs/reference/extract_lower.html new file mode 100644 index 0000000..baa4008 --- /dev/null +++ b/docs/reference/extract_lower.html @@ -0,0 +1,105 @@ + +Output processing function to be called by user — extract_lower • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the lower quartiles (25%) and respective sds

    +
    + +
    +
    extract_lower(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_means.html b/docs/reference/extract_means.html new file mode 100644 index 0000000..ea57313 --- /dev/null +++ b/docs/reference/extract_means.html @@ -0,0 +1,105 @@ + +Output processing function to be called by user — extract_means • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the parameter means and respective sds

    +
    + +
    +
    extract_means(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_sigma.html b/docs/reference/extract_sigma.html new file mode 100644 index 0000000..c0a5fe3 --- /dev/null +++ b/docs/reference/extract_sigma.html @@ -0,0 +1,105 @@ + +Output processing function to be called by user — extract_sigma • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the parameter sigma and respective sds

    +
    + +
    +
    extract_sigma(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_theta.html b/docs/reference/extract_theta.html new file mode 100644 index 0000000..f937258 --- /dev/null +++ b/docs/reference/extract_theta.html @@ -0,0 +1,105 @@ + +Output processing function to be called by user — extract_theta • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function extracts the parameter theta and respective sds

    +
    + +
    +
    extract_theta(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/extract_upper.html b/docs/reference/extract_upper.html new file mode 100644 index 0000000..26352dc --- /dev/null +++ b/docs/reference/extract_upper.html @@ -0,0 +1,105 @@ + +Output processing function to be called by user — extract_upper • phenomix + + +
    +
    + + + +
    +
    + + +
    +

This function extracts the upper quartiles (75%) and respective sds

    +
    + +
    +
    extract_upper(fit)
    +
    + +
    +

    Arguments

    +
    fit
    +

    A fitted object returned from fit()

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
+ + + + + + + + diff --git a/docs/reference/figures/unnamed-chunk-5-1.png b/docs/reference/figures/unnamed-chunk-5-1.png new file mode 100644 index 0000000..9c3ff92 Binary files /dev/null and b/docs/reference/figures/unnamed-chunk-5-1.png differ diff --git a/docs/reference/figures/unnamed-chunk-6-1.png b/docs/reference/figures/unnamed-chunk-6-1.png new file mode 100644 index 0000000..3e2d1b6 Binary files /dev/null and b/docs/reference/figures/unnamed-chunk-6-1.png differ diff --git a/docs/reference/fishdist.html b/docs/reference/fishdist.html new file mode 100644 index 0000000..9937aa0 --- /dev/null +++ b/docs/reference/fishdist.html @@ -0,0 +1,103 @@ + +Example simulated data for fish distributions from multiple years — fishdist • phenomix + + +
    +
    + + + +
    +
    + + +
    +

Example simulated data for fish distributions from multiple years

    +
    + +
    +
    fishdist
    +
    + +
    +

    Format

    +

    A data frame containing simulated data.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fit.html b/docs/reference/fit.html new file mode 100644 index 0000000..afcc276 --- /dev/null +++ b/docs/reference/fit.html @@ -0,0 +1,286 @@ + +Fitting function to be called by user — fit • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    This function creates a list of parameters, sets up TMB object and attempts to +do fitting / estimation

    +
    + +
    +
    fit(
    +  data_list,
    +  silent = FALSE,
    +  inits = NULL,
    +  control = list(eval.max = 2000, iter.max = 1000, rel.tol = 1e-10),
    +  limits = NULL,
    +  fit_model = TRUE
    +)
    +
    + +
    +

    Arguments

    +
    data_list
    +

    A list of data, as output from create_data

    + + +
    silent
    +

    Boolean passed to TMB::MakeADFun, whether to be verbose or not (defaults to FALSE)

    + + +
    inits
    +

    Optional named list of parameters for starting values, defaults to NULL

    + + +
    control
    +

    Optional control list for stats::nlminb. For arguments see ?nlminb. Defaults to eval.max=2000, iter.max=1000, rel.tol=1e-10. For final model runs, the rel.tol should be even smaller

    + + +
    limits
    +

Whether to include limits for stats::nlminb. Can be a list of (lower, upper), or TRUE to use suggested hardcoded limits. Defaults to NULL, +where no limits are used.

    + + +
    fit_model
    +

    Whether to fit the model. If not, returns a list including the data, parameters, and initial values. Defaults to TRUE

    + +
    + +
    +

    Examples

    +
    data(fishdist)
    +
    +# example of fitting fixed effects, no trends, no random effects
    +set.seed(1)
    +datalist <- create_data(fishdist[which(fishdist$year > 1970), ],
    +  asymmetric_model = FALSE,
    +  est_mu_re = FALSE, est_sigma_re = FALSE
    +)
    +fit <- fit(datalist)
    +#> Order of parameters:
    +#>  [1] "theta"             "b_mu"              "log_sigma_mu_devs"
    +#>  [4] "mu_devs"           "b_sig1"            "b_sig2"           
    +#>  [7] "log_sigma1_sd"     "sigma1_devs"       "log_sigma2_sd"    
    +#> [10] "sigma2_devs"       "log_obs_sigma"     "log_tdf_1"        
    +#> [13] "log_tdf_2"         "log_beta_1"        "log_beta_2"       
    +#> Not matching template order:
    +#>  [1] "log_sigma1_sd"     "sigma1_devs"       "log_sigma2_sd"    
    +#>  [4] "sigma2_devs"       "theta"             "mu_devs"          
    +#>  [7] "log_sigma_mu_devs" "log_tdf_1"         "log_tdf_2"        
    +#> [10] "log_beta_1"        "log_beta_2"        "log_obs_sigma"    
    +#> [13] "b_mu"              "b_sig1"            "b_sig2"           
    +#> Your parameter list has been re-ordered.
    +#> (Disable this warning with checkParameterOrder=FALSE)
    +#> Constructing atomic D_lgamma
    +#> Constructing atomic qnorm1
    +#> outer mgc:  25034.62 
    +#> outer mgc:  2574.348 
    +#> outer mgc:  1978.553 
    +#> outer mgc:  200.0837 
    +#> outer mgc:  201.8165 
    +#> outer mgc:  165.3092 
    +#> outer mgc:  135.522 
    +#> outer mgc:  356.5337 
    +#> outer mgc:  120.1827 
    +#> outer mgc:  138.867 
    +#> outer mgc:  123.0076 
    +#> outer mgc:  72.46919 
    +#> outer mgc:  34.31654 
    +#> outer mgc:  42.47041 
    +#> outer mgc:  77.59186 
    +#> outer mgc:  15.58226 
    +#> outer mgc:  55.94099 
    +#> outer mgc:  18.16199 
    +#> outer mgc:  5.722491 
    +#> outer mgc:  30.59853 
    +#> outer mgc:  12.27546 
    +#> outer mgc:  23.85256 
    +#> outer mgc:  4.909127 
    +#> outer mgc:  16.05365 
    +#> outer mgc:  2.657193 
    +#> outer mgc:  5.044657 
    +#> outer mgc:  1.330298 
    +#> outer mgc:  5.259609 
    +#> outer mgc:  2.40497 
    +#> outer mgc:  1.914672 
    +#> outer mgc:  0.3580338 
    +#> outer mgc:  1.486636 
    +#> outer mgc:  0.4165588 
    +#> outer mgc:  0.243783 
    +#> outer mgc:  0.3935108 
    +#> outer mgc:  0.2062873 
    +#> outer mgc:  0.3560087 
    +#> outer mgc:  0.2509 
    +#> outer mgc:  0.2320703 
    +#> outer mgc:  0.2115966 
    +#> outer mgc:  0.1605053 
    +#> outer mgc:  0.5950958 
    +#> outer mgc:  0.1653233 
    +#> outer mgc:  0.2129569 
    +#> outer mgc:  0.4200267 
    +#> outer mgc:  0.153131 
    +#> outer mgc:  0.4769271 
    +#> outer mgc:  0.4690585 
    +#> outer mgc:  0.2463581 
    +#> outer mgc:  2.354513 
    +#> outer mgc:  0.5144498 
    +#> outer mgc:  0.3407818 
    +#> outer mgc:  0.7156203 
    +#> outer mgc:  0.2290494 
    +#> outer mgc:  0.3342214 
    +#> outer mgc:  0.1680087 
    +#> outer mgc:  0.05198574 
    +#> outer mgc:  0.07477491 
    +#> outer mgc:  0.01717437 
    +#> outer mgc:  0.02687706 
    +#> outer mgc:  0.007900119 
    +#> outer mgc:  0.008503209 
    +#> outer mgc:  0.002959148 
    +#> outer mgc:  0.002543861 
    +#> outer mgc:  0.0009483259 
    +#> outer mgc:  0.0007261544 
    +#> outer mgc:  0.000288091 
    +#> outer mgc:  0.0002083385 
    +#> outer mgc:  8.78474e-05 
    +#> outer mgc:  6.004225e-05 
    +#> outer mgc:  2.612294e-05 
    +#> outer mgc:  1.697649e-05 
    +#> outer mgc:  7.594411e-06 
    +#> outer mgc:  4.810276e-06 
    +#> outer mgc:  4.810276e-06 
    +#> outer mgc:  0.05355298 
    +#> outer mgc:  0.05355582 
    +#> outer mgc:  0.05127458 
    +#> outer mgc:  0.05127639 
    +#> outer mgc:  0.05241413 
    +#> outer mgc:  0.05241576 
    +#> outer mgc:  0.05583219 
    +#> outer mgc:  0.05583443 
    +#> outer mgc:  0.05241437 
    +#> outer mgc:  0.05241552 
    +#> outer mgc:  0.05697153 
    +#> outer mgc:  0.056974 
    +#> outer mgc:  0.05583182 
    +#> outer mgc:  0.0558348 
    +#> outer mgc:  0.06266844 
    +#> outer mgc:  0.06267164 
    +#> outer mgc:  0.05469277 
    +#> outer mgc:  0.05469494 
    +#> outer mgc:  0.05697135 
    +#> outer mgc:  0.05697418 
    +#> outer mgc:  4.814768e-06 
    +#> outer mgc:  4.80579e-06 
    +#> outer mgc:  0.9690304 
    +#> outer mgc:  0.9709709 
    +#> outer mgc:  0.01573754 
    +#> outer mgc:  0.01574038 
    +#> outer mgc:  0.04861318 
    +#> outer mgc:  0.0486681 
    +#> outer mgc:  6321.927 
    +#
    +# # example of model with random effects in means only, and symmetric distribution
    +# set.seed(1)
    +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE,
    +#                      est_sigma_re = FALSE)
    +# fit <- fit(datalist)
    +# # example of model with random effects in variances
    +# set.seed(1)
    +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = TRUE,
    +#                          est_mu_re = TRUE)
    +# fit <- fit(datalist)
    +#
    +# # example of model with poisson response
    +# set.seed(1)
    +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE,
    +#                         est_sigma_trend=FALSE, est_mu_trend=FALSE, est_mu_re = TRUE,
    +#                         family="poisson")
    +# fit <- fit(datalist)
    +
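+# # example of a final run, following the control and limits documentation
+# # above (values here are illustrative, not package defaults)
+# set.seed(1)
+# fit <- fit(datalist,
+#   control = list(eval.max = 4000, iter.max = 2000, rel.tol = 1e-12),
+#   limits = TRUE)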
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fitted.phenomix.html b/docs/reference/fitted.phenomix.html new file mode 100644 index 0000000..a4884e4 --- /dev/null +++ b/docs/reference/fitted.phenomix.html @@ -0,0 +1,104 @@ + +Get fitted values from model object, copying glmmTMB 'fast' implementation — fitted.phenomix • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Get fitted values from model object, copying glmmTMB 'fast' implementation

    +
    + +
    +
    # S3 method for phenomix
    +fitted(object, ...)
    +
    + +
    +

    Arguments

    +
    ...
    +

    Extra parameters

    +
object
    +

    The fitted phenomix model

    +
    + +
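A minimal sketch, assuming a fitted model object fit as in the fit() examples:
+# fitted values on the scale of the response, one per observation
+# pred <- fitted(fit)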
    + +
    + + +
    + +
    +

Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fixef.phenomix.html b/docs/reference/fixef.phenomix.html new file mode 100644 index 0000000..17447c4 --- /dev/null +++ b/docs/reference/fixef.phenomix.html @@ -0,0 +1,110 @@ + +Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation — fixef.phenomix • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation

    +
    + +
    +
    # S3 method for phenomix
    +fixef(object, ...)
    +
    + +
    +

    Arguments

    +
    object
    +

    The fitted phenomix model

    + + +
    ...
    +

    Additional arguments

    + +
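A minimal sketch, assuming a fitted model object fit:
+# fixed effect estimates, via the nlme generic re-exported by the package
+# fixef(fit)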
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/get_sdreport.html b/docs/reference/get_sdreport.html new file mode 100644 index 0000000..ded9e70 --- /dev/null +++ b/docs/reference/get_sdreport.html @@ -0,0 +1,105 @@ + +Get sdreport for predictions / coefficients, copying glmmTMB 'fast' implementation — get_sdreport • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Get sdreport for predictions / coefficients, copying glmmTMB 'fast' implementation

    +
    + +
    +
    get_sdreport(object)
    +
    + +
    +

    Arguments

    +
    object
    +

    The fitted phenomix model

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html new file mode 100644 index 0000000..f8e1104 --- /dev/null +++ b/docs/reference/index.html @@ -0,0 +1,157 @@ + +Function reference • phenomix + + +
    +
    + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +

    Processing phenology data

    +

    Functions for manipulating data, prior to fitting

    +
    +

    create_data()

    +

    Create data file for fitting time varying run timing distributions with TMB

    +

    limits()

    +

    Internal function to assign upper and lower bounds to parameters, based on their names

    +

    Fitting phenological models

    +

    Function for fitting the model with TMB

    +
    +

    fit()

    +

    Fitting function to be called by user

    +

    Plotting output

    +

    Function for plotting estimates and diagnostics

    +
    +

    plot_diagnostics()

    +

    Plotting function to be called by user

    +

    Extracting output

    +

    Functions for extracting estimates

    +
    +

    extract_means()

    +

    Output processing function to be called by user

    +

    extract_sigma()

    +

    Output processing function to be called by user

    +

    extract_lower()

    +

    Output processing function to be called by user

    +

    extract_upper()

    +

    Output processing function to be called by user

    +

    extract_theta()

    +

    Output processing function to be called by user

    +

    extract_all()

    +

    Output processing function to be called by user

    +

    extract_annual()

    +

    Output processing function to be called by user

    +

    fixef(<phenomix>)

    +

    Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation

    +

    ranef(<phenomix>)

    +

    Get random effects parameters from model object, copying glmmTMB 'fast' implementation

    + + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/limits.html b/docs/reference/limits.html new file mode 100644 index 0000000..a45bb17 --- /dev/null +++ b/docs/reference/limits.html @@ -0,0 +1,109 @@ + +Internal function to assign upper and lower bounds to parameters, based on their names — limits • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Arguments can be adjusted as needed

    +
    + +
    +
    limits(parnames, max_theta)
    +
    + +
    +

    Arguments

    +
    parnames
    +

    A vector of character strings or names used for estimation

    + + +
    max_theta
    +

    A scalar (optional) giving the maximum value of theta, log(pred)

    + +
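A hypothetical call for illustration only; real parameter names are those created internally by fit(), and the (lower, upper) list structure is an assumption:
+# lims <- limits(parnames = c("theta", "log_obs_sigma"), max_theta = 10)
+# lims$lower; lims$upper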
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/pars.html b/docs/reference/pars.html new file mode 100644 index 0000000..bd14c8e --- /dev/null +++ b/docs/reference/pars.html @@ -0,0 +1,240 @@ + +Get parameters from model object, copying glmmTMB 'fast' implementation — pars • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Get parameters from model object, copying glmmTMB 'fast' implementation

    +
    + +
    +
    pars(object)
    +
    + +
    +

    Arguments

    +
    object
    +

    The fitted phenomix model

    + +
    + +
    +

    Examples

    +
    data(fishdist)
    +
    +# example of fitting fixed effects, no trends, no random effects
    +set.seed(1)
    +datalist <- create_data(fishdist[which(fishdist$year > 1970), ],
    +  asymmetric_model = FALSE,
    +  est_mu_re = FALSE, est_sigma_re = FALSE
    +)
    +fit <- fit(datalist)
    +#> Order of parameters:
    +#>  [1] "theta"             "b_mu"              "log_sigma_mu_devs"
    +#>  [4] "mu_devs"           "b_sig1"            "b_sig2"           
    +#>  [7] "log_sigma1_sd"     "sigma1_devs"       "log_sigma2_sd"    
    +#> [10] "sigma2_devs"       "log_obs_sigma"     "log_tdf_1"        
    +#> [13] "log_tdf_2"         "log_beta_1"        "log_beta_2"       
    +#> Not matching template order:
    +#>  [1] "log_sigma1_sd"     "sigma1_devs"       "log_sigma2_sd"    
    +#>  [4] "sigma2_devs"       "theta"             "mu_devs"          
    +#>  [7] "log_sigma_mu_devs" "log_tdf_1"         "log_tdf_2"        
    +#> [10] "log_beta_1"        "log_beta_2"        "log_obs_sigma"    
    +#> [13] "b_mu"              "b_sig1"            "b_sig2"           
    +#> Your parameter list has been re-ordered.
    +#> (Disable this warning with checkParameterOrder=FALSE)
    +#> outer mgc:  25034.62 
    +#> outer mgc:  2574.348 
    +#> outer mgc:  1978.553 
    +#> outer mgc:  200.0837 
    +#> outer mgc:  201.8165 
    +#> outer mgc:  165.3092 
    +#> outer mgc:  135.522 
    +#> outer mgc:  356.5337 
    +#> outer mgc:  120.1827 
    +#> outer mgc:  138.867 
    +#> outer mgc:  123.0076 
    +#> outer mgc:  72.46919 
    +#> outer mgc:  34.31654 
    +#> outer mgc:  42.47041 
    +#> outer mgc:  77.59186 
    +#> outer mgc:  15.58226 
    +#> outer mgc:  55.94099 
    +#> outer mgc:  18.16199 
    +#> outer mgc:  5.722491 
    +#> outer mgc:  30.59853 
    +#> outer mgc:  12.27546 
    +#> outer mgc:  23.85256 
    +#> outer mgc:  4.909127 
    +#> outer mgc:  16.05365 
    +#> outer mgc:  2.657193 
    +#> outer mgc:  5.044657 
    +#> outer mgc:  1.330298 
    +#> outer mgc:  5.259609 
    +#> outer mgc:  2.40497 
    +#> outer mgc:  1.914672 
    +#> outer mgc:  0.3580338 
    +#> outer mgc:  1.486636 
    +#> outer mgc:  0.4165588 
    +#> outer mgc:  0.243783 
    +#> outer mgc:  0.3935108 
    +#> outer mgc:  0.2062873 
    +#> outer mgc:  0.3560087 
    +#> outer mgc:  0.2509 
    +#> outer mgc:  0.2320703 
    +#> outer mgc:  0.2115966 
    +#> outer mgc:  0.1605053 
    +#> outer mgc:  0.5950958 
    +#> outer mgc:  0.1653233 
    +#> outer mgc:  0.2129569 
    +#> outer mgc:  0.4200267 
    +#> outer mgc:  0.153131 
    +#> outer mgc:  0.4769271 
    +#> outer mgc:  0.4690585 
    +#> outer mgc:  0.2463581 
    +#> outer mgc:  2.354513 
    +#> outer mgc:  0.5144498 
    +#> outer mgc:  0.3407818 
    +#> outer mgc:  0.7156203 
    +#> outer mgc:  0.2290494 
    +#> outer mgc:  0.3342214 
    +#> outer mgc:  0.1680087 
    +#> outer mgc:  0.05198574 
    +#> outer mgc:  0.07477491 
    +#> outer mgc:  0.01717437 
    +#> outer mgc:  0.02687706 
    +#> outer mgc:  0.007900119 
    +#> outer mgc:  0.008503209 
    +#> outer mgc:  0.002959148 
    +#> outer mgc:  0.002543861 
    +#> outer mgc:  0.0009483259 
    +#> outer mgc:  0.0007261544 
    +#> outer mgc:  0.000288091 
    +#> outer mgc:  0.0002083385 
    +#> outer mgc:  8.78474e-05 
    +#> outer mgc:  6.004225e-05 
    +#> outer mgc:  2.612294e-05 
    +#> outer mgc:  1.697649e-05 
    +#> outer mgc:  7.594411e-06 
    +#> outer mgc:  4.810276e-06 
    +#> outer mgc:  4.810276e-06 
    +#> outer mgc:  0.05355298 
    +#> outer mgc:  0.05355582 
    +#> outer mgc:  0.05127458 
    +#> outer mgc:  0.05127639 
    +#> outer mgc:  0.05241413 
    +#> outer mgc:  0.05241576 
    +#> outer mgc:  0.05583219 
    +#> outer mgc:  0.05583443 
    +#> outer mgc:  0.05241437 
    +#> outer mgc:  0.05241552 
    +#> outer mgc:  0.05697153 
    +#> outer mgc:  0.056974 
    +#> outer mgc:  0.05583182 
    +#> outer mgc:  0.0558348 
    +#> outer mgc:  0.06266844 
    +#> outer mgc:  0.06267164 
    +#> outer mgc:  0.05469277 
    +#> outer mgc:  0.05469494 
    +#> outer mgc:  0.05697135 
    +#> outer mgc:  0.05697418 
    +#> outer mgc:  4.814768e-06 
    +#> outer mgc:  4.80579e-06 
    +#> outer mgc:  0.9690304 
    +#> outer mgc:  0.9709709 
    +#> outer mgc:  0.01573754 
    +#> outer mgc:  0.01574038 
    +#> outer mgc:  0.04861318 
    +#> outer mgc:  0.0486681 
    +#> outer mgc:  6321.927 
    +p <- pars(fit)
    +names(p)
    +#> [1] "value"          "sd"             "cov"            "par.fixed"     
    +#> [5] "cov.fixed"      "pdHess"         "gradient.fixed"
    +
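+# The report elements can be indexed directly; "value" and "sd" hold the
+# estimates and standard errors of derived quantities.
+head(p$value)
+head(p$sd)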
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/phenomix-package.html b/docs/reference/phenomix-package.html new file mode 100644 index 0000000..3e0ecaa --- /dev/null +++ b/docs/reference/phenomix-package.html @@ -0,0 +1,96 @@ + +The 'phenomix' package. — phenomix-package • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    A DESCRIPTION OF THE PACKAGE

    +
    + + + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/plot_diagnostics.html b/docs/reference/plot_diagnostics.html new file mode 100644 index 0000000..2fb9dde --- /dev/null +++ b/docs/reference/plot_diagnostics.html @@ -0,0 +1,113 @@ + +Plotting function to be called by user — plot_diagnostics • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    These functions make some basic plots for the user

    +
    + +
    +
    plot_diagnostics(fitted, type = "timing", logspace = TRUE)
    +
    + +
    +

    Arguments

    +
    fitted
    +

    A fitted model object

    + + +
    type
    +

    A plot type for ggplot, either "timing" or "scatter"

    + + +
    logspace
    +

Whether to plot in log space, defaults to TRUE

    + +
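A short sketch of both plot types, assuming a fitted model object fit:
+# run timing curves by year, then an observed versus fitted scatter
+# g1 <- plot_diagnostics(fit, type = "timing", logspace = TRUE)
+# g2 <- plot_diagnostics(fit, type = "scatter", logspace = FALSE)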
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/ranef.phenomix.html b/docs/reference/ranef.phenomix.html new file mode 100644 index 0000000..7acd978 --- /dev/null +++ b/docs/reference/ranef.phenomix.html @@ -0,0 +1,110 @@ + +Get random effects parameters from model object, copying glmmTMB 'fast' implementation — ranef.phenomix • phenomix + + +
    +
    + + + +
    +
    + + +
    +

    Get random effects parameters from model object, copying glmmTMB 'fast' implementation

    +
    + +
    +
    # S3 method for phenomix
    +ranef(object, ...)
    +
    + +
    +

    Arguments

    +
    object
    +

    The fitted phenomix model

    + + +
    ...
    +

    Additional arguments

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/search.json b/docs/search.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ b/docs/search.json @@ -0,0 +1 @@ +[] diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..14ec543 --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,90 @@ + + + + /404.html + + + /articles/a1_examples.html + + + /articles/a2_covariates.html + + + /articles/a3_troubleshooting.html + + + /articles/a4_derived.html + + + /articles/examples.html + + + /articles/index.html + + + /authors.html + + + /index.html + + + /reference/chum.html + + + /reference/create_data.html + + + /reference/extract_all.html + + + /reference/extract_annual.html + + + /reference/extract_lower.html + + + /reference/extract_means.html + + + /reference/extract_sigma.html + + + /reference/extract_theta.html + + + /reference/extract_upper.html + + + /reference/fishdist.html + + + /reference/fit.html + + + /reference/fitted.phenomix.html + + + /reference/fixef.phenomix.html + + + /reference/get_sdreport.html + + + /reference/index.html + + + /reference/limits.html + + + /reference/pars.html + + + /reference/phenomix-package.html + + + /reference/plot_diagnostics.html + + + /reference/ranef.phenomix.html + + diff --git a/man/broken_stick.Rd b/man/broken_stick.Rd deleted file mode 100644 index 606aa12..0000000 --- a/man/broken_stick.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/broken_stick.R -\name{broken_stick} -\alias{broken_stick} -\title{Random generation of datasets using the dirichlet broken stick method} -\usage{ -broken_stick( - n_obs = 1000, - n_groups = 10, - ess_fraction = 1, - tot_n = 100, - p = NULL -) -} -\arguments{ -\item{n_obs}{Number of observations (rows of data matrix to simulate). Defaults to 10} - -\item{n_groups}{Number of categories for each observation (columns of data matrix). Defaults to 10} - -\item{ess_fraction}{The effective sample size fraction, defaults to 1} - -\item{tot_n}{The total sample size to simulate for each observation. This is approximate and the actual -simulated sample size will be slightly smaller. Defaults to 100} - -\item{p}{The stock proportions to simulate from, as a vector. Optional, and when not included, -random draws from the dirichlet are used} -} -\value{ -A 2-element list, whose 1st element \code{X_obs} is the simulated dataset, and whose -2nd element is the underlying vector of proportions \code{p} used to generate the data -} -\description{ -Random generation of datasets using the dirichlet broken stick method -} -\examples{ -\donttest{ -y <- broken_stick(n_obs = 3, n_groups = 5, tot_n = 100) - -# add custom proportions -y <- broken_stick( - n_obs = 3, n_groups = 5, tot_n = 100, - p = c(0.1, 0.2, 0.3, 0.2, 0.2) -) -} -} diff --git a/man/chinook.Rd b/man/chinook.Rd deleted file mode 100644 index 6aa9dd8..0000000 --- a/man/chinook.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{chinook} -\alias{chinook} -\title{Data from Satterthwaite, W.H., Ciancio, J., Crandall, E., Palmer-Zwahlen, -M.L., Grover, A.M., O’Farrell, M.R., Anson, E.C., Mohr, M.S. & Garza, -J.C. (2015). Stock composition and ocean spatial distribution from -California recreational chinook salmon fisheries using genetic stock -identification. Fisheries Research, 170, 166–178. 
The data -genetic data collected from port-based sampling of recreationally-landed -Chinook salmon in California from 1998-2002.} -\format{ -A data frame. -} -\usage{ -chinook -} -\description{ -Data from Satterthwaite, W.H., Ciancio, J., Crandall, E., Palmer-Zwahlen, -M.L., Grover, A.M., O’Farrell, M.R., Anson, E.C., Mohr, M.S. & Garza, -J.C. (2015). Stock composition and ocean spatial distribution from -California recreational chinook salmon fisheries using genetic stock -identification. Fisheries Research, 170, 166–178. The data -genetic data collected from port-based sampling of recreationally-landed -Chinook salmon in California from 1998-2002. -} -\keyword{datasets} diff --git a/man/chum.Rd b/man/chum.Rd new file mode 100644 index 0000000..d861df5 --- /dev/null +++ b/man/chum.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{chum} +\alias{chum} +\title{Count data collected by Washington Department of Fish and Wildlife on +chum salmon from the Skagit River (Washington state). Each row of the +dataframe contains an observation ("number") on a given date ("date"). +The year ("year") and calendar day ("doy") are also included.} +\format{ +A data frame. +} +\usage{ +chum +} +\description{ +Count data collected by Washington Department of Fish and Wildlife on +chum salmon from the Skagit River (Washington state). Each row of the +dataframe contains an observation ("number") on a given date ("date"). +The year ("year") and calendar day ("doy") are also included. +} +\keyword{internal} diff --git a/man/coddiet.Rd b/man/coddiet.Rd deleted file mode 100644 index 9bb7d18..0000000 --- a/man/coddiet.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{coddiet} -\alias{coddiet} -\title{Data from Magnussen, E. 2011. Food and feeding habits of cod (Gadus morhua) -on the Faroe Bank. – ICES Journal of Marine Science, 68: 1909–1917. The data -here are Table 3 from the paper, with sample proportions (columns w) multiplied -by total weight to yield total grams (g) for each sample-diet item combination. Dashes -have been replaced with 0s.} -\format{ -A data frame. -} -\usage{ -coddiet -} -\description{ -Data from Magnussen, E. 2011. Food and feeding habits of cod (Gadus morhua) -on the Faroe Bank. – ICES Journal of Marine Science, 68: 1909–1917. The data -here are Table 3 from the paper, with sample proportions (columns w) multiplied -by total weight to yield total grams (g) for each sample-diet item combination. Dashes -have been replaced with 0s. 
-} -\keyword{datasets} diff --git a/man/create_data.Rd b/man/create_data.Rd new file mode 100644 index 0000000..9a76d27 --- /dev/null +++ b/man/create_data.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/create_data.R +\name{create_data} +\alias{create_data} +\title{Create data file for fitting time varying run timing distributions with TMB} +\usage{ +create_data( + data, + min_number = 0, + variable = "number", + time = "year", + date = "doy", + asymmetric_model = TRUE, + mu = ~1, + sigma = ~1, + covar_data = NULL, + est_sigma_re = TRUE, + est_mu_re = TRUE, + tail_model = "student_t", + family = "lognormal", + max_theta = 10, + share_shape = TRUE, + nu_prior = c(2, 10), + beta_prior = c(2, 1) +) +} +\arguments{ +\item{data}{A data frame} + +\item{min_number}{A minimum threshold to use, defaults to 0} + +\item{variable}{A character string of the name of the variable in 'data' that contains the response (e.g. counts)} + +\item{time}{A character string of the name of the variable in 'data' that contains the time variable (e.g. year)} + +\item{date}{A character string of the name of the variable in 'data' that contains the date (e.g. day of year). The actual +column should contain a numeric value -- for example, the result from using lubridate::yday(x)} + +\item{asymmetric_model}{Boolean, whether or not to let model be asymmetric (e.g. run timing before peak has a +different shape than run timing after peak)} + +\item{mu}{An optional formula allowing the mean to be a function of covariates. Random effects are not included in the formula +but specified with the \code{est_mu_re} argument} + +\item{sigma}{An optional formula allowing the standard deviation to be a function of covariates. For asymmetric models, +each side of the distribution is allowed a different set of covariates. Random effects are not included in the formula +but specified with the \code{est_sigma_re} argument} + +\item{covar_data}{a data frame containing covariates specific to each time step. These are used in the formulas \code{mu} and \code{sigma}} + +\item{est_sigma_re}{Whether to estimate random effects by year in sigma parameter controlling tail of distribution. Defaults to TRUE} + +\item{est_mu_re}{Whether to estimate random effects by year in mu parameter controlling location of distribution. Defaults to TRUE} + +\item{tail_model}{Whether to fit Gaussian ("gaussian"), Student-t ("student_t") or generalized normal ("gnorm"). Defaults to Student-t} + +\item{family}{Response for observation model, options are "gaussian", "poisson", "negbin", "binomial", "lognormal". The default ("lognormal") is +not a true lognormal distribution, but a normal-log in that it assumes log(y) ~ Normal()} + +\item{max_theta}{Maximum value of log(pred) when \code{limits=TRUE}. Defaults to 10} + +\item{share_shape}{Boolean argument for whether asymmetric student-t and generalized normal distributions should share the shape parameter (nu for the student-t; +beta for the generalized normal).
Defaults to TRUE} + +\item{nu_prior}{Two element vector (optional) for penalized prior on student t df, defaults to a Gamma(shape=2, scale=10) distribution} + +\item{beta_prior}{Two element vector (optional) for penalized prior on generalized normal beta, defaults to a Normal(2, 1) distribution} +} +\description{ +Does minimal processing of data to use as argument to fitting function +} +\examples{ +data(fishdist) +datalist <- create_data(fishdist, + min_number = 0, variable = "number", time = "year", + date = "doy", asymmetric_model = TRUE, family = "gaussian" +) +} diff --git a/man/extract_all.Rd b/man/extract_all.Rd new file mode 100644 index 0000000..1c338ba --- /dev/null +++ b/man/extract_all.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_all} +\alias{extract_all} +\title{Output processing function to be called by user} +\usage{ +extract_all(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the means, sigmas, thetas, +lower (25\%) and upper (75\%) quartiles, and respective sds +} diff --git a/man/extract_lower.Rd b/man/extract_lower.Rd new file mode 100644 index 0000000..d965e0e --- /dev/null +++ b/man/extract_lower.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_lower} +\alias{extract_lower} +\title{Output processing function to be called by user} +\usage{ +extract_lower(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the lower quartiles (25\%) and respective sds +} diff --git a/man/extract_means.Rd b/man/extract_means.Rd new file mode 100644 index 0000000..fea979e --- /dev/null +++ b/man/extract_means.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_means} +\alias{extract_means} +\title{Output processing function to be called by user} +\usage{ +extract_means(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the parameter means and respective sds +} diff --git a/man/extract_sigma.Rd b/man/extract_sigma.Rd new file mode 100644 index 0000000..3e7879c --- /dev/null +++ b/man/extract_sigma.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_sigma} +\alias{extract_sigma} +\title{Output processing function to be called by user} +\usage{ +extract_sigma(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the parameter sigma and respective sds +} diff --git a/man/extract_theta.Rd b/man/extract_theta.Rd new file mode 100644 index 0000000..36a8901 --- /dev/null +++ b/man/extract_theta.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_theta} +\alias{extract_theta} +\title{Output processing function to be called by user} +\usage{ +extract_theta(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the parameter theta and respective sds +} diff --git a/man/extract_upper.Rd b/man/extract_upper.Rd new file mode 100644 index 0000000..45c7fc8 --- /dev/null +++ b/man/extract_upper.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_upper} 
+\alias{extract_upper} +\title{Output processing function to be called by user} +\usage{ +extract_upper(fit) +} +\arguments{ +\item{fit}{A fitted object returned from fit()} +} +\description{ +This function extracts the upper quartiles (75\%) and respective sds +} diff --git a/man/fishdist.Rd b/man/fishdist.Rd new file mode 100644 index 0000000..c8c31a3 --- /dev/null +++ b/man/fishdist.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{fishdist} +\alias{fishdist} +\title{Example simulated data for fish distributions from multiple years} +\format{ +A data frame containing simulated data. +} +\usage{ +fishdist +} +\description{ +Example simulated data for fish distributions from multiple years +} +\keyword{internal} diff --git a/man/fit.Rd b/man/fit.Rd new file mode 100644 index 0000000..9502ed2 --- /dev/null +++ b/man/fit.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit.R +\name{fit} +\alias{fit} +\title{Fitting function to be called by user} +\usage{ +fit( + data_list, + silent = FALSE, + inits = NULL, + control = list(eval.max = 2000, iter.max = 1000, rel.tol = 1e-10), + limits = NULL, + fit_model = TRUE +) +} +\arguments{ +\item{data_list}{A list of data, as output from create_data} + +\item{silent}{Boolean passed to TMB::MakeADFun, whether to be verbose or not (defaults to FALSE)} + +\item{inits}{Optional named list of parameters for starting values, defaults to NULL} + +\item{control}{Optional control list for stats::nlminb. For arguments see ?nlminb. Defaults to eval.max=2000, iter.max=1000, rel.tol=1e-10. For final model runs, the rel.tol should be even smaller} + +\item{limits}{Whether to include limits for stats::nlminb. Can be a list of (lower, upper), or TRUE to use suggested hardcoded limits. Defaults to NULL, +where no limits are used.} + +\item{fit_model}{Whether to fit the model. If not, returns a list including the data, parameters, and initial values.
Defaults to TRUE} +} +\description{ +This function creates a list of parameters, sets up TMB object and attempts to +do fitting / estimation +} +\examples{ +data(fishdist) + +# example of fitting fixed effects, no trends, no random effects +set.seed(1) +datalist <- create_data(fishdist[which(fishdist$year > 1970), ], + asymmetric_model = FALSE, + est_mu_re = FALSE, est_sigma_re = FALSE +) +fit <- fit(datalist) +# +# # example of model with random effects in means only, and symmetric distribution +# set.seed(1) +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE, +# est_sigma_re = FALSE) +# fit <- fit(datalist) +# # example of model with random effects in variances +# set.seed(1) +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = TRUE, +# est_mu_re = TRUE) +# fit <- fit(datalist) +# +# # example of model with poisson response +# set.seed(1) +# datalist <- create_data(fishdist[which(fishdist$year > 1970),], asymmetric_model = FALSE, +# est_sigma_trend=FALSE, est_mu_trend=FALSE, est_mu_re = TRUE, +# family="poisson") +# fit <- fit(datalist) +} diff --git a/man/fit_zoidTMB.Rd b/man/fit_zoidTMB.Rd deleted file mode 100644 index a823d10..0000000 --- a/man/fit_zoidTMB.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fitting_tmb.R -\name{fit_zoidTMB} -\alias{fit_zoidTMB} -\title{Fit a trinomial mixture model with TMB} -\usage{ -fit_zoidTMB( - formula = NULL, - design_matrix, - data_matrix, - overdispersion = FALSE, - overdispersion_sd = 5, - prior_sd = NA -) -} -\arguments{ -\item{formula}{The model formula for the design matrix. Does not need to have a response specified. If =NULL, then -the design matrix is ignored and all rows are treated as replicates} - -\item{design_matrix}{A data frame, dimensioned as number of observations, and covariates in columns} - -\item{data_matrix}{A matrix, with observations on rows and number of groups across columns} - -\item{overdispersion}{Whether or not to include overdispersion parameter, defaults to FALSE} - -\item{overdispersion_sd}{Prior standard deviation on 1/overdispersion parameter, Defaults to inv-Cauchy(0,5)} - -\item{prior_sd}{Optional prior sd / penalty for fixed effects} -} -\description{ -Fit a trinomial mixture model that optionally includes covariates to estimate -effects of factor or continuous variables on proportions. -} -\examples{ -\donttest{ - -# fit a model with 1 factor -#design <- data.frame("fac" = c("spring", "spring", "fall")) -#fit <- fit_zoidTMB(formula = ~fac, design_matrix = design, data_matrix = y) - -} - -} diff --git a/man/fixef.phenomix.Rd b/man/fixef.phenomix.Rd new file mode 100644 index 0000000..11fabe4 --- /dev/null +++ b/man/fixef.phenomix.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R +\name{fixef.phenomix} +\alias{fixef.phenomix} +\title{Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation} +\usage{ +\method{fixef}{phenomix}(object, ...) 
+} +\arguments{ +\item{object}{The fitted phenomix model} + +\item{...}{Additional arguments} +} +\description{ +Get fixed effects parameters from model object, copying glmmTMB 'fast' implementation +} diff --git a/man/get_sdreport.Rd b/man/get_sdreport.Rd new file mode 100644 index 0000000..76af54a --- /dev/null +++ b/man/get_sdreport.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R +\name{get_sdreport} +\alias{get_sdreport} +\title{Get sdreport for predictions / coefficients, copying glmmTMB 'fast' implementation} +\usage{ +get_sdreport(object) +} +\arguments{ +\item{object}{The fitted phenomix model} +} +\description{ +Get sdreport for predictions / coefficients, copying glmmTMB 'fast' implementation +} +\keyword{internal} diff --git a/man/limits.Rd b/man/limits.Rd new file mode 100644 index 0000000..23b6640 --- /dev/null +++ b/man/limits.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/limits.R +\name{limits} +\alias{limits} +\title{Internal function to assign upper and lower bounds to parameters, based on their names} +\usage{ +limits(parnames, max_theta) +} +\arguments{ +\item{parnames}{A vector of character strings or names used for estimation} + +\item{max_theta}{A scalar (optional) giving the maximum value of theta, log(pred)} +} +\description{ +Arguments can be adjusted as needed +} diff --git a/man/pars.Rd b/man/pars.Rd new file mode 100644 index 0000000..7a85412 --- /dev/null +++ b/man/pars.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R +\name{pars} +\alias{pars} +\title{Get parameters from model object, copying glmmTMB 'fast' implementation} +\usage{ +pars(object) +} +\arguments{ +\item{object}{The fitted phenomix model} +} +\description{ +Get parameters from model object, copying glmmTMB 'fast' implementation +} +\examples{ +data(fishdist) + +# example of fitting fixed effects, no trends, no random effects +set.seed(1) +datalist <- create_data(fishdist[which(fishdist$year > 1970), ], + asymmetric_model = FALSE, + est_mu_re = FALSE, est_sigma_re = FALSE +) +fit <- fit(datalist) +p <- pars(fit) +names(p) +} +\keyword{internal} diff --git a/man/parse_re_formula.Rd b/man/parse_re_formula.Rd deleted file mode 100644 index bf1df00..0000000 --- a/man/parse_re_formula.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fitting_tmb.R -\name{parse_re_formula} -\alias{parse_re_formula} -\title{Fit a trinomial mixture model that optionally includes covariates to estimate -effects of factor or continuous variables on proportions.} -\usage{ -parse_re_formula(formula, data) -} -\arguments{ -\item{formula}{The model formula for the design matrix.} - -\item{data}{The data matrix used to construct RE design matrix} -} -\description{ -Fit a trinomial mixture model that optionally includes covariates to estimate -effects of factor or continuous variables on proportions. 
-}
diff --git a/man/phenomix-package.Rd b/man/phenomix-package.Rd
new file mode 100644
index 0000000..de4e4dc
--- /dev/null
+++ b/man/phenomix-package.Rd
@@ -0,0 +1,10 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/phenomix-package.R
+\docType{package}
+\name{phenomix-package}
+\alias{phenomix-package}
+\title{The 'phenomix' package.}
+\description{
+A DESCRIPTION OF THE PACKAGE
+}
+\keyword{internal}
diff --git a/man/plot_diagnostics.Rd b/man/plot_diagnostics.Rd
new file mode 100644
index 0000000..b9c086d
--- /dev/null
+++ b/man/plot_diagnostics.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/plot_diagnostics.R
+\name{plot_diagnostics}
+\alias{plot_diagnostics}
+\title{Plotting function to be called by user}
+\usage{
+plot_diagnostics(fitted, type = "timing", logspace = TRUE)
+}
+\arguments{
+\item{fitted}{A fitted model object}
+
+\item{type}{A plot type for ggplot, either "timing" or "scatter"}
+
+\item{logspace}{whether to plot in log space, defaults to TRUE}
+}
+\description{
+These functions make some basic plots for the user
+}
diff --git a/man/ranef.phenomix.Rd b/man/ranef.phenomix.Rd
new file mode 100644
index 0000000..e9f307e
--- /dev/null
+++ b/man/ranef.phenomix.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods.R
+\name{ranef.phenomix}
+\alias{ranef.phenomix}
+\title{Get random effects parameters from model object, copying glmmTMB 'fast' implementation}
+\usage{
+\method{ranef}{phenomix}(object, ...)
+}
+\arguments{
+\item{object}{The fitted phenomix model}
+
+\item{...}{Additional arguments}
+}
+\description{
+Get random effects parameters from model object, copying glmmTMB 'fast' implementation
+}
diff --git a/man/zoidtmb-package.Rd b/man/zoidtmb-package.Rd
deleted file mode 100644
index f063e3d..0000000
--- a/man/zoidtmb-package.Rd
+++ /dev/null
@@ -1,10 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/zoidtmb-package.R
-\docType{package}
-\name{zoidtmb-package}
-\alias{zoidtmb-package}
-\title{The 'zoidtmb' package.}
-\description{
-A DESCRIPTION OF THE PACKAGE
-}
-\keyword{internal}
diff --git a/pkgdown/extra.css b/pkgdown/extra.css
new file mode 100644
index 0000000..ed01162
--- /dev/null
+++ b/pkgdown/extra.css
@@ -0,0 +1 @@
+@import url("https://nmfs-general-modeling-tools.github.io/nmfspalette/extra.css");
diff --git a/src/phenomix.cpp b/src/phenomix.cpp
new file mode 100644
index 0000000..6a1e8df
--- /dev/null
+++ b/src/phenomix.cpp
@@ -0,0 +1,490 @@
+#include <TMB.hpp>
+// #include
+
+template<class Type>
+Type qthill(Type quantile, Type v, Type mean, Type sigma)
+{
+  // implements algorithm from Hill et al. 1970
+  // this is the source of qt in R, and elsewhere -- e.g.
+  // https://repo.progsbase.com/repoviewer/no.inductive.libraries/BasicStatistics/0.1.14///HillsAlgorithm396/
+  // I've extended this here to include mean and sigma
+  Type z, flip;
+  if(quantile > 0.5){
+    flip = 1;
+    z = 2*(1 - quantile);
+  }else{
+    flip = -1;
+    z = 2*quantile;
+  }
+
+  Type a = 1/(v - 0.5);
+  Type b = 48/(a*a);
+  Type c = ((20700*a/b - 98)*a - 16)*a + 96.36;
+  Type d = ((94.5/(b + c) - 3)/b + 1)*sqrt(a*3.14159265/2)*v;
+  Type x = z*d;
+  Type y = pow(x, 2/v);//x**(2/v);
+
+  if(y > 0.05 + a){
+    x = qnorm(z*0.5, Type(0), Type(1));
+    y = x*x;
+    if(v < 5){
+      c = c + 0.3*(v - 4.5)*(x + 0.6);
+    }
+    c = c + (((0.05*d*x - 5)*x - 7)*x - 2)*x + b;
+    y = (((((0.4*y + 6.3)*y + 36)*y + 94.5)/c - y - 3)/b + 1)*x;
+    y = a*y*y;
+    if(y > 0.002){
+      y = exp(y) - 1;
+    }else{
+      y = y + 0.5*y*y;
+    }
+  }else{
+    y = ((1/(((v + 6)/(v*y) - 0.089*d - 0.822)*(v + 2)*3) + 0.5/(v + 4))*y - 1)*(v + 1)/(v + 2) + 1/y;
+  }
+
+  Type q = sqrt(v*y);
+  // flip sign if needed
+  q = q * flip;
+
+  return (mean + sigma*q);
+}
+
+template<class Type>
+Type dgnorm(Type x, Type mu, Type alpha, Type beta)
+{
+  // implements dgnorm (log density of the generalized normal)
+  // copied from https://github.com/maryclare/gnorm/blob/master/R/gnorm.R
+  Type z = -pow(fabs(x - mu)/alpha, beta) + log(beta) - (log(2.0) +
+    log(alpha) + lgamma(1.0/beta));
+  return(z);
+}
+
+template<class Type>
+Type ddnorm(Type x, Type mu, Type sigma1, Type sigma2)
+{
+  // implements log density of double normal distribution, similar to Rubio et al.
+  Type z = log(2.0) - log(sigma1+sigma2);
+  if(x < mu) {
+    z += dnorm((x - mu)/sigma1, Type(0.0), Type(1.0), true);
+  } else {
+    z += dnorm((x - mu)/sigma2, Type(0.0), Type(1.0), true);
+  }
+  return(z);
+}
+
+template<class Type>
+Type ddt(Type x, Type mu, Type sigma1, Type sigma2, Type tdf_1, Type tdf_2)
+{
+  // implements log density of double Student-t distribution, similar to Rubio et al.
+  Type z = log(2.0) - log(sigma1+sigma2);
+  if(x < mu) {
+    z += dt((x - mu) / sigma1, tdf_1, true);// - log(sigma1);
+  } else {
+    z += dt((x - mu) / sigma2, tdf_2, true);// - log(sigma2);
+  }
+  return(z);
+}
+
+template<class Type>
+Type ddgnorm(Type x, Type mu, Type alpha1, Type alpha2, Type beta1, Type beta2, Type sigma1, Type sigma2)
+{
+  // implements log density of double gnormal distribution, similar to Rubio et al.
+  Type z = log(2.0) - log(sigma1+sigma2);
+  if(x < mu) {
+    z += dgnorm(x, mu, alpha1, beta1);
+  } else {
+    z += dgnorm(x, mu, alpha2, beta2);
+  }
+  return(z);
+}
+
+template<class Type>
+Type qgnorm(Type quantile, Type mu, Type alpha, Type beta)
+{
+  // implements qgnorm
+  // copied from https://github.com/maryclare/gnorm/blob/master/R/gnorm.R
+  Type p = quantile;
+  if(p > 0.5) {
+    p = 1 - p;
+  }
+  // sign is based on the requested quantile (not the folded p), so that
+  // upper quantiles fall above mu and lower quantiles below
+  Type sign = 0;
+  if(quantile - 0.5 > 0.0) {
+    sign = 1;
+  }
+  if(quantile - 0.5 < 0.0){
+    sign = -1;
+  }
+  Type shape = 1.0/beta;
+  Type scale = 1.0/pow(1.0/alpha, beta);
+  //return(sign(p - 0.5) * qgamma(abs(p - 0.5) * 2, shape = 1/beta, scale = 1/lambda)^(1/beta) + mu)
+  //return (sign*pow(qgamma(fabs(p - 0.5)*2, shape = shape, scale = scale), 1.0/beta) + mu);
+  //Type z = sign*exp(log(qgamma(fabs(p - 0.5)*2, shape = shape, scale = scale))/beta) + mu;
+  Type z = sign*exp(log(qgamma(fabs(p - 0.5)*2, shape, scale))/beta) + mu;
+  return (z);
+}
+
+template<class Type>
+Type qdnorm(Type p, Type mu, Type sigma1, Type sigma2)
+{
+  // implements quantile of double normal distribution, similar to Rubio et al.
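+  // note: the split-normal CDF equals sigma1/(sigma1 + sigma2) at x = mu,
+  // so the mixing weight r below selects which side of the peak the
+  // requested quantile falls on; each branch then inverts the corresponding
+  // rescaled standard normal CDF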
+  Type z = 0;
+  Type r = sigma1/(sigma1 + sigma2);
+  if(p < r) {
+    z = mu + sigma1 * qnorm(0.5 * p * (sigma1 + sigma2)/sigma1, Type(0.0), Type(1.0));
+  } else {
+    z = mu + sigma2 * qnorm(0.5 * ((sigma1 + sigma2) * (1 + p) - 2 * sigma1)/sigma2, Type(0.0), Type(1.0));
+  }
+  return(z);
+}
+
+template<class Type>
+Type qdt(Type p, Type mu, Type sigma1, Type sigma2, Type tdf_1, Type tdf_2)
+{
+  // implements quantile of double Student-t distribution, similar to Rubio et al.
+  Type z = 0;
+  Type r = sigma1/(sigma1 + sigma2);
+  if(p < r) {
+    z = mu + sigma1 * qthill(0.5 * p * (sigma1 + sigma2)/sigma1, Type(tdf_1), Type(0.0), Type(1.0));
+  } else {
+    z = mu + sigma2 * qthill(0.5 * ((sigma1 + sigma2) * (1 + p) - 2 * sigma1)/sigma2, Type(tdf_2), Type(0.0), Type(1.0));
+  }
+  return(z);
+}
+
+template<class Type>
+Type qdgnorm(Type p, Type mu, Type sigma1, Type sigma2, Type beta_ratio_1, Type beta_ratio_2, Type beta_1, Type beta_2)
+{
+  // implements quantile of double generalized normal distribution, similar to Rubio et al.
+  // qgnorm already includes the location (mu) and scale (alpha), so each
+  // branch inverts the rescaled CDF directly
+  Type z = 0;
+  Type r = sigma1/(sigma1 + sigma2);
+  if(p < r) {
+    z = qgnorm(0.5 * p * (sigma1 + sigma2)/sigma1, mu, sigma1*beta_ratio_1, beta_1);
+  } else {
+    z = qgnorm(0.5 * ((sigma1 + sigma2) * (1 + p) - 2 * sigma1)/sigma2, mu, sigma2*beta_ratio_2, beta_2);
+  }
+  return(z);
+}
+
+
+template<class Type>
+Type objective_function<Type>::operator() ()
+{
+  using namespace density;
+
+  DATA_VECTOR(y); // vector of counts
+  //DATA_IVECTOR(yint); // counts for poisson/negbin models
+  DATA_VECTOR(x); // vector of calendar dates
+  DATA_IVECTOR(years); // vector of years to assign each count to
+  DATA_IVECTOR(year_levels);
+  DATA_IVECTOR(unique_years); // vector containing unique years
+  DATA_INTEGER(nLevels); // number of unique years
+  DATA_INTEGER(asymmetric); // 0 if false, 1 = true. Whether to estimate same shape/scale parameters before/after mean
+  DATA_INTEGER(family); // 1 gaussian, 2 = poisson, 3 = neg bin, 4 = binomial, 5 = lognormal
+  DATA_INTEGER(tail_model); // 0 if gaussian, 1 = student_t, 2 = generalized normal for tails
+  DATA_INTEGER(est_sigma_re); // 0 if FALSE, 1 = TRUE. Whether to estimate deviations as random effects
+  DATA_INTEGER(est_mu_re); // 0 if FALSE, 1 = TRUE. Whether to estimate deviations as random effects
+  DATA_MATRIX(mu_mat); // matrix of covariates for mean trend
+  DATA_MATRIX(sig_mat); // matrix of covariates for sigma trend
+  DATA_INTEGER(share_shape);
+  DATA_INTEGER(use_t_prior);
+  DATA_INTEGER(use_beta_prior);
+  DATA_VECTOR(nu_prior); // prior for student t df parameter
+  DATA_VECTOR(beta_prior); // prior for gnorm parameter
+
+  PARAMETER(log_sigma1_sd);// log sd of random effects on sigma1
+  PARAMETER_VECTOR(sigma1_devs);// random effects on sigma1
+  PARAMETER(log_sigma2_sd);// log sd of random effects on sigma2
+  PARAMETER_VECTOR(sigma2_devs);// random effects on sigma2
+  PARAMETER_VECTOR(theta); // scaling parameter
+  PARAMETER_VECTOR(mu_devs);// random effects on mean
+  PARAMETER(log_sigma_mu_devs); // log sd of random effects on mean
+  PARAMETER(log_tdf_1);// log of Student t df 1
+  PARAMETER(log_tdf_2);// log of Student t df 2
+  PARAMETER(log_beta_1);
+  PARAMETER(log_beta_2);
+  PARAMETER(log_obs_sigma);// log sd of observation error
+
+  PARAMETER_VECTOR(b_mu); // coefficients for covariates on mean
+  PARAMETER_VECTOR(b_sig1); // coefficients for covariates on sigma1
+  PARAMETER_VECTOR(b_sig2); // coefficients for covariates on sigma2
+
+  Type nll=0;
+
+  // derived parameters
+  Type obs_sigma=exp(log_obs_sigma);
+  Type tdf_1 = exp(log_tdf_1) + 2;
+  Type tdf_2 = exp(log_tdf_2) + 2;
+  Type beta_1 = exp(log_beta_1);
+  Type beta_2 = exp(log_beta_2);
+  if(share_shape==1) {
+    tdf_2 = tdf_1;
+    beta_2 = beta_1;
+  }
+
+  if(use_t_prior && tail_model == 1) {
+    //https://statmodeling.stat.columbia.edu/2015/05/17/do-we-have-any-recommendations-for-priors-for-student_ts-degrees-of-freedom-parameter/
+    nll += dgamma(tdf_1, Type(nu_prior(0)), Type(nu_prior(1)), true);
+    if(asymmetric == 1) nll += dgamma(tdf_2, Type(nu_prior(0)), Type(nu_prior(1)), true);
+  }
+  if(use_beta_prior && tail_model == 2) {
+    // value ~ 2-3 is normal
+    nll += dgamma(beta_1, Type(beta_prior(0)), Type(beta_prior(1)), true);
+    if(asymmetric == 1) nll += dgamma(beta_2, Type(beta_prior(0)), Type(beta_prior(1)), true);
+  }
+
+  vector<Type> sigma1(nLevels), mu(nLevels);
+  vector<Type> sigma2(nLevels);
+  vector<Type> alpha1(nLevels), alpha2(nLevels);
+  vector<Type> lower25(nLevels), upper75(nLevels);
+  vector<Type> range(nLevels); // 75th - 25th percentile
+  int i, t;
+  int n = y.size();
+  vector<Type> log_dens(n);
+  vector<Type> pred(n);
+
+  // calculations for beta for gnorm dist if implemented
+  vector<Type> beta_ratio(2);
+  if(tail_model == 2) {
+    beta_ratio(0) = sqrt(exp(lgamma(1.0/Type(beta_1))) / exp(lgamma(3.0/Type(beta_1))));
+    if(asymmetric == 1) {
+      beta_ratio(1) = sqrt(exp(lgamma(1.0/Type(beta_2))) / exp(lgamma(3.0/Type(beta_2))));
+    }
+  }
+
+  // random effects components
+  for(i = 0; i < nLevels; i++) {
+    // random effects contributions of mean and sigma1
+    if(est_mu_re==1) {
+      nll += dnorm(mu_devs(i), Type(0.0), exp(log_sigma_mu_devs), true);
+    }
+    if(est_sigma_re==1) {
+      nll += dnorm(sigma1_devs(i), Type(0.0), exp(log_sigma1_sd), true);
+      if(asymmetric == 1) {
+        nll += dnorm(sigma2_devs(i), Type(0.0), exp(log_sigma2_sd), true);
+      }
+    }
+  }
+
+  // fixed effects for mu and both sigmas
+  mu = mu_mat * b_mu;
+  sigma1 = sig_mat * b_sig1;
+  if(asymmetric == 1) sigma2 = sig_mat * b_sig2;
+
+  for(i = 0; i < nLevels; i++) {
+
+    if(est_sigma_re == 1) {
+      sigma1(i) += sigma1_devs(i);
+      if(asymmetric == 1) sigma2(i) += sigma2_devs(i);
+    }
+
+    // calculate alphas if the gnorm model is used; alphas are indexed by
+    // level, as are the sigmas
+    if(tail_model == 2) {
+      alpha1(i) = sigma1(i)*beta_ratio(0);
+      if(asymmetric==1) {
+        alpha2(i) = sigma2(i)*beta_ratio(1);
+      }
+    }
+
+    // trend is in normal space, e.g. not log-linear
+    if(est_mu_re == 1) {
+      mu(i) += mu_devs(i);
+    }
+
+    // this is all for calculating quantiles of normal distributions
+    if(tail_model==0) {
+      if(asymmetric == 0) {
+        lower25(i) = qnorm(Type(0.25), mu(i), sigma1(i));
+        upper75(i) = qnorm(Type(0.75), mu(i), sigma1(i));
+      } else {
+        lower25(i) = qdnorm(Type(0.25), mu(i), sigma1(i), sigma2(i));
+        upper75(i) = qdnorm(Type(0.75), mu(i), sigma1(i), sigma2(i));
+      }
+    }
+    // this is all for calculating quantiles of Student-t distributions
+    if(tail_model==1) {
+      if(asymmetric == 0) {
+        lower25(i) = qthill(Type(0.25), Type(tdf_1), mu(i), sigma1(i));
+        upper75(i) = qthill(Type(0.75), Type(tdf_1), mu(i), sigma1(i));
+      } else {
+        lower25(i) = qdt(Type(0.25), mu(i), sigma1(i), sigma2(i), Type(tdf_1), Type(tdf_2));
+        upper75(i) = qdt(Type(0.75), mu(i), sigma1(i), sigma2(i), Type(tdf_1), Type(tdf_2));
+      }
+    }
+    // this is all for calculating quantiles of generalized normal distributions
+    if(tail_model==2) {
+      if(asymmetric == 0) {
+        lower25(i) = qgnorm(Type(0.25), mu(i), sigma1(i)*beta_ratio(0), beta_1);
+        upper75(i) = qgnorm(Type(0.75), mu(i), sigma1(i)*beta_ratio(0), beta_1);
+      } else {
+        lower25(i) = qdgnorm(Type(0.25), mu(i), sigma1(i), sigma2(i), beta_ratio(0), beta_ratio(1), beta_1, beta_2);
+        upper75(i) = qdgnorm(Type(0.75), mu(i), sigma1(i), sigma2(i), beta_ratio(0), beta_ratio(1), beta_1, beta_2);
+      }
+    }
+    range(i) = upper75(i) - lower25(i);
+  }
+
+  // this is for the predictions
+  for(i = 0; i < y.size(); i++) {
+    if(asymmetric == 1) {
+      // model is asymmetric, left side smaller / right side bigger
+      if(tail_model==0) {
+        log_dens(i) = ddnorm(x(i), mu(years(i)-1), sigma1(years(i)-1), sigma2(years(i)-1));
+      }
+      if(tail_model==1) {
+        log_dens(i) = ddt(x(i), mu(years(i)-1), sigma1(years(i)-1), sigma2(years(i)-1), Type(tdf_1), Type(tdf_2));
+      }
+      if(tail_model==2) {
+        log_dens(i) = ddgnorm(x(i), mu(years(i)-1), alpha1(years(i)-1), alpha2(years(i)-1), beta_1, beta_2, sigma1(years(i)-1), sigma2(years(i)-1));
+      }
+      pred(i) = log_dens(i) + theta(years(i)-1);
+
+    } else {
+      if(tail_model==0) {
+        // model is symmetric around mu, gaussian tails
+        log_dens(i) = dnorm(x(i), mu(years(i)-1), sigma1(years(i)-1), true);
+      } else {
+        if(tail_model==1) {
+          // model is symmetric around mu, student-t tails
+          log_dens(i) = dt((x(i) - mu(years(i)-1)) / sigma1(years(i)-1), tdf_1, true) - log(sigma1(years(i)-1));
+        } else {
+          // gnorm, copied from maryclare/gnorm
+          // alpha = sqrt( var * gamma(1/beta) / gamma(3/beta) ), alpha = sigma(1)*beta_ratio(1)
+          log_dens(i) = dgnorm(x(i), mu(years(i)-1), alpha1(years(i)-1), beta_1);
+        }
+      }
+      pred(i) = log_dens(i) + theta(years(i)-1);
+    }
+  }
+
+  // this is for the cumulative annual predictions
+  vector<Type> year_log_tot(nLevels);
+  vector<Type> year_tot(nLevels);
+  Type dens;
+  for(i = 0; i < nLevels; i++) {
+    year_log_tot(i) = 0;
+    year_tot(i) = 0;
+    for(t = 1; t < 366; t++) {
+
+      if(asymmetric == 1) {
+        // model is asymmetric, left side smaller / right side bigger
+        if(tail_model==0) {
+          dens = ddnorm(Type(t), mu(i), sigma1(i), sigma2(i));
+        }
+        if(tail_model==1) {
+          dens = ddt(Type(t), mu(i), sigma1(i), sigma2(i), Type(tdf_1), Type(tdf_2));
+        }
+        if(tail_model==2) {
+          dens = ddgnorm(Type(t), mu(i), alpha1(i), alpha2(i), beta_1, beta_2, sigma1(i), sigma2(i));
+        }
+        year_log_tot(i) = year_log_tot(i) + dens + theta(i);
+        year_tot(i) = year_tot(i) + exp(dens + theta(i));
+
+      } else {
+        if(tail_model==0) {
+          // model is symmetric around mu, gaussian tails
+          dens = dnorm(Type(t), mu(i), sigma1(i), true);
+        } else {
+          if(tail_model==1) {
+            // model is symmetric around mu, student-t tails
+            dens = dt((Type(t) - mu(i)) / sigma1(i), tdf_1, true) - log(sigma1(i));
+          } else {
+            // gnorm, copied from maryclare/gnorm
+            // alpha = sqrt( var * gamma(1/beta) / gamma(3/beta) ), alpha = sigma(1)*beta_ratio(1)
+            dens = dgnorm(Type(t), mu(i), alpha1(i), beta_1);
+          }
+        }
+        year_log_tot(i) = year_log_tot(i) + dens + theta(i);
+        year_tot(i) = year_tot(i) + exp(dens + theta(i));
+      }
+    }
+  }
+
+
+  // this is the likelihood
+  Type s1 = 0;
+  Type s2 = 0;
+
+  if(family==1) {
+    // gaussian, both data and predictions in log space
+    nll += sum(dnorm(y, pred, obs_sigma, true));
+  }
+  if(family==2) {
+    for(i = 0; i < n; i++) {
+      if(pred(i) > 20) pred(i) = 20; // not needed
+      nll += dpois(y(i), exp(pred(i)), true);
+    }
+  }
+  if(family==3) {
+    for(i = 0; i < n; i++) {
+      s1 = pred(i);
+      //s2 = s1 + pow(s1, Type(2))*obs_sigma;
+      s2 = 2. * s1 - log_obs_sigma; // log(var - mu)
+      nll += dnbinom_robust(y(i), s1, s2, true);
+    }
+  }
+  if(family==4) {
+    for(i = 0; i < n; i++) {
+      nll += dbinom_robust(y(i), Type(1.0), pred(i), true);
+    }
+  }
+  if(family==5) {
+    // lognormal, both data and predictions in log space
+    nll += sum(dnorm(log(y), pred, obs_sigma, true));
+  }
+  // ADREPORT section
+  ADREPORT(theta); // nuisance parameter
+  REPORT(theta);
+  ADREPORT(sigma1); // sigma, LHS
+  REPORT(sigma1);
+  ADREPORT(mu); // mean of curves by year
+  REPORT(mu);
+  ADREPORT(b_mu); // coefficients for the mean
+  REPORT(b_mu);
+  ADREPORT(b_sig1); // coefficients for sigma, LHS
+  REPORT(b_sig1);
+  ADREPORT(year_tot); // annual totals by year
+  REPORT(year_tot);
+  ADREPORT(year_log_tot); // annual totals by year, log space
+  REPORT(year_log_tot);
+
+  if(family != 2 && family != 4) {
+    ADREPORT(obs_sigma); // obs sd (or phi, NB)
+    REPORT(obs_sigma);
+  }
+  ADREPORT(pred); // predictions in link space (log)
+  REPORT(pred);
+
+  ADREPORT(lower25); // lower quartile
+  REPORT(lower25);
+  ADREPORT(upper75); // upper quartile
+  REPORT(upper75);
+  ADREPORT(range); // diff between upper and lower quartiles
+  REPORT(range);
+  if(tail_model==1) {
+    ADREPORT(tdf_1); // tdf for LHS
+    REPORT(tdf_1);
+  }
+  if(tail_model==2) {
+    ADREPORT(beta_1); // gnorm shape for LHS
+    REPORT(beta_1);
+  }
+  if(asymmetric == 1) {
+    // these are only reported for asymmetric model
+    ADREPORT(b_sig2); // coefficients for sigma, RHS
+    REPORT(b_sig2);
+    if(est_sigma_re==1) {
+      ADREPORT(sigma2); // same as above, but RHS
+      REPORT(sigma2);
+    }
+    if(tail_model==1) {
+      ADREPORT(tdf_2);
+      REPORT(tdf_2);
+    }
+    if(tail_model==2) {
+      ADREPORT(beta_2);
+      REPORT(beta_2);
+    }
+  }
+  return (-nll);
+}
diff --git a/src/zoidtmb.cpp b/src/zoidtmb.cpp
deleted file mode 100644
index e61f29c..0000000
--- a/src/zoidtmb.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <TMB.hpp>
-// #include
-
-template<class Type>
-Type objective_function<Type>::operator() ()
-{
-  using namespace density;
-
-  DATA_INTEGER(overdisp); // whether or not to include overdispersion term
-  DATA_INTEGER(use_prior_sd); // whether to use penalty on fixed effects betas
-  DATA_INTEGER(n_groups); // number of re groups
-  DATA_INTEGER(est_re); // 0 or 1 indicator
-
-  DATA_MATRIX(X); //[N_samples, N_bins] X; // proportions
-  DATA_MATRIX(design_X); //[N_samples, N_covar] design_X;
-  DATA_IMATRIX(prod_idx);//[N_bins,N_bins-1] int prod_idx;
-  DATA_SCALAR(prior_sd);
-
-  //DATA_MATRIX(design_Z);//[N_samples, tot_re] design_Z; // design matrix of random ints
-  DATA_IVECTOR(re_var_indx); //[tot_re + 1]; // total index across all groups
-
-  // Parameters
-  PARAMETER(log_phi_inv); // log-transform to ensure positivity
-  PARAMETER_MATRIX(beta_raw);
-  //PARAMETER_VECTOR(zeta_vec);
-  //PARAMETER_VECTOR(log_zeta_sds); // log-transform for positivity
-
-  // Transform parameters to original scale
-  Type phi_inv = exp(log_phi_inv); // Exponentiating to ensure positivity
-  //vector<Type> zeta_sds = exp(log_zeta_sds); // Same for standard deviations
-
-  int N_samples = X.rows();
-  int N_bins = X.cols();
-  int N_covar = design_X.cols();
-  //int tot_re = design_Z.cols();
-
-  // Transformed data
-  matrix<Type> is_zero(N_samples, N_bins);
-  matrix<Type> is_proportion(N_samples, N_bins);
-  matrix<Type> logX(N_samples, N_bins);
-  matrix<Type> logNX(N_samples, N_bins);
-  vector<Type> ESS(N_samples);
-  vector<Type> ones(N_bins);
-
-  // The negative log-likelihood
-  Type nll = 0.0;
-
-  // Calculate ones for Dirichlet
-  ones.setZero();
-  ones += ones;
-
-  // Calculate ESS, summed across cols for each row
-  ESS.setZero();
-  for (int i = 0; i < N_samples; i++) {
-    for (int j = 0; j < N_bins; j++) {
-      ESS(i) += X(i, j);
-    }
-  }
-
-  // Calculate is_zero and is_proportion indicators
-  for (int i = 0; i < N_samples; i++) {
-    for (int j = 0; j < N_bins; j++) {
-      is_zero(i, j) = (X(i, j) == 0) ? Type(1) : Type(0);
-      is_proportion(i, j) = (X(i, j) < ESS(i) && X(i, j) > 0) ? Type(1) : Type(0);
-      if (is_proportion(i, j) == Type(1)) {
-        logX(i, j) = log(X(i, j));
-        logNX(i, j) = log(ESS(i) - X(i, j));
-      }
-    }
-  }
-
-  Type phi = 1.0; // Initialize phi
-  if (overdisp == 1) {
-    phi = Type(1) / phi_inv;
-  }
-
-  matrix<Type> p_zero(N_samples, N_bins); // Probability of 0 for each cell
-  matrix<Type> p_one(N_samples, N_bins); // Probability of 1 for each cell
-  matrix<Type> beta(N_bins, N_covar); // Coefficients
-  //matrix<Type> zeta(N_bins, tot_re); // Coefficients for random effects
-  matrix<Type> mu(N_samples, N_bins); // Estimates, in normal space
-
-  // Fill beta and zeta matrices
-  beta.row(N_bins - 1).setZero(); // For identifiability
-  for (int k = 0; k < (N_bins - 1); k++) {
-    beta.row(k) = beta_raw.row(k);
-  }
-
-  //if (est_re == 1) {
-  //  int counter = 0;
-  //zeta.row(N_bins - 1).setZero(); // For identifiability
-  //  for (int k = 0; k < (N_bins - 1); k++) {
-  //for (int j = 0; j < tot_re; j++) {
-  //zeta(k,j) = zeta_vec(counter);
-  //  counter += counter;
-  //}
-  //  }
-  //}
-
-  // Calculate mu
-  vector<Type> logits(N_bins);
-  for (int n = 0; n < N_samples; n++) {
-    logits.setZero();
-    for (int m = 0; m < N_bins; m++) {
-      logits(m) = (design_X.row(n) * beta.col(m)).sum();
-      //if (est_re == 1) {
-      //logits(m) += (design_Z.row(n) * zeta.col(m)).sum();
-      //}
-    }
-    logits = logits.exp();
-    mu.row(n) = logits / logits.sum(); // Softmax
-  }
-
-  // Calculate p_zero and p_one
-  for (int i = 0; i < N_samples; i++) {
-    for (int j = 0; j < N_bins; j++) {
-      p_zero(i, j) = pow(1.0 - mu(i, j), ESS[i] * phi);
-
-      // Calculation of p_one requires the product over a subset of p_zero
-      Type prod_p_zero = 1.0;
-      for (int idx = 0; idx < (N_bins - 1); idx++) {
-        if (prod_idx(j, idx) != j) {
-          prod_p_zero *= p_zero(i, prod_idx(j, idx));
-        }
-      }
-      p_one(i, j) = (1.0 - p_zero(i, j)) * prod_p_zero;
-    }
-  }
-
-  // Overdispersion prior
-  //if (overdisp == 1) {
-  //  nll -= dcauchy(phi_inv[1], Type(0), Type(5), true);
-  //}
-
-  // Priors for fixed effects for covariate factors
-  if(use_prior_sd == 1) {
-    for (int i = 0; i < N_covar; i++) {
-      for (int j = 0; j < (N_bins - 1); j++) {
-        nll -= dnorm(beta_raw(j, i), Type(0), prior_sd, true);
-      }
-    }
-  }
-
-  // Priors for random effects
-  //if (est_re == 1) {
-  //  for (int i = 0; i < n_groups; i++) {
-  //  nll -= dstudent_t(zeta_sds[i], Type(3), Type(0), Type(2), true);
-  //  }
-  //for (int i = 0; i < (N_bins - 1); i++) {
-  //for (int j = 0; j < tot_re; j++) {
-  //nll -= dnorm(zeta(i, j), Type(0), zeta_sds(re_var_indx(j)), true);
-  //}
-  //}
-  //}
-
-  // Likelihood contributions for each sample and bin
-  for (int i = 0; i < N_samples; i++) {
-    for (int j = 0; j < N_bins; j++) {
-      // Marginals of the trinomial are independent binomials
-      nll -= dbinom(is_zero(i, j), Type(1), p_zero(i, j), true);
-
-      if (is_proportion(i, j) == Type(1)) {
-        Type alpha_temp = mu(i, j) * ESS(i) * phi;
-        Type beta_temp = (Type(1) - mu(i, j)) * ESS(i) * phi;
-        // Beta log probability mass function for 3-parameter model
-        nll -= dbeta(X(i, j) / ESS(i), alpha_temp, beta_temp, true);
-      }
-    }
-  }
-
-  // report w derivatives
-  ADREPORT(beta);
-  ADREPORT(phi_inv);
-  ADREPORT(log_phi_inv);
-  //ADREPORT(zeta_sds);
-  //ADREPORT(zeta);
-  ADREPORT(mu);
-  ADREPORT(p_zero);
-  ADREPORT(p_one);
-
-  REPORT(beta);
-  REPORT(phi_inv);
-  REPORT(log_phi_inv);
-  //REPORT(zeta_sds);
-  //REPORT(zeta);
-  REPORT(mu);
-  REPORT(p_zero);
-  REPORT(p_one);
-  return nll;
-}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..dab2c76
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(phenomix)
+
+test_check("phenomix")
diff --git a/tests/testthat/test-fit.R b/tests/testthat/test-fit.R
new file mode 100644
index 0000000..e64bdaf
--- /dev/null
+++ b/tests/testthat/test-fit.R
@@ -0,0 +1,297 @@
+context("Fitting")
+
+library(gnorm)
+rel_tol <- 1e-4
+
+set.seed(123)
+
+# create 10 years of data
+df <- expand.grid("doy" = 100:200, "year" = 1:10, sig = 10)
+df$mu <- rnorm(10, 150, 5)[df$year]
+df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE)
+df$pred <- exp(df$pred + 10)
+df$number <- round(rnorm(nrow(df), df$pred, 0.1))
+
+test_that("gaussian model - symmetric works - 1 year", {
+  set.seed(1)
+
+  d <- df[which(df$year == 1), ]
+  fitted <- fit(create_data(d, asymmetric_model = FALSE, min_number = 1, max_theta = 12, tail_model = "gaussian"),
+    silent = TRUE,
+    control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)
+  )
+  expect_equal(length(which(is.na(fitted$sdreport$sd))), 0)
+
+  f <- fit(create_data(d, asymmetric_model = TRUE,
+    min_number = 0.1, max_theta = 12, tail_model = "gaussian"),
+    silent = TRUE,
+    control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)
+  )
+  expect_equal(length(which(is.na(f$sdreport$sd))), 0)
+})
+
+test_that("student-t model - symmetric works - 1 year", {
+  set.seed(1)
+
+  d <- df[which(df$year == 1), ]
+  fitted <- fit(create_data(d, asymmetric_model = FALSE, min_number = 1, max_theta = 12, tail_model = "student_t"),
+    silent = TRUE,
+    control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)
+  )
+  expect_equal(length(which(is.na(fitted$sdreport$sd))), 0)
+
+  fitted <- fit(create_data(d, asymmetric_model = TRUE, min_number = 1, max_theta = 12, tail_model = "student_t"),
+    silent = TRUE,
+    control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)
+  )
+  expect_equal(length(which(is.na(fitted$sdreport$sd))), 0)
+})
+
+test_that("gnorm model - symmetric works - 1 year", {
+  set.seed(1)
+
+  d <- df[which(df$year == 1), ]
+  fitted <- fit(create_data(d, asymmetric_model = FALSE, min_number = 1, max_theta = 12, tail_model = "gnorm"),
+    silent = TRUE,
+    control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)
+  )
+  expect_equal(length(which(is.na(fitted$sdreport$sd))), 0)
+
+  fitted <- fit(create_data(d, asymmetric_model = TRUE, min_number = 1, max_theta =
12, tail_model = "gnorm"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + +test_that("other obs error families work - 1 year", { + set.seed(1) + + d <- df[which(df$year == 1), ] + fitted <- fit(create_data(d, asymmetric_model = FALSE, family = "negbin", tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + df <- expand.grid("doy" = 100:200, "year" = 1:10, sig = 10) + df$mu <- rnorm(10, 150, 5)[df$year] + df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE) + df$pred <- exp(df$pred) + df$number <- rnorm(nrow(df), df$pred, 0.001) + d <- df[which(df$year == 1), ] + fitted <- fit(create_data(d, asymmetric_model = FALSE, family = "gaussian", tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + df <- expand.grid("doy" = 100:200, "year" = 1:10, sig = 10) + df$mu <- rnorm(10, 150, 5)[df$year] + df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE) + df$pred <- exp(df$pred + 3) / (3 + exp(df$pred + 1)) + df$number <- rbinom(nrow(df), size = 1, prob = df$pred) + d <- df[which(df$year == 1), ] + fitted <- fit(create_data(d, asymmetric_model = FALSE, family = "binomial", tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + df <- expand.grid("doy" = 100:200, "year" = 1:10, sig = 10) + df$mu <- rnorm(10, 150, 5)[df$year] + df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE) + df$pred <- rpois(nrow(df), exp(df$pred + 8)) + df$number <- df$pred + d <- df[which(df$year == 1), ] + fitted <- fit(create_data(d, asymmetric_model = FALSE, family = "negbin", tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + + + +# create 20 years of data +set.seed(123) +df <- expand.grid("doy" = 100:200, "year" = 1:20) +df$mu <- rnorm(unique(df$year), 150, 5)[df$year] +df$sig1 <- rnorm(unique(df$year), 30, 5)[df$year] +df$sig2 <- rnorm(unique(df$year), 30, 5)[df$year] +df$sig <- ifelse(df$doy < df$mu, df$sig1, df$sig2) +df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE) +df$pred <- exp(df$pred + 8) +df$number <- round(rnorm(nrow(df), df$pred, 0.1)) + +test_that("gaussian model - symmetric works - multiple years", { + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = FALSE, est_mu_re = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = FALSE, est_sigma_re = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol), limits = TRUE + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(1) + fitted <- 
fit(create_data(df, asymmetric_model = FALSE, est_sigma_re = FALSE, est_mu_re = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol), limits = TRUE + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + + +test_that("gaussian model - asymmetric works - multiple years", { + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + # commented out because this particular seed makes the unix test fail + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_sigma_re = FALSE, min_number = 1), + # silent = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol), limits = TRUE + # ) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(1) + fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, est_sigma_re = FALSE, min_number = 1, tail_model = "gaussian"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + +# library(LaplacesDemon) +# set.seed(123) +# df <- expand.grid("doy" = 100:200, "year" = 1:20) +# df$mu <- rnorm(unique(df$year), 150, 5)[df$year] +# df$sig1 <- rnorm(unique(df$year), 30, 5)[df$year] +# df$sig2 <- rnorm(unique(df$year), 30, 5)[df$year] +# df$sig <- ifelse(df$doy < df$mu, df$sig1, df$sig2) +# df$pred <- dst(df$doy, mu=df$mu, sigma = df$sig, nu=10,log = TRUE) +# df$pred <- exp(df$pred + 8) +# df$number <- round(rnorm(nrow(df), df$pred, 0.1)) + +# test_that("student-t model - symmetric works - multiple years", { +# #commented out because this particular seed makes the unix test fail +# set.seed(1) +# fitted <- fit(create_data(df, asymmetric_model = FALSE, min_number = 1, +# tail_model = "student_t"), +# silent = TRUE, +# control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) +# ) +# expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +# }) + +# +# test_that("student-t model - asymmetric works - multiple years", { +# set.seed(2) +# fitted <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1, tail_model = "student_t"), +# silent = TRUE, limits = TRUE, +# control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) +# ) +# expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +# }) + + +test_that("gnorm model - symmetric works - multiple years", { + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = FALSE, min_number = 1, tail_model = "gnorm"), + # silent = FALSE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + # ) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + set.seed(4) + fitted <- fit(create_data(df, asymmetric_model = FALSE, est_sigma_re = FALSE, est_mu_re = FALSE, min_number = 1, tail_model = "gnorm"), + silent = TRUE, + control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + ) + expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + + +test_that("gnorm model - asymmetric works - 
multiple years", { + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,tail_model = "gnorm"), silent = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + # + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_sigma_re = FALSE, min_number = 1,tail_model = "gnorm"), silent = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol)) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + # + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, min_number = 1,tail_model = "gnorm"), silent = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol), limits = TRUE) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, est_sigma_re = FALSE, min_number = 1, tail_model = "gnorm"), + # silent = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + # ) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) + +# create 20 years of data -- using gnorm +set.seed(123) +df <- expand.grid("doy" = 100:200, "year" = 1:20) +df$mu <- rnorm(unique(df$year), 150, 5)[df$year] +df$alpha1 <- rnorm(unique(df$year), 30, 5)[df$year] +# df$alpha2 <- rnorm(unique(df$year), 30, 5)[df$year] +# df$sig <- ifelse(df$doy < df$mu, df$alpha1, df$alpha2) +df$sig <- df$alpha1 +df$pred <- 0 +for (i in 1:nrow(df)) { + df$pred[i] <- dgnorm(df$doy[i], + mu = df$mu[i], + alpha = df$sig[i], beta = 10, log = TRUE + ) +} +df$pred <- exp(df$pred + 8) +df$number <- round(rnorm(nrow(df), df$pred, 0.1)) + + +test_that("gnorm model - asymmetric works - multiple years", { + # + # set.seed(5) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, est_sigma_re = TRUE, min_number = 1, tail_model = "gnorm"), + # silent = TRUE,limits=TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + # ) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) + + # set.seed(1) + # fitted <- fit(create_data(df, asymmetric_model = TRUE, est_mu_re = FALSE, est_sigma_re = FALSE, min_number = 1, tail_model = "gnorm"), + # silent = TRUE, limits = TRUE, + # control = list(eval.max = 4000, iter.max = 5000, rel.tol = rel_tol) + # ) + # expect_equal(length(which(is.na(fitted$sdreport$sd))), 0) +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/a1_examples.Rmd b/vignettes/a1_examples.Rmd new file mode 100644 index 0000000..843039f --- /dev/null +++ b/vignettes/a1_examples.Rmd @@ -0,0 +1,220 @@ +--- +title: "Fitting time varying phenology models with the phenomix package" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Fitting time varying phenology models with the phenomix package} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include = FALSE, cache=FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 7, + fig.asp = 0.618 +) +``` + +```{r packages, message=FALSE, warning=TRUE} +library(ggplot2) +library(phenomix) +library(dplyr) +library(TMB) +``` + +## Overview + +The `phenomix` R package is designed to be a robust and flexible tool for modeling phenological change. 
The name of the package is in reference to modeling run timing data for salmon, but more generally this framework can be applied to any kind of phenology data -- timing of leaf-out or flowering in plants, breeding bird surveys, etc. Observations may be collected across multiple years, or for a single year, and may be discrete or continuous. For a given time step, these data may look like this:
+
+```{r message=FALSE, warning=FALSE, results='hide'}
+set.seed(123)
+df = data.frame(x = seq(110,150,1))
+df$y = dnorm(df$x, mean = 130, 10) * 10000
+df$obs = rnorm(nrow(df), df$y, 30)
+ggplot(df, aes(x, obs)) + geom_point() +
+  geom_smooth() +
+  theme_bw() +
+  xlab("Day of year") +
+  ylab("Observation")
+```
+
+For demonstration purposes, we included an example dataset, based on run timing data for Pacific salmon.
+
+```{r}
+glimpse(fishdist)
+```
+
+### Manipulating data for estimation
+
+The main data processing function in the package is called `create_data`, which builds the data and model arguments to be used for fitting. Note the names of the variables in our dataset,
+
+```{r}
+names(fishdist)
+```
+
+We'll start with the default arguments for the function, before going into detail about what they all mean. The only argument that we've initially changed from the default is using `asymmetric_model = FALSE` to fit the symmetric model.
+
+First, if we're fitting a model with covariates affecting the mean and standard deviation of the phenological response, we need to create a data frame indexed by time step.
+
+```{r}
+cov_dat = data.frame(nyear = unique(fishdist$year))
+# rescale year -- could also standardize with scale()
+cov_dat$nyear = cov_dat$nyear - min(cov_dat$nyear)
+```
+
+
+```{r}
+datalist = create_data(fishdist,
+  min_number=0,
+  variable = "number",
+  time="year",
+  date = "doy",
+  asymmetric_model = FALSE,
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "gaussian")
+```
+
+The `min_number` argument represents an optional threshold below which data is ignored. The `variable` argument is a character string identifying the name of the response variable in the data frame. Similarly, the `time` and `date` arguments specify the labels of the temporal (e.g. year) and seasonal (day of year) variables.
+
+The remaining arguments to the function concern model fitting. The `phenomix` package can fit asymmetric or symmetric models to the distribution data (i.e. whether the left side of the curve has the same shape as the right); the `asymmetric_model` argument defaults to FALSE. The mean and standard deviations that control the distributions are allowed to vary over time (as fixed or random effects), but we can also include covariates on the mean and standard deviations (via formulas). Mathematically the mean location for a model with random effects is $u_y \sim Normal(u_0, u_{\sigma})$, where $u_0$ is the global mean and $u_{\sigma}$ is the standard deviation. Adding a trend in normal space, this becomes $u_y \sim Normal(u_0 + b_u*y, u_{\sigma})$, where the parameter $b_u$ controls the trend. To keep the variance parameters controlling the tails of the run timing distribution positive, we estimate those random effects in log-space. This is specified as $\sigma_y = exp(\sigma_0 + b_{\sigma}*y + \delta_{y})$, with $\delta_{y} \sim Normal(0, \sigma_{\sigma})$. Trends in both the mean and variance are estimated by default, and controlled with the `est_sigma_trend` and `est_mu_trend` arguments.
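+
+As a minimal sketch of how these pieces combine -- with made-up parameter values rather than estimates from any fitted model -- the year-specific means and standard deviations described by the equations above could be simulated as:
+
+```{r eval=FALSE}
+set.seed(42)
+yrs <- 1:20
+# mean: global intercept u_0, trend b_u, and IID random effects
+u_0 <- 150 # global mean (day of year)
+b_u <- -0.5 # trend in the mean, in normal space
+u_y <- u_0 + b_u * yrs + rnorm(length(yrs), 0, 5)
+# sigma: log-linear trend plus IID deviations, exponentiated
+# to keep the standard deviations positive
+sigma_y <- exp(log(10) + 0.01 * yrs + rnorm(length(yrs), 0, 0.1))
+```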
+As described with the equations above, the trends are log-linear for the standard deviation parameters, but in normal space for the mean parameters.
+
+Finally, the tails of the response curves may be estimated via fitting a Gaussian (normal) distribution, Student-t distribution, or [generalized normal distribution](https://en.wikipedia.org/wiki/Generalized_normal_distribution). By default, the Student-t tails are estimated, and this is controlled with the `tail_model` argument ("gaussian", "student_t", "gnorm").
+
+Last, we can model the observed count data with a number of different distributions, and set this with the `family` argument. Currently supported distributions include the lognormal (default, "lognormal"), Poisson ("poisson"), Negative Binomial ("negbin"), Binomial ("binomial"), and Gaussian ("gaussian"). The lognormal, Poisson and Negative Binomial models include a log link; the Gaussian model includes an identity link, and the Binomial model includes a logit link.
+
+### Fitting the model
+
+Next, we'll use the `fit` function to do maximum likelihood estimation in TMB. Additional arguments can be found in the help file, but the most important one is the list of data created above with `create_data`.
+
+```{r message=FALSE, warning=FALSE, results='hide'}
+set.seed(1)
+fitted = fit(datalist)
+```
+
+We don't get any warnings out of the estimation, but let's look at the sdreport in more detail. `fitted` is of the `phenomix` class and contains the following
+
+```{r}
+names(fitted)
+```
+
+The `init_values` are the initial parameter values where optimization was started from, and the `data_list` represents the raw data. The `pars` component contains information relative to convergence and iterations, including the convergence code (0 = successful convergence),
+
+```{r}
+fitted$pars$convergence
+```
+
+This looks like things are converging. But sometimes relative convergence (code = 4) won't throw warnings. We can also look at the variance estimates, which were estimated successfully (a good sign the model has converged!). These are all included in the `sdreport` -- the parameters and their standard errors are accessible with
+
+```{r}
+sdrep_df = data.frame("par"=names(fitted$sdreport$value),
+  "value"=fitted$sdreport$value, "sd"=fitted$sdreport$sd)
+head(sdrep_df)
+```
+
+If, for some reason, these need to be regenerated, the `obj` object can be fed into
+
+```{r eval = FALSE}
+TMB::sdreport(fitted$obj)
+```
+
+### Getting coefficients
+
+In addition to the `sdreport` shown above, there are a couple of other ways we can get coefficients and uncertainty estimates out of model objects. First, we can use the standard `fixef` and `ranef` methods to return fixed and random coefficients, respectively
+
+```{r eval = FALSE}
+fixef(fitted)
+
+ranef(fitted)
+```
+
+Calling the `pars` function is another way; this returns a list of objects including the full variance-covariance matrix of the fixed effects, and the gradient.
+
+```{r}
+pars(fitted)
+```
+
+### Getting predicted values
+
+Predicted values can be returned with a call to `predict`. The `se.fit` argument is optional,
+
+```{r eval=FALSE}
+predict(fitted, se.fit=TRUE)
+```
+
+### Plotting results
+
+Using our fitted object, there are some basic plotting functions included for diagnostics.
+Let's plot run timing over time, by year:
+
+```{r, fig.cap="Fitted symmetric model with tails from a Gaussian distribution", fig.width = 8}
+g = plot_diagnostics(fitted, type="timing", logspace=TRUE)
+g
+```
+
+The object returned by `plot_diagnostics` is just a ggplot object, so additional arguments or themes can be added with `+` statements. The plot can be shown in normal space by setting `logspace=FALSE`. These run timing plots are the default, but additional scatterplots of observed vs predicted values can be shown by setting `type="scatter"`.
+
+### Additional examples
+
+First, we can try to fit the same model, but using Student-t tails.
+
+```{r message=FALSE, warning=FALSE, results='hide'}
+set.seed(2)
+
+datalist = create_data(fishdist,
+  min_number=0,
+  variable = "number",
+  time="year",
+  date = "doy",
+  asymmetric_model = FALSE,
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "student_t")
+
+fitted_t = fit(datalist)
+```
+
+```{r, fig.cap="Fitted symmetric model with heavy tails from a t-distribution", fig.width = 8}
+plot_diagnostics(fitted_t)
+```
+
+We can also compare the two models using AIC. Note we're not comparing random effects structures (where we'd need REML), and that this is an approximation (because only fixed effects parameters are included). This comparison shows that, maybe not surprisingly, the Student-t model is more parsimonious than the Gaussian-tailed model (aic_2 < aic_1).
+
+```{r}
+aic_1 = extractAIC(fitted)$AIC
+aic_1
+
+aic_2 = extractAIC(fitted_t)$AIC
+aic_2
+```
+
+Second, we can try to fit the same model, but using a generalized normal distribution. This distribution has a plateau or 'flat top'. For examples, see the `gnorm` package on CRAN [here](https://cran.r-project.org/web/packages/gnorm/index.html) or the [Wikipedia page](https://en.wikipedia.org/wiki/Generalized_normal_distribution).
+
+```{r message=FALSE, warning=FALSE, results='hide', eval=FALSE}
+set.seed(5)
+
+datalist = create_data(fishdist,
+  min_number=0,
+  variable = "number",
+  time="year",
+  date = "doy",
+  asymmetric_model = FALSE,
+  tail_model = "gnorm")
+fitted = fit(datalist, limits = TRUE)
+```
+
+### Diagnosing lack of convergence
+
+When a model hasn't converged, a warning message about the Hessian not being positive definite and NaNs in the `sd` column of the `sdreport` are sure signs.
+
+Fixes to improve convergence may be to start from a different set of starting values (try passing `inits` into the `fit` function), or to place stricter bounds on convergence statistics. Or it may be that the model isn't a good fit to the data. Specifically, the convergence criterion can be modified with the `control` argument -- which is passed into `stats::nlminb`. One parameter in this list that can be changed is `rel.tol`, e.g.
+
+```{r eval=FALSE}
+fit = fit(..., control=list(rel.tol = 1.0e-12,
+  eval.max=4000, iter.max=4000))
+```
+
diff --git a/vignettes/a2_covariates.Rmd b/vignettes/a2_covariates.Rmd
new file mode 100644
index 0000000..ecbd2e2
--- /dev/null
+++ b/vignettes/a2_covariates.Rmd
@@ -0,0 +1,71 @@
+---
+title: "Including covariates"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Including covariates}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE, cache=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.width = 7,
+  fig.asp = 0.618
+)
+```
+
+```{r packages, message=FALSE, warning=TRUE}
+library(ggplot2)
+library(phenomix)
+library(dplyr)
+library(TMB)
+```
+
+## Covariates
+
+Covariates may be included in `phenomix` models in several ways. First, we allow them to affect the mean,
+
+$$\mu = \mathbf{X}\mathbf{B} + \mathbf{Z}\mathbf{\alpha}$$
+
+where fixed effect covariates $\mathbf{X}$ are linked to the mean via coefficients $\mathbf{B}$, and random effects $\mathbf{\alpha}$ are linked to the mean via the design matrix $\mathbf{Z}$. For simplicity, we assume that the random effects are IID in time, e.g. $\alpha \sim Normal(0,\sigma_{\mu})$. The random effects are optional, of course, and may be turned on / off with the `est_mu_re` argument.
+
+Fixed effects for the mean at minimum include a global intercept, but may include any other covariates via the formula interface. The formula is included as an argument to the `create_data()` function, and defaults to `mu ~ 1`, or an intercept-only model. Including temperature could be done as `mu ~ temp`, which also includes an intercept.
+
+Importantly, if covariates are to be included in the mean or standard deviation, a second data frame `covar_data` must be included that has a unique covariate value for each time slice of data (e.g. year). For example, with temperature as above, `covar_data` would have to look something like this:
+
+```{r}
+# temp is a dummy variable here representing annual deviations in temperature,
+# but you could replace it here with your own data
+covar_data = data.frame(year = 2000:2020,
+  temp = rnorm(21,mean=0,sd=1))
+```
+
+Similarly, we could include a linear trend in the mean with `mu ~ nyear`, where `nyear` is a numeric version of year. One way to pass in the data would be to simply make `nyear` redundant with `year`,
+
+```{r}
+covar_data = data.frame(year = 2000:2020)
+covar_data$nyear <- covar_data$year
+```
+
+However, we've found that approach can be slightly unstable. Instead, we can center or scale the numeric year variable. Here, we'll scale it to start at 1.
+
+```{r}
+covar_data = data.frame(year = 2000:2020)
+covar_data$nyear <- covar_data$year - min(covar_data$year) + 1
+```
+
+In addition to the mean, we allow covariates to affect the standard deviation. The overall approach is exactly the same as for the mean; however, a couple of points are worth highlighting:
+
+* With the mean, we did not use a link function. With the standard deviation(s), we assume the covariate effects are estimated in log-space, to keep the standard deviations positive.
+
+$$log(\sigma_{1}) = \mathbf{X}\mathbf{B} + \mathbf{Z}\mathbf{\alpha}$$
+
+* For asymmetric models, we assume that the same covariates act on both the left and right sides of the peak (in other words, we do not allow separate formulas for each side)
+
+* The relationship between covariates and the variances is specified through a separate formula, `sigma ~ 1`
+
+* For asymmetric models, we estimate different coefficients on each side. So for a model with `sigma ~ temp`, we would estimate two temperature effects, one for the left side of the peak and one for the right
+
+* Random effects in the standard deviation are turned on/off with the `est_sigma_re` argument in `create_data()`
diff --git a/vignettes/a3_troubleshooting.Rmd b/vignettes/a3_troubleshooting.Rmd
new file mode 100644
index 0000000..d4576e2
--- /dev/null
+++ b/vignettes/a3_troubleshooting.Rmd
@@ -0,0 +1,180 @@
+---
+title: "Troubleshooting"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Troubleshooting}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE, cache=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.width = 7,
+  fig.asp = 0.618
+)
+```
+
+```{r packages, message=FALSE, warning=TRUE}
+library(ggplot2)
+library(phenomix)
+library(dplyr)
+library(TMB)
+```
+
+## Troubleshooting
+
+There are a number of reasons why `phenomix` models may not converge. First, it's unlikely that every model will converge for a given dataset. As an example, we can create some data representing an asymmetric distribution, with Gaussian tails (no extremes) and random variability by year (both in the mean and standard deviations).
+
+```{r}
+# create 20 years of data
+set.seed(123)
+df <- expand.grid("doy" = 100:200, "year" = 1:20)
+df$mu <- rnorm(unique(df$year), 150, 5)[df$year]
+df$sig1 <- rnorm(unique(df$year), 30, 5)[df$year]
+df$sig2 <- rnorm(unique(df$year), 30, 5)[df$year]
+df$sig <- ifelse(df$doy < df$mu, df$sig1, df$sig2)
+df$pred <- dnorm(df$doy, df$mu, sd = df$sig, log = TRUE)
+df$pred <- exp(df$pred + 8)
+df$number <- round(rnorm(nrow(df), df$pred, 0.1))
+
+```
+
+A model with Student-t tails can be fit to these data:
+```{r eval=FALSE}
+set.seed(1)
+fit_t <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "student_t"),
+  silent = TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+
+```
+
+But the model with the generalized normal tails struggles.
+```{r eval=FALSE}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm"),
+  silent = TRUE,limits=TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+```
+
+This is an example where simplifying helps greatly. The model with generalized normal tails will converge when the random effects in the mean and variance are turned off, but has a hard time estimating the shape parameters with them on.
+
+```{r eval=FALSE}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+```
+
+### Using initial values
+
+Sometimes, it helps to specify the initial values for a model. We can do that in a couple of ways, but it may be easiest to first construct a model to see what initial values are needed.
+
+Using the above example, we can specify `fit_model = FALSE`.
+
+```{r}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+  fit_model = FALSE
+)
+```
+
+The `$init_vals` contains the initial values that would be used to fit the model.
+```{r}
+fit_gnorm$init_vals
+```
+
+Suppose we wanted to start at a higher value for `log_beta_1`. We could change that with
+
+```{r}
+inits = fit_gnorm$init_vals
+inits["log_beta_1"] = 2
+```
+
+Now we can try re-fitting the model, passing the modified values in with the `inits` argument,
+
+```{r}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  inits = inits,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+  fit_model = TRUE
+)
+```
+
+### Specifying limits
+
+There are two ways to specify limits on estimated parameters. First, we include hard-coded limits that may be turned on with `limits = TRUE`, e.g.
+
+```{r eval = FALSE}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  limits = TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+  fit_model = TRUE
+)
+```
+
+However, these limits may not be reasonable for every situation. We can also change them manually, by including a list of limits based on the parameters being estimated. We do this using the same approach as above for specifying initial values. First we construct the model but don't do estimation,
+
+```{r eval = FALSE}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7),
+  fit_model = FALSE
+)
+```
+
+The parameters need to be in the same order as `init_vals`, so we can try something like this:
+
+```{r eval = FALSE}
+lower = c(rep(0, 20), # lower for theta
+  0.05, # lower log_beta_1
+  -3, # lower log_obs_sigma
+  140, # lower on mu
+  15,# lower on b_sig1
+  15)# lower on b_sig2
+upper = c(rep(10, 20),# upper for theta
+  log(10),# upper log_beta_1
+  0, # upper log_obs_sigma
+  160,# upper on mu
+  40,# upper on b_sig1
+  40)# upper on b_sig2
+```
+
+Then we can pass these in as a list,
+
+```{r eval = FALSE}
+fit_gnorm <- fit(create_data(df, asymmetric_model = TRUE, min_number = 1,
+  tail_model = "gnorm",
+  est_mu_re = FALSE,
+  est_sigma_re = FALSE),
+  silent = TRUE,
+  limits = list(lower = lower, upper = upper),
+  control = list(eval.max = 4000, iter.max = 5000, rel.tol = 1e-7)
+)
+```
+
+
+
diff --git a/vignettes/a4_derived.Rmd b/vignettes/a4_derived.Rmd
new file mode 100644
index 0000000..2d51226
--- /dev/null
+++ b/vignettes/a4_derived.Rmd
@@ -0,0 +1,110 @@
+---
+title: "Extracting estimated and derived parameters"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Extracting estimated and derived parameters}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r setup, include = FALSE, cache=FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>",
+  fig.width = 7,
+  fig.asp = 0.618
+)
+```
+
+```{r packages, message=FALSE, warning=TRUE}
+library(ggplot2)
+library(phenomix)
+library(dplyr)
+library(TMB)
+```
+
+We will start with the original simple model fit to the `fishdist` data,
+
+```{r}
+data("fishdist")
+cov_dat = data.frame(nyear = unique(fishdist$year))
+# rescale year -- could also standardize with scale()
+cov_dat$nyear = cov_dat$nyear - min(cov_dat$nyear)
+```
+
+
+```{r}
+datalist = create_data(fishdist,
+  min_number=0,
+  variable = "number",
+  time="year",
+  date = "doy",
+  asymmetric_model = FALSE,
+  mu = ~ nyear,
+  sigma = ~ nyear,
+  covar_data = cov_dat,
+  est_sigma_re = TRUE,
+  est_mu_re = TRUE,
+  tail_model = "gaussian")
+```
+
+```{r message=FALSE, warning=FALSE, results='hide'}
+set.seed(1)
+fitted = fit(datalist)
+```
+
+## Extracting parameters manually
+
+The first option for extracting parameters manually is via the `sdreport` of TMB objects. This includes both estimated and derived quantities. You can see the names of what's available by looking at
+
+```{r eval=FALSE}
+fitted$sdreport$value
+```
+
+or perhaps look at these in tabular form,
+```{r}
+table(names(fitted$sdreport$value))
+```
+
+So if you wanted to extract the mean phenological peak in each time step (named `mu`), you could look at the indices of the names
+
+```{r}
+idx <- which(names(fitted$sdreport$value) == "mu")
+```
+
+And then use the respective means and sds corresponding to those indices,
+
+```{r}
+m <- fitted$sdreport$value[idx]
+s <- fitted$sdreport$sd[idx]
+```
+
+## Helper functions
+
+We've also included some helper functions to extract these more quickly. These all take in a fitted model object, and can be called as
+
+```{r}
+m <- extract_means(fitted) # means
+s <- extract_sigma(fitted) # sigmas, describing tails
+t <- extract_theta(fitted) # theta parameters (scaling)
+u <- extract_upper(fitted) # upper quartile
+l <- extract_lower(fitted) # lower quartile
+```
+
+Or if you want to extract all of the above, you can use
+
+```{r}
+e <- extract_all(fitted)
+```
+
+## Annual summaries
+
+In addition to the parameters above, we can extract the derived annual totals (predicted across all days 1-365). These are extracted with the `extract_annual` function below. Note that the second argument controls whether the estimates are in normal or log space.
+
+```{r}
+est_log <- extract_annual(fitted, log=TRUE)
+est_normal <- extract_annual(fitted, log=FALSE)
+```
+
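+As a closing usage sketch -- relying only on the `sdreport` indexing shown earlier in this vignette, not on any additional package functions -- the estimated peak timing could be plotted with an approximate 95% interval:
+
+```{r eval=FALSE}
+idx <- which(names(fitted$sdreport$value) == "mu")
+df_mu <- data.frame(
+  step = seq_along(idx), # time step index
+  mu = fitted$sdreport$value[idx],
+  lo = fitted$sdreport$value[idx] - 2 * fitted$sdreport$sd[idx],
+  hi = fitted$sdreport$value[idx] + 2 * fitted$sdreport$sd[idx]
+)
+ggplot(df_mu, aes(step, mu)) +
+  geom_ribbon(aes(ymin = lo, ymax = hi), alpha = 0.3) +
+  geom_line() +
+  theme_bw() +
+  xlab("Time step") +
+  ylab("Peak timing (day of year)")
+```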