Revert "init commit"

This reverts commit 04b1f62.
nwfsc-cb · Feb 26, 2024 · 371200e · 371200e
1 parent 1e9d89d
commit 371200e
Show file tree

Hide file tree

Showing 110 changed files with 8,398 additions and 754 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,47 +1,34 @@
-Package: zoidtmb
-Title: Zero-and-One Inflated Dirichlet Regression Modelling in TMB
-Version: 1.3.0
-Authors@R: 
-    c(person(given = "Eric J.",
-           family = "Ward",
-           role = c("aut", "cre"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0002-4359-0296")),
-      person(given = "Alexander J.",
-           family = "Jensen",
-           role = c("aut"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0002-2911-8884")),
-      person(given = "Ryan P.",
-           family = "Kelly",
-           role = c("aut"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0001-5037-2441")),
-      person(given = "Andrew O.",
-           family = "Shelton",
-           role = c("aut"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0002-8045-6141")),
-      person(given = "William H.",
-           family = "Satterthwaite",
-           role = c("aut"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0002-0436-7390")),
-      person(given = "Eric C.",
-           family = "Anderson",
-           role = c("aut"),
-           email = "[email protected]",
-           comment = c(ORCID = "0000-0003-1326-0840")))
-Description: Fits Dirichlet regression and zero-and-one inflated Dirichlet regression with Bayesian methods implemented in Stan. These models are sometimes referred to as trinomial mixture models; covariates and overdispersion can optionally be included.
+Type: Package
+Package: phenomix
+Title: Fit Density Curves to Peak Timing Data that Varies over Time
+Version: 1.0.4
+Authors@R: c(person(given = c("Eric", "J."),
+             family = "Ward",
+             role = c("aut", "cre"),
+             email = "[email protected]"),
+             person(given = c("Samantha", "M."),
+             family = "Wilson",
+             role = c("ctb")),
+             person(given = c("Joseph", "H."),
+             family = "Anderson",
+             role = c("ctb")))
+Description: The 'salmix' package fits time-varying density curves to run
+    timing type data commonly encountered in fisheries and ecology. Example 
+    applications include to peak run timing curves collected for juvenile or 
+    adult Pacific salmon, though could also be applied to other kinds
+    of data such as hydrographs, plant phenology (flowering, leaf out).
 License: GPL (>=3)
-URL: https://nwfsc-cb.github.io/zoidtmb/, https://github.com/nwfsc-cb/zoidtmb/issues
+URL: https://ericward-noaa.github.io/phenomix, https://github.com/ericward-noaa/phenomix
 Depends: 
     R (>= 4.0.0)
 Imports:
-    gtools,
+    dplyr,
+    ggplot2,
+    gnorm,
+    methods,
     stats,
     TMB (>= 1.7.20),
-    Rcpp
+    nlme
 Suggests:
     testthat,
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,12 +1,34 @@
 # Generated by roxygen2: do not edit by hand
 
-export(broken_stick)
-export(fit_zoidTMB)
-import(Rcpp)
-importFrom(gtools,rdirichlet)
-importFrom(stats,as.formula)
-importFrom(stats,model.frame)
+S3method(extractAIC,phenomix)
+S3method(fitted,phenomix)
+S3method(fixef,phenomix)
+S3method(logLik,phenomix)
+S3method(nobs,phenomix)
+S3method(predict,phenomix)
+S3method(ranef,phenomix)
+export(create_data)
+export(extract_all)
+export(extract_annual)
+export(extract_lower)
+export(extract_means)
+export(extract_sigma)
+export(extract_theta)
+export(extract_upper)
+export(fit)
+export(pars)
+export(plot_diagnostics)
+import(ggplot2)
+importFrom(TMB,MakeADFun)
+importFrom(TMB,sdreport)
+importFrom(dplyr,left_join)
+importFrom(methods,is)
+importFrom(nlme,fixef)
+importFrom(nlme,ranef)
+importFrom(stats,logLik)
 importFrom(stats,model.matrix)
-importFrom(stats,rbeta)
-importFrom(stats,rbinom)
-useDynLib(zoidtmb, .registration = TRUE)
+importFrom(stats,nobs)
+importFrom(stats,predict)
+importFrom(stats,rnorm)
+importFrom(stats,runif)
+useDynLib(phenomix, .registration = TRUE)
diff --git a/R/broken_stick.R b/R/broken_stick.R
diff --git a/R/create_data.R b/R/create_data.R
@@ -0,0 +1,140 @@
+#' Create data file for fitting time varying run timing distributions with TMB
+#'
+#' Does minimal processing of data to use as argument to fitting function
+#'
+#' @param data A data frame
+#' @param min_number A minimum threshold to use, defaults to 0
+#' @param variable A character string of the name of the variable in 'data' that contains the response (e.g. counts)
+#' @param time A character string of the name of the variable in 'data' that contains the time variable (e.g. year)
+#' @param date A character string of the name of the variable in 'data' that contains the response (e.g. day of year). The actual
+#' #' column should contain a numeric response -- for example, the result from using lubridate::yday(x)
+#' @param mu An optional formula allowing the mean to be a function of covariates. Random effects are not included in the formula
+#' but specified with the `est_mu_re` argument
+#' @param sigma An optional formula allowing the standard deviation to be a function of covariates. For asymmetric models,
+#' each side of the distribution is allowed a different set of covariates. Random effects are not included in the formula
+#' but specified with the `est_sigma_re` argument
+#' @param covar_data a data frame containing covariates specific to each time step. These are used in the formulas `mu` and `sigma`
+#' @param asymmetric_model Boolean, whether or not to let model be asymmetric (e.g. run timing before peak has a
+#' different shape than run timing after peak)
+#' @param est_sigma_re Whether to estimate random effects by year in sigma parameter controlling tail of distribution. Defaults to TRUE
+#' @param est_mu_re Whether to estimate random effects by year in mu parameter controlling location of distribution. Defaults to TRUE
+#' @param tail_model Whether to fit Gaussian ("gaussian"), Student-t ("student_t") or generalized normal ("gnorm"). Defaults to Student-t
+#' @param family Response for observation model, options are "gaussian", "poisson", "negbin", "binomial", "lognormal". The default ("lognormal") is
+#' not a true lognormal distribution, but a normal-log in that it assumes log(y) ~ Normal()
+#' @param max_theta Maximum value of log(pred) when `limits=TRUE`. Defaults to 10
+#' @param share_shape Boolean argument for whether asymmetric student-t and generalized normal distributions should share the shape parameter (nu for the student-t;
+#' beta for the generalized normal). Defaults to TRUE
+#' @param nu_prior Two element vector (optional) for penalized prior on student t df, defaults to a Gamma(shape=2, scale=10) distribution
+#' @param beta_prior Two element vector (optional) for penalized prior on generalized normal beta, defaults to a Normal(2, 1) distribution
+#' @export
+#' @importFrom stats model.matrix
+#' @examples
+#' data(fishdist)
+#' datalist <- create_data(fishdist,
+#'   min_number = 0, variable = "number", time = "year",
+#'   date = "doy", asymmetric_model = TRUE, family = "gaussian"
+#' )
+create_data <- function(data,
+                        min_number = 0,
+                        variable = "number",
+                        time = "year",
+                        date = "doy",
+                        asymmetric_model = TRUE,
+                        mu = ~1,
+                        sigma = ~1,
+                        covar_data = NULL,
+                        est_sigma_re = TRUE,
+                        est_mu_re = TRUE,
+                        tail_model = "student_t",
+                        family = "lognormal",
+                        max_theta = 10,
+                        share_shape = TRUE,
+                        nu_prior = c(2,10),
+                        beta_prior = c(2,1)) {
+
+  dist <- c("gaussian", "poisson", "negbin", "binomial", "lognormal")
+  fam <- match(family, dist)
+  if (is.na(fam)) {
+    stop("Make sure the entered family is in the list of accepted distributions")
+  }
+
+  tail <- c("gaussian", "student_t", "gnorm")
+  tailmod <- match(tail_model, tail)
+  if (is.na(tailmod)) {
+    stop("Make sure the entered tail model is in the list of accepted distributions")
+  }
+
+  # check to make sure year and date are numeric
+  if (!is.numeric(data[, time])) {
+    stop("The time variable in the data frame (e.g. year) needs to be numeric")
+  }
+  if (is.numeric(data[, date])) {
+    if (max(data[, date], na.rm = T) > 365) stop("The date variable in the data frame contains values greater than 365")
+    if (min(data[, date], na.rm = T) < 1) stop("The date variable in the data frame contains values less than 1")
+  } else {
+    stop("The date variable in the data frame (e.g. day_of_year) needs to be numeric")
+  }
+
+  # optional priors
+  use_t_prior = TRUE
+  if (length(nu_prior) != 2) {
+    if(is.na(nu_prior)) {
+      use_t_prior = FALSE
+    } else {
+      stop("The nu prior must be a numeric 2-element vector or NA")
+    }
+  }
+  use_beta_prior = TRUE
+  if (length(beta_prior) != 2) {
+    if(is.na(beta_prior)) {
+      use_beta_prior = FALSE
+    } else {
+      stop("The beta prior must be a numeric 2-element vector or NA")
+    }
+  }
+
+  # if 1 level, turn off trend and random effect estimation
+  if (length(unique(as.numeric(data[, time]))) == 1) {
+    est_sigma_re <- FALSE
+    est_mu_re <- FALSE
+  }
+
+  # drop rows below threshold or NAs
+  drop_rows <- which(is.na(data[, variable]) | data[, variable] < min_number)
+  if (length(drop_rows) > 0) data <- data[-drop_rows, ]
+
+  # rescale year variable to start at 1 for indexing
+  data$year <- data[, time] - min(data[, time]) + 1
+
+  # parse formulas. covar_data contains covariates specific to each time step
+  if (is.null(covar_data)) {
+    covar_data <- data.frame(year = unique(data$year))
+  }
+  mu_mat <- model.matrix(mu, data = covar_data)
+  sig_mat <- model.matrix(sigma, data = covar_data)
+
+  data_list <- list(
+    y = data[, variable],
+    yint = round(data[, variable]),
+    years = as.numeric(as.factor(data$year)),
+    x = data[, date],
+    year_levels = as.numeric(as.factor(unique(data$year))),
+    unique_years = unique(data$year),
+    nLevels = length(unique(data$year)),
+    asymmetric = as.numeric(asymmetric_model),
+    family = fam,
+    mu_mat = mu_mat,
+    sig_mat = sig_mat,
+    tail_model = as.numeric(tailmod) - 1,
+    est_sigma_re = as.numeric(est_sigma_re),
+    est_mu_re = as.numeric(est_mu_re),
+    max_theta = max_theta,
+    share_shape = as.numeric(share_shape),
+    use_t_prior = as.numeric(use_t_prior),
+    use_beta_prior = as.numeric(use_beta_prior),
+    beta_prior = beta_prior,
+    nu_prior = nu_prior
+  )
+
+  return(data_list)
+}
diff --git a/R/data.R b/R/data.R
@@ -1,19 +1,14 @@
-#' Data from Satterthwaite, W.H., Ciancio, J., Crandall, E., Palmer-Zwahlen,
-#' M.L., Grover, A.M., O’Farrell, M.R., Anson, E.C., Mohr, M.S. & Garza,
-#' J.C. (2015). Stock composition and ocean spatial distribution from
-#' California recreational chinook salmon fisheries using genetic stock
-#' identification. Fisheries Research, 170, 166–178. The data
-#' genetic data collected from port-based sampling of recreationally-landed
-#' Chinook salmon in California from 1998-2002.
+#' Example simulate data for fish distributions from multiple years
 #'
-#' @format A data frame.
-"chinook"
+#' @format A data frame containing simulated data.
+#' @keywords internal
+"fishdist"
 
-#' Data from Magnussen, E. 2011. Food and feeding habits of cod (Gadus morhua)
-#' on the Faroe Bank. – ICES Journal of Marine Science, 68: 1909–1917. The data
-#' here are Table 3 from the paper, with sample proportions (columns w) multiplied
-#' by total weight to yield total grams (g) for each sample-diet item combination. Dashes
-#' have been replaced with 0s.
+#' Count data collected by Washington Department of Fish and Wildlife on
+#' chum salmon from the Skagit River (Washington state). Each row of the
+#' dataframe contains an observation ("number") on a given date ("date").
+#' The year ("year") and calendar day ("doy") are also included.
 #'
 #' @format A data frame.
-"coddiet"
+#' @keywords internal
+"chum"