Skip to content

Commit

Permalink
Revert "init commit"
Browse files Browse the repository at this point in the history
This reverts commit 04b1f62.
  • Loading branch information
ericward-noaa committed Feb 26, 2024
1 parent 1e9d89d commit 371200e
Show file tree
Hide file tree
Showing 110 changed files with 8,398 additions and 754 deletions.
Binary file modified .DS_Store
Binary file not shown.
63 changes: 25 additions & 38 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,47 +1,34 @@
Package: zoidtmb
Title: Zero-and-One Inflated Dirichlet Regression Modelling in TMB
Version: 1.3.0
Authors@R:
c(person(given = "Eric J.",
family = "Ward",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-4359-0296")),
person(given = "Alexander J.",
family = "Jensen",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-2911-8884")),
person(given = "Ryan P.",
family = "Kelly",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-5037-2441")),
person(given = "Andrew O.",
family = "Shelton",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-8045-6141")),
person(given = "William H.",
family = "Satterthwaite",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-0436-7390")),
person(given = "Eric C.",
family = "Anderson",
role = c("aut"),
email = "[email protected]",
comment = c(ORCID = "0000-0003-1326-0840")))
Description: Fits Dirichlet regression and zero-and-one inflated Dirichlet regression with Bayesian methods implemented in Stan. These models are sometimes referred to as trinomial mixture models; covariates and overdispersion can optionally be included.
Type: Package
Package: phenomix
Title: Fit Density Curves to Peak Timing Data that Varies over Time
Version: 1.0.4
Authors@R: c(person(given = c("Eric", "J."),
family = "Ward",
role = c("aut", "cre"),
email = "[email protected]"),
person(given = c("Samantha", "M."),
family = "Wilson",
role = c("ctb")),
person(given = c("Joseph", "H."),
family = "Anderson",
role = c("ctb")))
Description: The 'phenomix' package fits time-varying density curves to run
timing type data commonly encountered in fisheries and ecology. Example
applications include peak run timing curves collected for juvenile or
adult Pacific salmon, though it could also be applied to other kinds
of data such as hydrographs or plant phenology (flowering, leaf out).
License: GPL (>=3)
URL: https://nwfsc-cb.github.io/zoidtmb/, https://github.com/nwfsc-cb/zoidtmb/issues
URL: https://ericward-noaa.github.io/phenomix, https://github.com/ericward-noaa/phenomix
Depends:
R (>= 4.0.0)
Imports:
gtools,
dplyr,
ggplot2,
gnorm,
methods,
stats,
TMB (>= 1.7.20),
Rcpp
nlme
Suggests:
testthat,
knitr,
Expand Down
40 changes: 31 additions & 9 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,12 +1,34 @@
# Generated by roxygen2: do not edit by hand

export(broken_stick)
export(fit_zoidTMB)
import(Rcpp)
importFrom(gtools,rdirichlet)
importFrom(stats,as.formula)
importFrom(stats,model.frame)
S3method(extractAIC,phenomix)
S3method(fitted,phenomix)
S3method(fixef,phenomix)
S3method(logLik,phenomix)
S3method(nobs,phenomix)
S3method(predict,phenomix)
S3method(ranef,phenomix)
export(create_data)
export(extract_all)
export(extract_annual)
export(extract_lower)
export(extract_means)
export(extract_sigma)
export(extract_theta)
export(extract_upper)
export(fit)
export(pars)
export(plot_diagnostics)
import(ggplot2)
importFrom(TMB,MakeADFun)
importFrom(TMB,sdreport)
importFrom(dplyr,left_join)
importFrom(methods,is)
importFrom(nlme,fixef)
importFrom(nlme,ranef)
importFrom(stats,logLik)
importFrom(stats,model.matrix)
importFrom(stats,rbeta)
importFrom(stats,rbinom)
useDynLib(zoidtmb, .registration = TRUE)
importFrom(stats,nobs)
importFrom(stats,predict)
importFrom(stats,rnorm)
importFrom(stats,runif)
useDynLib(phenomix, .registration = TRUE)
103 changes: 0 additions & 103 deletions R/broken_stick.R

This file was deleted.

140 changes: 140 additions & 0 deletions R/create_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#' Create data file for fitting time varying run timing distributions with TMB
#'
#' Does minimal processing of data to use as argument to fitting function
#'
#' @param data A data frame
#' @param min_number A minimum threshold to use, defaults to 0. Rows whose response is `NA` or below this value are dropped
#' @param variable A character string of the name of the variable in 'data' that contains the response (e.g. counts)
#' @param time A character string of the name of the variable in 'data' that contains the time variable (e.g. year)
#' @param date A character string of the name of the variable in 'data' that contains the date variable (e.g. day of year). The actual
#' column should contain a numeric value between 1 and 365 -- for example, the result from using lubridate::yday(x)
#' @param mu An optional formula allowing the mean to be a function of covariates. Random effects are not included in the formula
#' but specified with the `est_mu_re` argument
#' @param sigma An optional formula allowing the standard deviation to be a function of covariates. For asymmetric models,
#' each side of the distribution is allowed a different set of covariates. Random effects are not included in the formula
#' but specified with the `est_sigma_re` argument
#' @param covar_data a data frame containing covariates specific to each time step. These are used in the formulas `mu` and `sigma`
#' @param asymmetric_model Boolean, whether or not to let model be asymmetric (e.g. run timing before peak has a
#' different shape than run timing after peak)
#' @param est_sigma_re Whether to estimate random effects by year in sigma parameter controlling tail of distribution. Defaults to TRUE
#' @param est_mu_re Whether to estimate random effects by year in mu parameter controlling location of distribution. Defaults to TRUE
#' @param tail_model Whether to fit Gaussian ("gaussian"), Student-t ("student_t") or generalized normal ("gnorm"). Defaults to Student-t
#' @param family Response for observation model, options are "gaussian", "poisson", "negbin", "binomial", "lognormal". The default ("lognormal") is
#' not a true lognormal distribution, but a normal-log in that it assumes log(y) ~ Normal()
#' @param max_theta Maximum value of log(pred) when `limits=TRUE`. Defaults to 10
#' @param share_shape Boolean argument for whether asymmetric student-t and generalized normal distributions should share the shape parameter (nu for the student-t;
#' beta for the generalized normal). Defaults to TRUE
#' @param nu_prior Two element vector (optional) for penalized prior on student t df, defaults to a Gamma(shape=2, scale=10) distribution.
#' A single `NA` turns the prior off
#' @param beta_prior Two element vector (optional) for penalized prior on generalized normal beta, defaults to a Normal(2, 1) distribution.
#' A single `NA` turns the prior off
#' @return A named list with elements `y`, `yint`, `years`, `x`, `year_levels`, `unique_years`, `nLevels`,
#' `asymmetric`, `family`, `mu_mat`, `sig_mat`, `tail_model`, `est_sigma_re`, `est_mu_re`, `max_theta`,
#' `share_shape`, `use_t_prior`, `use_beta_prior`, `beta_prior` and `nu_prior`
#' @export
#' @importFrom stats model.matrix
#' @examples
#' data(fishdist)
#' datalist <- create_data(fishdist,
#'   min_number = 0, variable = "number", time = "year",
#'   date = "doy", asymmetric_model = TRUE, family = "gaussian"
#' )
create_data <- function(data,
                        min_number = 0,
                        variable = "number",
                        time = "year",
                        date = "doy",
                        asymmetric_model = TRUE,
                        mu = ~1,
                        sigma = ~1,
                        covar_data = NULL,
                        est_sigma_re = TRUE,
                        est_mu_re = TRUE,
                        tail_model = "student_t",
                        family = "lognormal",
                        max_theta = 10,
                        share_shape = TRUE,
                        nu_prior = c(2, 10),
                        beta_prior = c(2, 1)) {
  # The family is passed on as its 1-based position in this vector
  dist <- c("gaussian", "poisson", "negbin", "binomial", "lognormal")
  fam <- match(family, dist)
  # Length guard keeps the `if` condition scalar when a vector is supplied
  if (length(fam) != 1 || is.na(fam)) {
    stop("Make sure the entered family is in the list of accepted distributions")
  }

  tail <- c("gaussian", "student_t", "gnorm")
  tailmod <- match(tail_model, tail)
  if (length(tailmod) != 1 || is.na(tailmod)) {
    stop("Make sure the entered tail model is in the list of accepted distributions")
  }

  # check to make sure year and date are numeric; `[[` returns a plain vector
  # for both data frames and tibbles
  if (!is.numeric(data[[time]])) {
    stop("The time variable in the data frame (e.g. year) needs to be numeric")
  }
  date_values <- data[[date]]
  if (!is.numeric(date_values)) {
    stop("The date variable in the data frame (e.g. day_of_year) needs to be numeric")
  }
  # NOTE(review): day 366 (leap years) is rejected by this check -- confirm intended
  if (max(date_values, na.rm = TRUE) > 365) {
    stop("The date variable in the data frame contains values greater than 365")
  }
  if (min(date_values, na.rm = TRUE) < 1) {
    stop("The date variable in the data frame contains values less than 1")
  }

  # optional priors: a 2-element vector keeps the prior on, a single NA turns
  # it off, anything else is rejected with an explicit message
  use_t_prior <- TRUE
  if (length(nu_prior) != 2) {
    if (length(nu_prior) == 1 && is.na(nu_prior)) {
      use_t_prior <- FALSE
    } else {
      stop("The nu prior must be a numeric 2-element vector or NA")
    }
  }
  use_beta_prior <- TRUE
  if (length(beta_prior) != 2) {
    if (length(beta_prior) == 1 && is.na(beta_prior)) {
      use_beta_prior <- FALSE
    } else {
      stop("The beta prior must be a numeric 2-element vector or NA")
    }
  }

  # drop rows below threshold or NAs
  response <- data[[variable]]
  drop_rows <- which(is.na(response) | response < min_number)
  if (length(drop_rows) > 0) {
    data <- data[-drop_rows, , drop = FALSE]
  }

  # if at most 1 time level remains, turn off random effect estimation.
  # Checked after filtering so levels removed by the threshold are not counted
  if (length(unique(as.numeric(data[[time]]))) <= 1) {
    est_sigma_re <- FALSE
    est_mu_re <- FALSE
  }

  # rescale year variable to start at 1 for indexing
  data$year <- data[[time]] - min(data[[time]]) + 1

  # parse formulas. covar_data contains covariates specific to each time step
  if (is.null(covar_data)) {
    covar_data <- data.frame(year = unique(data$year))
  }
  mu_mat <- model.matrix(mu, data = covar_data)
  sig_mat <- model.matrix(sigma, data = covar_data)

  data_list <- list(
    y = data[[variable]],
    yint = round(data[[variable]]),
    years = as.numeric(as.factor(data$year)),
    x = data[[date]],
    year_levels = as.numeric(as.factor(unique(data$year))),
    unique_years = unique(data$year),
    nLevels = length(unique(data$year)),
    asymmetric = as.numeric(asymmetric_model),
    family = fam,
    mu_mat = mu_mat,
    sig_mat = sig_mat,
    # zero-based index of the tail model
    tail_model = as.numeric(tailmod) - 1,
    est_sigma_re = as.numeric(est_sigma_re),
    est_mu_re = as.numeric(est_mu_re),
    max_theta = max_theta,
    share_shape = as.numeric(share_shape),
    use_t_prior = as.numeric(use_t_prior),
    use_beta_prior = as.numeric(use_beta_prior),
    beta_prior = beta_prior,
    nu_prior = nu_prior
  )

  data_list
}
25 changes: 10 additions & 15 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
#' Data from Satterthwaite, W.H., Ciancio, J., Crandall, E., Palmer-Zwahlen,
#' M.L., Grover, A.M., O’Farrell, M.R., Anson, E.C., Mohr, M.S. & Garza,
#' J.C. (2015). Stock composition and ocean spatial distribution from
#' California recreational chinook salmon fisheries using genetic stock
#' identification. Fisheries Research, 170, 166–178. The data
#' genetic data collected from port-based sampling of recreationally-landed
#' Chinook salmon in California from 1998-2002.
#' Example simulated data for fish distributions from multiple years
#'
#' @format A data frame.
"chinook"
#' @format A data frame containing simulated data.
#' @keywords internal
"fishdist"

#' Data from Magnussen, E. 2011. Food and feeding habits of cod (Gadus morhua)
#' on the Faroe Bank. – ICES Journal of Marine Science, 68: 1909–1917. The data
#' here are Table 3 from the paper, with sample proportions (columns w) multiplied
#' by total weight to yield total grams (g) for each sample-diet item combination. Dashes
#' have been replaced with 0s.
#' Count data collected by Washington Department of Fish and Wildlife on
#' chum salmon from the Skagit River (Washington state). Each row of the
#' dataframe contains an observation ("number") on a given date ("date").
#' The year ("year") and calendar day ("doy") are also included.
#'
#' @format A data frame.
"coddiet"
#' @keywords internal
"chum"
Loading

0 comments on commit 371200e

Please sign in to comment.