diff --git a/NAMESPACE b/NAMESPACE index 6701e4b..e9386b8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,14 +12,19 @@ export(ED.plus) export(RSCABK) export(Tarone.test) export(addECxCI) +export(aggregate_from_individual_simple) +export(aggregate_from_individual_tidy) export(backCalcSE) export(calcNW) export(calcSteepnessOverlap) export(calcTaronesTest) export(contEndpoint) export(convert2Score) +export(convert_fish_data) export(dose.p.glmmPQL) export(drcCompare) +export(expand_to_individual_simple) +export(expand_to_individual_tidy) export(getEC50) export(getEndpoint) export(getLineContrast) diff --git a/R/MQJT.R b/R/MQJT.R index 87864d2..83f6766 100644 --- a/R/MQJT.R +++ b/R/MQJT.R @@ -1,3 +1,3 @@ -## Personal communication with John Green. - ## Placeholder for a MQJT function. + + diff --git a/R/RSCABS_AO.R b/R/RSCABS_AO.R new file mode 100644 index 0000000..046744c --- /dev/null +++ b/R/RSCABS_AO.R @@ -0,0 +1,163 @@ +#' Calculate Rao-Scott Adjusted Values for Clustered Binary Data +#' +#' This function calculates the Rao-Scott adjustment for clustered binary data +#' to account for intra-cluster correlation when analyzing dose-response relationships. +#' +#' @param group Vector of treatment group identifiers +#' @param replicate Vector of replicate/tank identifiers within treatment groups +#' @param affected Vector of counts of affected subjects (fish with injuries) in each replicate +#' @param total Vector of total subjects (fish) in each replicate +#' +#' @return A tibble containing the following columns: +#' \item{grp}{Treatment group identifier} +#' \item{x}{Total number of affected subjects in the treatment group} +#' \item{n}{Total number of subjects in the treatment group} +#' \item{m}{Number of replicates in the treatment group} +#' \item{p_hat}{Estimated proportion of affected subjects in the treatment group} +#' \item{b}{Binomial variance of p_hat} +#' \item{v}{Estimated variance accounting for clustering} +#' \item{D}{Design effect (ratio of cluster-adjusted variance to binomial variance)} +#' \item{n_tilde}{Adjusted sample size accounting for clustering} +#' \item{x_tilde}{Adjusted number of affected subjects accounting for clustering} +#' +#' @author Originally by Allen Olmstead +#' @details +#' The function first aggregates data by treatment group to calculate overall proportions. +#' It then computes the variance within each treatment group accounting for clustering, +#' and calculates a design effect (D) as the ratio of cluster-adjusted variance to +#' binomial variance. The sample size and affected counts are then adjusted by +#' dividing by this design effect. +#' +#' @examples +#' # Calculate adjusted values for the fish injury data +#' adj_vals <- get_RS_adj_val( +#' dat_bcs1$tmt, +#' dat_bcs1$tank, +#' dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, +#' dat_bcs1$total +#' ) +get_RS_adj_val <- function(group, replicate, affected, total) { + # create data frame from input vectors + dat <- tibble(grp = group, rep = replicate, aff = affected, tot = total) + + # create aggregates by dose levels + agg <- group_by(dat, grp) %>% + summarize(x = sum(aff), n = sum(tot), m = n()) %>% + mutate(p_hat = x/n, + b = p_hat*(1 - p_hat)/n) + + # add aggregates to original data frame + dat <- left_join(dat, agg, by = "grp") %>% + mutate(r2 = (aff - tot*p_hat)^2) # square of residuals + + # calculate subgroup variances + subgrp_var <- group_by(dat, grp, m, n) %>% + summarize(sum_r2 = sum(r2), .groups = "drop") %>% + mutate(v = m*sum_r2/n^2/(m - 1)) + + agg$v <- subgrp_var$v + + # calculate adjusted n and x values + mutate(agg, D = ifelse(v/b < 1, 1, v/b), + n_tilde = n/D, + x_tilde = x/D) +} + + + + +#' Calculate Cochran-Armitage Trend Test Z-Statistic +#' +#' This function calculates the Z-statistic for the Cochran-Armitage trend test +#' using adjusted counts and sample sizes. +#' +#' @param adj_x Vector of adjusted affected counts for each treatment group +#' @param adj_n Vector of adjusted sample sizes for each treatment group +#' +#' @return The Z-statistic for the Cochran-Armitage trend test +#' +#' @details +#' The Cochran-Armitage trend test examines whether there is a linear trend in +#' proportions across ordered categories (treatment groups). This implementation +#' uses adjusted values to account for clustering in the data. +#' +#' The function assigns scores (1, 2, 3, ...) to the treatment groups and +#' calculates the Z-statistic using the formula: +#' Z = [sum(adj_x*d) - N*p_bar*d_bar] / sqrt[p_bar*(1-p_bar)*(sum(adj_n*d^2) - N*d_bar^2)] +#' where: +#' - d are the scores (1, 2, 3, ...) +#' - N is the total adjusted sample size +#' - d_bar is the weighted average of scores +#' - p_bar is the overall proportion of affected subjects +#' +#' @examples +#' # Get adjusted values +#' adj_vals <- get_RS_adj_val( +#' dat_bcs1$tmt, +#' dat_bcs1$tank, +#' dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, +#' dat_bcs1$total +#' ) +#' # Calculate Z-statistic +#' Z <- get_CA_Z(adj_vals$x_tilde, adj_vals$n_tilde) +get_CA_Z <- function(adj_x, adj_n) { + d <- 1:length(adj_x) # Assign scores 1, 2, 3, ... to treatment groups + N <- sum(adj_n) # Total adjusted sample size + d_bar <- sum(d*adj_n)/N # Weighted average of scores + p_bar <- sum(adj_x)/N # Overall proportion of affected subjects + + # Calculate numerator and denominator of Z-statistic + num <- sum(adj_x*d) - N*p_bar*d_bar + den <- p_bar*(1 - p_bar)*(sum(adj_n*(d)^2) - N*d_bar^2) + + # Return Z-statistic + num/sqrt(den) +} + + +#' Run Rao-Scott Adjusted Cochran-Armitage Trend Test +#' +#' This function is a wrapper that performs the Rao-Scott adjusted Cochran-Armitage +#' trend test for clustered binary data. +#' +#' @param group Vector of treatment group identifiers +#' @param replicate Vector of replicate/tank identifiers within treatment groups +#' @param affected Vector of counts of affected subjects (fish with injuries) in each replicate +#' @param total Vector of total subjects (fish) in each replicate +#' +#' @return A list containing: +#' \item{interm_values}{A tibble with intermediate values from the Rao-Scott adjustment} +#' \item{Z}{The Z-statistic for the Cochran-Armitage trend test} +#' +#' @details +#' This function combines the Rao-Scott adjustment and the Cochran-Armitage trend test +#' to analyze dose-response relationships in clustered data. It first calculates +#' adjusted values accounting for clustering, then uses these values to perform +#' the trend test. +#' +#' The p-value can be calculated using: 2 * (1 - pnorm(abs(Z))) +#' +#' @examples +#' # Test for trend in injury rates across treatment groups +#' # Considering S1, S2, and S3 as "affected" +#' result <- run_RSCA( +#' dat_bcs1$tmt, +#' dat_bcs1$tank, +#' dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, +#' dat_bcs1$total +#' ) +#' +#' # View intermediate values +#' print(result$interm_values) +#' +#' # View Z-statistic +#' print(result$Z) +#' +#' # Calculate p-value +#' p_value <- 2 * (1 - pnorm(abs(result$Z))) +#' print(paste("p-value:", p_value)) +run_RSCA <- function(group, replicate, affected, total) { + interm_values <- get_RS_adj_val(group, replicate, affected, total) + Z <- get_CA_Z(interm_values$x_tilde, interm_values$n_tilde) + list(interm_values = interm_values, Z = Z) +} diff --git a/R/ordinal.R b/R/ordinal.R index 0d7d62d..1be0957 100644 --- a/R/ordinal.R +++ b/R/ordinal.R @@ -108,3 +108,289 @@ backCalcSE <- function(se,mu,approximate = FALSE){ } return(SE) } + + + + + + +#' Expand Aggregated Fish Data to Individual Records (tidyverse version) +#' +#' @param data A data frame containing aggregated fish data +#' @param treatment_col Name of the column containing treatment groups (default: "tmt") +#' @param replicate_col Name of the column containing tank/replicate IDs (default: "tank") +#' @param score_prefix Prefix used for score columns (default: "S") +#' @param total_col Name of the column containing total counts (default: "total") +#' +#' @return A data frame with individual fish records +#' @export +expand_to_individual_tidy <- function(data, treatment_col = "tmt", replicate_col = "tank", + score_prefix = "S", total_col = "total") { + # Input validation + if (!treatment_col %in% names(data)) { + stop("Treatment column '", treatment_col, "' not found in data") + } + if (!replicate_col %in% names(data)) { + stop("Replicate column '", replicate_col, "' not found in data") + } + if (!total_col %in% names(data)) { + stop("Total column '", total_col, "' not found in data") + } + + # Find score columns + score_cols <- grep(paste0("^", score_prefix, "[0-9]+$"), names(data), value = TRUE) + if (length(score_cols) == 0) { + stop("No score columns found with prefix '", score_prefix, "' followed by numbers") + } + + # Check if totals match sum of scores + data$check_sum <- rowSums(data[, score_cols, drop = FALSE]) + if (any(data$check_sum != data[[total_col]])) { + warning("Some rows have score counts that don't sum to the total") + } + + # Convert to long format and expand + result <- data %>% + tidyr::pivot_longer( + cols = tidyselect::all_of(score_cols), + names_to = "score", + values_to = "count_weight" # Use a specific name for the count column + ) %>% + dplyr::filter(count_weight > 0) %>% + dplyr::select(tidyselect::all_of(c(treatment_col, replicate_col, "score", "count_weight"))) %>% + tidyr::uncount(weights = count_weight) %>% # Use the specific column name + dplyr::arrange(.data[[treatment_col]], .data[[replicate_col]]) + + return(result) +} + +#' Aggregate Individual Fish Records to Summarized Format (tidyverse version) +#' +#' @param data A data frame containing individual fish records +#' @param treatment_col Name of the column containing treatment groups (default: "tmt") +#' @param replicate_col Name of the column containing tank/replicate IDs (default: "tank") +#' @param score_col Name of the column containing scores (default: "score") +#' @param total_col Name of the column to be created for total counts (default: "total") +#' +#' @return A data frame with aggregated counts per replicate +#' @export +aggregate_from_individual_tidy <- function(data, treatment_col = "tmt", replicate_col = "tank", + score_col = "score", total_col = "total") { + # Input validation + required_cols <- c(treatment_col, replicate_col, score_col) + if (!all(required_cols %in% names(data))) { + missing <- setdiff(required_cols, names(data)) + stop("Missing required columns: ", paste(missing, collapse = ", ")) + } + + # Aggregate the data + result <- data %>% + dplyr::group_by(dplyr::across(tidyselect::all_of(c(treatment_col, replicate_col, score_col)))) %>% + dplyr::summarize(n = dplyr::n(), .groups = "drop") %>% + tidyr::pivot_wider( + names_from = tidyselect::all_of(score_col), + values_from = n, + values_fill = 0 + ) %>% + dplyr::mutate(!!total_col := rowSums(dplyr::across(where(is.numeric)))) %>% + dplyr::arrange(.data[[treatment_col]], .data[[replicate_col]]) + + return(result) +} + + +#' Aggregate Individual Fish Records to Summarized Format (simple without tidyverse version) +#' +#' @param data A data frame containing individual fish records +#' @param treatment_col Name of the column containing treatment groups (default: "tmt") +#' @param replicate_col Name of the column containing tank/replicate IDs (default: "tank") +#' @param score_col Name of the column containing scores (default: "score") +#' @param total_col Name of the column to be created for total counts (default: "total") +#' @param all_scores Optional vector of all score categories to include in the result, +#' even if they have zero occurrences (default: NULL, which uses unique values in data) +#' +#' @return A data frame with aggregated counts per replicate +#' @export +aggregate_from_individual_simple <- function(data, treatment_col = "tmt", replicate_col = "tank", + score_col = "score", total_col = "total", + all_scores = NULL) { + # Input validation + required_cols <- c(treatment_col, replicate_col, score_col) + if (!all(required_cols %in% names(data))) { + missing <- setdiff(required_cols, names(data)) + stop("Missing required columns: ", paste(missing, collapse = ", ")) + } + + # Get unique combinations of treatment and replicate + unique_groups <- unique(data[, c(treatment_col, replicate_col)]) + + # Get unique score values + if (is.null(all_scores)) { + unique_scores <- sort(unique(data[[score_col]])) + } else { + unique_scores <- sort(all_scores) + # Check if data contains scores not in all_scores + data_scores <- unique(data[[score_col]]) + missing_scores <- setdiff(data_scores, all_scores) + if (length(missing_scores) > 0) { + warning("Data contains scores not in all_scores: ", + paste(missing_scores, collapse = ", ")) + } + } + + # Initialize result data frame + result <- unique_groups + + # Add columns for each score + for (score in unique_scores) { + result[[score]] <- 0 + } + + # Add total column + result[[total_col]] <- 0 + + # Fill in counts + for (i in 1:nrow(result)) { + treatment_val <- result[[treatment_col]][i] + replicate_val <- result[[replicate_col]][i] + + # Filter data for this treatment and replicate + subset_data <- data[data[[treatment_col]] == treatment_val & + data[[replicate_col]] == replicate_val, ] + + # Count occurrences of each score + for (score in unique_scores) { + count <- sum(subset_data[[score_col]] == score) + result[i, score] <- count + } + + # Calculate total + result[[total_col]][i] <- nrow(subset_data) + } + + # Sort the result + result <- result[order(result[[treatment_col]], result[[replicate_col]]), ] + rownames(result) <- NULL + + return(result) +} + +#' Expand Aggregated Fish Data to Individual Records (simple without tidyverse version) +#' +#' @param data A data frame containing aggregated fish data +#' @param treatment_col Name of the column containing treatment groups (default: "tmt") +#' @param replicate_col Name of the column containing tank/replicate IDs (default: "tank") +#' @param score_prefix Prefix used for score columns (default: "S") +#' @param total_col Name of the column containing total counts (default: "total") +#' +#' @return A list containing: +#' - data: A data frame with individual fish records +#' - score_columns: Vector of all score column names from the original data +#' +#' @export +expand_to_individual_simple <- function(data, treatment_col = "tmt", replicate_col = "tank", + score_prefix = "S", total_col = "total") { + # Input validation + if (!treatment_col %in% names(data)) { + stop("Treatment column '", treatment_col, "' not found in data") + } + if (!replicate_col %in% names(data)) { + stop("Replicate column '", replicate_col, "' not found in data") + } + if (!total_col %in% names(data)) { + stop("Total column '", total_col, "' not found in data") + } + + # Find score columns + score_cols <- grep(paste0("^", score_prefix, "[0-9]+$"), names(data), value = TRUE) + if (length(score_cols) == 0) { + stop("No score columns found with prefix '", score_prefix, "' followed by numbers") + } + + # Check if totals match sum of scores + data$check_sum <- rowSums(data[, score_cols, drop = FALSE]) + if (any(data$check_sum != data[[total_col]])) { + warning("Some rows have score counts that don't sum to the total") + } + + # Initialize empty data frame for individual records + result <- data.frame() + + # Process each row of the input data + for (i in 1:nrow(data)) { + row_data <- data[i, ] + + for (score in score_cols) { + count <- row_data[[score]] + if (count > 0) { + # Create a data frame for this score and count + new_rows <- data.frame( + row_data[rep(1, count), c(treatment_col, replicate_col)], + score = rep(score, count), + stringsAsFactors = FALSE + ) + result <- rbind(result, new_rows) + } + } + } + + # Sort the result + result <- result[order(result[[treatment_col]], result[[replicate_col]]), ] + rownames(result) <- NULL + + # Return both the individual data and the original score columns + return(list( + data = result, + score_columns = score_cols + )) +} + +#' Convert Between Aggregated and Individual Fish Data +#' +#' This function provides a complete workflow for converting between aggregated +#' fish data and individual fish records, ensuring all score categories are preserved. +#' +#' @param data A data frame containing either aggregated or individual fish data +#' @param direction Either "to_individual" or "to_aggregated" +#' @param treatment_col Name of the column containing treatment groups (default: "tmt") +#' @param replicate_col Name of the column containing tank/replicate IDs (default: "tank") +#' @param score_prefix Prefix used for score columns when direction is "to_individual" (default: "S") +#' @param score_col Name of the column containing scores when direction is "to_aggregated" (default: "score") +#' @param total_col Name of the column containing/for total counts (default: "total") +#' @param all_scores Optional vector of all score categories to include when direction is "to_aggregated" +#' +#' @return A data frame with the converted data +#' +#' @export +convert_fish_data <- function(data, direction, + treatment_col = "tmt", replicate_col = "tank", + score_prefix = "S", score_col = "score", + total_col = "total", all_scores = NULL) { + + if (direction == "to_individual") { + # Convert from aggregated to individual + result <- expand_to_individual_simple( + data, treatment_col, replicate_col, score_prefix, total_col + ) + + # Store score columns as an attribute for future reference + attr(result$data, "score_columns") <- result$score_columns + return(result$data) + + } else if (direction == "to_aggregated") { + # Check for stored score columns + if (is.null(all_scores) && !is.null(attr(data, "score_columns"))) { + all_scores <- attr(data, "score_columns") + } + + # Convert from individual to aggregated + return(aggregate_from_individual_simple( + data, treatment_col, replicate_col, score_col, total_col, all_scores + )) + + } else { + stop("Invalid direction. Use 'to_individual' or 'to_aggregated'.") + } +} + + diff --git a/man/aggregate_from_individual_simple.Rd b/man/aggregate_from_individual_simple.Rd new file mode 100644 index 0000000..6c15c8e --- /dev/null +++ b/man/aggregate_from_individual_simple.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ordinal.R +\name{aggregate_from_individual_simple} +\alias{aggregate_from_individual_simple} +\title{Aggregate Individual Fish Records to Summarized Format (simple without tidyverse version)} +\usage{ +aggregate_from_individual_simple( + data, + treatment_col = "tmt", + replicate_col = "tank", + score_col = "score", + total_col = "total", + all_scores = NULL +) +} +\arguments{ +\item{data}{A data frame containing individual fish records} + +\item{treatment_col}{Name of the column containing treatment groups (default: "tmt")} + +\item{replicate_col}{Name of the column containing tank/replicate IDs (default: "tank")} + +\item{score_col}{Name of the column containing scores (default: "score")} + +\item{total_col}{Name of the column to be created for total counts (default: "total")} + +\item{all_scores}{Optional vector of all score categories to include in the result, +even if they have zero occurrences (default: NULL, which uses unique values in data)} +} +\value{ +A data frame with aggregated counts per replicate +} +\description{ +Aggregate Individual Fish Records to Summarized Format (simple without tidyverse version) +} diff --git a/man/aggregate_from_individual_tidy.Rd b/man/aggregate_from_individual_tidy.Rd new file mode 100644 index 0000000..435cb94 --- /dev/null +++ b/man/aggregate_from_individual_tidy.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ordinal.R +\name{aggregate_from_individual_tidy} +\alias{aggregate_from_individual_tidy} +\title{Aggregate Individual Fish Records to Summarized Format (tidyverse version)} +\usage{ +aggregate_from_individual_tidy( + data, + treatment_col = "tmt", + replicate_col = "tank", + score_col = "score", + total_col = "total" +) +} +\arguments{ +\item{data}{A data frame containing individual fish records} + +\item{treatment_col}{Name of the column containing treatment groups (default: "tmt")} + +\item{replicate_col}{Name of the column containing tank/replicate IDs (default: "tank")} + +\item{score_col}{Name of the column containing scores (default: "score")} + +\item{total_col}{Name of the column to be created for total counts (default: "total")} +} +\value{ +A data frame with aggregated counts per replicate +} +\description{ +Aggregate Individual Fish Records to Summarized Format (tidyverse version) +} diff --git a/man/calcTaronesTest.Rd b/man/calcTaronesTest.Rd index de8cf12..be41740 100644 --- a/man/calcTaronesTest.Rd +++ b/man/calcTaronesTest.Rd @@ -3,7 +3,11 @@ \name{calcTaronesTest} \alias{calcTaronesTest} \alias{Tarones} -\title{Tarones test for Stratified OR (integrated from https://github.com/the8thday/ClinStats/blob/master/R/Tarones.R)} +\title{Tarones test for Stratified OR (integrated from +https://github.com/the8thday/ClinStats/blob/master/R/Tarones.R). The original +MIT license is included in the source code. The function +is included for validation purpose. Please use the most updated function in +ClinStats package for related calculation in a non-GLP environment.} \usage{ calcTaronesTest(mylist, referencerow = 2) } diff --git a/man/calpha.test.Rd b/man/calpha.test.Rd index 67050b1..398825d 100644 --- a/man/calpha.test.Rd +++ b/man/calpha.test.Rd @@ -21,7 +21,11 @@ Same kind of object as the one returns by the stats } \description{ The C(alpha) test is a test of the binomial distribution against the -alternative of the beta-binomial distribution. +alternative of the beta-binomial distribution. Note that this is not exported +to the package namespace but kept internal. The license is MIT with the +COPYRIGHT HOLDER: Christophe Gigot and YEAR: 2023 (included in source code). +The function is included for validation purpose. Please use epiphy package +for related calculation in a non-GLP environment. } \details{ It is based on calculation of a test statistic, z, that has an asymptotic @@ -43,7 +47,7 @@ for incidence data. my_fisher <- structure(list(index = 3.14396182555326, name = "Fisher's index of dispersion", flavor = "incidence", N = 75L, n = 40L), class = c("fisher", "agg_index")) -calpha.test(my_fisher) +drcHelper:::calpha.test(my_fisher) } \references{ diff --git a/man/convert_fish_data.Rd b/man/convert_fish_data.Rd new file mode 100644 index 0000000..6df6afe --- /dev/null +++ b/man/convert_fish_data.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ordinal.R +\name{convert_fish_data} +\alias{convert_fish_data} +\title{Convert Between Aggregated and Individual Fish Data} +\usage{ +convert_fish_data( + data, + direction, + treatment_col = "tmt", + replicate_col = "tank", + score_prefix = "S", + score_col = "score", + total_col = "total", + all_scores = NULL +) +} +\arguments{ +\item{data}{A data frame containing either aggregated or individual fish data} + +\item{direction}{Either "to_individual" or "to_aggregated"} + +\item{treatment_col}{Name of the column containing treatment groups (default: "tmt")} + +\item{replicate_col}{Name of the column containing tank/replicate IDs (default: "tank")} + +\item{score_prefix}{Prefix used for score columns when direction is "to_individual" (default: "S")} + +\item{score_col}{Name of the column containing scores when direction is "to_aggregated" (default: "score")} + +\item{total_col}{Name of the column containing/for total counts (default: "total")} + +\item{all_scores}{Optional vector of all score categories to include when direction is "to_aggregated"} +} +\value{ +A data frame with the converted data +} +\description{ +This function provides a complete workflow for converting between aggregated +fish data and individual fish records, ensuring all score categories are preserved. +} diff --git a/man/expand_to_individual_simple.Rd b/man/expand_to_individual_simple.Rd new file mode 100644 index 0000000..f5b16ec --- /dev/null +++ b/man/expand_to_individual_simple.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ordinal.R +\name{expand_to_individual_simple} +\alias{expand_to_individual_simple} +\title{Expand Aggregated Fish Data to Individual Records (simple without tidyverse version)} +\usage{ +expand_to_individual_simple( + data, + treatment_col = "tmt", + replicate_col = "tank", + score_prefix = "S", + total_col = "total" +) +} +\arguments{ +\item{data}{A data frame containing aggregated fish data} + +\item{treatment_col}{Name of the column containing treatment groups (default: "tmt")} + +\item{replicate_col}{Name of the column containing tank/replicate IDs (default: "tank")} + +\item{score_prefix}{Prefix used for score columns (default: "S")} + +\item{total_col}{Name of the column containing total counts (default: "total")} +} +\value{ +A list containing: +\itemize{ +\item data: A data frame with individual fish records +\item score_columns: Vector of all score column names from the original data +} +} +\description{ +Expand Aggregated Fish Data to Individual Records (simple without tidyverse version) +} diff --git a/man/expand_to_individual_tidy.Rd b/man/expand_to_individual_tidy.Rd new file mode 100644 index 0000000..372a31e --- /dev/null +++ b/man/expand_to_individual_tidy.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ordinal.R +\name{expand_to_individual_tidy} +\alias{expand_to_individual_tidy} +\title{Expand Aggregated Fish Data to Individual Records (tidyverse version)} +\usage{ +expand_to_individual_tidy( + data, + treatment_col = "tmt", + replicate_col = "tank", + score_prefix = "S", + total_col = "total" +) +} +\arguments{ +\item{data}{A data frame containing aggregated fish data} + +\item{treatment_col}{Name of the column containing treatment groups (default: "tmt")} + +\item{replicate_col}{Name of the column containing tank/replicate IDs (default: "tank")} + +\item{score_prefix}{Prefix used for score columns (default: "S")} + +\item{total_col}{Name of the column containing total counts (default: "total")} +} +\value{ +A data frame with individual fish records +} +\description{ +Expand Aggregated Fish Data to Individual Records (tidyverse version) +} diff --git a/man/get_CA_Z.Rd b/man/get_CA_Z.Rd new file mode 100644 index 0000000..e197199 --- /dev/null +++ b/man/get_CA_Z.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSCABS_AO.R +\name{get_CA_Z} +\alias{get_CA_Z} +\title{Calculate Cochran-Armitage Trend Test Z-Statistic} +\usage{ +get_CA_Z(adj_x, adj_n) +} +\arguments{ +\item{adj_x}{Vector of adjusted affected counts for each treatment group} + +\item{adj_n}{Vector of adjusted sample sizes for each treatment group} +} +\value{ +The Z-statistic for the Cochran-Armitage trend test +} +\description{ +This function calculates the Z-statistic for the Cochran-Armitage trend test +using adjusted counts and sample sizes. +} +\details{ +The Cochran-Armitage trend test examines whether there is a linear trend in +proportions across ordered categories (treatment groups). This implementation +uses adjusted values to account for clustering in the data. + +The function assigns scores (1, 2, 3, ...) to the treatment groups and +calculates the Z-statistic using the formula: +Z = / sqrt +where: +\itemize{ +\item d are the scores (1, 2, 3, ...) +\item N is the total adjusted sample size +\item d_bar is the weighted average of scores +\item p_bar is the overall proportion of affected subjects +} +} +\examples{ +# Get adjusted values +adj_vals <- get_RS_adj_val( + dat_bcs1$tmt, + dat_bcs1$tank, + dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, + dat_bcs1$total +) +# Calculate Z-statistic +Z <- get_CA_Z(adj_vals$x_tilde, adj_vals$n_tilde) +} diff --git a/man/get_RS_adj_val.Rd b/man/get_RS_adj_val.Rd new file mode 100644 index 0000000..96b2f65 --- /dev/null +++ b/man/get_RS_adj_val.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSCABS_AO.R +\name{get_RS_adj_val} +\alias{get_RS_adj_val} +\title{Calculate Rao-Scott Adjusted Values for Clustered Binary Data} +\usage{ +get_RS_adj_val(group, replicate, affected, total) +} +\arguments{ +\item{group}{Vector of treatment group identifiers} + +\item{replicate}{Vector of replicate/tank identifiers within treatment groups} + +\item{affected}{Vector of counts of affected subjects (fish with injuries) in each replicate} + +\item{total}{Vector of total subjects (fish) in each replicate} +} +\value{ +A tibble containing the following columns: +\item{grp}{Treatment group identifier} +\item{x}{Total number of affected subjects in the treatment group} +\item{n}{Total number of subjects in the treatment group} +\item{m}{Number of replicates in the treatment group} +\item{p_hat}{Estimated proportion of affected subjects in the treatment group} +\item{b}{Binomial variance of p_hat} +\item{v}{Estimated variance accounting for clustering} +\item{D}{Design effect (ratio of cluster-adjusted variance to binomial variance)} +\item{n_tilde}{Adjusted sample size accounting for clustering} +\item{x_tilde}{Adjusted number of affected subjects accounting for clustering} +} +\description{ +This function calculates the Rao-Scott adjustment for clustered binary data +to account for intra-cluster correlation when analyzing dose-response relationships. +} +\details{ +The function first aggregates data by treatment group to calculate overall proportions. +It then computes the variance within each treatment group accounting for clustering, +and calculates a design effect (D) as the ratio of cluster-adjusted variance to +binomial variance. The sample size and affected counts are then adjusted by +dividing by this design effect. +} +\examples{ +# Calculate adjusted values for the fish injury data +adj_vals <- get_RS_adj_val( + dat_bcs1$tmt, + dat_bcs1$tank, + dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, + dat_bcs1$total +) +} +\author{ +Originally by Allen Olmstead +} diff --git a/man/monotonicityTest.Rd b/man/monotonicityTest.Rd index 4319dcc..446881c 100644 --- a/man/monotonicityTest.Rd +++ b/man/monotonicityTest.Rd @@ -17,7 +17,9 @@ monotonicityTest(Data, Treatment, Response) monotonicity table } \description{ -Testing Monotonicity +The function is adapted from the archived version of StatCharrms developed by +Joe Swintek et al with CC0 license. It is not updated anymore and included +for validation purpose. There are other ways to perform a trend test. } \examples{ \dontrun{ diff --git a/man/runRSCABS.Rd b/man/runRSCABS.Rd index 62b8700..c6348b0 100644 --- a/man/runRSCABS.Rd +++ b/man/runRSCABS.Rd @@ -26,7 +26,11 @@ adjustment to the Cochran-Armitage test and "CA" to ignore the adjustment.} a table of the test results for each treatment and injury score. } \description{ -Runs the Rao-Scott adjusted Cochran-Armitage trend test by slices (RSCABS) analysis. +Runs the Rao-Scott adjusted Cochran-Armitage trend test by slices (RSCABS) +analysis.The function is adapted from the archived version of RSCABS developed by +Joe Swintek et al with CC0 license. It is not updated anymore and included +for validation purpose. It is provided in this package a different function +to perform the same task. } \examples{ \dontrun{ diff --git a/man/run_RSCA.Rd b/man/run_RSCA.Rd new file mode 100644 index 0000000..88d1f0e --- /dev/null +++ b/man/run_RSCA.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSCABS_AO.R +\name{run_RSCA} +\alias{run_RSCA} +\title{Run Rao-Scott Adjusted Cochran-Armitage Trend Test} +\usage{ +run_RSCA(group, replicate, affected, total) +} +\arguments{ +\item{group}{Vector of treatment group identifiers} + +\item{replicate}{Vector of replicate/tank identifiers within treatment groups} + +\item{affected}{Vector of counts of affected subjects (fish with injuries) in each replicate} + +\item{total}{Vector of total subjects (fish) in each replicate} +} +\value{ +A list containing: +\item{interm_values}{A tibble with intermediate values from the Rao-Scott adjustment} +\item{Z}{The Z-statistic for the Cochran-Armitage trend test} +} +\description{ +This function is a wrapper that performs the Rao-Scott adjusted Cochran-Armitage +trend test for clustered binary data. +} +\details{ +This function combines the Rao-Scott adjustment and the Cochran-Armitage trend test +to analyze dose-response relationships in clustered data. It first calculates +adjusted values accounting for clustering, then uses these values to perform +the trend test. + +The p-value can be calculated using: 2 * (1 - pnorm(abs(Z))) +} +\examples{ +# Test for trend in injury rates across treatment groups +# Considering S1, S2, and S3 as "affected" +result <- run_RSCA( + dat_bcs1$tmt, + dat_bcs1$tank, + dat_bcs1$S1 + dat_bcs1$S2 + dat_bcs1$S3, + dat_bcs1$total +) + +# View intermediate values +print(result$interm_values) + +# View Z-statistic +print(result$Z) + +# Calculate p-value +p_value <- 2 * (1 - pnorm(abs(result$Z))) +print(paste("p-value:", p_value)) +} diff --git a/man/tsk.Rd b/man/tsk.Rd index 7b06159..a916d02 100644 --- a/man/tsk.Rd +++ b/man/tsk.Rd @@ -13,7 +13,8 @@ tsk(...) tsk estimations } \description{ -Trimmed Spearman-Karber Method by brsr +The function is adapted from the single function package tsk developed with +GPL3 license. It is included for validation purpose. } \references{ \url{https://github.com/brsr/tsk} diff --git a/man/williamsTest_JG.Rd b/man/williamsTest_JG.Rd index eb7e58d..bece311 100644 --- a/man/williamsTest_JG.Rd +++ b/man/williamsTest_JG.Rd @@ -21,7 +21,10 @@ williamsTest_JG(df, resp, trt, direction = "decreasing", SeIn = NULL) Williams' test result } \description{ -Williams Test from the StatCharrms Package +The function is adapted from the archived version of StatCharrms developed by +Joe Swintek et al with CC0 license. It is not updated anymore and included +for validation purpose. We recommend to use the williamsTest from PMCMRplus +package instead. } \examples{ ## Williams Test diff --git a/tests/testthat/test_individual_aggregation.R b/tests/testthat/test_individual_aggregation.R new file mode 100644 index 0000000..904586f --- /dev/null +++ b/tests/testthat/test_individual_aggregation.R @@ -0,0 +1,265 @@ +# Unit tests using testthat +## testthat::test_file("tests/testthat/test_individual_aggregation.R") + +describe("expand_to_individual", { + # Create a sample dataset with S0-S3 + standard_data <- data.frame( + tmt = c("C", "SC", "T1"), + tank = c("5A", "4A", "3A"), + S0 = c(2, 1, 4), + S1 = c(1, 3, 0), + S2 = c(0, 0, 0), + S3 = c(1, 0, 0), + total = c(4, 4, 4) + ) + + # Create a dataset with S0-S5 + extended_data <- data.frame( + tmt = c("C", "SC"), + tank = c("5A", "4A"), + S0 = c(2, 1), + S1 = c(1, 0), + S2 = c(0, 1), + S3 = c(1, 0), + S4 = c(0, 1), + S5 = c(0, 1), + total = c(4, 4) + ) + + # Create a dataset with custom column names + custom_data <- data.frame( + treatment = c("Control", "Treatment"), + replicate = c("R1", "R2"), + I0 = c(2, 1), + I1 = c(1, 2), + I2 = c(1, 1), + count = c(4, 4) + ) + + it("correctly expands standard data (S0-S3)", { + result <- expand_to_individual_simple(standard_data) + result_tidy <- expand_to_individual_tidy(standard_data) + expect_identical(result$data,as.data.frame(result_tidy)) + result <- result$data + # Check dimensions + expect_equal(nrow(result), sum(standard_data$total)) + expect_equal(ncol(result), 3) + + # Check column names + expect_equal(names(result), c("tmt", "tank", "score")) + + # Check counts match the original data + c_tank <- result[result$tank == "5A", ] + expect_equal(sum(c_tank$score == "S0"), 2) + expect_equal(sum(c_tank$score == "S1"), 1) + expect_equal(sum(c_tank$score == "S3"), 1) + }) + + it("correctly expands extended data (S0-S5)", { + result <- expand_to_individual_simple(extended_data) + result_tidy <- expand_to_individual_tidy(extended_data) + expect_identical(result$data,as.data.frame(result_tidy)) + result <- result$data + # Check dimensions + expect_equal(nrow(result), sum(extended_data$total)) + + # Check all score categories are represented + sc_tank <- result[result$tank == "4A", ] + expect_equal(sum(sc_tank$score == "S0"), 1) + expect_equal(sum(sc_tank$score == "S2"), 1) + expect_equal(sum(sc_tank$score == "S4"), 1) + expect_equal(sum(sc_tank$score == "S5"), 1) + }) + + it("works with custom column names", { + result <- expand_to_individual_simple( + custom_data, + treatment_col = "treatment", + replicate_col = "replicate", + score_prefix = "I", + total_col = "count" + ) + result_tidy <- expand_to_individual_tidy(custom_data, + treatment_col = "treatment", + replicate_col = "replicate", + score_prefix = "I", + total_col = "count") + expect_identical(result$data,as.data.frame(result_tidy)) + result <- result$data + # Check dimensions + expect_equal(nrow(result), sum(custom_data$count)) + + # Check column names + expect_equal(names(result), c("treatment", "replicate", "score")) + + # Check counts match the original data + control_rep <- result[result$treatment == "Control" & result$replicate == "R1", ] + expect_equal(sum(control_rep$score == "I0"), 2) + expect_equal(sum(control_rep$score == "I1"), 1) + expect_equal(sum(control_rep$score == "I2"), 1) + }) + + it("raises an error when required columns are missing", { + bad_data <- standard_data[, -which(names(standard_data) == "tmt")] + expect_error(expand_to_individual_simple(bad_data), "Treatment column 'tmt' not found") + }) + + it("raises an error when no score columns are found", { + no_score_data <- data.frame( + tmt = c("C", "SC"), + tank = c("5A", "4A"), + X0 = c(2, 1), # Not using S prefix + X1 = c(2, 3), + total = c(4, 4) + ) + expect_error(expand_to_individual_simple(no_score_data), "No score columns found with prefix") + }) + + it("gives a warning when totals don't match sum of scores", { + inconsistent_data <- standard_data + inconsistent_data$total[1] <- 5 # Change total but not individual counts + expect_warning(expand_to_individual_simple(inconsistent_data), + "score counts that don't sum to the total") + }) +}) + +describe("aggregate_from_individual", { + # Create a sample individual dataset + standard_individual <- data.frame( + tmt = c("C", "C", "C", "C", "SC", "SC", "SC", "SC"), + tank = c("5A", "5A", "5A", "5A", "4A", "4A", "4A", "4A"), + score = c("S0", "S0", "S1", "S3", "S0", "S1", "S1", "S1"), + stringsAsFactors = FALSE + ) + + # Create a dataset with extended scores + extended_individual <- data.frame( + tmt = c("C", "C", "C", "C", "SC", "SC", "SC", "SC"), + tank = c("5A", "5A", "5A", "5A", "4A", "4A", "4A", "4A"), + score = c("S0", "S0", "S1", "S3", "S0", "S4", "S4", "S5"), + stringsAsFactors = FALSE + ) + + # Create a dataset with custom column names + custom_individual <- data.frame( + treatment = c("Control", "Control", "Control", "Treatment", "Treatment"), + replicate = c("R1", "R1", "R1", "R2", "R2"), + injury = c("I0", "I0", "I1", "I0", "I2"), + stringsAsFactors = FALSE + ) + + it("correctly aggregates standard individual records", { + result <- aggregate_from_individual_simple(standard_individual) + result_tidy <- aggregate_from_individual_tidy(standard_individual) + expect_equal(result,as.data.frame(result_tidy)) ## not identical since + ## `actual$S0, S1, S3` is a double vector + ## `expected$S0, S1, S3` is an integer vector + # Check dimensions + expect_equal(nrow(result), 2) # Two unique tank/treatment combinations + expect_equal(ncol(result), 6) # tmt, tank, S0, S1, S3, total + + # Check column names + expect_true(all(c("tmt", "tank", "S0", "S1", "S3", "total") %in% names(result))) + + # Check counts for first row (C, 5A) + expect_equal(result$S0[1], 2) + expect_equal(result$S1[1], 1) + expect_equal(result$S3[1], 1) + expect_equal(result$total[1], 4) + + # Check counts for second row (SC, 4A) + expect_equal(result$S0[2], 1) + expect_equal(result$S1[2], 3) + expect_equal(result$total[2], 4) + }) + + it("correctly aggregates extended individual records", { + result <- aggregate_from_individual_simple(extended_individual) + result_tidy <- aggregate_from_individual_tidy(extended_individual) + expect_equal(result,as.data.frame(result_tidy)) + # Check all score categories are included + expect_true(all(c("S0", "S1", "S3", "S4", "S5") %in% names(result))) + + # Check counts + expect_equal(result$S4[2], 2) + expect_equal(result$S5[2], 1) + }) + + it("works with custom column names", { + result <- aggregate_from_individual_simple( + custom_individual, + treatment_col = "treatment", + replicate_col = "replicate", + score_col = "injury", + total_col = "count" + ) + result_tidy <- aggregate_from_individual_tidy( custom_individual, + treatment_col = "treatment", + replicate_col = "replicate", + score_col = "injury", + total_col = "count") + expect_equal(result,as.data.frame(result_tidy)) + # Check dimensions and column names + expect_equal(nrow(result), 2) + expect_true(all(c("treatment", "replicate", "I0", "I1", "I2", "count") %in% names(result))) + + # Check counts + control_row <- result[result$treatment == "Control", ] + expect_equal(control_row$I0, 2) + expect_equal(control_row$I1, 1) + expect_equal(control_row$count, 3) + + treatment_row <- result[result$treatment == "Treatment", ] + expect_equal(treatment_row$I0, 1) + expect_equal(treatment_row$I2, 1) + expect_equal(treatment_row$count, 2) + }) + + it("raises an error when required columns are missing", { + bad_data <- standard_individual[, -which(names(standard_individual) == "score")] + expect_error(aggregate_from_individual_simple(bad_data), "Missing required columns") + }) +}) + + + +describe("convert_fish_data",{ + # Test data + test_data <- data.frame( + tmt = c("C", "SC"), + tank = c("5A", "4A"), + S0 = c(2, 1), + S1 = c(1, 3), + S2 = c(0, 0), # Note: S2 has zero occurrences + S3 = c(1, 0), + total = c(4, 4) + ) + + # Convert to individual records + individual_data <- convert_fish_data(test_data, direction = "to_individual") + + + # Convert back to aggregated format + aggregated_data <- convert_fish_data(individual_data, direction = "to_aggregated") + + + it("converted individual data",{ + expect_identical(individual_data,structure(list(tmt = c("C", "C", "C", "C", "SC", "SC", "SC", + "SC"), tank = c("5A", "5A", "5A", "5A", "4A", "4A", "4A", "4A" + ), score = c("S0", "S0", "S1", "S3", "S0", "S1", "S1", "S1")), row.names = c(NA, + -8L), class = "data.frame", score_columns = c("S0", "S1", "S2", + "S3"))) + }) + it("convert back to aggregated data",{ + expect_identical(aggregated_data,structure(list(tmt = c("C", "SC"), tank = c("5A", "4A"), S0 = c(2, + 1), S1 = c(1, 3), S2 = c(0, 0), S3 = c(1, 0), total = c(4, 4)), row.names = c(NA, + -2L), class = "data.frame")) + + }) + it("Verify that the conversion is lossless",{ + # + all.equal(test_data[order(test_data$tmt, test_data$tank), c("tmt", "tank", "S0", "S1", "S2", "S3", "total")], + aggregated_data[order(aggregated_data$tmt, aggregated_data$tank), c("tmt", "tank", "S0", "S1", "S2", "S3", "total")]) + + }) +}) diff --git a/vignettes/articles/Example_RSCABS.Rmd b/vignettes/articles/Example_RSCABS.Rmd index fb3238c..5948ab5 100644 --- a/vignettes/articles/Example_RSCABS.Rmd +++ b/vignettes/articles/Example_RSCABS.Rmd @@ -22,8 +22,9 @@ Steps in the testing procedure: 3. The by slices (BS) part allows for testing at each severity score (*e.g.*, from 1 to 5) instead of just presences or absence. By slices works by splitting the severity scores associated with an endpoint into **two** groups based on the severity score being tested. The RSCA test statistic is calculated based on these two groups. 4. Carry out a step-down procedure by excluding the highest treatment level in the analysis and recalculate the RSCA test statistic until the test stats is not significant or there is only control group left. +Current implementation used Rao-Scott correction always, potential inflated type-I error in slices with very low occurences. High resolution of scoring system (many categories) could be less powerful due to the violation of monotonicity. + -Current implementation used Rao-Scott correction always, exact CA test is not possible, potential inflated type-I error in slices with very low occurences. High resolution of scoring system (many categories) could be less powerful due to the violation of monotonicity. ```{r, include = FALSE} knitr::opts_chunk$set( @@ -39,6 +40,224 @@ library(drcHelper) library(tidyverse) ``` + +### Some Backgrounds + +The Rao-Scott adjustment is a method to account for clustering in binary data. Using a fish injury dataset as an example, fish are clustered within tanks (replicates), which means observations within the same tank may be correlated. Standard statistical tests assume independent observations, which could lead to incorrect inference if clustering is ignored. + +The adjustment works by: + +1. Calculating the observed variance within treatment groups, accounting for clustering +2. Comparing this to the expected variance under a simple binomial model +3. Computing a design effect (D) as the ratio of these variances +4. Adjusting the sample sizes and counts by dividing by D + +**What is the Cochran-Armitage Trend Test?** + +The Cochran-Armitage trend test examines whether there is a linear trend in proportions across ordered categories. In this context, it tests whether the proportion of injured fish increases (or decreases) systematically with treatment level. + +The test assigns scores to treatment groups (typically 1, 2, 3, ...) and calculates a Z-statistic that measures the strength of the linear trend. + +## Example 1: synthetic dataset + +I'll create a simulated dataset with Control and 3 treatment groups (T1, T2, T3), each with 4 tanks, and 6 fish per tank. The data will show a trend where higher treatment groups have more severe injuries (higher scores). Injury scores from S0 (no injury) to S4 (severe injury). + +Treatment effects: +- Control group has mostly healthy fish (S0) +- As treatment level increases, the proportion of higher injury scores increases +- T3 (highest treatment) has the most severe injuries + +Variability: + +- Small random variations in the probabilities for each tank +- This simulates natural tank-to-tank variability within treatment groups + + + +```{r} +# Set seed for reproducibility +set.seed(123) + +# Define parameters +treatments <- c("Control", "T1", "T2", "T3") +tanks_per_treatment <- 4 +fish_per_tank <- 6 +tank_labels <- paste0(rep(1:tanks_per_treatment, length(treatments)), + rep(LETTERS[1:length(treatments)], each = tanks_per_treatment)) + +# Create base data frame +sim_data <- data.frame( + tmt = rep(treatments, each = tanks_per_treatment), + tank = tank_labels, + total = fish_per_tank +) + +# Define probability distributions for each treatment +# Format: list of vectors, where each vector contains probabilities for [S0, S1, S2, S3, S4] +# Trend: Control has mostly S0, T3 has more severe injuries +prob_distributions <- list( + Control = c(0.80, 0.15, 0.03, 0.02, 0.00), # Mostly healthy fish + T1 = c(0.60, 0.25, 0.10, 0.04, 0.01), # Slight increase in injuries + T2 = c(0.40, 0.30, 0.15, 0.10, 0.05), # Moderate increase in injuries + T3 = c(0.20, 0.25, 0.25, 0.20, 0.10) # Substantial increase in injuries +) + +# Function to generate counts based on multinomial distribution +generate_counts <- function(n, probs) { + # Add small random variation to probabilities (within tanks) + varied_probs <- pmax(0, probs + rnorm(length(probs), 0, 0.03)) + varied_probs <- varied_probs / sum(varied_probs) # Normalize to sum to 1 + + # Generate counts using multinomial distribution + counts <- rmultinom(1, n, varied_probs) + return(counts) +} + +# Generate injury counts for each tank +for (i in 1:nrow(sim_data)) { + treatment <- sim_data$tmt[i] + probs <- prob_distributions[[treatment]] + + counts <- generate_counts(fish_per_tank, probs) + + sim_data$S0[i] <- counts[1] + sim_data$S1[i] <- counts[2] + sim_data$S2[i] <- counts[3] + sim_data$S3[i] <- counts[4] + sim_data$S4[i] <- counts[5] +} + +# Verify that the total counts match +sim_data$check_sum <- sim_data$S0 + sim_data$S1 + sim_data$S2 + sim_data$S3 + sim_data$S4 +all(sim_data$check_sum == sim_data$total) # Should be TRUE + +# Remove the check column +sim_data$check_sum <- NULL + +# Display the simulated data +print(sim_data) + +# Calculate the average proportion of each score by treatment group +summary_by_treatment <- aggregate( + cbind(S0, S1, S2, S3, S4) ~ tmt, + data = sim_data, + FUN = function(x) round(mean(x/fish_per_tank), 2) +) + +print("Average proportion of each score by treatment:") +print(summary_by_treatment) + +# Calculate the average severity score by treatment +sim_data$severity_score <- with(sim_data, + (0*S0 + 1*S1 + 2*S2 + 3*S3 + 4*S4) / total) + +avg_severity <- aggregate(severity_score ~ tmt, data = sim_data, FUN = mean) +print("Average severity score by treatment:") +print(avg_severity) + +# Visualize the trend +if (requireNamespace("ggplot2", quietly = TRUE)) { + library(ggplot2) + + # Convert to long format for plotting + sim_data_long <- reshape2::melt( + sim_data, + id.vars = c("tmt", "tank", "total"), + measure.vars = c("S0", "S1", "S2", "S3", "S4"), + variable.name = "score", + value.name = "count" + ) + + # Calculate proportions + sim_data_long$proportion <- sim_data_long$count / sim_data_long$total + + # Plot + ggplot(sim_data_long, aes(x = tmt, y = proportion, fill = score)) + + geom_bar(stat = "identity", position = "stack") + + labs(title = "Distribution of Injury Scores by Treatment", + x = "Treatment Group", + y = "Proportion of Fish", + fill = "Injury Score") + + theme_minimal() + + scale_fill_brewer(palette = "YlOrRd") + + # Plot severity score + ggplot(sim_data, aes(x = tmt, y = severity_score)) + + geom_boxplot() + + geom_jitter(width = 0.2, alpha = 0.5) + + labs(title = "Average Injury Severity by Treatment", + x = "Treatment Group", + y = "Severity Score (weighted average)") + + theme_minimal() +} +``` + + +The dataset shows a clear trend of increasing injury severity across treatment groups. This can be seen in both the distribution of scores and the average severity score. + + + +```{r} +# Define affected fish as those with any injury (S1-S4) +affected <- sim_data$S1 + sim_data$S2 + sim_data$S3 + sim_data$S4 + +# Run the Rao-Scott adjusted Cochran-Armitage test +result <- run_RSCA( + group = sim_data$tmt, + replicate = sim_data$tank, + affected = affected, + total = sim_data$total +) + +# View results +print(result$interm_values) +print(paste("Z-statistic:", round(result$Z, 3))) +p_value <- 2 * (1 - pnorm(abs(result$Z))) +print(paste("p-value:", round(p_value, 4))) + +# You can also test for a trend in more severe injuries only (S2-S4) +severe_affected <- sim_data$S2 + sim_data$S3 + sim_data$S4 +result_severe <- run_RSCA( + group = sim_data$tmt, + replicate = sim_data$tank, + affected = severe_affected, + total = sim_data$total +) + +print(paste("Z-statistic (severe injuries):", round(result_severe$Z, 3))) +p_value_severe <- 2 * (1 - pnorm(abs(result_severe$Z))) +print(paste("p-value (severe injuries):", round(p_value_severe, 4))) +``` + + +```{r} +# Test data +test_data <- data.frame( + tmt = c("C", "SC"), + tank = c("5A", "4A"), + S0 = c(2, 1), + S1 = c(1, 3), + S2 = c(0, 0), + S3 = c(1, 0), + total = c(4, 4) +) + +# Test the simplified expansion function +individual_data <- expand_to_individual(test_data) +print(individual_data) + +# Test the simplified aggregation function +aggregated_data <- aggregate_from_individual(individual_data) +print(aggregated_data) + +# Verify that the conversion is lossless +all.equal(test_data[order(test_data$tmt, test_data$tank), c("tmt", "tank", "S0", "S1", "S2", "S3", "total")], + aggregated_data[order(aggregated_data$tmt, aggregated_data$tank), c("tmt", "tank", "S0", "S1", "S2", "S3", "total")]) +``` + +## Example 2: + + - Take the subset of F2-females with 16 weeks of age, run RSCABS. ```{r}