Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

gavinsimpson · 2022-02-28T12:15:01Z

Anderson et al (2017: doi: 10.1111/anzs.12176) have proposed a modification to the pseudo F statistic in PERMANOVA that accounts for group heterogeneity.

The modified F is:

and provide an efficient way to get the group dispersions from the projection matrix that forms part of the calculation of F. This should fit nicely with our permutation-based testing.

Anderseon et al (2017) also provide two bootstrap-based testing options which we might explore, but these are of lesser priority.

Thoughts? (I can pass on the paper if you don't have access to it)

MoREpro · 2023-08-03T09:55:06Z

I would really need this modified F as my data have a very unbalanced design and the dispersion is heavily inhomogeneous in different groups. H would be really happy to see this implemented but unfortunately I’m not able to do it.

caonetto · 2023-09-11T08:21:23Z

Same here.

Sirna-82 · 2025-01-22T23:14:54Z

# Modified adonis function with F2 statistic implementation 


adonis_F2 <- function(formula, data = NULL, permutations = 999, method = "bray", 
                      strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly", 
                      parallel = getOption("mc.cores"), ...) {
  
  EPS <- sqrt(.Machine$double.eps)
  TOL <- 1e-07
  Terms <- terms(formula, data = data)
  lhs <- formula[[2]]
  lhs <- eval(lhs, data, parent.frame())
  formula[[2]] <- NULL
  rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
  op.c <- options()$contrasts
  options(contrasts = c(contr.unordered, contr.ordered))
  rhs <- model.matrix(formula, rhs.frame)
  options(contrasts = op.c)
  grps <- attr(rhs, "assign")
  qrhs <- qr(rhs)
  rhs <- rhs[, qrhs$pivot, drop = FALSE]
  rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
  grps <- grps[qrhs$pivot][1:qrhs$rank]
  u.grps <- unique(grps)
  nterms <- length(u.grps) - 1
  
  if (nterms < 1) stop("right-hand-side of formula has no usable terms")
  
  H.s <- lapply(2:length(u.grps), function(j) {
    Xj <- rhs[, grps %in% u.grps[1:j]]
    qrX <- qr(Xj, tol = TOL)
    Q <- qr.Q(qrX)
    tcrossprod(Q[, 1:qrX$rank])
  })
  
  if (inherits(lhs, "dist")) {
    if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
    dmat <- as.matrix(lhs^2)
  } else if ((is.matrix(lhs) || is.data.frame(lhs)) && isSymmetric(unname(as.matrix(lhs)))) {
    dmat <- as.matrix(lhs^2)
    lhs <- as.dist(lhs)
  } else {
    dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
    dmat <- dist.lhs^2
  }
  
  n <- nrow(dmat)
  G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
  SS.Exp.comb <- sapply(H.s, function(hat) sum(G * t(hat)))
  SS.Exp.each <- c(SS.Exp.comb - c(0, SS.Exp.comb[-nterms]))
  H.snterm <- H.s[[nterms]]
  tIH.snterm <- t(diag(n) - H.snterm)
  
  if (length(H.s) > 1) 
    for (i in length(H.s):2) H.s[[i]] <- H.s[[i]] - H.s[[i - 1]]
  
  SS.Res <- sum(G * tIH.snterm)
  df.Exp <- sapply(u.grps[-1], function(i) sum(grps == i))
  df.Res <- n - qrhs$rank
  
  # Calculate beta.sites and beta.spp
  if (inherits(lhs, "dist")) {
    beta.sites <- qr.coef(qrhs, as.matrix(lhs))
    beta.spp <- NULL
  } else {
    beta.sites <- qr.coef(qrhs, dist.lhs)
    beta.spp <- qr.coef(qrhs, as.matrix(lhs))
  }
  
  colnames(beta.spp) <- colnames(lhs)
  colnames(beta.sites) <- rownames(lhs)
  
  ### Modified F2 calculation ###
  
  # Group ID
  grp_ID <- as.character(interaction(as.data.frame(rhs))) # Group membership
  n_per_group <- table(grp_ID) # Sample size of each group
  VL <- rep(0, length = length(unique(grp_ID))) # Initialize
  names(VL) <- names(n_per_group)
  
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      d_ij2 <- dmat[i, j]
      grp_i <- grp_ID[i]
      grp_j <- grp_ID[j]
      
      if (grp_i == grp_j) {
        Eij_L <- 1
      } else {
        Eij_L <- 0
      } # Indicator whether observations i and j are in the same group
      
      nL <- n_per_group[grp_i] # Number of observations in group L
      VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion for each group
    }
  }
  
  # Denominator for F2
  F.Mod_denominator <- 0
  for (i in 1:length(VL)) {
    F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
  }
  names(F.Mod_denominator) <- NULL
  
  # Numerator (among-group sum of squares)
  F.Mod_numerator <- SS.Exp.comb
  
  # Modified F-statistic
  F.Mod <- F.Mod_numerator / F.Mod_denominator
  
  ########### Permutation procedure ###########
  
  # Generate permutation matrix manually
  p <- matrix(sample(1:n, size = permutations * n, replace = TRUE), 
              nrow = permutations, ncol = n)
  
  # Permutation test function
  f.test <- function(tH, G, df.Exp, df.Res, tIH.snterm) {
    (sum(G * tH) / df.Exp) / (sum(G * tIH.snterm) / df.Res)
  }
  
  permutations <- nrow(p)
  
  if (permutations) {
    tH.s <- lapply(H.s, t)
    
    if (is.null(parallel)) 
      parallel <- 1
    
    hasClus <- inherits(parallel, "cluster")
    isParal <- hasClus || parallel > 1
    isMulticore <- .Platform$OS.type == "unix" && !hasClus
    
    if (isParal && !isMulticore && !hasClus) {
      parallel <- makeCluster(parallel)
    }
    
    if (isParal) {
      if (isMulticore) {
        f.perms <- sapply(1:nterms, function(i) unlist(mclapply(1:permutations, 
                                                                function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm), mc.cores = parallel)))
      } else {
        f.perms <- sapply(1:nterms, function(i) parSapply(parallel, 
                                                          1:permutations, function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
      }
    } else {
      f.perms <- sapply(1:nterms, function(i) sapply(1:permutations, 
                                                     function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
    }
    
    if (isParal && !isMulticore && !hasClus) 
      stopCluster(parallel)
    
    P <- (rowSums(t(f.perms) >= F.Mod - EPS) + 1) / (permutations + 1)
  } else {
    f.perms <- P <- rep(NA, nterms)
  }
  
  # Results
  SumsOfSqs = c(SS.Exp.each, SS.Res, sum(SS.Exp.each) + SS.Res)
  tab <- data.frame(Df = c(df.Exp, df.Res, n - 1), SumsOfSqs = SumsOfSqs, 
                    MeanSqs = c(SS.Exp.each / df.Exp, SS.Res / df.Res, NA), 
                    F.Model = c(F.Mod, NA, NA), R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)], 
                    P = c(P, NA, NA))
  rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps], 
                     "Residuals", "Total")
  colnames(tab)[ncol(tab)] <- "Pr(>F)"
  
  class(tab) <- c("anova", class(tab))
  
  # Output object
  out <- list(aov.tab = tab, call = match.call(), coefficients = beta.spp, 
              coef.sites = beta.sites, f.perms = f.perms, model.matrix = rhs, 
              terms = Terms)
  class(out) <- "adonis"
  out
}


# Load the vegan package
library(vegan)

# Load the dune dataset
data(dune)
data(dune.env)

# Check the structure of the data
head(dune)
head(dune.env)

# Run the adonis_F2 function on the dune dataset
# Use Management (a factor variable in dune.env) as the grouping variable
result <- adonis_F2(dune ~ Management, data = dune.env, permutations = 999)

# Print the result
print(result)

#> print(result)

#Call:
#  adonis_F2(formula = dune ~ Management, data = dune.env, permutations = 999) 

# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
# Management  3    1.4686 0.48953  3.1302 0.34161  0.026 *
#  Residuals  16    2.8304 0.17690         0.65839         
# Total      19    4.2990                 1.00000         
# ---
#  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

jarioksa · 2025-01-23T10:11:58Z

@Sirna-82 : your code modifies observed F statistic, but it does not modify simulated statistic in the same way. In your example your modified $F = 3.1302$, but permuted statistic with the original order 1:20 gives the unmodified $F = 2.7672$ giving wrong permutation P values as you compare unmodified values to a modified value. Moreover, you make the changes in a function that is to be removed from vegan and in a way that makes practically impossible to see what you have changed. You should base your work on adonis2 and provide a diff file instead of dumping the whole code.

Sirna-82 · 2025-01-23T15:44:33Z

My apologies for any inconveniences. This is my last attempt. I apologize in advance for any possible errors that might render this code useless.

The main changes are:

Modified F2:

# Modified F-statistic (F2)
F.Mod_numerator <- SS.Exp.comb
F.Mod_denominator <- 0
for (i in 1:length(VL)) {
    F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
}
F.Mod <- F.Mod_numerator / F.Mod_denominator
# VL represents the within-group dispersion for each group, which is calculated using pairwise distances for each group

Calculate within-group dispersion for each group

for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
        d_ij2 <- dmat[i, j]
        grp_i <- grp_ID[i]
        grp_j <- grp_ID[j]
        
        if (grp_i == grp_j) {
            Eij_L <- 1
        } else {
            Eij_L <- 0
        }
        
        nL <- n_per_group[grp_i] # Number of observations in group L
        VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion
    }
}

Premutation procedure

# Permutation test for F2 (modified statistic)
f.test <- function(dmat, rhs, p) {
    perm_dmat <- dmat[p, p]
    perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
    perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    return(perm_SS.Exp.comb / perm_SS.Res)
}

### This is an attempt to implement F2 in adonis2; I paste here the complete code of the function:

adonis2_F2 <- function(formula, data = NULL, permutations = 999, method = "bray", 
                       strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly", 
                       parallel = getOption("mc.cores"), ...) {
  
  EPS <- sqrt(.Machine$double.eps)
  TOL <- 1e-07
  Terms <- terms(formula, data = data)
  lhs <- formula[[2]]
  lhs <- eval(lhs, data, parent.frame())
  formula[[2]] <- NULL
  rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
  op.c <- options()$contrasts
  options(contrasts = c(contr.unordered, contr.ordered))
  rhs <- model.matrix(formula, rhs.frame)
  options(contrasts = op.c)
  grps <- attr(rhs, "assign")
  qrhs <- qr(rhs)
  rhs <- rhs[, qrhs$pivot, drop = FALSE]
  rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
  grps <- grps[qrhs$pivot][1:qrhs$rank]
  u.grps <- unique(grps)
  nterms <- length(u.grps) - 1
  
  if (nterms < 1) stop("right-hand-side of formula has no usable terms")
  
  # Calculate the total sum of squares (within-group and between-group)
  if (inherits(lhs, "dist")) {
    if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
    dmat <- as.matrix(lhs^2)
  } else {
    dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
    dmat <- dist.lhs^2
  }
  
  n <- nrow(dmat)
  G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
  
  # Sums of squares between groups and residuals
  SS.Exp.comb <- sum(G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs))) 
  SS.Res <- sum(G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
  
  df.Exp <- length(u.grps) - 1
  df.Res <- n - df.Exp - 1
  
  # F2 statistic: between-group variance over residual variance
  F.Mod <- SS.Exp.comb / SS.Res
  
  ########### Permutation procedure ###########
  
  # Generate permutation matrix manually
  p <- matrix(sample(1:n, size = permutations * n, replace = TRUE), 
              nrow = permutations, ncol = n)
  
  # Permutation test function for F2
  f.test <- function(dmat, rhs, p) {
    perm_dmat <- dmat[p, p]
    perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
    perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    return(perm_SS.Exp.comb / perm_SS.Res)
  }
  
  # Generate F2 values for each permutation
  f.perms <- sapply(1:permutations, function(j) f.test(dmat, rhs, p[j, ]))
  
  # Calculate permutation p-value
  P_val <- (sum(f.perms >= F.Mod) + 1) / (permutations + 1)
  
  # Results - show observed F2 statistic and permutation p-value
  SumsOfSqs = c(SS.Exp.comb, SS.Res, sum(SS.Exp.comb) + SS.Res)
  tab <- data.frame(
    Df = c(df.Exp, df.Res, n - 1),
    SumsOfSqs = SumsOfSqs,
    MeanSqs = c(SS.Exp.comb / df.Exp, SS.Res / df.Res, NA),
    F.Model = c(F.Mod, NA, NA),  # Observed F2 statistic for model
    R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)],
    P = c(P_val, NA, NA)  # P-value from permutation test
  )
  
  # Update the ANOVA table with the correct row names
  rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps], 
                     "Residuals", "Total")
  colnames(tab)[ncol(tab)] <- "Pr(>F)"
  
  # Return the output as class "anova"
  class(tab) <- c("anova", class(tab))
  
  return(tab)
} # I hope this works. All the best, Sirna-82

gavinsimpson added enhancement request-for-comments labels Feb 28, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

gavinsimpson commented Feb 28, 2022 •

edited

Loading

MoREpro commented Aug 3, 2023

caonetto commented Sep 11, 2023

Sirna-82 commented Jan 22, 2025 •

edited by jarioksa

Loading

jarioksa commented Jan 23, 2025

Sirna-82 commented Jan 23, 2025 •

edited by gavinsimpson

Loading

Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

Comments

gavinsimpson commented Feb 28, 2022 • edited Loading

MoREpro commented Aug 3, 2023

caonetto commented Sep 11, 2023

Sirna-82 commented Jan 22, 2025 • edited by jarioksa Loading

jarioksa commented Jan 23, 2025

Sirna-82 commented Jan 23, 2025 • edited by gavinsimpson Loading

gavinsimpson commented Feb 28, 2022 •

edited

Loading

Sirna-82 commented Jan 22, 2025 •

edited by jarioksa

Loading

Sirna-82 commented Jan 23, 2025 •

edited by gavinsimpson

Loading