Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491

Open
gavinsimpson opened this issue Feb 28, 2022 · 5 comments

Comments

@gavinsimpson
Copy link
Contributor

gavinsimpson commented Feb 28, 2022

Anderson et al (2017: doi: 10.1111/anzs.12176) have proposed a modification to the pseudo F statistic in PERMANOVA that accounts for group heterogeneity.

The modified F is:

Screenshot from 2022-02-28 13-11-21

and provide an efficient way to get the group dispersions from the projection matrix that forms part of the calculation of F. This should fit nicely with our permutation-based testing.

Anderseon et al (2017) also provide two bootstrap-based testing options which we might explore, but these are of lesser priority.

Thoughts? (I can pass on the paper if you don't have access to it)

@MoREpro
Copy link

MoREpro commented Aug 3, 2023

I would really need this modified F as my data have a very unbalanced design and the dispersion is heavily inhomogeneous in different groups. H would be really happy to see this implemented but unfortunately I’m not able to do it.

@caonetto
Copy link

Same here.

@Sirna-82
Copy link

Sirna-82 commented Jan 22, 2025

# Modified adonis function with F2 statistic implementation 


adonis_F2 <- function(formula, data = NULL, permutations = 999, method = "bray", 
                      strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly", 
                      parallel = getOption("mc.cores"), ...) {
  
  EPS <- sqrt(.Machine$double.eps)
  TOL <- 1e-07
  Terms <- terms(formula, data = data)
  lhs <- formula[[2]]
  lhs <- eval(lhs, data, parent.frame())
  formula[[2]] <- NULL
  rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
  op.c <- options()$contrasts
  options(contrasts = c(contr.unordered, contr.ordered))
  rhs <- model.matrix(formula, rhs.frame)
  options(contrasts = op.c)
  grps <- attr(rhs, "assign")
  qrhs <- qr(rhs)
  rhs <- rhs[, qrhs$pivot, drop = FALSE]
  rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
  grps <- grps[qrhs$pivot][1:qrhs$rank]
  u.grps <- unique(grps)
  nterms <- length(u.grps) - 1
  
  if (nterms < 1) stop("right-hand-side of formula has no usable terms")
  
  H.s <- lapply(2:length(u.grps), function(j) {
    Xj <- rhs[, grps %in% u.grps[1:j]]
    qrX <- qr(Xj, tol = TOL)
    Q <- qr.Q(qrX)
    tcrossprod(Q[, 1:qrX$rank])
  })
  
  if (inherits(lhs, "dist")) {
    if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
    dmat <- as.matrix(lhs^2)
  } else if ((is.matrix(lhs) || is.data.frame(lhs)) && isSymmetric(unname(as.matrix(lhs)))) {
    dmat <- as.matrix(lhs^2)
    lhs <- as.dist(lhs)
  } else {
    dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
    dmat <- dist.lhs^2
  }
  
  n <- nrow(dmat)
  G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
  SS.Exp.comb <- sapply(H.s, function(hat) sum(G * t(hat)))
  SS.Exp.each <- c(SS.Exp.comb - c(0, SS.Exp.comb[-nterms]))
  H.snterm <- H.s[[nterms]]
  tIH.snterm <- t(diag(n) - H.snterm)
  
  if (length(H.s) > 1) 
    for (i in length(H.s):2) H.s[[i]] <- H.s[[i]] - H.s[[i - 1]]
  
  SS.Res <- sum(G * tIH.snterm)
  df.Exp <- sapply(u.grps[-1], function(i) sum(grps == i))
  df.Res <- n - qrhs$rank
  
  # Calculate beta.sites and beta.spp
  if (inherits(lhs, "dist")) {
    beta.sites <- qr.coef(qrhs, as.matrix(lhs))
    beta.spp <- NULL
  } else {
    beta.sites <- qr.coef(qrhs, dist.lhs)
    beta.spp <- qr.coef(qrhs, as.matrix(lhs))
  }
  
  colnames(beta.spp) <- colnames(lhs)
  colnames(beta.sites) <- rownames(lhs)
  
  ### Modified F2 calculation ###
  
  # Group ID
  grp_ID <- as.character(interaction(as.data.frame(rhs))) # Group membership
  n_per_group <- table(grp_ID) # Sample size of each group
  VL <- rep(0, length = length(unique(grp_ID))) # Initialize
  names(VL) <- names(n_per_group)
  
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      d_ij2 <- dmat[i, j]
      grp_i <- grp_ID[i]
      grp_j <- grp_ID[j]
      
      if (grp_i == grp_j) {
        Eij_L <- 1
      } else {
        Eij_L <- 0
      } # Indicator whether observations i and j are in the same group
      
      nL <- n_per_group[grp_i] # Number of observations in group L
      VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion for each group
    }
  }
  
  # Denominator for F2
  F.Mod_denominator <- 0
  for (i in 1:length(VL)) {
    F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
  }
  names(F.Mod_denominator) <- NULL
  
  # Numerator (among-group sum of squares)
  F.Mod_numerator <- SS.Exp.comb
  
  # Modified F-statistic
  F.Mod <- F.Mod_numerator / F.Mod_denominator
  
  ########### Permutation procedure ###########
  
  # Generate permutation matrix manually
  p <- matrix(sample(1:n, size = permutations * n, replace = TRUE), 
              nrow = permutations, ncol = n)
  
  # Permutation test function
  f.test <- function(tH, G, df.Exp, df.Res, tIH.snterm) {
    (sum(G * tH) / df.Exp) / (sum(G * tIH.snterm) / df.Res)
  }
  
  permutations <- nrow(p)
  
  if (permutations) {
    tH.s <- lapply(H.s, t)
    
    if (is.null(parallel)) 
      parallel <- 1
    
    hasClus <- inherits(parallel, "cluster")
    isParal <- hasClus || parallel > 1
    isMulticore <- .Platform$OS.type == "unix" && !hasClus
    
    if (isParal && !isMulticore && !hasClus) {
      parallel <- makeCluster(parallel)
    }
    
    if (isParal) {
      if (isMulticore) {
        f.perms <- sapply(1:nterms, function(i) unlist(mclapply(1:permutations, 
                                                                function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm), mc.cores = parallel)))
      } else {
        f.perms <- sapply(1:nterms, function(i) parSapply(parallel, 
                                                          1:permutations, function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
      }
    } else {
      f.perms <- sapply(1:nterms, function(i) sapply(1:permutations, 
                                                     function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
    }
    
    if (isParal && !isMulticore && !hasClus) 
      stopCluster(parallel)
    
    P <- (rowSums(t(f.perms) >= F.Mod - EPS) + 1) / (permutations + 1)
  } else {
    f.perms <- P <- rep(NA, nterms)
  }
  
  # Results
  SumsOfSqs = c(SS.Exp.each, SS.Res, sum(SS.Exp.each) + SS.Res)
  tab <- data.frame(Df = c(df.Exp, df.Res, n - 1), SumsOfSqs = SumsOfSqs, 
                    MeanSqs = c(SS.Exp.each / df.Exp, SS.Res / df.Res, NA), 
                    F.Model = c(F.Mod, NA, NA), R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)], 
                    P = c(P, NA, NA))
  rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps], 
                     "Residuals", "Total")
  colnames(tab)[ncol(tab)] <- "Pr(>F)"
  
  class(tab) <- c("anova", class(tab))
  
  # Output object
  out <- list(aov.tab = tab, call = match.call(), coefficients = beta.spp, 
              coef.sites = beta.sites, f.perms = f.perms, model.matrix = rhs, 
              terms = Terms)
  class(out) <- "adonis"
  out
}


# Load the vegan package
library(vegan)

# Load the dune dataset
data(dune)
data(dune.env)

# Check the structure of the data
head(dune)
head(dune.env)

# Run the adonis_F2 function on the dune dataset
# Use Management (a factor variable in dune.env) as the grouping variable
result <- adonis_F2(dune ~ Management, data = dune.env, permutations = 999)

# Print the result
print(result)

#> print(result)

#Call:
#  adonis_F2(formula = dune ~ Management, data = dune.env, permutations = 999) 

# Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
# Management  3    1.4686 0.48953  3.1302 0.34161  0.026 *
#  Residuals  16    2.8304 0.17690         0.65839         
# Total      19    4.2990                 1.00000         
# ---
#  Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

@jarioksa
Copy link
Contributor

@Sirna-82 : your code modifies observed F statistic, but it does not modify simulated statistic in the same way. In your example your modified $F = 3.1302$, but permuted statistic with the original order 1:20 gives the unmodified $F = 2.7672$ giving wrong permutation P values as you compare unmodified values to a modified value. Moreover, you make the changes in a function that is to be removed from vegan and in a way that makes practically impossible to see what you have changed. You should base your work on adonis2 and provide a diff file instead of dumping the whole code.

@Sirna-82
Copy link

Sirna-82 commented Jan 23, 2025

My apologies for any inconveniences. This is my last attempt. I apologize in advance for any possible errors that might render this code useless.

The main changes are:

  1. Modified F2:
# Modified F-statistic (F2)
F.Mod_numerator <- SS.Exp.comb
F.Mod_denominator <- 0
for (i in 1:length(VL)) {
    F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
}
F.Mod <- F.Mod_numerator / F.Mod_denominator
# VL represents the within-group dispersion for each group, which is calculated using pairwise distances for each group
  1. Calculate within-group dispersion for each group
for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
        d_ij2 <- dmat[i, j]
        grp_i <- grp_ID[i]
        grp_j <- grp_ID[j]
        
        if (grp_i == grp_j) {
            Eij_L <- 1
        } else {
            Eij_L <- 0
        }
        
        nL <- n_per_group[grp_i] # Number of observations in group L
        VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion
    }
}
  1. Premutation procedure
# Permutation test for F2 (modified statistic)
f.test <- function(dmat, rhs, p) {
    perm_dmat <- dmat[p, p]
    perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
    perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    return(perm_SS.Exp.comb / perm_SS.Res)
}
### This is an attempt to implement F2 in adonis2; I paste here the complete code of the function:

adonis2_F2 <- function(formula, data = NULL, permutations = 999, method = "bray", 
                       strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly", 
                       parallel = getOption("mc.cores"), ...) {
  
  EPS <- sqrt(.Machine$double.eps)
  TOL <- 1e-07
  Terms <- terms(formula, data = data)
  lhs <- formula[[2]]
  lhs <- eval(lhs, data, parent.frame())
  formula[[2]] <- NULL
  rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
  op.c <- options()$contrasts
  options(contrasts = c(contr.unordered, contr.ordered))
  rhs <- model.matrix(formula, rhs.frame)
  options(contrasts = op.c)
  grps <- attr(rhs, "assign")
  qrhs <- qr(rhs)
  rhs <- rhs[, qrhs$pivot, drop = FALSE]
  rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
  grps <- grps[qrhs$pivot][1:qrhs$rank]
  u.grps <- unique(grps)
  nterms <- length(u.grps) - 1
  
  if (nterms < 1) stop("right-hand-side of formula has no usable terms")
  
  # Calculate the total sum of squares (within-group and between-group)
  if (inherits(lhs, "dist")) {
    if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
    dmat <- as.matrix(lhs^2)
  } else {
    dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
    dmat <- dist.lhs^2
  }
  
  n <- nrow(dmat)
  G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
  
  # Sums of squares between groups and residuals
  SS.Exp.comb <- sum(G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs))) 
  SS.Res <- sum(G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
  
  df.Exp <- length(u.grps) - 1
  df.Res <- n - df.Exp - 1
  
  # F2 statistic: between-group variance over residual variance
  F.Mod <- SS.Exp.comb / SS.Res
  
  ########### Permutation procedure ###########
  
  # Generate permutation matrix manually
  p <- matrix(sample(1:n, size = permutations * n, replace = TRUE), 
              nrow = permutations, ncol = n)
  
  # Permutation test function for F2
  f.test <- function(dmat, rhs, p) {
    perm_dmat <- dmat[p, p]
    perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
    perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
    return(perm_SS.Exp.comb / perm_SS.Res)
  }
  
  # Generate F2 values for each permutation
  f.perms <- sapply(1:permutations, function(j) f.test(dmat, rhs, p[j, ]))
  
  # Calculate permutation p-value
  P_val <- (sum(f.perms >= F.Mod) + 1) / (permutations + 1)
  
  # Results - show observed F2 statistic and permutation p-value
  SumsOfSqs = c(SS.Exp.comb, SS.Res, sum(SS.Exp.comb) + SS.Res)
  tab <- data.frame(
    Df = c(df.Exp, df.Res, n - 1),
    SumsOfSqs = SumsOfSqs,
    MeanSqs = c(SS.Exp.comb / df.Exp, SS.Res / df.Res, NA),
    F.Model = c(F.Mod, NA, NA),  # Observed F2 statistic for model
    R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)],
    P = c(P_val, NA, NA)  # P-value from permutation test
  )
  
  # Update the ANOVA table with the correct row names
  rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps], 
                     "Residuals", "Total")
  colnames(tab)[ncol(tab)] <- "Pr(>F)"
  
  # Return the output as class "anova"
  class(tab) <- c("anova", class(tab))
  
  return(tab)
} # I hope this works. All the best, Sirna-82

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

5 participants