-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add the modified pseudo F statistic from Anderson et al 2017 to account for heterogeneity in adonis/adonis2 #491
Comments
I would really need this modified F as my data have a very unbalanced design and the dispersion is heavily inhomogeneous in different groups. H would be really happy to see this implemented but unfortunately I’m not able to do it. |
Same here. |
# Modified adonis function with F2 statistic implementation
adonis_F2 <- function(formula, data = NULL, permutations = 999, method = "bray",
strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly",
parallel = getOption("mc.cores"), ...) {
EPS <- sqrt(.Machine$double.eps)
TOL <- 1e-07
Terms <- terms(formula, data = data)
lhs <- formula[[2]]
lhs <- eval(lhs, data, parent.frame())
formula[[2]] <- NULL
rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
op.c <- options()$contrasts
options(contrasts = c(contr.unordered, contr.ordered))
rhs <- model.matrix(formula, rhs.frame)
options(contrasts = op.c)
grps <- attr(rhs, "assign")
qrhs <- qr(rhs)
rhs <- rhs[, qrhs$pivot, drop = FALSE]
rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
grps <- grps[qrhs$pivot][1:qrhs$rank]
u.grps <- unique(grps)
nterms <- length(u.grps) - 1
if (nterms < 1) stop("right-hand-side of formula has no usable terms")
H.s <- lapply(2:length(u.grps), function(j) {
Xj <- rhs[, grps %in% u.grps[1:j]]
qrX <- qr(Xj, tol = TOL)
Q <- qr.Q(qrX)
tcrossprod(Q[, 1:qrX$rank])
})
if (inherits(lhs, "dist")) {
if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
dmat <- as.matrix(lhs^2)
} else if ((is.matrix(lhs) || is.data.frame(lhs)) && isSymmetric(unname(as.matrix(lhs)))) {
dmat <- as.matrix(lhs^2)
lhs <- as.dist(lhs)
} else {
dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
dmat <- dist.lhs^2
}
n <- nrow(dmat)
G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
SS.Exp.comb <- sapply(H.s, function(hat) sum(G * t(hat)))
SS.Exp.each <- c(SS.Exp.comb - c(0, SS.Exp.comb[-nterms]))
H.snterm <- H.s[[nterms]]
tIH.snterm <- t(diag(n) - H.snterm)
if (length(H.s) > 1)
for (i in length(H.s):2) H.s[[i]] <- H.s[[i]] - H.s[[i - 1]]
SS.Res <- sum(G * tIH.snterm)
df.Exp <- sapply(u.grps[-1], function(i) sum(grps == i))
df.Res <- n - qrhs$rank
# Calculate beta.sites and beta.spp
if (inherits(lhs, "dist")) {
beta.sites <- qr.coef(qrhs, as.matrix(lhs))
beta.spp <- NULL
} else {
beta.sites <- qr.coef(qrhs, dist.lhs)
beta.spp <- qr.coef(qrhs, as.matrix(lhs))
}
colnames(beta.spp) <- colnames(lhs)
colnames(beta.sites) <- rownames(lhs)
### Modified F2 calculation ###
# Group ID
grp_ID <- as.character(interaction(as.data.frame(rhs))) # Group membership
n_per_group <- table(grp_ID) # Sample size of each group
VL <- rep(0, length = length(unique(grp_ID))) # Initialize
names(VL) <- names(n_per_group)
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
d_ij2 <- dmat[i, j]
grp_i <- grp_ID[i]
grp_j <- grp_ID[j]
if (grp_i == grp_j) {
Eij_L <- 1
} else {
Eij_L <- 0
} # Indicator whether observations i and j are in the same group
nL <- n_per_group[grp_i] # Number of observations in group L
VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion for each group
}
}
# Denominator for F2
F.Mod_denominator <- 0
for (i in 1:length(VL)) {
F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
}
names(F.Mod_denominator) <- NULL
# Numerator (among-group sum of squares)
F.Mod_numerator <- SS.Exp.comb
# Modified F-statistic
F.Mod <- F.Mod_numerator / F.Mod_denominator
########### Permutation procedure ###########
# Generate permutation matrix manually
p <- matrix(sample(1:n, size = permutations * n, replace = TRUE),
nrow = permutations, ncol = n)
# Permutation test function
f.test <- function(tH, G, df.Exp, df.Res, tIH.snterm) {
(sum(G * tH) / df.Exp) / (sum(G * tIH.snterm) / df.Res)
}
permutations <- nrow(p)
if (permutations) {
tH.s <- lapply(H.s, t)
if (is.null(parallel))
parallel <- 1
hasClus <- inherits(parallel, "cluster")
isParal <- hasClus || parallel > 1
isMulticore <- .Platform$OS.type == "unix" && !hasClus
if (isParal && !isMulticore && !hasClus) {
parallel <- makeCluster(parallel)
}
if (isParal) {
if (isMulticore) {
f.perms <- sapply(1:nterms, function(i) unlist(mclapply(1:permutations,
function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm), mc.cores = parallel)))
} else {
f.perms <- sapply(1:nterms, function(i) parSapply(parallel,
1:permutations, function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
}
} else {
f.perms <- sapply(1:nterms, function(i) sapply(1:permutations,
function(j) f.test(tH.s[[i]], G[p[j, ], p[j, ]], df.Exp[i], df.Res, tIH.snterm)))
}
if (isParal && !isMulticore && !hasClus)
stopCluster(parallel)
P <- (rowSums(t(f.perms) >= F.Mod - EPS) + 1) / (permutations + 1)
} else {
f.perms <- P <- rep(NA, nterms)
}
# Results
SumsOfSqs = c(SS.Exp.each, SS.Res, sum(SS.Exp.each) + SS.Res)
tab <- data.frame(Df = c(df.Exp, df.Res, n - 1), SumsOfSqs = SumsOfSqs,
MeanSqs = c(SS.Exp.each / df.Exp, SS.Res / df.Res, NA),
F.Model = c(F.Mod, NA, NA), R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)],
P = c(P, NA, NA))
rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps],
"Residuals", "Total")
colnames(tab)[ncol(tab)] <- "Pr(>F)"
class(tab) <- c("anova", class(tab))
# Output object
out <- list(aov.tab = tab, call = match.call(), coefficients = beta.spp,
coef.sites = beta.sites, f.perms = f.perms, model.matrix = rhs,
terms = Terms)
class(out) <- "adonis"
out
}
# Load the vegan package
library(vegan)
# Load the dune dataset
data(dune)
data(dune.env)
# Check the structure of the data
head(dune)
head(dune.env)
# Run the adonis_F2 function on the dune dataset
# Use Management (a factor variable in dune.env) as the grouping variable
result <- adonis_F2(dune ~ Management, data = dune.env, permutations = 999)
# Print the result
print(result)
#> print(result)
#Call:
# adonis_F2(formula = dune ~ Management, data = dune.env, permutations = 999)
# Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
# Management 3 1.4686 0.48953 3.1302 0.34161 0.026 *
# Residuals 16 2.8304 0.17690 0.65839
# Total 19 4.2990 1.00000
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 |
@Sirna-82 : your code modifies observed F statistic, but it does not modify simulated statistic in the same way. In your example your modified |
My apologies for any inconveniences. This is my last attempt. I apologize in advance for any possible errors that might render this code useless. The main changes are:
# Modified F-statistic (F2)
F.Mod_numerator <- SS.Exp.comb
F.Mod_denominator <- 0
for (i in 1:length(VL)) {
F.Mod_denominator <- F.Mod_denominator + ((1 - (n_per_group[i] / n)) * VL[i])
}
F.Mod <- F.Mod_numerator / F.Mod_denominator
# VL represents the within-group dispersion for each group, which is calculated using pairwise distances for each group
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
d_ij2 <- dmat[i, j]
grp_i <- grp_ID[i]
grp_j <- grp_ID[j]
if (grp_i == grp_j) {
Eij_L <- 1
} else {
Eij_L <- 0
}
nL <- n_per_group[grp_i] # Number of observations in group L
VL[grp_i] <- VL[grp_i] + (Eij_L * d_ij2 / (nL * (nL - 1))) # Within-group dispersion
}
}
# Permutation test for F2 (modified statistic)
f.test <- function(dmat, rhs, p) {
perm_dmat <- dmat[p, p]
perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
return(perm_SS.Exp.comb / perm_SS.Res)
} ### This is an attempt to implement F2 in adonis2; I paste here the complete code of the function:
adonis2_F2 <- function(formula, data = NULL, permutations = 999, method = "bray",
strata = NULL, contr.unordered = "contr.sum", contr.ordered = "contr.poly",
parallel = getOption("mc.cores"), ...) {
EPS <- sqrt(.Machine$double.eps)
TOL <- 1e-07
Terms <- terms(formula, data = data)
lhs <- formula[[2]]
lhs <- eval(lhs, data, parent.frame())
formula[[2]] <- NULL
rhs.frame <- model.frame(formula, data, drop.unused.levels = TRUE)
op.c <- options()$contrasts
options(contrasts = c(contr.unordered, contr.ordered))
rhs <- model.matrix(formula, rhs.frame)
options(contrasts = op.c)
grps <- attr(rhs, "assign")
qrhs <- qr(rhs)
rhs <- rhs[, qrhs$pivot, drop = FALSE]
rhs <- rhs[, 1:qrhs$rank, drop = FALSE]
grps <- grps[qrhs$pivot][1:qrhs$rank]
u.grps <- unique(grps)
nterms <- length(u.grps) - 1
if (nterms < 1) stop("right-hand-side of formula has no usable terms")
# Calculate the total sum of squares (within-group and between-group)
if (inherits(lhs, "dist")) {
if (any(lhs < -TOL)) stop("dissimilarities must be non-negative")
dmat <- as.matrix(lhs^2)
} else {
dist.lhs <- as.matrix(vegdist(lhs, method = method, ...))
dmat <- dist.lhs^2
}
n <- nrow(dmat)
G <- -sweep(dmat, 1, rowMeans(dmat)) / 2
# Sums of squares between groups and residuals
SS.Exp.comb <- sum(G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
SS.Res <- sum(G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
df.Exp <- length(u.grps) - 1
df.Res <- n - df.Exp - 1
# F2 statistic: between-group variance over residual variance
F.Mod <- SS.Exp.comb / SS.Res
########### Permutation procedure ###########
# Generate permutation matrix manually
p <- matrix(sample(1:n, size = permutations * n, replace = TRUE),
nrow = permutations, ncol = n)
# Permutation test function for F2
f.test <- function(dmat, rhs, p) {
perm_dmat <- dmat[p, p]
perm_G <- -sweep(perm_dmat, 1, rowMeans(perm_dmat)) / 2
perm_SS.Exp.comb <- sum(perm_G * t(rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
perm_SS.Res <- sum(perm_G * (diag(n) - rhs %*% solve(t(rhs) %*% rhs) %*% t(rhs)))
return(perm_SS.Exp.comb / perm_SS.Res)
}
# Generate F2 values for each permutation
f.perms <- sapply(1:permutations, function(j) f.test(dmat, rhs, p[j, ]))
# Calculate permutation p-value
P_val <- (sum(f.perms >= F.Mod) + 1) / (permutations + 1)
# Results - show observed F2 statistic and permutation p-value
SumsOfSqs = c(SS.Exp.comb, SS.Res, sum(SS.Exp.comb) + SS.Res)
tab <- data.frame(
Df = c(df.Exp, df.Res, n - 1),
SumsOfSqs = SumsOfSqs,
MeanSqs = c(SS.Exp.comb / df.Exp, SS.Res / df.Res, NA),
F.Model = c(F.Mod, NA, NA), # Observed F2 statistic for model
R2 = SumsOfSqs / SumsOfSqs[length(SumsOfSqs)],
P = c(P_val, NA, NA) # P-value from permutation test
)
# Update the ANOVA table with the correct row names
rownames(tab) <- c(attr(attr(rhs.frame, "terms"), "term.labels")[u.grps],
"Residuals", "Total")
colnames(tab)[ncol(tab)] <- "Pr(>F)"
# Return the output as class "anova"
class(tab) <- c("anova", class(tab))
return(tab)
} # I hope this works. All the best, Sirna-82 |
Anderson et al (2017: doi: 10.1111/anzs.12176) have proposed a modification to the pseudo F statistic in PERMANOVA that accounts for group heterogeneity.
The modified F is:
and provide an efficient way to get the group dispersions from the projection matrix that forms part of the calculation of F. This should fit nicely with our permutation-based testing.
Anderseon et al (2017) also provide two bootstrap-based testing options which we might explore, but these are of lesser priority.
Thoughts? (I can pass on the paper if you don't have access to it)
The text was updated successfully, but these errors were encountered: