Skip to content

Commit

Permalink
Add pseudocount to WMS fix V13 by removing samples with only 1 featur…
Browse files Browse the repository at this point in the history
…e - gingival dataset
  • Loading branch information
sdgamboa committed Jan 17, 2025
1 parent 8480221 commit a7f268e
Show file tree
Hide file tree
Showing 3 changed files with 427 additions and 113 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: MicrobiomeBenchmarkDataAnalyses
Title: Benchmarking analyses with the MicrobiomeBenchmarkData package
Version: 0.99.27
Version: 0.99.28
Authors@R:
person("Samuel David", "Gamboa-Tuz", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6863-7943"))
Expand Down
339 changes: 339 additions & 0 deletions vignettes/articles/HMP_2012_16S_gingival_V13.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
---
title: "Subgingival vs supgragingival plaque (V13 - OTU)"
subtitle: "HMP_2012_16S_gingival_V13"
author: "Samuel Gamboa"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
toc: yes
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
warning = FALSE
)
```

```{r packages, message=FALSE, warning=FALSE}
library(MicrobiomeBenchmarkDataAnalyses)
library(MicrobiomeBenchmarkData)
library(dplyr)
library(purrr)
library(phyloseq)
library(mia)
library(benchdamic)
library(ggplot2)
library(ggpubr)
```

## Data

Import data:

```{r import data, message=FALSE}
dat_name <- 'HMP_2012_16S_gingival_V13'
conditions_col <- 'body_subsite'
conditions <- c(condB = 'subgingival_plaque', condA = 'supragingival_plaque')
tse <- getBenchmarkData(dat_name, dryrun = FALSE)[[1]]
tse
```

Filter low-abundant features:

```{r filter features}
tse_subset <- filterTaxa(tse, min_ab = 1, min_per = 0.2)
tse_subset <- tse_subset[,which(!colSums(assay(tse_subset)) <= 1)] ## Needed for metagenomeSeq
tse_subset
```

Counts per body subsite:

```{r samples per body subsite}
col_data <- as_tibble(colData(tse_subset))
count(col_data, body_subsite)
```

Number of subjects:

```{r unique subjects}
length(unique(col_data$subject_id))
```

## Prior information

```{r feature annotations}
row_data <- as.data.frame(rowData(tse_subset))
prior_info <- row_data[, c('genus', 'taxon_annotation')]
prior_info$taxon_name <- rownames(row_data)
prior_info$new_names <- paste0(prior_info$taxon_name, '|', prior_info$genus)
prior_info <-
dplyr::relocate(prior_info, taxon_name, new_names, genus, taxon_annotation)
head(prior_info)
```

## Convert to phyloseq

```{r convert to phyloseq}
ps <- convertToPhyloseq(tse_subset)
sample_data(ps)[[conditions_col]] <-
factor(sample_data(ps)[[conditions_col]], levels = conditions)
ps
```

## Differential abundance analysis

Set normalization, weights, and DA method options:

```{r norm, weights, methods}
ps <- runNormalizations(set_norm_list(), ps, verbose = FALSE)
zw <- weights_ZINB(ps, design = conditions_col)
DA_methods <- set_DA_methods_list(conditions_col, conditions)
for (i in seq_along(DA_methods)) {
if (grepl("Seurat", names(DA_methods)[i])) {
names(DA_methods[[i]]$contrast) <- NULL
} else {
next
}
}
names(DA_methods)
```

Run DA analysis:

```{r run DA, warning=FALSE, message=FALSE}
tim <- system.time({
DA_output <- imap(DA_methods, ~ {
message("Running method ", .y, " - ", Sys.time())
tryCatch(
error = function(e) NULL,
runDA(list(.x), ps, weights = zw, verbose = FALSE)
)
}) |>
list_flatten(name_spec = "{outer}") |>
discard(is.null)
DA_output <- map2(DA_output, names(DA_output), ~ {
.x$name <- .y
.x
})
})
tim
```

## Enrichment analysis

### Define a threshold for LEFSE with CLR

Define threshold variables:

```{r define variables to use in createEnrichment}
direction <- get_direction_cols(DA_output, conditions_col, conditions)
adjThr<- rep(0.1, length(DA_output))
names(adjThr) <- names(DA_output)
esThr <- rep(0, length(DA_output))
names(esThr) <- names(DA_output)
esThr[grep("lefse.TSS", names(esThr))] <- 2
esThr[grep("lefse.CLR", names(esThr))] <-
median(DA_output$LEfSe.CLR$statInfo$abs_score)
slotV <- ifelse(grepl("lefse", names(DA_output)), "statInfo", "pValMat")
colNameV <- ifelse(grepl("lefse", names(DA_output)), "LDA_scores", "adjP")
typeV <- ifelse(grepl("lefse", names(DA_output)), "logfc", "pvalue")
```

Run enrichment:

```{r perform enrichment}
enrichment <- createEnrichment(
object = DA_output,
priorKnowledge = prior_info,
enrichmentCol = "taxon_annotation",
namesCol = "new_names",
slot = slotV, colName = colNameV, type = typeV,
direction = direction,
threshold_pvalue = adjThr,
threshold_logfc = esThr,
top = NULL, # No top feature selected
alternative = "greater",
verbose = FALSE
)
```

Extract summary of the enrichment analysis:

```{r enrichment summary}
enrichmentSummary <- purrr::map(enrichment, ~ {
.x$summaries |>
purrr::map(function(x) {
x |>
tibble::rownames_to_column(var = "direction") |>
tidyr::pivot_longer(
names_to = "annotation", values_to = "n",
cols = 2
)
}) |>
dplyr::bind_rows() |>
dplyr::relocate(pvalue)
}) |>
dplyr::bind_rows(.id = "method") |>
dplyr::mutate(
sig = dplyr::case_when(
pvalue < 0.05 & pvalue > 0.01 ~ "*",
pvalue < 0.01 & pvalue > 0.001 ~ "**",
pvalue < 0.001 ~ "***",
TRUE ~ ""
)
) |>
dplyr::mutate(
direction = dplyr::case_when(
direction == "DOWN Abundant" ~ "Subgingival",
direction == "UP Abundant" ~ "Supragingival",
TRUE ~ direction
)
)
head(enrichmentSummary)
```

## Plots

### Enrichment plot

```{r enrichment plot}
nn <- unique(enrichmentSummary$annotation)
nn <- nn[!is.na(nn)]
colorPalette <- palette.colors(palette = "Okabe-Ito")[2:(length(nn) + 1)]
enPlot <- enrichmentSummary |>
dplyr::left_join(getMethodClass(), by = "method") |>
mutate(
direction = factor(direction, levels = c("Supragingival", "Subgingival")),
annotation = factor(annotation, levels = nn)
) |>
mutate(
annotation = case_when(
annotation == "aerobic" ~ "Aerobic",
annotation == "anaerobic" ~ "Anaerobic",
annotation == "facultative_anaerobic" ~ "Facultative anaerobic",
TRUE ~ annotation
)
) |>
ggplot(aes(method, n)) +
geom_col(
aes(fill = annotation),
position = position_dodge2(width = 0.9)
) +
geom_text(
aes(label = sig, color = annotation),
position = position_dodge2(width = 0.9)
) +
facet_grid(
direction ~ method_class, scales = "free_x", space = "free"
) +
scale_fill_manual(values = colorPalette, name = "Annotation") +
scale_color_manual(values = colorPalette, name = "Annotation") +
labs(
x = "DA method", y = "Number of DAFs"
) +
theme_bw() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom",
strip.background = element_rect(fill = "white")
)
```

### Putative true positives - putative false positives

Calculate TP - FP ratio (no threshold):

```{r positives object}
positives <- map(1:length(DA_output), .f = function(i) {
positives <- createPositives(
object = DA_output[i],
priorKnowledge = prior_info,
enrichmentCol = "taxon_annotation", namesCol = "new_names",
slot = slotV[i], colName = colNameV[i], type = typeV[i],
direction = direction[i],
threshold_pvalue = 1,
threshold_logfc = 0,
top = seq.int(from = 0, to = 50, by = 5),
alternative = "greater",
verbose = FALSE,
TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")),
FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic"))
) |>
dplyr::left_join(getMethodClass(), by = 'method')
}) |> bind_rows() |>
mutate(diff = jitter(TP - FP, amount = 1.5, factor = 2))
```

Positives plot:

```{r positives plot, fig.height = 4, fig.width=12}
# positives <- positives |>
# mutate(diff = jitter(TP - FP, amount = 1.5, factor = 2)) |>
# mutate(
# base_method = case_when(
# grepl("lefse", base_method) ~ sub("lefse", "LEfSe", base_method),
# grepl("wilcox", base_method) ~ sub("wilcox", "Wilcox", base_method),
# TRUE ~ base_method
# ),
# method = case_when(
# grepl("lefse", method) ~ sub("lefse", "LEfSe", method),
# grepl("wilcox", method) ~ sub("wilcox", "Wilcox", method),
# TRUE ~ method
# )
# )
vec <- positives$color
names(vec) <- positives$base_method
posPlot <- positives |>
ggplot(aes(top, diff)) +
geom_line(
aes(
group = method, color = base_method, linetype = norm,
),
) +
geom_point(
aes(
color = base_method, shape = norm
),
) +
facet_wrap(~method_class, nrow = 1) +
labs(
x = "Top DAFs", y = "TP - FP"
) +
scale_shape(name = "Normalization") +
scale_linetype(name = "Normalization") +
scale_color_manual(values = vec, name = "Base DA method") +
theme_bw() +
theme(
legend.position = "bottom",
strip.background = element_rect(fill = "white")
)
```

### Combined plots

```{r combined plots, fig.height=9, fig.width=10}
pp <- ggarrange(
plotlist = list(enPlot, posPlot), ncol = 1, heights = c(1.5, 1)
)
pp
```

```{r export plots, eval=TRUE, echo=FALSE}
## Nice figure to explore. Not included in the paper
# ggsave(
# filename = "FigureX.pdf", plot = pp, dpi = 300,
# height = 9, width = 10,
# )
```

# Session Info

```{r session information}
sessioninfo::session_info()
```
Loading

0 comments on commit a7f268e

Please sign in to comment.