-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update vignette with all data for V1-3
- Loading branch information
Showing
9 changed files
with
775 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: MicrobiomeBenchmarkDataAnalyses | ||
Title: Benchmarking analyses with the MicrobiomeBenchmarkData package | ||
Version: 0.99.20 | ||
Version: 0.99.21 | ||
Authors@R: | ||
person("Samuel David", "Gamboa-Tuz", email = "[email protected]", | ||
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-6863-7943")) | ||
|
@@ -50,3 +50,4 @@ Depends: | |
R (>= 4.2) | ||
URL: https://github.com/waldronlab/MicrobiomeBenchmarkDataAnalyses | ||
BugReports: https://github.com/waldronlab/MicrobiomeBenchmarkDataAnalyses/issues | ||
Config/Needs/website: rmarkdown |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.html | ||
*.R |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,359 @@ | ||
--- | ||
title: " HMP_2012_16S_gingival_V35 - subgingival vs supgragingival plaque - whole" | ||
output: | ||
html_document: | ||
toc: true | ||
--- | ||
|
||
```{r setup, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>", | ||
message = FALSE, | ||
warning = FALSE | ||
) | ||
``` | ||
|
||
```{r load packages, message=FALSE, warning=FALSE} | ||
library(MicrobiomeBenchmarkDataAnalyses) | ||
library(MicrobiomeBenchmarkData) | ||
library(dplyr) | ||
library(purrr) | ||
library(phyloseq) | ||
library(mia) | ||
library(benchdamic) | ||
library(ggplot2) | ||
library(ggpubr) | ||
``` | ||
|
||
+ Analyses were carried out in a subset of subjects: subjects with samples | ||
taken for both subgingival and supragingival body subsites in the same visit. | ||
Only one visit was considered per subject. | ||
|
||
+ All analyses were performed at the OTU level. | ||
|
||
## Data | ||
|
||
```{r import data} | ||
dat_name <- 'HMP_2012_16S_gingival_V13' | ||
conditions_col <- 'body_subsite' | ||
conditions <- c(condB = 'subgingival_plaque', condA = 'supragingival_plaque') | ||
tse <- getBenchmarkData(dat_name, dryrun = FALSE)[[1]] | ||
tse_subset <- filterTaxa(tse,min_ab = 1, min_per = 0.2) | ||
tse_subset | ||
``` | ||
|
||
Unique subjects: | ||
|
||
```{r extract subjects} | ||
col_data <- tse_subset |> | ||
colData() |> | ||
as.data.frame() |> | ||
tibble::rownames_to_column("sample_name") |> | ||
as_tibble() | ||
col_data |> | ||
count(body_subsite) | ||
``` | ||
```{r} | ||
subjects <- col_data |> | ||
pull(subject_id) |> | ||
unique() | ||
length(subjects) | ||
``` | ||
|
||
# Prior information | ||
|
||
```{r prior information} | ||
row_data <- as.data.frame(rowData(tse_subset)) | ||
prior_info <- row_data[, c('genus', 'taxon_annotation')] | ||
prior_info$taxon_name <- rownames(row_data) | ||
prior_info$new_names <- paste0(prior_info$taxon_name, '|', prior_info$genus) | ||
prior_info <- | ||
dplyr::relocate(prior_info, taxon_name, new_names, genus, taxon_annotation) | ||
head(prior_info) | ||
``` | ||
|
||
Convert to phyloseq | ||
|
||
```{r convert to phyloseq} | ||
ps <- convertToPhyloseq(tse_subset) | ||
sample_data(ps)[[conditions_col]] <- | ||
factor(sample_data(ps)[[conditions_col]], levels = conditions) | ||
ps | ||
``` | ||
|
||
## Differential abundance analysis | ||
|
||
Perform normalization, calculate weights, and select DA methods: | ||
|
||
```{r norm, weights, methods} | ||
ps2 <- runNormalizations(set_norm_list()[2:4], ps, verbose = FALSE) | ||
zw <- weights_ZINB(ps, design = conditions_col) | ||
DA_methods <- set_DA_methods_list(conditions_col, conditions) | ||
DA_methods <- DA_methods[grep("metagenomeSeq", names(DA_methods), invert = TRUE)] | ||
for (i in seq_along(DA_methods)) { | ||
## This was a nacessary change for when the version increased: | ||
if (grepl("Seurat", names(DA_methods)[i])) { | ||
names(DA_methods[[i]]$contrast) <- NULL | ||
} else { | ||
next | ||
} | ||
} | ||
names(DA_methods) | ||
``` | ||
|
||
Run all of the differential analysis (DA) methods: | ||
|
||
```{r run DA, warning=FALSE} | ||
tim <- system.time({ | ||
DA_output <- vector("list", length(DA_methods)) | ||
for (i in seq_along(DA_output)) { | ||
message( | ||
"Running method ", i, ": ", names(DA_methods)[i], " - ", Sys.time() | ||
) | ||
DA_output[[i]] <- tryCatch( | ||
error = function(e) NULL, | ||
runDA(DA_methods[i], ps, weights = zw, verbose = FALSE) | ||
) | ||
} | ||
DA_output <- purrr::list_flatten(DA_output, name_spec = "{inner}") | ||
DA_output <- purrr::discard(DA_output, is.null) | ||
}) | ||
tim | ||
``` | ||
|
||
## Enrichment analysis | ||
|
||
### Define a threshold for LEFSE with CLR | ||
|
||
Lefser uses two values to define differential abundant taxa, p-value and LDA. | ||
|
||
```{r LEFSER TSS scores hist} | ||
DA_output$lefse.TSS$statInfo$abs_score |> hist() | ||
``` | ||
|
||
```{r LEFSE CLR scores hist} | ||
DA_output$lefse.CLR$statInfo$abs_score |> hist() | ||
``` | ||
|
||
```{r check median scores} | ||
c( | ||
lefse.TSS = median(DA_output$lefse.TSS$statInfo$abs_score), | ||
lefse.CLR = median(DA_output$lefse.CLR$statInfo$abs_score) | ||
) | ||
``` | ||
|
||
Create variables of thresholds: | ||
|
||
```{r define variables to use in createEnrichment} | ||
direction <- get_direction_cols(DA_output, conditions_col, conditions) | ||
adjThr<- rep(0.1, length(DA_output)) | ||
names(adjThr) <- names(DA_output) | ||
esThr <- rep(0, length(DA_output)) | ||
names(esThr) <- names(DA_output) | ||
esThr[grep("lefse.TSS", names(esThr))] <- 2 | ||
esThr[grep("lefse.CLR", names(esThr))] <- 0.06 | ||
slotV <- ifelse(grepl("lefse", names(DA_output)), "statInfo", "pValMat") | ||
colNameV <- ifelse(grepl("lefse", names(DA_output)), "LDA_scores", "adjP") | ||
typeV <- ifelse(grepl("lefse", names(DA_output)), "logfc", "pvalue") | ||
``` | ||
|
||
Run enrichment: | ||
|
||
```{r perform enrichment} | ||
enrichment <- createEnrichment( | ||
object = DA_output, | ||
priorKnowledge = prior_info, | ||
enrichmentCol = "taxon_annotation", | ||
namesCol = "new_names", | ||
slot = slotV, colName = colNameV, type = typeV, | ||
direction = direction, | ||
threshold_pvalue = adjThr, | ||
threshold_logfc = esThr, | ||
top = NULL, # No top feature selected | ||
alternative = "greater", | ||
verbose = FALSE | ||
) | ||
``` | ||
|
||
Extract summary of the enrichment analysis: | ||
|
||
```{r enrichment summary} | ||
enrichmentSummary <- purrr::map(enrichment, ~ { | ||
.x$summaries |> | ||
purrr::map(function(x) { | ||
pos <- which(colnames(x) != "pvalue") | ||
x |> | ||
tibble::rownames_to_column(var = "direction") |> | ||
tidyr::pivot_longer( | ||
names_to = "annotation", values_to = "n", | ||
cols = 2 | ||
) | ||
}) |> | ||
dplyr::bind_rows() |> | ||
dplyr::relocate(pvalue) | ||
}) |> | ||
dplyr::bind_rows(.id = "method") |> | ||
dplyr::mutate( | ||
sig = dplyr::case_when( | ||
pvalue < 0.05 & pvalue > 0.01 ~ "*", | ||
pvalue < 0.01 & pvalue > 0.001 ~ "**", | ||
pvalue < 0.001 ~ "***", | ||
TRUE ~ "" | ||
) | ||
) |> | ||
dplyr::mutate( | ||
direction = dplyr::case_when( | ||
direction == "DOWN Abundant" ~ "Subgingival", | ||
direction == "UP Abundant" ~ "Supragingival", | ||
TRUE ~ direction | ||
) | ||
) | ||
head(enrichmentSummary) | ||
``` | ||
|
||
## Plots | ||
|
||
### Enrichment plot | ||
|
||
```{r enrichment plot} | ||
enPlot <- enrichmentSummary |> | ||
dplyr::left_join(get_meth_class(), by = "method") |> | ||
mutate( | ||
direction = factor( | ||
direction, levels = c("Supragingival", "Subgingival") | ||
) | ||
) |> | ||
mutate( | ||
method = case_when( | ||
grepl("lefse", method) ~ sub("lefse", "LEfSe", method), | ||
grepl("wilcox", method) ~ sub("wilcox", "Wilcox", method), | ||
TRUE ~ method | ||
) | ||
) |> | ||
mutate( | ||
annotation = case_when( | ||
annotation == "aerobic" ~ "Aerobic", | ||
annotation == "anaerobic" ~ "Anaerobic", | ||
annotation == "facultative_anaerobic" ~ "Facultative anaerobic", | ||
TRUE ~ annotation | ||
) | ||
) |> | ||
ggplot(aes(method, n)) + | ||
geom_col( | ||
aes(fill = annotation), | ||
position = position_dodge2(width = 0.9) | ||
) + | ||
geom_text( | ||
aes(label = sig, color = annotation), | ||
position = position_dodge2(width = 0.9) | ||
) + | ||
facet_grid( | ||
direction ~ method_class, scales = "free_x", space = "free" | ||
) + | ||
scale_fill_viridis_d(option = "D", name = "Biological data") + | ||
scale_color_viridis_d(option = "D", name = "Biological data") + | ||
labs( | ||
x = "DA method", y = "Number of DA taxa" | ||
) + | ||
theme_minimal() + | ||
theme( | ||
axis.text.x = element_text(angle = 45, hjust = 1), | ||
legend.position = "bottom" | ||
) | ||
``` | ||
|
||
### Putative true positives - putative false positives | ||
|
||
Calculate TP - FP ratio (no threshold) | ||
|
||
```{r positives object} | ||
positives <- map(1:length(DA_output), .f = function(i) { | ||
positives <- createPositives( | ||
object = DA_output[i], | ||
priorKnowledge = prior_info, | ||
enrichmentCol = "taxon_annotation", namesCol = "new_names", | ||
slot = slotV[i], colName = colNameV[i], type = typeV[i], | ||
direction = direction[i], | ||
threshold_pvalue = 1, | ||
threshold_logfc = 0, | ||
top = seq.int(from = 0, to = 50, by = 5), | ||
alternative = "greater", | ||
verbose = FALSE, | ||
TP = list(c("DOWN Abundant", "anaerobic"), c("UP Abundant", "aerobic")), | ||
FP = list(c("DOWN Abundant", "aerobic"), c("UP Abundant", "anaerobic")) | ||
) |> | ||
dplyr::left_join(get_meth_class(), by = 'method') | ||
}) |> bind_rows() | ||
``` | ||
|
||
Positives plot: | ||
|
||
```{r positives plot, fig.height = 4, fig.width=12} | ||
# names(vec) <- positives$base_method | ||
positives <- positives |> | ||
mutate(diff = jitter(TP - FP, amount = 1.5, factor = 2)) |> | ||
mutate( | ||
base_method = case_when( | ||
grepl("lefse", base_method) ~ sub("lefse", "LEfSe", base_method), | ||
grepl("wilcox", base_method) ~ sub("wilcox", "Wilcox", base_method), | ||
TRUE ~ base_method | ||
), | ||
method = case_when( | ||
grepl("lefse", method) ~ sub("lefse", "LEfSe", method), | ||
grepl("wilcox", method) ~ sub("wilcox", "Wilcox", method), | ||
TRUE ~ method | ||
) | ||
) | ||
vec <- positives$color | ||
names(vec) <- positives$base_method | ||
posPlot <- positives |> | ||
ggplot(aes(top, diff)) + | ||
geom_line( | ||
aes( | ||
group = method, color = base_method, linetype = norm, | ||
), | ||
) + | ||
geom_point( | ||
aes( | ||
color = base_method, shape = norm | ||
), | ||
) + | ||
facet_wrap(~method_class, nrow = 1) + | ||
labs( | ||
x = "Top DA features", y = "TP - FP" | ||
) + | ||
scale_shape(name = "Normalization") + | ||
scale_linetype(name = "Normalization") + | ||
scale_color_manual(values = vec, name = "Base method") + | ||
theme_minimal() + | ||
theme(legend.position = "bottom") | ||
``` | ||
|
||
### Combined plots | ||
|
||
```{r combined plots, fig.height=9, fig.width=10} | ||
pp <- ggarrange( | ||
plotlist = list(enPlot, posPlot), ncol = 1, heights = c(1.5, 1) | ||
) | ||
pp | ||
``` | ||
|
||
```{r export plots, eval=TRUE, echo=FALSE} | ||
## Export the figure | ||
ggsave( | ||
filename = "Figure1.pdf", plot = pp, dpi = 300, | ||
height = 9, width = 11, | ||
) | ||
``` | ||
|
||
# Session Info | ||
|
||
```{r session information} | ||
sessioninfo::session_info() | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.