-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVarPart_crumblr.R
671 lines (510 loc) · 22.1 KB
/
VarPart_crumblr.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
# ------------------------------------------------------------------------------
# Project: Single-cell Multi-Modal Analysis - filtering and clustering
# Date: August 2024
# Script name: VarPart_crumblr.R
# Description: VariancePartion analysis and crumblr
#
# outputs: Potential plots for Fig. 1e, Fig.5 b
# ------------------------------------------------------------------------------
# -------------------------------
# Setup and Library Loading
# -------------------------------
# Set library paths (optional)
.libPaths()
# Load Required Libraries
## Dreamlet Universe: For multi-omics analysis
library(dreamlet) # Dreamlet package for mixed models
library(crumblr) # Crumblr for correlation modeling
library(zenith) # Zenith for data integration
## Data Input/Output: For handling single-cell data
library(SingleCellExperiment) # SingleCellExperiment object
library(zellkonverter) # Conversion between SingleCellExperiment and other formats
library(tidyr) # Data tidying
## Plotting: For visualization
library(ggplot2) # Data visualization
library(dplyr) # Data manipulation
library(RColorBrewer) # Color palettes
library(cowplot) # Plot composition
library(ggtree) # Tree visualization
library(aplot) # Advanced plotting
## Metadata Analysis: For meta-analysis
library(muscat) # Multi-level analysis of single-cell data
library(metafor) # Meta-analysis
library(broom) # Convert statistical analysis objects into tidy tibbles
library(tidyverse) # Collection of R packages for data science
## Gene Set Enrichment Analysis (GSEA): For pathway analysis
library(topGO) # Gene ontology enrichment
library(enrichR) # Enrichr API
# Initialize Enrichr settings
listEnrichrSites() # List available Enrichr sites
setEnrichrSite("Enrichr") # Set the Enrichr site to Human genes
websiteLive <- TRUE # Flag to check if Enrichr website is accessible
dbs <- listEnrichrDbs() # List available databases in Enrichr
# Check if Enrichr website is accessible
if (is.null(dbs)) {
websiteLive <- FALSE
}
# Display available databases if website is live
if (websiteLive) {
head(dbs)
}
# Additional libraries for enrichment analysis
library(clusterProfiler) # ClusterProfiler for enrichment analysis
library(enrichplot) # Enrichment plot visualization
# -------------------------------
# Function Definitions
# -------------------------------
#' Perform Meta-Analysis on a List of Data Tables
#'
#' This function performs meta-analysis on a list of data tables, computing effect sizes
#' and adjusting p-values for multiple testing.
#'
#' @param tabList A list of data frames, each containing gene-level statistics.
#' @return A tibble with meta-analysis results, including effect sizes and FDR-adjusted p-values.
meta_analysis <- function(tabList) {
# Assign default names if none are provided
if (is.null(names(tabList))) {
names(tabList) <- as.character(seq(length(tabList)))
}
# Add a 'Dataset' column to each data frame in the list
for (key in names(tabList)) {
tabList[[key]]$Dataset <- key
}
# Combine all data frames into one
df <- do.call(rbind, tabList)
# Perform meta-analysis for each assay
results <- df %>%
as_tibble() %>%
group_by(assay) %>%
do(tidy(rma(yi = logFC, sei = logFC / t, data = ., method = "FE"))) %>%
select(-term, -type) %>%
ungroup() %>%
mutate(FDR = p.adjust(p.value, method = "fdr")) %>%
mutate(log10FDR = -log10(FDR))
return(results)
}
#' Plot Phylogenetic Tree
#'
#' This function plots a phylogenetic tree using ggtree with customizable color scales.
#'
#' @param tree A phylogenetic tree object.
#' @param low Color for low values (default: "grey90").
#' @param mid Color for mid values (default: "red").
#' @param high Color for high values (default: "darkred").
#' @param xmax.scale Scale factor to increase the x-axis limit (default: 1.5).
#' @return A ggplot object representing the tree.
plotTree <- function(tree, low = "grey90", mid = "red", high = "darkred", xmax.scale = 1.5) {
# Create base tree plot
fig <- ggtree(tree, branch.length = "none") +
geom_tiplab(color = "black", size = 4, hjust = 0, offset = 0.2) +
theme(legend.position = "top left", plot.title = element_text(hjust = 0.5))
# Get current maximum x-axis value
xmax <- layer_scales(fig)$x$range$range[2]
# Adjust x-axis limits
fig <- fig + xlim(0, xmax * xmax.scale)
return(fig)
}
#' Plot Effect Sizes
#'
#' This function creates a plot of effect sizes with error bars and significance indicators.
#'
#' @param tab Data frame containing the results (must include 'estimate', 'std.error', 'FDR', and 'assay' columns).
#' @param coef The coefficient name used in the analysis.
#' @param fig.tree A tree object used for ordering the cell types.
#' @param low Color for low FDR (default: "grey90").
#' @param mid Color for mid FDR (default: "red").
#' @param high Color for high FDR (default: "darkred").
#' @param ylab Label for the y-axis.
#' @param tick_size Size of the axis tick labels.
#' @return A ggplot object representing the effect sizes.
plotCoef2 <- function(tab, coef, fig.tree, low = "grey90", mid = "red", high = "darkred", ylab, tick_size) {
# Prepare data
tab$logFC <- tab$estimate
tab$celltype <- factor(tab$assay, levels = rev(ggtree::get_taxa_name(fig.tree)))
tab$se <- tab$std.error
# Create effect size plot
fig.es <- ggplot(tab, aes(x = celltype, y = logFC)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey", linewidth = 1) +
geom_errorbar(aes(ymin = logFC - 1.96 * se, ymax = logFC + 1.96 * se), width = 0) +
geom_point2(aes(color = pmin(4, -log10(FDR)), size = pmin(4, -log10(FDR)))) +
scale_color_gradient2(
name = expression(-log[10]~FDR),
limits = c(0, 4),
low = low,
mid = mid,
high = high,
midpoint = -log10(0.01)
) +
scale_size_area(name = expression(-log[10]~FDR), limits = c(0, 4)) +
geom_text2(aes(label = '+', subset = FDR < 0.05), color = "white", size = 6, vjust = 0.3, hjust = 0.5) +
theme_classic() +
coord_flip() +
xlab('') +
ylab(ylab) +
theme(
axis.text.y = element_blank(),
axis.text = element_text(size = 12),
axis.ticks.y = element_blank(),
text = element_text(size = tick_size)
) +
scale_y_continuous(breaks = scales::breaks_pretty(3))
return(fig.es)
}
#' Select Top Genes for Enrichment Analysis
#'
#' This function selects the top N genes for each covariate based on the specified criteria.
#'
#' @param VP.LST List containing variance partition data.
#' @param cova Vector of covariate names.
#' @param topN_genes Number of top genes to select for each covariate.
#' @param analysis Description of the analysis being performed.
#' @return A data frame with selected genes and metadata.
SelectGenes <- function(VP.LST, cova, topN_genes, analysis) {
DF <- data.frame()
for (i in cova) {
print(i)
# Select top N genes based on the covariate
top_genes <- VP.LST %>%
as_tibble() %>%
arrange(desc(!!sym(i))) %>%
slice_head(n = topN_genes)
Gene <- top_genes$gene
# Create a data frame for the selected genes
df <- data.frame(
Gene = Gene,
covariate = i,
analysis = analysis,
topN_genes = topN_genes,
stringsAsFactors = FALSE
)
DF <- rbind(DF, df)
}
# Note: The following line seems to overwrite 'topN_genes' with 'gene', which might be unintended.
# Verify if this is desired or if it should be removed/commented out.
# DF$topN_genes <- DF$gene
return(DF)
}
#' Retrieve Expression Values for a Gene
#'
#' This function extracts expression values for a specified gene from a data frame.
#'
#' @param dataframe Data frame containing gene expression data.
#' @param Gene_name Name of the gene to retrieve.
#' @return A list of expression values for the specified gene.
ListExpression <- function(dataframe, Gene_name) {
# Subset the data frame for the specified gene
test <- subset(dataframe, subset = Gene == Gene_name)
# Remove the last column if it's non-expression data (adjust as needed)
test.df <- test[head(seq_len(ncol(test)), -1)]
# Transpose and convert to a list
test.list <- as.list(as.data.frame(t(test.df)))
return(test.list)
}
#' Prepare Data for Dotplot Visualization
#'
#' This function filters and selects genes for dotplot visualization based on specified criteria.
#'
#' @param Df Data frame containing enrichment results.
#' @param Database Name of the enrichment database to filter.
#' @param numGenes Number of genes to select.
#' @param numGenesCutoff Maximum number of genes to include after cutoff.
#' @return A filtered data frame ready for dotplot visualization.
PrepforDotplot <- function(Df, Database, numGenes, numGenesCutoff) {
# Filter data for the specified database
Df_bio_proc <- Df[which(Df$database == Database), ]
# Count the number of genes per row (assuming 'Genes' are separated by ';')
CNT <- sapply(Df_bio_proc$Genes, function(gens) length(strsplit(gens, split = ";")[[1]]))
Df_bio_proc$Gene_count <- CNT
D0 <- data.frame()
# Select top genes for each 'Top_DE'
for (i in unique(Df_bio_proc$Top_DE)) {
d <- Df_bio_proc[which(Df_bio_proc$Top_DE == i), ]
d0 <- d %>% top_n(n = -numGenes, wt = Adjusted.P.value)
print(paste("Number of genes after initial filtering for", i, ":", nrow(d0)))
if (nrow(d0) > numGenesCutoff) {
d0 <- head(d0, numGenesCutoff)
}
D0 <- rbind(D0, d0)
}
return(D0)
}
#' Run Gene Ontology (GO) Enrichment Analysis
#'
#' This function performs GO enrichment analysis for each covariate and visualizes the results.
#'
#' @param df_corr Data frame containing correlated genes and covariates.
#' @return A combined data frame with enrichment results.
runGOEnrichment <- function(df_corr) {
Df <- data.frame()
# Iterate over each covariate
for (i in unique(df_corr$covariate)) {
temp <- df_corr[df_corr$covariate == i, ]
Cluster_DE <- i
print(paste("Performing enrichment for:", Cluster_DE))
GENES <- temp$Gene
# Define enrichment databases
dbs <- c("GO_Molecular_Function_2021", "GO_Biological_Process_2021",
"WikiPathway_2021_Human", "Elsevier_Pathway_Collection", "Reactome_2022")
# Perform enrichment if the website is accessible
if (websiteLive) {
enriched <- enrichr(GENES, dbs)
} else {
warning("Enrichr website is not accessible. Skipping enrichment analysis.")
next
}
# Process each enrichment result
for (j in seq_along(enriched)) {
enriched[[j]]$Top_DE <- Cluster_DE
enriched[[j]]$database <- dbs[j]
# Extract GO terms and numbers
TEXT <- sapply(enriched[[j]]$Term, function(term) strsplit(term, split = "\\(")[[1]][1])
GOT <- sapply(enriched[[j]]$Term, function(term) {
parts <- strsplit(term, split = "\\(")[[1]]
if (length(parts) > 1) {
strsplit(parts[2], split = "\\)")[[1]][1]
} else {
NA
}
})
enriched[[j]]$GO_text <- TEXT
enriched[[j]]$GO_number <- GOT
}
# Combine all enriched data
Df <- rbind(Df, do.call(rbind, enriched))
# Generate enrichment plots
if (websiteLive) {
p0 <- plotEnrich(enriched[[1]], showTerms = 20, numChar = 50,
y = "Count", orderBy = "Adjusted.P.value")
p1 <- plotEnrich(enriched[[2]], showTerms = 20, numChar = 50,
y = "Count", orderBy = "Adjusted.P.value")
p3 <- plotEnrich(enriched[[3]], showTerms = 20, numChar = 50,
y = "Count", orderBy = "Adjusted.P.value")
p4 <- plotEnrich(enriched[[4]], showTerms = 20, numChar = 50,
y = "Count", orderBy = "Adjusted.P.value")
p5 <- plotEnrich(enriched[[5]], showTerms = 20, numChar = 50,
y = "Count", orderBy = "Adjusted.P.value")
# Set plot dimensions
options(repr.plot.width = 20, repr.plot.height = 8)
# Combine and print plots
print(p0 + p1)
print(p3 + p4)
print(p1 + p5)
}
}
return(Df)
}
# --------------------------------------------
# Main Analysis Workflow for VariancePartition
# --------------------------------------------
# --- 1) Prepare data and convert to pseudobulk
# Load data; from Seurat object - DietSeurat() to single assay and then save as h5ad
# Create pseudobulk data by specifying cluster_id and sample_id
# Count data for each cell type is then stored in the `assay` field
# assay: entry in assayNames(sce) storing raw counts
# cluster_id: variable in colData(sce) indicating cell clusters
# sample_id: variable in colData(sce) indicating sample id for aggregating cells
pb <- aggregateToPseudoBulk(sce,
assay = "counts",
cluster_id = "cell",
sample_id = "id",
verbose = FALSE
)
# one 'assay' per cell type
assayNames(pb)
# --- 2) Perform preprocessing
# Define the model formula with fixed and random effects
form <- ~ (Age) + (1 | Brain.region) + (1 | Sex) + (1 | Brain.bank) + (1 | Individual)
# Normalize and apply voom/voomWithDreamWeights
res.proc <- processAssays(pb, form, min.count = 5)
# the resulting object of class dreamletProcessedData stores
# normalized data and other information
res.proc
# view details of dropping samples
details(res.proc)
# show voom plot for each cell clusters
plotVoom(res.proc)
#----- 3) Variance Partition analysis
# run variance partitioning analysis
vp.lst <- fitVarPart(res.proc, form)
# --- 4) Generate plots from Variance Partition
# Set plot dimensions for variance partition plot
options(repr.plot.width = 10, repr.plot.height = 10)
# Plot variance partition results
plotVarPart(sortCols(vp.lst), label.angle = 60, ncol = 4)
# --------------------------------------------
# Main Analysis Workflow for crumblr
# --------------------------------------------
# --- 1) Prepare data for pseudobulk
pb_celltype <- pb #use pseudobulk from VariancePartition analysis
# --- 2) Crumblr on Pseudobulk Data
# Create crumblr object from cell counts
cobj <- crumblr(cellCounts(pb_celltype))
# Define the model formula with fixed and random effects
form <- ~ (Age) + (1 | Brain.region) + (1 | Sex) + (1 | Brain.bank) + (1 | Individual)
# Fit variance partition model and extract results
vp.c <- fitExtractVarPartModel(cobj, form, colData(pb_celltype))
# Set plot dimensions for variance partition plot
options(repr.plot.width = 6, repr.plot.height = 5)
# Plot variance partition results for crumblr
plotVarPart(sortCols(vp.c), label.angle = 60, ncol = 4)
# Create percent bars plot for variance partition
fig.vp <- plotPercentBars(sortCols(vp.c))
fig.vp
# --- 3) Analysis with dream()
# Define the model formula (redundant; already defined earlier)
# form <- ~ (Age) + (1 | Brain.region) + (1 | Sex) + (1 | Brain.bank) + (1 | Individual)
# Fit dream model using the crumblr object and the formula
fit <- dream(cobj, form, colData(pb_celltype))
# Apply empirical Bayes moderation
fit <- eBayes(fit)
# --- 4) Meta-Analysis
# Define prefix for output files
prefix <- 'Age_analysis_'
# Extract top table for the 'Age' coefficient
res.age <- topTable(fit, coef = 'Age', number = Inf) %>%
rownames_to_column('assay')
# Perform meta-analysis on the 'Age' results
res.age <- meta_analysis(list(res.age))
head(res.age)
# View unique coefficients (for verification)
unique(fit$coefficients)
# Build cluster tree from pseudobulk data
hc <- buildClusterTreeFromPB(pb_celltype)
# Plot the phylogenetic tree with increased x-axis scale and adjust legend position
fig.tree <- plotTree(ape::as.phylo(hc), xmax.scale = 2.2) + theme(legend.position = "bottom")
# --- 4) Effect Size Plotting
# Create effect size plot for 'Age' meta-analysis results
fig.es1 <- plotCoef2(res.age, coef = 'Age', fig.tree, ylab = 'Age', tick_size = 12)
# --- 5) Combine Plots
# Set plot dimensions
options(repr.plot.width = 8, repr.plot.height = 5)
# Combine effect size plot and tree plot
combined_plot <- fig.es1 %>% insert_left(fig.tree, width = 1.3)
# Save the combined plot as a PDF
ggsave(filename = paste0(prefix, '_controls_age_sex.pdf'), plot = combined_plot, width = 13, height = 5)
# --------------------------------------------------------------------
# Main Analysis Workflow for Gene Set Enrichment Analysis (GSEA)
# --------------------------------------------------------------------
# The following steps for GSEA are outlined via function definitions above.
# To execute GSEA, you would typically:
# 1. Select top genes using `SelectGenes()`
# 2. Retrieve their expression values using `ListExpression()`
# 3. Prepare data for visualization using `PrepforDotplot()`
# 4. Run enrichment analysis using `runGOEnrichment()`
# Example (assuming you have appropriate data):
# selected_genes <- SelectGenes(VP.LST = vp.c, cova = c("Age"), topN_genes = 100, analysis = "Age Analysis")
# expression_values <- ListExpression(dataframe = your_expression_data, Gene_name = "GeneX")
# dotplot_data <- PrepforDotplot(Df = your_enrichment_results, Database = "GO_Molecular_Function_2021", numGenes = 20, numGenesCutoff = 50)
# enrichment_results <- runGOEnrichment(df_corr = your_correlated_genes)
# Adjust and execute these steps based on your specific data and analysis needs.
# GSEA analysis functions
SelectGenes <- function(VP.LST, cova, topN_genes, analysis) {
DF <- data.frame()
for (i in cova) {
print(i)
VP.LST %>%
as_tibble() %>%
arrange(desc(!!sym(i))) %>%
slice_head(n = topN_genes) -> top100
Gene <- top100$gene
df <- as.data.frame(Gene)
df$covariate <- i
df$analysis <- analysis
df$topN_genes <- topN_genes
DF <- rbind(DF, df)
}
DF$topN_genes <- DF$gene
return(DF)
}
ListExpression <- function(dataframe, Gene_name) {
test <- subset(dataframe, subset = Gene == Gene_name)
test.df <- test[head(seq_len(ncol(test)), -1)]
test.list <- as.list(as.data.frame(t(test.df)))
return(test.list)
}
PrepforDotplot <- function(Df, Database, numGenes, numGenesCutoff) {
Df_bio_proc <- Df[which(Df$database == Database),]
CNT <- c()
for (i in 1:nrow(Df_bio_proc)) {
gens <- Df_bio_proc[i,]$Genes
CNT <- append(CNT, count.fields(textConnection(gens), sep = ";"))
}
Df_bio_proc$Gene_count <- CNT
D <- Df_bio_proc
D0 <- data.frame()
for (i in unique(D$Top_DE)) {
d <- D[which(D$Top_DE == i),]
d %>% top_n(n = -numGenes, wt = Adjusted.P.value) -> d0
if (nrow(d0) > numGenes) {
d0 <- head(d0, numGenesCutoff)
}
D0 <- rbind(D0, d0)
}
return(D0)
}
runGOEnrichment <- function(df_corr) {
Df <- data.frame()
for (i in unique(df_corr$covariate)) {
temp <- df_corr[which(df_corr$covariate == i),]
Cluster_DE <- i
print(Cluster_DE)
GENES <- temp$Gene
# EnrichR and GO
dbs <- c("GO_Molecular_Function_2021", "GO_Biological_Process_2021",
"WikiPathway_2021_Human", "Elsevier_Pathway_Collection", "Reactome_2022")
if (websiteLive) {
enriched <- enrichr(GENES, dbs)
}
enriched[[1]]$Top_DE <- Cluster_DE
enriched[[1]]$database <- 'GO_Molecular_Function_2021'
enriched[[1]]$GO_text <- sapply(enriched[[1]]$Term, function(x) strsplit(x, split = "\\(")[[1]][1])
enriched[[1]]$GO_number <- sapply(enriched[[1]]$Term, function(x) strsplit(strsplit(x, split = "\\(")[[1]][2], split = "\\)")[[1]][1])
# Similar processing for other databases
for (db_index in 2:5) {
enriched[[db_index]]$Top_DE <- Cluster_DE
enriched[[db_index]]$database <- names(dbs)[db_index]
enriched[[db_index]]$GO_text <- sapply(enriched[[db_index]]$Term, function(x) strsplit(x, split = "\\(")[[1]][1])
enriched[[db_index]]$GO_number <- sapply(enriched[[db_index]]$Term, function(x) strsplit(strsplit(x, split = "\\(")[[1]][2], split = "\\)")[[1]][1])
}
Df <- rbind(Df, do.call(rbind, enriched))
if (websiteLive) {
p0 <- plotEnrich(enriched[[1]], showTerms = 20, numChar = 50, y = "Count", orderBy = "Adjusted.P.value")
p1 <- plotEnrich(enriched[[2]], showTerms = 20, numChar = 50, y = "Count", orderBy = "Adjusted.P.value")
p3 <- plotEnrich(enriched[[3]], showTerms = 20, numChar = 50, y = "Count", orderBy = "Adjusted.P.value")
p4 <- plotEnrich(enriched[[4]], showTerms = 20, numChar = 50, y = "Count", orderBy = "Adjusted.P.value")
p5 <- plotEnrich(enriched[[5]], showTerms = 20, numChar = 50, y = "Count", orderBy = "Adjusted.P.value")
options(repr.plot.width = 20, repr.plot.height = 8)
print(p0 + p1)
print(p3 + p4)
print(p1 + p5)
}
}
return(Df)
}
# -------------------------------
# End of Script
# -------------------------------
# -----------------------------------------------------------------------------
# Summary of the Code
# -----------------------------------------------------------------------------
# **Libraries**:
# - Various R libraries for data processing, meta-analysis, and plotting are loaded,
# including 'dreamlet', 'crumblr', and 'zenith' for specialized analysis, and 'ggplot2', 'dplyr', 'ggtree' for visualization.
# **Functions**:
# - `meta_analysis()`: Performs meta-analysis on provided data, adjusting p-values and calculating FDR.
# - `plotTree()`: Visualizes a tree structure with customizable color gradients and x-axis scaling.
# - `plotCoef2()`: Creates a plot of effect sizes with error bars and color-coded significance.
# **Analysis Workflow**:
# Variance Partition Analysis
# 1. Prepare data and convert to pseudobulk format
# 2. Normalize data and apply voom or voomWithDreamWeights
# 3. Fit the variance partition model and visualize results
# Crumblr Analysis
# 1. Perform crumblr analysis on data
# 2. Interpret and visualize results
# 3. *Dream Analysis*: Fit linear models and perform eBayes analysis.
# 4. *Meta-Analysis*: Conduct meta-analysis on results and plot effect sizes.
# 5. *GSEA Analysis*: Perform gene set enrichment analysis using various databases, and plot results if Enrichr is available.
# **Output**:
# - Various plots and saved results from meta-analysis and GSEA analysis.
# -----------------------------------------------------------------------------