-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathOPCOligo_Hotspot_SCENIC.R
203 lines (158 loc) · 8.54 KB
/
OPCOligo_Hotspot_SCENIC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -----------------------------------------------------------------------------
# Title: Downstream analysis from oligodendrogenesis pseudotime
# correlated with hotspot and SCENIC+ results
# Date: August 2024
# Script name: OPCOligo_Hotspot_SCENIC.R
# Description: This script performs module scoring and downstream analysis for
# Hotspot results and pseudotime. The script also includes
# correlation analysis between pseudotime and eRegulon activity
# using SCENIC results.
#
# outputs: Potential plots for Fig. 5e-f, Fig. 6 a-c
# -----------------------------------------------------------------------------
# Load necessary libraries
library(Seurat) # For single-cell RNA-seq data analysis
library(Signac) # For single-cell ATAC-seq data analysis
library(ggplot2) # For data visualization
library(cowplot) # For combining ggplot objects
library(reshape2) # For data reshaping
# Load preprocessed Seurat object
opc_olig3 <- readRDS('OPCOligObject.rds') # Replace with the correct path if needed
# Load external cell annotations
annot_file <- '3DG_OPCOlig_annotation_Sep2023.csv'
annot.use <- read.csv(annot_file, row.names = 1)
rownames(annot.use) <- annot.use$Colnames
# Add annotations to the Seurat object
opc_olig3 <- AddMetaData(opc_olig3, metadata = annot.use[Cells(opc_olig3), ])
# Load gene modules from Hotspot analysis
hs_file <- 'hs_results_modules.csv'
hs.modules <- read.csv(hs_file, row.names = 1)
# Convert modules into a list excluding non-informative module '-1'
hs.list <- lapply(setdiff(sort(unique(hs.modules$Module)), -1), function(x) {
rownames(hs.modules)[hs.modules$Module == x]
})
# Add module scores to Seurat object
opc_olig3 <- AddModuleScore(opc_olig3, features = hs.list)
# Rename module score columns for clarity
colnames([email protected])[grep('^Cluster', colnames([email protected]))] <- paste0('HS_', 1:length(hs.list))
# Merge gene modules with visually similar patterns into combined modules
hs.new <- list(c(1, 3, 8), c(2, 6), c(4, 5, 7, 11, 12), c(9, 10))
hs.list.2 <- lapply(hs.new, function(x) {
rownames(hs.modules)[hs.modules$Module %in% x]
})
# Recalculate module scores for the newly combined modules
opc_olig3 <- AddModuleScore(opc_olig3, features = hs.list.2)
# Rename the new module score columns for clarity
colnames([email protected])[grep('^Cluster', colnames([email protected]))] <- paste0('HS2_', 1:length(hs.list.2))
# Set cell identities based on predefined annotations
opc_olig3$fgroup <- factor(opc_olig3$anno_clus_dreamorigBB_bioarx_v2,
levels = c('TypeI_OPC', 'TypeII_OPC', 'COP',
'TypeI_Oligo_OPALIN', 'TypeII_Oligo_RBFOX1'))
Idents(opc_olig3) <- 'fgroup'
# Plot scatter plots for each combined gene module score vs. pseudotime
plot.list <- lapply(1:length(hs.list.2), function(x) {
FeatureScatter(opc_olig3, feature1 = 'pseudotime_sc_OPC1',
feature2 = paste0('HS2_', x), pt.size = 0.6)
})
# Display scatter plots in a grid layout
cowplot::plot_grid(plotlist = plot.list)
# Predict trend lines for hotspot modules over pseudotime using LOESS regression
pred.use <- c()
for (i in 1:length(hs.list)) {
data.use <- FetchData(opc_olig3, vars = c('pseudotime_sc_OPC1', paste0('HS_', i)))
colnames(data.use) <- c('pt', 'feature')
# Fit a LOESS model for the trend line
model.use <- loess(feature ~ pt, data = data.use)
pred.use <- cbind(pred.use, model.use$fitted)
}
# Convert predictions to a data frame for plotting
pred.use <- as.data.frame(pred.use)
colnames(pred.use) <- paste0('HS_', 1:ncol(pred.use))
pred.use$pt <- data.use$pt
rownames(pred.use) <- Cells(opc_olig3)
# Plot trend lines for each merged gene module
plot.list <- lapply(1:length(hs.new), function(x) {
plot.data <- reshape2::melt(pred.use[, paste0('HS_', hs.new[[x]])])
colnames(plot.data) <- c('Module', 'Value')
plot.data$pt <- rep(pred.use$pt, times = length(hs.new[[x]]))
ggplot(plot.data, aes(x = pt, y = Value, color = Module)) +
geom_line(size = 1.2) + theme_bw() + ggtitle(paste0('Trend_', x))
})
# Display trend line plots in a grid layout
cowplot::plot_grid(plotlist = plot.list)
# -------- Pseudotime vs eRegulon Activity Correlation Analysis --------
# Load the processed Seurat object and SCENIC results
opc_olig3 <- readRDS('OPCOligObject.rds')
ereg.use <- readRDS('scenic_sc.rds')
# Ensure only shared cells between Seurat and SCENIC results are used
cells.use <- rownames(ereg.use)
opc_olig3 <- subset(opc_olig3, cells = cells.use)
# Add SCENIC results as a new assay
opc_olig3[['scenic']] <- CreateAssayObject(data = t(ereg.use))
DefaultAssay(opc_olig3) <- 'scenic'
# Scale SCENIC assay data
opc_olig3 <- ScaleData(opc_olig3, features = rownames(opc_olig3))
# Randomly sample cells for plotting, stratified by pseudotime bins
set.seed(42)
plot.cells <- lapply(levels(opc_olig3$pt_bin), function(x) {
sample(Cells(opc_olig3)[opc_olig3$pt_bin == x], size = min(1000, sum(opc_olig3$pt_bin == x)))
})
plot.cells <- unlist(plot.cells)
plot.cells <- plot.cells[order([email protected][plot.cells, 'pseudotime_sc_OPC1'])]
# Correlation between pseudotime and eRegulon AUC scores
cor.ereg <- sapply(1:ncol(ereg.use), function(x)
cor(opc_olig3$pseudotime_sc_OPC1, ereg.use[cells.use, x])
)
names(cor.ereg) <- colnames(ereg.use)
cor.ereg <- sort(cor.ereg)
# Identify top 10 positively and negatively correlated eRegulons
plot.features <- c(head(names(cor.ereg), 10), tail(names(cor.ereg), 10))
plot.features <- gsub('_', '-', plot.features)
# Plot heatmap of eRegulon activity
DoHeatmap(opc_olig3, features = plot.features, group.by = 'is_cell',
cells = plot.cells, label = FALSE)
# Switch to RNA assay and plot TF expression for top correlated eRegulons
DefaultAssay(opc_olig3) <- 'SCT'
plot.features <- unname(sapply(plot.features, function(x)
unlist(strsplit(x, '-'))[1]
))
opc_olig3 <- GetResidual(opc_olig3, plot.features)
# Plot heatmap of TF expression
DoHeatmap(opc_olig3, features = plot.features, group.by = 'is_cell',
cells = plot.cells, label = FALSE)
# Save the final Seurat object
saveRDS(opc_olig3, file = 'OPC_Olig_final.rds')
# -----------------------------------------------------------------------------
# END OF SCRIPT
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Summary of the Code
# -----------------------------------------------------------------------------
# 1. **Load Necessary Libraries**:
# - Load required R libraries for single-cell RNA-seq and ATAC-seq data analysis, visualization, and data manipulation.
# 2. **Load Data**:
# - Load the preprocessed Seurat object (`OPCOligObject.rds`) containing the OPC and oligodendrocyte data.
# - Load and integrate external cell annotations (`3DG_OPCOlig_annotation_Sep2023.csv`) into the Seurat object.
# 3. **Process Gene Modules from Hotspot **:
# - Load gene module information from Hotspot analysis (`hs_results_modules.csv`).
# - Convert gene modules into a list, excluding non-informative modules.
# - Calculate module scores for each cell based on these gene modules and add them to the Seurat object.
# 4. **Combine Gene Modules**:
# - Merge visually similar gene modules into new combined modules.
# - Recalculate module scores for these newly combined modules and update the Seurat object.
# 5. **Set Cell Identities**:
# - Set cell identities based on predefined annotations and update the Seurat object's identity classes.
# 6. **Plot Gene Module Scores vs. Pseudotime**:
# - Create scatter plots to visualize the relationship between pseudotime and each gene module score.
# - Display these plots in a grid layout.
# 7. **Predict Trend Lines**:
# - Fit LOESS regression models to predict trend lines for each gene module score over pseudotime.
# - Plot these trend lines to visualize trends.
# 8. **Pseudotime vs. eRegulon Activity Correlation Analysis**:
# - Load SCENIC results (`scenic_sc.rds`) and ensure compatibility with the Seurat object.
# - Add SCENIC results as a new assay in the Seurat object and scale the data.
# - Randomly sample cells for plotting and calculate correlations between pseudotime and eRegulon AUC scores.
# - Identify the top 10 positively and negatively correlated eRegulons and plot heatmaps of their activity and transcription factor expression.
# 9. **Save Final Object**:
# - Save the updated Seurat object with processed data and results to an RDS file (`OPC_Olig_final.rds`).
# -----------------------------------------------------------------------------