-
Notifications
You must be signed in to change notification settings - Fork 4
/
fig_smart_seq.Rmd
217 lines (170 loc) · 8.12 KB
/
fig_smart_seq.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
---
title: "Analysis of Smart-Seq2 data"
output: html_notebook
date: "`r format(Sys.time(), '%d %B, %Y')`"
---
```{r, message=FALSE, warning=FALSE}
library(pagoda2)
library(conos)
library(magrittr)
library(ggplot2)
library(pbapply)
library(tidyverse)
library(cowplot)
library(readr)
library(scrattch.io)
library(ggalluvial)
devtools::load_all()
theme_set(theme_bw())
outPath <- function(...) OutputPath("fig_smart_seq", ...)
allenDataPath <- function(...) file.path("~/mh/Data/allen_human_cortex/", ...)
```
## Load data
```{r, message=FALSE, warning=FALSE}
con <- CachePath("con_filt_cells.rds") %>% read_rds()
sample_per_cell <- con$getDatasetPerCell()
annotation_by_level <- read_csv(MetadataPath("annotation.csv"))
annotation <- annotation_by_level %$% setNames(l4, cell) %>% .[names(sample_per_cell)] %>%
.[. != "Excluded"]
neuron_type_per_type <- ifelse(grepl("L[2-6].+", unique(annotation)), "Excitatory", "Inhibitory") %>%
setNames(unique(annotation))
type_order <- names(neuron_type_per_type)[order(neuron_type_per_type, names(neuron_type_per_type))]
```
## Alignment to our Smart-Seq dataset
```{r}
so <- readRDS("/d0-mendel/home/demharters/R/projects/UPF9_14_17_19_22_23_24_32_33/seurat_upf.rds")
annot_old <- [email protected] %$% setNames(subtypesKU, rownames(.))
cm_ss <- [email protected]
dim(cm_ss)
median(Matrix::colSums(cm_ss))
median(Matrix::colSums(cm_ss > 0))
dim(cm_ss)
Matrix::colSums(cm_ss > 0) %>% qplot(xlab="#Genes", ylab="#Cells")
Matrix::colSums(cm_ss) %>% log10() %>% qplot()
```
```{r}
p2_ss <- GetPagoda(cm_ss)
con_ss <- Conos$new(c(con$samples, list(SS2=p2_ss)), n.cores=40)
con_ss$buildGraph(verbose=T, var.scale=T, k=15, k.self=5, k.self.weight=0.1)
con_ss$findCommunities(method=conos::leiden.community, resolution=10)
con_ss$embedGraph(method="UMAP", spread=1.5, min.dist=1, verbose=T, n.cores=30, min.prob.lower=1e-5)
write_rds(con_ss, CachePath("con_ss.rds"))
con_ss <- CachePath("con_ss.rds") %>% read_rds()
```
```{r}
annot_ss_new <- con_ss$propagateLabels(annotation, max.iters=50, verbose=T) %$%
labels[rownames(con_ss$samples$SS2$counts)]
tibble(Cell=names(annot_ss_new), Type=annot_ss_new) %>%
write_csv(MetadataPath("annotation_smart_seq.csv"))
```
```{r, fig.width=7, fig.height=7, message=FALSE}
p_all <- con_ss$plotGraph(alpha=0.2, size=0.05, mark.groups=T, show.legend=F, groups=c(annotation, annot_ss_new),
raster=T, raster.dpi=150, font.size=4, shuffle.colors=T, show.labels=T, plot.na=F) +
theme(panel.grid=element_blank()) + labs(x="UMAP 1", y="UMAP 2")
p_emb <- con_ss$plotGraph(alpha=0.2, size=0.05, mark.groups=T, show.legend=F, groups=annot_ss_new,
raster=T, raster.dpi=150, font.size=4, show.labels=T) +
theme(panel.grid=element_blank()) + labs(x="UMAP 1", y="UMAP 2")
p_emb$layers[[3]]$aes_params$alpha <- 0.01
p_emb$layers[[1]]$aes_params$alpha <- 1
p_emb$layers[[2]] <- p_all$layers[[2]]
p_emb$layers <- p_emb$layers[c(3, 1, 2)]
p_emb$scales$scales[[2]] <- p_all$scales$scales[[2]]
# ggsave(outPath("ss_all.pdf"), p_all)
# ggsave(outPath("ss_subset.pdf"), p_emb)
p_all
p_emb
```
```{r, message=FALSE}
num_df <- table(Type=annot_ss_new) %>% as_tibble() %>%
mutate(Frac = n / sum(n), NeuronType=neuron_type_per_type[Type]) %>%
mutate(Type = factor(Type, levels=type_order))
ggplot(num_df) +
geom_bar(aes(x=Type, y=Frac * 100, fill=NeuronType), stat="identity") +
scale_y_continuous(expand=expansion(c(0, 0.05))) +
scale_fill_brewer("", palette="Set2") +
labs(x="", y="% of cells") +
theme(legend.position=c(1, 1.1), legend.justification=c(1, 1), panel.grid.major.x=element_blank(),
axis.text.x=element_text(angle=90, hjust=1, vjust=0.5), legend.background=element_blank())
ggsave(outPath("ss_type_frac.pdf"))
```
## Annotation to Allen data
```{r, message=FALSE, warning=FALSE}
sample_info <- allenDataPath("sample_annotations.csv") %>% read_csv()
annot_allen <- sample_info %$% list(
l0=setNames(as.character(class_label), sample_name),
l1=setNames(as.character(subclass_label), sample_name),
l3=setNames(as.character(cluster_label), sample_name)
)
annot_allen$l2 <- strsplit(annot_allen$l3, " ") %>% sapply(function(x) paste(x[1:(length(x)-1)], collapse=" "))
```
```{r}
tome <- allenDataPath("transcrip.tome")
cm_allen <- read_tome_dgCMatrix(tome, "data/t_exon") %>%
set_colnames(read_tome_sample_names(tome)) %>% set_rownames(read_tome_gene_names(tome)) %>%
.[, !(annot_allen$l0[colnames(.)] %in% c("Exclude", "Non-neuronal"))]
dataset_id <- sample_info %$% setNames(external_donor_name_label, sample_name)
cms_per_dataset <- colnames(cm_allen) %>% split(dataset_id[.]) %>% lapply(function(ns) cm_allen[,ns])
p2s_allen <- lapply(cms_per_dataset, basicP2proc, n.cores=30, k=15,
get.largevis=F, make.geneknn=F, get.tsne=F)
```
```{r, message=FALSE, warning=FALSE}
con_allen <- con$samples[c("C6", "C7", "C8")] %>% c(p2s_allen) %>% Conos$new(n.cores=30)
source_fac <- names(con_allen$samples) %>% setNames(., .) %>% substr(1, 1)
con_allen$buildGraph(verbose=T, var.scale=T, k=20, k.self=5, k.self.weight=0.1, space="CCA",
same.factor.downweight=0.1, balancing.factor.per.sample=source_fac)
con_allen$embedGraph(method="UMAP", spread=1, min.dist=0.5, verbose=T, n.cores=30, min.prob.lower=1e-7)
write_rds(con_allen, CachePath("con_allen.rds"))
con_allen <- CachePath("con_allen.rds") %>% read_rds()
```
```{r, fig.width=7, fig.height=7, message=FALSE}
con_allen$plotGraph(color.by='sample', size=0.1, alpha=0.1, mark.groups=F, show.legend=T,
legend.pos=c(1, 1), raster=T, raster.dpi=150) +
theme(legend.title=element_blank())
ggsave(outPath("allen_alignment_sample.pdf"))
con_allen$plotGraph(groups=annot_allen$l2, size=0.1, font.size=c(2, 3), alpha=0.1,
raster=T, raster.dpi=150)
ggsave(outPath("allen_alignment_allen.pdf"))
con_allen$plotGraph(groups=annotation, size=0.1, font.size=c(2, 3), alpha=0.1,
raster=T, raster.dpi=150)
ggsave(outPath("allen_alignment_ours.pdf"))
```
```{r}
labels_prop <- con_allen$propagateLabels(annot_allen$l3, max.iters=50, verbose=T)
con_allen$plotGraph(groups=labels_prop$labels, size=0.1, font.size=c(2, 3), alpha=0.1)
```
```{r}
type_ranks <- factor(type_order, levels=type_order) %>% as.integer() %>% setNames(type_order)
t_ann <- annotation %>% .[intersect(names(.), names(con_allen$getDatasetPerCell()))]
freq_df <- table(KU=t_ann, Allen=labels_prop$labels[names(t_ann)]) %>% as_tibble() %>%
group_by(KU) %>% mutate(total_n=sum(n), freq=n / total_n) %>% ungroup() %>% filter(n > 1, freq > 0.05) %>%
mutate(NeuronType=neuron_type_per_type[KU])
allen_type_order <- freq_df %>% split(.$Allen) %>% lapply(function(x) x %$% setNames(n, KU) %>% `/`(sum(.))) %>%
sapply(function(ws) as.numeric(type_ranks[names(ws)[which.max(ws)]]) + sum(type_ranks[names(ws)] * ws) / 10) %>% sort() %>% names()
freq_df %<>% mutate(KU=factor(KU, levels=type_order), Allen=factor(Allen, levels=allen_type_order))
```
```{r}
plotAlluvium <- function(p.df, min.box.h, width.left, width.right, alpha=1.0, ...) {
s.ku <- p.df %$% split(n, droplevels(KU)) %>% sapply(sum)
s.allen <- p.df %$% split(n, droplevels(Allen)) %>% sapply(sum)
s.total <- c(rev(s.ku), rev(s.allen)) %>% sqrt() %>% sqrt() %>% `/`(max(.))
palette <- sccore::fac2col(p.df$KU, return.details=T, ...) %$%
setNames(sample(palette), names(palette)) %>% alpha(alpha=alpha)
ggplot(p.df, aes(axis1=KU, axis2=Allen, y=pmax(sqrt(n), min.box.h))) +
geom_alluvium(aes(fill=KU)) +
geom_stratum(width=c(rep(width.left, length(s.ku)), rep(width.right, length(s.allen)))) +
geom_text(stat="stratum", infer.label=TRUE, size=s.total * 2 + 1) +
scale_x_continuous(expand=c(0, 0)) +
scale_y_continuous(expand=c(0, 0)) +
theme_void() + theme(legend.position="none") +
scale_fill_manual(values=palette)
}
```
```{r, fig.width=5, fig.height=6, message=FALSE}
filter(freq_df, NeuronType == "Excitatory") %>% plotAlluvium(10, 0.3, 0.5, v=0.7, s=1.0)
ggsave(outPath("allen_mapping_ex.pdf"))
```
```{r, fig.width=5, fig.height=8, message=FALSE}
filter(freq_df, NeuronType == "Inhibitory") %>%
plotAlluvium(2, 0.25, 0.4, v=0.8, s=1.0)
ggsave(outPath("allen_mapping_inh.pdf"))
```