Skip to content

Commit

Permalink
add all other figures
Browse files Browse the repository at this point in the history
  • Loading branch information
wikiselev committed Mar 1, 2017
1 parent 480d84c commit 3b9ab0e
Show file tree
Hide file tree
Showing 25 changed files with 61,704 additions and 0 deletions.
183 changes: 183 additions & 0 deletions 2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
library(cowplot)
library(pheatmap)
library(dplyr)

# Fix the RNG state; get_a() draws randomly jittered points
# (position_jitterdodge), so this keeps the figure reproducible between runs.
set.seed(12323414)
# Keep strings read by read.csv()/data.frame() as character vectors rather
# than factors (this only became the default in R 4.0).
options(stringsAsFactors = FALSE)
# Base font size shared by the ggplot themes, the cowplot label and the
# pheatmap call below.
font_size <- 8

# Subset a gtable-like object to the selected entries.
#
# `x` must carry a `layout` table and a parallel `grobs` list. Everything
# passed through `...` is combined with c() into a single index, which is then
# applied to the rows of `layout` and to `grobs` alike. All other components
# of `x` are left untouched. Returns the subset object.
gtable_select <- function(x, ...) {
  keep <- c(...)
  x$grobs <- x$grobs[keep]
  x$layout <- x$layout[keep, , drop = FALSE]
  x
}

# Merge the entries of `g2` into `g1`.
#
# Before the two layout tables are row-bound, the z values of `g1$layout` are
# shifted down by their maximum (so the largest becomes 0) and the whole `name`
# column of `g1$layout` is overwritten with "g2". The grobs of `g2` are
# appended after those of `g1`. Returns the modified `g1`.
gtable_stack <- function(g1, g2) {
  base_layout <- g1$layout
  base_layout$z <- base_layout$z - max(base_layout$z)
  base_layout$name <- "g2"

  g1$layout <- rbind(base_layout, g2$layout)
  g1$grobs <- c(g1$grobs, g2$grobs)
  g1
}

# Build panel "a" of Fig. 2.
#
# Reads "data/2a.csv" (columns used: Hierarchy, Method, ARI) and draws one
# facet per Hierarchy level: the per-Method median ARI as dodged bars, the
# individual ARI values as jittered points, and a horizontal line at 0.8.
# A second "dummy" plot, whose panels are filled with one color per Hierarchy
# level, supplies panel and top-strip grobs that are merged into the main
# plot's gtable.
#
# Depends on the file-level `font_size` and on gtable_select() /
# gtable_stack(). Returns the merged gtable, not a ggplot object.
get_a <- function() {
  d <- read.csv("data/2a.csv")

  # Shorten the long dataset label. Indexing the column itself (instead of
  # assigning into a row subset of the data frame) is a no-op when no row
  # matches, and %in% never yields NA.
  d$Hierarchy[d$Hierarchy %in% "Kolodziejczyk"] <- "Kolodz."

  # Factor levels fix the facet order.
  d$Hierarchy <- factor(
    d$Hierarchy,
    levels = c(
      "Biase", "Yan", "Goolam", "Deng", "Pollen1", "Pollen2",
      "Kolodz.", "Treutlein", "Ting",
      "Patel", "Usoskin1", "Usoskin2", "Usoskin3",
      "Klein", "Zeisel"
    )
  )

  # Factor levels fix the order of the methods within each facet.
  d$Method <- factor(
    d$Method,
    levels = c(
      "SC3", "tSNE+kmeans", "pcaReduce", "SNN-Cliq", "SINCERA", "SEURAT"
    )
  )

  # Fill colors of the dummy plot's panels, keyed by Hierarchy level.
  cols <- c(
    "Biase" = "#bc80bd", "Treutlein" = "#8dd3c7", "Ting" = "#ffffb3",
    "Yan" = "#ccebc5", "Goolam" = "#ffed6f", "Deng" = "#bebada",
    "Pollen1" = "#fb8072", "Pollen2" = "#fb8072",
    "Patel" = "#80b1d3", "Usoskin1" = "#fdb462", "Usoskin2" = "#fdb462",
    "Usoskin3" = "#fdb462", "Kolodz." = "#bf812d",
    "Klein" = "#b3de69", "Zeisel" = "#fccde5", "Macosko" = "#d9d9d9"
  )

  # Fill colors of the bars/points, keyed by Method.
  meth_cols <- c(
    "SC3" = "#e41a1c",
    "tSNE+kmeans" = "#377eb8",
    "pcaReduce" = "#40E0D0",
    "SNN-Cliq" = "#984ea3",
    "SINCERA" = "#ff7f00",
    "SEURAT" = "#ffff33"
  )

  # Median ARI per method and dataset (bar heights); ungroup so no grouping
  # metadata leaks into the plotting layer.
  d1 <- d %>%
    group_by(Method, Hierarchy) %>%
    dplyr::summarise(Median = median(ARI)) %>%
    dplyr::ungroup()

  p <- ggplot(d, aes(x = 1, y = ARI, fill = Method, group = Method)) +
    geom_bar(
      data = d1, aes(y = Median),
      position = "dodge", stat = "identity"
    ) +
    geom_point(
      position = position_jitterdodge(jitter.width = 0.45, dodge.width = 0.9),
      size = 0.4
    ) +
    facet_wrap(ncol = 5, ~ Hierarchy) +
    scale_fill_manual(values = meth_cols) +
    scale_colour_manual(values = meth_cols) +
    geom_hline(yintercept = 0.8) +
    labs(x = "") +
    theme_classic(base_size = font_size) +
    theme(
      axis.ticks.x = element_blank(), axis.text.x = element_blank(),
      axis.title.x = element_blank(), axis.line = element_blank(),
      legend.key.size = unit(0.4, "cm")
    ) +
    # The theme's axis lines are blanked above; draw them per panel instead.
    annotate(
      "segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf, color = "black"
    ) +
    annotate(
      "segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf, color = "black"
    )

  # Same faceting as `p`, but every panel is one rectangle filled with the
  # color of its Hierarchy level.
  dummy <- ggplot(d, aes(x = 1, y = ARI, fill = Method)) +
    facet_wrap(ncol = 5, ~ Hierarchy) +
    geom_rect(
      aes(fill = Hierarchy),
      xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
    ) +
    scale_fill_manual(values = cols) +
    theme_minimal()

  g1 <- ggplotGrob(p)
  g2 <- ggplotGrob(dummy)

  panels <- grepl(pattern = "panel", g2$layout$name)
  strips <- grepl(pattern = "strip-t", g2$layout$name)
  # Move the colored panel rectangles one layout row up
  # (presumably onto the row of the top strips -- confirm against the gtable).
  g2$layout$t[panels] <- g2$layout$t[panels] - 1
  g2$layout$b[panels] <- g2$layout$b[panels] - 1

  new_strips <- gtable_select(g2, panels | strips)

  gtable_stack(g1, new_strips)
}

# Build panel "c" of Fig. 2.
#
# Reads "data/2c.csv" (columns used: Dataset, Fraction, ARI) and draws ARI
# box plots per Dataset, with one facet column per Fraction value (facets are
# ordered numerically), a horizontal line at 0.8 and the y axis limited to
# [0, 1]. The plot is wrapped with ggdraw() and a small bold caption is drawn
# on top of it. Depends on the file-level `font_size`. Returns the object
# produced by ggdraw() + draw_label().
get_c <- function() {
  training <- read.csv("data/2c.csv")

  # Factor levels fix the order of the boxes and of the legend entries.
  dataset_order <- c(
    "Deng", "Pollen2", "Kolodziejczyk", "Patel",
    "Usoskin3", "Klein", "Zeisel", "Macosko"
  )
  training$Dataset <- factor(training$Dataset, levels = dataset_order)

  # Order the facets by the numeric value of the fraction.
  fraction_order <- sort(unique(as.numeric(training$Fraction)))
  training$Fraction <- factor(training$Fraction, levels = fraction_order)

  # Fill/outline colors keyed by dataset name.
  dataset_cols <- c(
    "Treutlein" = "#8dd3c7", "Ting" = "#ffffb3", "Deng" = "#bebada",
    "Pollen2" = "#fb8072", "Patel" = "#80b1d3",
    "Kolodziejczyk" = "#bf812d", "Usoskin3" = "#fdb462",
    "Klein" = "#40E0D0", "Zeisel" = "#fccde5", "Macosko" = "#d9d9d9"
  )

  # Theme tweaks applied on top of theme_classic().
  panel_theme <- theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.line = element_blank(),
    strip.background = element_rect(colour = "white"),
    legend.key.size = unit(0.4, "cm")
  )

  # The theme's axis lines are blanked above; draw them per panel instead
  # (bottom edge first, then left edge).
  axis_segments <- list(
    annotate(
      "segment", x = -Inf, xend = Inf, y = -Inf, yend = -Inf, color = "black"
    ),
    annotate(
      "segment", x = -Inf, xend = -Inf, y = -Inf, yend = Inf, color = "black"
    )
  )

  boxes <- ggplot(
    training,
    aes(x = 1, ARI, fill = Dataset, color = Dataset)
  ) +
    geom_boxplot(position = position_dodge(width = 1.5), outlier.size = 0.8) +
    geom_hline(yintercept = 0.8) +
    labs(x = "# of training cells as % of N", y = "ARI") +
    scale_fill_manual(values = dataset_cols) +
    scale_colour_manual(values = dataset_cols) +
    facet_grid(. ~ Fraction) +
    theme_classic(base_size = font_size) +
    panel_theme +
    ylim(0, 1) +
    axis_segments

  ggdraw(boxes) +
    draw_label(
      "% of total # of cells\nin a training set",
      fontface = "bold",
      size = font_size - 3,
      x = 0.87, y = 0.93
    )
}

# Build panel "d" of Fig. 2: a consensus-matrix heatmap.
#
# Reads "data/2d.rds", a list from which `cell.names`, `consensus` and `hc`
# are used. `hc` is passed to pheatmap as the clustering for both rows and
# columns, and both dendrograms are cut into 10 groups. Columns are annotated
# with a "Stage" factor built from `cell.names` (levels in order of first
# appearance), colored with a fixed 10-color palette.
#
# Side effect: writes the consensus matrix, reordered by `hc$order` and with
# the cell names as column headers, to "data/2d.csv".
#
# Depends on the file-level `font_size`. Returns the heatmap's gtable.
get_d <- function() {
  d <- readRDS("data/2d.rds")

  ann <- data.frame(
    Stage = factor(d$cell.names, levels = unique(d$cell.names))
  )
  # NOTE(review): the palette has exactly 10 entries, so this assumes 10
  # distinct cell names -- confirm against the data.
  anno_colors <- list(
    Stage = c(
      "#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C",
      "#FB9A99", "#FF00FF", "#FDBF6F", "#FF7F00",
      "#CAB2D6", "#6A3D9A"
    )
  )
  names(anno_colors$Stage) <- levels(ann$Stage)

  # Export the plotted data in dendrogram order.
  dat <- d$consensus
  colnames(dat) <- d$cell.names
  write.csv(
    dat[d$hc$order, d$hc$order],
    file = "data/2d.csv", quote = FALSE, row.names = FALSE
  )

  # silent = TRUE: only the gtable is needed here; the caller places it in the
  # combined figure. Logical arguments are spelled out because T/F are
  # ordinary variables that can be reassigned.
  p <- pheatmap(
    d$consensus,
    cluster_rows = d$hc,
    cluster_cols = d$hc,
    cutree_rows = 10,
    cutree_cols = 10,
    treeheight_col = 9,
    treeheight_row = 9,
    annotation_col = ann,
    annotation_colors = anno_colors,
    show_rownames = FALSE,
    show_colnames = FALSE,
    fontsize = font_size,
    annotation_names_col = FALSE,
    silent = TRUE
  )
  p$gtable
}

# Assemble Fig. 2 from its panels and save it as JPEG and PDF.

# Left column: panels "a" (top) and "c" (bottom).
first_col <- plot_grid(
  get_a(), get_c(),
  nrow = 2, labels = c("a", "c"), rel_heights = c(2, 1)
)

# Right column: panel "b" is an empty placeholder (NULL), panel "d" below it.
second_col <- plot_grid(
  NULL, get_d(),
  nrow = 2, labels = c("b", "d"), rel_heights = c(1.5, 1)
)

fig_2 <- plot_grid(first_col, second_col, ncol = 2)
# Bare expression: keeps the top-level auto-print of the original script.
fig_2

# Pass the plot explicitly instead of relying on last_plot(), and spell out
# width/height instead of relying on partial argument matching (w, h).
ggsave("jpeg/2.jpeg", plot = fig_2, width = 9, height = 6)
ggsave("pdf/2.pdf", plot = fig_2, width = 9, height = 6)

28 changes: 28 additions & 0 deletions 3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
library(pheatmap)
library(cowplot)

# Fig. 3: expression heatmap with pre-computed column clustering.
#
# Reads "data/3.rds", a list from which `exprs`, `hc` and `ann` are used.
# Writes the expression matrix with columns in `hc$order` to "data/3.csv" and
# saves the figure as JPEG and PDF.

options(stringsAsFactors = FALSE)
font_size <- 8

d <- readRDS("data/3.rds")

# Export the plotted data with columns in dendrogram order.
dat <- d$exprs
write.csv(
  dat[, d$hc$order],
  file = "data/3.csv", quote = FALSE, row.names = FALSE
)

# One color per level of the Cluster annotation.
anno_colors <- list(Cluster = c("#e41a1c", "#984ea3", "#40E0D0", "#FFFF33"))
names(anno_colors$Cluster) <- levels(d$ann$Cluster)

# Rows keep their given order and are separated by gaps after rows 10 and 20;
# columns use the supplied clustering, cut into 3 groups. Logical arguments
# are spelled out because T/F are ordinary variables that can be reassigned.
p <- pheatmap(
  d$exprs,
  cluster_cols = d$hc,
  cluster_rows = FALSE,
  cutree_cols = 3,
  gaps_row = c(10, 20),
  annotation_col = d$ann,
  annotation_colors = anno_colors,
  show_colnames = FALSE,
  silent = TRUE
)

fig_3 <- plot_grid(p$gtable, nrow = 1)
# Bare expression: keeps the top-level auto-print of the original script.
fig_3

# Pass the plot explicitly instead of relying on last_plot(), and spell out
# width/height instead of relying on partial argument matching (w, h).
ggsave("jpeg/3.jpeg", plot = fig_3, width = 9, height = 6)
ggsave("pdf/3.pdf", plot = fig_3, width = 9, height = 6)
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,22 @@ This repository contains scripts for recreation of the figures from the SC3 pape

![](jpeg/1d.jpeg)

### Fig. 2
[data_a](data/2a.csv)
[data_c](data/2c.csv)
[data_d](data/2d.csv)
[script](2.R)
[pdf](pdf/2.pdf)

![](jpeg/2.jpeg)

### Fig. 3
[data](data/3.csv)
[script](3.R)
[pdf](pdf/3.pdf)

![](jpeg/3.jpeg)

### Fig. S1

[data](data/S1.csv)
Expand Down Expand Up @@ -76,6 +92,22 @@ This repository contains scripts for recreation of the figures from the SC3 pape

![](jpeg/S6.jpeg)

### Fig. S7
[data_a1](data/S7a1.csv)
[data_a2](data/S7a2.csv)
[data_b1](data/S7b1.csv)
[data_b2](data/S7b2.csv)
[script](S7.R)

![](jpeg/S7.jpeg)

### Fig. S12
[data_a](data/S12a.csv)
[script](S12.R)
[pdf](pdf/S12.pdf)

![](jpeg/S12.jpeg)

## Session Info

```
Expand Down
15 changes: 15 additions & 0 deletions S12.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
library(cowplot)

# Fig. S12: box plot of the coefficient of variation per genotype.
#
# Reads "data/S12a.csv" (columns used: Genotype, CoV) and saves the figure as
# PDF and JPEG.

d <- read.csv("data/S12a.csv")

# Factor levels fix the order of the boxes: WT first, then Tet2.
d$Genotype <- factor(d$Genotype, levels = c("WT", "Tet2"))

p <- ggplot(d, aes(Genotype, CoV)) +
  geom_boxplot(width = 0.3) +
  theme_classic(base_size = 9) +
  labs(x = "", y = "Coefficient of variation")

# Panel "b" is an empty placeholder (NULL).
fig_s12 <- plot_grid(p, NULL, ncol = 2, labels = c("a", "b"))
# Bare expression: keeps the top-level auto-print of the original script.
fig_s12

# Pass the plot explicitly instead of relying on last_plot(), and spell out
# width/height instead of relying on partial argument matching (w, h).
ggsave("pdf/S12.pdf", plot = fig_s12, width = 9, height = 6)
ggsave("jpeg/S12.jpeg", plot = fig_s12, width = 9, height = 6)
38 changes: 38 additions & 0 deletions S7.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
library(cowplot)

# Fig. S7: histograms for two patients.
#
# Row "a": number of expressed genes per cell, with a red vertical line at
# 420 (Patient 1) and 900 (Patient 2).
# Row "b": ratio of ERCC reads to endogenous reads per cell, x axis limited to
# [0, 0.15]; the second histogram has a red vertical line at 0.05.
# Each input CSV must provide a column named `x`.

options(stringsAsFactors = FALSE)
font_size <- 9

d1 <- read.csv("data/S7a1.csv")
d2 <- read.csv("data/S7a2.csv")
ercc.fraction1 <- read.csv("data/S7b1.csv")
ercc.fraction2 <- read.csv("data/S7b2.csv")

p1 <- ggplot(d1, aes(x)) +
  geom_histogram(bins = 130) +
  geom_vline(xintercept = 420, color = "red") +
  labs(x = "# of expressed genes", y = "# of cells", title = "Patient 1") +
  theme_classic(base_size = font_size)

p2 <- ggplot(d2, aes(x)) +
  geom_histogram(bins = 130) +
  geom_vline(xintercept = 900, color = "red") +
  labs(x = "# of expressed genes", y = "# of cells", title = "Patient 2") +
  theme_classic(base_size = font_size)

p3 <- ggplot(ercc.fraction1, aes(x)) +
  geom_histogram(bins = 100) +
  labs(x = "(# of ERCC reads)/(# endogenous reads)", y = "# of cells") +
  xlim(0, 0.15) +
  theme_classic(base_size = font_size)

p4 <- ggplot(ercc.fraction2, aes(x)) +
  geom_histogram(bins = 100) +
  labs(x = "(# of ERCC reads)/(# endogenous reads)", y = "# of cells") +
  xlim(0, 0.15) +
  geom_vline(xintercept = 0.05, color = "red") +
  theme_classic(base_size = font_size)

fig_s7 <- plot_grid(p1, p2, p3, p4, ncol = 2, labels = c("a", "", "b", ""))
# Bare expression: keeps the top-level auto-print of the original script.
fig_s7

# The file name now matches the script name and the README, which embeds
# jpeg/S7.jpeg (the script previously wrote jpeg/S8.jpeg). The plot, width
# and height are passed explicitly instead of relying on last_plot() and on
# partial argument matching (w, h).
ggsave("jpeg/S7.jpeg", plot = fig_s7, width = 9, height = 6)
Loading

0 comments on commit 3b9ab0e

Please sign in to comment.