-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path-DESKTOP-9TH2FK4.Rhistory
105 lines (105 loc) · 4.29 KB
/
-DESKTOP-9TH2FK4.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
library(dplyr)
library(ggplot2)
library(tidyr)
library(ggpubr)
library(NeoEnrichment)
library(ggprism)
library(cowplot)
library(patchwork)
library(reshape2)
library(stringr)
devtools::install_github("wt12318/NeoEnrichment",ref="dev")
BiocManager::install("BSgenome")
devtools::install_github("wt12318/NeoEnrichment",ref="dev")
library(NeoEnrichment)
## Keep TCGA tumor samples only, one row per sample barcode.
## A sample is labelled ecDNA "yes" when at least one of its rows has
## sample_classification == "Circular", otherwise "no".
dt <- readxl::read_xlsx(
  "../data/Extrachromosomal DNA is associated with oncogene amplification and poor outcome across multiple cancers.xlsx",
  sheet = 3
) %>%
  filter(
    tumor_or_normal != "normal",
    grepl("TCGA", sample_barcode)
  ) %>%
  group_by(sample_barcode) %>%
  summarise(
    ecDNA = ifelse(any(sample_classification == "Circular"), "yes", "no")
  ) %>%
  # get_cancer_type() is not defined in this file (presumably provided by
  # NeoEnrichment); it is given the sample barcodes and its result is stored
  # as the `cancer` column.
  mutate(cancer = get_cancer_type(sample_barcode))
## Expression table: one row per gene (columns `id`, `gene`) plus further
## columns that are selected by sample barcode below. Values are assumed to
## be on a log2 scale, judging by the object name -- TODO confirm.
tpm_gene_log2 <- readRDS("../../tmp/tpm_gene_log2.rds")

## Restrict to the genes of interest (each listed once; membership via %in%
## is unaffected by order) and to the samples that are present in `dt`.
tpm_gene <- tpm_gene_log2 %>%
  select(-id) %>%
  filter(
    gene %in% c(
      "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G", "HLA-H",
      "HLA-DQB1", "HLA-DRB1", "HLA-DQA1", "HLA-DRB5", "HLA-DRA",
      "HLA-DPB1", "HLA-DPA1", "HLA-DQB2", "HLA-DQA2", "HLA-DOA",
      "HLA-DMA", "HLA-DPB2", "HLA-DRB6", "HLA-DOB", "HLA-DMB",
      "CIITA", "HSPH1", "IFNL1", "IL33", "NLRC5", "NLRP12", "AZU1",
      "IFNG", "IL10", "IL4", "JAK2", "SIRT1", "TLR4", "TMEM106A", "XBP1"
    )
  ) %>%
  select(gene, any_of(dt$sample_barcode))

## Transpose so that samples become rows and genes become columns; the gene
## names are carried over as column names through the row names.
rownames(tpm_gene) <- tpm_gene$gene
tpm_gene <- tpm_gene %>%
  select(-gene) %>%
  t() %>%
  as.data.frame()

## Expose the sample barcode (currently the row names) as a regular column.
tpm_gene[["sample"]] <- rownames(tpm_gene)

## Duplicate CIITA and IL33 under a second name so that each of them shows
## up twice once the table is reshaped to long format.
tpm_gene[["CIITA_1"]] <- tpm_gene[["CIITA"]]
tpm_gene[["IL33_1"]] <- tpm_gene[["IL33"]]
## Attach the ecDNA status and cancer type of every sample to its expression
## profile. The join key is spelled out instead of relying on the implicit
## natural join ("sample" is the only column the two tables share).
tpm_dt <- left_join(
  tpm_gene,
  dt %>% rename(sample = sample_barcode),
  by = "sample"
)

## Per cancer type: number of samples and number of ecDNA-positive samples,
## largest cohorts first.
tpm_summ <- tpm_dt %>%
  group_by(cancer) %>%
  summarise(
    total_sample = n(),
    ecDNA_postive = sum(ecDNA == "yes")
  ) %>%
  arrange(desc(total_sample))

## Keep only cancer types with more than 20 ecDNA-positive samples.
need_cancer <- tpm_summ %>%
  filter(ecDNA_postive > 20) %>%
  pull(cancer)
tpm_dt <- tpm_dt %>% filter(cancer %in% need_cancer)

## Reshape to long format: one row per sample x gene.
## Every column except the sample annotations is an expression column, so the
## annotations are excluded by name. The previous positional range
## ("CIITA_1":"HLA-DMB") only covered all genes when "HLA-DMB" happened to be
## the last gene column, which depends on the row order of the input file.
tpm_dt <- tpm_dt %>%
  select(c("CIITA_1", "IL33_1"), everything()) %>%
  tidyr::pivot_longer(
    cols = -c(sample, ecDNA, cancer),
    names_to = "gene",
    values_to = "exp"
  )

## Make sure the expression values are numeric after the earlier transpose.
tpm_dt$exp <- as.numeric(tpm_dt$exp)

## Recompute the cancer type from the sample barcode (done once; the
## original repeated this identical statement twice).
tpm_dt$cancer <- get_cancer_type(tpm_dt$sample)
## Fix the order in which genes appear on the x axis: HLA genes first, then
## the remaining genes, with the duplicated CIITA_1 / IL33_1 entries last.
tpm_dt$gene <- factor(
  tpm_dt$gene,
  levels = c(
    "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G", "HLA-H",
    "HLA-DQB1", "HLA-DRB1", "HLA-DQA1",
    "HLA-DRB5", "HLA-DRA", "HLA-DPB1",
    "HLA-DPA1", "HLA-DQB2", "HLA-DQA2", "HLA-DOA",
    "HLA-DMA", "HLA-DPB2", "HLA-DRB6", "HLA-DOB",
    "HLA-DMB", "CIITA", "HSPH1", "IFNL1",
    "IL33", "NLRC5", "NLRP12", "AZU1", "IFNG", "IL10",
    "IL4", "JAK2", "SIRT1", "TLR4", "TMEM106A", "XBP1", "CIITA_1", "IL33_1"
  )
)

## Box plot of expression per gene, with one box per ecDNA status.
ggplot(data = tpm_dt, aes(x = gene, y = exp, fill = ecDNA)) +
  geom_boxplot() +
  # Label each gene with the significance symbol computed by
  # stat_compare_means(); after_stat() replaces the deprecated
  # `..p.signif..` notation.
  stat_compare_means(aes(label = after_stat(p.signif))) +
  theme_prism() +
  labs(y = "log2(TPM + 0.001)") +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    axis.title.x = element_blank()
  ) +
  # Hide the fill legend; "none" replaces the deprecated logical form and
  # avoids the reassignable shorthand `F`.
  guides(fill = "none")
#' Count samples per cancer type in long format.
#'
#' @param samples A data frame with one row per sample and the columns
#'   `cancer` and `ecDNA` (`"yes"` / `"no"`).
#' @return A tibble with the columns `cancer` (a factor whose levels are
#'   ordered by decreasing number of samples), `type` (`"total_sample"` or
#'   `"ecDNA_positive"`) and `counts`.
count_ecdna_by_cancer <- function(samples) {
  long_counts <- samples %>%
    group_by(cancer) %>%
    summarise(
      total_sample = n(),
      # Spelled "ecDNA_positive" because this name is shown in the legend.
      ecDNA_positive = sum(ecDNA == "yes")
    ) %>%
    arrange(desc(total_sample)) %>%
    pivot_longer(
      cols = c("total_sample", "ecDNA_positive"),
      names_to = "type",
      values_to = "counts"
    )
  # Freeze the sorted order so the bars are not re-sorted alphabetically.
  long_counts$cancer <- factor(
    long_counts$cancer,
    levels = unique(long_counts$cancer)
  )
  long_counts
}

#' Dodged bar chart of total vs. ecDNA-positive sample counts per cancer type.
#'
#' @param long_counts Output of `count_ecdna_by_cancer()`.
#' @return A ggplot object (printed when called at top level).
plot_ecdna_counts <- function(long_counts) {
  ggplot(data = long_counts, aes(x = cancer, y = counts, fill = type)) +
    geom_bar(stat = "identity", position = "dodge") +
    theme_prism() +
    theme(
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
      axis.title.x = element_blank()
    )
}

## All TCGA tumor samples with an ecDNA call. The plot is drawn once; the
## original issued the identical ggplot() call twice.
summ <- count_ecdna_by_cancer(dt)
plot_ecdna_counts(summ)

## Same summary, restricted to samples that have a column in the expression
## table.
dt_exp <- dt %>% filter(sample_barcode %in% colnames(tpm_gene_log2))
summ <- count_ecdna_by_cancer(dt_exp)
plot_ecdna_counts(summ)