Skip to content

Commit

Permalink
adjusted y-axes for figs without natural origins
Browse files Browse the repository at this point in the history
  • Loading branch information
milkschen committed Nov 24, 2020
1 parent 554c383 commit 8838c56
Showing 1 changed file with 148 additions and 13 deletions.
161 changes: 148 additions & 13 deletions experiments/r_plots/hg38/hg38_rev1.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ source("plotting_utils.R")
```{r boxplot_sensitivity_refflow_prev_reads}
total <- 2000000
save = F
# df_sensitivity <- read.delim("data/map_accuracy/0906_100indiv.tsv", header = TRUE, sep = "\t")
df_sensitivity <- read.delim("data/map_accuracy/0914_100indiv.tsv", header = TRUE, sep = "\t")
sensitivity_method_name <- c("chr21-hisat_0.1", "chr21-grc", "chr21-major-1kg", "chr21-AFR-chr21_superpop_AFR_thrds0.5_S0_b1_ld0", "chr21-AMR-chr21_superpop_AMR_thrds0.5_S0_b1_ld0", "chr21-SAS-chr21_superpop_SAS_thrds0.5_S0_b1_ld0", "chr21-EAS-chr21_superpop_EAS_thrds0.5_S0_b1_ld0", "chr21-EUR-chr21_superpop_EUR_thrds0.5_S0_b1_ld0", "Matched", "chr21-vg_linear_p13", "chr21-major-10-thrds0.5_S0_b1_ld0-1kg", "chr21-vg_0.01", "chr21-vg_from_randflow_ld_six_p13", "chr21-vg_0.1", "chr21-randflow", "chr21-major-10-thrds0_S1_b1000_ld1-1kg", "chr21-randflow_26", "chr21-randflow_ld_26", "chr21-per", "chr21-per10")
# previous RandFlow: "chr21-major-10-thrds0_S1_b1_ld0-1kg"
sensitivity_rmethod_name <- c("HISAT2-10%", "GRCh38", "Major", "AFR", "AMR", "SAS", "EAS", "EUR", "Matched", "vg-GRC", "MajorFlow", "vg-1%", "vg-RandFlow-LD", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-26", "RandFlow-LD-26", "Personalized-2", "Personalized")
sensitivity_rmethod_name <- c("HISAT2-10%", "GRCh38", "Major", "AFR", "AMR", "SAS", "EAS", "EUR", "Matched", "vg-GRCh38", "MajorFlow", "vg-1%", "vg-RandFlow-LD", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-26", "RandFlow-LD-26", "Personalized-2", "Personalized")
dict_method_to_reduced <- hash()
for (i in seq(1, length(sensitivity_method_name))){
dict_method_to_reduced[[sensitivity_method_name[i]]] <- sensitivity_rmethod_name[i]}
Expand All @@ -45,17 +44,17 @@ map_median_per <- med[med$Reduced =="Personalized",]$True.Positive / total
# One-pass methods
filt_sensitivity_onepass <- c("GRCh38", "Major", "AFR", "AMR", "SAS", "EAS", "EUR", "Matched", "Personalized")
fig_sensitivity_onepass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_onepass), aes(x=Reduced, y=True.Positive/total, color = Super.Population)) +
geom_boxplot() +
fig_sensitivity_onepass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_onepass), aes(x=Reduced, y=True.Positive/total, color = Super.Population)) +
geom_boxplot() +
ylab("Sensitivity") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme_bw() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1), axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "right") +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "right") +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "right") +
scale_y_continuous(sec.axis = sec_axis(~(.-map_median_grc)/(map_median_per-map_median_grc), name = "Gain of Personalization"))
fig_sensitivity_onepass
# Two-pass methods and baselines
filt_sensitivity_twopass <- c("GRCh38", "Major", "Matched", "MajorFlow", "vg-GRC", "vg-RandFlow-LD", "vg-1%", "vg-10%", "RandFlow", "RandFlow-LD", "Personalized")
filt_sensitivity_twopass <- c("GRCh38", "Major", "Matched", "MajorFlow", "vg-GRCh38", "vg-RandFlow-LD", "vg-1%", "vg-10%", "RandFlow", "RandFlow-LD", "Personalized")
fig_sensitivity_twopass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_twopass), aes(x=Reduced, y=True.Positive/total, color = Super.Population)) +
geom_boxplot() +
ylab("Sensitivity") + xlab("Method") + labs(color="Super Population") +
Expand All @@ -66,7 +65,7 @@ fig_sensitivity_twopass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_se
fig_sensitivity_twopass
# Graph-related methods
filt_sensitivity_graph <- c("HISAT2-10%", "GRCh38", "vg-GRC", "vg-RandFlow-LD", "vg-1%", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-LD-26", "Personalized-2", "Personalized")
filt_sensitivity_graph <- c("HISAT2-10%", "GRCh38", "vg-GRCh38", "vg-RandFlow-LD", "vg-1%", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-LD-26", "Personalized-2", "Personalized")
fig_sensitivity_graph <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_graph), aes(x=Reduced, y=True.Positive/total, color = Super.Population)) +
geom_boxplot() +
ylab("Sensitivity") + xlab("Method") + labs(color="Super Population") +
Expand All @@ -87,13 +86,14 @@ if (save == T){
}
```


```{r boxplot_for_summary_bias}
df_summary_bias <- read.delim("data/allelic_bias/0906_25indiv.bias.tsv", header = T, sep = "\t")
df_summary_bias$SUM_TAIL <- df_summary_bias$NUM_REFBIAS.0.8. + df_summary_bias$NUM_ALTBIAS.0.2.
bias_method_name <- c("hisat_0.1", "grc", "major-1kg", "matched", "vg_linear", "thrds0.5_S0_b1_ld0-1kg", "vg_0.01", "vg_from_randflow_ld_six", "vg_0.1", "thrds0_S1_b1_ld0-1kg", "thrds0_S1_b1000_ld1-1kg", "randflow_ld_26", "per", "per10")
bias_rmethod_name <- c("HISAT2-10%", "GRCh38", "Major", "Matched", "vg-GRC", "MajorFlow", "vg-1%", "vg-RandFlow-LD", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-LD-26", "Personalized-2", "Personalized")
bias_rmethod_name <- c("HISAT2-10%", "GRCh38", "Major", "Matched", "vg-GRCh38", "MajorFlow", "vg-1%", "vg-RandFlow-LD", "vg-10%", "RandFlow", "RandFlow-LD", "RandFlow-LD-26", "Personalized-2", "Personalized")
dict_bias_method_to_reduced <- hash()
for (i in seq(1,length(bias_method_name))){
dict_bias_method_to_reduced[[bias_method_name[i]]] <- bias_rmethod_name[i]}
Expand Down Expand Up @@ -164,12 +164,85 @@ sim21_bias
if (save == T){
ggsave("figures/revision1/fig_boxplot_num_bias_vg_hisat2.pdf", width = 12, height = 6, plot=p_summary_bias)
ggsave("figures/revision1/fig_boxplot_r2a_vg_hisat2.pdf", width = 12, height = 6, plot=p_summary_r2a)
ggsave("figures/revision1/graph/fig_boxplot_num_bias_graph.pdf", width = 12, height = 6, plot=p_summary_bias_graph)
ggsave("figures/revision1/graph/fig_boxplot_r2a_graph.pdf", width = 12, height = 6, plot=p_summary_r2a_graph)
# ggsave("figures/revision1/graph/fig_boxplot_num_bias_graph.pdf", width = 12, height = 6, plot=p_summary_bias_graph)
# ggsave("figures/revision1/graph/fig_boxplot_r2a_graph.pdf", width = 12, height = 6, plot=p_summary_r2a_graph)
ggsave("figures/revision2/fig_boxplot_num_bias_graph.pdf", width = 12, height = 6, plot=p_summary_bias_graph)
ggsave("figures/revision2/fig_boxplot_r2a_graph.pdf", width = 12, height = 6, plot=p_summary_r2a_graph)
}
```

```{r boxplot_personalization_lhs}
fig_sensitivity_onepass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_onepass), aes(x=Reduced, y=(True.Positive/total-map_median_grc) / (map_median_per-map_median_grc), color = Super.Population)) +
geom_boxplot() +
ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "right") +
scale_y_continuous(sec.axis = sec_axis(~(.*(map_median_per-map_median_grc) + map_median_grc), name = "Sensitivity"))
fig_sensitivity_onepass
fig_sensitivity_twopass <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_twopass), aes(x=Reduced, y=(True.Positive/total-map_median_grc) / (map_median_per-map_median_grc), color = Super.Population)) +
geom_boxplot() +
ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "bottom") +
scale_y_continuous(sec.axis = sec_axis(~(.*(map_median_per-map_median_grc) + map_median_grc), name = "Sensitivity"))
fig_sensitivity_twopass
fig_sensitivity_graph <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_graph), aes(x=Reduced, y=(True.Positive/total-map_median_grc) / (map_median_per-map_median_grc), color = Super.Population)) +
geom_boxplot() +
ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "bottom") +
scale_y_continuous(sec.axis = sec_axis(~(.*(map_median_per-map_median_grc) + map_median_grc), name = "Sensitivity"))
fig_sensitivity_graph
# p_summary_bias <- ggplot(df_summary_bias, aes(x=Reduced, y=(SUM_TAIL-bias_median_grc)/(bias_median_per-bias_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_median_per-bias_median_grc) + bias_median_grc), name = "Num. strongly biased sites")) +
# theme(legend.position = "bottom")
# p_summary_bias
#
# p_summary_r2a <- ggplot(df_summary_bias, aes(x=Reduced, y=(REF_TO_ALT-bias_r2a_median_grc)/(bias_r2a_median_per-bias_r2a_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("REF to ALT ratio") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_r2a_median_per-bias_r2a_median_grc) + bias_r2a_median_grc), name = "Gain of Personalization")) +
# theme(legend.position = "bottom")
# p_summary_r2a
#
# p_summary_bias_graph <- ggplot(df_summary_bias %>% filter(Reduced %in% filt_sensitivity_graph), aes(x=Reduced, y=(SUM_TAIL-bias_median_grc)/(bias_median_per-bias_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_median_per-bias_median_grc) + bias_median_grc), name = "Num. strongly biased sites")) +
# theme(legend.position = "bottom")
# p_summary_bias_graph
#
# p_summary_r2a_graph <- ggplot(df_summary_bias %>% filter(Reduced %in% filt_sensitivity_graph), aes(x=Reduced, y=(REF_TO_ALT-bias_r2a_median_grc)/(bias_r2a_median_per-bias_r2a_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_r2a_median_per-bias_r2a_median_grc) + bias_r2a_median_grc), name = "REF to ALT ratio")) +
# theme(legend.position = "bottom")
# # theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "bottom")
# p_summary_r2a_graph
if (save == T){
ggsave("figures/revision2/fig_boxplot_onepass.pdf", width = 12, height = 6, plot=fig_sensitivity_onepass)
ggsave("figures/revision2/fig_boxplot_second_pass_vg.pdf", width = 12, height = 6, plot=fig_sensitivity_twopass)
ggsave("figures/revision2/fig_boxplot_graph.pdf", width = 12, height = 6, plot=fig_sensitivity_graph)
}
```



```{r randflow_ld_variant_af}
# Randflow-LD-merged
df_randflow_ld_variant <- read.delim("data/chr21-randflow_ld.allele_frequencies", header = F, col.names = "AF")
Expand Down Expand Up @@ -422,6 +495,13 @@ fig_unaligned_all <- ggplot(df_chr21_num_unaligned %>% filter(!Reduced %in% c("H
scale_y_continuous(sec.axis = sec_axis(~(./total), name = "Fraction of unaligned reads"))
fig_unaligned_all
med <- df_chr21_num_unaligned %>% group_by(Reduced) %>% summarise_at(vars(Unaligned), ~ median(., na.rm = TRUE))
num_unaligned_median_grc <- med[med$Reduced =="GRCh38",]$Unaligned
num_unaligned_median_per <- med[med$Reduced =="Personalized",]$Unaligned
sim21_num_unaligned <- tabular(Heading('Method') * factor(Reduced) ~
Justify(r) * Heading('Fraction of unaligned alignments') * (I(Unaligned) + I((Unaligned-num_unaligned_median_grc)/(num_unaligned_median_per-num_unaligned_median_grc))) * Format(sprintf("%0.6f")) * (mean + median + max + min), data=df_chr21_num_unaligned)
sim21_num_unaligned
save = F
if (save == T){
ggsave("figures/revision1/chr21_num_unaligned.pdf", fig_unaligned_all, width = 14, height = 5)
Expand Down Expand Up @@ -470,6 +550,22 @@ if (save == T){
}
```

```{r chr21_num_incorrect_personalization_as_lhs}
fig_incorrect_all <- ggplot(df_chr21_num_incorrect %>% filter(!Reduced %in% c("HISAT2-10%", "AMR", "AFR", "EAS", "EUR", "SAS", "RandFlow-26", "Personalized-2")),
aes(x=Reduced, y=(Num.Incorrect/total-num_incorrect_median_grc) / (num_incorrect_median_per - num_incorrect_median_grc), color = Super.Population)) +
geom_boxplot() +
ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "bottom") +
scale_y_continuous(sec.axis = sec_axis(~(.*(num_incorrect_median_per - num_incorrect_median_grc)+num_incorrect_median_grc), name = "Fraction of incorrect alignments"))
fig_incorrect_all
save = F
if (save == T){
ggsave("figures/revision2/chr21_num_incorrect.pdf", fig_incorrect_all, width = 14, height = 5)
}
```


```{r whole_genome_num_unaligned}
num_reads_real <- 1436823773
Expand All @@ -483,7 +579,7 @@ fig_real_num_unaligned <- ggplot(df_real_num_unaligned, aes(x=Method, y=Num.Unal
geom_bar(stat="identity") +
labs(fill="Method", x="Method", y="Number of unaligned reads") +
scale_y_continuous(sec.axis = sec_axis(~(.)/(num_reads_real), name = "Fraction of unaligned reads")) +
coord_cartesian(ylim=c(50000000, 110000000)) +
coord_cartesian(ylim=c(0, 110000000)) +
theme_bw() + theme(legend.position = "none")
fig_real_num_unaligned
Expand Down Expand Up @@ -702,7 +798,6 @@ fig_rf26_combined_three_rows
total <- 2000000
save = F
# df_sensitivity_rep <- read.delim("data/map_accuracy/0906_100indiv.tsv", header = TRUE, sep = "\t")
df_sensitivity_rep <- read.delim("data/map_accuracy/0914_100indiv.tsv", header = TRUE, sep = "\t")
rand_trials <- c("chr21-randflow_ld-rand0", "chr21-randflow_ld-rand1", "chr21-randflow_ld-rand2",
Expand Down Expand Up @@ -752,6 +847,46 @@ if (save == T){
}
```

```{r using_26_population_refs_personalization_as_lhs}
fig_sensitivity_rf26 <- ggplot(df_sensitivity %>% filter(Reduced %in% filt_sensitivity_rf), aes(x=Reduced, y=(True.Positive/total - map_median_grc)/(map_median_per-map_median_grc), color = Super.Population)) +
geom_boxplot() +
ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
theme_bw() +
theme(axis.title.x=element_text(vjust=0, hjust=0.45), legend.position = "bottom") +
scale_y_continuous(sec.axis = sec_axis(~(.*(map_median_per-map_median_grc)) + map_median_grc, name = "Sensitivity"))
fig_sensitivity_rf26
# p_summary_bias_rf26 <- ggplot(df_summary_bias %>% filter(Reduced %in% filt_sensitivity_rf), aes(x=Reduced, y=(SUM_TAIL-bias_median_grc)/(bias_median_per-bias_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_median_per-bias_median_grc)+bias_median_grc), name = "Num. strongly biased sites")) +
# theme(legend.position = "bottom")
# p_summary_bias_rf26
#
# p_summary_r2a_rf26 <- ggplot(df_summary_bias %>% filter(Reduced %in% filt_sensitivity_rf), aes(x=Reduced, y=(REF_TO_ALT-bias_r2a_median_grc)/(bias_r2a_median_per-bias_r2a_median_grc), color = Super_Population)) +
# geom_boxplot() +
# geom_point(position = position_jitterdodge(jitter.width = 0.2, dodge.width = 0.8)) +
# ylab("Gain of Personalization") + xlab("Method") + labs(color="Super Population") +
# theme_bw() +
# scale_y_continuous(sec.axis = sec_axis(~(.*(bias_r2a_median_per-bias_r2a_median_grc) + bias_r2a_median_grc), name = "REF to ALT ratio")) +
# theme(legend.position = "bottom")
# p_summary_r2a_rf26
fig_rf26_combined_three_rows <- grid_arrange_shared_legend(
T, 3, 1,
fig_sensitivity_rf26 + labs(tag = "(a)") + xlab(""),
p_summary_bias_rf26 + labs(tag = "(b)") + xlab(""),
p_summary_r2a_rf26 + labs(tag = "(c)")
)
fig_rf26_combined_three_rows
if (save == T){
ggsave("figures/revision2/fig_boxplot_rf26.pdf", fig_rf26_combined_three_rows, width = 12, height = 12)
}
```

# Analysis of theoretical allelic distribution using error-free reads

Expand Down

0 comments on commit 8838c56

Please sign in to comment.