.Rhistory

# Keep only the first ten mutations of each replicate and prepare a figure
# showing the distribution of their effects
gathered_data <- all_results_final_outcome %>%
  ungroup() %>%
  group_by(Replicate, Complex, Conditions) %>%
  # Rows are numbered within each Replicate / Complex / Conditions group,
  # following their current order in the table
  mutate(mut_counter = row_number()) %>%
  filter(mut_counter <= 10)
# Stack the five mutational-effect columns into a long key / value layout
gathered_data <- gathered_data %>%
  gather(key = Mut_eff_type, value = Mut_eff_value,
         mut_eff_binding_energy_AA, mut_eff_binding_energy_AB,
         mut_eff_binding_energy_BB, mut_eff_stab_A, mut_eff_stab_B)
# Effects that are exactly zero are left out of the boxplots
p <- ggplot(data = filter(gathered_data, Mut_eff_value != 0),
            mapping = aes(x = outcome, y = Mut_eff_value, fill = Mut_eff_type)) +
  geom_boxplot(outlier.shape = NA) +
  facet_grid(Conditions ~ Complex) +
  ylim(-2.5, 2.5) +
  labs(fill = '') +
  theme(axis.text.x = element_text(angle = 90, size = 10))
p
ggsave(filename = '/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Figures/2021-08-19_First_mutations/1.First_10_mut_eff.pdf',
       plot = p, device = cairo_pdf, width = 21, height = 14, dpi = 300)
# Restrict the data to a single set of starting conditions and express the
# abundance of AB as a percentage of the sum of cAA, cAB, cBB, cA and cB
gathered_data <- all_results_final_outcome %>%
  ungroup() %>%
  filter(Conditions == 'aff = -10, stab = -5') %>%
  mutate(
    total_complexes = cAA + cAB + cBB + cA + cB,
    pct_HET = (cAB * 100) / total_complexes
  ) %>%
  group_by(Replicate, Complex, Conditions) %>%
  # Stack the five mutational-effect columns into a long key / value layout
  gather(key = Mut_eff_type, value = Mut_eff_value,
         mut_eff_binding_energy_AA, mut_eff_binding_energy_AB,
         mut_eff_binding_energy_BB, mut_eff_stab_A, mut_eff_stab_B)
# Non-zero effects by outcome, one panel per complex
p <- gathered_data %>%
  ungroup() %>%
  filter(Mut_eff_value != 0) %>%
  select(outcome, Mut_eff_type, Mut_eff_value, Complex) %>%
  ggplot(mapping = aes(x = outcome, y = Mut_eff_value, fill = Mut_eff_type)) +
  geom_boxplot(outlier.shape = NA) +
  facet_wrap(~Complex, nrow = 8) +
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 90))
p
# An alternative view in which the data are not separated by outcome:
# the type of effect goes on the x-axis instead
p <- gathered_data %>%
  ungroup() %>%
  filter(Mut_eff_value != 0) %>%
  select(outcome, Mut_eff_type, Mut_eff_value, Complex) %>%
  ggplot(mapping = aes(x = Mut_eff_type, y = Mut_eff_value)) +
  geom_boxplot(outlier.shape = NA) +
  facet_wrap(~Complex, nrow = 8) +
  ylim(-2.5, 2.5) +
  theme(axis.text.x = element_text(angle = 90))
p
# Summarise the non-zero fixed mutational effects for every complex and
# type of effect: mean plus the 25th and 75th percentiles
gathered_data_summary <- gathered_data %>%
  ungroup() %>%
  filter(Mut_eff_value != 0) %>%
  group_by(Complex, Mut_eff_type) %>%
  summarise(
    mean_effect = mean(Mut_eff_value),
    pct75_effect = quantile(Mut_eff_value, probs = 0.75),
    pct25_effect = quantile(Mut_eff_value, probs = 0.25)
  )
# Percentage of HET at the end of the simulation (rows with fixed_mut == 200),
# summarised per complex with the median
# (alternative kept for reference: summarise(mean_pct_HET = mean(pct_HET)))
gathered_pct_final <- gathered_data %>%
  ungroup() %>%
  filter(fixed_mut == 200) %>%
  group_by(Complex) %>%
  summarise(median_pct_HET = median(pct_HET))
# Keep only the complexes that are present in both tables
final_data <- gathered_data_summary %>%
  inner_join(gathered_pct_final, by = 'Complex')
# Plot the mean fixed effect on each binding energy against the final
# percentage of HET. final_data stores that percentage as median_pct_HET
# (the mean version is commented out where the column is computed), so the
# plots must map that column; mapping mean_pct_HET fails because no such
# column exists.
# Effect on the first homomer (AA)
p <- final_data %>%
  filter(Mut_eff_type == 'mut_eff_binding_energy_AA') %>%
  ggplot(aes(x = mean_effect, y = median_pct_HET)) +
  geom_point() +
  xlab('Mean effect on binding energy of AA') +
  ylab('Median percentage of HET (%)') +
  # Linear fit and correlation label
  stat_smooth(method = 'lm') +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01)
p
# For the second homomer
p <- final_data %>%
  filter(Mut_eff_type == 'mut_eff_binding_energy_BB') %>%
  ggplot(aes(x = mean_effect, y = median_pct_HET)) +
  geom_point() +
  xlab('Mean effect on binding energy of BB') +
  ylab('Median percentage of HET (%)') +
  stat_smooth(method = 'lm') +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01)
p
# Same but for HET
p <- final_data %>%
  filter(Mut_eff_type == 'mut_eff_binding_energy_AB') %>%
  ggplot(aes(x = mean_effect, y = median_pct_HET)) +
  geom_point() +
  xlab('Mean effect on binding energy of AB') +
  ylab('Median percentage of HET (%)') +
  stat_smooth(method = 'lm') +
  stat_cor(p.accuracy = 0.001, r.accuracy = 0.01)
p
# One row per complex with the mean effects on AA and AB side by side, plus
# a three-level classification based on the final median percentage of HET
fixed_effects_final <- final_data %>%
  ungroup() %>%
  select(-c(pct75_effect, pct25_effect)) %>%
  filter(Mut_eff_type %in% c('mut_eff_binding_energy_AA', 'mut_eff_binding_energy_AB')) %>%
  # Equivalent to spread(key = Mut_eff_type, value = mean_effect)
  pivot_wider(names_from = Mut_eff_type, values_from = mean_effect) %>%
  mutate(
    # Below 30 %: mostly HM; from 30 % to below 70 %: both; 70 % or more:
    # mostly HET. A missing percentage stays missing.
    outcome = factor(
      case_when(
        median_pct_HET < 30 ~ 'Mostly HM',
        median_pct_HET < 70 ~ 'Both HM and HET',
        median_pct_HET >= 70 ~ 'Mostly HET'
      ),
      levels = c('Mostly HET', 'Both HM and HET', 'Mostly HM')
    )
  )
# Mean effect on AB against mean effect on AA; the dashed line is y = x
p <- ggplot(data = fixed_effects_final,
            mapping = aes(x = mut_eff_binding_energy_AA,
                          y = mut_eff_binding_energy_AB,
                          colour = outcome)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  xlab('Mean effect on binding energy of AA') +
  ylab('Mean effect on binding energy of AB')
p
# Same summary of the non-zero fixed mutational effects as before, but
# computed separately for every replicate of every complex
gathered_data_summary <- gathered_data %>%
  ungroup() %>%
  filter(Mut_eff_value != 0) %>%
  group_by(Complex, Replicate, Mut_eff_type) %>%
  summarise(
    mean_effect = mean(Mut_eff_value),
    pct75_effect = quantile(Mut_eff_value, probs = 0.75),
    pct25_effect = quantile(Mut_eff_value, probs = 0.25)
  )
# Percentage of HET at the end of the simulation (rows with fixed_mut == 200),
# per complex and replicate
# (alternative kept for reference: summarise(mean_pct_HET = mean(pct_HET)))
gathered_pct_final <- gathered_data %>%
  ungroup() %>%
  filter(fixed_mut == 200) %>%
  group_by(Complex, Replicate) %>%
  summarise(median_pct_HET = median(pct_HET))
# Keep only the complex / replicate pairs that are present in both tables
final_data <- gathered_data_summary %>%
  inner_join(gathered_pct_final, by = c('Complex', 'Replicate'))
# Format the data to make it easier to plot: effects on AA and AB side by
# side and a three-level classification of the final percentage of HET
fixed_effects_final <- final_data %>%
  ungroup() %>%
  select(-c(pct75_effect, pct25_effect)) %>%
  filter(Mut_eff_type %in% c('mut_eff_binding_energy_AA', 'mut_eff_binding_energy_AB')) %>%
  # Equivalent to spread(key = Mut_eff_type, value = mean_effect)
  pivot_wider(names_from = Mut_eff_type, values_from = mean_effect) %>%
  mutate(
    # Below 30 %: mostly HM; from 30 % to below 70 %: both; 70 % or more:
    # mostly HET. A missing percentage stays missing.
    outcome = factor(
      case_when(
        median_pct_HET < 30 ~ 'Mostly HM',
        median_pct_HET < 70 ~ 'Both HM and HET',
        median_pct_HET >= 70 ~ 'Mostly HET'
      ),
      levels = c('Mostly HET', 'Both HM and HET', 'Mostly HM')
    )
  )
# Do the plots, one with the discrete classification, another trying to see
# if there is a gradient.
# Deal with outliers: points outside [-0.75, 1] on either axis are moved to
# the edge of the plotting window and flagged (bool_shape = 1) so that they
# are drawn with a different shape. The clamped table is computed once and
# shared by both plots.
clamped_effects <- fixed_effects_final %>%
  mutate(
    # bool_shape is computed first, from the values before clamping
    bool_shape = ifelse(
      mut_eff_binding_energy_AA > 1 | mut_eff_binding_energy_AA < -0.75 |
        mut_eff_binding_energy_AB > 1 | mut_eff_binding_energy_AB < -0.75,
      1, 0),
    mut_eff_binding_energy_AA = pmax(pmin(mut_eff_binding_energy_AA, 1), -0.75),
    mut_eff_binding_energy_AB = pmax(pmin(mut_eff_binding_energy_AB, 1), -0.75)
  )
# Colour by the discrete outcome
p <- clamped_effects %>%
  ggplot(aes(x = mut_eff_binding_energy_AA, y = mut_eff_binding_energy_AB, colour = outcome)) +
  geom_point(aes(shape = as.factor(bool_shape))) +
  # guide = 'none' hides the shape legend (guide = FALSE is deprecated)
  scale_shape_discrete(guide = 'none') +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  xlab('Mean effect on binding energy of AA') +
  ylab('Mean effect on binding energy of AB') +
  xlim(-0.75, 1) + ylim(-0.75, 1)
p
# Repeat but use the actual percentage of HET, not the discrete outcome
p <- clamped_effects %>%
  ggplot(aes(x = mut_eff_binding_energy_AA, y = mut_eff_binding_energy_AB, colour = median_pct_HET)) +
  geom_point(aes(shape = as.factor(bool_shape)), alpha = 0.5) +
  scale_shape_discrete(guide = 'none') +
  scale_colour_viridis_c() +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  xlab('Mean effect on binding energy of AA') +
  ylab('Mean effect on binding energy of AB') +
  xlim(-0.75, 1) + ylim(-0.75, 1) +
  labs(colour = 'Final %HET')
p
# Distribution, for each outcome, of the ratio between the mean fixed effect
# on AB and the mean fixed effect on AA
p <- ggplot(
  data = mutate(fixed_effects_final,
                ratio_HET_HM = mut_eff_binding_energy_AB / mut_eff_binding_energy_AA),
  mapping = aes(x = outcome, y = ratio_HET_HM)
) +
  geom_violin() +
  geom_boxplot(outlier.shape = NA, alpha = 0.5) +
  # Individual points, jittered horizontally
  geom_point(position = position_jitter(width = 0.3), alpha = 0.2) +
  xlab('Outcome') +
  ylab('Ratio of fixed mutational effects (HET/HM)') +
  ylim(-1, 2)
p
# Load libraries
library(ggplot2)
library(tidyverse)
library(magrittr)
library(cowplot)
library(Cairo)
theme_set(theme_cowplot())
# Prepare a table of amino acid names: one row per amino acid, with its
# one-letter code and the matching three-letter code.
# Both vectors must follow the same order. The three-letter codes list
# ASN before ASP, so the one-letter codes must list N before D.
aa_three2one <- data.frame(cbind(c('A', 'R', 'N', 'D', 'C',
                                   'E', 'Q', 'G', 'H', 'I',
                                   'L', 'K', 'M', 'F', 'P',
                                   'S', 'T', 'W', 'Y', 'V'),
                                 c('ALA', 'ARG', 'ASN', 'ASP', 'CYS',
                                   'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
                                   'LEU', 'LYS', 'MET', 'PHE', 'PRO',
                                   'SER', 'THR', 'TRP', 'TYR', 'VAL')))
colnames(aa_three2one) <- c('One-letter', 'Three-letter')
# Fit a linear model to see the slope that best fits the data.
# df must contain the columns Mean_ddG_int_HM (response) and
# Mean_ddG_int_HET (predictor).
# Returns an unnamed list with three elements:
#   [[1]] a plotmath string with the fitted equation and R^2
#   [[2]] the intercept of the fit
#   [[3]] the slope of the fit
lm_eqn <- function(df){
  x <- df$Mean_ddG_int_HET
  y <- df$Mean_ddG_int_HM
  m <- lm(y ~ x, df)
  fit_coefs <- coef(m)
  # Values shown in the label: coefficients with two significant digits,
  # R^2 with three
  label_values <- list(a = format(unname(fit_coefs[1]), digits = 2),
                       b = format(unname(fit_coefs[2]), digits = 2),
                       r2 = format(summary(m)$r.squared, digits = 3))
  eq <- substitute(bold(bolditalic(y) == a + b %.% bolditalic(x)*","~~bolditalic(R)^2~"="~r2),
                   label_values)
  list(as.character(as.expression(eq)), fit_coefs[1], fit_coefs[2])
}
# Hexbin plot of the effects of mutations on the binding energy of the HM
# (y-axis) against the HET (x-axis) for one structure.
#   final_mat: table with the columns Mean_ddG_int_HET and Mean_ddG_int_HM
#   lm_fit:    output of lm_eqn() for that table, that is, a list with the
#              equation label, the intercept and the slope (in that order)
#   pdb_id:    title of the panel
# Returns the ggplot object.
plot_affinity_hexbin <- function(final_mat, lm_fit, pdb_id) {
  final_mat %>%
    ggplot(aes(x = Mean_ddG_int_HET, y = Mean_ddG_int_HM)) +
    # Colour every hexagon by the log2 of the number of points it contains
    stat_binhex(aes(fill=log2(..count..))) +
    scale_fill_gradient2(limits = c(0, 12.5), low = '#2c7bb6', high = '#d7191c', mid = '#ffffbf') +
    xlab('ddG binding energy (HET)') + ylab('ddG binding energy (HM)') +
    # Fitted line, with its equation in the bottom-right corner
    geom_abline(slope = lm_fit[[3]], intercept = lm_fit[[2]], linetype = 'dashed') +
    annotate(geom = 'text', label = lm_fit[[1]], x = Inf, y = -Inf , hjust = 1.1, vjust = -0.5,
             parse = TRUE, colour = 'black', size = 6) +
    ggtitle(pdb_id) +
    theme(legend.position = 'right',
          axis.text = element_text(size = 22),
          axis.title = element_text(size = 26),
          panel.background = element_rect(fill = 'grey50'),
          plot.title = element_text(hjust = 0.5, size = 26))
}
# For every structure: load the table, fit the linear model, build the plot
# and show it. The 4FGW stanza used to appear twice with identical code and
# the same input file; it is kept once.
# 1M38
final_mat_1m38 <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Data/Example_data_1m38/final_mat_1m38.txt', delim = '\t')
lm_1m38 <- lm_eqn(final_mat_1m38)
p_affinity_1m38 <- plot_affinity_hexbin(final_mat_1m38, lm_1m38, '1M38')
p_affinity_1m38
# 1A72
final_mat_1a72 <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_1a72.txt', delim = '\t')
lm_1a72 <- lm_eqn(final_mat_1a72)
p_affinity_1a72 <- plot_affinity_hexbin(final_mat_1a72, lm_1a72, '1A72')
p_affinity_1a72
# 1AI2
final_mat_1ai2 <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_1ai2.txt', delim = '\t')
lm_1ai2 <- lm_eqn(final_mat_1ai2)
p_affinity_1ai2 <- plot_affinity_hexbin(final_mat_1ai2, lm_1ai2, '1AI2')
p_affinity_1ai2
# 1HPS
final_mat_1hps <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_1hps.txt', delim = '\t')
lm_1hps <- lm_eqn(final_mat_1hps)
p_affinity_1hps <- plot_affinity_hexbin(final_mat_1hps, lm_1hps, '1HPS')
p_affinity_1hps
# 1P6O
final_mat_1p6o <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Data/Example_data_1m38/final_mat_1p6o.txt', delim = '\t')
lm_1p6o <- lm_eqn(final_mat_1p6o)
p_affinity_1p6o <- plot_affinity_hexbin(final_mat_1p6o, lm_1p6o, '1P6O')
p_affinity_1p6o
# 2P09
final_mat_2p09 <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_2p09.txt', delim = '\t')
lm_2p09 <- lm_eqn(final_mat_2p09)
p_affinity_2p09 <- plot_affinity_hexbin(final_mat_2p09, lm_2p09, '2P09')
p_affinity_2p09
# 4FGW
final_mat_4fgw <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Data/Example_data_1m38/final_mat_4fgw.txt', delim = '\t')
lm_4fgw <- lm_eqn(final_mat_4fgw)
p_affinity_4fgw <- plot_affinity_hexbin(final_mat_4fgw, lm_4fgw, '4FGW')
p_affinity_4fgw
# 4RFP
final_mat_4rfp <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_4rfp.txt', delim = '\t')
lm_4rfp <- lm_eqn(final_mat_4rfp)
p_affinity_4rfp <- plot_affinity_hexbin(final_mat_4rfp, lm_4rfp, '4RFP')
p_affinity_4rfp
# 4Z5Z
final_mat_4z5z <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_4z5z.txt', delim = '\t')
lm_4z5z <- lm_eqn(final_mat_4z5z)
p_affinity_4z5z <- plot_affinity_hexbin(final_mat_4z5z, lm_4z5z, '4Z5Z')
p_affinity_4z5z
# 5RFD
final_mat_5rfd <- read_delim('/home/angelfcc/Documents/PhD_projects/Homomer_duplication/Results_organized/007_final_matrices/final_mat_5rfd.txt', delim = '\t')
lm_5rfd <- lm_eqn(final_mat_5rfd)
p_affinity_5rfd <- plot_affinity_hexbin(final_mat_5rfd, lm_5rfd, '5RFD')
p_affinity_5rfd
# Show all the figures together in a grid of two rows and five columns.
# Order them according to the [HET] / [most abundant HM] ratio, see variable in 009.2 script:
# all_data_summary_final_points$Complex
p_affinity_all <- plot_grid(
  plotlist = list(
    # First row
    p_affinity_5rfd, p_affinity_2p09, p_affinity_4rfp, p_affinity_4fgw, p_affinity_1ai2,
    # Second row
    p_affinity_4z5z, p_affinity_1p6o, p_affinity_1a72, p_affinity_1m38, p_affinity_1hps
  ),
  nrow = 2, ncol = 5
)
# Histograms of the Mean_ddG_stab_HET column, in bins of 0.5 and with the
# x-axis restricted to the range from -5 to 40
# 1A72
p_stab_1a72 <- ggplot(data = final_mat_1a72, mapping = aes(x = Mean_ddG_stab_HET)) +
  geom_histogram(binwidth = 0.5) +
  xlim(-5, 40) +
  xlab('ddG stability') + ylab('Mutation count') +
  ggtitle('1A72') +
  theme(legend.position = 'right',
        axis.text = element_text(size = 22),
        axis.title = element_text(size = 26),
        plot.title = element_text(hjust = 0.5, size = 26))
p_stab_1a72
# 1AI2
p_stab_1ai2 <- ggplot(data = final_mat_1ai2, mapping = aes(x = Mean_ddG_stab_HET)) +
  geom_histogram(binwidth = 0.5) +
  xlim(-5, 40) +
  xlab('ddG stability') + ylab('Mutation count') +
  ggtitle('1AI2') +
  theme(legend.position = 'right',
        axis.text = element_text(size = 22),
        axis.title = element_text(size = 26),
        plot.title = element_text(hjust = 0.5, size = 26))
p_stab_1ai2
## Set path to home folder (DfrB1_DMS_2022)
## All the relative paths below are resolved from this folder
# setwd('/path/to/DfrB1_DMS_2022/')
# setwd('/media/axelle/afe8c733-963d-4db8-a2ee-551a0b73c9d7/Angel/PhD_projects/R67_DMS_December2020/DfrB1_DMS_2022')
setwd('/home/angelfcc/Documents/PhD_projects/R67_DMS_December2020/Github_repos/DfrB1_DMS_2022/')
# Load the description of the samples from the supplementary tables
# (rows 2 to 78 of the sheet)
metadata <- read.xlsx(
  'Data/SuppTables_ManuscriptDfrB1.xlsx',
  sheetName = 'TableS2_DMS_sample_description',
  # rowIndex = 1:89)
  rowIndex = 2:78)
### The 001_sequencing_data_processing must have been run before to load the data
# Full paths of the files in the folder of codon tables. TRUE / FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
codon_file_list <- list.files(
  # 'Data/Analysis_NovaSeq/read_abundances/Codons/',
  'Data/Analysis_NovaSeq/aggregate_dataframes/Codons/',
  include.dirs = FALSE, full.names = TRUE)
# Define the genetic code. Every codon is written next to the residue it
# encodes ('*' marks a stop codon); the pairs are then split into the
# codons and residues vectors, which keep the same order.
residues <- c(
  ATA = 'I', ATC = 'I', ATT = 'I', ATG = 'M',
  ACA = 'T', ACC = 'T', ACG = 'T', ACT = 'T',
  AAC = 'N', AAT = 'N', AAA = 'K', AAG = 'K',
  AGC = 'S', AGT = 'S', AGA = 'R', AGG = 'R',
  CTA = 'L', CTC = 'L', CTG = 'L', CTT = 'L',
  CCA = 'P', CCC = 'P', CCG = 'P', CCT = 'P',
  CAC = 'H', CAT = 'H', CAA = 'Q', CAG = 'Q',
  CGA = 'R', CGC = 'R', CGG = 'R', CGT = 'R',
  GTA = 'V', GTC = 'V', GTG = 'V', GTT = 'V',
  GCA = 'A', GCC = 'A', GCG = 'A', GCT = 'A',
  GAC = 'D', GAT = 'D', GAA = 'E', GAG = 'E',
  GGA = 'G', GGC = 'G', GGG = 'G', GGT = 'G',
  TCA = 'S', TCC = 'S', TCG = 'S', TCT = 'S',
  TTC = 'F', TTT = 'F', TTA = 'L', TTG = 'L',
  TAC = 'Y', TAT = 'Y', TAA = '*', TAG = '*',
  TGC = 'C', TGT = 'C', TGA = '*', TGG = 'W'
)
codons <- names(residues)
# Drop the names so that residues is a plain character vector
residues <- unname(residues)
genetic_code <- data.frame(Codons = codons, Encoded_residues = residues)
# Read every codon table, reshape it to a tidy (long) format and stack all
# of them in a single data frame with the columns Codon, Position,
# read_abundance and Sample.
# Fail early with a clear message if the folder had no files.
if (length(codon_file_list) == 0) {
  stop('No codon files found in codon_file_list; check the input folder.')
}
# The tables are collected in a list and bound once, instead of growing the
# data frame with rbind() on every iteration.
all_codon_data <- bind_rows(lapply(codon_file_list, function(infile) {
  ## Extract the sample ID from the name: fourth field of the file name
  ## when split by '_'
  sample_id <- str_split(string = basename(infile), pattern = '_')[[1]][4]
  # Read the file
  codon_df <- read_delim(delim = '\t', col_names = TRUE, file = infile)
  colnames(codon_df)[1] <- 'Codon'
  # Transform into a tidy formatted df (one row per codon and position) and
  # add the sample ID
  codon_df %>%
    gather(-Codon, key = Position, value = read_abundance) %>%
    mutate(Sample = sample_id)
}))
# Positions come from the column names and sample IDs from the file names,
# so both are text at this point
all_codon_data$Position <- as.numeric(all_codon_data$Position)
all_codon_data$Sample <- as.numeric(all_codon_data$Sample)