-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patho-visualize-qual-scores-along-columns.py
executable file
·87 lines (65 loc) · 2.71 KB
/
o-visualize-qual-scores-along-columns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2010 - 2012, A. Murat Eren
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Please read the COPYING file.
#
# python $me fasta.fa quals_dict qual_stats_dict
#
# don't have them? quals_dict and qual_stats_dict are being generated by gen_dicts_for_qual_stats.py
#
import sys
import pickle
from scipy import log2 as log
import matplotlib.pyplot as plt
import Oligotyping.lib.fastalib as u
from Oligotyping.utils.utils import get_qual_stats_dict
from Oligotyping.utils.random_colors import get_list_of_colors
COLORS = {'A': 'red',
'T': 'blue',
'C': 'green',
'G': 'purple',
'N': 'white'}
alignment = u.SequenceSource(sys.argv[1])
quals_dict = pickle.load(open(sys.argv[2]))
quals_dict_filtered = {}
ids_in_alignment_file = []
while next(alignment):
ids_in_alignment_file.append(alignment.id)
ids_in_alignment_file = set(ids_in_alignment_file)
for read_id in quals_dict:
if read_id in ids_in_alignment_file:
quals_dict_filtered[read_id] = quals_dict[read_id]
ids_in_alignment_file.remove(read_id)
qual_stats_dict = get_qual_stats_dict(quals_dict_filtered)
colors = get_list_of_colors(21, colormap="RdYlGn")
colors = [colors[0] for _ in range(0, 20)] + colors
max_count = max([qual_stats_dict[q]['count'] for q in qual_stats_dict if qual_stats_dict[q]])
alignment_length = len(list(quals_dict.values())[0])
fig = plt.figure(figsize = (25, 8))
plt.rc('grid', color='0.50', linestyle='-', linewidth=0.1)
plt.grid(True)
plt.subplots_adjust(left=0.02, bottom = 0.09, top = 0.95, right = 0.98)
for position in range(0, alignment_length):
print(position)
if not qual_stats_dict[position]:
continue
column_q_list = [quals_dict[read_id][position] for read_id in quals_dict if quals_dict[read_id][position]]
if column_q_list:
b = plt.boxplot(column_q_list, positions=[position + 0.5], sym=',', widths=0.9)
plt.setp(b['medians'], color='red')
plt.setp(b['whiskers'], color='black', alpha=0.6)
plt.setp(b['boxes'], color='black', alpha=0.8)
plt.setp(b['caps'], color='black', alpha=0.6)
plt.setp(b['fliers'], color='#EEEEEE', alpha=0.01)
mean = int(round(qual_stats_dict[position]['mean']))
count = qual_stats_dict[position]['count']
plt.fill_between([position, position + 1], y1 = 0, y2 = 41, color = colors[mean], alpha = (log(count) / log(max_count)) / 5)
plt.ylim(ymin=0, ymax=41)
plt.xlim(xmin=0, xmax=alignment_length)
plt.show()