-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplots.py
111 lines (100 loc) · 5.48 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pydca.dca as _dca
import pydca.mycontact as _mc
import matplotlib.pyplot as _plt
import numpy as _np
def plot_contacts(dca_obj,n_pairs=None,lower_half=False,iseq=None,colormap=_plt.cm.CMRmap_r,binary=True,offset=0,score='DI'):
"""Prints the contact maps derived from a DCA object.
if iseq>0 will remap the indexes to the original aminoacids of the sequence;
if offset>0 will shift the index of the first aminoacid (use it to compare dca on different part of a sequence);
if you want to compare the contacts from two dca objects, just use
plot_contacts(dca_obj1)
plot_contacts(dca_obj2,lower_half=True)
score options:
'DI' (default) -> direct information as defined by Morcos et al., PNAS 2011
'CFN' -> Frobenius norm as defined by Ekerberg et al., PRE 2013
lower_half=True prints the contact map in the bottom-right triangle of the plot,
default prints it on top-left side
"""
### TODO: how can we change this to plot and compare two contact maps?
### is it better to do it inside the function or outside?
ix,iy,old_ix,old_iy=dca_obj.get_pair_idx(n_pairs=n_pairs,iseq=iseq,score=score)
matr=_np.zeros((dca_obj.alignment.N_orig[iseq],dca_obj.alignment.N_orig[iseq])) # matrix of zeros
if lower_half:
if binary:
matr[[ix,iy]]=1 # black and white plot
else:
# color based on the score
if score=='DI':
matr[[ix,iy]]=dca_obj.direct_information[[old_ix,old_iy]]
if score=='CFN':
matr[[ix,iy]]=dca_obj.CFN[[old_ix,old_iy]]
iny, inx = _np.indices(matr.shape)
my_mask=inx<=iny # This will fill only the desired half of the canvas
else:
if binary:
matr[[iy,ix]]=1 # black and white plot
else:
# color based on the score
if score=='DI':
matr[[iy,ix]]=dca_obj.direct_information[[old_iy,old_ix]]
if score=='CFN':
matr[[iy,ix]]=dca_obj.CFN[[old_iy,old_ix]]
iny, inx = _np.indices(matr.shape)
my_mask=inx>=iny # This will fill only the desired half of the canvas
# Now let's plot the contact map...
_plt.imshow(_np.ma.array(matr,mask=my_mask),cmap=colormap,origin='lower',\
extent=[offset,dca_obj.alignment.N_orig[iseq]+offset,\
offset,dca_obj.alignment.N_orig[iseq]+offset])
# ...and we draw a line on the diagonal, just for fun
_plt.plot(range(dca_obj.alignment.N_orig[iseq]),color='black')
return matr # return matrix of contacts if one wants to replot it differently
def scatter_contacts(dca_obj1,dca_obj2,n_pairs=(None,None),iseq=(0,0),score='DI'):
"""Another function to plot dca/pdb contacts. This uses a scatterplot and also compare the two contact maps and find the intersection
"""
### TODO: this can be modified to be used also with a contact map from a PDB structure.
ix,iy,old_ix,old_iy=dca_obj1.get_pair_idx(n_pairs=n_pairs[0],iseq=iseq[0],score=score)
idx1=_np.array((ix,iy)).T
ix,iy,old_ix,old_iy=dca_obj2.get_pair_idx(n_pairs=n_pairs[1],iseq=iseq[1],score=score)
idx2=_np.array((ix,iy)).T
idx_both=_np.array([x for x in set(tuple(x) for x in idx1) & set(tuple(x) for x in idx2)])
print('Number common contacts = %d'%(idx_both.shape[0]))
print('Fraction of common contacts = %.2f'%(idx_both.shape[0]/(len(idx1)+len(idx2))*2))
_plt.figure(figsize=(10,10))
_plt.scatter(idx1[:,0],idx1[:,1],alpha=0.99,s=10,c='cyan',marker='s',label='DCA 1')
_plt.scatter(idx2[:,1],idx2[:,0],alpha=0.99,s=10,c='green',marker='s',label='DCA 2')
_plt.scatter(idx_both[:,0],idx_both[:,1],alpha=0.99,s=18,c='red',label='common contacts')
_plt.scatter(idx_both[:,1],idx_both[:,0],alpha=0.99,s=18,c='red')
_plt.legend()
def plotDCAandPDB(dca_obj,pdb_obj,n_pairs=None,iseq=0,score='DI',cutoff=0.5,pdb_seq_subset=None):
"""Function to plot dca/pdb contacts. This uses a scatterplot and also compares the two contact maps and find the intersection.
"""
ix,iy,old_ix,old_iy=dca_obj.get_pair_idx(n_pairs=n_pairs,iseq=iseq,score=score)
idx1=_np.array((ix,iy)).T
ix,iy=pdb_obj.get_pair_idx(cutoff=cutoff)
if pdb_seq_subset != None:
# defining the remapping array
remapp=_np.ones(len(pdb_obj.seq),dtype=_np.int32)*-1
remapp[pdb_seq_subset]=_np.arange(pdb_seq_subset.shape[0],dtype=_np.int32)
# remapping indexes on the seq subset of interest
ix=remapp[ix]
iy=remapp[iy]
# removing negative indeces
iy=iy[ix>=0]
ix=ix[ix>=0]
ix=ix[iy>=0]
iy=iy[iy>=0]
else:
ix_new=ix
iy_new=iy
idx2=_np.array((ix,iy)).T
idx_both=_np.array([x for x in set(tuple(x) for x in idx1) & set(tuple(x) for x in idx2)])
### TODO: up to here this is the same as mycontacts.py->compareDCAandPDB, so maybe they can be merged together somehow...
print( 'Number common contacts = %d' % (idx_both.shape[0]) )
print( 'Common contacts / DCA pairs = %.2f' % \
(idx_both.shape[0]/len(idx1)) )
_plt.figure(figsize=(10,10))
_plt.scatter(idx1[:,0],idx1[:,1],alpha=0.99,s=10,c='cyan',marker='s',label='DCA')
_plt.scatter(idx2[:,1],idx2[:,0],alpha=0.99,s=10,c='green',marker='s',label='PDB')
_plt.scatter(idx_both[:,0],idx_both[:,1],alpha=0.99,s=18,c='red',label='common contacts')
_plt.scatter(idx_both[:,1],idx_both[:,0],alpha=0.99,s=18,c='red')
_plt.legend()