-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathrenumber_pdb_file.py
144 lines (106 loc) · 4.23 KB
/
renumber_pdb_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""renumber a pdb file
python renumber_pdb_file.py [OPTIONS] < in > out
-s, --sequence: renumber according to sequence (fasta format)
-c, --compress: compress gaps due to missing residues
-g, --replace_glycine: replace modified amino acids with glycine
-a, --chain: only take a specified chain
"""
import sys, os, getopt, re, string
import PdbTools
param_sequence = None
param_loglevel = 0
param_compress = None
param_glycine = None
param_chain = None
# FORMAT(6A1,I5,1X,A4,A1,A3,1X,A1,I4,A1,3X,3F8.3,2F6.2,1X,I3)
# Column Content
# 1-6 'ATOM' or 'HETATM'
# 7-11 Atom serial number (may have gaps)
# 13-16 Atom name, in IUPAC standard format
# 17 Alternate location indicator indicated by A, B or C
# 18-20 Residue name, in IUPAC standard format
# 23-26 Residue sequence number (ordered as below)
# 27 Code for insertions of residues (i.e. 66A & 66B)
# 31-38 X coordinate
# 39-46 Y coordinate
# 47-54 Z coordinate
# 55-60 Occupancy
# 61-66 Temperature factor
# 68-70 Footnote number
AMINOACIDS_TO_REPLACE = {
'KCX' : 1,
}
if __name__ == '__main__':
try:
optlist, args = getopt.getopt(sys.argv[1:],
"s:V:c:ga:",
["sequence=","Verbose=", "compress",
"replace_glycine", "chain="])
except getopt.error, msg:
print USAGE
sys.exit(2)
for o,a in optlist:
if o in ( "-s", "--sequence" ):
param_sequence = a
elif o in ("-V", "--Verbose"):
param_loglevel = string.atoi(a)
elif o in ("-g", "--replace_glycine"):
param_glycine = 1
elif o in ("-a", "--chain"):
if len(a) > 1:
if string.find(a, "-"):
a = string.split(a, "-")[1]
elif len(a) == 5:
a = a[4]
param_chain = a
if param_sequence:
param_sequence = re.sub("\s+", "", string.join(open(param_sequence, "r").readlines()[1:], ""))
last_chain = None
last_number = None
current_number = 1
current_atom_number = 1
while 1:
line = sys.stdin.readline()
if not line: break
if line[:6] not in ("ATOM ", "HETATM"):
if param_loglevel == 0:
print line[:-1]
continue
header = line[:6]
aminoacid = line[17:20]
number=line[22:26]
chain =line[21]
atomtype = string.strip(line[13:16])
## if param_loglevel >= 1:
## print "%s %s %i %s" % (aa, number, current_number, param_sequence[current_number-1])
if (last_chain != chain):
current_number = 1
else:
if last_number != number:
delta = string.atoi(number) - string.atoi(last_number)
if param_loglevel >= 2:
if delta > 1:
print "# gap between %s:%s and %s:%s %s" % (last_number, last_chain, number, chain, aminoacid )
if param_compress:
delta = 1
if param_sequence:
aa = PdbTools.TranslateAminoAcid( aminoacid )
if aa != param_sequence[current_number - 1]:
continue
current_number += delta
last_number = number
last_chain = chain
new_line = None
if param_glycine:
if aminoacid in AMINOACIDS_TO_REPLACE:
if atomtype in ['N', 'CA', 'C', 'O']:
header = "ATOM "
aminoacid = "GLY"
else:
continue
new_line = "%-6s" % header + "%5i " % current_atom_number + " %-4s" % atomtype + "%-3s" % aminoacid + " %s" % chain +\
"%4i" % current_number + line[26:-1]
current_atom_number += 1
if param_chain and chain != param_chain:
continue
print new_line