-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprotein.py
248 lines (193 loc) · 10.5 KB
/
protein.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import os
import universe, utils, topol
class Residue: # Stores a single residue's data.
def __init__(self, atoms, ali, resname, chain, resid, x, y, z):
self.d_atoms = atoms # list holds atom names
self.d_ali = ali # list holds alternative loc. ind.
self.d_resname = resname # string holds residue name
self.d_chain = chain # string holds chain name (A, B)
self.d_resid = resid # int holds residue number
self.d_x = x # list holds x-coordiantes
self.d_y = y # list holds y-coordinates
self.d_z = z # list holds z-coordinatees
def process(fname, d_model=1, d_ALI='A', d_chain=[], resetResId=False):
basename = os.path.basename(fname)
universe.add('d_pdbName', basename[0:len(basename)-4])
universe.add('d_model', d_model)
universe.add('d_ALI', d_ALI)
load(fname, d_model, d_ALI, d_chain)
# Update d_chain from [] to a list of actual chains used:
d_chain = []
for residue in universe.get('d_residues'):
d_chain.append(residue.d_chain)
d_chain = list(set(d_chain))
d_chain.sort()
universe.add('d_chain', d_chain)
utils.update("process", 'file={0}, MODEL#={1}, ALI={2}, chain(s)={3}...'.format(fname, d_model, d_ALI, d_chain))
# Write processed.pdb to file:
utils.update("process", "writing processed .pdb file to {}_PR1.pdb...".format(universe.get('d_pdbName')))
write("{0}_PR1.pdb".format(universe.get('d_pdbName')))
# Load a .pdb file into the universe.
def load(fname, d_model=1, d_ALI='A', d_chain=[]):
d_residues = []
atomLines = []
with open(fname) as file: # Read .pdb line-by-line.
read = True # True if no specific MODEL specified.
for line in file.readlines():
# This is to make sure we only import the specified MODEL.
if ((line[0:6]) == "MODEL "):
if ("MODEL {:8d}".format(d_model) in line):
read = True
else:
read = False
# Get title.
if ((line[0:6]) == "TITLE "):
d_title = line[7:80].rstrip(); universe.add('d_title', d_title)
# Get periodic box information (if any).
if ((line[0:6]) == "CRYST1"):
d_box = line[7:80].rstrip(); universe.add('d_box', d_box)
# if our line specifies an ATOM,
if (line[0:6] == "ATOM "):
# and we are currently reading the correct MODEL,
if (read == True):
# and our line contains the correct specified alternate
# location specifier (if any at all),
if (line[16:17] in [d_ALI, " "]):
# and we want all chains,
if (d_chain == []):
# then load the atom.
atomLines.append(line)
# or if we want a selection of chains,
elif (line[21:22] in d_chain):
# load that selection.
atomLines.append(line)
# Add one line of padding to prevent IndexError
atomLines.append("0000000000000000000000000000000000000000000000000000")
# Loop through lines and create list of Residue objects.
atoms = []; ali = []; xCoord = []; yCoord = []; zCoord = []
for idx in range(0, len(atomLines) - 1):
atoms.append(atomLines[idx][12:16])
ali.append(atomLines[idx][16:17])
xCoord.append(float(atomLines[idx][30:38]))
yCoord.append(float(atomLines[idx][38:46]))
zCoord.append(float(atomLines[idx][46:54]))
# If the resid of the next line is different, we are at end
if (atomLines[idx][22:26] != atomLines[idx + 1][22:26]):
# put the data in a Residue object and append to d_residues:
d_residues.append(
Residue
(
atoms,
ali,
atomLines[idx][17:20],
atomLines[idx][21:22],
int(atomLines[idx][22:26]),
xCoord,
yCoord,
zCoord
))
# Reset.
atoms = []; ali = []; xCoord = []; yCoord = []; zCoord = []
universe.add('d_residues', d_residues)
# Write (modified) .pdb file.
def write(name):
with open(name, 'w') as file:
if universe.has('d_title'):
file.write("TITLE {0}\n".format(universe.get('d_title')))
if universe.has('d_box'):
file.write("CRYST1{0}\n".format(universe.get('d_box')))
file.write("MODEL {:8d}\n".format(universe.get('d_model')))
atomNumber = 1
for residue in universe.get('d_residues'):
for idx in range(0, len(residue.d_atoms)):
file.write("{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}\n".format('ATOM', atomNumber, residue.d_atoms[idx], residue.d_ali[idx], residue.d_resname, residue.d_chain, residue.d_resid, '', residue.d_x[idx], residue.d_y[idx], residue.d_z[idx]))
atomNumber += 1
file.write("TER\nENDMDL\n")
utils.add_to_nameList(name)
# Count number of residues with a specific residue name.
def countRes(resName):
count = 0
for residue in universe.get('d_residues'):
if (residue.d_resname == resName):
count += 1
return count
def add_box(d_boxMargin, d_boxType='cubic'):
utils.update("add_box", "adding box using gmx editconf (boxMargin = {0}, boxType = {1})...".format(d_boxMargin, d_boxType))
os.system("gmx editconf -f {0} -o {1}_BOX.pdb -d {2} -bt {3} >> builder.log 2>&1".format(universe.get('d_nameList')[-1], universe.get('d_pdbName'), d_boxMargin, d_boxType))
# To set d_boxMargin and d_boxType.
universe.add('d_boxMargin', d_boxMargin)
universe.add('d_boxType', d_boxType)
# To update d_box.
load("{0}_BOX.pdb".format(universe.get('d_pdbName')))
# To update d_nameList.
utils.add_to_nameList("{0}_BOX.pdb".format(universe.get('d_pdbName')))
def add_buffer(ph_bufpdbName, ph_bufitpName, ph_bufMargin=2.0, ph_bufnmol=-1, attempts=100000):
if not (universe.get('ph_constantpH') and universe.get('ph_restrainpH')):
utils.update("add_buffer", "either ph_constantpH or ph_restrainpH is False --> skipping...")
return
# If user doesn't specified the amount, use #BUF = #ACID.
if (ph_bufnmol == -1):
ph_bufnmol = countRes('ASP') + countRes('GLU')
utils.update("add_buffer", "will attempt to add {0} buffer molecule(s)...".format(ph_bufnmol))
# RUN GROMACS INSERT-MOLECULES COMMAND
os.system("touch vdwradii.dat") # we need this dummy file for this to work.
os.system("gmx insert-molecules -f {0} -o {1}_BUF.pdb -ci {2} -nmol {3} -scale 1.0 -radius {4} -try {5} >> builder.log 2>&1".format(
universe.get('d_nameList')[-1],
universe.get('d_pdbName'),
ph_bufpdbName,
ph_bufnmol,
0.5 * ph_bufMargin,
int(attempts / ph_bufnmol)))
os.remove("vdwradii.dat") # clean dummy file.
# To update d_residues.
load("{0}_BUF.pdb".format(universe.get('d_pdbName')))
# Give user a warning if there wasn't enough space.
actual = countRes('BUF')
if actual < ph_bufnmol:
utils.update("add_buffer", "warning: only {0}/{1} requested buffer molecules inserted after {2} attempts,".format(actual, ph_bufnmol, attempts))
utils.update("add_buffer", "warning: try decreasing ph_bufMargin (={0}nm) or increasing d_boxMargin (={1}nm)...".format(ph_bufMargin, universe.get('d_boxMargin')))
else:
utils.update("add_buffer", "succesfully added {0} buffer molecule(s)...".format(actual))
# To add buffer topology to topol.top.
utils.update("add_buffer", "updating topology...")
os.system("cp {} .".format(ph_bufitpName))
topol.add_mol(os.path.basename(ph_bufitpName), "Include buffer topology", 'BUF', actual)
# Set some parameters in the universe.
universe.add('ph_bufpdbName', ph_bufpdbName)
universe.add('ph_bufitpName', ph_bufitpName)
universe.add('ph_bufMargin', ph_bufMargin)
universe.add('ph_bufnmol', actual)
# To update d_nameList.
utils.add_to_nameList("{0}_BUF.pdb".format(universe.get('d_pdbName')))
def add_water():
utils.update("add_water", "running gmx solvate...")
os.system("gmx solvate -cp {0} -o {1}_SOL.pdb >> builder.log 2>&1".format(universe.get('d_nameList')[-1], universe.get('d_pdbName')))
# To update d_residues.
load("{0}_SOL.pdb".format(universe.get('d_pdbName')))
# To update topol.top.
topol.add_mol("{0}.ff/{1}.itp".format(universe.get('d_modelFF'), universe.get('d_modelWater')), "Include water topology", "SOL", countRes('SOL'))
# To update d_nameList.
utils.add_to_nameList("{0}_SOL.pdb".format(universe.get('d_pdbName')))
def add_ions(neutral=True, conc=0, pname='NA', nname='CL'):
if (not neutral) and (conc == 0):
utils.update("add_ions", "no ions will be added...")
return
if neutral:
utils.update("add_ions", "will add ions ({}/{}) to neutralize the system...".format(pname, nname))
if conc > 0:
utils.update("add_ions", "will add ions ({}/{}) for target concentration = {} mmol/ml...".format(pname, nname, conc))
# Generate IONS.mdp (just a dummy required).
os.system('touch IONS.mdp')
# Add ion topology to topol.top.
topol.add_mol("{0}.ff/ions.itp".format(universe.get('d_modelFF')), "Include ion topology")
# Run gmx grompp and genion.
utils.update("add_ions", "running gmx grompp and genion to add ions...")
os.system("gmx grompp -f IONS.mdp -c {0} -p topol.top -o IONS.tpr >> builder.log 2>&1".format(universe.get('d_nameList')[-1]))
if neutral:
os.system("gmx genion -s IONS.tpr -o {0}_ION.pdb -p topol.top -pname {1} -nname {2} -conc {3} -neutral >> builder.log 2>&1 << EOF\nSOL\nEOF".format(universe.get('d_pdbName'), pname, nname, conc))
else:
os.system("gmx genion -s IONS.tpr -o {0}_ION.pdb -p topol.top -pname {1} -nname {2} -conc {3} >> builder.log 2>&1 << EOF\nSOL\nEOF".format(universe.get('d_pdbName'), pname, nname, conc))
# To update d_residues.
load("{0}_ION.pdb".format(universe.get('d_pdbName')))
# To update d_nameList.
utils.add_to_nameList("{0}_ION.pdb".format(universe.get('d_pdbName')))