Skip to content

Commit

Permalink
added diff_pdb
Browse files Browse the repository at this point in the history
  • Loading branch information
zwang123 committed Sep 6, 2018
1 parent 6f97ba1 commit 4f55960
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 59 deletions.
62 changes: 3 additions & 59 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,3 @@
from collections import OrderedDict

def parse_pdb_line(l):
l = l.rstrip()
return OrderedDict(
[
# TODO support more than 100000 atoms
("Record" , l[ 0: 6] ),
("serial" , int(l[ 6:11])),
("name" , l[12:16] ),
("altLoc" , l[16:17] ),
("resName" , l[17:21] ),
("chainID" , l[21:22] ),
# TODO support more than 10000 residues
("resSeq" , int(l[22:26])),
("iCode" , l[26:27] ),
("x" , float(l[30:38])),
("y" , float(l[38:46])),
("z" , float(l[46:54])),
("occupancy" , float(l[54:60])),
("tempFactor", float(l[60:66])),
("segment" , l[72:76] ),
("element" , l[76:78] ),
("charge" , l[78:80] ),
]
)

def parse_pdb_file(filename):
with open(filename) as f:
rtn = []
for l in f:
if l[:6] in ["ATOM ", "HETATM"]:
rtn.append(parse_pdb_line(l))
return rtn

def write_pdb_line(entry):
name = entry["name"]
if len(name) < 4:
name = ' ' + name
mod_entry = entry.copy()
mod_entry["name"] = name
return '{: <6s}{: >5d} {: <4s}{:1s}{: <4s}{:1s}{: >4d}{:1s} {: >8.3f}{: >8.3f}{: >8.3f}{: >6.2f}{: >6.2f} {: <4s}{: >2s}{: >2s}'.format(*mod_entry.values()).rstrip() + '\n'

def write_pdb_file(pdbdata):
return ''.join([write_pdb_line(entry) for entry in pdbdata])

if __name__ == "__main__":
#parse_pdb_line("ATOM 1914 SOD SOD S1127 0.016 -3.389 -0.040 1.00 58.57 S NA")
source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51_drude/step2_drude.pdb"
source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51/step5_assembly.namd.pdb"
#source = "orig.pdb"
pdb = (parse_pdb_file(source))
#print(pdb[6].values())
with open("dd.pdb", "w") as f:
f.write(write_pdb_file(pdb))
with open(source) as f:
with open('ref.pdb', 'w') as fout:
for l in f:
fout.write(l.rstrip() + '\n')
from .parse import *
from .diff import *
from .pdblist import *
74 changes: 74 additions & 0 deletions diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from pdblib import parse_pdb_file, parse_pdb_line
from .pdblist import PDBList
#from sys import stderr

def equal(lhs, rhs, tol=1e-6):
"""
An equal function considering floating points
"""
try:
return abs(lhs - rhs) < tol
except:
return lhs == rhs

def entry_match(lhs, rhs, matching_keys =
["name", "resName", "chainID", "resSeq", "segment"]):
"""
Return True if lhs and rhs match for all keys in matching_keys
"""
for key in matching_keys:
if not equal(lhs[key], rhs[key]):
return False
return True

def entry_compare(lhs, rhs, cmp_keys = ["x", "y", "z"]):
"""
Return True if lhs and rhs mismatch for any key in cmp_keys
"""
return not entry_match(lhs, rhs, cmp_keys)

def diff_pdb(query, subject, match=entry_match, compare=entry_compare):
"""
calculate the difference between pdb files
query : in, short pdb filename
subject : in, long pdb filename
match : in, a callable to determine whether two entries match,
match(query_entry, subject_entry) = True if they match
compare : in, a callable to determine whether two matching entries are
inequivalent, return True if not identical
matchlist, the entries in subject file that match the query file
diff1list, diff2lis, the entries where two files are different
notfoundlist, the entries in the query file not found in the subject file
"""
# pdb1 < pdb2
pdb1 = parse_pdb_file(query)
pdb2 = parse_pdb_file(subject)

#lpdb2 = len(pdb2)
matchlist = PDBList()
diff1list = PDBList()
diff2list = PDBList()
notfoundlist = PDBList()
#idx2 = 0
for entry in pdb1:
found = False
#for idx2 in range(idx2, lpdb2):
for idx2 in range(len(pdb2)):
if match(entry, pdb2[idx2]):
matchlist.append(pdb2[idx2])
if compare(entry, pdb2[idx2]):
diff1list.append(entry)
diff2list.append(pdb2[idx2])
del pdb2[idx2]
found = True
break
#if idx2 == lpdb2:
if not found:
#print("Warning, entry not found:", entry, file=sys.stderr)
#matchlist.append(parse_pdb_line(""))
notfoundlist.append(entry)
#return False, [], []

return matchlist, diff1list, diff2list, notfoundlist
72 changes: 72 additions & 0 deletions parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from collections import OrderedDict
from .pdblist import PDBList

def parse_pdb_line(l):
"""
Parse a line in a pdb file and convert it to a OrderedDict
"""
l = l.rstrip()
return OrderedDict(
[
# TODO support more than 100000 atoms
("Record" , l[ 0: 6] ),
("serial" , int(l[ 6:11])),
("name" , l[12:16] ),
("altLoc" , l[16:17] ),
("resName" , l[17:21] ),
("chainID" , l[21:22] ),
# TODO support more than 10000 residues
("resSeq" , int(l[22:26])),
("iCode" , l[26:27] ),
("x" , float(l[30:38])),
("y" , float(l[38:46])),
("z" , float(l[46:54])),
("occupancy" , float(l[54:60])),
("tempFactor", float(l[60:66])),
("segment" , l[72:76] ),
("element" , l[76:78] ),
("charge" , l[78:80] ),
]
)

def parse_pdb_file(filename):
"""
Given the filename, parse a pdb file and convert it to a list of OrderedDict
"""
with open(filename) as f:
rtn = PDBList()
for l in f:
if l[:6] in ["ATOM ", "HETATM"]:
rtn.append(parse_pdb_line(l))
return rtn

def write_pdb_line(entry):
"""
Convert a OrderedDict to a pdb line
"""
name = entry["name"]
if len(name) < 4:
name = ' ' + name
mod_entry = entry.copy()
mod_entry["name"] = name
return '{: <6s}{: >5d} {: <4s}{:1s}{: <4s}{:1s}{: >4d}{:1s} {: >8.3f}{: >8.3f}{: >8.3f}{: >6.2f}{: >6.2f} {: <4s}{: >2s}{: >2s}'.format(*mod_entry.values()).rstrip() + '\n'

def write_pdb_file(pdbdata):
"""
Convert a list of OrderedDict to a pdb file string
"""
return ''.join([write_pdb_line(entry) for entry in pdbdata])

if __name__ == "__main__":
#parse_pdb_line("ATOM 1914 SOD SOD S1127 0.016 -3.389 -0.040 1.00 58.57 S NA")
source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51_drude/step2_drude.pdb"
source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51/step5_assembly.namd.pdb"
#source = "orig.pdb"
pdb = (parse_pdb_file(source))
#print(pdb[6].values())
with open("dd.pdb", "w") as f:
f.write(write_pdb_file(pdb))
with open(source) as f:
with open('ref.pdb', 'w') as fout:
for l in f:
fout.write(l.rstrip() + '\n')
12 changes: 12 additions & 0 deletions pdblist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
class PDBList(list):
def __getitem__(self, key):
#if type(key) is int:
try:
if abs(float(key) - int(key)) > 1e-6:
raise ValueError
return super().__getitem__(int(key))
except ValueError:
try:
return [x[key] for x in self]
except:
return [[x[key] for x in y ] for y in self]

0 comments on commit 4f55960

Please sign in to comment.