From 4f55960f283b5b9cd64150681322bd56fb72289d Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Thu, 6 Sep 2018 16:36:40 -0500 Subject: [PATCH] added diff_pdb --- __init__.py | 62 +++----------------------------------------- diff.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++ parse.py | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ pdblist.py | 12 +++++++++ 4 files changed, 161 insertions(+), 59 deletions(-) create mode 100644 diff.py create mode 100644 parse.py create mode 100644 pdblist.py diff --git a/__init__.py b/__init__.py index 1a25fa0..26abbef 100644 --- a/__init__.py +++ b/__init__.py @@ -1,59 +1,3 @@ -from collections import OrderedDict - -def parse_pdb_line(l): - l = l.rstrip() - return OrderedDict( - [ - # TODO support more than 100000 atoms - ("Record" , l[ 0: 6] ), - ("serial" , int(l[ 6:11])), - ("name" , l[12:16] ), - ("altLoc" , l[16:17] ), - ("resName" , l[17:21] ), - ("chainID" , l[21:22] ), - # TODO support more than 10000 residues - ("resSeq" , int(l[22:26])), - ("iCode" , l[26:27] ), - ("x" , float(l[30:38])), - ("y" , float(l[38:46])), - ("z" , float(l[46:54])), - ("occupancy" , float(l[54:60])), - ("tempFactor", float(l[60:66])), - ("segment" , l[72:76] ), - ("element" , l[76:78] ), - ("charge" , l[78:80] ), - ] - ) - -def parse_pdb_file(filename): - with open(filename) as f: - rtn = [] - for l in f: - if l[:6] in ["ATOM ", "HETATM"]: - rtn.append(parse_pdb_line(l)) - return rtn - -def write_pdb_line(entry): - name = entry["name"] - if len(name) < 4: - name = ' ' + name - mod_entry = entry.copy() - mod_entry["name"] = name - return '{: <6s}{: >5d} {: <4s}{:1s}{: <4s}{:1s}{: >4d}{:1s} {: >8.3f}{: >8.3f}{: >8.3f}{: >6.2f}{: >6.2f} {: <4s}{: >2s}{: >2s}'.format(*mod_entry.values()).rstrip() + '\n' - -def write_pdb_file(pdbdata): - return ''.join([write_pdb_line(entry) for entry in pdbdata]) - -if __name__ == "__main__": - #parse_pdb_line("ATOM 1914 SOD SOD S1127 0.016 -3.389 -0.040 1.00 58.57 S NA") - source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51_drude/step2_drude.pdb" - source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51/step5_assembly.namd.pdb" - #source = "orig.pdb" - pdb = (parse_pdb_file(source)) - #print(pdb[6].values()) - with open("dd.pdb", "w") as f: - f.write(write_pdb_file(pdb)) - with open(source) as f: - with open('ref.pdb', 'w') as fout: - for l in f: - fout.write(l.rstrip() + '\n') +from .parse import * +from .diff import * +from .pdblist import * diff --git a/diff.py b/diff.py new file mode 100644 index 0000000..5f71859 --- /dev/null +++ b/diff.py @@ -0,0 +1,74 @@ +from pdblib import parse_pdb_file, parse_pdb_line +from .pdblist import PDBList +#from sys import stderr + +def equal(lhs, rhs, tol=1e-6): + """ + An equal function considering floating points + """ + try: + return abs(lhs - rhs) < tol + except: + return lhs == rhs + +def entry_match(lhs, rhs, matching_keys = + ["name", "resName", "chainID", "resSeq", "segment"]): + """ + Return True if lhs and rhs match for all keys in matching_keys + """ + for key in matching_keys: + if not equal(lhs[key], rhs[key]): + return False + return True + +def entry_compare(lhs, rhs, cmp_keys = ["x", "y", "z"]): + """ + Return True if lhs and rhs mismatch for any key in cmp_keys + """ + return not entry_match(lhs, rhs, cmp_keys) + +def diff_pdb(query, subject, match=entry_match, compare=entry_compare): + """ + calculate the difference between pdb files + + query : in, short pdb filename + subject : in, long pdb filename + match : in, a callable to determine whether two entries match, + match(query_entry, subject_entry) = True if they match + compare : in, a callable to determine whether two matching entries are + inequivalent, return True if not identical + + matchlist, the entries in subject file that match the query file + diff1list, diff2lis, the entries where two files are different + notfoundlist, the entries in the query file not found in the subject file + """ + # pdb1 < pdb2 + pdb1 = parse_pdb_file(query) + pdb2 = parse_pdb_file(subject) + + #lpdb2 = len(pdb2) + matchlist = PDBList() + diff1list = PDBList() + diff2list = PDBList() + notfoundlist = PDBList() + #idx2 = 0 + for entry in pdb1: + found = False + #for idx2 in range(idx2, lpdb2): + for idx2 in range(len(pdb2)): + if match(entry, pdb2[idx2]): + matchlist.append(pdb2[idx2]) + if compare(entry, pdb2[idx2]): + diff1list.append(entry) + diff2list.append(pdb2[idx2]) + del pdb2[idx2] + found = True + break + #if idx2 == lpdb2: + if not found: + #print("Warning, entry not found:", entry, file=sys.stderr) + #matchlist.append(parse_pdb_line("")) + notfoundlist.append(entry) + #return False, [], [] + + return matchlist, diff1list, diff2list, notfoundlist diff --git a/parse.py b/parse.py new file mode 100644 index 0000000..95a25bf --- /dev/null +++ b/parse.py @@ -0,0 +1,72 @@ +from collections import OrderedDict +from .pdblist import PDBList + +def parse_pdb_line(l): + """ + Parse a line in a pdb file and convert it to a OrderedDict + """ + l = l.rstrip() + return OrderedDict( + [ + # TODO support more than 100000 atoms + ("Record" , l[ 0: 6] ), + ("serial" , int(l[ 6:11])), + ("name" , l[12:16] ), + ("altLoc" , l[16:17] ), + ("resName" , l[17:21] ), + ("chainID" , l[21:22] ), + # TODO support more than 10000 residues + ("resSeq" , int(l[22:26])), + ("iCode" , l[26:27] ), + ("x" , float(l[30:38])), + ("y" , float(l[38:46])), + ("z" , float(l[46:54])), + ("occupancy" , float(l[54:60])), + ("tempFactor", float(l[60:66])), + ("segment" , l[72:76] ), + ("element" , l[76:78] ), + ("charge" , l[78:80] ), + ] + ) + +def parse_pdb_file(filename): + """ + Given the filename, parse a pdb file and convert it to a list of OrderedDict + """ + with open(filename) as f: + rtn = PDBList() + for l in f: + if l[:6] in ["ATOM ", "HETATM"]: + rtn.append(parse_pdb_line(l)) + return rtn + +def write_pdb_line(entry): + """ + Convert a OrderedDict to a pdb line + """ + name = entry["name"] + if len(name) < 4: + name = ' ' + name + mod_entry = entry.copy() + mod_entry["name"] = name + return '{: <6s}{: >5d} {: <4s}{:1s}{: <4s}{:1s}{: >4d}{:1s} {: >8.3f}{: >8.3f}{: >8.3f}{: >6.2f}{: >6.2f} {: <4s}{: >2s}{: >2s}'.format(*mod_entry.values()).rstrip() + '\n' + +def write_pdb_file(pdbdata): + """ + Convert a list of OrderedDict to a pdb file string + """ + return ''.join([write_pdb_line(entry) for entry in pdbdata]) + +if __name__ == "__main__": + #parse_pdb_line("ATOM 1914 SOD SOD S1127 0.016 -3.389 -0.040 1.00 58.57 S NA") + source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51_drude/step2_drude.pdb" + source = "/home/zhiwang/my_proj/charmm-gui_e86p_A51/step5_assembly.namd.pdb" + #source = "orig.pdb" + pdb = (parse_pdb_file(source)) + #print(pdb[6].values()) + with open("dd.pdb", "w") as f: + f.write(write_pdb_file(pdb)) + with open(source) as f: + with open('ref.pdb', 'w') as fout: + for l in f: + fout.write(l.rstrip() + '\n') diff --git a/pdblist.py b/pdblist.py new file mode 100644 index 0000000..77ead4c --- /dev/null +++ b/pdblist.py @@ -0,0 +1,12 @@ +class PDBList(list): + def __getitem__(self, key): + #if type(key) is int: + try: + if abs(float(key) - int(key)) > 1e-6: + raise ValueError + return super().__getitem__(int(key)) + except ValueError: + try: + return [x[key] for x in self] + except: + return [[x[key] for x in y ] for y in self]