From e528159445d894dbff69f6d8dd57d75e0b0407fc Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 18 Apr 2022 20:24:07 +0100 Subject: [PATCH 1/3] parseSTARSection handle missing keys --- prody/proteins/ciffile.py | 5 +++-- prody/proteins/starfile.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/prody/proteins/ciffile.py b/prody/proteins/ciffile.py index 9435f41b2..3d583511a 100644 --- a/prody/proteins/ciffile.py +++ b/prody/proteins/ciffile.py @@ -409,8 +409,9 @@ def _parseMMCIFLines(atomgroup, lines, model, chain, subset, anisou = None siguij = None try: - anisou_data = data = parseSTARSection(lines, "_atom_site_anisotrop") - except ValueError: + data = parseSTARSection(lines, "_atom_site_anisotrop") + x = data[0] # check if data has anything in it + except IndexError: LOGGER.warn("No anisotropic B factors found") else: anisou = np.zeros((acount, 6), diff --git a/prody/proteins/starfile.py b/prody/proteins/starfile.py index c9ca3e796..e535852c2 100644 --- a/prody/proteins/starfile.py +++ b/prody/proteins/starfile.py @@ -1077,6 +1077,7 @@ def parseSTARSection(lines, key): else: data = [loop_dict["data"]] else: - raise ValueError("Could not find {0} in lines.".format(key)) + LOGGER.warn("Could not find {0} in lines.".format(key)) + return [] return data \ No newline at end of file From 965492c3cdee50e082aeb22920c59d9efa2a03f2 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 18 Apr 2022 20:24:25 +0100 Subject: [PATCH 2/3] cif header chemicals fix --- prody/proteins/cifheader.py | 179 +++++++++--------------------------- 1 file changed, 43 insertions(+), 136 deletions(-) diff --git a/prody/proteins/cifheader.py b/prody/proteins/cifheader.py index a57942c34..8cc7b9574 100644 --- a/prody/proteins/cifheader.py +++ b/prody/proteins/cifheader.py @@ -1140,152 +1140,59 @@ def _getChemicals(lines): # 1st block we need is has info about location in structure # this instance only includes single sugars not branched structures - i = 0 - fields1 = OrderedDict() - fieldCounter1 = -1 - foundChemBlock1 = False - foundChemBlockData1 = False - doneChemBlock1 = False - start1 = 0 - stop1 = 0 - while not doneChemBlock1 and i < len(lines): - line = lines[i] - if line[:21] == '_pdbx_nonpoly_scheme.': - fieldCounter1 += 1 - fields1[line.split('.')[1].strip()] = fieldCounter1 - if not foundChemBlock1: - foundChemBlock1 = True - - if foundChemBlock1: - if not line.startswith('#') and not line.startswith('_'): - if not foundChemBlockData1: - start1 = i - foundChemBlockData1 = True - else: - if foundChemBlockData1: - doneChemBlock1 = True - stop1 = i - - i += 1 - - if i < len(lines): - for line in lines[start1:stop1]: - data = split(line, shlex=True) + items = parseSTARSection(lines, "_pdbx_nonpoly_scheme") - resname = data[fields1["mon_id"]] - if resname in flags.AMINOACIDS or resname == "HOH": - continue + for data in items: + resname = data["_pdbx_nonpoly_scheme.mon_id"] + if resname in flags.AMINOACIDS or resname == "HOH": + continue - chem = Chemical(resname) - chem.chain = data[fields1["pdb_strand_id"]] - chem.resnum = int(data[fields1["pdb_seq_num"]]) + chem = Chemical(resname) + chem.chain = data["_pdbx_nonpoly_scheme.pdb_strand_id"] + chem.resnum = int(data["_pdbx_nonpoly_scheme.pdb_seq_num"]) - icode = data[fields1["pdb_ins_code"]] - if icode == '.': - icode = '' - chem.icode = icode - chem.description = '' # often empty in .pdb and not clearly here - chemicals[chem.resname].append(chem) + icode = data["_pdbx_nonpoly_scheme.pdb_ins_code"] + if icode == '.': + icode = '' + chem.icode = icode + chem.description = '' # often empty in .pdb and not clearly here + chemicals[chem.resname].append(chem) # next we get the equivalent one for branched sugars part - i = 0 - fields1 = OrderedDict() - fieldCounter1 = -1 - foundChemBlock1 = False - foundChemBlockData1 = False - doneChemBlock1 = False - start1 = 0 - stop1 = 0 - while not doneChemBlock1 and i < len(lines): - line = lines[i] - if line[:20] == '_pdbx_branch_scheme.': - fieldCounter1 += 1 - fields1[line.split('.')[1].strip()] = fieldCounter1 - if not foundChemBlock1: - foundChemBlock1 = True - - if foundChemBlock1: - if not line.startswith('#') and not line.startswith('_'): - if not foundChemBlockData1: - start1 = i - foundChemBlockData1 = True - else: - if foundChemBlockData1: - doneChemBlock1 = True - stop1 = i - - i += 1 - - if i < len(lines): - for line in lines[start1:stop1]: - data = split(line, shlex=True) + items = parseSTARSection(lines, "_pdbx_branch_scheme") - resname = data[fields1["mon_id"]] - if resname in flags.AMINOACIDS or resname == "HOH": - continue + for data in items: + resname = data["_pdbx_branch_scheme.mon_id"] + if resname in flags.AMINOACIDS or resname == "HOH": + continue - chem = Chemical(resname) - chem.chain = data[fields1["pdb_asym_id"]] - chem.resnum = int(data[fields1["pdb_seq_num"]]) + chem = Chemical(resname) + chem.chain = data["_pdbx_branch_scheme.pdb_asym_id"] + chem.resnum = int(data["_pdbx_branch_scheme.pdb_seq_num"]) - chem.icode = '' # this part doesn't have this field - chem.description = '' # often empty in .pdb and not clearly here - chemicals[chem.resname].append(chem) + chem.icode = '' # this part doesn't have this field + chem.description = '' # often empty in .pdb and not clearly here + chemicals[chem.resname].append(chem) # 2nd block to get has general info e.g. name and formula - i = 0 - fields2 = OrderedDict() - fieldCounter2 = -1 - foundChemBlock2 = False - foundChemBlockData2 = False - doneChemBlock2 = False - start2 = 0 - stop2 = 0 - while not doneChemBlock2 and i < len(lines): - line = lines[i] - if line[:11] == '_chem_comp.': - fieldCounter2 += 1 - fields2[line.split('.')[1].strip()] = fieldCounter2 - if not foundChemBlock2: - start2 = i - foundChemBlock2 = True - - if foundChemBlock2: - if not line.startswith('#') and not line.startswith('_'): - if not foundChemBlockData2: - foundChemBlockData2 = True - else: - if foundChemBlockData2: - doneChemBlock2 = True - stop2 = i - - i += 1 - - if i < len(lines): - star_dict, _ = parseSTARLines(lines[:2] + lines[start2-1:stop2], shlex=True) - loop_dict = list(star_dict.values())[0] - - if lines[start2-1].strip() == "loop_": - items = loop_dict[0]["data"].values() - else: - items = [loop_dict["data"]] - - for data in items: - resname = data["_chem_comp.id"] - if resname in flags.AMINOACIDS or resname == "HOH": - continue - - chem_names[resname] += data["_chem_comp.name"].upper() - - synonym = data["_chem_comp.pdbx_synonyms"] - if synonym == '?': - synonym = ' ' - synonym = synonym.rstrip() - if synonym.startswith(';') and synonym.endswith(';'): - synonym = synonym[1:-1] - chem_synonyms[resname] += synonym - - chem_formulas[resname] += data["_chem_comp.formula"] + items = parseSTARSection(lines, "_chem_comp") + + for data in items: + resname = data["_chem_comp.id"] + if resname in flags.AMINOACIDS or resname == "HOH": + continue + + chem_names[resname] += data["_chem_comp.name"].upper() + + synonym = data["_chem_comp.pdbx_synonyms"] + if synonym == '?': + synonym = ' ' + synonym = synonym.rstrip() + if synonym.startswith(';') and synonym.endswith(';'): + synonym = synonym[1:-1] + chem_synonyms[resname] += synonym + + chem_formulas[resname] += data["_chem_comp.formula"] for key, name in chem_names.items(): # PY3K: OK From d3ffe8825562e7a32048d9e7255c84b34bc73acd Mon Sep 17 00:00:00 2001 From: James Krieger Date: Mon, 18 Apr 2022 19:54:04 +0100 Subject: [PATCH 3/3] parseScipionModes fix no pdb --- prody/dynamics/functions.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/prody/dynamics/functions.py b/prody/dynamics/functions.py index e33c18e69..0859d69f9 100644 --- a/prody/dynamics/functions.py +++ b/prody/dynamics/functions.py @@ -335,13 +335,17 @@ def parseScipionModes(run_path, title=None, pdb=None): n_modes = star_loop.numRows() - atoms = parsePDB(pdb) - n_atoms = atoms.numAtoms() - row1 = star_loop[0] mode1 = parseArray(top_dirs + row1['_nmaModefile']).reshape(-1) dof = mode1.shape[0] + if pdb is not None: + atoms = parsePDB(pdb) + n_atoms = atoms.numAtoms() + else: + # assume standard NMA + n_atoms = dof//3 + vectors = np.zeros((dof, n_modes)) vectors[:, 0] = mode1