patch linter (#224)

* patch linter
* remove pylint: disable=import-error when possible
* remove more import-error pylint statements
* remove openmm disable pylint import error
* add openmm pylint dependency
* remove #For pylint comment
* remove top level import
* remove top level imports nmr4md.py

---------

Co-authored-by: Brandon Duane Walker <[email protected]>
PolusAI · Jan 19, 2024 · b6051af · b6051af
1 parent 3cb4ff6
commit b6051af
Show file tree

Hide file tree

Showing 12 changed files with 92 additions and 48 deletions.
diff --git a/examples/diffdock/pose_cluster_filter.py b/examples/diffdock/pose_cluster_filter.py
@@ -41,8 +41,8 @@ def EuclideanDist(pi: Chem.SDMolSupplier, pj: Chem.SDMolSupplier) -> float:
     # GetConformers will just use input coordinates if conformations are not pre-generated
     confi = pi.GetConformers()[0]
     confj = pj.GetConformers()[0]
-    centeri = rdmt.ComputeCentroid(confi)
-    centerj = rdmt.ComputeCentroid(confj)
+    centeri = rdmt.ComputeCentroid(confi)  # pylint: disable=c-extension-no-member
+    centerj = rdmt.ComputeCentroid(confj)  # pylint: disable=c-extension-no-member
     dv = np.array([centeri.x, centeri.y, centeri.z]) - np.array([centerj.x, centerj.y, centerj.z])
     return float(np.sqrt(np.dot(dv, dv)))
 

diff --git a/examples/scripts/atomselect.py b/examples/scripts/atomselect.py
@@ -1,10 +1,16 @@
-# type: ignore
-from workflow_types import *
+from workflow_types import string, pdbfile  # pylint: disable=import-error
 # NOTE: No other top-level imports supported
 
 
-def main(selection_string, input_pdb_path, output_pdb_path):
-    import mdtraj  # pylint:disable=import-error
+def main(selection_string: str, input_pdb_path: str, output_pdb_path: str) -> None:
+    """Restrict a PDB file to a selection of atoms and save it.
+
+    Args:
+        selection_string (str): Selection string for mdtraj
+        input_pdb_path (str): The path to the input PDB file
+        output_pdb_path (str): The path to the output PDB file
+    """
+    import mdtraj  # pylint: disable=import-outside-toplevel
     traj = mdtraj.load(input_pdb_path)
     print(traj)
     selection_indices = traj.topology.select(selection_string)

diff --git a/examples/scripts/autodock_vina_filter.py b/examples/scripts/autodock_vina_filter.py
@@ -82,7 +82,7 @@
                 floats = [float(x) for x in strs[1:]]
                 score = floats[0]
                 scores.append(score)
-            except Exception as e:
+            except Exception as e:  # pylint: disable=broad-exception-caught
                 scores_all.append(scores)
                 parsing = False
 

diff --git a/examples/scripts/calculate_net_charge.py b/examples/scripts/calculate_net_charge.py
@@ -33,7 +33,7 @@ def get_net_charge(file_path: str, addhydrogens: bool = False) -> Optional[int]:
 
     try:
         mol = Chem.MolFromMol2File(file_path, removeHs=False)
-    except Exception:
+    except Exception:  # pylint: disable=broad-exception-caught
         return None
 
     if not mol:
@@ -45,7 +45,7 @@ def get_net_charge(file_path: str, addhydrogens: bool = False) -> Optional[int]:
         AllChem.ComputeGasteigerCharges(mol,
                                         nIter=50,
                                         throwOnParamFailure=True)
-    except Exception:
+    except Exception:  # pylint: disable=broad-exception-caught
         return None
 
     num_atoms = mol.GetNumAtoms()

diff --git a/examples/scripts/combine_structure.py b/examples/scripts/combine_structure.py
@@ -2,7 +2,6 @@
 import os
 import argparse
 from typing import Optional
-import numpy as np
 
 from rdkit import Chem
 
@@ -21,16 +20,16 @@ def parse_arguments() -> argparse.Namespace:
     return args
 
 
-def read_xyz_rdkit(input_structure_path: str) -> Optional[Chem.rdchem.Mol]:
-    """ Read a PDB file using RDKit 
+def read_xyz_rdkit(input_structure_path: str) -> Optional[Chem.rdchem.Mol]:  # pylint: disable=c-extension-no-member
+    """ Read a PDB file using RDKit
 
     Args:
-        input_structure_path (str): The path to the xyz structure 
+        input_structure_path (str): The path to the xyz structure
 
     Returns:
         Optional[Chem.rdchem.Mol]: The created molecule object
     """
-    xyz = Chem.rdmolfiles.MolFromXYZFile(input_structure_path)
+    xyz = Chem.rdmolfiles.MolFromXYZFile(input_structure_path)  # pylint: disable=c-extension-no-member
 
     if not xyz:
         print(f'Error: failed to generate molecule from file {input_structure_path}')
@@ -43,18 +42,18 @@ def combine_structure_rdkit(input_structure1_path: str, input_structure2_path: s
     """ Combine two structures into a single PDB file using RDKit
 
     Args:
-        input_structure1_path (str): The path to the xyz structure 1 
-        input_structure2_path (str): The path to the xyz structure 2 
+        input_structure1_path (str): The path to the xyz structure 1
+        input_structure2_path (str): The path to the xyz structure 2
         output_structure_path (str): The path to the output combined structure
     """
 
     structure1 = read_xyz_rdkit(input_structure1_path)
     structure2 = read_xyz_rdkit(input_structure2_path)
 
     if structure1 and structure2:
-        combo = Chem.CombineMols(structure1, structure2)
+        combo = Chem.CombineMols(structure1, structure2)  # pylint: disable=no-member
 
-        with Chem.PDBWriter(output_structure_path) as writer:
+        with Chem.PDBWriter(output_structure_path) as writer:  # pylint: disable=no-member
             writer.write(combo)
 
 

diff --git a/examples/scripts/extract_protein.py b/examples/scripts/extract_protein.py
@@ -2,8 +2,7 @@
 import sys
 import os
 import argparse
-from typing import Optional, List
-import numpy as np
+from typing import List
 
 import openmm.app as omma
 

diff --git a/examples/scripts/generate_conformers.py b/examples/scripts/generate_conformers.py
@@ -54,6 +54,8 @@ def calculate_dG(Kd: float) -> float:
     dG = RT * math.log(Kd / standard_concentration)
     return dG
 
+# pylint: disable=too-many-arguments,too-many-locals
+
 
 def load_data(input_excel_path: str, query: str, smiles_column: str, binding_data_column: str,
               output_txt_path: str, min_row: int = 1, max_row: int = -1, convert_Kd_dG: bool = False) -> None:
@@ -84,20 +86,39 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat
 
     # For ncats_phenotypic_curated.csv
     # duplicate-classifier ['duplicate', 'unique']
-    # Virus ['Dengue Virus', 'Sandfly_Fever', 'HCoV-229E', 'MERS-CoV', 'Yellow Fever Virus', 'Zika Virus', 'RSV', 'Powassan', 'SARS-CoV-2', 'H7N7', 'H1N2', 'HPIV-3', 'West Nile Virus']
+    # Virus ['Dengue Virus', 'Sandfly_Fever', 'HCoV-229E', 'MERS-CoV', 'Yellow Fever Virus',\
+    # 'Zika Virus', 'RSV', 'Powassan', 'SARS-CoV-2', 'H7N7', 'H1N2', 'HPIV-3', 'West Nile Virus']
     # BAO Label ['cell-based format']
-    # Cell_Type [nan, 'Unknown', 'Huh-7', 'Hep-2', 'HEL', 'CEF', 'HG23', 'HMC3', 'HBMEC', 'BHK-21', 'Hepa1-6', 'K-562', 'BHK1', 'J774A.1', 'BHK15', 'MA104', 'BHK', 'LLC-MK2', 'BHK21', 'A549/BHK21', 'MK2', 'JEG3', 'CaCo-2', 'MT-4', 'Hela', 'kidney', 'Vero 76', 'WS1', 'HuH-7', 'Vero', 'HEP-2', 'RAW 264.7', 'Huh-5-2', 'C6/36', 'BHK-D2RepT', 'HuH7', 'MDCK', 'NSC', 'HelaM', 'PBMC', 'A549', 'HELF', 'HEK293', 'BSC-40', 'HAE', 'HFF', 'TREx293', 'MDDC', 'BE(2)-C', 'EAC', 'Vero C1008', 'PEK', 'CEM', 'HEK-293', 'Caco-2', 'BHK-WII', 'HeLa', 'HepG2', 'Vero-76']
-    # Standard Type ['EC90', 'Activity', 'CC50', 'EC50', 'TD50', 'Cytotoxicity', 'ID50', 'MIC', 'IC90', 'MCC50', 'Dose', 'Inhibition', 'EC99', 'pIC50', 'MNTD', 'ED50', 'MIC50', 'IC50']
+    # Cell_Type [nan, 'Unknown', 'Huh-7', 'Hep-2', 'HEL', 'CEF', 'HG23', 'HMC3', 'HBMEC', 'BHK-21', \
+    # 'Hepa1-6', 'K-562', 'BHK1', 'J774A.1', 'BHK15', 'MA104', 'BHK', 'LLC-MK2', 'BHK21', 'A549/BHK21',\
+    # 'MK2', 'JEG3', 'CaCo-2', 'MT-4', 'Hela', 'kidney', 'Vero 76', 'WS1', 'HuH-7', 'Vero', 'HEP-2', \
+    # 'RAW 264.7', 'Huh-5-2', 'C6/36', 'BHK-D2RepT', 'HuH7', 'MDCK', 'NSC', 'HelaM', 'PBMC', 'A549', \
+    # 'HELF', 'HEK293', 'BSC-40', 'HAE', 'HFF', 'TREx293', 'MDDC', 'BE(2)-C', 'EAC', 'Vero C1008', \
+    # 'PEK', 'CEM', 'HEK-293', 'Caco-2', 'BHK-WII', 'HeLa', 'HepG2', 'Vero-76']
+    # Standard Type ['EC90', 'Activity', 'CC50', 'EC50', 'TD50', 'Cytotoxicity', 'ID50', 'MIC', \
+    # 'IC90', 'MCC50', 'Dose', 'Inhibition', 'EC99', 'pIC50', 'MNTD', 'ED50', 'MIC50', 'IC50']
     # Standard Relation [nan, "'~'", "'<'", "'<='", "'>'", "'='", "'>='"]
     # Standard Units [nan, 'uM', '%']
     # Outcome ['Active', 'Inactive', 'Inconclusive']
-    # Assay_Type [nan, 'Viral_Replication', 'Unknown', 'Cell_Viability', 'Plaque_Inhibition', 'Focus_Reduction_Assay', 'Proliferation', 'Antigen_Expression', 'Staining_Based', 'Flourescence', 'Viral_Titer', 'Cell_Viability_By_Neutral_Red_Uptake', 'eGFP_Reduction', 'Immunofluorescence', 'Protein_Expression', 'CFI', 'Green_Flourescent_Protein_(eGFP)', 'Viral_Infection', 'Microscopy', 'Immunodetection', 'Replicon_Assay', 'Antigen_Synthesis', 'Viral_RNA_Detection,Plaque_Inhibition,Cell_Viability', 'Cell-based_flavivirus_infection_(CFI)_assay', 'Viral_Yield_Reduction', 'Focus_Forming_Unit_(FFU)_Assay', 'Luciferase', 'Viral_RNA_Detection', 'Luciferase_Reporter_Assay', 'Viral_Entry', 'MTT_Assay', 'RT-PCR', 'Cytopathy', 'Flow_Cytometry', 'Colorimetric', 'Luciferase_Reporter_Gene', 'Cell_Titer', 'Western_Blot', 'Cytotoxicity', 'SDS-PAGE', 'Fluorescence', 'Image-Based', 'Crystal_Violet_Staining_Assay', 'Viral_Reduction_Assay']
+    # Assay_Type [nan, 'Viral_Replication', 'Unknown', 'Cell_Viability', 'Plaque_Inhibition', \
+    # 'Focus_Reduction_Assay', 'Proliferation', 'Antigen_Expression', 'Staining_Based', \
+    # 'Flourescence', 'Viral_Titer', 'Cell_Viability_By_Neutral_Red_Uptake', 'eGFP_Reduction', \
+    # 'Immunofluorescence', 'Protein_Expression', 'CFI', 'Green_Flourescent_Protein_(eGFP)', \
+    # 'Viral_Infection', 'Microscopy', 'Immunodetection', 'Replicon_Assay', 'Antigen_Synthesis', \
+    # 'Viral_RNA_Detection,Plaque_Inhibition,Cell_Viability', 'Cell-based_flavivirus_infection_(CFI)_assay', \
+    # 'Viral_Yield_Reduction', 'Focus_Forming_Unit_(FFU)_Assay', 'Luciferase', 'Viral_RNA_Detection', \
+    # 'Luciferase_Reporter_Assay', 'Viral_Entry', 'MTT_Assay', 'RT-PCR', 'Cytopathy', 'Flow_Cytometry', \
+    # 'Colorimetric', 'Luciferase_Reporter_Gene', 'Cell_Titer', 'Western_Blot', 'Cytotoxicity', 'SDS-PAGE', \
+    # 'Fluorescence', 'Image-Based', 'Crystal_Violet_Staining_Assay', 'Viral_Reduction_Assay']
 
     # For ncats_target_based_curated.csv
     # duplicate-type-classifier ['unique', 'duplicate']
     # Virus ['SNV', 'Zika', 'West_Nile', 'RSV', 'SARS-CoV-2', 'H7N7', '229E', 'MERS-CoV', 'HPIV3', 'Dengue']
     # Target Type ['PROTEIN COMPLEX', 'UNCHECKED', 'ORGANISM', 'SINGLE PROTEIN']
-    # Target ['Matrix M2-1', 'Phosphoprotein', 'NS2B-NS3 Protease', 'NS5', 'Integrin alpha-V/beta-3', 'not defined', 'Hemagglutinin-neuraminidase', 'Nucleocapsid protein', 'PLpro', 'Spike protein', 'Main Protease (3CLpro, Mpro)', 'Nucleoprotein', 'Fusion glycoprotein F0', 'Neuraminidase', 'Matrix protein 2', 'RDRP']
+    # Target ['Matrix M2-1', 'Phosphoprotein', 'NS2B-NS3 Protease', 'NS5', 'Integrin alpha-V/beta-3', \
+    # 'not defined', 'Hemagglutinin-neuraminidase', 'Nucleocapsid protein', 'PLpro', 'Spike protein', \
+    # 'Main Protease (3CLpro, Mpro)', 'Nucleoprotein', 'Fusion glycoprotein F0', 'Neuraminidase', \
+    # 'Matrix protein 2', 'RDRP']
     # Outcome ['Inactive', 'Active', 'Unclear', 'Inconclusive', 'Undetermined']
     # Standard Type ['IC50', 'EC90', 'Inhibition', 'Kd', 'EC50', 'Activity', 'Ki']
     # Standard Relation [nan, "<'", "<='", ">'", "='", ">='"]
@@ -135,18 +156,18 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat
             smiles_binding_data.append(f'{smiles} {binding_datum}')
 
         # See https://www.rdkit.org/docs/GettingStartedInPython.html#working-with-3d-molecules
-        mol_2D: rdkit.Chem.rdchem.Mol = Chem.MolFromSmiles(smiles)
-        AllChem.Compute2DCoords(mol_2D)
+        mol_2D: rdkit.Chem.rdchem.Mol = Chem.MolFromSmiles(smiles)  # pylint: disable=c-extension-no-member,no-member
+        AllChem.Compute2DCoords(mol_2D)  # pylint: disable=no-member
 
         # See https://www.rdkit.org/docs/source/rdkit.Chem.rdmolops.html#rdkit.Chem.rdmolops.AddHs
         # NOTE: "Much of the code assumes that Hs are not included in the molecular topology,
         # so be very careful with the molecule that comes back from this function."
-        mol_3D = Chem.AddHs(mol_2D)
-        AllChem.EmbedMolecule(mol_3D)
-        AllChem.MMFFOptimizeMolecule(mol_3D)
+        mol_3D = Chem.AddHs(mol_2D)  # pylint: disable=no-member
+        AllChem.EmbedMolecule(mol_3D)  # pylint: disable=no-member
+        AllChem.MMFFOptimizeMolecule(mol_3D)  # pylint: disable=no-member
 
         filename = f'ligand_{idx}.sdf'  # chemblid is NOT unique!
-        writer = Chem.SDWriter(filename)
+        writer = Chem.SDWriter(filename)  # pylint: disable=no-member
         # writer = Chem.rdmolfiles.PDBWriter(filename)
         writer.write(mol_3D)
         writer.close()
@@ -157,7 +178,7 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat
 
 def main() -> None:
     """ Reads the command line arguments and loads an Excel database of small molecules,
-    performs a query to extract the SMILES and binding affinity, generates 3D structures 
+    performs a query to extract the SMILES and binding affinity, generates 3D structures
     and saves them in SDF format.
     """
     args = parse_arguments()

diff --git a/examples/scripts/generate_pdbbind_complex.py b/examples/scripts/generate_pdbbind_complex.py
@@ -91,6 +91,8 @@ def read_index_file(index_file_path: str) -> pd.DataFrame:
 
     return pd.DataFrame.from_dict(data)
 
+# pylint: disable=too-many-arguments,too-many-locals
+
 
 def load_data(index_file_name: str, base_dir: str, query: str, output_txt_path: str,
               min_row: int = 1, max_row: int = -1, convert_Kd_dG: bool = False) -> None:
@@ -117,7 +119,7 @@ def load_data(index_file_name: str, base_dir: str, query: str, output_txt_path:
     if int(min_row) != 1 or int(max_row) != -1:
         # We want to convert to zero-based indices and we also want
         # the upper index to be inclusive (i.e. <=) so -1 lower index.
-        df = df[(int(min_row) - 1):int(max_row)]
+        df = df[(int(min_row) - 1):int(max_row)]  # pylint: disable=unsubscriptable-object
 
     # Calculate dG
     df = df[['PDB_code', 'value', 'Kd_Ki']]

diff --git a/examples/scripts/nmr4md.py b/examples/scripts/nmr4md.py
@@ -1,10 +1,18 @@
-from workflow_types import *
+from workflow_types import tprfile, trrfile, pngfile, string  # pylint: disable=import-error
 
 
-def main(input_tpr_path, input_trr_path, output_png_path):  # type: ignore[no-untyped-def]
-    import MDAnalysis as mda
-    import nmrformd
-    from matplotlib import pyplot as plt
+def main(input_tpr_path: str, input_trr_path: str, output_png_path: str) -> None:
+    """Generate NMR analysis plots from trajectory files.
+
+    Args:
+        input_tpr_path (str): Input tpr file path
+        input_trr_path (str): Input trr file path
+        output_png_path (str): Output png file path
+    """
+
+    import MDAnalysis as mda  # pylint: disable=import-outside-toplevel
+    import nmrformd  # pylint: disable=import-error,import-outside-toplevel
+    from matplotlib import pyplot as plt  # pylint: disable=import-outside-toplevel
 
     # The following code comes directly from the nmr4md tutorial at
     # https://github.com/simongravelle/nmrformd/blob/main/docs/source/tutorials/bulk-water.rst

diff --git a/examples/scripts/pdb_fixer.py b/examples/scripts/pdb_fixer.py
@@ -32,8 +32,8 @@ def check_pdb_null(input_pdb_path: str, pdbid: str, url: str) -> bool:
 
     Args:
         input_pdb_path (str): The input PDB structure path
-        pdbid (str): PDB id from RCSB 
-        url (str): URL to retrieve PDB fro
+        pdbid (str): PDB id from RCSB
+        url (str): URL to retrieve PDB from
     Returns:
         bool: Return True if all of the residues are unknown
     """
@@ -50,9 +50,9 @@ def check_pdb_null(input_pdb_path: str, pdbid: str, url: str) -> bool:
 
 
 def find_missing_residues(fixer: PDBFixer) -> PDBFixer:
-    """ Finds the missing residues and adds missing residues within a 
-    chain to prevent "floppy tails," which can lead to an increase in the box size, 
-    significantly increasingthe computation time. This step is taken as floppy tails 
+    """ Finds the missing residues and adds missing residues within a
+    chain to prevent "floppy tails," which can lead to an increase in the box size,
+    significantly increasing the computation time. This step is taken as floppy tails
     are generally not critical for binding.
 
     Args:
@@ -77,11 +77,13 @@ def find_missing_residues(fixer: PDBFixer) -> PDBFixer:
         fixer.missingResidues[key] = [r for r in resnames if r in fixer.templates]
     return fixer
 
+# pylint: disable=too-many-arguments
+
 
 def runpdbfixer(input_pdb_path: str, input_helper_pdb_path: str, output_pdb_path: str,
                 add_atoms: str, add_res: bool, pdbid: str, url: str, rep_nonstandard: bool, heterogens: str) -> None:
-    """ Fixes the protein structure using PDBFixer.PDBFixer offers options 
-    to add hydrogens and solvate the system, but in our usage, we employ 
+    """ Fixes the protein structure using PDBFixer.PDBFixer offers options
+    to add hydrogens and solvate the system, but in our usage, we employ
     PDBFixer solely for adding missing heavy atoms and residues.
 
     Args:
@@ -90,7 +92,7 @@ def runpdbfixer(input_pdb_path: str, input_helper_pdb_path: str, output_pdb_path
         input_helper_pdb_path (str): The input helper PDB structure path
         add_atoms (str): What missing atoms to add: all, heavy, hydrogen, or none
         add_res (bool): If set to True, adds missing residues
-        pdbid (str): PDB id from RCSB 
+        pdbid (str): PDB id from RCSB
         url (str): URL to retrieve PDB from
         rep_nonstandard (bool): Replace nonstandard residues with standard equivalents
     """

diff --git a/install/system_deps.yml b/install/system_deps.yml
@@ -18,6 +18,7 @@ dependencies:
   - xorg-libxrender
   - mdtraj
   - pymol-open-source
+  - pdbfixer
 # Alternatively, can use pymol-bundle from the schrodinger channel.
 #  - pymol-bundle
   - openbabel

diff --git a/pyproject.toml b/pyproject.toml
@@ -52,13 +52,19 @@ mypy-types = [
     "types-openpyxl",
     "types-pyyaml",
 ]
-# For pylint
+
 workflow-deps = [
     "matplotlib",
     "pandas",
     "numpy>=1.21.0",
     "mdanalysis",
     "nmrformd",
+    "mdanalysis",
+    "mdtraj",
+    "rdkit",
+    "pymol",
+    "pdbfixer @ git+https://github.com/openmm/pdbfixer.git",
+    "openmm",
 ]
 # See docs/requirements.txt
 doc = [
@@ -141,7 +147,7 @@ ignore_errors = false
 # versioneer to the exclude=regex above, but this works.
 
 [[tool.mypy.overrides]]
-module = ["BioSimSpace.*", "MDAnalysis.*", "pymol.*", "rdkit.*", "nmrformd.*", "setuptools.*", "parmed.*", "pdbfixer.*", "openmm.*"]
+module = ["BioSimSpace.*", "MDAnalysis.*", "pymol.*", "rdkit.*", "nmrformd.*", "setuptools.*", "parmed.*", "pdbfixer.*", "openmm.*", "mdtraj.*"]
 ignore_missing_imports = true