Skip to content

Commit

Permalink
patch linter (#224)
Browse files Browse the repository at this point in the history
* patch linter

* remove pylint: disable=import-error when possible

* remove more import-error pylint statements

* remove openmm disable pylint import error

* add openmm pylint dependency

* remove #For pylint comment

* remove top level import

* remove top level imports nmr4md.py

---------

Co-authored-by: Brandon Duane Walker <[email protected]>
  • Loading branch information
misterbrandonwalker and Brandon Duane Walker authored Jan 19, 2024
1 parent 3cb4ff6 commit b6051af
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 48 deletions.
4 changes: 2 additions & 2 deletions examples/diffdock/pose_cluster_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def EuclideanDist(pi: Chem.SDMolSupplier, pj: Chem.SDMolSupplier) -> float:
# GetConformers will just use input coordinates if conformations are not pre-generated
confi = pi.GetConformers()[0]
confj = pj.GetConformers()[0]
centeri = rdmt.ComputeCentroid(confi)
centerj = rdmt.ComputeCentroid(confj)
centeri = rdmt.ComputeCentroid(confi) # pylint: disable=c-extension-no-member
centerj = rdmt.ComputeCentroid(confj) # pylint: disable=c-extension-no-member
dv = np.array([centeri.x, centeri.y, centeri.z]) - np.array([centerj.x, centerj.y, centerj.z])
return float(np.sqrt(np.dot(dv, dv)))

Expand Down
14 changes: 10 additions & 4 deletions examples/scripts/atomselect.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# type: ignore
from workflow_types import *
from workflow_types import string, pdbfile # pylint: disable=import-error
# NOTE: No other top-level imports supported


def main(selection_string, input_pdb_path, output_pdb_path):
import mdtraj # pylint:disable=import-error
def main(selection_string: str, input_pdb_path: str, output_pdb_path: str) -> None:
"""Restrict a PDB file to a selection of atoms and save it.
Args:
selection_string (str): Selection string for mdtraj
input_pdb_path (str): The path to the input PDB file
output_pdb_path (str): The path to the output PDB file
"""
import mdtraj # pylint: disable=import-outside-toplevel
traj = mdtraj.load(input_pdb_path)
print(traj)
selection_indices = traj.topology.select(selection_string)
Expand Down
2 changes: 1 addition & 1 deletion examples/scripts/autodock_vina_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
floats = [float(x) for x in strs[1:]]
score = floats[0]
scores.append(score)
except Exception as e:
except Exception as e: # pylint: disable=broad-exception-caught
scores_all.append(scores)
parsing = False

Expand Down
4 changes: 2 additions & 2 deletions examples/scripts/calculate_net_charge.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_net_charge(file_path: str, addhydrogens: bool = False) -> Optional[int]:

try:
mol = Chem.MolFromMol2File(file_path, removeHs=False)
except Exception:
except Exception: # pylint: disable=broad-exception-caught
return None

if not mol:
Expand All @@ -45,7 +45,7 @@ def get_net_charge(file_path: str, addhydrogens: bool = False) -> Optional[int]:
AllChem.ComputeGasteigerCharges(mol,
nIter=50,
throwOnParamFailure=True)
except Exception:
except Exception: # pylint: disable=broad-exception-caught
return None

num_atoms = mol.GetNumAtoms()
Expand Down
17 changes: 8 additions & 9 deletions examples/scripts/combine_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os
import argparse
from typing import Optional
import numpy as np

from rdkit import Chem

Expand All @@ -21,16 +20,16 @@ def parse_arguments() -> argparse.Namespace:
return args


def read_xyz_rdkit(input_structure_path: str) -> Optional[Chem.rdchem.Mol]:
""" Read a PDB file using RDKit
def read_xyz_rdkit(input_structure_path: str) -> Optional[Chem.rdchem.Mol]: # pylint: disable=c-extension-no-member
""" Read an XYZ file using RDKit
Args:
input_structure_path (str): The path to the xyz structure
input_structure_path (str): The path to the xyz structure
Returns:
Optional[Chem.rdchem.Mol]: The created molecule object
"""
xyz = Chem.rdmolfiles.MolFromXYZFile(input_structure_path)
xyz = Chem.rdmolfiles.MolFromXYZFile(input_structure_path) # pylint: disable=c-extension-no-member

if not xyz:
print(f'Error: failed to generate molecule from file {input_structure_path}')
Expand All @@ -43,18 +42,18 @@ def combine_structure_rdkit(input_structure1_path: str, input_structure2_path: s
""" Combine two structures into a single PDB file using RDKit
Args:
input_structure1_path (str): The path to the xyz structure 1
input_structure2_path (str): The path to the xyz structure 2
input_structure1_path (str): The path to the xyz structure 1
input_structure2_path (str): The path to the xyz structure 2
output_structure_path (str): The path to the output combined structure
"""

structure1 = read_xyz_rdkit(input_structure1_path)
structure2 = read_xyz_rdkit(input_structure2_path)

if structure1 and structure2:
combo = Chem.CombineMols(structure1, structure2)
combo = Chem.CombineMols(structure1, structure2) # pylint: disable=no-member

with Chem.PDBWriter(output_structure_path) as writer:
with Chem.PDBWriter(output_structure_path) as writer: # pylint: disable=no-member
writer.write(combo)


Expand Down
3 changes: 1 addition & 2 deletions examples/scripts/extract_protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import sys
import os
import argparse
from typing import Optional, List
import numpy as np
from typing import List

import openmm.app as omma

Expand Down
45 changes: 33 additions & 12 deletions examples/scripts/generate_conformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def calculate_dG(Kd: float) -> float:
dG = RT * math.log(Kd / standard_concentration)
return dG

# pylint: disable=too-many-arguments,too-many-locals


def load_data(input_excel_path: str, query: str, smiles_column: str, binding_data_column: str,
output_txt_path: str, min_row: int = 1, max_row: int = -1, convert_Kd_dG: bool = False) -> None:
Expand Down Expand Up @@ -84,20 +86,39 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat

# For ncats_phenotypic_curated.csv
# duplicate-classifier ['duplicate', 'unique']
# Virus ['Dengue Virus', 'Sandfly_Fever', 'HCoV-229E', 'MERS-CoV', 'Yellow Fever Virus', 'Zika Virus', 'RSV', 'Powassan', 'SARS-CoV-2', 'H7N7', 'H1N2', 'HPIV-3', 'West Nile Virus']
# Virus ['Dengue Virus', 'Sandfly_Fever', 'HCoV-229E', 'MERS-CoV', 'Yellow Fever Virus',\
# 'Zika Virus', 'RSV', 'Powassan', 'SARS-CoV-2', 'H7N7', 'H1N2', 'HPIV-3', 'West Nile Virus']
# BAO Label ['cell-based format']
# Cell_Type [nan, 'Unknown', 'Huh-7', 'Hep-2', 'HEL', 'CEF', 'HG23', 'HMC3', 'HBMEC', 'BHK-21', 'Hepa1-6', 'K-562', 'BHK1', 'J774A.1', 'BHK15', 'MA104', 'BHK', 'LLC-MK2', 'BHK21', 'A549/BHK21', 'MK2', 'JEG3', 'CaCo-2', 'MT-4', 'Hela', 'kidney', 'Vero 76', 'WS1', 'HuH-7', 'Vero', 'HEP-2', 'RAW 264.7', 'Huh-5-2', 'C6/36', 'BHK-D2RepT', 'HuH7', 'MDCK', 'NSC', 'HelaM', 'PBMC', 'A549', 'HELF', 'HEK293', 'BSC-40', 'HAE', 'HFF', 'TREx293', 'MDDC', 'BE(2)-C', 'EAC', 'Vero C1008', 'PEK', 'CEM', 'HEK-293', 'Caco-2', 'BHK-WII', 'HeLa', 'HepG2', 'Vero-76']
# Standard Type ['EC90', 'Activity', 'CC50', 'EC50', 'TD50', 'Cytotoxicity', 'ID50', 'MIC', 'IC90', 'MCC50', 'Dose', 'Inhibition', 'EC99', 'pIC50', 'MNTD', 'ED50', 'MIC50', 'IC50']
# Cell_Type [nan, 'Unknown', 'Huh-7', 'Hep-2', 'HEL', 'CEF', 'HG23', 'HMC3', 'HBMEC', 'BHK-21', \
# 'Hepa1-6', 'K-562', 'BHK1', 'J774A.1', 'BHK15', 'MA104', 'BHK', 'LLC-MK2', 'BHK21', 'A549/BHK21',\
# 'MK2', 'JEG3', 'CaCo-2', 'MT-4', 'Hela', 'kidney', 'Vero 76', 'WS1', 'HuH-7', 'Vero', 'HEP-2', \
# 'RAW 264.7', 'Huh-5-2', 'C6/36', 'BHK-D2RepT', 'HuH7', 'MDCK', 'NSC', 'HelaM', 'PBMC', 'A549', \
# 'HELF', 'HEK293', 'BSC-40', 'HAE', 'HFF', 'TREx293', 'MDDC', 'BE(2)-C', 'EAC', 'Vero C1008', \
# 'PEK', 'CEM', 'HEK-293', 'Caco-2', 'BHK-WII', 'HeLa', 'HepG2', 'Vero-76']
# Standard Type ['EC90', 'Activity', 'CC50', 'EC50', 'TD50', 'Cytotoxicity', 'ID50', 'MIC', \
# 'IC90', 'MCC50', 'Dose', 'Inhibition', 'EC99', 'pIC50', 'MNTD', 'ED50', 'MIC50', 'IC50']
# Standard Relation [nan, "'~'", "'<'", "'<='", "'>'", "'='", "'>='"]
# Standard Units [nan, 'uM', '%']
# Outcome ['Active', 'Inactive', 'Inconclusive']
# Assay_Type [nan, 'Viral_Replication', 'Unknown', 'Cell_Viability', 'Plaque_Inhibition', 'Focus_Reduction_Assay', 'Proliferation', 'Antigen_Expression', 'Staining_Based', 'Flourescence', 'Viral_Titer', 'Cell_Viability_By_Neutral_Red_Uptake', 'eGFP_Reduction', 'Immunofluorescence', 'Protein_Expression', 'CFI', 'Green_Flourescent_Protein_(eGFP)', 'Viral_Infection', 'Microscopy', 'Immunodetection', 'Replicon_Assay', 'Antigen_Synthesis', 'Viral_RNA_Detection,Plaque_Inhibition,Cell_Viability', 'Cell-based_flavivirus_infection_(CFI)_assay', 'Viral_Yield_Reduction', 'Focus_Forming_Unit_(FFU)_Assay', 'Luciferase', 'Viral_RNA_Detection', 'Luciferase_Reporter_Assay', 'Viral_Entry', 'MTT_Assay', 'RT-PCR', 'Cytopathy', 'Flow_Cytometry', 'Colorimetric', 'Luciferase_Reporter_Gene', 'Cell_Titer', 'Western_Blot', 'Cytotoxicity', 'SDS-PAGE', 'Fluorescence', 'Image-Based', 'Crystal_Violet_Staining_Assay', 'Viral_Reduction_Assay']
# Assay_Type [nan, 'Viral_Replication', 'Unknown', 'Cell_Viability', 'Plaque_Inhibition', \
# 'Focus_Reduction_Assay', 'Proliferation', 'Antigen_Expression', 'Staining_Based', \
# 'Flourescence', 'Viral_Titer', 'Cell_Viability_By_Neutral_Red_Uptake', 'eGFP_Reduction', \
# 'Immunofluorescence', 'Protein_Expression', 'CFI', 'Green_Flourescent_Protein_(eGFP)', \
# 'Viral_Infection', 'Microscopy', 'Immunodetection', 'Replicon_Assay', 'Antigen_Synthesis', \
# 'Viral_RNA_Detection,Plaque_Inhibition,Cell_Viability', 'Cell-based_flavivirus_infection_(CFI)_assay', \
# 'Viral_Yield_Reduction', 'Focus_Forming_Unit_(FFU)_Assay', 'Luciferase', 'Viral_RNA_Detection', \
# 'Luciferase_Reporter_Assay', 'Viral_Entry', 'MTT_Assay', 'RT-PCR', 'Cytopathy', 'Flow_Cytometry', \
# 'Colorimetric', 'Luciferase_Reporter_Gene', 'Cell_Titer', 'Western_Blot', 'Cytotoxicity', 'SDS-PAGE', \
# 'Fluorescence', 'Image-Based', 'Crystal_Violet_Staining_Assay', 'Viral_Reduction_Assay']

# For ncats_target_based_curated.csv
# duplicate-type-classifier ['unique', 'duplicate']
# Virus ['SNV', 'Zika', 'West_Nile', 'RSV', 'SARS-CoV-2', 'H7N7', '229E', 'MERS-CoV', 'HPIV3', 'Dengue']
# Target Type ['PROTEIN COMPLEX', 'UNCHECKED', 'ORGANISM', 'SINGLE PROTEIN']
# Target ['Matrix M2-1', 'Phosphoprotein', 'NS2B-NS3 Protease', 'NS5', 'Integrin alpha-V/beta-3', 'not defined', 'Hemagglutinin-neuraminidase', 'Nucleocapsid protein', 'PLpro', 'Spike protein', 'Main Protease (3CLpro, Mpro)', 'Nucleoprotein', 'Fusion glycoprotein F0', 'Neuraminidase', 'Matrix protein 2', 'RDRP']
# Target ['Matrix M2-1', 'Phosphoprotein', 'NS2B-NS3 Protease', 'NS5', 'Integrin alpha-V/beta-3', \
# 'not defined', 'Hemagglutinin-neuraminidase', 'Nucleocapsid protein', 'PLpro', 'Spike protein', \
# 'Main Protease (3CLpro, Mpro)', 'Nucleoprotein', 'Fusion glycoprotein F0', 'Neuraminidase', \
# 'Matrix protein 2', 'RDRP']
# Outcome ['Inactive', 'Active', 'Unclear', 'Inconclusive', 'Undetermined']
# Standard Type ['IC50', 'EC90', 'Inhibition', 'Kd', 'EC50', 'Activity', 'Ki']
# Standard Relation [nan, "<'", "<='", ">'", "='", ">='"]
Expand Down Expand Up @@ -135,18 +156,18 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat
smiles_binding_data.append(f'{smiles} {binding_datum}')

# See https://www.rdkit.org/docs/GettingStartedInPython.html#working-with-3d-molecules
mol_2D: rdkit.Chem.rdchem.Mol = Chem.MolFromSmiles(smiles)
AllChem.Compute2DCoords(mol_2D)
mol_2D: rdkit.Chem.rdchem.Mol = Chem.MolFromSmiles(smiles) # pylint: disable=c-extension-no-member,no-member
AllChem.Compute2DCoords(mol_2D) # pylint: disable=no-member

# See https://www.rdkit.org/docs/source/rdkit.Chem.rdmolops.html#rdkit.Chem.rdmolops.AddHs
# NOTE: "Much of the code assumes that Hs are not included in the molecular topology,
# so be very careful with the molecule that comes back from this function."
mol_3D = Chem.AddHs(mol_2D)
AllChem.EmbedMolecule(mol_3D)
AllChem.MMFFOptimizeMolecule(mol_3D)
mol_3D = Chem.AddHs(mol_2D) # pylint: disable=no-member
AllChem.EmbedMolecule(mol_3D) # pylint: disable=no-member
AllChem.MMFFOptimizeMolecule(mol_3D) # pylint: disable=no-member

filename = f'ligand_{idx}.sdf' # chemblid is NOT unique!
writer = Chem.SDWriter(filename)
writer = Chem.SDWriter(filename) # pylint: disable=no-member
# writer = Chem.rdmolfiles.PDBWriter(filename)
writer.write(mol_3D)
writer.close()
Expand All @@ -157,7 +178,7 @@ def load_data(input_excel_path: str, query: str, smiles_column: str, binding_dat

def main() -> None:
""" Reads the command line arguments and loads an Excel database of small molecules,
performs a query to extract the SMILES and binding affinity, generates 3D structures
performs a query to extract the SMILES and binding affinity, generates 3D structures
and saves them in SDF format.
"""
args = parse_arguments()
Expand Down
4 changes: 3 additions & 1 deletion examples/scripts/generate_pdbbind_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def read_index_file(index_file_path: str) -> pd.DataFrame:

return pd.DataFrame.from_dict(data)

# pylint: disable=too-many-arguments,too-many-locals


def load_data(index_file_name: str, base_dir: str, query: str, output_txt_path: str,
min_row: int = 1, max_row: int = -1, convert_Kd_dG: bool = False) -> None:
Expand All @@ -117,7 +119,7 @@ def load_data(index_file_name: str, base_dir: str, query: str, output_txt_path:
if int(min_row) != 1 or int(max_row) != -1:
# We want to convert to zero-based indices and we also want
# the upper index to be inclusive (i.e. <=), so only the lower index is decremented by 1.
df = df[(int(min_row) - 1):int(max_row)]
df = df[(int(min_row) - 1):int(max_row)] # pylint: disable=unsubscriptable-object

# Calculate dG
df = df[['PDB_code', 'value', 'Kd_Ki']]
Expand Down
18 changes: 13 additions & 5 deletions examples/scripts/nmr4md.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from workflow_types import *
from workflow_types import tprfile, trrfile, pngfile, string # pylint: disable=import-error


def main(input_tpr_path, input_trr_path, output_png_path): # type: ignore[no-untyped-def]
import MDAnalysis as mda
import nmrformd
from matplotlib import pyplot as plt
def main(input_tpr_path: str, input_trr_path: str, output_png_path: str) -> None:
"""Generate NMR analysis plots from trajectory files.
Args:
input_tpr_path (str): Input tpr file path
input_trr_path (str): Input trr file path
output_png_path (str): Output png file path
"""

import MDAnalysis as mda # pylint: disable=import-outside-toplevel
import nmrformd # pylint: disable=import-error,import-outside-toplevel
from matplotlib import pyplot as plt # pylint: disable=import-outside-toplevel

# The following code comes directly from the nmr4md tutorial at
# https://github.com/simongravelle/nmrformd/blob/main/docs/source/tutorials/bulk-water.rst
Expand Down
18 changes: 10 additions & 8 deletions examples/scripts/pdb_fixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def check_pdb_null(input_pdb_path: str, pdbid: str, url: str) -> bool:
Args:
input_pdb_path (str): The input PDB structure path
pdbid (str): PDB id from RCSB
url (str): URL to retrieve PDB fro
pdbid (str): PDB id from RCSB
url (str): URL to retrieve PDB from
Returns:
bool: Return True if all of the residues are unknown
"""
Expand All @@ -50,9 +50,9 @@ def check_pdb_null(input_pdb_path: str, pdbid: str, url: str) -> bool:


def find_missing_residues(fixer: PDBFixer) -> PDBFixer:
""" Finds the missing residues and adds missing residues within a
chain to prevent "floppy tails," which can lead to an increase in the box size,
significantly increasingthe computation time. This step is taken as floppy tails
""" Finds the missing residues and adds missing residues within a
chain to prevent "floppy tails," which can lead to an increase in the box size,
significantly increasing the computation time. This step is taken as floppy tails
are generally not critical for binding.
Args:
Expand All @@ -77,11 +77,13 @@ def find_missing_residues(fixer: PDBFixer) -> PDBFixer:
fixer.missingResidues[key] = [r for r in resnames if r in fixer.templates]
return fixer

# pylint: disable=too-many-arguments


def runpdbfixer(input_pdb_path: str, input_helper_pdb_path: str, output_pdb_path: str,
add_atoms: str, add_res: bool, pdbid: str, url: str, rep_nonstandard: bool, heterogens: str) -> None:
""" Fixes the protein structure using PDBFixer.PDBFixer offers options
to add hydrogens and solvate the system, but in our usage, we employ
""" Fixes the protein structure using PDBFixer. PDBFixer offers options
to add hydrogens and solvate the system, but in our usage, we employ
PDBFixer solely for adding missing heavy atoms and residues.
Args:
Expand All @@ -90,7 +92,7 @@ def runpdbfixer(input_pdb_path: str, input_helper_pdb_path: str, output_pdb_path
input_helper_pdb_path (str): The input helper PDB structure path
add_atoms (str): What missing atoms to add: all, heavy, hydrogen, or none
add_res (bool): If set to True, adds missing residues
pdbid (str): PDB id from RCSB
pdbid (str): PDB id from RCSB
url (str): URL to retrieve PDB from
rep_nonstandard (bool): Replace nonstandard residues with standard equivalents
"""
Expand Down
1 change: 1 addition & 0 deletions install/system_deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies:
- xorg-libxrender
- mdtraj
- pymol-open-source
- pdbfixer
# Alternatively, can use pymol-bundle from the schrodinger channel.
# - pymol-bundle
- openbabel
Expand Down
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,19 @@ mypy-types = [
"types-openpyxl",
"types-pyyaml",
]
# For pylint

workflow-deps = [
"matplotlib",
"pandas",
"numpy>=1.21.0",
"mdanalysis",
"nmrformd",
"mdanalysis",
"mdtraj",
"rdkit",
"pymol",
"pdbfixer @ git+https://github.com/openmm/pdbfixer.git",
"openmm",
]
# See docs/requirements.txt
doc = [
Expand Down Expand Up @@ -141,7 +147,7 @@ ignore_errors = false
# versioneer to the exclude=regex above, but this works.

[[tool.mypy.overrides]]
module = ["BioSimSpace.*", "MDAnalysis.*", "pymol.*", "rdkit.*", "nmrformd.*", "setuptools.*", "parmed.*", "pdbfixer.*", "openmm.*"]
module = ["BioSimSpace.*", "MDAnalysis.*", "pymol.*", "rdkit.*", "nmrformd.*", "setuptools.*", "parmed.*", "pdbfixer.*", "openmm.*", "mdtraj.*"]
ignore_missing_imports = true


Expand Down

0 comments on commit b6051af

Please sign in to comment.