Skip to content

Commit

Permalink
Open-ComBind Defaults, CLI, and Documentation (#37)
Browse files Browse the repository at this point in the history
* fix docs issue

* install theme first

* updating docking defaults

* docstring updates

* remove unecessary hierarchy

* add preprint to README

* fix docs issue

* install theme first

* updating docking defaults

* docstring updates

* remove unecessary hierarchy

* add preprint to README

* bugfix

* bugfix with IFP parallel

* prettify and consistency

* prettify and fix docs

* making the CLI nice
  • Loading branch information
drewnutt authored Nov 7, 2023
1 parent 463d90c commit 6655cd7
Show file tree
Hide file tree
Showing 10 changed files with 151 additions and 230 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

Open-source docking pipeline leveraging pairwise statistics

Open-ComBind is currently a [preprint](https://doi.org/10.26434/chemrxiv-2023-xjl84)

This is a fork of [ComBind](https://github.com/drorlab/combind) that removes all uses of proprietary API calls or proprietary software. This will produce similar (if not the exact same) results
as the original ComBind and it will be completely free to use.
Expand Down
3 changes: 0 additions & 3 deletions docs/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ Open-Combind is primarily used through its command line interface. It provides a

.. toctree::

Main Combind Commands
---------------------

.. autofunction:: open_combind.structprep

.. autofunction:: open_combind.ligprep
Expand Down
1 change: 1 addition & 0 deletions docs/requirements.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ dependencies:
- ProDy>=2.0
- click
- plumbum
- sphinx_rtd_theme
198 changes: 108 additions & 90 deletions open_combind/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import click
import os
import sys
from argparse import Namespace
from glob import glob

from open_combind.utils import *
import open_combind as oc
from open_combind.dock.ligprep import main as _single_ligprep
###############################################################################

# Defaults
Expand All @@ -33,30 +35,23 @@ def cli(ctx):


@cli.command()
@click.argument('struct', default='')
@click.option('--templ-struct')
def structprep(templ_struct='', struct='', raw_dir='structures/raw',
@click.option('--align-struct', default='', help='Structure to use for alignment. Defaults to alphabetically lowest.')
@click.option('--templ-struct', help='Structure to use as template for alignment. Defaults to alphabetically lowest.')
def structprep(templ_struct='', align_struct='', raw_dir='structures/raw',
align_dir='structures/aligned', processed_dir='structures/processed',
template_dir='structures/dir', ligand_dir='structures/ligands',
protein_dir='structures/proteins' ):
"""
Prepare structures and make a docking template file.
"struct" specifies the name of the structure for which to make a docking
template file. (Not the full path, generally just the PDB code.) Defaults to the
structure with alphabetically lowest name.
"templ_struct" specifies the name of the structure to use as a template for
docking. Defaults to the structure with alphabetically lowest name.
raw_dir, align_dir, processed_dir, template_dir, ligand_dir, and protein_dir
specify the directories where the raw structures, aligned structures,
processed structures, docking templates, ligands, and proteins will be
written, respectively. Defaults to "structures/raw", "structures/aligned",
"structures/processed", "structures/templates", "structures/ligands", and
"structures/proteins", respectively.
written, respectively.
Defaults to "structures/raw", "structures/aligned", "structures/processed",
"structures/templates", "structures/ligands", and "structures/proteins", respectively.
The following directory structure is required:
The following directory structure is recommended:
<raw_dir>/
structure_name.pdb
Expand All @@ -80,125 +75,148 @@ def structprep(templ_struct='', struct='', raw_dir='structures/raw',
The process can be started from any step, e.g. if you have processed
versions of your structures, you can place these in the processed directory.
Files ending with _lig contain only the small molecule ligand present in the
structure, and files ending with _prot contain everything else.
"""
oc.structprep(templ_struct=templ_struct, struct=struct, raw_dir=raw_dir,
oc.structprep(templ_struct=templ_struct, struct=align_struct, raw_dir=raw_dir,
align_dir=align_dir, processed_dir=processed_dir,
template_dir=template_dir, ligand_dir=ligand_dir,
protein_dir=protein_dir)


@cli.command()
@click.argument('smiles')
@click.option('--root', default='ligands')
@click.option('--multiplex', is_flag=True)
@click.option('--sdffile', is_flag=True)
@click.option('--ligand-names', default='ID')
@click.option('--ligand-smiles', default='SMILES')
@click.option('--delim', default=',')
@click.option('--num-out-confs', default=10)
@click.option('--num-confs', default=50)
@click.option('--confgen', default='etkdg_v2')
@click.option('--ff', default="UFF")
@click.option('--max-iterations', default=1000)
@click.option('--seed', default=-1)
@click.option('--processes', default=1)
@click.argument('smiles', required=True)
@click.option('--root', default='ligands',
help='Root directory for output of ligprep', type=click.Path(), show_default=True)
@click.option('--multiplex', is_flag=True, help='Write all ligands to one file.')
@click.option('--sdffile', is_flag=True,
help='`smiles` is a SDF file containing all the ligands')
@click.option('--ligand-names', default='ID', help='Column name for ligand names',
show_default=True)
@click.option('--ligand-smiles', default='SMILES', help='Column name for ligand smiles',
show_default=True)
@click.option('--delim', default=',', help='Delimiter for smiles file', show_default=True)
@click.option('--num-out-confs', default=10, help='Number of output conformers', show_default=True)
@click.option('--num-confs', default=50,
help='Number of conformers to generate and query energy', show_default=True)
@click.option('--confgen', default='etkdg_v2', help='Conformer generation method',
show_default=True)
@click.option('--ff', default="UFF",
help='Force field for minimization and energy computation', show_default=True)
@click.option('--max-iterations', default=1000,
help='Maximum number of iterations for minimization', show_default=True)
@click.option('--seed', default=-1, help='Random seed for conformer generation')
@click.option('--processes', default=1 , help='Number of processes to use', show_default=True)
def ligprep(smiles, root, multiplex, ligand_names, ligand_smiles,
delim, processes, sdffile, num_out_confs, num_confs, confgen,
max_iterations, seed, ff):
"""
Prepare ligands for docking, from smiles or sdf.
Prepare ligands in `smiles` for docking, from SMILES or SDF.
Specifically, this will utilize RDKit to generate a 3D conformer for each of the ligands
"smiles" should be a `delim` delimited file with columns "ligand-names"
and "ligand-smiles". Alternatively, "smiles" can be a SDF file with multiple
ligands as different entries, but you must specify "--sdffile".
"root" specifies where the processed ligands will be written.
By default, an individual file will be made for each ligand. If multiplex is
set, then only one file, containing all the ligands, will be produced.
Multiprocessing is only supported for non-multiplexed mode.
Specifically, this will utilize RDKit to generate a set of 3D conformers for each of the ligands
"""
oc.ligprep(smiles, root=root, multiplex=multiplex, ligand_names=ligand_names,
ligand_smiles=ligand_smiles, delim=delim, sdffile=sdffile,
num_out_confs=num_out_confs, num_confs=num_confs, confgen=confgen,
max_iterations=max_iterations, ff=ff, processes=processes, seed=seed)


@cli.command()
@click.argument('ligands', nargs=-1)
@click.option('--root', default='docking')
@click.option('--template')
@click.option('--screen', is_flag=True)
@click.option('--slurm', is_flag=True)
@click.option('--now', is_flag=True)
@click.option('--dock-file')
def dock_ligands(template, root, ligands, screen, slurm, now, dock_file):
@click.argument('input-file', required=True)
@click.argument('output-file', required=True)
@click.option('--num-out-confs', default=10, help='Number of output conformers', show_default=True)
@click.option('--num-confs', default=50,
help='Number of conformers to generate and query energy', show_default=True)
@click.option('--confgen', default='etkdg_v2', help='Conformer generation method',
show_default=True)
@click.option('--ff', default="UFF",
help='Force field for minimization and energy computation', show_default=True)
@click.option('--max-iterations', default=1000,
help='Maximum number of iterations for minimization', show_default=True)
@click.option('--seed', default=-1, help='Random seed for conformer generation')
def singleligprep(input_file, output_file, num_out_confs, num_confs, confgen,
max_iterations, seed, ff):
"""
Generate conformations for a single `input-file`, provided as a SMILES or SDF (detected via file extension). Output as an SDF to `output-file`
"""
Dock "ligands" to "grid".
"root" specifies where the docking results will be written.

Setting "screen" limits the thoroughness of the pose sampling. Recommended
for screening, but not pose prediction.
args = Namespace(input_file=input_file, output_file=output_file,
num_out_confs=num_out_confs, num_confs=num_confs, confgen=confgen,
maxIters=max_iterations, ff=ff, seed=seed)
_single_ligprep(args)

"ligands" are paths to prepared ligand files. Multiple can be specified.
@cli.command()
@click.argument('ligands', nargs=-1, required=True)
@click.option('--root', default='docking',
help='Root directory for output of docking', type=click.Path(), show_default=True)
@click.option('--template',
help='Template file for docking, defaults to the alphabetically first file in `structures/templates/*.template`',
type=click.Path())
@click.option('--screen', is_flag=True,
help='Screening mode, for faster docking (i.e. less sampling).')
@click.option('--slurm', is_flag=True,
help='If running on a cluster, outputs tarball of docking inputs and makes dockfile use paths that tarball has.')
@click.option('--now', is_flag=True, help='Run docking now, instead of just writing dockfile.')
@click.option('--processes', default=1, help='Number of processes to use for docking.', show_default=True, type=int)
@click.option('--dock-file',
help='Format string for dockfile. Defaults to "-l {lig} -o {out} --exhaustiveness {exh} --num_modes 30 --min_rmsd_filter 0.01 > {log}", should include `{lig}` and `{out}` at a minimum.')
def dock_ligands(template, root, ligands, screen, slurm, now, dock_file, processes):
"""
Dock "ligands" using template.
"dock_file" is a format string that will be used with all of the ligands to
create a docking file. The default looks like:
"-l {lig} -o {out} --exhaustiveness {exh} --num_modes 200 > {log} \n"
"""
oc.dock_ligands(ligands, template=template, dock_file=dock_file, root=root, screen=screen, slurm=slurm, now=now)
oc.dock_ligands(ligands, template=template, dock_file=dock_file, root=root, screen=screen, slurm=slurm, now=now, processes=processes)

################################################################################


@cli.command()
@click.argument('root')
@click.argument('poseviewers', nargs=-1)
@click.option('--native', default='structures/ligands/*_lig.sdf')
@click.option('--ifp-version', default=IFP_VERSION)
@click.option('--mcss-param', default=MCSS_PARAM)
@click.option('--screen', is_flag=True)
@click.option('--max-poses', default=100)
@click.option('--no-mcss', is_flag=True)
@click.option('--no-cnn', is_flag=True)
@click.option('--use-shape', is_flag=True)
@click.option('--processes', default=1)
@click.option('--template', default='structures/template/*.template')
@click.option('--check-center-ligs', is_flag=True)
@click.option('--newscore', default=None)
@click.option('--no-reverse', is_flag=False)
@click.argument('root', required=True)
@click.argument('poseviewers', nargs=-1, required=True)
@click.option('--native', default='structures/ligands/*_lig.sdf', help='Native ligand files')
@click.option('--ifp-version', default=IFP_VERSION, help='IFP version to use')
@click.option('--mcss-param', default=MCSS_PARAM, help='MCSS parameter setup to use')
@click.option('--max-poses', default=100,
help='Maximum number of poses to featurize per ligand', show_default=True)
@click.option('--no-mcss', is_flag=True, help='Do not use RMSD of MCSS as a feature')
@click.option('--no-cnn', is_flag=True, help='Do not extract CNN scores from poses')
@click.option('--use-shape', is_flag=True, help='Use shape comparison as a feature')
@click.option('--processes', default=1, help='Number of processes to use', show_default=True)
@click.option('--template', default='structures/template/*.template',
help='Template files used for docking')
@click.option('--check-center-ligs', is_flag=True,
help='Check that the center of the ligands poses are near the center of the native ligand')
@click.option('--newscore', default=None,
help='If using a score other than Vina score, specify the name of the score here')
@click.option('--no-reverse', is_flag=False,
help='Sort the poses from lowest (first) to highest score')
def featurize(root, poseviewers, native, ifp_version,
mcss_param, screen, no_mcss, use_shape, processes, max_poses,
mcss_param, no_mcss, use_shape, processes, max_poses,
no_cnn, template, check_center_ligs, newscore, no_reverse):
"""
Featurize docking poses.
"""

oc.featurize(root, poseviewers, native=native, no_mcss=no_mcss, use_shape=use_shape,
max_poses=max_poses, no_cnn=no_cnn, screen=screen, ifp_version=ifp_version,
max_poses=max_poses, no_cnn=no_cnn, ifp_version=ifp_version,
mcss_param=mcss_param, processes=processes, template=template,
check_center_ligs=check_center_ligs, newscore=newscore, reverse=not no_reverse)
################################################################################


@cli.command()
@click.argument('root')
@click.argument('out', default="poses.csv")
@click.argument('root', required=True)
@click.argument('ligands', nargs=-1)
@click.option('--features', default='mcss,hbond,saltbridge,contact')
@click.option('--alpha', default=1.0)
@click.option('--stats-root', default=None)
@click.option('--restart', default=500)
@click.option('--max-iterations', default=1000)
@click.option('--newscore', default=None)
@click.option('--out', default="poses.csv", help='Output file', show_default=True)
@click.option('--features', default='mcss,hbond,saltbridge,contact',
help='Features to use for pose prediction', show_default=True)
@click.option('--alpha', default=-1.0, help='Weight of the docking score', show_default=True)
@click.option('--stats-root', default=None, help='Root directory of the statistics files, defaults to Open-ComBind default statistics')
@click.option('--restart', default=500, help='Number of likelihood minimization runs to run from random initialization', show_default=True)
@click.option('--max-iterations', default=1000, help='Maximum number of iterations for the likelihood minimization for each run', show_default=True)
@click.option('--newscore', default=None, help='If using a score other than Vina score, specify the name of the score here')
def pose_prediction(root, out, ligands, alpha, stats_root, features, restart,
max_iterations, newscore):
"""
Run ComBind pose prediction.
Run ComBind pose prediction. Using the features and ligands in `root`, unless `ligands` are specified.
"""
features = features.split(',')

Expand Down
8 changes: 4 additions & 4 deletions open_combind/dock/dock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import subprocess
from glob import glob

GNINA = ' -l {lig} -o {out} --exhaustiveness {exh} --num_modes 200 --min_rmsd_filter 0 > {log}' #: default command for GNINA docking
GNINA = ' -l {lig} -o {out} --exhaustiveness {exh} --num_modes 30 --min_rmsd_filter 0.01 > {log}' #: default command for GNINA docking

def docking_failed(gnina_log):
if not os.path.exists(gnina_log):
Expand Down Expand Up @@ -38,8 +38,8 @@ def check_dock_line(infile):
assert '{log}' in infile, "need to have {log} in your docking line to specify logfile"
if '{exh}' not in infile:
print('Warning: your docking line does not contain {exh}\n\
Docking will use either your specified exhaustiveness \
(if specified) or the default GNINA exchaustiveness of 8')
\t\tDocking will use either your specified exhaustiveness\n \
\t\t(if specified) or the default GNINA exhaustiveness of 8')

return infile

Expand Down Expand Up @@ -79,7 +79,7 @@ def dock(template, ligands, root, name, enhanced, infile=None, slurm=False, now=
infile = check_dock_line(infile)
exh = 8
if enhanced:
exh = 16
exh *= 2
dock_template = open(template).readlines()[0].strip('\n')
recname = os.path.splitext(os.path.split(dock_template.split('-r')[-1].strip().split(' ')[0])[1])[0]
# aboxname = os.path.splitext(os.path.split(dock_template.split('--autobox_ligand')[-1].strip().split(' ')[0])[1])[0]
Expand Down
38 changes: 3 additions & 35 deletions open_combind/dock/ligand_handling.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import fileinput
import requests
import urllib.parse
from rdkit import Chem
from rdkit.Chem.AllChem import AssignBondOrdersFromTemplate
from prody import writePDBStream
import fileinput


class RDKitParseException(Exception):
Expand All @@ -17,7 +17,7 @@ def __init__(self, message):
def get_ligands_from_RCSB(pdbid, lig_code=None, specific_chain=None,
save_file="{pdbid}_{chem_name}_{chain_id}_{seq_id}.sdf", first_only=False):
"""
Given a PDBFile (or a PDB Header file), download the ligands present in the PDB file directly
Given a PDB ID, download the ligands present in the PDB file directly
from the RCSB as a SDF file
Parameters
Expand Down Expand Up @@ -110,7 +110,7 @@ def ligand_selection_to_mol(ligand_selection, query_ligand, outfile=None):

def get_ligand_from_SMILES(lig_id):
"""
Using the RCSB API, pulls the SMILES string containin stereochemical features for the given ligand ID.
Using the RCSB API, pulls the SMILES string containing stereochemical features for the given ligand ID.
If a ligand has a dedicated page at ``https://www.rcsb.org/ligand/{ligand_id}`` then it likely has a SMILES string
(does not need to be a 2 or 3 letter identifier)
Expand Down Expand Up @@ -141,38 +141,6 @@ def get_ligand_from_SMILES(lig_id):

return mol



# def scrape_rcsb_webpage(pdb_id):
# rec_page_url = "https://www.rcsb.org/structure/{receptor}"
# receptor_page = requests.get(rec_page_url.format(receptor=pdb_id))
# soup = BeautifulSoup(receptor_page.text, 'html.parser')
# ligand_rows = soup.find_all('tr', id=re.compile('ligand_row_\w{1,3}'))
# ligand_chain_info = dict()
# for lig_row in ligand_rows:
# lig_cols = lig_row.find_all('td')
# lig_name = lig_cols[0].a.text
# chain_info = dict()
# for link_tag in lig_cols[0].find_all('ul', class_='dropdown-menu')[0].find_all('a'):
# url = link_tag.get('href')
# if 'encoding=sdf' not in url:
# continue
# text = link_tag.text.split(',')[-1]
# chain = text.split()[1] # first word is always "chain"
# auth_chain = re.findall('\[auth ([A-Z])\]', text)
# if len(auth_chain) == 1:
# auth_chain = auth_chain[0]
# elif len(auth_chain) > 1:
# raise IndexError(f"more than one auth chain for {text} in {pdb_id}")
# else:
# auth_chain = chain
# chain_info[chain] = {"auth_chain": auth_chain, "url": url}

# ligand_chain_info[lig_name] = chain_info

# return ligand_chain_info


def get_ligand_info_RCSB(pdb_id):
"""
Downloads the information for all of the nonpolymer entities in the given `pdb_id` from RCSB.
Expand Down
Loading

0 comments on commit 6655cd7

Please sign in to comment.