Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

p2rank protein pocket detection #236

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions cwl_adapters/insert_ligand_pocket.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: Insert ligand into protein pocket(s)

doc: |-
Insert ligand into protein pocket(s)

baseCommand: ["python", "/insert_ligand_pocket.py"]

hints:
DockerRequirement:
dockerPull: mrbrandonwalker/insert_ligand_pocket

requirements:
InlineJavascriptRequirement: {}

inputs:

protein_filename:
type: File
format:
- edam:format_1476
inputBinding:
prefix: --protein_filename

ligand_filename:
type: File
format:
- edam:format_3814
inputBinding:
prefix: --ligand_filename

input_pocket_predictions:
type: File
format: edam:format_3752
inputBinding:
prefix: --pocket_predictions

top_n_pockets:
type: int
inputBinding:
prefix: --top_n_pockets

protein_ligand_complexes:
type: string?

outputs:

protein_ligand_complexes:
type: File[]
outputBinding:
glob: "*.pdb"
format: edam:format_1476

$namespaces:
edam: https://edamontology.org/
cwltool: http://commonwl.org/cwltool#

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
52 changes: 52 additions & 0 deletions cwl_adapters/p2rank.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0

class: CommandLineTool

label: P2rank protein pocket detection and ranking

doc: |-
P2rank protein pocket detection and ranking

baseCommand: ["/p2rank/prank", "predict"]

hints:
DockerRequirement:
dockerPull: mrbrandonwalker/p2rank

requirements:
InlineJavascriptRequirement: {}

inputs:

protein_filename:
type: File
format:
- edam:format_1476
inputBinding:
prefix: -f

out_dir:
type: string
inputBinding:
prefix: -o
default: "test_output"

pocket_predictions:
type: string?

outputs:

pocket_predictions:
type: File
outputBinding:
# If input filename is 1fbl.pdb, then output filename is 1fbl.pdb_predictions.csv
glob: $(inputs.out_dir)/$(inputs.protein_filename.basename)_predictions.csv
format: edam:format_3752

$namespaces:
edam: https://edamontology.org/
cwltool: http://commonwl.org/cwltool#

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
5 changes: 5 additions & 0 deletions docker/dockerBuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,8 @@ sudo docker build --no-cache --pull -f Dockerfile_rmsd_pose_cluster -t mrbrandon
sudo docker build --no-cache --pull -f Dockerfile_rank_diffdock_poses -t mrbrandonwalker/rank_diffdock_poses .
sudo docker build --no-cache --pull -f Dockerfile_sanitize_ligand -t mrbrandonwalker/sanitize_ligand .
cd ../..

cd examples/pocket_detection/
sudo docker build --no-cache --pull -f Dockerfile_insert_ligand_pocket -t mrbrandonwalker/insert_ligand_pocket .
sudo docker build --no-cache --pull -f Dockerfile_p2rank -t mrbrandonwalker/p2rank .
cd ../..
14 changes: 14 additions & 0 deletions examples/pocket_detection/Dockerfile_insert_ligand_pocket
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# docker build -f Dockerfile_insert_ligand_pocket -t mrbrandonwalker/insert_ligand_pocket .

FROM condaforge/mambaforge

RUN conda install -c conda-forge rdkit --yes

RUN conda init bash

COPY insert_ligand_pocket.py /

RUN mamba clean --all --yes

ADD Dockerfile_insert_ligand_pocket .
10 changes: 10 additions & 0 deletions examples/pocket_detection/Dockerfile_p2rank
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# docker build -f Dockerfile_p2rank -t mrbrandonwalker/p2rank .
FROM eclipse-temurin:17-jdk-jammy

RUN wget https://github.com/rdk/p2rank/releases/download/2.4.1/p2rank_2.4.1.tar.gz

RUN tar -xzf p2rank_2.4.1.tar.gz

RUN mv p2rank_2.4.1 p2rank

WORKDIR /p2rank
193 changes: 193 additions & 0 deletions examples/pocket_detection/insert_ligand_pocket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
"""Insert ligand into protein pocket(s)"""

import argparse

from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from typing import List


parser = argparse.ArgumentParser()
parser.add_argument('--protein_filename', type=str, help='input protein file')
parser.add_argument('--ligand_filename', type=str, help='input ligand file')
parser.add_argument('--pocket_predictions', type=str, help='input pocket prediction csv file')
parser.add_argument('--top_n_pockets', type=str, help='top n pockets to insert ligand into')
args = parser.parse_args()

steric_threshold = .5 # Threshold for steric clashes
translation_refinement_increment = 0.5 # Increment for refining translation direction
max_translation_iter = 20 # Maximum number of iterations for refining translation direction
# the closer dot_prod_acceptable is to -1 the more the vector must point
# opposite of ligand center to protein center
dot_prod_acceptable = -0.05 # Acceptable dot product for solvent direction


def compute_bounding_box_volume(molecule: Chem.SDMolSupplier, confId: int, atom_ids: List[int]) -> float:
"""Compute the volume of the bounding box of a molecule

Args:
protein_lig (Chem.SDMolSupplier): The protein-ligand complex
confId (int): Conformer ID
atom_ids (List[int]): List of atom indices

Returns:
float: Volume of the bounding box
"""
coords = np.array([molecule.GetConformer(confId).GetAtomPosition(i) for i in atom_ids])
min_coords = np.min(coords, axis=0)
max_coords = np.max(coords, axis=0)
volume = float(np.prod(max_coords - min_coords))
return volume


def check_steric_clash(protein_lig: Chem.SDMolSupplier, pro_indices: List[int], lig_indices: List[int], thresh: float) -> bool:
"""Check for steric clashes between protein and ligand atoms

Args:
protein_lig (Chem.SDMolSupplier): The protein-ligand complex
pro_indices (List[int]): protein atom indices
lig_indices (List[int]): ligand atom indices
steric_threshold (float): Threshold for steric clashes

Returns:
bool: True if there is a steric clash, False otherwise
"""
# compute pairwise distances between protein and ligand atoms
distances = AllChem.Get3DDistanceMatrix(protein_lig, confId=0)

protein_ligand_distances = distances[np.ix_(pro_indices, lig_indices)]

# Check for steric clashes
clash = bool(np.any(protein_ligand_distances < thresh))

return clash


# assuming rank, center_x, center_y, center_z, surf_atom_ids are in csv file
rank_to_center = {}
rank_to_surf_atom_ids = {}
with open(args.pocket_predictions, 'r', encoding="utf-8") as f:
lines = f.readlines()
headers = lines[0].strip().split(',')
headers = [x.strip() for x in headers]
for line in lines[1:]:
data = {headers[i]: value.strip() for i, value in enumerate(line.split(','))}
rank = int(data['rank'])
center_x = float(data['center_x'])
center_y = float(data['center_y'])
center_z = float(data['center_z'])
surf_atom_ids = [int(x) - 1 for x in data['surf_atom_ids'].strip().split(' ')]
rank_to_center[rank] = (center_x, center_y, center_z)
rank_to_surf_atom_ids[rank] = surf_atom_ids

top_n = int(args.top_n_pockets)
top_n_centers = [rank_to_center[rank] for rank in range(1, top_n + 1)]

# rdkit valence error (sanitization error) is ignored
protein = Chem.MolFromPDBFile(args.protein_filename, removeHs=False, sanitize=False)
ligand = Chem.MolFromMolFile(args.ligand_filename, removeHs=False)

# Insert ligand into protein pocket(s)
# Compute bounding box of ligand and protein and compare volumes
# if ligand volume is larger than protein volume, then ligand cannot be inserted into protein
# Would be very complicated to do rigid rotation and conformer scanning,
# so we will just skip this case if can't insert via translation of center of ligand to center of pocket as first guess
# If protein pocket volume is larger but there is steric clash need to find translation direction
# that points towards solvent and translate ligand in that direction via tiny steps
# until sterically feasible (if possible)
for rank_idx, center in enumerate(top_n_centers):
rank = rank_idx + 1
ligand_copy = Chem.RWMol(ligand)
conf = ligand_copy.GetConformer()

# compute bounding box of ligand
ligand_volume = compute_bounding_box_volume(ligand_copy, confId=0, atom_ids=list(range(ligand_copy.GetNumAtoms())))

# define protein pocket coords as vectors from protein center to surf atom centers
surf_atom_ids = rank_to_surf_atom_ids[rank]
surf_atom_coords = np.array([protein.GetConformer(0).GetAtomPosition(i) for i in surf_atom_ids])
surf_atom_coords = surf_atom_coords - np.array(center)

# now compute bounding box of protein pocket
protein_volume = compute_bounding_box_volume(protein, confId=0, atom_ids=surf_atom_ids)

# compare volumes
# note that alpha carbons are used as the surf atoms, so when translating towards boundary
# can still clash with residue sticking into pocket
print(f'Ligand volume {ligand_volume} protein volume {protein_volume}')
if ligand_volume > protein_volume:
continue
else:
print("Ligand should be able to fit into pocket")

lig_num_atoms = conf.GetNumAtoms()
# Now grab coordinates of ligand and determine ligand center position
ligand_coords = np.array([conf.GetAtomPosition(i) for i in range(lig_num_atoms)])
# Determine center of ligand
ligand_center = [sum(x) / len(x) for x in zip(*ligand_coords)]
# Determine translation vector
translation_vector = [center[i] - ligand_center[i] for i in range(3)]
# Translate ligand by using rdMolTransforms.TransformConformer
new_coords = ligand_coords + translation_vector
# Set the new coordinates back to the molecule
for i in range(lig_num_atoms):
conf.SetAtomPosition(i, new_coords[i])

# Add ligand to protein
pro_lig = Chem.CombineMols(protein, ligand_copy)
# Get the number of atoms in the original protein and ligand
num_protein_atoms = protein.GetNumAtoms()
num_ligand_atoms = ligand_copy.GetNumAtoms()
pro_idxs = list(range(num_protein_atoms))
lig_idxs = list(range(num_protein_atoms, num_protein_atoms + num_ligand_atoms))
steric_clash = check_steric_clash(pro_lig, pro_idxs, lig_idxs, steric_threshold)
if steric_clash:
print(f'Rank {rank} has steric clash, attempting to pull towards solvent')
# Grab list of vectors from pocket center that point to all pocket protein atoms
vectors = np.array([protein.GetConformer(0).GetAtomPosition(i) - np.array(center) for i in surf_atom_ids])
# Convert to unit vectors
unit_vectors = vectors / np.linalg.norm(vectors, axis=0)

# find center of entire protein
pro_center = np.array([sum(x) / len(x)
for x in zip(*[protein.GetConformer(0).GetAtomPosition(i) for i in pro_idxs])])

lig_center_to_pro_center = pro_center - np.array(center)
# normalize the vector
lig_center_to_pro_center = lig_center_to_pro_center / np.linalg.norm(lig_center_to_pro_center)
# compute dot products of all unit_vectors to lig_center_to_pro_center
dot_products = np.dot(unit_vectors, lig_center_to_pro_center)
# if the dot product is negative, then the vector points away from protein center.
# this assumes that the pocket is not near the center of the protein
solvent_vectors = unit_vectors[dot_products < dot_prod_acceptable]
# for troubleshooting print the proteins atoms that point towards solvent
# solvent_protein_atoms = [surf_atom_ids[i] for i in range(len(surf_atom_ids)) \
# if dot_products[i] < dot_prod_acceptable]

# find the vector that points the most towards solvent
# average the vectors that point towards solvent
solvent_vector = np.mean(solvent_vectors, axis=0)
# normalize the vector
solvent_vector = solvent_vector / np.linalg.norm(solvent_vector)

# Iteratively translate ligand in translation direction via tiny steps until sterically feasible
iterations = 0
conf = pro_lig.GetConformer()
new_coords = np.array([conf.GetAtomPosition(i) for i in lig_idxs])
Chem.MolToPDBFile(pro_lig, f'initial_trans{rank}.pdb', confId=0)
while steric_clash:
if iterations > max_translation_iter:
break
new_coords = new_coords + solvent_vector * translation_refinement_increment
# only update ligand coordinates (ligand is after protein indices)
for i in range(num_protein_atoms, num_protein_atoms + num_ligand_atoms):
lig_index = i - num_protein_atoms
conf.SetAtomPosition(i, new_coords[lig_index])
steric_clash = check_steric_clash(pro_lig, pro_idxs, lig_idxs, steric_threshold)
iterations += 1
if not steric_clash:
Chem.MolToPDBFile(pro_lig, f'protein_with_ligand_rank_{rank}.pdb', confId=0)

else:
Chem.MolToPDBFile(pro_lig, f'protein_with_ligand_rank_{rank}.pdb', confId=0)
Loading
Loading