Skip to content

Commit

Permalink
Add onionnet-v1 scoring
Browse files Browse the repository at this point in the history
  • Loading branch information
cyangNYU committed Nov 14, 2023
1 parent f773fcd commit 4d1063f
Show file tree
Hide file tree
Showing 9 changed files with 382 additions and 5 deletions.
48 changes: 48 additions & 0 deletions cwl_adapters/clean_smina_pdb.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env cwl-runner
# CWL adapter that runs /clean_smina_pdb.py (inside the
# cyangnyu/clean_smina_pdb image) on a single PDB file.
cwlVersion: v1.0

class: CommandLineTool

label: Clean smina pdb file (clean the pdb and rename the resname to LIG)

doc: |
  Clean smina pdb file (clean the pdb and rename the resname to LIG)

baseCommand: ["python", "/clean_smina_pdb.py"]

hints:
  DockerRequirement:
    dockerPull: cyangnyu/clean_smina_pdb

requirements:
  InlineJavascriptRequirement: {}

inputs:
  # PDB file to clean; passed to the script as --input_pdb.
  input_pdb:
    label: Input pdb file
    type: File
    format:
    - edam:format_1476
    inputBinding:
      prefix: --input_pdb

  # File name for the cleaned PDB; passed to the script as --output_pdb.
  # A default is supplied because the output glob below is built from this
  # value: without one, omitting the input would leave the glob null and the
  # --output_pdb flag off the command line.
  output_pdb:
    label: Output pdb file
    type: string?
    format:
    - edam:format_1476
    inputBinding:
      prefix: --output_pdb
    default: "ligand_clean.pdb"

outputs:
  # The cleaned PDB, located by the file name given in inputs.output_pdb.
  output_pdb:
    type: File
    format: edam:format_1476
    outputBinding:
      glob: $(inputs.output_pdb)

$namespaces:
  edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
56 changes: 56 additions & 0 deletions cwl_adapters/onionnet-feature.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env cwl-runner

# CWL adapter around /onionnet/generate_features.py, run inside the
# cyangnyu/onionnet image.
class: CommandLineTool
cwlVersion: v1.0

label: OnionNet (version1) for feature generation of docking poses

baseCommand:
- python
- /onionnet/generate_features.py

requirements:
  InlineJavascriptRequirement: {}

hints:
  DockerRequirement:
    dockerPull: cyangnyu/onionnet

inputs:
  # Forwarded to the script as -inp.
  complex_path_file:
    type: File?
    label: path file of protein-ligand complexes (structures in pdb format)
    format:
    - edam:format_1476
    inputBinding:
      prefix: -inp

  # Forwarded to the script as -nt; falls back to a single CPU.
  num_of_cpus:
    type: int?
    label: number of CPUs to use.
    default: 1
    format:
    - edam:format_2330
    inputBinding:
      prefix: -nt

  # Forwarded to the script as -out; also drives the output glob below.
  output_feature_file:
    type: string?
    label: the output file name containing the features.
    default: 'output.csv'
    format:
    - edam:format_3752
    inputBinding:
      prefix: -out

outputs:
  # The feature file, located by the name given in inputs.output_feature_file.
  output_feature_file:
    type: File
    outputBinding:
      glob: $(inputs.output_feature_file)
    format: edam:format_3752

$namespaces:
  cwltool: http://commonwl.org/cwltool#
  edam: https://edamontology.org/

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
90 changes: 90 additions & 0 deletions cwl_adapters/onionnet-score.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env cwl-runner

# CWL adapter around /onionnet/predict.py, run inside the cyangnyu/onionnet
# image. Besides the raw score file it exposes a single float derived from
# the first data row of that file.
cwlVersion: v1.0
class: CommandLineTool

label: OnionNet (version1) for rescoring of docking poses

baseCommand: ["python", "/onionnet/predict.py"]

hints:
  DockerRequirement:
    dockerPull: cyangnyu/onionnet

requirements:
  InlineJavascriptRequirement: {}

inputs:
  # Forwarded to the script as -fn.
  input_feature_file:
    label: feature csv file for protein-ligand complexes
    type: File?
    format:
    - edam:format_3752
    inputBinding:
      prefix: -fn

  # Forwarded as -scaler. A plain string, so the default is a path that must
  # already exist inside the container.
  scaler:
    label: the standard scaler file.
    type: string?
    format:
    - edam:format_2330
    inputBinding:
      prefix: -scaler
    default: "/onionnet/models/StandardScaler.model"

  # Forwarded as -weights. A plain string, so the default is a path that must
  # already exist inside the container.
  weights:
    label: the trained DNN model file.
    type: string?
    format:
    - edam:format_2330
    inputBinding:
      prefix: -weights
    default: "/onionnet/models/CNN_final_model_weights.h5"

  # Forwarded as -out; also drives both output globs below.
  output_score_file:
    label: the predicted pKa values file
    type: string?
    format:
    - edam:format_3752
    inputBinding:
      prefix: -out
    default: "predicted_pKa.csv"

  # No inputBinding: this value never reaches the command line.
  # NOTE(review): presumably a placeholder so workflows can name the
  # onionnet_score output - confirm against the workflow compiler.
  onionnet_score:
    type: string?

outputs:
  # The score file written by the script.
  output_score_file:
    type: File
    outputBinding:
      glob: $(inputs.output_score_file)
    format: edam:format_3752

  # Score parsed from the second line of the score file and divided by the
  # conversion factor below.
  onionnet_score:
    label: Estimated Free Energy of Binding (onionnet score)
    doc: |-
      Estimated Free Energy of Binding
    type: float
    outputBinding:
      glob: $(inputs.output_score_file)
      loadContents: true
      outputEval: |
        ${
          // CWL expressions are ECMAScript 5.1, so only var is used here.
          if (self.length === 0) {
            throw new Error("onionnet score file was not produced");
          }
          var lines = self[0].contents.split("\n");
          // The correct line should be of the form
          // ,pKa_predicted
          // /var/lib/cwl/stg19c300d1-f7fd-4a38-80d2-0f5615e3eb8f/complex_pdbs.pdb,7.441
          if (lines.length < 2 || lines[1].trim() === "") {
            throw new Error("onionnet score file has no data row");
          }
          var bfe_line = lines[1];
          // Read the prediction from the last column. Picking the first
          // field that parses as a number would misread an index or path
          // that happens to start with a digit.
          var fields = bfe_line.split(",");
          var predicted_pka = parseFloat(fields[fields.length - 1]);
          if (isNaN(predicted_pka)) {
            throw new Error("cannot parse predicted pKa from line: " + bfe_line);
          }
          // refactor can be used to convert pKa to binding free energy, based on deltaG = -RT*lnK
          // NOTE(review): units and temperature behind this constant are not stated here - confirm.
          var refactor = -0.73349;
          return predicted_pka / refactor;
        }

$namespaces:
  edam: https://edamontology.org/
  cwltool: http://commonwl.org/cwltool#

$schemas:
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl
17 changes: 12 additions & 5 deletions cwl_adapters/smina_docking.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 1
prefix: -r

ligand_file:
Expand All @@ -50,7 +49,6 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 2
prefix: -l

ligand_box:
Expand All @@ -67,14 +65,24 @@ inputs:
- edam:format_3815
- edam:format_3816
inputBinding:
position: 3
prefix: --autobox_ligand

local_only:
label: try local minimization only rather than docking
type: boolean?
inputBinding:
prefix: --local_only

score_only:
label: Do not do any conformational search; simply rescore.
type: boolean?
inputBinding:
prefix: --score_only

scoring:
label: scoring function option, default is vina, options can be (vina, vinardo, or a customized scoring function)
type: string?
inputBinding:
position: 4
prefix: --scoring
default: "vina"

Expand All @@ -83,7 +91,6 @@ inputs:
type: string?
format: edam:format_1476
inputBinding:
position: 5
prefix: -o
default: "docked.pdb"

Expand Down
100 changes: 100 additions & 0 deletions examples/rescoring/docking_rescoring_onionnet_workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
## Protein-ligand docking and docking poses re-scoring with OnionNet (version 1)
##
## input: pdb structures from PDBbind refined dataset
## output:
## 1. docking poses
## 2. OnionNet predicted pKa file and the score derived from it,
##    plotted against the experimental dGs

steps:
# Select entries from the PDBbind refined dataset.
- extract_pdbbind_refined:
    in:
      # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.query.html
      # "The query() method uses a slightly modified Python syntax by default.
      # For example, the & and | (bitwise) operators have the precedence of their boolean cousins, and and or.
      # This is syntactically valid Python, however the semantics are different."
      query: '(Kd_Ki == "Kd") and (value < 0.000002)'
      # to obtain a broader range of experimental dGs
      max_row: 1
      convert_Kd_dG: 'True'
      output_txt_path: '&binding_data.txt'
      output_pdb_paths: '&pdbbind_pdbs'
      output_sdf_paths: '&pdbbind_sdfs'
      experimental_dGs: '&exp_dGs'

# One run per extracted protein structure.
- fix_side_chain:
    scatter: [input_pdb_path]
    in:
      input_pdb_path: '*pdbbind_pdbs'
      output_pdb_path: '&pdbbind_pdbs.pdb'

# One run per extracted ligand.
# NOTE(review): the '*ligand_min.mol2' used by the next step is presumably
# defined inside this subworkflow - confirm.
- minimize_ligand_only.yml:
    scatter: [sdf_path]
    in:
      sdf_path: '*pdbbind_sdfs'

# Receptor, ligand and box are paired element-wise (dotproduct).
# local_only asks smina for local minimization rather than a docking search.
- smina_docking:
    scatter: [receptor_file, ligand_file, ligand_box]
    scatterMethod: dotproduct
    in:
      receptor_file: '*pdbbind_pdbs.pdb'
      ligand_file: '*ligand_min.mol2'
      ligand_box: '*ligand_min.mol2'
      scoring: 'vina'
      local_only: true
      output_dock_file: '&ligand_opt.pdb'
      output_path: output

# Clean each smina pose.
- clean_smina_pdb:
    scatter: [input_pdb]
    in:
      input_pdb: '*ligand_opt.pdb'
      output_pdb: '&ligand_opt_clean.pdb'

# Pair each protein with its cleaned ligand pose (dotproduct).
- cat_pdb:
    scatter: [input_structure1, input_structure2]
    scatterMethod: dotproduct
    in:
      input_structure1: '*pdbbind_pdbs.pdb'
      input_structure2: '*ligand_opt_clean.pdb'
      output_structure_path: '&complex_pdbs.pdb'

# Generate OnionNet features for each complex.
- onionnet-feature:
    scatter: [complex_path_file]
    in:
      complex_path_file: '*complex_pdbs.pdb'
      output_feature_file: '&output_features.csv'

# Score each feature file with OnionNet.
- onionnet-score:
    scatter: [input_feature_file]
    in:
      input_feature_file: '*output_features.csv'
      output_score_file: '&predicted_pKa.csv'
      onionnet_score: '&onionnet_score'

# Experimental dGs (x) against OnionNet scores (y).
- scatter_plot:
    in:
      xs: '*exp_dGs'
      ys: '*onionnet_score'

wic:
  graphviz:
    label: Protein-ligand docking (Smina) and docking poses re-scoring (OnionNet-v1)
  steps:
    (1, extract_pdbbind_refined):
      wic:
        graphviz:
          label: extract protein-ligand structure (protein.pdb and ligand.sdf) from pdbbind_refined dataset
    (2, fix_side_chain):
      wic:
        graphviz:
          label: fix_side_chain of protein structure.
    (3, minimize_ligand_only.yml):
      wic:
        inlineable: false
        graphviz:
          label: minimize (obminimize) ligand structure.
    (4, smina_docking):
      wic:
        graphviz:
          label: Smina docking (flexible ligand - rigid protein docking)
6 changes: 6 additions & 0 deletions examples/scripts/Dockerfile_clean_smina_pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Image for the clean_smina_pdb CWL adapter, which runs
# `python /clean_smina_pdb.py`.
FROM python

# Install and clean up in the same layer: a cleanup in a later RUN cannot
# shrink the layer that already contains the package lists and cache.
RUN apt-get update \
    && apt-get install -y wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# The script must sit at the filesystem root, where the adapter expects it.
COPY clean_smina_pdb.py /
40 changes: 40 additions & 0 deletions examples/scripts/Dockerfile_onionnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Image for the OnionNet (version 1) CWL adapters, which run scripts
# under /onionnet.
FROM condaforge/mambaforge
# NOT mambaforge-pypy3 (pandas & rdkit & mdtraj are incompatible with pypy)

# Install requirements.
# Cleanup happens in the same layer as the install; a cleanup in a later RUN
# cannot shrink the layer that already holds the files.
RUN apt-get update \
    && apt-get install -y wget git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create environment
# Python 3.10 is already installed in the base image condaforge/mambaforge.
# If the python version is not specified explicitly, it conflicts with openbabel <3.0:
#0 23.40 Pinned packages:
#0 23.40 - python 3.10.*
#0 23.40 The following packages are incompatible
#0 23.40 └─ openbabel <3.0 is installable with the potential options
#0 23.40 ├─ openbabel 2.4.1 would require
#0 23.40 │ └─ python >=2.7,<2.8.0a0 , which can be installed;
#0 23.40 ├─ openbabel 2.4.1 would require
#0 23.40 │ └─ python >=3.6,<3.7.0a0 , which can be installed;
#0 23.40 └─ openbabel 2.4.1 would require
#0 23.40 └─ python >=3.7,<3.8.0a0 , which can be installed.
# So, explicitly downgrade to python=3.7.*
RUN mamba install -c conda-forge "python=3.7.*" "openbabel<3.0" numpy pandas mdtraj biopandas tensorflow -y \
    && mamba clean --all --yes
# /opt/conda/lib/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15:
# FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23.
RUN pip install -U "scikit-learn<0.23" rdkit-pypi \
    && pip cache purge

# Install onionnet
RUN git clone https://github.com/cyangNYU/onionnet.git
WORKDIR /onionnet

# Download models
## The default model of onionnet-v1 in the github repo is not correct; the actual size is around 600 MB.
## The authors provided a google drive link to download it,
## but their command wget "https://drive.google.com/uc?export=download&id=1cwJN44TgaVBWYEEb_SGU5JBJp6WbFdM1" -O "CNN_final_model_weights.h5" is not working.
RUN cd models && rm CNN_final_model_weights.h5 && wget https://huggingface.co/cyangNYU/onionnet-v1/resolve/main/CNN_final_model_weights.h5
# Keep a copy of this Dockerfile in the image. COPY is used because the
# source is a plain local file and needs none of ADD's extra behaviors.
COPY Dockerfile_onionnet .
Loading

0 comments on commit 4d1063f

Please sign in to comment.