From e09dd313f268f369082fee96bff3bab4351fa6a6 Mon Sep 17 00:00:00 2001 From: jnwei Date: Tue, 21 Nov 2023 15:49:06 -0500 Subject: [PATCH 1/4] Fixes imports to colab notebook. --- notebooks/OpenFold.ipynb | 186 ++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 102 deletions(-) diff --git a/notebooks/OpenFold.ipynb b/notebooks/OpenFold.ipynb index 7ef57298..c14e5c29 100755 --- a/notebooks/OpenFold.ipynb +++ b/notebooks/OpenFold.ipynb @@ -1,21 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "OpenFold.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", @@ -57,10 +40,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { - "id": "rowN0bVYLe9n", - "cellView": "form" + "cellView": "form", + "id": "rowN0bVYLe9n" }, + "outputs": [], "source": [ "#@markdown ### Enter the amino acid sequence to fold ⬇️\n", "sequence = 'MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH' #@param {type:\"string\"}\n", @@ -78,16 +63,16 @@ "\n", "#@markdown After making your selections, execute this cell by pressing the\n", "#@markdown *Play* button on the left." 
- ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { - "id": "woIxeCPygt7K", - "cellView": "form" + "cellView": "form", + "id": "woIxeCPygt7K" }, + "outputs": [], "source": [ "#@title Install third-party software\n", "#@markdown Please execute this cell by pressing the *Play* button on \n", @@ -97,75 +82,54 @@ "#@markdown **Note**: This installs the software on the Colab \n", "#@markdown notebook in the cloud and not on your computer.\n", "\n", - "import sys\n", + "import os, time\n", "from IPython.utils import io\n", - "import os\n", + "from sys import version_info\n", "import subprocess\n", - "import tqdm.notebook\n", "\n", - "TQDM_BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]'\n", + "python_version = f\"{version_info.major}.{version_info.minor}\"\n", "\n", - "python_version = '.'.join(sys.version.split('.')[:2]) #get string like \"3.9\"\n", + "\n", + "os.system(\"wget -qnc https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh\")\n", + "os.system(\"bash Mambaforge-Linux-x86_64.sh -bfp /usr/local\")\n", + "os.system(\"mamba config --set auto_update_conda false\")\n", + "os.system(f\"mamba install -y -c conda-forge -c bioconda kalign2=2.04 hhsuite=3.3.0 openmm=7.7.0 python={python_version} pdbfixer\")\n", + "\n", + "\n", + "os.system(\"pip install -q \\\"torch<2\\\" biopython ml_collections py3Dmol modelcif\")\n", "\n", "try:\n", " with io.capture_output() as captured:\n", - " %shell sudo apt install --quiet --yes hmmer\n", - "\n", - " # Install py3dmol.\n", - " %shell pip install py3dmol\n", - "\n", - " %shell rm -rf /opt/conda\n", - " %shell wget -q -P /tmp \\\n", - " https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \\\n", - " && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \\\n", - " && rm /tmp/Miniconda3-latest-Linux-x86_64.sh\n", - "\n", - " PATH=%env PATH\n", - " %env 
PATH=/opt/conda/bin:{PATH}\n", - "\n", - " # Install the required versions of all dependencies.\n", - " %shell conda install -y -q conda==4.13.0\n", - " %shell conda install -y -q -c conda-forge -c bioconda \\\n", - " kalign2=2.04 \\\n", - " hhsuite=3.3.0 \\\n", - " python={python_version} \\\n", - " openmm=7.7.0 \\\n", - " pdbfixer \\\n", - " 2>&1 1>/dev/null\n", - " %shell pip install -q \\\n", - " ml-collections==0.1.0 \\\n", - " PyYAML==5.4.1 \\\n", - " biopython==1.79 \\\n", - " modelcif==0.7\n", "\n", " # Create a ramdisk to store a database chunk to make Jackhmmer run fast.\n", + " %shell sudo apt install --quiet --yes hmmer\n", " %shell sudo mkdir -m 777 --parents /tmp/ramdisk\n", " %shell sudo mount -t tmpfs -o size=9G ramdisk /tmp/ramdisk\n", "\n", " %shell wget -q -P /content \\\n", " https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt\n", "\n", - " # Install AWS CLI\n", - " %shell curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"awscliv2.zip\"\n", - " %shell unzip -qq awscliv2.zip\n", - " %shell sudo ./aws/install\n", - " %shell rm awscliv2.zip\n", - " %shell rm -rf ./aws\n", + " %shell mkdir -p /content/openfold/openfold/resources\n", + " \n", + " commit = \"099769d2ecfd01a8baa8d950030df454a042c910\"\n", + " os.system(f\"pip install -q git+https://github.com/aqlaboratory/openfold.git@{commit}\")\n", + " \n", + " %shell cp -f /content/stereo_chemical_props.txt /usr/local/lib/python{python_version}/site-packages/openfold/resources/\n", + "\n", "except subprocess.CalledProcessError as captured:\n", - " print(captured)\n", - " raise" - ], - "execution_count": null, - "outputs": [] + " print(captured)" + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { - "id": "VzJ5iMjTtoZw", - "cellView": "form" + "cellView": "form", + "id": "VzJ5iMjTtoZw" }, + "outputs": [], "source": [ - "#@title Install OpenFold\n", + "#@title Download model 
weights \n", "#@markdown Please execute this cell by pressing the *Play* button on \n", "#@markdown the left.\n", "\n", @@ -180,13 +144,6 @@ "\n", "try:\n", " with io.capture_output() as captured:\n", - " # Run setup.py to install only Openfold.\n", - " %shell rm -rf openfold\n", - " %shell git clone \"{GIT_REPO}\" openfold 2>&1 1> /dev/null\n", - " %shell mkdir -p /content/openfold/openfold/resources\n", - " %shell cp -f /content/stereo_chemical_props.txt /content/openfold/openfold/resources\n", - " %shell /usr/bin/python3 -m pip install -q ./openfold\n", - "\n", " if(weight_set == 'AlphaFold'):\n", " %shell mkdir --parents \"{ALPHAFOLD_PARAMS_DIR}\"\n", " %shell wget -O {ALPHAFOLD_PARAMS_PATH} {ALPHAFOLD_PARAM_SOURCE_URL}\n", @@ -194,7 +151,14 @@ " --directory=\"{ALPHAFOLD_PARAMS_DIR}\" --preserve-permissions\n", " %shell rm \"{ALPHAFOLD_PARAMS_PATH}\"\n", " elif(weight_set == 'OpenFold'):\n", + " # Install AWS CLI\n", + " %shell curl \"https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip\" -o \"awscliv2.zip\"\n", + " %shell unzip -qq awscliv2.zip\n", + " %shell sudo ./aws/install\n", + " %shell rm awscliv2.zip\n", + " %shell rm -rf ./aws\n", " %shell mkdir --parents \"{OPENFOLD_PARAMS_DIR}\"\n", + "\n", " %shell aws s3 cp \\\n", " --no-sign-request \\\n", " --region us-east-1 \\\n", @@ -203,14 +167,17 @@ " else:\n", " raise ValueError(\"Invalid weight set\")\n", "except subprocess.CalledProcessError as captured:\n", - " print(captured)\n", - " raise" - ], - "execution_count": null, - "outputs": [] + " print(captured)" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "_FpxxMo-mvcP" + }, + "outputs": [], "source": [ "#@title Import Python packages\n", "#@markdown Please execute this cell by pressing the *Play* button on \n", @@ -219,8 +186,8 @@ "import unittest.mock\n", "import sys\n", "\n", + "sys.path.insert(0, f'/usr/local/lib/python{python_version}/dist-packages/')\n", "sys.path.insert(0, 
f'/usr/local/lib/python{python_version}/site-packages/')\n", - "sys.path.append(f'/opt/conda/lib/python{python_version}/site-packages')\n", "\n", "# Allows us to skip installing these packages\n", "unnecessary_modules = [\n", @@ -245,6 +212,10 @@ "import py3Dmol\n", "import torch\n", "import shutil\n", + "import tqdm\n", + "import tqdm.notebook\n", + "\n", + "TQDM_BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]'\n", "\n", "# Prevent shell magic being broken by openmm, prevent this cryptic error:\n", "# \"NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968\"\n", @@ -280,13 +251,7 @@ "from IPython import display\n", "from ipywidgets import GridspecLayout\n", "from ipywidgets import Output" - ], - "metadata": { - "id": "_FpxxMo-mvcP", - "cellView": "form" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -301,10 +266,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { - "id": "2tTeTTsLKPjB", - "cellView": "form" + "cellView": "form", + "id": "2tTeTTsLKPjB" }, + "outputs": [], "source": [ "#@title Search against genetic databases\n", "\n", @@ -420,16 +387,16 @@ "plt.ylabel('Non-Gap Count')\n", "plt.yticks(range(0, num_alignments + 1, max(1, int(num_alignments / 3))))\n", "plt.show()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { - "id": "XUo6foMQxwS2", - "cellView": "form" + "cellView": "form", + "id": "XUo6foMQxwS2" }, + "outputs": [], "source": [ "#@title Run OpenFold and download prediction\n", "\n", @@ -693,9 +660,7 @@ "# --- Download the predictions ---\n", "shutil.make_archive(base_name='prediction', format='zip', root_dir=output_dir)\n", "files.download(f'{output_dir}.zip')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -789,5 +754,22 @@ "* BFD: (modified), by Steinegger M. 
and Söding J., modified by DeepMind, available under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by/4.0/). See the Methods section of the [AlphaFold proteome paper](https://www.nature.com/articles/s41586-021-03828-1) for details." ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "OpenFold.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From 6f10686bcd023638065878614ae47b5df9528a65 Mon Sep 17 00:00:00 2001 From: Jennifer Wei <97625454+jnwei@users.noreply.github.com> Date: Tue, 21 Nov 2023 15:59:27 -0500 Subject: [PATCH 2/4] Includes button for colab link and specifies GPU type Edits colab metadata --- notebooks/OpenFold.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/notebooks/OpenFold.ipynb b/notebooks/OpenFold.ipynb index c14e5c29..f0f2ab1b 100755 --- a/notebooks/OpenFold.ipynb +++ b/notebooks/OpenFold.ipynb @@ -760,7 +760,9 @@ "colab": { "collapsed_sections": [], "name": "OpenFold.ipynb", - "provenance": [] + "provenance": [], + "gpuType": "T4", + "include_colab_link": true }, "kernelspec": { "display_name": "Python 3", From 59be42ca7bcf444d7e29df323cd676922969db21 Mon Sep 17 00:00:00 2001 From: Jennifer Wei <97625454+jnwei@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:09:25 -0500 Subject: [PATCH 3/4] Update OpenFold.ipynb Adds cell to add a button to launch colab from github. 
--- notebooks/OpenFold.ipynb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/notebooks/OpenFold.ipynb b/notebooks/OpenFold.ipynb index f0f2ab1b..3e0e3837 100755 --- a/notebooks/OpenFold.ipynb +++ b/notebooks/OpenFold.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": { From 4384ac083ab82ef2dafeb156847c53057b315860 Mon Sep 17 00:00:00 2001 From: Jennifer Wei <97625454+jnwei@users.noreply.github.com> Date: Tue, 21 Nov 2023 16:10:44 -0500 Subject: [PATCH 4/4] Update OpenFold.ipynb fix small typo on button. --- notebooks/OpenFold.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/OpenFold.ipynb b/notebooks/OpenFold.ipynb index 3e0e3837..013d79bf 100755 --- a/notebooks/OpenFold.ipynb +++ b/notebooks/OpenFold.ipynb @@ -7,7 +7,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, {