Init commit for release

3dem · Oct 2, 2022 · 7e5bd58 · 7e5bd58
commit 7e5bd58
Show file tree

Hide file tree

Showing 56 changed files with 13,841 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,132 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# Pycharm
+.idea/
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Kiarash Jamali
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,90 @@
+# ModelAngelo
+
+ModelAngelo is an automatic atomic model building program for cryo-EM maps.
+
+## Compute requirements
+It is highly recommended to have access to GPUs with at least 8 GB of memory. ModelAngelo performs well on NVIDIA GPUs such as the RTX 2080 and newer.
+
+## Installation
+### Personal use
+(If you manage a computational cluster, please skip to the next section)
+
+**Step 1: Install Conda**
+
+To install ModelAngelo, you need Anaconda. We recommend installing miniconda3, as it is lighter. You can find that here: [miniconda](https://docs.conda.io/en/latest/miniconda.html)
+
+Once you have a version of Anaconda installed, check that it actually runs with:
+```
+conda info
+```
+
+**Step 2: Clone this repo**
+
+Now, you can install ModelAngelo. First, you need to clone this GitHub repository with:
+```
+git clone git@github.com:3dem/model-angelo.git
+```
+
+**Step 3: Run install script**
+
+Afterwards, all you need to do is go into the `model-angelo` directory and run the install script:
+```
+cd model-angelo
+bash install_script.sh
+```
+You will now have a conda environment called `model_angelo` that is able to run the program. 
+You need to activate this conda environment with `conda activate model_angelo`. 
+Now, you can run `model_angelo build -h` to see if the installation worked!
+
+### Installing for a shared computational environment
+If you manage a computational cluster with many users and would like to install ModelAngelo once to be used everywhere, 
+you should complete the above steps 1 and 2 for a public account.
+
+Next, you should designate a folder to save the shared weights of the model so that it is *readable and executable*
+by all users of your cluster. Let's say that path is `/public/model_angelo_weights`.
+
+Now, you run the following:
+```
+export TORCH_HOME=/public/model_angelo_weights
+cd model-angelo
+bash install_script.sh --download-weights
+```
+Once the script has finished running, make sure that the weights were installed in the directory you set.
+
+Finally, you can make the following bash script available for all users to run:
+
+```
+#!/bin/bash
+source /path/to/conda/profile/conda.sh 
+conda activate model_angelo
+model_angelo "$@"
+```
+
+## Usage
+### Building a map with FASTA sequence
+This is the recommended use case, when you have access to a medium-to-high resolution cryo-EM map (resolution better than 4 Å, i.e. numerically below 4 Å) as well as a FASTA file with all of your protein sequences.
+
+To familiarize yourself with the options available in `model_angelo build`, run `model_angelo build -h`.
+
+Let's say the map's name is `map.mrc` and the sequence file is `sequence.fasta`. To build your model in a directory named `output`, you run:
+```
+model_angelo build -v map.mrc -f sequence.fasta -o output
+```
+If the output of the program halts before the completion of `GNN model refinement, round 3 / 3`, an error occurred; you can find the details in `output/model_angelo.log`. Otherwise, you can find your model in `output/output.cif`. The name of the mmCIF file is based on the output folder name, so if you specify, for example, `-o testing/test/model_building`, the model will be in `testing/test/model_building/model_building.cif`.
+
+### Building a map with no FASTA sequence
+If you have a sample where you do not know all of the protein sequences that occur in the map, you can run `model_angelo build_no_seq` instead.
+This version of the program uses a network that was not trained with input sequences, nor does it do post-processing on the built model.
+
+Instead, in addition to a built model, it provides you with HMM profile files that you can use to search a database such as UniRef with HHblits.
+
+You run this command:
+```
+model_angelo build_no_seq -v map.mrc -o output
+```
+The model will be in `output/output.cif` as before. Now there are also HMM profiles for each chain in HHsearch's format here: `output/hmm_profiles`.
+To do a sequence search for chain A (for example), you should first install [HHblits](https://github.com/soedinglab/hh-suite) and download one of the [databases](https://github.com/soedinglab/hh-suite#available-databases). Then, you can run
+```
+hhblits -i output/hmm_profiles/A.hhm -d PATH_TO_DB -o A.hhr -oa3m A.a3m -M first
+```
+You will have your result as a multiple sequence alignment here: `A.a3m`. 
diff --git a/install_script.sh b/install_script.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Installer for ModelAngelo.
+#
+# Creates the `model_angelo` conda environment if it does not exist yet,
+# installs PyTorch plus this package into it and, when requested, downloads
+# the model weights into TORCH_HOME.
+#
+# Usage: bash install_script.sh [-h|--help] [-w|--download-weights]
+
+# Parse command-line flags.
+while test $# -gt 0; do
+  case "$1" in
+    -h|--help)
+      echo "Make sure you have conda installed"
+      echo "Make sure you have set the TORCH_HOME environment variable to a suitable public location (if installing on a cluster)"
+      echo "-h, --help                   simple help and instructions"
+      echo "-w, --download-weights       use if you want to also download the weights"
+      exit 0
+      ;;
+    -w|--download-weights)
+      echo "Downloading weights as well because flag -w or --download-weights was specified"
+      DOWNLOAD_WEIGHTS=1
+      shift
+      ;;
+    *)
+      # An unrecognised argument must end the loop: it is never shifted,
+      # so without this branch the script would spin forever.
+      echo "ERROR: Unknown argument: $1" >&2
+      echo "Run with -h or --help to see the available options" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Downloading weights for shared use requires an explicit target directory.
+if [ -z "${TORCH_HOME}" ] && [ -n "${DOWNLOAD_WEIGHTS}" ]; then
+  echo "ERROR: TORCH_HOME is not set, but --download-weights or -w flag is set";
+  echo "Please specify TORCH_HOME to a publicly available directory";
+  exit 1;
+fi
+
+# Count environments whose *name* is exactly model_angelo. Matching the first
+# column only avoids false positives from install paths or from similarly
+# named environments that merely contain the substring.
+is_conda_model_angelo_installed=$(conda info --envs | awk '{print $1}' | grep -c -x model_angelo)
+if [[ "${is_conda_model_angelo_installed}" == "0" ]]; then
+  conda create -n model_angelo python=3.9 -y;
+fi
+
+# Remember the caller's TORCH_HOME before activating the environment.
+# NOTE(review): presumably activation can replace it with a value stored in
+# the environment -- confirm.
+torch_home_path="${TORCH_HOME}"
+
+# Abort if activation fails, so nothing gets installed into another environment.
+source "$(which activate)" model_angelo || {
+  echo "ERROR: Could not activate the model_angelo conda environment" >&2
+  exit 1
+}
+
+conda install -y pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch
+
+# Only persist TORCH_HOME in the environment when the caller provided one;
+# otherwise leave the environment's configuration untouched.
+if [ -n "${torch_home_path}" ]; then
+  conda env config vars set TORCH_HOME="${torch_home_path}"
+  # Keep this shell consistent with the requested location for the weight
+  # download below.
+  export TORCH_HOME="${torch_home_path}"
+fi
+
+pip install -r requirements.txt
+python setup.py install
+
+if [[ "${DOWNLOAD_WEIGHTS}" ]]; then
+  echo "Writing weights to ${TORCH_HOME}"
+  python model_angelo/utils/setup_weights.py --bundle-name original
+  python model_angelo/utils/setup_weights.py --bundle-name original_no_seq
+else
+  echo "Did not download weights because the flag -w or --download-weights was not specified"
+fi
diff --git a/model_angelo/__init__.py b/model_angelo/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+"""
+ModelAngelo - Automated Cryo-EM model building toolkit
+"""
+
+
+__version__ = "0.0.1"  # package version string
diff --git a/model_angelo/__main__.py b/model_angelo/__main__.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+"""
+ModelAngelo: Automated Cryo-EM model building toolkit
+"""
+
+
+def main():
+    import argparse
+
+    import model_angelo
+
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"ModelAngelo {model_angelo.__version__}",
+    )
+
+    import model_angelo.apps.build
+    import model_angelo.apps.build_no_seq
+    import model_angelo.apps.evaluate
+    import model_angelo.apps.eval_per_resid
+
+    modules = {
+        "build": model_angelo.apps.build,
+        "build_no_seq": model_angelo.apps.build_no_seq,
+        "evaluate": model_angelo.apps.evaluate,
+        "eval_per_resid": model_angelo.apps.eval_per_resid,
+    }
+
+    subparsers = parser.add_subparsers(
+        title="Choose a module",
+    )
+    subparsers.required = "True"
+
+    for key in modules:
+        module_parser = subparsers.add_parser(
+            key,
+            description=modules[key].__doc__,
+            formatter_class=argparse.RawTextHelpFormatter,
+        )
+        modules[key].add_args(module_parser)
+        module_parser.set_defaults(func=modules[key].main)
+
+    try:
+        args = parser.parse_args()
+        args.func(args)
+    except TypeError:
+        parser.print_help()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/model_angelo/apps/__init__.py b/model_angelo/apps/__init__.py