diff --git a/MANIFEST.in b/MANIFEST.in index f45d8d24..6508e758 100755 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,9 @@ include neofox/published_features/Tcell_predictor/genes-expression.pickle include neofox/published_features/Tcell_predictor/SIRdata.mat include neofox/published_features/Tcell_predictor/Classifier.pickle include neofox/references/install_r_dependencies.R +include neofox/published_features/hex/BLOSUM62.rda +include neofox/published_features/hex/EPItOMe_modified.R +include neofox/published_features/hex/hex.R include neofox/expression_imputation/tcga_cohort_code.tab include neofox/expression_imputation/tcga_exp_summary_modified.tab.gz.tbi include neofox/expression_imputation/tcga_exp_summary_modified.tab.gz \ No newline at end of file diff --git a/README.md b/README.md index bbabf7f5..4afbdcf7 100755 --- a/README.md +++ b/README.md @@ -4,7 +4,24 @@ [](https://badge.fury.io/py/neofox) -NeoFox annotates neoantigen candidate sequences with published neo-epitope descriptors. For a detailed documentation, please check out [https://neofox.readthedocs.io](https://neofox.readthedocs.io/) +NeoFox annotates neoantigen candidate sequences with published neoantigen features. 
+ +For a detailed documentation, please check out [https://neofox.readthedocs.io](https://neofox.readthedocs.io/) + +If you use NeoFox, please cite the following publication: +Franziska Lang, Pablo Riesgo-Ferreiro, Martin Löwer, Ugur Sahin, Barbara Schrörs, **NeoFox: annotating neoantigen candidates with neoantigen features**, *Bioinformatics*, 2021;, btab344, [https://doi.org/10.1093/bioinformatics/btab344](https://doi.org/10.1093/bioinformatics/btab344) + +## Table of Contents + +[1 Implemented neoantigen features](#1-Implemented-Neoantigen-Features) +[2 NeoFox requirements](#2-NeoFox-Requirements) +[3 Usage from the command line](#3-Usage-from-the-command-line) +[4 Input data](#4-input-data) +[5 Output data](#5-output-data) + +## 1 Implemented Neoantigen Features + +NeoFox covers the following neoantigen features and prediction algorithms: | Name | Reference | DOI | |---------------------------------------------------------|--------------------------------------------------------------------------|-------------------------------------------------------------------------------------------| @@ -25,41 +42,60 @@ NeoFox annotates neoantigen candidate sequences with published neo-epitope descr | Tcell predictor | Besser et al, 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z | | neoag | Smith et al, 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 | | PRIME | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 | +| HEX | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 | -## NeoFox Requirements +## 2 NeoFox Requirements -**Required Software/Tools/Dependencies:** +NeoFox depends on the following tools: + - Python >=3.6, <=3.8 - R 3.6.0 - BLAST 2.10.1 -- netMHCpan 4.0 -- netMHCIIpan 3.2 +- netMHCpan 4.1 +- netMHCIIpan 4.0 - MixMHCpred 2.1 - MixMHC2pred 1.2 - PRIME 1.0 -## Usage from the command line +## 3 Usage from the 
command line + +NeoFox can be used from the command line as shown below or programmatically (see [https://neofox.readthedocs.io](https://neofox.readthedocs.io/) for more information). ````commandline -neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candidates.json --patient-data patient_data.txt --output-folder /path/to/out --output-prefix out_prefix [--with-short-wide-table] [--with-tall-skinny-table] [--with-json] [--num_cpus] +neofox --candidate-file/--json-file neoantigens_candidates.tab/neoantigens_candidates.json --patient-data/--patient-data-json patient_data.txt/patient_data.json --output-folder /path/to/out --output-prefix out_prefix [--patient-id] [--with-short-wide-table] [--with-tall-skinny-table] [--with-json] [--num_cpus] [--affinity-threshold] ```` - -where: -- `--candidate-file`: tab-separated values table with neoantigen candidates represented by long mutated peptide sequences -- `--json-file`: JSON file neoantigens in NeoFox model format +- `--candidate-file`: tab-separated values table with neoantigen candidates represented by long mutated peptide sequences as described [here](#41-neoantigen-candidates-in-tabular-format) +- `--json-file`: JSON file neoantigens in NeoFox model format as described [here](#42-neoantigen-candidates-in-json-format) - `--patient-id`: patient identifier (*optional*, this will be used if the patient id the column `patient` is missing the candidate input file) -- `--patient-data`: a table of tab separated values containing metadata on the patient +- `--patient-data`: a table of tab separated values containing metadata on the patient as described [here](#43-patient-data-format) - `--output-folder`: path to the folder to which the output files should be written - `--output-prefix`: prefix for the output files (*optional*) -- `--with-short-wide-table`: output file in short-wide format (*optional*) -- `--with-tall-skinny-table`: output file in tall-skinny format (*optional*) +- 
`--with-short-wide-table`: output file in short-wide format (*default*, *optional*) +- `--with-tall-skinny-table`: output file in tall-skinny format (*optional*) - `--with-json`: output file in JSON format (*optional*) - `--num_cpus`: number of CPUs to use (*optional*) +- `--config`: a config file with the paths to dependencies as shown below (*optional*) +- `--affinity-threshold`: an affinity value (*optional*); neoantigen candidates with a best predicted affinity greater than or equal to this threshold will not be annotated with features that specifically model + neoepitope recognition. A threshold that is commonly used is 500 nM. + + +The optional config file with the paths to the dependencies can look like this: +````commandline +NEOFOX_REFERENCE_FOLDER=path/to/reference/folder +NEOFOX_RSCRIPT=`which Rscript` +NEOFOX_BLASTP=path/to/ncbi-blast-2.10.1+/bin/blastp +NEOFOX_NETMHCPAN=path/to/netMHCpan-4.1/netMHCpan +NEOFOX_NETMHC2PAN=path/to/netMHCIIpan-4.0/netMHCIIpan +NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.1/MixMHCpred +NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-1.2/MixMHC2pred_unix +NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb +NEOFOX_PRIME=/path/to/PRIME/PRIME +```` -### Input data +## 4 Input data -#### Neoantigen candidates in tabular format +### 4.1 Neoantigen candidates in tabular format This is an dummy example of a table with neoantigen candidates: | gene | mutation.wildTypeXmer | mutation.mutatedXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | external_annotation_1 | external_annotation_2 | @@ -81,7 +117,7 @@ where: **NOTE:** If rnaExpression is not provided, expression will be estimated by gene expression in TCGA cohort indicated in the `tumorType` in the patient data (see below). -### Neoantigen candidates in JSON format +### 4.2 Neoantigen candidates in JSON format Besides tabular format, neoantigen candidates can be provided as a list of neoantigen models in JSON format as shown below. 
To simplify, only one full neoantigen model is shown: @@ -97,7 +133,7 @@ Besides tabular format, neoantigen candidates can be provided as a list of neoan }] ``` -#### patient-file format +### 4.3 Patient-data format This is an dummy example of a patient file: @@ -113,35 +149,6 @@ where: - `tumorType`: tumour entity in TCGA study abbreviation format (https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations). This field is required for expression imputation and at the moment the following tumor types are supported: -| Study Name | Abbreviation | -|--------------------------------------------------------------------|-------------------| -| Adrenocortical carcinoma | ACC | -| Bladder Urothelial Carcinoma | BLCA | -| Breast invasive carcinoma | BRCA | -| Cervical squamous cell carcinoma and endocervical adenocarcinoma | CESC | -| Cholangiocarcinoma | CHOL | -| Colon adenocarcinoma | COAD | -| Esophageal carcinoma | ESCA | -| Glioblastoma multiforme | GBM | -| Head and Neck squamous cell carcinoma | HNSC | -| Kidney Chromophobe | KICH | -| Kidney renal papillary cell carcinoma | KIRP | -| Liver hepatocellular carcinoma | LIHC | -| Lung adenocarcinoma | LUAD | -| Lung squamous cell carcinoma | LUSC | -| Ovarian serous cystadenocarcinoma | OV | -| Pancreatic adenocarcinoma | PAAD | -| Prostate adenocarcinoma | PRAD | -| Rectum adenocarcinoma | READ | -| Sarcoma | SARC | -| Skin Cutaneous Melanoma | SKCM | -| Testicular Germ Cell Tumors | TGCT | -| Uterine Corpus Endometrial Carcinoma | UCEC | - - - -### Output data - -The output data is returned in a short wide tab separated values file (`--with-short-wide-table`). Optionally, it can be provided in a tall skinny tab separated values file (`--with-tall-skinny-table`) or in JSON format (`--with-json`). 
- -For a more information, please check out our documentation on [https://neofox.readthedocs.io](https://neofox.readthedocs.io/) \ No newline at end of file +## 5 Output data + +The output data is returned by default in a short wide tab separated values file (`--with-short-wide-table`). Optionally, it can be provided in a tall skinny tab separated values file (`--with-tall-skinny-table`) or in JSON format (`--with-json`). diff --git a/docs/resources/column_description.xlsx b/docs/resources/column_description.xlsx index a8a49420..6db8d41d 100755 Binary files a/docs/resources/column_description.xlsx and b/docs/resources/column_description.xlsx differ diff --git a/docs/resources/implemented_features_with_reference.xlsx b/docs/resources/implemented_features_with_reference.xlsx index 39000f76..59496cec 100755 Binary files a/docs/resources/implemented_features_with_reference.xlsx and b/docs/resources/implemented_features_with_reference.xlsx differ diff --git a/docs/source/01_overview.md b/docs/source/01_overview.md index a9c0c45c..00983e60 100644 --- a/docs/source/01_overview.md +++ b/docs/source/01_overview.md @@ -40,7 +40,8 @@ NeoFox covers neoepitope prediction by MHC binding and ligand prediction, simila | Priority score | Bjerregaard et al., 2017, Cancer Immunol Immunother. 
| https://doi.org/10.1007/s00262-017-2001-3 | | Tcell predictor | Besser et al., 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z | | neoag | Smith et al., 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 | -| PRIME | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 | +| PRIME | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 | +| HEX | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 | Besides comprehensive annotation of neoantigen candidates, NeoFox creates biologically meaningful representations of neoantigens and related biological entities as programmatic models. For this purpose, Protocol buffers is employed to diff --git a/docs/source/02_installation.md b/docs/source/02_installation.md index 84a9402a..a65d97bf 100644 --- a/docs/source/02_installation.md +++ b/docs/source/02_installation.md @@ -150,6 +150,7 @@ caret Peptides doParallel gbm +Biostrings ``` Add the reference folder to the Path diff --git a/docs/source/03_02_output_data.md b/docs/source/03_02_output_data.md index 1ebf0de4..ec17218d 100644 --- a/docs/source/03_02_output_data.md +++ b/docs/source/03_02_output_data.md @@ -98,6 +98,7 @@ The following table describes each of the annotations in the output: |PRIME_best_peptide |best predicted neoepitope candidate by PRIME model |PRIME | |PRIME_best_rank |output rank score of PRIME model |PRIME | |PRIME_best_score |output score of PRIME model |PRIME | +|hex_alignment_score |the alignment score by HEX |HEX | ## Short-wide format @@ -107,7 +108,7 @@ This is a dummy example: | identifier | dnaVariantAlleleFrequency | gene |imputedGeneExpression | mutation.mutatedXmer | mutation.position | mutation.wildTypeXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | ADN_MHCI | ADN_MHCII | Amplitude_MHCII_rank | 
Amplitude_MHCI_affinity | Amplitude_MHCI_affinity_9mer | Best_affinity_MHCII_allele | Best_affinity_MHCII_allele_WT | Best_affinity_MHCII_epitope | Best_affinity_MHCII_epitope_WT | Best_affinity_MHCII_score | Best_affinity_MHCII_score_WT | Best_affinity_MHCI_9mer_allele | Best_affinity_MHCI_9mer_allele_WT | Best_affinity_MHCI_9mer_anchor_mutated | Best_affinity_MHCI_9mer_epitope | Best_affinity_MHCI_9mer_epitope_WT | Best_affinity_MHCI_9mer_position_mutation | Best_affinity_MHCI_9mer_score | Best_affinity_MHCI_9mer_score_WT | Best_affinity_MHCI_allele | Best_affinity_MHCI_allele_WT | Best_affinity_MHCI_epitope | Best_affinity_MHCI_epitope_WT | Best_affinity_MHCI_score | Best_affinity_MHCI_score_WT | Best_rank_MHCII_score | Best_rank_MHCII_score_WT | Best_rank_MHCII_score_allele | Best_rank_MHCII_score_allele_WT | Best_rank_MHCII_score_epitope | Best_rank_MHCII_score_epitope_WT | Best_rank_MHCI_9mer_allele | Best_rank_MHCI_9mer_allele_WT | Best_rank_MHCI_9mer_epitope | Best_rank_MHCI_9mer_epitope_WT | Best_rank_MHCI_9mer_score | Best_rank_MHCI_9mer_score_WT | Best_rank_MHCI_score | Best_rank_MHCI_score_WT | Best_rank_MHCI_score_allele | Best_rank_MHCI_score_allele_WT | Best_rank_MHCI_score_epitope | Best_rank_MHCI_score_epitope_WT | CDN_MHCI | CDN_MHCII | DAI_MHCI_affinity_cutoff500nM | Dissimilarity_MHCI_cutoff500nM | Expression_mutated_transcript | Generator_rate | IEDB_Immunogenicity_MHCI_cutoff500nM | Improved_Binder_MHCI | MixMHC2pred_best_allele | MixMHC2pred_best_peptide | MixMHC2pred_best_rank | MixMHCpred_best_allele | MixMHCpred_best_peptide | MixMHCpred_best_rank | MixMHCpred_best_score | Neoag_immunogenicity | Number_of_mismatches_MCHI | PHBR-I | PHBR-II | Pathogensimiliarity_MHCI_affinity_9mer | Priority_score | Recognition_Potential_MHCI_affinity_9mer | Selfsimilarity_MHCI_conserved_binder | Tcell_predictor_score_cutoff500nM | VAF_in_RNA | VAF_in_tumor | [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | mutation_not_found_in_proteome | patient | 
substitution | transcript_expression | vaxrank_binding_score | vaxrank_total_score | |--------------------------|---------------------------|-------|-----------------------------|-----------------------------|-------------------|-----------------------------|-------------------|---------------|---------------------------|----------------------------------------|----------|-----------|----------------------|-------------------------|------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|--------------------------------|-----------------------------------|----------------------------------------|---------------------------------|------------------------------------|-------------------------------------------|-------------------------------|----------------------------------|---------------------------|------------------------------|----------------------------|-------------------------------|--------------------------|-----------------------------|-----------------------|--------------------------|------------------------------|---------------------------------|-------------------------------|----------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|----------------------|-------------------------|-----------------------------|--------------------------------|------------------------------|---------------------------------|----------|-----------|-------------------------------|--------------------------------|-------------------------------|----------------|--------------------------------------|----------------------|-------------------------|--------------------------|-----------------------|------------------------|-------------------------|-----------
-----------|-----------------------|----------------------|---------------------------|---------|---------|----------------------------------------|----------------|------------------------------------------|--------------------------------------|-----------------------------------|------------|--------------|---------------------------------------------|--------------------------------|---------|--------------|-----------------------|-----------------------|---------------------| -| ou11p7RD+tZvjY88DA55Mw== | 0.294 | BRCA2 | 0.5| AAAAAAAAAAAAAFAAAAAAAAAAAAA| AAAAAAAAAAAAAFAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 13.16998 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 | +| ou11p7RD+tZvjY88DA55Mw== | 0.294 | BRCA2 | 0.5| AAAAAAAAAAAAAFAAAAAAAAAAAAA| 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | 
HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 13.16998 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 | | rzXB3nQlZ5misn6VN8EA2A== | 0.173 | BRCA2 | 0.5| AAAAAAAAAAAAAMAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 0.71575659 | 0.556 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 1 | 1 | 10 | 90.685 | 90.685 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAAAAAAMAAAAA | AAAAAAAAARAAAAA | 421.53 | 554.92 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAM | AAAAAAAAR | 9 | 24.1 | 6346.9 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 24.1 | 6346.9 | 0.25 | 2.5 | HLA-DQA10401-DQB10302 | HLA-DQA10401-DQB10302 | AAAAAAAAAAMAAAA | AAAAAAAAAARAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 0.0587 | 8.9317 | 0.0587 | 8.9317 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 1 | 1 | 6322.8 | 1 | 0.39796 | 1 | 0.18288 | 1 | DPA1_01_03__DPB1_04_01 | AAAAMAAAAAAAAAAA | 2.44 | B0702 | AAAAAAAAM | 0.07 | 0.5444 | 39.51379 | 1 | 0.29303 | 1.5594 | 0 | 0.10626 | 0 | NA | 0.46452844 | 0.556 | 0.173 | AAAAAAAAAAAAARAAAAAAAAAAAAA | 1 | Ptx | E135S | 0.71575659 | 3.8741 | 1.5417 | ## Tall-skinny format diff --git a/docs/source/03_03_usage.md b/docs/source/03_03_usage.md index e39e12c6..7b35066f 100644 --- a/docs/source/03_03_usage.md +++ b/docs/source/03_03_usage.md @@ -40,15 +40,15 @@ neofox --candidate-file neoantigens_candidates.tab --patient-id Ptx --patient-da The optional **config** file with the paths to the dependencies can look like this: ````commandline -export NEOFOX_REFERENCE_FOLDER=path/to/reference/folder -export NEOFOX_RSCRIPT=`which Rscript` -export 
NEOFOX_BLASTP=path/to/ncbi-blast-2.10.1+/bin/blastp -export NEOFOX_NETMHCPAN=path/to/netMHCpan-4.1/netMHCpan -export NEOFOX_NETMHC2PAN=path/to/netMHCIIpan-4.0/netMHCIIpan -export NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.1/MixMHCpred -export NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-1.2/MixMHC2pred_unix -export NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb -export NEOFOX_PRIME=/path/to/PRIME/PRIME +NEOFOX_REFERENCE_FOLDER=path/to/reference/folder +NEOFOX_RSCRIPT=`which Rscript` +NEOFOX_BLASTP=path/to/ncbi-blast-2.10.1+/bin/blastp +NEOFOX_NETMHCPAN=path/to/netMHCpan-4.1/netMHCpan +NEOFOX_NETMHC2PAN=path/to/netMHCIIpan-4.0/netMHCIIpan +NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.1/MixMHCpred +NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-1.2/MixMHC2pred_unix +NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb +NEOFOX_PRIME=/path/to/PRIME/PRIME ```` ### Running from docker diff --git a/docs/source/05_models.md b/docs/source/05_models.md index 49cb0b5c..350afc93 100644 --- a/docs/source/05_models.md +++ b/docs/source/05_models.md @@ -224,9 +224,9 @@ Valid names for MHC I classic genes | Name | Number | Description | | ---- | ------ | ----------- | -| A | 0 | | -| B | 1 | | -| C | 2 | | +| A | 0 | HLA-A | +| B | 1 | HLA-B | +| C | 2 | HLA-C| @@ -239,11 +239,11 @@ considered constant. 
| Name | Number | Description | | ---- | ------ | ----------- | -| DRB1 | 0 | | -| DPA1 | 1 | | -| DPB1 | 2 | | -| DQA1 | 3 | | -| DQB1 | 4 | | +| DRB1 | 0 | HLA-DRB1 | +| DPA1 | 1 | HLA-DPA1| +| DPB1 | 2 | HLA-DPB1| +| DQA1 | 3 | HLA-DQA1| +| DQB1 | 4 | HLA-DQB1| @@ -254,9 +254,9 @@ Valid names for MHC II classic molecules | Name | Number | Description | | ---- | ------ | ----------- | -| DR | 0 | | -| DP | 1 | | -| DQ | 2 | | +| DR | 0 | HLA-DR| +| DP | 1 | HLA-DP| +| DQ | 2 | HLA-DQ| diff --git a/neofox/__init__.py b/neofox/__init__.py index 22f35a9b..3300de67 100755 --- a/neofox/__init__.py +++ b/neofox/__init__.py @@ -16,7 +16,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>.# -VERSION = "0.5.0" +VERSION = "0.5.1.dev3" REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER" diff --git a/neofox/annotator.py b/neofox/annotator.py index ada2a208..2c36ed20 100755 --- a/neofox/annotator.py +++ b/neofox/annotator.py @@ -59,6 +59,7 @@ from neofox.published_features.expression import Expression from neofox.published_features.priority_score import PriorityScore from neofox.published_features.prime import Prime +from neofox.published_features.hex.hex import Hex from neofox.model.neoantigen import Patient, Neoantigen, NeoantigenAnnotations from neofox.references.references import ( ReferenceFolder, @@ -106,6 +107,7 @@ def __init__( self.priority_score_calculator = PriorityScore() self.iedb_immunogenicity = IEDBimmunogenicity(affinity_threshold=affinity_threshold) self.amplitude = Amplitude() + self.hex = Hex(runner=self.runner, configuration=configuration, references=references) self.hla_database = references.get_hla_database() self.mhc_parser = MhcParser(self.hla_database) @@ -328,6 +330,17 @@ def get_annotation( logger.info( "Vaxrank annotation elapsed time {} seconds".format(round(end - start, 3)) ) + + # hex + if netmhcpan and netmhcpan.epitope_affinities: + start = time.time() + 
self.annotations.annotations.extend( + self.hex.get_annotation(netmhcpan=netmhcpan) + ) + end = time.time() + logger.info( + "Hex annotation elapsed time {} seconds".format(round(end - start, 3)) + ) return self.annotations def _compute_long_running_tasks(self, neoantigen, patient, sequential=True): diff --git a/neofox/published_features/hex/BLOSUM62.rda b/neofox/published_features/hex/BLOSUM62.rda new file mode 100644 index 00000000..88991e87 Binary files /dev/null and b/neofox/published_features/hex/BLOSUM62.rda differ diff --git a/neofox/published_features/hex/__init__.py b/neofox/published_features/hex/__init__.py new file mode 100755 index 00000000..4a64d329 --- /dev/null +++ b/neofox/published_features/hex/__init__.py @@ -0,0 +1,18 @@ +# +# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. +# +# This file is part of Neofox +# (see https://github.com/tron-bioinformatics/neofox). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.# diff --git a/neofox/published_features/hex/hex.py b/neofox/published_features/hex/hex.py new file mode 100755 index 00000000..e34bd1b4 --- /dev/null +++ b/neofox/published_features/hex/hex.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# +# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH. 
class Hex(object):
    """Wrapper around the HEX (Homology evaluation of Xenopeptides) R script.

    HEX (Chiaro et al., 2021) analyses a neoepitope candidate sequence for
    molecular mimicry to viral epitopes. The script ``hex.R`` is expected to
    live in the same directory as this module.
    """

    def __init__(self, references: ReferenceFolder, runner, configuration):
        """
        :param references: reference folder; its ``iedb`` attribute is joined
            with ``IEDB_FASTA`` to build the path of the IEDB FASTA file
        :type runner: neofox.helpers.runner.Runner
        :type configuration: neofox.references.DependenciesConfiguration
        """
        self.runner = runner
        self.configuration = configuration
        self.iedb_fasta = os.path.join(references.iedb, IEDB_FASTA)

    def apply_hex(self, mut_peptide):
        """Call the HEX tool on a mutated peptide.

        The tool analyses the neoepitope candidate sequence for molecular
        mimicry to viral epitopes.

        :param mut_peptide: mutated peptide sequence handed to ``hex.R``
        :return: the standard output of the script without surrounding
            whitespace, or None when the script printed nothing
        """
        my_path = os.path.abspath(os.path.dirname(__file__))
        tool_path = os.path.join(my_path, "hex.R")
        # the module directory is passed as last argument, presumably so that
        # hex.R can locate its companion files (TODO confirm against hex.R)
        cmd = [self.configuration.rscript, tool_path, mut_peptide, self.iedb_fasta, my_path]
        output, _ = self.runner.run_command(cmd)
        # A trailing newline must not leak into the annotation value and a
        # whitespace-only output carries no score at all.
        score = output.strip() if output else None
        return score or None

    def get_annotation(
            self, netmhcpan: BestAndMultipleBinder) -> List[Annotation]:
        """Wrapper function for HEX (Homology evaluation of Xenopeptides) (Chiaro et al., 2021).

        :param netmhcpan: netMHCpan results; only the peptide of
            ``best_epitope_by_affinity`` is used
        :return: a list with the single annotation ``hex_alignment_score``;
            its value is built from None when no best epitope peptide is
            available or the tool returned nothing
        """
        # TODO: add annotation of the B score ("hex_B_score") when re-implemented in python.
        #  The annotation is too slow for bigger datasets
        hex_aln_score = None
        # tolerate missing predictions instead of failing with an AttributeError
        best_epitope = getattr(netmhcpan, "best_epitope_by_affinity", None)
        peptide = getattr(best_epitope, "peptide", None)
        if peptide:
            hex_aln_score = self.apply_hex(peptide)
        annotations = [
            AnnotationFactory.build_annotation(
                value=hex_aln_score, name="hex_alignment_score"),
        ]
        return annotations
class TestHex(TestCase):
    """Integration test of the HEX wrapper against the configured references."""

    def setUp(self):
        # load_references() yields the reference folder and the dependencies configuration
        references, configuration = integration_test_tools.load_references()
        self.references = references
        self.configuration = configuration
        self.runner = Runner()

    def test_hex(self):
        """The alignment score reported for a fixed peptide must be 148."""
        hex_tool = Hex(
            references=self.references,
            runner=self.runner,
            configuration=self.configuration,
        )
        score = hex_tool.apply_hex(mut_peptide="FGLAIDVDD")
        logger.info(score)
        self.assertEqual(float(score), 148)