diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
old mode 100644
new mode 100755
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 2001a66b..a2e94a0c 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -18,7 +18,7 @@
## along with this program. If not, see .##
# this image contains multiple Python interpreters
-image: fkrull/multi-python:bionic
+image: python:3.8.14-buster
# Change pip's cache directory to be inside the project directory since we can
# only cache local items.
@@ -27,7 +27,6 @@ variables:
# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/reference/pip_install/#caching
-#
# If you want to also cache the installed packages, you have to install
# them in a virtualenv and cache it as well.
cache:
diff --git a/.readthedocs.yml b/.readthedocs.yml
old mode 100644
new mode 100755
diff --git a/Dockerfile b/Dockerfile
old mode 100644
new mode 100755
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/MANIFEST.in b/MANIFEST.in
index d93ba50a..c010435d 100755
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
-include neofox/published_features/neoag/neoag-master/*
include neofox/published_features/self_similarity/BLOSUM62-2.matrix.txt
include neofox/published_features/Tcell_predictor/amino-acids-features.pickle
include neofox/published_features/Tcell_predictor/genes-expression.pickle
@@ -6,11 +5,8 @@ include neofox/published_features/Tcell_predictor/SIRdata.mat
include neofox/published_features/Tcell_predictor/Classifier.pickle
include neofox/references/install_r_dependencies.R
include neofox/references/h2_database_allele_list.csv
-include neofox/published_features/hex/BLOSUM62.rda
-include neofox/published_features/hex/EPItOMe_modified.R
-include neofox/published_features/hex/hex.R
include neofox/expression_imputation/tcga_cohort_code.tab
include neofox/expression_imputation/tcga_exp_summary_modified.tab.gz.tbi
include neofox/expression_imputation/tcga_exp_summary_modified.tab.gz
include requirements.txt
-include README.md
\ No newline at end of file
+include README.md
diff --git a/README.md b/README.md
index 0d92c909..dae32e1d 100755
--- a/README.md
+++ b/README.md
@@ -29,8 +29,8 @@ NeoFox covers the following neoantigen features and prediction algorithms:
|---------------------------------------------------------|--------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|
| MHC I binding affinity/rank score (netMHCpan-v4.1) | Reynisson et al, 2020, Nucleic Acids Research | https://doi.org/10.4049/jimmunol.1700893 |
| MHC II binding affinity/rank score (netMHCIIpan-v4.0) | Reynisson et al, 2020, Nucleic Acids Research | https://doi.org/10.1111/imm.12889 |
-| MixMHCpred score v2.1 § | Bassani-Sternberg et al., 2017, PLoS Comp Bio; Gfeller, 2018, J Immunol. | https://doi.org/10.1371/journal.pcbi.1005725 , https://doi.org/10.4049/jimmunol.1800914 |
-| MixMHC2pred score v1.2 § | Racle et al, 2019, Nat. Biotech. 2019 | https://doi.org/10.1038/s41587-019-0289-6 |
+| MixMHCpred score v2.2 § | Bassani-Sternberg et al., 2017, PLoS Comp Bio; Gfeller, 2018, J Immunol. | https://doi.org/10.1371/journal.pcbi.1005725 , https://doi.org/10.4049/jimmunol.1800914 |
+| MixMHC2pred score v2.0.2 § | Racle et al, 2019, Nat. Biotech. 2019 | https://doi.org/10.1038/s41587-019-0289-6 |
| Differential Agretopicity Index (DAI) | Duan et al, 2014, JEM; Ghorani et al., 2018, Ann Oncol. | https://doi.org/10.1084/jem.20141308 |
| Self-Similarity | Bjerregaard et al, 2017, Front Immunol. | https://doi.org/10.3389/fimmu.2017.01566 |
| IEDB immunogenicity | Calis et al, 2013, PLoS Comput Biol. | https://doi.org/10.1371/journal.pcbi.1003266 |
@@ -42,7 +42,6 @@ NeoFox covers the following neoantigen features and prediction algorithms:
| Vaxrank | Rubinsteyn, 2017, Front Immunol | https://doi.org/10.3389/fimmu.2017.01807 |
| Priority score | Bjerregaard et al, 2017, Cancer Immunol Immunother. | https://doi.org/10.1007/s00262-017-2001-3 |
| Tcell predictor | Besser et al, 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z |
-| neoag | Smith et al, 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 |
| PRIME § | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 |
| HEX § | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 |
@@ -57,9 +56,9 @@ NeoFox depends on the following tools:
- BLAST 2.10.1
- netMHCpan 4.1
- netMHCIIpan 4.0
-- MixMHCpred 2.1
-- MixMHC2pred 1.2
-- PRIME 1.0
+- MixMHCpred 2.2 (optional)
+- MixMHC2pred 2.0.2 (optional)
+- PRIME 2.0 (optional)
Install from PyPI:
```
@@ -101,10 +100,10 @@ NEOFOX_RSCRIPT=`which Rscript`
NEOFOX_BLASTP=path/to/ncbi-blast-2.10.1+/bin/blastp
NEOFOX_NETMHCPAN=path/to/netMHCpan-4.1/netMHCpan
NEOFOX_NETMHC2PAN=path/to/netMHCIIpan-4.0/netMHCIIpan
-NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.1/MixMHCpred
-NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-1.2/MixMHC2pred_unix
+NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.2/MixMHCpred
+NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-2.0.2/MixMHC2pred_unix
NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb
-NEOFOX_PRIME=/path/to/PRIME/PRIME
+NEOFOX_PRIME=/path/to/PRIME-2.0/PRIME
````
## 4 Input data
@@ -112,7 +111,7 @@ NEOFOX_PRIME=/path/to/PRIME/PRIME
### 4.1 Neoantigen candidates in tabular format
This is an dummy example of a table with neoantigen candidates:
-| gene | mutation.wildTypeXmer | mutation.mutatedXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | external_annotation_1 | external_annotation_2 |
+| gene | wildTypeXmer | mutatedXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | dnaVariantAlleleFrequency | external_annotation_1 | external_annotation_2 |
|-------|-----------------------------|-----------------------------|-------------------|---------------|---------------------------|---------------------------|-----------------------|-----------------------|
| BRCA2 | AAAAAAAAAAAAALAAAAAAAAAAAAA | AAAAAAAAAAAAAFAAAAAAAAAAAAA | Ptx | 7.942 | 0.85 | 0.34 | some_value | some_value |
| BRCA2 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 7.942 | 0.85 | 0.34 | some_value | some_value |
@@ -122,8 +121,8 @@ This is an dummy example of a table with neoantigen candidates:
where:
- `gene`: the HGNC gene symbol
-- `mutation.mutatedXmer`: the neoantigen candidate sequence, i.e. the mutated amino acid sequence. The mutation should be located in the middle, flanked by 13 amino acid on both sites (IUPAC 1 respecting casing, eg: A)
-- `mutation.wildTypeXmer`: the equivalent non-mutated amino acid sequence (IUPAC 1 respecting casing, eg: A)
+- `mutatedXmer`: the neoantigen candidate sequence, i.e. the mutated amino acid sequence. The mutation should be located in the middle, flanked by 13 amino acids on both sides (IUPAC 1 respecting casing, eg: A)
+- `wildTypeXmer`: the equivalent non-mutated amino acid sequence (IUPAC 1 respecting casing, eg: A)
- `patientIdentifier`: the patient identifier
- `rnaExpression`: RNA expression. (**optional**) (see *NOTE*) This value can be in any format chosen by the user (e.g. TPM, RPKM) but it is recommended to be consistent for data that should be compared.
- `rnaVariantAlleleFrequency`: the variant allele frequency calculated from the RNA (**optional**, this will be estimated using the `dnaVariantAlleleFrequency` if not available)
diff --git a/docs/figures/figure1_v3b.png b/docs/figures/figure1_v3b.png
new file mode 100755
index 00000000..4f9244aa
Binary files /dev/null and b/docs/figures/figure1_v3b.png differ
diff --git a/docs/resources/column_description.xlsx b/docs/resources/column_description.xlsx
index 5962041e..af05fbb6 100755
Binary files a/docs/resources/column_description.xlsx and b/docs/resources/column_description.xlsx differ
diff --git a/docs/source/01_overview.md b/docs/source/01_overview.md
index ec3cef1d..a3401c2a 100644
--- a/docs/source/01_overview.md
+++ b/docs/source/01_overview.md
@@ -30,8 +30,8 @@ A list of implemented features and their references are given in Table 1. Please
|---------------------------------------------------------|--------------------------------------------------------------------------|-------------------------------------------------------------------------------------------|
| MHC I binding affinity/rank score (netMHCpan-v4.1) | Reynisson et al., 2020, Nucleic Acids Res. | https://doi.org/10.1093/nar/gkaa379 |
| MHC II binding affinity/rank score (netMHCIIpan-v4.0) | Reynisson et al., 2020, Nucleic Acids Res. | https://doi.org/10.1093/nar/gkaa379 |
-| MixMHCpred score v2.1 § | Bassani-Sternberg et al., 2017, PLoS Comp Bio; Gfeller, 2018, J Immunol. | https://doi.org/10.1371/journal.pcbi.1005725 , https://doi.org/10.4049/jimmunol.1800914 |
-| MixMHC2pred score v1.2 § | Racle et al., 2019, Nat. Biotech. 2019 | https://doi.org/10.1038/s41587-019-0289-6 |
+| MixMHCpred score v2.2 § | Bassani-Sternberg et al., 2017, PLoS Comp Bio; Gfeller, 2018, J Immunol. | https://doi.org/10.1371/journal.pcbi.1005725 , https://doi.org/10.4049/jimmunol.1800914 |
+| MixMHC2pred score v2.0.2 § | Racle et al., 2019, Nat. Biotech. 2019 | https://doi.org/10.1038/s41587-019-0289-6 |
| Differential Agretopicity Index (DAI) | Duan et al., 2014, JEM; Ghorani et al., 2018, Ann Oncol. | https://doi.org/10.1084/jem.20141308 |
| Self-Similarity | Bjerregaard et al., 2017, Front Immunol. | https://doi.org/10.3389/fimmu.2017.01566 |
| IEDB immunogenicity | Calis et al., 2013, PLoS Comput Biol. | https://doi.org/10.1371/journal.pcbi.1003266 |
@@ -43,8 +43,7 @@ A list of implemented features and their references are given in Table 1. Please
| Vaxrank | Rubinsteyn, 2017, Front Immunol | https://doi.org/10.3389/fimmu.2017.01807 |
| Priority score | Bjerregaard et al., 2017, Cancer Immunol Immunother. | https://doi.org/10.1007/s00262-017-2001-3 |
| Tcell predictor | Besser et al., 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z |
-| neoag | Smith et al., 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 |
-| PRIME § | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 |
+| PRIME v2.0 § | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 |
| HEX § | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 |
@@ -56,7 +55,7 @@ model Neofox's input and output data: neoantigens, patients, MHC alleles and neo
**Figure 1**
-![Neofox model](../figures/figure1_v3.png)
+![Neofox model](../figures/figure1_v3b.png)
For detailed information about the required input data, output data and usage please refer to the [User guide](03_user_guide.rst).
diff --git a/docs/source/02_installation.md b/docs/source/02_installation.md
index 225f1556..4828ede5 100755
--- a/docs/source/02_installation.md
+++ b/docs/source/02_installation.md
@@ -1,7 +1,7 @@
# Installation
This guide contains two alternatives to install NeoFox:
-- Building a docker image that automates the installation into a container
+- Building a docker image that automates the installation into a container (**NOTE**: the docker recipe is not supported in neofox-v1.1.0. Please use an older version ( 10 | Generator rate |
-| Classically_defined_neopeptide_MHCII | `NetMHCIIpan_bestRank_rank` < 1 | Generator rate |
-| Alternatively_defined_neopeptide_MHCII | `Best_rank_MHCII_score` < 4 and `Amplitude_MHCII_bestRank` < 2 | Generator rate |
-| GeneratorRate_CDN_MHCI | number of neoepitope candidates with MHC I binding affinity < 50 nM per neoantigen canidate | Generator rate |
-| GeneratorRate_ADN_MHCI | number of neoepitope candidates with MHC I binding affinity < 5000 nM per neoantigen canidate 10x better affinity in comparison to corresponding WT peptide | Generator rate |
-| GeneratorRate_MHCI | sum of `GeneratorRate_CDN_MHCI` and `GeneratorRate_ADN_MHCI` | Generator rate |
-| GeneratorRate_CDN_MHCII | number of neoepitope candidates with MHC II binding rank score < 1 per neoantigen canidate | Generator rate |
-| GeneratorRate_ADN_MHCII | number of neoepitope candidates with MHC II binding rank score < 4 per neoantigen candidate 4x better rank in comparison to corresponding WT peptide | Generator rate |
-| GeneratorRate_MHCII | sum of `GeneratorRate_CDN_MHCII` and `GeneratorRate_ADN_MHCII` | Generator rate |
-| Tcell_predictor | output score of T cell predictor model | Tcell predictor |
-| ImprovedBinder_MHCI | ratio of `NetMHCpan_MHCI_rank_bestRankWT` and `NetMHCpan_MHCI_rank_bestRank` > 1.2 | self-similarity |
-| Selfsimilarity_MHCI_conserved_binder | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` For conservered binder only | self-similarity |
-| Selfsimilarity_MHCI | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptide` | self-similarity |
-| Selfsimilarity_MHCII | score representing the similarity between `NetMHCIIpan_bestAffinity_peptide` and `NetMHCIIpan_bestAffinity_peptide` | self-similarity |
-| Number_of_mismatches_MCHI | number of amino acids that do no match between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` | Priority score |
-| Priority_score | combinatorial score of several features such as MHC binding, expression and VAF | Priority score |
-| Neoag_immunogenicity | output score of neoag model | neoag |
-| IEDB_Immunogenicity_MHCI | IEDB Immunogenicity score for ` NetMHCpan_bestAffinity_peptide` | IEDB Immunogenicity |
-| IEDB_Immunogenicity_MHCII | IEDB Immunogenicity score for `NetMHCIIpan_bestAffinity_peptide` | IEDB Immunogenicity |
-| MixMHCpred_bestScore_peptide | MHC class I neoepitope candidate sequence with maximum MixMHCpred score over all neoepitope canidates (8-11mers) and MHC I alleles | MixMHCpred |
-| MixMHCpred_bestScore_score | maximum MixMHCpred score over all neoepitope canidates (8-11mers) and MHC I alleles | MixMHCpred |
-| MixMHCpred_bestScore_rank | rank that corresponds to `MixMHCpred_bestScore_score` | MixMHCpred |
-| MixMHCpred_bestScore_allele | the allele with maximum MixMHCpred score | MixMHCpred |
-| MixMHC2pred_bestRank_peptide | MHC class II neoepitope candidate sequence with minimal MixMHC2pred score over all neoepitope canidates (13-18mers) and MHC II alleles | MixMHC2pred |
-| MixMHC2pred_bestRank_rank | minimal MixMHC2pred score over all neoepitope canidates (13-18mers) and MHC II alleles | MixMHC2pred |
-| MixMHC2pred_bestRank_allele | the MHC II isoform with minimum MixMHC2pred rank score | MixMHC2pred |
-| Dissimilarity_MHCI | score reflecting the dissimilarity of `NetMHCpan_bestAffinity_peptide` to the self-proteome | dissimilarity |
-| Dissimilarity_MHCII | score reflecting the dissimilarity of `NetMHCIIpan_bestAffinity_peptide` to the self-proteome | dissimilarity |
-| Vaxrank_bindingScore | total binding score of vaxrank | vaxrank |
-| Vaxrank_totalScore | product of total binding score and expression score. Originally, the root of the number of reads supporting the mutation are used in the original implementation. To simplify, the expression normalised to VAF is used. | vaxrank |
-| PRIME_bestScore_allele | best predicted MHC allele by PRIME model | PRIME |
-| PRIME_bestScore_peptide | best predicted neoepitope candidate by PRIME model | PRIME |
-| PRIME_bestScore_rank | output rank score of PRIME model | PRIME |
-| PRIME_bestScore_score | output score of PRIME model | PRIME |
-| HexAlignmentScore_MHCI | the alignment score by HEX for `NetMHCpan_bestAffinity_peptide` | HEX |
-| HexAlignmentScore_MHCII | the alignment score by HEX for ` NetMHCIIpan_bestAffinity_peptide` | HEX |
+| Column name | Description | Feature group/ Paper |
+|---------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------|
+| dnaVariantAlleleFrequency | the variant allele frequency calculated from the DNA | - |
+| mutatedXmer | the long mutated amino acid sequence | - |
+| wildTypeXmer | the long non-mutated amino acid sequence. This field shall be empty for alternative neoantigen classes | - |
+| patientIdentifier | the patient identifier | - |
+| rnaExpression | the RNA expression. If expression was imputed, this will be `imputedGeneExpression` | expression |
+| imputedGeneExpression | median gene expression in the TCGA cohort of the tumor entity provided in the patient file. | expression |
+| rnaVariantAlleleFrequency | the variant allele frequency calculated from the RNA | - |
+| gene | the HGNC gene symbol | - |
+| Mutated_rnaExpression_fromRNA | transcript expression normalized by the variant allele frequency in RNA of the mutation | expression |
+| Mutated_rnaExpression_fromDNA | transcript expression normalized by the variant allele frequency in DNA of the mutation | |
+| Mutated_imputedGeneExpression_fromRNA | imputed gene expression normalized by the variant allele frequency in RNA of the mutation | expression |
+| Mutated_imputedGeneExpression_fromDNA | imputed gene expression normalized by the variant allele frequency in DNA of the mutation | |
+| mutation_not_found_in_proteome | indicates if mutated amino acid sequence was not found in the WT proteome by exact search | Priority score |
+| NetMHCpan_bestRank_rank | minimal MHC I binding rank score over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_peptide | neoepitope candidate sequence with minimal MHC I binding rank score | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_core | The minimal 9 amino acid binding core directly in contact with the MHC. | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_Icore | Interaction core. This is the sequence of the binding core including eventual insertions or deletions. | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_Of | Starting position offset of the core in the peptide (0 based) | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_Gp | Position of the deletion (0 based), if any, in the Icore compared to the core. | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_Gl | Length of the deletion, if any, in the Icore compared to the core. | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_allele | the MHC I allele related to `NetMHCpan_bestRank_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_affinity | minimal MHC I binding affinity over all neoepitope candidates (8-11mers) and MHC I alleles | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_peptide | neoepitope candidate sequence with minimal MHC I binding affinity | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_core | The minimal 9 amino acid binding core directly in contact with the MHC. | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_Icore | Interaction core. This is the sequence of the binding core including eventual insertions or deletions. | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_Of | Starting position offset of the core in the peptide (0 based) | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_Gp | Position of the deletion (0 based), if any, in the Icore compared to the core. | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_Gl | Length of the deletion, if any, in the Icore compared to the core. | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_allele | the MHC I allele related to `NetMHCpan_bestAffinity_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank9mer_rank | minimal MHC I binding rank score over all neoepitope candidates (9mers only) and MHC I alleles | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank9mer_peptide | neoepitope candidate sequence (9mer) with minimal MHC I binding rank score | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank9mer_allele | the MHC I allele related to `NetMHCpan_bestRank9mer_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_affinity | minimal MHC I binding affinity over all neoepitope candidates (9mers) and MHC I alleles | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_allele | the MHC I allele related to `NetMHCpan_bestAffinity9mer_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_peptide | neoepitope candidate sequence (9mer) with minimal MHC I binding affinity | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_affinityWT | MHC I binding affinity of `NetMHCpan_bestAffinity_peptideWT` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity_peptideWT | WT epitope that corresponds to `NetMHCpan_bestAffinity_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_rankWT | MHC I binding rank score of `NetMHCpan_bestRank_peptideWT` | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank_peptideWT | WT epitope that corresponds to `NetMHCpan_bestRank_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank9mer_rankWT | MHC I binding rank score of `NetMHCpan_bestRank9mer_peptideWT` | MHC I binding with netMHCpan |
+| NetMHCpan_bestRank9mer_peptideWT | WT epitope that corresponds to `NetMHCpan_bestRank9mer_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_affinityWT | MHC I binding affinity of `NetMHCpan_bestAffinity9mer_peptideWT` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_peptideWT | WT epitope that corresponds to `NetMHCpan_bestAffinity9mer_peptide` | |
+| PHBR_I | harmonic mean of minimal MHC I binding rank scores of all MHC I alleles of a patient | PHBR-I |
+| NetMHCpan_bestAffinity9mer_positionMutation | indicates position of the mutation in `NetMHCpan_bestAffinity9mer_peptide` | MHC I binding with netMHCpan |
+| NetMHCpan_bestAffinity9mer_anchorMutated | mutation in `NetMHCpan_bestAffinity9mer_peptide` in an anchor position (i.e. position 2 or 9) | anchor/non-anchor |
+| NetMHCIIpan_bestRank_rank | minimal MHC II binding rank score over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_peptide | neoepitope candidate sequence with minimal MHC II binding rank score | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_core | Binding core register (9mer) | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_Of | Starting position offset of the optimal binding core (starting from 0) | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_coreRel | Reliability of the binding core, expressed as the fraction of networks in the ensemble selecting the optimal core | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_allele | the MHC II isoform related to `NetMHCIIpan_bestRank_peptide` | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_affinity | minimal MHC II binding affinity over all neoepitope candidates (15mers) and all MHC II alleles | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_peptide | neoepitope candidate sequence with minimal MHC II binding affinity | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_core | Binding core register (9mer) | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_Of | Starting position offset of the optimal binding core (starting from 0) | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_coreRel | Reliability of the binding core, expressed as the fraction of networks in the ensemble selecting the optimal core | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_allele | the MHC II isoform related to `NetMHCIIpan_bestAffinity_peptide` | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_rankWT | minimal MHC II binding rank of `NetMHCIIpan_bestRank_peptideWT` | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestRank_peptideWT | WT epitope sequence (15mer) that corresponds to `NetMHCIIpan_bestRank_peptide` | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_affinityWT | minimal MHC II binding affinity of `NetMHCIIpan_bestAffinity_peptideWT` | MHC II binding with netMHCIIpan |
+| NetMHCIIpan_bestAffinity_peptideWT | WT epitope sequence (15mer) that corresponds to `NetMHCIIpan_bestAffinity_peptide` | MHC II binding with netMHCIIpan |
+| PHBR_II | harmonic mean of minimal MHC II binding rank scores of all MHC II alleles of a patient | PHBR-II |
+| Amplitude_MHCI_bestAffinity9mer | ratio of `NetMHCpan_bestAffinity9mer_affinity` and `NetMHCpan_bestAffinity9mer_affinityWT` | Recognition Potential |
+| Amplitude_MHCI_bestAffinity | ratio of `NetMHCpan_bestAffinity_affinityWT` and `NetMHCpan_bestAffinity_affinity` | Generator rate |
+| Amplitude_MHCII_bestRank | ratio of `NetMHCIIpan_bestRank_rank` and `NetMHCIIpan_bestRank_rankWT` | Generator rate |
+| Pathogensimiliarity_MHCI_bestAffinity9mer | score representing the similarity of `NetMHCpan_bestAffinity9mer_peptide` to pathogen sequences in IEDB database | Recognition Potential |
+| Pathogensimiliarity_MHCII_bestAffinity | score representing the similarity of `NetMHCIIpan_bestRank_peptide` to pathogen sequences in IEDB database | Recognition Potential |
+| RecognitionPotential_MHCI_bestAffinity9mer | product of `Amplitude_MHCI_bestAffinity9mer` and `Pathogensimiliarity_MHCI_bestAffinity9mer` | Recognition Potential |
+| DAI_MHCI_bestAffinity | difference of `NetMHCpan_bestAffinity_affinityWT` and `NetMHCpan_bestAffinity_affinity` | DAI |
+| Classically_defined_neopeptide_MHCI | `NetMHCpan_bestAffinity_affinity` < 50 nM | Generator rate |
+| Alternatively_defined_neopeptide_MHCI | `NetMHCpan_bestAffinity_affinity` < 5000 nM and `Amplitude_MHCI_bestAffinity` > 10 | Generator rate |
+| Classically_defined_neopeptide_MHCII | `NetMHCIIpan_bestRank_rank` < 1 | Generator rate |
+| Alternatively_defined_neopeptide_MHCII | `NetMHCIIpan_bestRank_rank` < 4 and `Amplitude_MHCII_bestRank` < 2 | Generator rate |
+| GeneratorRate_CDN_MHCI | number of neoepitope candidates with MHC I binding affinity < 50 nM per neoantigen candidate | Generator rate |
+| GeneratorRate_ADN_MHCI | number of neoepitope candidates with MHC I binding affinity < 5000 nM and 10x better affinity in comparison to the corresponding WT peptide, per neoantigen candidate | Generator rate |
+| GeneratorRate_MHCI | sum of `GeneratorRate_CDN_MHCI` and `GeneratorRate_ADN_MHCI` | Generator rate |
+| GeneratorRate_CDN_MHCII | number of neoepitope candidates with MHC II binding rank score < 1 per neoantigen candidate | Generator rate |
+| GeneratorRate_ADN_MHCII | number of neoepitope candidates with MHC II binding rank score < 4 and 4x better rank in comparison to the corresponding WT peptide, per neoantigen candidate | Generator rate |
+| GeneratorRate_MHCII | sum of `GeneratorRate_CDN_MHCII` and `GeneratorRate_ADN_MHCII` | Generator rate |
+| Tcell_predictor | output score of T cell predictor model | Tcell predictor |
+| ImprovedBinder_MHCI | ratio of `NetMHCpan_bestRank_rankWT` and `NetMHCpan_bestRank_rank` > 1.2 | self-similarity |
+| Selfsimilarity_MHCI_conserved_binder | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT`, for conserved binders only | self-similarity |
+| Selfsimilarity_MHCI | score representing the similarity between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` | self-similarity |
+| Selfsimilarity_MHCII | score representing the similarity between `NetMHCIIpan_bestAffinity_peptide` and `NetMHCIIpan_bestAffinity_peptideWT` | self-similarity |
+| Number_of_mismatches_MCHI | number of amino acids that do not match between `NetMHCpan_bestRank_peptide` and `NetMHCpan_bestRank_peptideWT` | Priority score |
+| Priority_score_fromDNA | combinatorial score of several features such as MHC binding, transcription expression and VAF in DNA | Priority score |
+| Priority_score_fromRNA | combinatorial score of several features such as MHC binding, transcription expression and VAF in RNA | Priority score |
+| Priority_score_imputed_fromDNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in DNA | Priority score |
+| Priority_score_imputed_fromRNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in RNA | Priority score |
+| IEDB_Immunogenicity_MHCI | IEDB Immunogenicity score for `NetMHCpan_bestAffinity_peptide` | IEDB Immunogenicity |
+| IEDB_Immunogenicity_MHCII | IEDB Immunogenicity score for `NetMHCIIpan_bestAffinity_peptide` | IEDB Immunogenicity |
+| MixMHCpred_bestScore_peptide | MHC class I neoepitope candidate sequence with maximum MixMHCpred score over all neoepitope candidates (8-11mers) and MHC I alleles | MixMHCpred |
+| MixMHCpred_bestScore_score | maximum MixMHCpred score over all neoepitope candidates (8-11mers) and MHC I alleles | MixMHCpred |
+| MixMHCpred_bestScore_rank | rank that corresponds to `MixMHCpred_bestScore_score` | MixMHCpred |
+| MixMHCpred_bestScore_allele | the allele with maximum MixMHCpred score | MixMHCpred |
+| MixMHC2pred_bestRank_peptide | MHC class II neoepitope candidate sequence with minimal MixMHC2pred score over all neoepitope candidates (13-18mers) and MHC II alleles | MixMHC2pred |
+| MixMHC2pred_bestRank_rank | minimal MixMHC2pred score over all neoepitope candidates (13-18mers) and MHC II alleles | MixMHC2pred |
+| MixMHC2pred_bestRank_allele | the MHC II isoform with minimum MixMHC2pred rank score | MixMHC2pred |
+| Dissimilarity_MHCI | score reflecting the dissimilarity of `NetMHCpan_bestAffinity_peptide` to the self-proteome | dissimilarity |
+| Dissimilarity_MHCII | score reflecting the dissimilarity of `NetMHCIIpan_bestAffinity_peptide` to the self-proteome | dissimilarity |
+| Vaxrank_bindingScore | total binding score of vaxrank | vaxrank |
+| Vaxrank_totalScore | product of total binding score and transcript expression score. In the original implementation, the root of the number of reads supporting the mutation is used. To simplify, the transcript expression normalised to VAF is used. | vaxrank |
+| Vaxrank_totalScore_imputed | product of total binding score and imputed gene expression score. In the original implementation, the root of the number of reads supporting the mutation is used. To simplify, the imputed gene expression normalised to VAF is used. | vaxrank |
+| PRIME_bestScore_allele | best predicted MHC allele by PRIME model | PRIME |
+| PRIME_bestScore_peptide | best predicted neoepitope candidate by PRIME model | PRIME |
+| PRIME_bestScore_rank | output rank score of PRIME model | PRIME |
+| PRIME_bestScore_score | output score of PRIME model | PRIME |
+| HexAlignmentScore_MHCI | the alignment score by HEX for `NetMHCpan_bestAffinity_peptide` | HEX |
+| HexAlignmentScore_MHCII | the alignment score by HEX for `NetMHCIIpan_bestAffinity_peptide` | HEX |
In addition, all logging output is appended to a log file with the suffix
@@ -117,10 +138,10 @@ were provided in the input table, these external annotations.
This is a dummy example:
-| dnaVariantAlleleFrequency | gene | imputedGeneExpression | mutatedXmer | position | wildTypeXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | ADN_MHCI | ADN_MHCII | Amplitude_MHCII_rank | Amplitude_MHCI_affinity | Amplitude_MHCI_affinity_9mer | Best_affinity_MHCII_allele | Best_affinity_MHCII_allele_WT | Best_affinity_MHCII_epitope | Best_affinity_MHCII_epitope_WT | Best_affinity_MHCII_score | Best_affinity_MHCII_score_WT | Best_affinity_MHCI_9mer_allele | Best_affinity_MHCI_9mer_allele_WT | Best_affinity_MHCI_9mer_anchor_mutated | Best_affinity_MHCI_9mer_epitope | Best_affinity_MHCI_9mer_epitope_WT | Best_affinity_MHCI_9mer_position_mutation | Best_affinity_MHCI_9mer_score | Best_affinity_MHCI_9mer_score_WT | Best_affinity_MHCI_allele | Best_affinity_MHCI_allele_WT | Best_affinity_MHCI_epitope | Best_affinity_MHCI_epitope_WT | Best_affinity_MHCI_score | Best_affinity_MHCI_score_WT | Best_rank_MHCII_score | Best_rank_MHCII_score_WT | Best_rank_MHCII_score_allele | Best_rank_MHCII_score_allele_WT | Best_rank_MHCII_score_epitope | Best_rank_MHCII_score_epitope_WT | Best_rank_MHCI_9mer_allele | Best_rank_MHCI_9mer_allele_WT | Best_rank_MHCI_9mer_epitope | Best_rank_MHCI_9mer_epitope_WT | Best_rank_MHCI_9mer_score | Best_rank_MHCI_9mer_score_WT | Best_rank_MHCI_score | Best_rank_MHCI_score_WT | Best_rank_MHCI_score_allele | Best_rank_MHCI_score_allele_WT | Best_rank_MHCI_score_epitope | Best_rank_MHCI_score_epitope_WT | CDN_MHCI | CDN_MHCII | DAI_MHCI_affinity_cutoff500nM | Dissimilarity_MHCI_cutoff500nM | Expression_mutated_transcript | Generator_rate | IEDB_Immunogenicity_MHCI_cutoff500nM | Improved_Binder_MHCI | MixMHC2pred_best_allele | MixMHC2pred_best_peptide | MixMHC2pred_best_rank | MixMHCpred_best_allele | MixMHCpred_best_peptide | MixMHCpred_best_rank | MixMHCpred_best_score | Neoag_immunogenicity | Number_of_mismatches_MCHI | PHBR-I | PHBR-II | Pathogensimiliarity_MHCI_affinity_9mer | 
Priority_score | Recognition_Potential_MHCI_affinity_9mer | Selfsimilarity_MHCI_conserved_binder | Tcell_predictor_score_cutoff500nM | VAF_in_RNA | VAF_in_tumor | [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | mutation_not_found_in_proteome | patient | substitution | transcript_expression | vaxrank_binding_score | vaxrank_total_score |
-|---------------------------|-------|------------------------|-----------------------------|-----------|-----------------------------|-------------------|---------------|---------------------------|----------------------------------------|----------|-----------|----------------------|-------------------------|------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|--------------------------------|-----------------------------------|----------------------------------------|---------------------------------|------------------------------------|-------------------------------------------|-------------------------------|----------------------------------|---------------------------|------------------------------|----------------------------|-------------------------------|--------------------------|-----------------------------|-----------------------|--------------------------|------------------------------|---------------------------------|-------------------------------|----------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|----------------------|-------------------------|-----------------------------|--------------------------------|------------------------------|---------------------------------|----------|-----------|-------------------------------|--------------------------------|-------------------------------|----------------|--------------------------------------|----------------------|-------------------------|--------------------------|-----------------------|------------------------|-------------------------|----------------------|-----------------------|----------------------|---------------------------|---------|---------|-----------------
-----------------------|----------------|------------------------------------------|--------------------------------------|-----------------------------------|------------|--------------|---------------------------------------------|--------------------------------|---------|--------------|-----------------------|-----------------------|---------------------|
- | 0.294 | BRCA2 | 0.5 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 13.16998 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 |
-| 0.173 | BRCA2 | 0.5 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 0.71575659 | 0.556 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 1 | 1 | 10 | 90.685 | 90.685 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAAAAAAMAAAAA | AAAAAAAAARAAAAA | 421.53 | 554.92 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAM | AAAAAAAAR | 9 | 24.1 | 6346.9 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 24.1 | 6346.9 | 0.25 | 2.5 | HLA-DQA10401-DQB10302 | HLA-DQA10401-DQB10302 | AAAAAAAAAAMAAAA | AAAAAAAAAARAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 0.0587 | 8.9317 | 0.0587 | 8.9317 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 1 | 1 | 6322.8 | 1 | 0.39796 | 1 | 0.18288 | 1 | DPA1_01_03__DPB1_04_01 | AAAAMAAAAAAAAAAA | 2.44 | B0702 | AAAAAAAAM | 0.07 | 0.5444 | 39.51379 | 1 | 0.29303 | 1.5594 | 0 | 0.10626 | 0 | NA | 0.46452844 | 0.556 | 0.173 | AAAAAAAAAAAAARAAAAAAAAAAAAA | 1 | Ptx | E135S | 0.71575659 | 3.8741 | 1.5417 |
+| dnaVariantAlleleFrequency | gene | imputedGeneExpression | mutatedXmer | position | wildTypeXmer | patientIdentifier | rnaExpression | rnaVariantAlleleFrequency | +-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | ADN_MHCI | ADN_MHCII | Amplitude_MHCII_rank | Amplitude_MHCI_affinity | Amplitude_MHCI_affinity_9mer | Best_affinity_MHCII_allele | Best_affinity_MHCII_allele_WT | Best_affinity_MHCII_epitope | Best_affinity_MHCII_epitope_WT | Best_affinity_MHCII_score | Best_affinity_MHCII_score_WT | Best_affinity_MHCI_9mer_allele | Best_affinity_MHCI_9mer_allele_WT | Best_affinity_MHCI_9mer_anchor_mutated | Best_affinity_MHCI_9mer_epitope | Best_affinity_MHCI_9mer_epitope_WT | Best_affinity_MHCI_9mer_position_mutation | Best_affinity_MHCI_9mer_score | Best_affinity_MHCI_9mer_score_WT | Best_affinity_MHCI_allele | Best_affinity_MHCI_allele_WT | Best_affinity_MHCI_epitope | Best_affinity_MHCI_epitope_WT | Best_affinity_MHCI_score | Best_affinity_MHCI_score_WT | Best_rank_MHCII_score | Best_rank_MHCII_score_WT | Best_rank_MHCII_score_allele | Best_rank_MHCII_score_allele_WT | Best_rank_MHCII_score_epitope | Best_rank_MHCII_score_epitope_WT | Best_rank_MHCI_9mer_allele | Best_rank_MHCI_9mer_allele_WT | Best_rank_MHCI_9mer_epitope | Best_rank_MHCI_9mer_epitope_WT | Best_rank_MHCI_9mer_score | Best_rank_MHCI_9mer_score_WT | Best_rank_MHCI_score | Best_rank_MHCI_score_WT | Best_rank_MHCI_score_allele | Best_rank_MHCI_score_allele_WT | Best_rank_MHCI_score_epitope | Best_rank_MHCI_score_epitope_WT | CDN_MHCI | CDN_MHCII | DAI_MHCI_affinity_cutoff500nM | Dissimilarity_MHCI_cutoff500nM | Expression_mutated_transcript | Generator_rate | IEDB_Immunogenicity_MHCI_cutoff500nM | Improved_Binder_MHCI | MixMHC2pred_best_allele | MixMHC2pred_best_peptide | MixMHC2pred_best_rank | MixMHCpred_best_allele | MixMHCpred_best_peptide | MixMHCpred_best_rank | MixMHCpred_best_score | Number_of_mismatches_MCHI | PHBR-I | PHBR-II | Pathogensimiliarity_MHCI_affinity_9mer | Priority_score | 
Recognition_Potential_MHCI_affinity_9mer | Selfsimilarity_MHCI_conserved_binder | Tcell_predictor_score_cutoff500nM | VAF_in_RNA | VAF_in_tumor | [WT]_+-13_AA_(SNV)_/_-15_AA_to_STOP_(INDEL) | mutation_not_found_in_proteome | patient | substitution | transcript_expression | vaxrank_binding_score | vaxrank_total_score |
+|---------------------------|-------|------------------------|-----------------------------|-----------|-----------------------------|-------------------|---------------|---------------------------|----------------------------------------|----------|-----------|----------------------|-------------------------|------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|--------------------------------|-----------------------------------|----------------------------------------|---------------------------------|------------------------------------|-------------------------------------------|-------------------------------|----------------------------------|---------------------------|------------------------------|----------------------------|-------------------------------|--------------------------|-----------------------------|-----------------------|--------------------------|------------------------------|---------------------------------|-------------------------------|----------------------------------|----------------------------|-------------------------------|-----------------------------|--------------------------------|---------------------------|------------------------------|----------------------|-------------------------|-----------------------------|--------------------------------|------------------------------|---------------------------------|----------|-----------|-------------------------------|--------------------------------|-------------------------------|----------------|--------------------------------------|----------------------|-------------------------|--------------------------|-----------------------|------------------------|-------------------------|----------------------|-----------------------|---------------------------|---------|---------|----------------------------------------
|----------------|------------------------------------------|--------------------------------------|-----------------------------------|------------|--------------|---------------------------------------------|--------------------------------|---------|--------------|-----------------------|-----------------------|---------------------|
+ | 0.294 | BRCA2 | 0.5 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAALAAAAAAAAAAAAA | Ptx | 0.51950689 | 0.857 | AAAAAAAAAAAAAFAAAAAAAAAAAAA | 0 | 1 | 28 | 0.88723 | 0.88723 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | 251.77 | 513.02 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAF | AAAAAAAAL | 9 | 24.3 | 21.7 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 24.3 | 21.7 | 0.05 | 1.4 | HLA-DQA10301-DQB10402 | HLA-DQA10301-DQB10402 | AAAAFAAAAAAAAAA | AAAALAAAAAAAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 0.0592 | 0.0493 | 0.0592 | 0.0493 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAF | AAAAAAAAL | 1 | 1 | -2.6 | 1 | 0.44522 | 1 | 0.18288 | 0 | DPA1_01_03__DPB1_04_01 | AAAAFAAAAAAAAAAA | 0.997 | B0702 | AAAAAAAAF | 0.1 | 0.50487 | 1 | 0.31193 | 0.21892 | 0 | 0.07017 | 0 | 0.99178271 | 0.40327581 | 0.857 | 0.294 | AAAAAAAAAAAAALAAAAAAAAAAAAA | 1 | Ptx | I547T | 0.51950689 | 3.7689 | 1.678 |
+| 0.173 | BRCA2 | 0.5 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 14 | AAAAAAAAAAAAARAAAAAAAAAAAAA | Ptx | 0.71575659 | 0.556 | AAAAAAAAAAAAAMAAAAAAAAAAAAA | 1 | 1 | 10 | 90.685 | 90.685 | HLA-DQA10401-DQB10402 | HLA-DQA10401-DQB10402 | AAAAAAAAAMAAAAA | AAAAAAAAARAAAAA | 421.53 | 554.92 | HLA-C*16:01 | HLA-C*16:01 | 1 | AAAAAAAAM | AAAAAAAAR | 9 | 24.1 | 6346.9 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 24.1 | 6346.9 | 0.25 | 2.5 | HLA-DQA10401-DQB10302 | HLA-DQA10401-DQB10302 | AAAAAAAAAAMAAAA | AAAAAAAAAARAAAA | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 0.0587 | 8.9317 | 0.0587 | 8.9317 | HLA-C*16:01 | HLA-C*16:01 | AAAAAAAAM | AAAAAAAAR | 1 | 1 | 6322.8 | 1 | 0.39796 | 1 | 0.18288 | 1 | DPA1_01_03__DPB1_04_01 | AAAAMAAAAAAAAAAA | 2.44 | B0702 | AAAAAAAAM | 0.07 | 0.5444 | 1 | 0.29303 | 1.5594 | 0 | 0.10626 | 0 | NA | 0.46452844 | 0.556 | 0.173 | AAAAAAAAAAAAARAAAAAAAAAAAAA | 1 | Ptx | E135S | 0.71575659 | 3.8741 | 1.5417 |
### JSON format
@@ -264,7 +285,7 @@ the MHC-II neoepitope candidates.
Two output files with the suffix "*_mhcI_epitope_candidates_annotated.tsv" and ""*_mhcII_epitope_candidates_annotated.tsv"" are created.
-The following table describes each of the annotations in the output:
+The following table describes each of the annotations in the output. MHC-I or MHC-II specific features will only be available in the respective table:
**TABLE 2**
@@ -273,32 +294,45 @@ The following table describes each of the annotations in the output:
| dnaVariantAlleleFrequency | the variant allele frequency calculated from the DNA | - |
| mutatedSequence | the mutated amino acid sequence | - |
| wildTypeSequence | the non-mutated amino acid sequence (when not provided in the input this will contain the Blastp closest sequence in the proteome) | - |
+| core | MHC core part of the peptide ligand that primarily interacts with the MHC binding groove | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
| alleleMhcI / isoformMhcII | either the MHC-I allele for MHC-I neoepitopes or the MHC-II isoform for MHC-II neoepitopes | - |
| patientIdentifier | the patient identifier (optional) | - |
| rnaExpression | the RNA expression. If expression was imputed, this will will be `imputedGeneExpression` | expression |
| imputedGeneExpression | median gene expression in the TCGA cohort of the tumor entity provided in the patient file. | expression |
| rnaVariantAlleleFrequency | the variant allele frequency calculated from the RNA | - |
| gene | the HGNC gene symbol | - |
+| Mutated_rnaExpression_fromRNA | transcript expression normalized by the variant allele frequency in RNA of the mutation | expression |
+| Mutated_rnaExpression_fromDNA | transcript expression normalized by the variant allele frequency in DNA of the mutation | |
+| Mutated_imputedGeneExpression_fromRNA | imputed gene expression normalized by the variant allele frequency in RNA of the mutation | expression |
+| Mutated_imputedGeneExpression_fromDNA | imputed gene expression normalized by the variant allele frequency in DNA of the mutation | |
| affinityMutated | NetMHCpan / NetMHCIIpan affinity score of the mutated peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
| affinityWildType | NetMHCpan / NetMHCIIpan affinity score of the wild type peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
| rankMutated | NetMHCpan / NetMHCIIpan rank of the mutated peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
| rankWildType | NetMHCpan / NetMHCIIpan rank of the wild type peptide for MHC-I or MHC-II neoepitopes respectively | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
-| MixMHCpred_score | MixMHCpred score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
+| MixMHCpred_score | MixMHCpred score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
| MixMHCpred_rank | MixMHCpred rank of the mutated peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
-| MixMHCpred_WT_score | MixMHCpred score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
+| MixMHCpred_WT_score | MixMHCpred score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
| MixMHCpred_WT_rank | MixMHCpred rank of the wild type peptide for MHC-I neoepitopes | MHC-I binding with mixMHCpred |
-| MixMHC2pred_score | MixMHC2pred score of the mutated peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
+| MixMHC2pred_score | MixMHC2pred score of the mutated peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
| MixMHC2pred_rank | MixMHC2pred rank of the mutated peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
-| MixMHC2pred_WT_score | MixMHC2pred score of the wild type peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
+| MixMHC2pred_WT_score | MixMHC2pred score of the wild type peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
| MixMHC2pred_WT_rank | MixMHC2pred rank of the wild type peptide for MHC-II neoepitopes | MHC-II binding with mixMHC2pred |
-| PRIME_score | PRIME score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
+| PRIME_score | PRIME score of the mutated peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
| PRIME_rank | PRIME rank of the mutated peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
-| PRIME_WT_score | PRIME score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
+| PRIME_WT_score | PRIME score of the wild type peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
| PRIME_WT_rank | PRIME rank of the wild type peptide for MHC-I neoepitopes | MHC-I binding with PRIME |
| DAI | difference of `affinityWildType` and `affinityMutated` | DAI (only availble for MHC-I) |
+| Gl | Length of the deletion (in the core), if any. | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
+| Gp | Position of the deletion (in the core), if any. | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
+| Icore | Interaction core. This is the sequence of the binding core including any insertions or deletions. | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
+| Of | The starting position of the Core within the predicted peptide | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
+| Core_Rel | Reliability of the (MHCII) binding core, expressed as the fraction of networks in the ensemble selecting the optimal core | MHC-I / MHC-II binding with netMHCpan / netMHCIIpan |
| IEDB_Immunogenicity | IEDB Immunogenicity score for `affinityMutated` | IEDB immunogenicity |
| Improved_Binder_MHCI | ratio of `affinityWildType` and `affinityMutated` > 1.2 | self-similarity (only available for MHC-I) |
-| Priority_score | combinatorial score of several features such as MHC binding, expression and VAF | Priority score |
+| Priority_score_fromDNA | combinatorial score of several features such as MHC binding, transcription expression and VAF in DNA | Priority score |
+| Priority_score_fromRNA | combinatorial score of several features such as MHC binding, transcription expression and VAF in RNA | Priority score |
+| Priority_score_imputed_fromDNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in DNA | Priority score |
+| Priority_score_imputed_fromRNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in RNA | Priority score |
| mutation_not_found_in_proteome | indicates if mutated amino acid sequence was not found in the WT proteome by exact search | Priority score |
| Selfsimilarity | score representing the similarity between `rankMutated` and `rankWildType` | self-similarity |
| Selfsimilarity_conserved_binder | score representing the similarity between `rankMutated` and `rankWildType` for conserved binder only | self-similarity (only available for MHC-I) |
diff --git a/docs/source/03_03_usage.md b/docs/source/03_03_usage.md
index 2604c7e8..6f4a4a4a 100755
--- a/docs/source/03_03_usage.md
+++ b/docs/source/03_03_usage.md
@@ -38,7 +38,7 @@ where:
- `--patient-id`: patient identifier (*optional*, this is only relevant if the column `patientIdentifier` is missing in the candidate input file)
**PLEASE NOTE THE FOLLOWING HINTS**:
-- if all expression values related to a patient are NA or `rnaExpression` is not given in the input file but the tumor type has been provided in the patient file, imputated expression will be used for the relevant features
+- If a tumor type has been provided in the patient file, imputed gene expression from TCGA will be annotated, and expression-dependent features will also be determined with the imputed gene expression.
**EXAMPLE**
This is an example to call NeoFox with a candidate file and obtaining the annotated neoantigen candidates in [tabular](03_02_output_data.md#tabular-format) format:
@@ -54,12 +54,12 @@ The optional **config** file with the paths to the dependencies can look like th
````commandline
NEOFOX_REFERENCE_FOLDER=path/to/reference/folder
NEOFOX_RSCRIPT=`which Rscript`
-NEOFOX_BLASTP=path/to/ncbi-blast-2.10.1+/bin/blastp
-NEOFOX_NETMHCPAN=path/to/netMHCpan-4.1/netMHCpan
-NEOFOX_NETMHC2PAN=path/to/netMHCIIpan-4.0/netMHCIIpan
-NEOFOX_MIXMHCPRED=path/to/MixMHCpred-2.1/MixMHCpred
-NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred-1.2/MixMHC2pred_unix
-NEOFOX_MAKEBLASTDB=path/to/ncbi-blast-2.8.1+/bin/makeblastdb
+NEOFOX_BLASTP=path/to/blast/bin/blastp
+NEOFOX_NETMHCPAN=path/to/netMHCpan/netMHCpan
+NEOFOX_NETMHC2PAN=path/to/netMHCIIpan/netMHCIIpan
+NEOFOX_MIXMHCPRED=path/to/MixMHCpred/MixMHCpred
+NEOFOX_MIXMHC2PRED=path/to/MixMHC2pred/MixMHC2pred_unix
+NEOFOX_MAKEBLASTDB=path/to/ncbi-blast/bin/makeblastdb
NEOFOX_PRIME=/path/to/PRIME/PRIME
````
@@ -89,6 +89,9 @@ where:
## Running from docker
+**NOTE: The provided docker recipe has not yet been adapted to Neofox-v1.1.0. If running from docker is required, please use a previous version for now.
+The docker recipe will be updated soon.**
+
In order to run the command line in a docker image, all of the above applies but
some additional steps are required.
diff --git a/docs/source/04_developer_guide.md b/docs/source/04_developer_guide.md
index 14e57288..baa65671 100644
--- a/docs/source/04_developer_guide.md
+++ b/docs/source/04_developer_guide.md
@@ -61,4 +61,4 @@ The models and the required scripts are in the folder `neofox/models`
Build the models into Python code with `make models`.
-Build the HTML documentation with `make html`.
\ No newline at end of file
+Build the HTML documentation with `make html` (this requires docker).
diff --git a/docs/source/05_models.md b/docs/source/05_models.md
index 5d475081..34bbce26 100644
--- a/docs/source/05_models.md
+++ b/docs/source/05_models.md
@@ -217,6 +217,7 @@ The metadata required for analysis for a given patient + its patient identifier
| wildTypePeptide | [string](#string) | | Closest wild type peptide |
| alleleMhcI | [MhcAllele](#neoantigen.MhcAllele) | | MHC I allele |
| isoformMhcII | [Mhc2Isoform](#neoantigen.Mhc2Isoform) | | MHC II isoform |
+| core | [string](#string) | | MHCII core part of the peptide ligand that primarily interacts with the MHC binding groove, predicted by NetMHCpan/NetMHCIIpan |
| affinityMutated | [float](#float) | | MHC binding affinity for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
| rankMutated | [float](#float) | | MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
| affinityWildType | [float](#float) | | MHC binding affinity for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
@@ -228,6 +229,7 @@ The metadata required for analysis for a given patient + its patient identifier
| imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. |
| dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. Range [0.0, 1.0] |
| rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. Range [0.0, 1.0] |
+| externalAnnotations | [Annotation](#neoantigen.Annotation) | repeated | External annotations for neoepitope mode. |
diff --git a/gpl-v3.tmpl b/gpl-v3.tmpl
old mode 100644
new mode 100755
diff --git a/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py b/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py
index 8f17985a..dbcca511 100755
--- a/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py
+++ b/neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py
@@ -23,12 +23,15 @@
from neofox.helpers.epitope_helper import EpitopeHelper
from neofox.model.mhc_parser import MhcParser, get_alleles_by_gene
-from neofox.references.references import DependenciesConfiguration
+from neofox.references.references import DependenciesConfiguration, MhcDatabase, \
+ ReferenceFolder, ORGANISM_HOMO_SAPIENS, \
+ ORGANISM_MUS_MUSCULUS
from neofox.helpers.runner import Runner
from neofox.model.neoantigen import Annotation, Mhc2, Mhc2GeneName, MhcAllele, PredictedEpitope, Mhc2Isoform, \
Neoantigen
+
from neofox.model.factories import AnnotationFactory
from neofox.helpers import intermediate_files
import pandas as pd
@@ -45,24 +48,37 @@ class MixMHC2pred:
ANNOTATION_PREFIX = 'MixMHC2pred'
ANNOTATION_PREFIX_WT = 'MixMHC2pred_WT'
- def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser):
+ def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser,
+ references: ReferenceFolder):
self.runner = runner
self.configuration = configuration
- self.available_alleles = self._load_available_alleles()
self.mhc_parser = mhc_parser
+ self.references = references
+ self.organism = references.organism
+ self.available_alleles = self._load_available_alleles()
self.results = None
def _load_available_alleles(self):
"""
- loads file with available HLA II alllels for MixMHC2pred prediction, returns set
+ loads file with available HLA II allels for MixMHC2pred prediction, returns set
:return:
"""
- alleles = pd.read_csv(
- self.configuration.mix_mhc2_pred_alleles_list, skiprows=1, sep="\t"
- )
+ if self.organism == ORGANISM_HOMO_SAPIENS:
+ alleles = pd.read_csv(
+ self.configuration.mix_mhc2_pred_human_alleles_list, skiprows=2, sep="\t"
+ )
+ elif self.organism == ORGANISM_MUS_MUSCULUS:
+ if self.references.mixmhc2pred_alleles_list is not None:
+ alleles = pd.read_csv(
+ self.references.mixmhc2pred_alleles_list, skiprows=2, sep="\t"
+ )
+ else:
+ logger.error("The PWMdef for Mouse was not downloaded.")
+
return list(alleles["AlleleName"])
+
@staticmethod
def _combine_dq_dp_alleles(alpha_alleles: List[str], beta_alleles: List[str]):
"""returns patient HLA-DQ/HLA-DP allele combination that are relevant for MixMHC2pred"""
@@ -84,7 +100,8 @@ def _combine_dq_dp_alleles(alpha_alleles: List[str], beta_alleles: List[str]):
return alleles_pairs + alleles_triplets
@staticmethod
- def _get_mixmhc2_allele_representation(hla_alleles: List[MhcAllele]):
+ def _get_mixmhc2_allele_human_representation(hla_alleles: List[MhcAllele]):
+ # alleles: hla_alleles
return list(
map(
lambda x: "{gene}_{group}_{protein}".format(
@@ -95,12 +112,12 @@ def _get_mixmhc2_allele_representation(hla_alleles: List[MhcAllele]):
)
@staticmethod
- def _get_mixmhc2_isoform_representation(isoform: Mhc2Isoform):
+ def _get_mixmhc2_isoform_human_representation(isoform: Mhc2Isoform):
- beta_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.beta_chain])[0]
+ beta_chain = MixMHC2pred._get_mixmhc2_allele_human_representation([isoform.beta_chain])[0]
if isoform.alpha_chain is not None and isoform.alpha_chain.name:
# for DR only beta chain is provided
- alpha_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.alpha_chain])[0]
+ alpha_chain = MixMHC2pred._get_mixmhc2_allele_human_representation([isoform.alpha_chain])[0]
return "{alpha}__{beta}".format(alpha=alpha_chain, beta=beta_chain)
return beta_chain
@@ -115,22 +132,49 @@ def transform_hla_ii_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]:
dqb1_alleles = get_alleles_by_gene(mhc, Mhc2GeneName.DQB1)
dp_allele_combinations = self._combine_dq_dp_alleles(
- alpha_alleles=self._get_mixmhc2_allele_representation(dpa1_alleles),
- beta_alleles=self._get_mixmhc2_allele_representation(dpb1_alleles)
+ alpha_alleles=self._get_mixmhc2_allele_human_representation(dpa1_alleles),
+ beta_alleles=self._get_mixmhc2_allele_human_representation(dpb1_alleles)
)
dq_allele_combinations = self._combine_dq_dp_alleles(
- alpha_alleles=self._get_mixmhc2_allele_representation(dqa1_alleles),
- beta_alleles=self._get_mixmhc2_allele_representation(dqb1_alleles)
+ alpha_alleles=self._get_mixmhc2_allele_human_representation(dqa1_alleles),
+ beta_alleles=self._get_mixmhc2_allele_human_representation(dqb1_alleles)
)
return [
a
- for a in self._get_mixmhc2_allele_representation(drb1_alleles)
+ for a in self._get_mixmhc2_allele_human_representation(drb1_alleles)
+ dq_allele_combinations
+ dp_allele_combinations
if a in self.available_alleles
]
+ @staticmethod
+ def _get_mixmhc2_allele_mouse_representation(h2_alleles: List[MhcAllele]):
+ return list(
+ map(
+ lambda x: "H2_{gene}a_{protein}__H2_{gene}b_{protein}".format(
+ gene=x.gene[-1], protein=x.protein
+ ),
+ h2_alleles,
+ )
+ )
+
+ def _get_mixmhc2_isoform_mouse_representation(isoform: Mhc2Isoform):
+ if isoform is not None:
+ return "H2_{gene}a_{protein}__H2_{gene}b_{protein}".format(gene=isoform[-3], protein=isoform[-1])
+
+ def transform_h2_alleles_for_prediction(self, mhc:List[Mhc2]) -> List[str]:
+ """
+ prepares list of H2 alleles for prediction in required format
+ """
+
+ h2a_alleles = get_alleles_by_gene(mhc, Mhc2GeneName.H2A)
+ h2e_alleles = get_alleles_by_gene(mhc, Mhc2GeneName.H2E)
+
+ return [
+ a for i in (h2a_alleles, h2e_alleles) for a in self._get_mixmhc2_allele_mouse_representation(i) if a in self.available_alleles
+ ]
+
def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]:
parsed_results = []
@@ -154,44 +198,50 @@ def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]:
return parsed_results
def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: List[str]) -> List[PredictedEpitope]:
-
- tmpfasta = intermediate_files.create_temp_fasta(potential_ligand_sequences, prefix="tmp_sequence_")
+ tmptxt = intermediate_files.create_temp_mixmhc2pred(potential_ligand_sequences, prefix="tmp_sequence_")
outtmp = intermediate_files.create_temp_file(prefix="mixmhc2pred", suffix=".txt")
+
cmd = [
self.configuration.mix_mhc2_pred,
"-a",
" ".join(isoforms),
"-i",
- tmpfasta,
+ tmptxt,
"-o",
outtmp,
+ "--no_context"
]
+ if self.organism != ORGANISM_HOMO_SAPIENS:
+ pwm_dir = self.references.mixmhc2pred_pwm_dir
+ cmd.extend(["-f", pwm_dir])
+
self.runner.run_command(cmd)
results = self._parse_mixmhc2pred_output(filename=outtmp)
os.remove(outtmp)
- os.remove(tmpfasta)
+ os.remove(tmptxt)
return results
def run(self, mhc: List[Mhc2], neoantigen: Neoantigen, uniprot):
"""
Runs MixMHC2pred:
- prediction for peptides of length 13 to 18 based on Suppl Fig. 6 a in Racle, J., et al., Nat. Biotech. (2019).
- Robust prediction of HLA class II epitopes by deep motif deconvolution of immunopeptidomes.
+ prediction for peptides of length 12 to 21 based on Racle, J., et al., Nat. Biotech. (2023).
+ Machine learning predictions of MHC-II specificities reveal alternative binding mode of class II epitopes.
"""
# TODO: get rid of this
self.results = None
potential_ligand_sequences = EpitopeHelper.generate_nmers(
- neoantigen=neoantigen, lengths=[13, 14, 15, 16, 17, 18], uniprot=uniprot)
- # filter mps shorter < 13aa
- filtered_sequences = list(
- filter(lambda x: len(x) >= 13, potential_ligand_sequences)
- )
- if len(filtered_sequences) > 0:
- mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc)
+ neoantigen=neoantigen, lengths=[12, 13, 14, 15, 16, 17, 18, 19, 20, 21], uniprot=uniprot)
+
+ if len(potential_ligand_sequences) > 0:
+ if self.organism == ORGANISM_HOMO_SAPIENS:
+ mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc)
+ else:
+ mhc2_alleles = self.transform_h2_alleles_for_prediction(mhc)
+
if len(mhc2_alleles) > 0:
self.results = self._mixmhc2prediction(
- isoforms=mhc2_alleles, potential_ligand_sequences=filtered_sequences)
+ isoforms=mhc2_alleles, potential_ligand_sequences=potential_ligand_sequences)
else:
logger.warning("None of the MHC II alleles are supported by MixMHC2pred")
@@ -200,13 +250,18 @@ def run_peptide(self, peptide: str, isoform: Mhc2Isoform) -> PredictedEpitope:
Performs MixMHC2pred prediction for desired hla allele and writes result to temporary file.
"""
result = None
- isoform_representation = self._get_mixmhc2_isoform_representation(isoform)
+ if self.organism == ORGANISM_HOMO_SAPIENS:
+ isoform_representation = self._get_mixmhc2_isoform_human_representation(isoform)
+ else:
+ isoform_representation = self._get_mixmhc2_isoform_mouse_representation(isoform)
if isoform_representation in self.available_alleles:
results = self._mixmhc2prediction(
isoforms=[isoform_representation],
potential_ligand_sequences=[peptide])
if results:
result = results[0]
+ else:
+ logger.warning("%s is not available in the available alleles." % isoform_representation)
return result
def get_annotations(self) -> List[Annotation]:
diff --git a/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py b/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py
index 82c06603..e72aa830 100755
--- a/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py
+++ b/neofox/MHC_predictors/MixMHCpred/mixmhcpred.py
@@ -32,9 +32,9 @@
from neofox.references.references import DependenciesConfiguration
ALLELE = "BestAllele"
-RANK = "%Rank_bestAllele"
+RANK = "%Rank_"
PEPTIDE = "Peptide"
-SCORE = "Score_bestAllele"
+SCORE = "Score_"
class MixMHCpred:
@@ -58,7 +58,7 @@ def _load_available_alleles(self):
alleles = pd.read_csv(
self.configuration.mix_mhc_pred_alleles_list, sep="\t"
)
- return list(alleles["Allele"])
+ return set(alleles["Allele"])
def _get_mixmhc_allele_representation(self, mhc_alleles: List[MhcAllele]):
return list(
@@ -70,8 +70,16 @@ def _get_mixmhc_allele_representation(self, mhc_alleles: List[MhcAllele]):
)
)
- def _parse_mixmhcpred_output(self, filename: str) -> List[PredictedEpitope]:
+    def _get_mhc_alleles(self, mixmhc_result):
+        """
+        extracts the alleles for which the MixMHCpred results hold a per-allele score column
+
+        Per-allele columns are named Score_<allele>. The summary column Score_bestAllele is excluded
+        by its exact name instead of by its last character, so an allele name ending in "e" is not
+        dropped. The allele is everything after the Score_ prefix, which keeps it consistent with the
+        Score_<allele> and %Rank_<allele> columns that are looked up when parsing the results.
+        :param mixmhc_result: data frame with the MixMHCpred results
+        :return: set of allele names as they appear in the column names
+        """
+        best_allele_column = SCORE + "bestAllele"
+        return {
+            col[len(SCORE):]
+            for col in mixmhc_result.columns
+            if col.startswith(SCORE) and col != best_allele_column
+        }
+ def _parse_mixmhcpred_output(self, filename: str) -> List[PredictedEpitope]:
parsed_results = []
try:
results = pd.read_csv(filename, sep="\t", comment="#")
@@ -79,17 +87,22 @@ def _parse_mixmhcpred_output(self, filename: str) -> List[PredictedEpitope]:
logger.error("Results from MixMHCpred are empty, something went wrong")
results = pd.DataFrame()
+ mhc_alleles = self._get_mhc_alleles(results)
for _, row in results.iterrows():
# when MixMHCpred returns no results it provides a row with the peptide and NAs for other fields
# pandas reads NAs as float nan. Skip these
- if isinstance(row[ALLELE], str):
- parsed_results.append(
- PredictedEpitope(
- allele_mhc_i=self.mhc_parser.parse_mhc_allele(row[ALLELE]),
- mutated_peptide=row[PEPTIDE],
- affinity_mutated=float(row[SCORE]),
- rank_mutated=float(row[RANK]),
- ))
+ for allele in mhc_alleles:
+ if isinstance(row[PEPTIDE], str):
+ score = str(SCORE + allele)
+ rank = str(RANK + allele)
+
+ parsed_results.append(
+ PredictedEpitope(
+ allele_mhc_i=self.mhc_parser.parse_mhc_allele(allele),
+ mutated_peptide=row[PEPTIDE],
+ affinity_mutated=float(row[score]),
+ rank_mutated=float(row[rank]),
+ ))
return parsed_results
def _mixmhcprediction(self, mhc_alleles: List[str], potential_ligand_sequences) -> List[PredictedEpitope]:
@@ -123,9 +136,8 @@ def run(self, neoantigen: Neoantigen, mhc: List[Mhc1], uniprot):
# TODO: get rid of this
self.results = None
- # TODO: we may want to extend this to 8 to 14 bp (coordinate this with netMHCpan)
potential_ligand_sequences = EpitopeHelper.generate_nmers(
- neoantigen=neoantigen, lengths=[8, 9, 10, 11], uniprot=uniprot
+ neoantigen=neoantigen, lengths=[8, 9, 10, 11, 12, 13, 14], uniprot=uniprot
)
if len(potential_ligand_sequences) > 0:
mhc1_alleles = self._get_mixmhc_allele_representation([a for m in mhc for a in m.alleles])
diff --git a/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py b/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py
index 19913722..0afc86ce 100755
--- a/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py
+++ b/neofox/MHC_predictors/netmhcpan/combine_netmhcIIpan_pred_multiple_binders.py
@@ -46,6 +46,12 @@ def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc
)
self._initialise()
+    @staticmethod
+    def _init_additional_netmhciipan_annotations() -> List[Annotation]:
+        """
+        builds the placeholder annotations "Of" and "coreRel", both with an empty value
+        :return: list with one empty annotation per name, in the order Of, coreRel
+        """
+        return [
+            AnnotationFactory.build_annotation(name=annotation_name, value=None)
+            for annotation_name in ("Of", "coreRel")
+        ]
+
def _initialise(self):
self.phbr_ii = None
self.generator_rate = None
@@ -53,18 +59,33 @@ def _initialise(self):
self.generator_rate_cdn = None
self.best_predicted_epitope_rank = PredictedEpitope(
mutated_peptide=None,
+ wild_type_peptide=None,
+ core=None,
position=None,
isoform_mhc_i_i=Mhc2Isoform(name=None),
affinity_mutated=None,
rank_mutated=None,
)
+ # additional netmhcIIpan annotations are initialized empty to always have
+ # all output columns in the result table
+ self.best_predicted_epitope_rank.neofox_annotations.annotations.extend(
+ self._init_additional_netmhciipan_annotations()
+ )
+
self.best_predicted_epitope_affinity = PredictedEpitope(
mutated_peptide=None,
+ wild_type_peptide=None,
+ core=None,
position=None,
isoform_mhc_i_i=Mhc2Isoform(name=None),
affinity_mutated=None,
rank_mutated=None,
)
+ # additional netmhcIIpan annotations are initialized empty to always have
+ # all output columns in the result table
+ self.best_predicted_epitope_affinity.neofox_annotations.annotations.extend(
+ self._init_additional_netmhciipan_annotations()
+ )
self.predictions = []
def calculate_phbr_ii(self, best_epitope_per_allele_mhc2: List[PredictedEpitope]):
@@ -199,6 +220,10 @@ def get_annotations(self) -> List[Annotation]:
value=self.best_predicted_epitope_rank.isoform_mhc_i_i.name,
name="NetMHCIIpan_bestRank_allele",
),
+ AnnotationFactory.build_annotation(
+ value=self.best_predicted_epitope_rank.core,
+ name="NetMHCIIpan_bestRank_core",
+ ),
AnnotationFactory.build_annotation(
value=self.best_predicted_epitope_rank.rank_wild_type,
name="NetMHCIIpan_bestRank_rankWT",
@@ -208,6 +233,13 @@ def get_annotations(self) -> List[Annotation]:
name="NetMHCIIpan_bestRank_peptideWT",
),
])
+ # Additional annotations assigned to the epitopes are added to the output
+ for annotation in self.best_predicted_epitope_rank.neofox_annotations.annotations:
+ annotations.append(
+ AnnotationFactory.build_annotation(
+ value=annotation.value, name=f"NetMHCIIpan_bestRank_{annotation.name}"
+ ),
+ )
if self.best_predicted_epitope_affinity:
annotations.extend([
AnnotationFactory.build_annotation(
@@ -222,6 +254,10 @@ def get_annotations(self) -> List[Annotation]:
value=self.best_predicted_epitope_affinity.isoform_mhc_i_i.name,
name="NetMHCIIpan_bestAffinity_allele",
),
+ AnnotationFactory.build_annotation(
+ value=self.best_predicted_epitope_affinity.core,
+ name="NetMHCIIpan_bestAffinity_core",
+ ),
AnnotationFactory.build_annotation(
value=self.best_predicted_epitope_affinity.affinity_wild_type,
name="NetMHCIIpan_bestAffinity_affinityWT",
@@ -231,6 +267,13 @@ def get_annotations(self) -> List[Annotation]:
name="NetMHCIIpan_bestAffinity_peptideWT",
)
])
+ # Additional annotations assigned to the epitopes are added to the output
+ for annotation in self.best_predicted_epitope_affinity.neofox_annotations.annotations:
+ annotations.append(
+ AnnotationFactory.build_annotation(
+ value=annotation.value, name=f"NetMHCIIpan_bestAffinity_{annotation.name}"
+ ),
+ )
if self.organism == ORGANISM_HOMO_SAPIENS:
annotations.extend([AnnotationFactory.build_annotation(value=self.phbr_ii, name="PHBR_II")])
diff --git a/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py b/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py
index a6da0863..537d35c9 100755
--- a/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py
+++ b/neofox/MHC_predictors/netmhcpan/combine_netmhcpan_pred_multiple_binders.py
@@ -45,6 +45,14 @@ def __init__(
blastp_runner=self.blastp_runner
)
+    @staticmethod
+    def _init_additional_netmhcpan_annotations() -> List[Annotation]:
+        """
+        builds the placeholder annotations "Icore", "Of", "Gp" and "Gl", all with an empty value
+        :return: list with one empty annotation per name, in the order Icore, Of, Gp, Gl
+        """
+        return [
+            AnnotationFactory.build_annotation(name=annotation_name, value=None)
+            for annotation_name in ("Icore", "Of", "Gp", "Gl")
+        ]
+
def _initialise(self):
self.phbr_i = None
self.generator_rate = None
@@ -53,9 +61,21 @@ def _initialise(self):
self.generator_rate_adn = None
self.generator_rate_cdn = None
self.best_epitope_by_rank = EpitopeHelper.get_empty_epitope()
+ self.best_epitope_by_rank.neofox_annotations.annotations.extend(
+ self._init_additional_netmhcpan_annotations()
+ )
self.best_epitope_by_affinity = EpitopeHelper.get_empty_epitope()
+ self.best_epitope_by_affinity.neofox_annotations.annotations.extend(
+ self._init_additional_netmhcpan_annotations()
+ )
self.best_ninemer_epitope_by_affinity = EpitopeHelper.get_empty_epitope()
+ self.best_ninemer_epitope_by_affinity.neofox_annotations.annotations.extend(
+ self._init_additional_netmhcpan_annotations()
+ )
self.best_ninemer_epitope_by_rank = EpitopeHelper.get_empty_epitope()
+ self.best_ninemer_epitope_by_rank.neofox_annotations.annotations.extend(
+ self._init_additional_netmhcpan_annotations()
+ )
self.predictions = []
def calculate_phbr_i(
@@ -190,12 +210,16 @@ def get_annotations(self) -> List[Annotation]:
if self.best_epitope_by_rank:
annotations.extend([
AnnotationFactory.build_annotation(
- value=self.best_epitope_by_rank.rank_mutated, name="NetMHCpan_MHCI_bestRank_rank"
+ value=self.best_epitope_by_rank.rank_mutated, name="NetMHCpan_bestRank_rank"
),
AnnotationFactory.build_annotation(
value=self.best_epitope_by_rank.mutated_peptide,
name="NetMHCpan_bestRank_peptide",
),
+ AnnotationFactory.build_annotation(
+ value=self.best_epitope_by_rank.core,
+ name="NetMHCpan_bestRank_core",
+ ),
AnnotationFactory.build_annotation(
value=self.best_epitope_by_rank.allele_mhc_i.name, name="NetMHCpan_bestRank_allele"
),
@@ -207,6 +231,13 @@ def get_annotations(self) -> List[Annotation]:
name="NetMHCpan_bestRank_peptideWT",
)
])
+ # Additional annotations assigned to the epitopes are added to the output
+ for annotation in self.best_epitope_by_rank.neofox_annotations.annotations:
+ annotations.append(
+ AnnotationFactory.build_annotation(
+ value=annotation.value, name=f"NetMHCpan_bestRank_{annotation.name}"
+ ),
+ )
if self.best_epitope_by_affinity:
annotations.extend([
AnnotationFactory.build_annotation(
@@ -217,6 +248,10 @@ def get_annotations(self) -> List[Annotation]:
value=self.best_epitope_by_affinity.mutated_peptide,
name="NetMHCpan_bestAffinity_peptide",
),
+ AnnotationFactory.build_annotation(
+ value=self.best_epitope_by_affinity.core,
+ name="NetMHCpan_bestAffinity_core",
+ ),
AnnotationFactory.build_annotation(
value=self.best_epitope_by_affinity.allele_mhc_i.name,
name="NetMHCpan_bestAffinity_allele",
@@ -229,6 +264,13 @@ def get_annotations(self) -> List[Annotation]:
value=self.best_epitope_by_affinity.wild_type_peptide,
name="NetMHCpan_bestAffinity_peptideWT",
)])
+ # Additional annotations assigned to the epitopes are added to the output
+ for annotation in self.best_epitope_by_affinity.neofox_annotations.annotations:
+ annotations.append(
+ AnnotationFactory.build_annotation(
+ value=annotation.value, name=f"NetMHCpan_bestAffinity_{annotation.name}"
+ ),
+ )
if self.best_ninemer_epitope_by_rank:
annotations.extend([
AnnotationFactory.build_annotation(
diff --git a/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py b/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py
index a42933e9..8e7c111e 100755
--- a/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py
+++ b/neofox/MHC_predictors/netmhcpan/netmhcIIpan_prediction.py
@@ -26,8 +26,9 @@
from neofox.helpers.epitope_helper import EpitopeHelper
from neofox.helpers.runner import Runner
from neofox.model.mhc_parser import MhcParser
-from neofox.model.neoantigen import Mhc2, Mhc2Name, Mhc2Isoform, PredictedEpitope, Neoantigen
+from neofox.model.neoantigen import Mhc2, Mhc2Name, Mhc2Isoform, PredictedEpitope, Neoantigen, Annotation
from neofox.references.references import DependenciesConfiguration
+from neofox.model.factories import AnnotationFactory
class NetMhcIIPanPredictor:
@@ -112,6 +113,12 @@ def mhc2_prediction_peptide(
os.remove(tmp_peptide)
return result
+    @staticmethod
+    def get_additional_netmhcpan_annotations(line) -> List[Annotation]:
+        """
+        builds the additional annotations read from the fields of one output line
+        :param line: the fields of one output line, addressed by position
+        :return: the annotations "Of" (field 3) and "coreRel" (field 5), both stored as strings
+        """
+        fields_by_name = (("Of", 3), ("coreRel", 5))
+        return [
+            AnnotationFactory.build_annotation(name=annotation_name, value=str(line[index]))
+            for annotation_name, index in fields_by_name
+        ]
+
def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]:
results = []
for line in lines.splitlines():
@@ -121,15 +128,19 @@ def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]:
continue
line = line.split()
line = line[0:-1] if len(line) > 12 else line
- results.append(
- PredictedEpitope(
+
+ pred_epitope = PredictedEpitope(
position=int(line[0]),
isoform_mhc_i_i=self.mhc_parser.parse_mhc2_isoform(line[1]),
+ core=str(line[4]),
mutated_peptide=line[2],
affinity_mutated=float(line[11]),
rank_mutated=float(line[8]),
)
+ pred_epitope.neofox_annotations.annotations.extend(
+ self.get_additional_netmhcpan_annotations(line)
)
+ results.append(pred_epitope)
return results
def set_wt_netmhcpan_scores(self, predictions) -> List[PredictedEpitope]:
diff --git a/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py b/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py
index df20503c..d7934344 100755
--- a/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py
+++ b/neofox/MHC_predictors/netmhcpan/netmhcpan_prediction.py
@@ -26,8 +26,9 @@
from neofox.helpers.epitope_helper import EpitopeHelper
from neofox.helpers.runner import Runner
from neofox.model.mhc_parser import MhcParser
-from neofox.model.neoantigen import Mhc1, PredictedEpitope, Zygosity, Neoantigen
+from neofox.model.neoantigen import Mhc1, PredictedEpitope, Zygosity, Neoantigen, Annotation
from neofox.references.references import DependenciesConfiguration
+from neofox.model.factories import AnnotationFactory
PEPTIDE_LENGTHS = ["8", "9", "10", "11", "12", "13", "14"]
@@ -92,6 +93,17 @@ def mhc_prediction_peptide(self, alleles, sequence) -> PredictedEpitope:
os.remove(input_file)
return result
+    @staticmethod
+    def get_additional_netmhcpan_annotations(line) -> List[Annotation]:
+        """
+        builds the additional annotations read from the fields of one output line
+        :param line: the fields of one output line, addressed by position
+        :return: the annotations "Icore" (field 9, as string), "Of" (field 4), "Gp" (field 5) and
+        "Gl" (field 6), the last three converted to integers
+        """
+        build = AnnotationFactory.build_annotation
+        return [
+            build(name="Icore", value=str(line[9])),
+            # start position of core in the peptide.
+            build(name="Of", value=int(line[4])),
+            # Position of the deletion, if any.
+            build(name="Gp", value=int(line[5])),
+            # Length of the deletion, if any.
+            build(name="Gl", value=int(line[6])),
+        ]
+
def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]:
results = []
for line in lines.splitlines():
@@ -105,15 +117,18 @@ def _parse_netmhcpan_output(self, lines: str) -> List[PredictedEpitope]:
raise NeofoxCommandException("netmhcpan threw an error: {}".format(line))
line = line.split()
line = line[0:-2] if len(line) > 16 else line
- results.append(
- PredictedEpitope(
+ pred_epitope = PredictedEpitope(
position=int(line[0]),
allele_mhc_i=self.mhc_parser.parse_mhc_allele(line[1]),
+ core=str(line[3]),
mutated_peptide=line[2],
affinity_mutated=float(line[15]),
rank_mutated=float(line[12]),
)
+ pred_epitope.neofox_annotations.annotations.extend(
+ self.get_additional_netmhcpan_annotations(line)
)
+ results.append(pred_epitope)
return results
def get_alleles_netmhcpan_representation(self, mhc: List[Mhc1]) -> List[str]:
diff --git a/neofox/MHC_predictors/prime.py b/neofox/MHC_predictors/prime.py
index 4931925f..ae97a1d3 100755
--- a/neofox/MHC_predictors/prime.py
+++ b/neofox/MHC_predictors/prime.py
@@ -35,9 +35,9 @@
from neofox.references.references import DependenciesConfiguration
ALLELE = "BestAllele"
-RANK = "%Rank_bestAllele"
+RANK = "%Rank_"
PEPTIDE = "Peptide"
-SCORE = "Score_bestAllele"
+SCORE = "Score_"
class Prime:
@@ -59,7 +59,7 @@ def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc
def _load_available_alleles(self):
"""
- loads file with available HLA II alllels for Prime prediction, returns set
+ loads file with available HLA II alleles for Prime prediction, returns set
:return:
"""
alleles = pd.read_csv(
@@ -76,24 +76,39 @@ def _get_mixmhc_allele_representation(self, mhc_alleles: List[MhcAllele]):
mhc_alleles)
)
)
+    def _get_mhc_alleles(self, prime_result):
+        """
+        extracts the alleles for which the PRIME results hold a per-allele score column
+
+        Per-allele columns are named Score_<allele>. The summary column Score_bestAllele is excluded
+        by its exact name instead of by its last character, so an allele name ending in "e" is not
+        dropped. The allele is everything after the Score_ prefix, which keeps it consistent with the
+        Score_<allele> and %Rank_<allele> columns that are looked up when parsing the results.
+        :param prime_result: data frame with the PRIME results
+        :return: set of allele names as they appear in the column names
+        """
+        best_allele_column = SCORE + "bestAllele"
+        return {
+            col[len(SCORE):]
+            for col in prime_result.columns
+            if col.startswith(SCORE) and col != best_allele_column
+        }
def _parse_prime_output(self, filename: str) -> List[PredictedEpitope]:
-
+        """
+        parses the PRIME output file into one predicted epitope per peptide and allele
+        :param filename: path of the tab-separated PRIME output file; lines starting with # are ignored
+        :return: list of predicted epitopes holding the per-allele score and rank; empty when the
+        file holds no data
+        """
parsed_results = []
try:
results = pd.read_csv(filename, sep="\t", comment="#")
except EmptyDataError:
logger.error("Results from PRIME are empty, something went wrong")
results = pd.DataFrame()
+        mhc_alleles = self._get_mhc_alleles(results)
for _, row in results.iterrows():
- parsed_results.append(
- PredictedEpitope(
- allele_mhc_i=self.mhc_parser.parse_mhc_allele(row[ALLELE]),
- mutated_peptide=row[PEPTIDE],
- affinity_mutated=float(row[SCORE]),
- rank_mutated=float(row[RANK]),
- ))
+            # one epitope is reported per allele, read from the Score_<allele> and %Rank_<allele> columns
+            # NOTE(review): only the peptide is checked to be a string, so rows with NA scores are not
+            # skipped and are parsed as nan; confirm whether such rows should be dropped
+            for allele in mhc_alleles:
+                if isinstance(row[PEPTIDE], str):
+                    score = SCORE + allele
+                    rank = RANK + allele
+
+                    parsed_results.append(
+                        PredictedEpitope(
+                            allele_mhc_i=self.mhc_parser.parse_mhc_allele(allele),
+                            mutated_peptide=row[PEPTIDE],
+                            affinity_mutated=float(row[score]),
+                            rank_mutated=float(row[rank]),
+                        ))
return parsed_results
def _prime(self, mhc_alleles: List[str], potential_ligand_sequences) -> List[PredictedEpitope]:
diff --git a/neofox/__init__.py b/neofox/__init__.py
index 1d64c073..8d99b4a7 100755
--- a/neofox/__init__.py
+++ b/neofox/__init__.py
@@ -18,7 +18,7 @@
# along with this program. If not, see .#
-VERSION = "1.1.0b1"
+VERSION = "1.1.0"
REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER"
NEOFOX_BLASTP_ENV = "NEOFOX_BLASTP"
diff --git a/neofox/annotator/abstract_annotator.py b/neofox/annotator/abstract_annotator.py
index 20569016..5f0307ee 100644
--- a/neofox/annotator/abstract_annotator.py
+++ b/neofox/annotator/abstract_annotator.py
@@ -63,11 +63,13 @@ def get_additional_annotations_neoepitope_mhci(
vaf_tumor_dna = neoantigen.dna_variant_allele_frequency
vaf_tumor_rna = neoantigen.rna_variant_allele_frequency
transcript_exp = neoantigen.rna_expression
+ gene_exp = neoantigen.imputed_gene_expression
else:
gene = epitope.gene
vaf_tumor_dna = epitope.dna_variant_allele_frequency
vaf_tumor_rna = epitope.rna_variant_allele_frequency
transcript_exp = epitope.rna_expression
+ gene_exp = epitope.imputed_gene_expression
epitope.neofox_annotations.annotations.extend(
BestAndMultipleBinder.get_annotations_epitope_mhci(epitope=epitope) +
@@ -94,7 +96,8 @@ def get_additional_annotations_neoepitope_mhci(
epitope.neofox_annotations.annotations.extend(
self.priority_score_calculator.get_annotations_epitope_mhci(
- epitope=epitope, vaf_rna=vaf_tumor_rna, vaf_tumor=vaf_tumor_dna, transcript_exp=transcript_exp))
+ epitope=epitope, vaf_rna=vaf_tumor_rna, vaf_tumor=vaf_tumor_dna,
+ transcript_exp=transcript_exp, gene_exp=gene_exp))
if self.organism == ORGANISM_HOMO_SAPIENS:
epitope.neofox_annotations.annotations.extend(
@@ -103,7 +106,8 @@ def get_additional_annotations_neoepitope_mhci(
return epitope
- def get_additional_annotations_neoepitope_mhcii(self, epitope: PredictedEpitope) -> PredictedEpitope:
+ def get_additional_annotations_neoepitope_mhcii(
+ self, epitope: PredictedEpitope) -> PredictedEpitope:
epitope.neofox_annotations.annotations.extend(
self.amplitude.get_annotations_epitope_mhcii(epitope=epitope) +
diff --git a/neofox/annotator/neoantigen_annotator.py b/neofox/annotator/neoantigen_annotator.py
index a6e5f1e2..fd2d6d86 100755
--- a/neofox/annotator/neoantigen_annotator.py
+++ b/neofox/annotator/neoantigen_annotator.py
@@ -31,7 +31,6 @@
from neofox.model.factories import AnnotationFactory
from neofox.model.mhc_parser import MhcParser
from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction
-from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator
from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator
from neofox.published_features.expression import Expression
from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope
@@ -57,7 +56,6 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.rank_mhcii_threshold = rank_mhcii_threshold
# NOTE: these resources do not read any file thus can be initialised fast
- self.neoag_calculator = NeoagCalculator(runner=self.runner, configuration=configuration)
self.expression_calculator = Expression()
self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)
@@ -193,14 +191,6 @@ def get_annotated_neoantigen(self, neoantigen: Neoantigen, patient: Patient, wit
)
)
- # neoag immunogenicity model
- if netmhcpan and netmhcpan.best_epitope_by_affinity:
- neoantigen.neofox_annotations.annotations.append(
- self.neoag_calculator.get_annotation(
- epitope_mhci=netmhcpan.best_epitope_by_affinity,
- neoantigen=neoantigen)
- )
-
# IEDB immunogenicity
if self.organism == ORGANISM_HOMO_SAPIENS:
neoantigen.neofox_annotations.annotations.extend(
@@ -218,10 +208,12 @@ def get_annotated_neoantigen(self, neoantigen: Neoantigen, patient: Patient, wit
)
# vaxrank
+ # TODO: consider calculating vaxrank with the DNA VAF as well
if netmhcpan and netmhcpan.predictions:
neoantigen.neofox_annotations.annotations.extend(VaxRank().get_annotations(
epitope_predictions=netmhcpan.predictions,
- expression_score=expression_annotation[0].value,
+ expression_score=[e.value for e in expression_annotation if e.name == "Mutated_rnaExpression_fromRNA"][0],
+ imputed_score=[e.value for e in expression_annotation if e.name == "Mutated_imputedGeneExpression_fromRNA"][0]
))
# hex
@@ -236,8 +228,7 @@ def get_annotated_neoantigen(self, neoantigen: Neoantigen, patient: Patient, wit
# annotate neoepitopes
if with_all_neoepitopes:
neoantigen.neoepitopes_mhc_i = [
- self.get_additional_annotations_neoepitope_mhci(
- epitope=e, neoantigen=neoantigen)
+ self.get_additional_annotations_neoepitope_mhci(epitope=e, neoantigen=neoantigen)
for e in neoantigen.neoepitopes_mhc_i]
neoantigen.neoepitopes_mhc_i_i = [
self.get_additional_annotations_neoepitope_mhcii(epitope=e) for e in neoantigen.neoepitopes_mhc_i_i]
diff --git a/neofox/annotator/neoantigen_mhc_binding_annotator.py b/neofox/annotator/neoantigen_mhc_binding_annotator.py
index 327bf382..9394a2aa 100644
--- a/neofox/annotator/neoantigen_mhc_binding_annotator.py
+++ b/neofox/annotator/neoantigen_mhc_binding_annotator.py
@@ -8,7 +8,7 @@
from neofox.helpers.runner import Runner
from neofox.model.mhc_parser import MhcParser
from neofox.model.neoantigen import Neoantigen, Patient
-from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, \
+from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, MhcDatabase, \
ORGANISM_HOMO_SAPIENS
@@ -24,6 +24,7 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.organism = references.organism
self.uniprot = uniprot
self.proteome_blastp_runner = proteome_blastp_runner
+ self.references = references
self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)
@@ -56,16 +57,21 @@ def get_mhc_binding_annotations(self, neoantigen: Neoantigen, patient: Patient):
neoantigen,
patient
)
+
+ if self.configuration.mix_mhc2_pred is not None and has_mhc2:
+ mixmhc2pred = self._run_mixmhc2pred(
+ self.runner,
+ self.configuration,
+ self.mhc_parser,
+ neoantigen,
+ patient,
+ self.mhc_database,
+ self.references
+ )
+
# avoids running MixMHCpred and PRIME for non human organisms
if self.organism == ORGANISM_HOMO_SAPIENS:
- if self.configuration.mix_mhc2_pred is not None and has_mhc2:
- mixmhc2pred = self._run_mixmhc2pred(
- self.runner,
- self.configuration,
- self.mhc_parser,
- neoantigen,
- patient,
- )
+
if self.configuration.mix_mhc_pred is not None and has_mhc1:
mixmhcpred = self._run_mixmhcpred(
self.runner,
@@ -155,7 +161,9 @@ def _run_mixmhc2pred(
mhc_parser: MhcParser,
neoantigen: Neoantigen,
patient: Patient,
+ mhc_database: MhcDatabase,
+ references: ReferenceFolder
):
- mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser)
+ mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser, references)
mixmhc2.run(mhc=patient.mhc2, neoantigen=neoantigen, uniprot=self.uniprot)
return mixmhc2
\ No newline at end of file
diff --git a/neofox/annotator/neoepitope_annotator.py b/neofox/annotator/neoepitope_annotator.py
index 0fec9f51..091c113e 100755
--- a/neofox/annotator/neoepitope_annotator.py
+++ b/neofox/annotator/neoepitope_annotator.py
@@ -32,7 +32,6 @@
from neofox.model.factories import AnnotationFactory
from neofox.model.mhc_parser import MhcParser
from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction
-from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator
from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator
from neofox.published_features.expression import Expression
from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope
@@ -54,7 +53,6 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.available_alleles = references.get_available_alleles()
# NOTE: these resources do not read any file thus can be initialised fast
- self.neoag_calculator = NeoagCalculator(runner=self.runner, configuration=configuration)
self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)
@@ -72,6 +70,8 @@ def get_annotated_neoepitope(self, neoepitope: PredictedEpitope) -> PredictedEpi
resources=self.resources_versions,
annotations=[]
)
+ self.expression_calculator = Expression()
+ expression_annotation = self.expression_calculator.get_annotations(neoantigen=neoepitope)
# if the WT is not provided it searches for the closest match in the proteome
if neoepitope.wild_type_peptide is None or neoepitope.wild_type_peptide == '':
@@ -80,7 +80,7 @@ def get_annotated_neoepitope(self, neoepitope: PredictedEpitope) -> PredictedEpi
# Runs netmhcpan, netmhc2pan, mixmhcpred and mixmhc2prd in parallel
annotated_neoepitope = self.neoepitope_mhc_binding_annotator.get_mhc_binding_annotations(neoepitope=neoepitope)
-
+ annotated_neoepitope.neofox_annotations.annotations.extend(expression_annotation)
has_mhc1 = annotated_neoepitope.allele_mhc_i is not None and annotated_neoepitope.allele_mhc_i.name
if has_mhc1:
diff --git a/neofox/annotator/neoepitope_mhc_binding_annotator.py b/neofox/annotator/neoepitope_mhc_binding_annotator.py
index e5202822..e4a6d20e 100644
--- a/neofox/annotator/neoepitope_mhc_binding_annotator.py
+++ b/neofox/annotator/neoepitope_mhc_binding_annotator.py
@@ -40,7 +40,7 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
runner=self.runner, configuration=configuration, mhc_parser=self.mhc_parser,
blastp_runner=self.proteome_blastp_runner)
self.mixmhcpred = MixMHCpred(self.runner, self.configuration, self.mhc_parser)
- self.mixmhc2pred = MixMHC2pred(self.runner, self.configuration, self.mhc_parser)
+ self.mixmhc2pred = MixMHC2pred(self.runner, self.configuration, self.mhc_parser, references)
self.prime = Prime(self.runner, self.configuration, self.mhc_parser)
def get_mhc_binding_annotations(self, neoepitope: PredictedEpitope) -> PredictedEpitope:
diff --git a/neofox/command_line.py b/neofox/command_line.py
index 79f6795c..f18f6665 100755
--- a/neofox/command_line.py
+++ b/neofox/command_line.py
@@ -51,17 +51,25 @@ def neofox_configure():
action="store_true",
help="install the R dependencies automatically",
)
+ parser.add_argument(
+ "--install-mouse-mixmhc2pred",
+ dest="install_mouse_mixmhc2pred",
+ action="store_true",
+ help="get the mouse allele PWMs required to run MixMHC2pred for mouse",
+ )
args = parser.parse_args()
reference_folder = args.reference_folder
install_r_dependencies = args.install_r_dependencies
+ install_mouse_mixmhc2pred = args.install_mouse_mixmhc2pred
# makes sure that the output folder exists
os.makedirs(reference_folder, exist_ok=True)
logger.info("Starting the installation of references")
NeofoxReferenceInstaller(
- reference_folder=reference_folder, install_r_dependencies=install_r_dependencies
+ reference_folder=reference_folder, install_r_dependencies=install_r_dependencies,
+ install_mouse_mixmhc2pred=install_mouse_mixmhc2pred
).install()
logger.info("Finished the installation succesfully!")
@@ -213,6 +221,12 @@ def _read_data(input_file, patients_data, mhc_database: MhcDatabase) -> Tuple[Li
else:
raise ValueError('Not supported input file extension: {}'.format(input_file))
+ neoantigens_patient_ids = set(neoantigen.patient_identifier for neoantigen in neoantigens)
+ patient_ids = set(patient.identifier for patient in patients)
+ if len(neoantigens_patient_ids.difference(patient_ids)) > 0:
+ raise ValueError('%s patient candidate does not exist in the patient data file.'
+ % neoantigens_patient_ids.difference(patient_ids))
+
return neoantigens, patients
@@ -330,7 +344,8 @@ def neofox_epitope_cli():
neoepitopes, patients = _read_data_epitopes(
input_file,
patients_data,
- reference_folder.get_mhc_database())
+ reference_folder.get_mhc_database(),
+ organism)
# run annotations
annotated_neoepitopes = NeoFoxEpitope(
@@ -354,7 +369,7 @@ def neofox_epitope_cli():
def _read_data_epitopes(
- input_file, patients_data, mhc_database: MhcDatabase) -> Tuple[List[PredictedEpitope], List[Patient]]:
+ input_file, patients_data, mhc_database: MhcDatabase, organism: str) -> Tuple[List[PredictedEpitope], List[Patient]]:
# parse patient data
patients = []
@@ -366,7 +381,7 @@ def _read_data_epitopes(
# parse the neoantigen candidate data
if input_file.endswith('.txt') or input_file.endswith('.tsv'):
logger.info("Parsing candidate neoepitopes from: {}".format(input_file))
- neoepitopes = ModelConverter.parse_candidate_neoepitopes_file(input_file, mhc_database)
+ neoepitopes = ModelConverter.parse_candidate_neoepitopes_file(input_file, mhc_database, organism)
logger.info("Loaded {} candidate neoepitopes".format(len(neoepitopes)))
# TODO: add support for input in JSON format
#elif input_file.endswith('.json') :
diff --git a/neofox/helpers/epitope_helper.py b/neofox/helpers/epitope_helper.py
index ed38d991..0ce2fea9 100755
--- a/neofox/helpers/epitope_helper.py
+++ b/neofox/helpers/epitope_helper.py
@@ -147,6 +147,7 @@ def get_empty_epitope():
isoform_mhc_i_i=Mhc2Isoform(name=None),
affinity_mutated=None,
rank_mutated=None,
+ core=None,
)
@staticmethod
diff --git a/neofox/helpers/intermediate_files.py b/neofox/helpers/intermediate_files.py
index 6908c860..6b545f7a 100755
--- a/neofox/helpers/intermediate_files.py
+++ b/neofox/helpers/intermediate_files.py
@@ -42,6 +42,19 @@ def create_temp_fasta(sequences, prefix=None, comment_prefix="seq"):
counter += 1
return fasta_temp_file
+def create_temp_mixmhc2pred(sequences, prefix=None):
+ """
+ Write peptides into a table with two columns:
+ 1- peptide
+ 2- context
+ """
+
+ # TODO: update the context column soon.
+ table_temp_file = create_temp_file(prefix=prefix, suffix='.txt')
+ with open(table_temp_file, "w") as f:
+ for seq in sequences:
+ f.write(seq + '\n')
+ return table_temp_file
def create_temp_peptide(sequences, prefix=None):
"""
@@ -51,4 +64,4 @@ def create_temp_peptide(sequences, prefix=None):
with open(pep_temp_file, "w") as f:
for seq in sequences:
f.write(seq + "\n")
- return pep_temp_file
+ return pep_temp_file
\ No newline at end of file
diff --git a/neofox/model/Makefile b/neofox/model/Makefile
index 778abef4..d3eda137 100644
--- a/neofox/model/Makefile
+++ b/neofox/model/Makefile
@@ -2,5 +2,5 @@ models:
protoc -I . --python_betterproto_out=. neoantigen.proto
html:
- sudo docker run --rm -v `pwd`:/out -v `pwd`:/protos pseudomuto/protoc-gen-doc --doc_opt=/protos/models_template.tmpl,models.md
- cp models.md ../../docs/source/05_models.md
\ No newline at end of file
+ docker run --rm -v `pwd`:/out -v `pwd`:/protos pseudomuto/protoc-gen-doc --doc_opt=/protos/models_template.tmpl,models.md
+ cp models.md ../../docs/source/05_models.md
diff --git a/neofox/model/conversion.py b/neofox/model/conversion.py
index 757efacf..0eec8eb0 100755
--- a/neofox/model/conversion.py
+++ b/neofox/model/conversion.py
@@ -29,8 +29,13 @@
Neoantigen,
Patient,
PredictedEpitope,
+ Annotation,
+)
+from neofox.model.factories import (
+ PatientFactory,
+ NeoantigenFactory,
+ NeoepitopeFactory,
)
-from neofox.model.factories import PatientFactory, NeoantigenFactory
from neofox.references.references import MhcDatabase
@@ -63,7 +68,7 @@ def parse_candidate_file(candidate_file: str) -> List[Neoantigen]:
return neoantigens
@staticmethod
- def parse_candidate_neoepitopes_file(candidate_file: str, mhc_database: MhcDatabase) -> List[PredictedEpitope]:
+ def parse_candidate_neoepitopes_file(candidate_file: str, mhc_database: MhcDatabase, organism: str) -> List[PredictedEpitope]:
data = pd.read_csv(
candidate_file, sep="\t",
# NOTE: forces the types of every column to avoid pandas setting the wrong type for corner cases
@@ -82,7 +87,7 @@ def parse_candidate_neoepitopes_file(candidate_file: str, mhc_database: MhcDatab
# NOTE: this is the support for the NeoFox format
data = data.replace({np.nan: None})
- neoepitopes = ModelConverter._neoepitopes_csv2objects(data, mhc_database)
+ neoepitopes = ModelConverter._neoepitopes_csv2objects(data, mhc_database, organism)
return neoepitopes
@@ -181,14 +186,21 @@ def annotations2epitopes_table(neoantigens: List[Neoantigen], mhc: str) -> pd.Da
epitopes_dfs = []
for n in neoantigens:
# parses epitopes from a neoantigen into a data frame
- patient_identifier = n.patient_identifier
epitopes = n.neoepitopes_mhc_i if mhc == MHC_I else n.neoepitopes_mhc_i_i
epitopes_temp_df = ModelConverter._objects2dataframe(epitopes)
- epitopes_temp_df['patient_identifier'] = patient_identifier
+
+ epitopes_temp_df['patientIdentifier'] = n.patient_identifier
+ epitopes_temp_df['gene'] = n.gene
+ epitopes_temp_df['rnaExpression'] = n.rna_expression
+ epitopes_temp_df['imputedGeneExpression'] = n.imputed_gene_expression
+ epitopes_temp_df['dnaVariantAlleleFrequency'] = n.dna_variant_allele_frequency
+ epitopes_temp_df['rnaVariantAlleleFrequency'] = n.rna_variant_allele_frequency
+ epitopes_temp_df['mutatedXmer'] = n.mutated_xmer
# adapts output table depending on MHC type
if mhc == MHC_I:
epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='isoformMhcII.*')), axis=1, inplace=True)
+ epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='coreMhcII.*')), axis=1, inplace=True)
else:
epitopes_temp_df.drop(list(epitopes_temp_df.filter(regex='alleleMhcI.*')), axis=1, inplace=True)
@@ -199,20 +211,27 @@ def annotations2epitopes_table(neoantigens: List[Neoantigen], mhc: str) -> pd.Da
annotations_dfs = []
for e in epitopes:
annotations = [a.to_dict() for a in e.neofox_annotations.annotations]
+ # add external annotations also to epitope table
+ annotations.extend([a.to_dict() for a in n.external_annotations])
annotations_temp_df = (pd.DataFrame(annotations).set_index("name").transpose())
annotations_dfs.append(annotations_temp_df)
if len(annotations_dfs) > 0:
annotations_df = pd.concat(annotations_dfs, sort=True).reset_index()
del annotations_df["index"]
-
+
# puts together both data frames
epitopes_temp_df = pd.concat([epitopes_temp_df, annotations_df], axis=1)
-
+
epitopes_temp_df.replace({None: NOT_AVAILABLE_VALUE}, inplace=True)
epitopes_dfs.append(epitopes_temp_df)
# concatenates all together
epitopes_df = pd.concat(epitopes_dfs)
+ # has to be dropped otherwise a column containing all external annotations will exist
+ # if there are no epitopes below the rank threshold, this column does not exist
+ if 'externalAnnotations' in epitopes_df.columns:
+ epitopes_df.drop(["externalAnnotations"], axis=1, inplace=True)
+ epitopes_df.replace('None', NOT_AVAILABLE_VALUE, inplace=True)
return epitopes_df
@@ -226,6 +245,7 @@ def annotated_neoepitopes2epitopes_table(neoepitopes: List[PredictedEpitope], mh
# adapts output table depending on MHC type
if mhc == MHC_I:
epitopes_df.drop(list(epitopes_df.filter(regex='isoformMhcII.*')), axis=1, inplace=True)
+ epitopes_df.drop(list(epitopes_df.filter(regex='coreMhcII.*')), axis=1, inplace=True)
else:
epitopes_df.drop(list(epitopes_df.filter(regex='alleleMhcI.*')), axis=1, inplace=True)
@@ -238,6 +258,8 @@ def annotated_neoepitopes2epitopes_table(neoepitopes: List[PredictedEpitope], mh
annotations_dfs = []
for e in neoepitopes:
annotations = [a.to_dict() for a in e.neofox_annotations.annotations]
+ # add external annotations to output table
+ annotations.extend([a.to_dict() for a in e.external_annotations])
annotations_temp_df = (pd.DataFrame(annotations).set_index("name").transpose())
annotations_dfs.append(annotations_temp_df)
if len(annotations_dfs) > 0:
@@ -247,6 +269,8 @@ def annotated_neoepitopes2epitopes_table(neoepitopes: List[PredictedEpitope], mh
# puts together both data frames
epitopes_df = pd.concat([epitopes_df, annotations_df], axis=1)
+ # has to be dropped otherwise a column containing all external annotations will exist
+ epitopes_df.drop(["externalAnnotations"], axis=1, inplace=True)
# replace None by NA
epitopes_df.replace({None: NOT_AVAILABLE_VALUE}, inplace=True)
@@ -308,7 +332,7 @@ def _neoantigens_csv2objects(dataframe: pd.DataFrame) -> List[Neoantigen]:
return neoantigens
@staticmethod
- def _neoepitopes_csv2objects(dataframe: pd.DataFrame, mhc_database: MhcDatabase) -> List[PredictedEpitope]:
+ def _neoepitopes_csv2objects(dataframe: pd.DataFrame, mhc_database: MhcDatabase, organism: str) -> List[PredictedEpitope]:
"""transforms an patients CSV into a list of objects"""
neoepitopes = []
mhc_parser = MhcParser.get_mhc_parser(mhc_database)
@@ -324,7 +348,7 @@ def _neoepitopes_csv2objects(dataframe: pd.DataFrame, mhc_database: MhcDatabase)
external_annotations.pop("affinityWildType", None)
external_annotations.pop("rankWildType", None)
external_annotations.pop("alleleMhcI", None)
- external_annotations.pop("alleleMhcII", None)
+ external_annotations.pop("isoformMhcII", None)
external_annotations.pop("position", None)
external_annotations.pop("patientIdentifier", None)
external_annotations.pop("gene", None)
@@ -335,45 +359,28 @@ def _neoepitopes_csv2objects(dataframe: pd.DataFrame, mhc_database: MhcDatabase)
mhci_allele = neoepitope_dict.get("alleleMhcI")
mhcii_isoform = neoepitope_dict.get("isoformMhcII")
patient_id = neoepitope_dict.get("patientIdentifier")
- if mhci_allele is not None and mhci_allele != '':
- neoepitope = PredictedEpitope(
- mutated_peptide=neoepitope_dict.get("mutatedPeptide"),
- wild_type_peptide=neoepitope_dict.get("wildTypePeptide"),
- patient_identifier=patient_id,
- allele_mhc_i=mhc_parser.parse_mhc_allele(mhci_allele),
- gene=neoepitope_dict.get("gene"),
- rna_expression=neoepitope_dict.get("rnaExpression"),
- rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"),
- dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"),
- imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"),
- )
- elif mhcii_isoform is not None and mhcii_isoform != '':
- neoepitope = PredictedEpitope(
- mutated_peptide=neoepitope_dict.get("mutatedPeptide"),
- wild_type_peptide=neoepitope_dict.get("wildTypePeptide"),
- patient_identifier=patient_id,
- isoform_mhc_i_i=mhc_parser.parse_mhc2_isoform(mhcii_isoform),
- gene=neoepitope_dict.get("gene"),
- rna_expression=neoepitope_dict.get("rnaExpression"),
- rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"),
- dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"),
- imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"),
- )
- elif patient_id is not None and patient_id != '':
- neoepitope = PredictedEpitope(
- mutated_peptide=neoepitope_dict.get("mutatedPeptide"),
- wild_type_peptide=neoepitope_dict.get("wildTypePeptide"),
- patient_identifier=patient_id,
- gene=neoepitope_dict.get("gene"),
- rna_expression=neoepitope_dict.get("rnaExpression"),
- rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"),
- dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"),
- imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"),
- )
- else:
+
+ # check if any source for allele inference is given otherwise raise error
+ if all(var is None or var == '' for var in [mhci_allele, mhcii_isoform, patient_id]):
raise ValueError(
"Found an epitope without MHC-I allele, MHC-II isoform or patiend identifier: {}".format(
neoepitope_dict))
+ neoepitope = NeoepitopeFactory.build_neoepitope(
+ organism=organism,
+ mutated_peptide=neoepitope_dict.get("mutatedPeptide"),
+ wild_type_peptide=neoepitope_dict.get("wildTypePeptide"),
+ patient_identifier=patient_id,
+ gene=neoepitope_dict.get("gene"),
+ rna_expression=neoepitope_dict.get("rnaExpression"),
+ rna_variant_allele_frequency=neoepitope_dict.get("rnaVariantAlleleFrequency"),
+ dna_variant_allele_frequency=neoepitope_dict.get("dnaVariantAlleleFrequency"),
+ imputed_gene_expression=neoepitope_dict.get("imputedGeneExpression"),
+ allele_mhc_i=mhci_allele,
+ isoform_mhc_i_i=mhcii_isoform,
+ mhc_database=mhc_database,
+ **external_annotations
+ )
+
neoepitopes.append(neoepitope)
return neoepitopes
diff --git a/neofox/model/factories.py b/neofox/model/factories.py
index 6b6ce7ff..13ded262 100755
--- a/neofox/model/factories.py
+++ b/neofox/model/factories.py
@@ -158,8 +158,10 @@ def build_neoepitope(mutated_peptide=None, wild_type_peptide=None, patient_ident
# parse MHC alleles and isoforms
mhc_parser = MhcParser.get_mhc_parser(mhc_database)
- neoepitope.allele_mhc_i = mhc_parser.parse_mhc_allele(allele_mhc_i) if allele_mhc_i else None
- neoepitope.isoform_mhc_i_i = mhc_parser.parse_mhc2_isoform(isoform_mhc_i_i) if isoform_mhc_i_i else None
+ if allele_mhc_i:
+ neoepitope.allele_mhc_i = mhc_parser.parse_mhc_allele(allele_mhc_i)
+ if isoform_mhc_i_i:
+ neoepitope.isoform_mhc_i_i = mhc_parser.parse_mhc2_isoform(isoform_mhc_i_i)
external_annotation_names = dict.fromkeys(
nam for nam in kw.keys() if stringcase.snakecase(nam) not in set(Neoantigen.__annotations__.keys()))
diff --git a/neofox/model/mhc_parser.py b/neofox/model/mhc_parser.py
index 1d3151e2..056cedb6 100644
--- a/neofox/model/mhc_parser.py
+++ b/neofox/model/mhc_parser.py
@@ -38,10 +38,10 @@
)
HLA_DR_MOLECULE_PATTERN = re.compile(r"(?:HLA-)?(DRB1[\*|_]?[0-9]{2,}[:|_]?[0-9]{2,})")
-H2_ALLELE_PATTERN = re.compile(r"(H2K|H2D|H2L|H2A|H2E)([a-z][0-9]?)")
+H2_ALLELE_PATTERN = re.compile(r"(H2-?[KDLAE])([a-z][0-9]?)")
H2_NETMHCPAN_ALLELE_PATTERN = re.compile(r"H-2-I?(K|D|L|A|E)([a-z][0-9]?)")
H2_MOLECULE_PATTERN = re.compile(r"(H2A|H2E)([a-z][0-9]?)")
-
+H2_MIXMHC2PRED_ALLELE = re.compile(r"H2_(A|E)a_([a-z][0-9]?)__H2_(A|E)b_([a-z][0-9]?)")
ALLELE_PATTERN_BY_ORGANISM = {
ORGANISM_HOMO_SAPIENS: HLA_ALLELE_PATTERN,
ORGANISM_MUS_MUSCULUS: H2_ALLELE_PATTERN,
@@ -80,7 +80,6 @@ def get_mhc_parser(mhc_database: MhcDatabase):
raise NeofoxInputParametersException("Organism not supported {}".format(mhc_database.organism))
return mhc_parser
-
class H2Parser(MhcParser):
def parse_mhc_allele(self, allele: str, pattern=H2_ALLELE_PATTERN) -> MhcAllele:
@@ -113,7 +112,15 @@ def parse_mhc_allele(self, allele: str, pattern=H2_ALLELE_PATTERN) -> MhcAllele:
def parse_mhc2_isoform(self, allele: str) -> Mhc2Isoform:
# MHC II molecules in H2 lab mouse are represented as single chain proteins
# NOTE: by convention we represent this allele in both the alpha and beta chains
+ # format from current version of MixMHC2pred: H2_Aa_b__H2_Ab_b
+ # "H2_{gene}a_{protein}__H2_{gene}b_{protein}"
+
match = H2_NETMHCPAN_ALLELE_PATTERN.match(allele)
+
+ # convert the allele format in MixMHC2pred to the normal format
+ # H2_Aa_b__H2_Ab_b to H2Ab
+ if len(allele) > 5:
+ match = H2_MIXMHC2PRED_ALLELE.match(allele)
if match:
# this ensures that netmhcpan output is normalized
allele = "H2{gene}{protein}".format(gene=match.group(1), protein=match.group(2))
@@ -127,7 +134,6 @@ def get_netmhc2pan_representation(self, isoform: Mhc2Isoform):
return "H-2-I{gene}{protein}".format(
gene=isoform.alpha_chain.gene.strip("H2"), protein=isoform.alpha_chain.protein)
-
class HlaParser(MhcParser):
def parse_mhc_allele(self, allele: str) -> MhcAllele:
diff --git a/neofox/model/models.md b/neofox/model/models.md
index 5d475081..34bbce26 100644
--- a/neofox/model/models.md
+++ b/neofox/model/models.md
@@ -217,6 +217,7 @@ The metadata required for analysis for a given patient + its patient identifier
| wildTypePeptide | [string](#string) | | Closest wild type peptide |
| alleleMhcI | [MhcAllele](#neoantigen.MhcAllele) | | MHC I allele |
| isoformMhcII | [Mhc2Isoform](#neoantigen.Mhc2Isoform) | | MHC II isoform |
+| core | [string](#string) | | MHCII core part of the peptide ligand that primarily interacts with the MHC binding groove, predicted by NetMHCpan/NetMHCIIpan |
| affinityMutated | [float](#float) | | MHC binding affinity for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
| rankMutated | [float](#float) | | MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
| affinityWildType | [float](#float) | | MHC binding affinity for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides and NetMHCIIpan in cas of MHC-II peptides |
@@ -228,6 +229,7 @@ The metadata required for analysis for a given patient + its patient identifier
| imputedGeneExpression | [float](#float) | | Expression value of the transcript from TCGA data. Range [0, +inf]. |
| dnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the DNA. Range [0.0, 1.0] |
| rnaVariantAlleleFrequency | [float](#float) | | Variant allele frequency from the RNA. Range [0.0, 1.0] |
+| externalAnnotations | [Annotation](#neoantigen.Annotation) | repeated | External annotations for neoepitope mode. |
diff --git a/neofox/model/neoantigen.proto b/neofox/model/neoantigen.proto
index 572fad93..2cdce1cf 100755
--- a/neofox/model/neoantigen.proto
+++ b/neofox/model/neoantigen.proto
@@ -288,53 +288,62 @@ message PredictedEpitope {
*/
Mhc2Isoform isoformMhcII = 5;
/**
+ MHC core part of the peptide ligand that primarily interacts with the
+ MHC binding groove, predicted by NetMHCpan/NetMHCIIpan
+ */
+ string core = 6;
+ /**
MHC binding affinity for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides
and NetMHCIIpan in cas of MHC-II peptides
*/
- float affinityMutated = 6;
+ float affinityMutated = 7;
/**
MHC binding rank for the mutated peptide. This value is estimated with NetMHCpan in case of MHC-I peptides
and NetMHCIIpan in cas of MHC-II peptides
*/
- float rankMutated = 7;
+ float rankMutated = 8;
/**
MHC binding affinity for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides
and NetMHCIIpan in cas of MHC-II peptides
*/
- float affinityWildType = 8;
+ float affinityWildType = 9;
/**
MHC binding rank for the wild type peptide. This value is estimated with NetMHCpan in case of MHC-I peptides
and NetMHCIIpan in cas of MHC-II peptides
*/
- float rankWildType = 9;
+ float rankWildType = 10;
/**
The NeoFox neoantigen annotations
*/
- Annotations neofoxAnnotations = 10;
+ Annotations neofoxAnnotations = 11;
/**
Patient identifier
*/
- string patientIdentifier = 11;
+ string patientIdentifier = 12;
/**
The HGNC gene symbol or gene identifier
*/
- string gene = 12;
+ string gene = 13;
/**
Expression value of the transcript from RNA data. Range [0, +inf].
*/
- float rnaExpression = 13;
+ float rnaExpression = 14;
/**
Expression value of the transcript from TCGA data. Range [0, +inf].
*/
- float imputedGeneExpression = 14;
+ float imputedGeneExpression = 15;
/**
Variant allele frequency from the DNA. Range [0.0, 1.0]
*/
- float dnaVariantAlleleFrequency = 15;
+ float dnaVariantAlleleFrequency = 16;
/**
Variant allele frequency from the RNA. Range [0.0, 1.0]
*/
- float rnaVariantAlleleFrequency = 16;
+ float rnaVariantAlleleFrequency = 17;
+ /**
+ External annotations for neoepitope mode.
+ */
+ repeated Annotation externalAnnotations = 18;
}
/**
diff --git a/neofox/model/neoantigen.py b/neofox/model/neoantigen.py
index b4b925fd..14eb859c 100755
--- a/neofox/model/neoantigen.py
+++ b/neofox/model/neoantigen.py
@@ -229,36 +229,41 @@ class PredictedEpitope(betterproto.Message):
allele_mhc_i: "MhcAllele" = betterproto.message_field(4)
# *MHC II isoform
isoform_mhc_i_i: "Mhc2Isoform" = betterproto.message_field(5)
+ # *MHCII core part of the peptide ligand that primarily interacts with the
+ # MHC binding groove, predicted by NetMHCpan/NetMHCIIpan
+ core: str = betterproto.string_field(6)
# *MHC binding affinity for the mutated peptide. This value is estimated with
# NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II
# peptides
- affinity_mutated: float = betterproto.float_field(6)
+ affinity_mutated: float = betterproto.float_field(7)
# *MHC binding rank for the mutated peptide. This value is estimated with
# NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II
# peptides
- rank_mutated: float = betterproto.float_field(7)
+ rank_mutated: float = betterproto.float_field(8)
# *MHC binding affinity for the wild type peptide. This value is estimated
# with NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II
# peptides
- affinity_wild_type: float = betterproto.float_field(8)
+ affinity_wild_type: float = betterproto.float_field(9)
# *MHC binding rank for the wild type peptide. This value is estimated with
# NetMHCpan in case of MHC-I peptidesand NetMHCIIpan in cas of MHC-II
# peptides
- rank_wild_type: float = betterproto.float_field(9)
+ rank_wild_type: float = betterproto.float_field(10)
# *The NeoFox neoantigen annotations
- neofox_annotations: "Annotations" = betterproto.message_field(10)
+ neofox_annotations: "Annotations" = betterproto.message_field(11)
# *Patient identifier
- patient_identifier: str = betterproto.string_field(11)
+ patient_identifier: str = betterproto.string_field(12)
# *The HGNC gene symbol or gene identifier
- gene: str = betterproto.string_field(12)
+ gene: str = betterproto.string_field(13)
# *Expression value of the transcript from RNA data. Range [0, +inf].
- rna_expression: float = betterproto.float_field(13)
+ rna_expression: float = betterproto.float_field(14)
# *Expression value of the transcript from TCGA data. Range [0, +inf].
- imputed_gene_expression: float = betterproto.float_field(14)
+ imputed_gene_expression: float = betterproto.float_field(15)
# *Variant allele frequency from the DNA. Range [0.0, 1.0]
- dna_variant_allele_frequency: float = betterproto.float_field(15)
+ dna_variant_allele_frequency: float = betterproto.float_field(16)
# *Variant allele frequency from the RNA. Range [0.0, 1.0]
- rna_variant_allele_frequency: float = betterproto.float_field(16)
+ rna_variant_allele_frequency: float = betterproto.float_field(17)
+ # *External annotations for neoepitope mode.
+ external_annotations: List["Annotation"] = betterproto.message_field(18)
@dataclass
diff --git a/neofox/model/validation.py b/neofox/model/validation.py
index a53e16f1..2b1737bf 100755
--- a/neofox/model/validation.py
+++ b/neofox/model/validation.py
@@ -84,6 +84,8 @@ def validate_neoantigen(neoantigen: Neoantigen):
# avoids this validation when there is no wild type
if neoantigen.wild_type_xmer:
+ assert len(neoantigen.wild_type_xmer) == len(neoantigen.mutated_xmer), \
+ "The length of the wildtype (wildTypeXmer) and mutated peptide sequence (mutatedXmer) are not the same. Both sequences should have the same length in case of point mutations. wildTypeXmer shall be empty, specially in the case of neoantigen candidates derived from other sources than SNVs."
for aa in neoantigen.wild_type_xmer:
ModelValidator._validate_aminoacid(aa)
@@ -138,6 +140,8 @@ def validate_neoepitope(neoepitope: PredictedEpitope, organism: str):
if has_wt_peptide:
length_wt_peptide = len(neoepitope.wild_type_peptide)
+ assert length_wt_peptide == length_mutated_peptide, \
+ "Neoepitope does not have the same length of wildtype and mutated sequence. Both sequences should have the same length in case of point mutations. wildTypePeptide shall be empty, specially in the case of neoantigen candidates derived from other sources than SNVs."
if has_mhc_i:
assert ModelValidator.is_mhci_peptide_length_valid(length_wt_peptide), \
"Mutated MHC-I peptide has a non supported length of {}".format(length_wt_peptide)
diff --git a/neofox/neofox.py b/neofox/neofox.py
index 40a698e2..0c8b0761 100755
--- a/neofox/neofox.py
+++ b/neofox/neofox.py
@@ -30,6 +30,7 @@
from neofox.model.factories import NeoantigenFactory
from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction
from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator
+from neofox.published_features.expression import Expression
from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS
from neofox import NEOFOX_LOG_FILE_ENV
from neofox.annotator.neoantigen_annotator import NeoantigenAnnotator
@@ -104,16 +105,9 @@ def __init__(
self._validate_input_data()
- # retrieve from the data, if RNA-seq was available
- # add this information to patient model
- expression_per_patient = {self.patients[patient].identifier: [] for patient in self.patients}
- for neoantigen in self.neoantigens:
- expression_per_patient[neoantigen.patient_identifier].append(neoantigen.rna_expression)
- # only performs the expression imputation for humans
+ # annotate TCGA gene expression
if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS:
- # impute expresssion from TCGA, ONLY if isRNAavailable = False for given patient,
- # otherwise original values is reported
# NOTE: this must happen after validation to avoid uncaptured errors due to missing patients
# NOTE: add gene expression to neoantigen candidate model
self.neoantigens = self._conditional_expression_imputation()
@@ -127,16 +121,16 @@ def _conditional_expression_imputation(self) -> List[Neoantigen]:
neoantigens_transformed = []
for neoantigen in self.neoantigens:
- expression_value = neoantigen.rna_expression
+
patient = self.patients[neoantigen.patient_identifier]
neoantigen_transformed = neoantigen
+
gene_expression = expression_annotator.get_gene_expression_annotation(
gene_name=neoantigen.gene, tcga_cohort=patient.tumor_type
)
- if expression_value is None and patient.tumor_type is not None and patient.tumor_type != "":
- expression_value = gene_expression
- neoantigen_transformed.rna_expression = expression_value
- neoantigen.imputed_gene_expression = gene_expression
+
+ neoantigen_transformed.imputed_gene_expression = gene_expression
+
neoantigens_transformed.append(neoantigen_transformed)
return neoantigens_transformed
diff --git a/neofox/published_features/expression.py b/neofox/published_features/expression.py
index ce687805..d7d0c6d9 100755
--- a/neofox/published_features/expression.py
+++ b/neofox/published_features/expression.py
@@ -18,8 +18,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .#
from typing import List
-from logzero import logger
-from neofox.model.neoantigen import Annotation, Neoantigen, Patient
+from neofox.model.neoantigen import Annotation, Neoantigen
from neofox.model.factories import AnnotationFactory
@@ -27,17 +26,17 @@ class Expression:
@staticmethod
def _get_expression_annotation(
- transcript_expression: float, vaf_rna: float
+ transcript_gene_expression: float, vaf: float
) -> float:
"""
- This function calculates the product of VAF in RNA and transcript expression
+ This function calculates the product of VAF and transcript expression
to reflect the expression of the mutated transcript
"""
expression_mut = None
try:
expression_mut = (
- transcript_expression * vaf_rna
- if vaf_rna is not None and vaf_rna >= 0.0
+ transcript_gene_expression * vaf
+ if vaf is not None and vaf >= 0.0
else None
)
except (TypeError, ValueError):
@@ -46,11 +45,17 @@ def _get_expression_annotation(
def get_annotations(self, neoantigen: Neoantigen) -> List[Annotation]:
- vaf = neoantigen.rna_variant_allele_frequency
- if vaf is None or vaf == -1:
- vaf = neoantigen.dna_variant_allele_frequency
-
return [
AnnotationFactory.build_annotation(
- name="Expression_mutated_transcript", value=self._get_expression_annotation(
- transcript_expression=neoantigen.rna_expression, vaf_rna=vaf))]
+ name="Mutated_rnaExpression_fromRNA", value=self._get_expression_annotation(
+ transcript_gene_expression=neoantigen.rna_expression, vaf=neoantigen.rna_variant_allele_frequency)),
+ AnnotationFactory.build_annotation(
+ name="Mutated_rnaExpression_fromDNA", value=self._get_expression_annotation(
+ transcript_gene_expression=neoantigen.rna_expression, vaf=neoantigen.dna_variant_allele_frequency)),
+ AnnotationFactory.build_annotation(
+ name="Mutated_imputedGeneExpression_fromRNA", value=self._get_expression_annotation(
+ transcript_gene_expression=neoantigen.imputed_gene_expression, vaf=neoantigen.rna_variant_allele_frequency)),
+ AnnotationFactory.build_annotation(
+ name="Mutated_imputedGeneExpression_fromDNA", value=self._get_expression_annotation(
+ transcript_gene_expression=neoantigen.imputed_gene_expression, vaf=neoantigen.dna_variant_allele_frequency))
+ ]
diff --git a/neofox/published_features/neoag/__init__.py b/neofox/published_features/neoag/__init__.py
deleted file mode 100755
index 4a64d329..00000000
--- a/neofox/published_features/neoag/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH.
-#
-# This file is part of Neofox
-# (see https://github.com/tron-bioinformatics/neofox).
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .#
diff --git a/neofox/published_features/neoag/neoag-master/Final_gbm_model.rds b/neofox/published_features/neoag/neoag-master/Final_gbm_model.rds
deleted file mode 100755
index 3588d3c5..00000000
Binary files a/neofox/published_features/neoag/neoag-master/Final_gbm_model.rds and /dev/null differ
diff --git a/neofox/published_features/neoag/neoag-master/LICENSE.txt b/neofox/published_features/neoag/neoag-master/LICENSE.txt
deleted file mode 100755
index 0be219fb..00000000
--- a/neofox/published_features/neoag/neoag-master/LICENSE.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-COPYRIGHT AND PERMISSION NOTICE
-UNC Software Neoantigen Immunogenicity Prediction Model
-Copyright (C) 2018 The University of North Carolina at Chapel Hill
-All rights reserved.
-
-Neoantigen Immunogenicity Prediction Model software (Program) is the property
-of The University of North Carolina at Chapel Hill (UNC). UNC grants to
-RECIPIENT a royalty-free, nonexclusive, and nontransferable license to use the
-Program for non-commercial research purposes only. Title and copyright to the
-Program and any associated documentation shall at all times remain with UNC.
-RECIPIENT acknowledges that the Program is a research tool still in the
-development stage and that it is being supplied "as is," without any
-accompanying services or improvements from UNC. RECIPIENT agrees to use the
-Program solely for internal purposes and shall not distribute or transfer it
-to another location or to any other person without prior written permission
-from UNC.
diff --git a/neofox/published_features/neoag/neoag-master/NeoAg_immunogenicity_predicition_GBM.R b/neofox/published_features/neoag/neoag-master/NeoAg_immunogenicity_predicition_GBM.R
deleted file mode 100755
index 4a98c969..00000000
--- a/neofox/published_features/neoag/neoag-master/NeoAg_immunogenicity_predicition_GBM.R
+++ /dev/null
@@ -1,75 +0,0 @@
-#############################################################
-##################TCGA NeoAg analysis########################
-#############################################################
-
-#Original analysis run in R v3.5.2
-
-library(caret) #Original analysis run in v6.0-84
-library(Peptides) #Original analysis run in v2.4
-library(data.table) #Original analysis run in v1.12.0
-library(doParallel) #Original analysis run in v1.0.14
-registerDoParallel(1) #Can change to suitable number of threads
-
-args = commandArgs(trailingOnly=TRUE)
-
-path_to_file <- args[1]
-neo_tab_path <- args[2]
-outifle <- args[3]
-
-
-#Input paths
-# neo_tab_path = "./TCGA_neoAg_example.txt"
-# GBM_model_path = "./NeoAg_immunogenicity/Final_gbm_model.rds"
-#neo_tab_path = paste(path_to_file, "TCGA_neoAg_example.txt", sep = "")
-GBM_model_path = paste(path_to_file, "Final_gbm_model.rds", sep = "/")
-#neo_tab_path = "/projects/SUMMIT/WP1.2/input/development/neoag/tmp_neoag_kZe31d.txt"
-#neo_tab_path = "/projects/SUMMIT/WP1.2/input/development/neoag/neoag-master/TCGA_neoAg_example.txt"
-
-
-#Example input
-neo_tab = fread(neo_tab_path)
-
-#Function for generating independent variables for the GBM model
-model_process = function(n){
- c(
- ifelse(substr(neo_tab$mut_peptide[n],1,1) == "V",1,0),
-
- ifelse(substr(neo_tab$mut_peptide[n],nchar(neo_tab$mut_peptide[n]),nchar(neo_tab$mut_peptide[n])) == "V",1,0),
-
- ifelse(aaComp(substr(neo_tab$mut_peptide[n],nchar(neo_tab$mut_peptide[n]),nchar(neo_tab$mut_peptide[n])))[[1]][2] == 1,1,0),
-
- ifelse(aaComp(substr(neo_tab$Reference[n],neo_tab$peptide_variant_position[n],neo_tab$peptide_variant_position[n]))[[1]][8] == 1,1,0),
-
- (aaComp(substr(neo_tab$mut_peptide[n],neo_tab$peptide_variant_position[n],neo_tab$peptide_variant_position[n]))[[1]][2] -
- aaComp(substr(neo_tab$Reference[n],neo_tab$peptide_variant_position[n],neo_tab$peptide_variant_position[n]))[[1]][2]),
-
- ifelse("K" %in% unlist(strsplit(substr(neo_tab$mut_peptide[n],1,nchar(neo_tab$mut_peptide[n])-7),"|")),1,0),
-
- ifelse("V" %in% unlist(strsplit(substr(neo_tab$mut_peptide[n],1,3),"|")),1,0)
- )
-}
-
-#Multi-thread derivation of features
-model_mat = foreach(n = 1:nrow(neo_tab), .combine = rbind) %dopar% model_process(n)
-
-# only one epitope-df
-if(is(model_mat, "numeric")){
- model_mat <- as.data.frame(t(model_mat))
-}
-
-colnames(model_mat) = c("Absolute_position_1_V", "Last_position_V", "Last_position_Small", "Reference_AA_at_mutated_position_Basic",
- "Mutated_position_change_of_Small_feature", "Relative_site_1_K", "First_three_AA_V" )
-##############################################################
-
-#Formatting, binding input matrix with feature sset
-neo_tab_final = cbind(neo_tab, model_mat)
-
-#Read in the GBM R object, run on the matrix generated above
-Final_model = readRDS(GBM_model_path)
-
-#Predicting neoantigen immunogenicity scores from above GBM model
-TCGA_predict = predict(Final_model, newdata = model_mat, type = "raw")
-
-TCGA_predict <- ifelse(length(TCGA_predict) == 0, NA, TCGA_predict )
-
-cat(TCGA_predict)
diff --git a/neofox/published_features/neoag/neoag_gbm_model.py b/neofox/published_features/neoag/neoag_gbm_model.py
deleted file mode 100755
index 4f1633bc..00000000
--- a/neofox/published_features/neoag/neoag_gbm_model.py
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH.
-#
-# This file is part of Neofox
-# (see https://github.com/tron-bioinformatics/neofox).
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .#
-
-import os
-from neofox.helpers import intermediate_files
-from neofox.helpers.epitope_helper import EpitopeHelper
-from neofox.model.neoantigen import Annotation, PredictedEpitope, Neoantigen
-from neofox.model.factories import AnnotationFactory
-
-
-class NeoagCalculator(object):
-
- def __init__(self, runner, configuration):
- """
- :type runner: neofox.helpers.runner.Runner
- :type configuration: neofox.references.DependenciesConfiguration
- """
- self.runner = runner
- self.configuration = configuration
-
- def _apply_gbm(self, tmp_in):
- """this function calls NeoAg tool. this tool applys a gradient boosting machine based on biochemical features
- to epitopes (predicted seqs)
- """
- my_path = os.path.abspath(os.path.dirname(__file__))
- model_path = os.path.join(my_path, "neoag-master")
- tool_path = os.path.join(
- my_path, "neoag-master/NeoAg_immunogenicity_predicition_GBM.R"
- )
- cmd = [self.configuration.rscript, tool_path, model_path, tmp_in]
- output, _ = self.runner.run_command(cmd)
- return output
-
- def _prepare_tmp_for_neoag(
- self,
- sample_id,
- mut_peptide,
- score_mut,
- ref_peptide,
- peptide_variant_position,
- tmp_file_name,
- ):
- """writes necessary epitope information into temporary file for neoag tool; only for epitopes with
- affinity < 500 nM
- """
- header = ["Sample_ID", "mut_peptide", "Reference", "peptide_variant_position"]
- try:
- epi_row = "\t".join(
- [sample_id, mut_peptide, ref_peptide, str(peptide_variant_position)]
- )
- except ValueError:
- epi_row = "\t".join(["NA", "NA", "NA", "NA"])
- with open(tmp_file_name, "w") as f:
- f.write("\t".join(header) + "\n")
- f.write(epi_row + "\n")
-
- def calculate_neoag_score(self, epitope: PredictedEpitope):
- tmp_file_name = intermediate_files.create_temp_file(
- prefix="tmp_neoag_", suffix=".txt"
- )
- self._prepare_tmp_for_neoag(
- "*****",
- epitope.mutated_peptide,
- epitope.affinity_mutated,
- epitope.wild_type_peptide,
- EpitopeHelper.position_of_mutation_epitope(epitope=epitope),
- tmp_file_name,
- )
- neoag_score = self._apply_gbm(tmp_file_name)
- os.remove(tmp_file_name)
- return neoag_score
-
- def get_annotation(self, epitope_mhci: PredictedEpitope, neoantigen: Neoantigen) -> Annotation:
- """wrapper function to determine neoag immunogenicity score for a mutated peptide sequence"""
-
- neoag_score = None
- if neoantigen.wild_type_xmer and epitope_mhci.mutated_peptide and epitope_mhci.wild_type_peptide:
- neoag_score = self.calculate_neoag_score(epitope=epitope_mhci)
-
- annotation = AnnotationFactory.build_annotation(value=neoag_score, name="Neoag_immunogenicity")
- return annotation
diff --git a/neofox/published_features/priority_score.py b/neofox/published_features/priority_score.py
index 832b3354..e849e174 100755
--- a/neofox/published_features/priority_score.py
+++ b/neofox/published_features/priority_score.py
@@ -21,9 +21,8 @@
# import modules
import math
from typing import List
-
from neofox.helpers.epitope_helper import EpitopeHelper
-from neofox.model.neoantigen import Annotation, PredictedEpitope, Neoantigen, Patient
+from neofox.model.neoantigen import Annotation, PredictedEpitope, Neoantigen
from neofox.model.factories import AnnotationFactory
from neofox.MHC_predictors.netmhcpan.combine_netmhcpan_pred_multiple_binders import (
BestAndMultipleBinder,
@@ -43,9 +42,8 @@ def calc_logistic_function(self, mhc_score):
def calc_priority_score(
self,
- vaf_dna,
- vaf_rna,
- transcript_expr,
+ vaf,
+ transcript_gene_expr,
no_mismatch,
score_mut,
score_wt,
@@ -53,15 +51,12 @@ def calc_priority_score(
):
"""
This function calculates the Priority Score using parameters for mhc I.
+ Bjerregaard, 2017, Cancer Immunol Immunother
+ https://doi.org/10.1007/s00262-017-2001-3
"""
priority_score = None
- vaf = None
try:
- if vaf_dna is not None and vaf_dna != -1:
- vaf = vaf_dna
- elif vaf_rna is not None and vaf_rna != -1:
- vaf = vaf_rna
- if vaf:
+ if vaf is not None and vaf != -1:
l_mut = self.calc_logistic_function(score_mut)
l_wt = self.calc_logistic_function(score_wt)
priority_score = self.mupexi(
@@ -69,7 +64,7 @@ def calc_priority_score(
l_wt=l_wt,
mut_not_in_prot=mut_not_in_prot,
no_mismatch=no_mismatch,
- transcript_expr=transcript_expr,
+ transcript_gene_expr=transcript_gene_expr,
vaf_tumor=vaf
)
except (TypeError, ValueError):
@@ -77,9 +72,9 @@ def calc_priority_score(
return priority_score
def mupexi(
- self, l_mut, l_wt, mut_not_in_prot, no_mismatch, transcript_expr, vaf_tumor
+ self, l_mut, l_wt, mut_not_in_prot, no_mismatch, transcript_gene_expr, vaf_tumor
):
- priority_score = (l_mut * vaf_tumor * math.tanh(transcript_expr)) * (
+ priority_score = (l_mut * vaf_tumor * math.tanh(transcript_gene_expr)) * (
float(mut_not_in_prot) * (1 - 2 ** (-no_mismatch) * l_wt)
)
return priority_score
@@ -94,20 +89,43 @@ def get_annotations(
returns number of mismatches between best MHCI / MHC II epitopes (rank) and their corresponding WTs
"""
num_mismatches_mhc1 = None
- priority_score = None
+ priority_score_dna = None
+ priority_score_rna = None
+ priority_score_imputed_dna = None
+ priority_score_imputed_rna = None
if netmhcpan.best_epitope_by_rank.wild_type_peptide and netmhcpan.best_epitope_by_rank.mutated_peptide:
num_mismatches_mhc1 = EpitopeHelper.number_of_mismatches(
epitope_wild_type=netmhcpan.best_epitope_by_rank.wild_type_peptide,
epitope_mutation=netmhcpan.best_epitope_by_rank.mutated_peptide,
)
- vaf_rna = neoantigen.dna_variant_allele_frequency
- if vaf_rna is None:
- vaf_rna = neoantigen.rna_variant_allele_frequency
- priority_score = self.calc_priority_score(
- vaf_dna=neoantigen.dna_variant_allele_frequency,
- vaf_rna=vaf_rna,
- transcript_expr=neoantigen.rna_expression,
+ priority_score_dna = self.calc_priority_score(
+ vaf=neoantigen.dna_variant_allele_frequency,
+ transcript_gene_expr=neoantigen.rna_expression,
+ no_mismatch=num_mismatches_mhc1,
+ score_mut=netmhcpan.best_epitope_by_rank.rank_mutated,
+ score_wt=netmhcpan.best_epitope_by_rank.rank_wild_type,
+ mut_not_in_prot=mut_not_in_prot,
+ )
+ priority_score_rna = self.calc_priority_score(
+ vaf=neoantigen.rna_variant_allele_frequency,
+ transcript_gene_expr=neoantigen.rna_expression,
+ no_mismatch=num_mismatches_mhc1,
+ score_mut=netmhcpan.best_epitope_by_rank.rank_mutated,
+ score_wt=netmhcpan.best_epitope_by_rank.rank_wild_type,
+ mut_not_in_prot=mut_not_in_prot,
+ )
+ priority_score_imputed_dna = self.calc_priority_score(
+ vaf=neoantigen.dna_variant_allele_frequency,
+ transcript_gene_expr=neoantigen.imputed_gene_expression,
+ no_mismatch=num_mismatches_mhc1,
+ score_mut=netmhcpan.best_epitope_by_rank.rank_mutated,
+ score_wt=netmhcpan.best_epitope_by_rank.rank_wild_type,
+ mut_not_in_prot=mut_not_in_prot,
+ )
+ priority_score_imputed_rna = self.calc_priority_score(
+ vaf=neoantigen.rna_variant_allele_frequency,
+ transcript_gene_expr=neoantigen.imputed_gene_expression,
no_mismatch=num_mismatches_mhc1,
score_mut=netmhcpan.best_epitope_by_rank.rank_mutated,
score_wt=netmhcpan.best_epitope_by_rank.rank_wild_type,
@@ -119,25 +137,73 @@ def get_annotations(
),
# priority score with rank score
AnnotationFactory.build_annotation(
- value=priority_score,
- name="Priority_score",
+ value=priority_score_dna,
+ name="Priority_score_fromDNA",
+ ),
+ # imputed priority score with rank score
+ AnnotationFactory.build_annotation(
+ value=priority_score_imputed_rna,
+ name="Priority_score_imputed_fromRNA"
+ ),
+ # priority score with rank score from RNA
+ AnnotationFactory.build_annotation(
+ value=priority_score_rna,
+ name="Priority_score_fromRNA",
),
+ # imputed priority score with rank score
+ AnnotationFactory.build_annotation(
+ value=priority_score_imputed_dna,
+ name="Priority_score_imputed_fromDNA"
+ )
]
return annotations
- def get_annotations_epitope_mhci(self, epitope: PredictedEpitope, vaf_tumor, transcript_exp, vaf_rna) -> \
+ def get_annotations_epitope_mhci(self, epitope: PredictedEpitope, vaf_tumor, transcript_exp, vaf_rna, gene_exp) -> \
List[Annotation]:
return [
AnnotationFactory.build_annotation(
value=self.calc_priority_score(
- vaf_dna=vaf_tumor,
- vaf_rna=vaf_rna,
- transcript_expr=transcript_exp,
+ vaf=vaf_tumor,
+ transcript_gene_expr=transcript_exp,
+ no_mismatch=int(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='number_of_mismatches')),
+ score_mut=epitope.rank_mutated,
+ score_wt=epitope.rank_wild_type,
+ mut_not_in_prot=bool(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='mutation_not_found_in_proteome'))),
+ name='Priority_score_fromDNA'),
+ AnnotationFactory.build_annotation(
+ value=self.calc_priority_score(
+ vaf=vaf_tumor,
+ transcript_gene_expr=gene_exp,
+ no_mismatch=int(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='number_of_mismatches')),
+ score_mut=epitope.rank_mutated,
+ score_wt=epitope.rank_wild_type,
+ mut_not_in_prot=bool(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='mutation_not_found_in_proteome'))),
+ name='Priority_score_imputed_fromDNA'),
+ AnnotationFactory.build_annotation(
+ value=self.calc_priority_score(
+ vaf=vaf_rna,
+ transcript_gene_expr=transcript_exp,
+ no_mismatch=int(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='number_of_mismatches')),
+ score_mut=epitope.rank_mutated,
+ score_wt=epitope.rank_wild_type,
+ mut_not_in_prot=bool(EpitopeHelper.get_annotation_by_name(
+ epitope.neofox_annotations.annotations, name='mutation_not_found_in_proteome'))),
+ name='Priority_score_fromRNA'),
+ AnnotationFactory.build_annotation(
+ value=self.calc_priority_score(
+ vaf=vaf_rna,
+ transcript_gene_expr=gene_exp,
no_mismatch=int(EpitopeHelper.get_annotation_by_name(
epitope.neofox_annotations.annotations, name='number_of_mismatches')),
score_mut=epitope.rank_mutated,
score_wt=epitope.rank_wild_type,
mut_not_in_prot=bool(EpitopeHelper.get_annotation_by_name(
epitope.neofox_annotations.annotations, name='mutation_not_found_in_proteome'))),
- name='Priority_score')
+ name='Priority_score_imputed_fromRNA'),
+
]
diff --git a/neofox/published_features/vaxrank/vaxrank.py b/neofox/published_features/vaxrank/vaxrank.py
index 21d317e4..ce163124 100755
--- a/neofox/published_features/vaxrank/vaxrank.py
+++ b/neofox/published_features/vaxrank/vaxrank.py
@@ -43,6 +43,8 @@ def logistic_epitope_score(
"The relationship between class I binding affinity
and immunogenicity of potential cytotoxic T cell epitopes.
adapted from: https://github.com/openvax/vaxrank/blob/master/vaxrank/epitope_prediction.py
+ Rubinsteyn, 2017, Front Immunol
+ https://doi.org/10.3389/fimmu.2017.01807
"""
if ic50 >= ic50_cutoff:
return 0.0
@@ -70,22 +72,22 @@ def total_binding(self, epitope_predictions: List[PredictedEpitope]):
return mut_scores_logistic
- def combined_score(self, expression_score, total_binding_score):
+ def combined_score(self, expression_imputed_score, total_binding_score):
"""
adapted from: https://github.com/openvax/vaxrank/blob/master/vaxrank/epitope_prediction.py
final ranking score implemented in VaxRank
"""
combined_score = None
try:
- combined_score = float(expression_score) * total_binding_score
+ combined_score = float(expression_imputed_score) * total_binding_score
except (ValueError, TypeError):
pass
return combined_score
- def get_annotations(self, epitope_predictions: List[PredictedEpitope], expression_score) -> List[Annotation]:
- expression_score = expression_score
+ def get_annotations(self, epitope_predictions: List[PredictedEpitope], expression_score, imputed_score) -> List[Annotation]:
total_binding_score = self.total_binding(epitope_predictions)
- ranking_score = self.combined_score(expression_score=expression_score, total_binding_score=total_binding_score)
+ ranking_score = self.combined_score(expression_imputed_score=expression_score, total_binding_score=total_binding_score)
+ ranking_score_imputed = self.combined_score(expression_imputed_score=imputed_score, total_binding_score=total_binding_score)
return [
AnnotationFactory.build_annotation(
value=total_binding_score, name="Vaxrank_bindingScore"
@@ -93,4 +95,7 @@ def get_annotations(self, epitope_predictions: List[PredictedEpitope], expressio
AnnotationFactory.build_annotation(
value=ranking_score, name="Vaxrank_totalScore"
),
+ AnnotationFactory.build_annotation(
+ value=ranking_score_imputed, name="Vaxrank_totalScore_imputed"
+ )
]
diff --git a/neofox/references/installer.py b/neofox/references/installer.py
index a41352c1..77a90c87 100644
--- a/neofox/references/installer.py
+++ b/neofox/references/installer.py
@@ -26,6 +26,7 @@
NETMHCPAN_AVAILABLE_ALLELES_MICE_FILE, NETMHC2PAN_AVAILABLE_ALLELES_MICE_FILE, MUS_MUSCULUS_FASTA,
PREFIX_MUS_MUSCULUS, MUS_MUSCULUS_PICKLE, IEDB_FASTA_MUS_MUSCULUS, IEDB_BLAST_PREFIX_HOMO_SAPIENS,
IEDB_BLAST_PREFIX_MUS_MUSCULUS, H2_DATABASE_AVAILABLE_ALLELES_FILE, RESOURCES_VERSIONS,
+ MIXMHC2PRED_PWM
)
from logzero import logger
@@ -41,13 +42,16 @@
IEDB_URL = 'http://www.iedb.org/downloader.php?file_name=doc/tcell_full_v3.zip'
+MIXMHC2PRED_PWM_MOUSE_URL = "http://ec2-18-188-210-66.us-east-2.compute.amazonaws.com:4000/data/PWMdef/PWMdef_Mouse.zip"
+
class NeofoxReferenceInstaller(object):
- def __init__(self, reference_folder, install_r_dependencies=False):
+ def __init__(self, reference_folder, install_r_dependencies=False, install_mouse_mixmhc2pred=False):
self.config = DependenciesConfigurationForInstaller()
self.runner = Runner()
self.reference_folder = reference_folder
self.install_r_dependencies = install_r_dependencies
+ self.install_mouse_mixmhc2pred = install_mouse_mixmhc2pred
def install(self):
# ensures the reference folder exists
@@ -65,14 +69,20 @@ def install(self):
self._install_r_dependencies()
else:
logger.warning("R dependencies will need to be installed manually")
+ mixmhc2pred_resources = []
+ if self.install_mouse_mixmhc2pred:
+ mixmhc2pred_resources = self._set_mixmhc2pred_pwms()
+ else:
+ logger.warning("MixMHC2pred mouse alleles have to be installed manually")
self._save_resources_versions(
iedb_resource=iedb_resource,
hla_resource=hla_resource,
- proteome_resources=proteome_resources
+ proteome_resources=proteome_resources,
+ mixmhc2pred_resources=mixmhc2pred_resources
)
def _save_resources_versions(
- self, iedb_resource, hla_resource, proteome_resources):
+ self, iedb_resource, hla_resource, proteome_resources, mixmhc2pred_resources):
download_timestamp = datetime.today().strftime('%Y%m%d%H%M%S')
resources_version_file = os.path.join(self.reference_folder, RESOURCES_VERSIONS)
@@ -81,15 +91,17 @@ def _save_resources_versions(
hla_resource.download_timestamp = download_timestamp
for r in proteome_resources:
r.download_timestamp = download_timestamp
+ for r in mixmhc2pred_resources:
+ r.download_timestamp = download_timestamp
resources_version = [
Resource(name="netMHCpan", version="4.1"),
Resource(name="netMHCIIpan", version="4.0"),
- Resource(name="mixMHCpred", version="2.1"),
- Resource(name="mixMHC2pred", version="1.2"),
+ Resource(name="mixMHCpred", version="2.2"),
+ Resource(name="mixMHC2pred", version="2.0.2"),
iedb_resource,
hla_resource
- ] + proteome_resources
+ ] + proteome_resources + mixmhc2pred_resources
json.dump([r.to_dict() for r in resources_version], open(resources_version_file, "w"), indent=4)
@@ -154,19 +166,12 @@ def _set_iedb(self):
os.makedirs(os.path.join(self.reference_folder, IEDB_FOLDER), exist_ok=True)
# download IEDB
- iedb_zip = os.path.join(self.reference_folder, IEDB_FOLDER, "Iedb.zip")
- cmd = 'wget "{}" -O {}'.format(IEDB_URL, iedb_zip)
- self._run_command(cmd)
-
- # unzip IEDB
path_to_iedb_folder = os.path.join(self.reference_folder, IEDB_FOLDER)
- cmd = "unzip -o {iedb_zip} -d {iedb_folder}".format(
- iedb_zip=iedb_zip, iedb_folder=path_to_iedb_folder
- )
+ tcell_full_iedb_file = os.path.join(path_to_iedb_folder, "tcell_full_v3.zip")
+ cmd = 'wget "{}" -O {}'.format(IEDB_URL, tcell_full_iedb_file)
self._run_command(cmd)
# transforms IEDB into fasta
- tcell_full_iedb_file = os.path.join(self.reference_folder, IEDB_FOLDER, "tcell_full_v3.csv")
hash = self._get_md5_hash(tcell_full_iedb_file)
iedb_builder = IedbFastaBuilder(tcell_full_iedb_file)
@@ -249,8 +254,6 @@ def _set_proteome(self):
url=MOUSE_PROTEOME_ISOFORMS, hash=hash_isoforms_mouse),
]
- return hash_human, hash_isoforms_human, version_human, hash_mouse, hash_isoforms_mouse, version_mouse
-
def _prepare_proteome(self, url, url_isoforms, version_url, proteome_file_name, proteome_prefix, proteome_pickle_file_name):
# download proteome
hash = self._download_and_unzip(proteome_file_name, url)
@@ -354,6 +357,32 @@ def _install_r_dependencies(self):
)
self._run_command(cmd)
+ def _set_mixmhc2pred_pwms(self):
+ # Downloads PWMs of species other than human from http://mixmhc2pred.gfellerlab.org/PWMdef
+ # Currently only mouse is supported and downloaded
+ logger.info("Installing MixMHC2pred for mouse...")
+
+ # reference folder path where the MixMHC2pred PWM directories are downloaded into
+ mixmhc2pred_pwm_path = os.path.join(self.reference_folder, MIXMHC2PRED_PWM)
+ os.makedirs(mixmhc2pred_pwm_path, exist_ok=True)
+ # the name of the zip file that will be downloaded
+ zip_file = os.path.basename(MIXMHC2PRED_PWM_MOUSE_URL)
+ pwm_zip_file = os.path.join(mixmhc2pred_pwm_path, zip_file)
+
+ url = MIXMHC2PRED_PWM_MOUSE_URL
+
+ # download the allele PWMs
+ cmd = f"wget {url} -O {pwm_zip_file}"
+ self._run_command(cmd)
+ hash = self._get_md5_hash(pwm_zip_file)
+ # unzip the downloaded PWMs
+ cmd = f"unzip -o {pwm_zip_file} -d {mixmhc2pred_pwm_path}"
+ self._run_command(cmd)
+
+ return [
+ Resource(name="MixMHC2pred_PWM_Mouse", url=MIXMHC2PRED_PWM_MOUSE_URL, hash=hash),
+ ]
+
def _run_command(self, cmd):
logger.info(cmd)
process = subprocess.Popen(
@@ -371,20 +400,25 @@ def __init__(self, input_file):
self.input_file = input_file
def build_fasta(self, organism, process_type, output_file):
+ # read IEDB header
+ iedb_head = pd.read_csv(self.input_file, nrows=2, header=None)
+ # combine the two header rows (table name and value) to get the unique column names
+ # e.g.: "Epitope:Name", "MHC Restriction:Name", "MHC Restriction:Class", ...
+ iedb_cols = iedb_head.apply(lambda col: f'{col[0]}:{col[1]}')
# read IEDB input file
- iedb = pd.read_csv(self.input_file, skiprows=1)
+ iedb = pd.read_csv(self.input_file, skiprows=2, names=iedb_cols)
# filter entries
filtered_iedb = iedb[
- (iedb["Name"].str.contains(organism))
- & (iedb["Object Type"] == "Linear peptide")
- & (iedb["Process Type"] == process_type)
- & (iedb["Qualitative Measure"] == "Positive")
- & (iedb["Class"] == "I")
+ (iedb["Host:Name"].str.contains(organism))
+ & (iedb["Epitope:Object Type"] == "Linear peptide")
+ & (iedb["1st in vivo Process:Process Type"] == process_type)
+ & (iedb["Assay:Qualitative Measurement"] == "Positive")
+ & (iedb["MHC Restriction:Class"] == "I")
]
# parses peptides and validates them, non-valid peptides are filtered out
- filtered_iedb.loc[:, "seq"] = filtered_iedb.loc[:, "Description"].transform(
+ filtered_iedb.loc[:, "seq"] = filtered_iedb.loc[:, "Epitope:Name"].transform(
lambda x: x.strip())
filtered_iedb.loc[:, "valid_peptide"] = filtered_iedb.loc[:, "seq"].transform(
lambda x: _verify_alphabet(Seq(x, IUPAC.protein)))
@@ -393,17 +427,17 @@ def build_fasta(self, organism, process_type, output_file):
# build fasta header: 449|FL-160-2 protein - Trypanosoma cruzi|JH0823|Trypanosoma cruzi|5693
# epitope id|Antigen Name|antigen_id|Organism Name|organism_id
filtered_iedb.loc[:, "epitope_id"] = filtered_iedb.loc[
- :, "Epitope IRI"
+ :, "Epitope:IEDB IRI"
].transform(lambda x: x.replace("http://www.iedb.org/epitope/", "", regex=True))
filtered_iedb.loc[:, "antigen_id"] = filtered_iedb.loc[
- :, "Antigen IRI"
+ :, "Epitope:Source Molecule IRI"
].transform(
lambda x: x.replace(
"http://www.ncbi.nlm.nih.gov/protein/", "", regex=True
).replace("https://ontology.iedb.org/ontology/", "", regex=True)
)
filtered_iedb.loc[:, "organism_id"] = filtered_iedb.loc[
- :, "Organism IRI"
+ :, "Epitope:Source Organism IRI"
].transform(
lambda x: x.replace(
"http://purl.obolibrary.org/obo/NCBITaxon_", "", regex=True
@@ -412,9 +446,9 @@ def build_fasta(self, organism, process_type, output_file):
filtered_iedb.loc[:, "fasta_header"] = filtered_iedb.apply(
lambda row: ">{epitope_id}|{antigen_name}|{antigen_id}|{organism_name}|{organism_id}".format(
epitope_id=str(row["epitope_id"]),
- antigen_name=row["Antigen Name"],
+ antigen_name=row["Epitope:Source Molecule"],
antigen_id=str(row["antigen_id"]),
- organism_name=row["Organism Name"],
+ organism_name=row["Epitope:Source Organism"],
organism_id=str(row["organism_id"]),
),
axis=1,
diff --git a/neofox/references/references.py b/neofox/references/references.py
index 94e09e04..20922716 100755
--- a/neofox/references/references.py
+++ b/neofox/references/references.py
@@ -77,7 +77,12 @@
HLA_DATABASE_AVAILABLE_ALLELES_FILE = "hla_database_allele_list.csv"
H2_DATABASE_AVAILABLE_ALLELES_FILE = "h2_database_allele_list.csv"
MIXMHCPRED_AVAILABLE_ALLELES_FILE = "allele_list.txt"
-MIXMHC2PRED_AVAILABLE_ALLELES_FILE = "Alleles_list.txt"
+MIXMHC2PRED_AVAILABLE_HUMAN_ALLELES_FILE = "PWMdef/Alleles_list_Human.txt"
+MIXMHC2PRED_PWM="MixMHC2pred"
+MIXMHC2PRED_AVAILABLE_MOUSE_PWM_DIR = "PWMdef_Mouse"
+MIXMHC2PRED_AVAILABLE_MOUSE_ALLELES_FILE = "Alleles_list_Mouse.txt"
+
+
PRIME_AVAILABLE_ALLELES_FILE = "alleles.txt"
RESOURCES_VERSIONS = "resources_versions.json"
@@ -127,13 +132,15 @@ def __init__(self):
self.blastp = self._check_and_load_binary(neofox.NEOFOX_BLASTP_ENV, default_value=DEFAULT_BLASTP)
self.mix_mhc2_pred = self._check_and_load_binary(
neofox.NEOFOX_MIXMHC2PRED_ENV, default_value=DEFAULT_MIXMHC2PRED, optional=True, path_search=False)
+
+ # set the available human alleles for MixMHC2pred
if self.mix_mhc2_pred is not None:
- self.mix_mhc2_pred_alleles_list = os.path.join(
- os.path.dirname(self.mix_mhc2_pred), MIXMHC2PRED_AVAILABLE_ALLELES_FILE)
- else:
- self.mix_mhc2_pred_alleles_list = None
+ self.mix_mhc2_pred_human_alleles_list = os.path.join(
+ os.path.dirname(self.mix_mhc2_pred), MIXMHC2PRED_AVAILABLE_HUMAN_ALLELES_FILE)
+
self.mix_mhc_pred = self._check_and_load_binary(
neofox.NEOFOX_MIXMHCPRED_ENV, default_value=DEFAULT_MIXMHCPRED, optional=True, path_search=False)
+
if self.mix_mhc_pred is not None:
self.mix_mhc_pred_alleles_list = os.path.join(
os.path.dirname(self.mix_mhc_pred), "lib", MIXMHCPRED_AVAILABLE_ALLELES_FILE)
@@ -183,7 +190,6 @@ def is_homo_sapiens(self):
def is_mus_musculus(self):
return self.organism == ORGANISM_MUS_MUSCULUS
-
class HlaDatabase(MhcDatabase):
organism = ORGANISM_HOMO_SAPIENS
@@ -280,6 +286,16 @@ def __init__(self, organism=ORGANISM_HOMO_SAPIENS, verbose=False):
self.mhc_database_filename,
self.resources_versions_file
]
+ # set MixMHC2pred-specific paths for non-human mode
+ if not organism == ORGANISM_HOMO_SAPIENS:
+ self.mixmhc2pred_pwm = self._get_reference_file_name(MIXMHC2PRED_PWM)
+ self.mixmhc2pred_alleles_list = self._get_mixmhc2pred_alleles_list()
+ self.mixmhc2pred_pwm_dir = self._get_mixmhc2pred_pwm_dir()
+ self.resources.extend([self.mixmhc2pred_alleles_list, self.mixmhc2pred_pwm_dir])
+ else:
+ self.mixmhc2pred_alleles_list = None
+ self.mixmhc2pred_pwm_dir = None
+
self._check_resources()
self.resources_versions = self.get_resources_versions()
if verbose:
@@ -384,6 +400,17 @@ def _log_configuration(self):
def _get_reference_file_name(self, file_name_suffix):
return os.path.join(self.reference_genome_folder, file_name_suffix)
+ def _get_mixmhc2pred_alleles_list(self):
+ if self.organism == ORGANISM_MUS_MUSCULUS:
+ return os.path.join(self.mixmhc2pred_pwm,
+ MIXMHC2PRED_AVAILABLE_MOUSE_PWM_DIR,
+ MIXMHC2PRED_AVAILABLE_MOUSE_ALLELES_FILE)
+
+ def _get_mixmhc2pred_pwm_dir(self):
+ if self.organism == ORGANISM_MUS_MUSCULUS:
+ return os.path.join(self.mixmhc2pred_pwm,
+ MIXMHC2PRED_AVAILABLE_MOUSE_PWM_DIR)
+
class AvailableAlleles(object):
def __init__(self, references):
diff --git a/neofox/tests/integration_tests/integration_test_tools.py b/neofox/tests/integration_tests/integration_test_tools.py
index 3d44baf5..53c60695 100755
--- a/neofox/tests/integration_tests/integration_test_tools.py
+++ b/neofox/tests/integration_tests/integration_test_tools.py
@@ -99,6 +99,13 @@ def get_h2_two_test(h2_database):
], h2_database
)
+def get_h2_two_test_b(h2_database):
+ return MhcFactory.build_mhc2_alleles(
+ [
+ "H2Ab",
+ "H2Ab",
+ ], h2_database
+ )
mutations_with_rare_aminoacids = [
("UTTDSDGKF", "UTTDSWGKF"), # this is an epitope from IEDB of length 9
@@ -196,7 +203,7 @@ def assert_neoepitope_mhci(self, original_neoepitope: PredictedEpitope, annotate
self.assert_annotation(annotated_neoepitope, annotation_name="hex_alignment_score")
# others to comes
- self.assert_annotation(annotated_neoepitope, annotation_name="Priority_score")
+ self.assert_annotation(annotated_neoepitope, annotation_name="Priority_score_fromDNA")
self.assert_annotation(annotated_neoepitope, annotation_name="Tcell_predictor")
def assert_neoepitope_mhcii(self, original_neoepitope: PredictedEpitope, annotated_neoepitope: PredictedEpitope):
diff --git a/neofox/tests/integration_tests/test_best_multiple_binder.py b/neofox/tests/integration_tests/test_best_multiple_binder.py
index d9e6693b..99c0b235 100755
--- a/neofox/tests/integration_tests/test_best_multiple_binder.py
+++ b/neofox/tests/integration_tests/test_best_multiple_binder.py
@@ -341,10 +341,10 @@ def test_generator_rate_mhcII(self):
position_of_mutation=mutation.position, predictions=predictions_wt
)
- paired_predictions = EpitopeHelper.pair_predictions(
+ paired_predictions = EpitopeHelper.pair_mhcii_predictions(
predictions=predicted_neoepitopes, predictions_wt=filtered_predictions_wt)
generator_rate_ADN = best_multiple.determine_number_of_alternative_binders(predictions=paired_predictions)
generator_rate_CDN = best_multiple.determine_number_of_binders(predictions=paired_predictions)
- self.assertEqual(generator_rate_ADN, 6)
+ self.assertEqual(generator_rate_ADN, 0)
self.assertEqual(generator_rate_CDN, 0)
diff --git a/neofox/tests/integration_tests/test_mixmhcpred.py b/neofox/tests/integration_tests/test_mixmhcpred.py
index ece5f62a..7a3cabd5 100755
--- a/neofox/tests/integration_tests/test_mixmhcpred.py
+++ b/neofox/tests/integration_tests/test_mixmhcpred.py
@@ -40,7 +40,8 @@ def setUp(self):
runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser
)
self.mixmhc2pred = MixMHC2pred(
- runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser
+ runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser,
+ references=self.references
)
self.hla_database = self.references.get_mhc_database()
self.test_mhc_one = integration_test_tools.get_hla_one_test(self.hla_database)
@@ -54,8 +55,8 @@ def test_mixmhcpred_epitope_iedb(self):
best_result = EpitopeHelper.select_best_by_affinity(
predictions=self.mixmhcpred.results, maximum=True)
self.assertEquals("NLVPMVATV", best_result.mutated_peptide)
- self.assertAlmostEqual(0.306957, best_result.affinity_mutated, delta=0.00001)
- self.assertEquals(0.6, best_result.rank_mutated)
+ self.assertAlmostEqual(0.107561, best_result.affinity_mutated, delta=0.00001)
+ self.assertEquals(0.0659342, best_result.rank_mutated)
self.assertEquals("HLA-A*02:01", best_result.allele_mhc_i.name)
def test_mixmhcpred_too_small_epitope(self):
@@ -81,8 +82,8 @@ def test_mixmhcpred_not_supported_allele(self):
best_result = EpitopeHelper.select_best_by_affinity(
predictions=self.mixmhcpred.results, maximum=True)
self.assertEqual('SIYGGLVLI', best_result.mutated_peptide)
- self.assertAlmostEqual(0.158294, best_result.affinity_mutated, places=5)
- self.assertEqual(1, best_result.rank_mutated)
+ self.assertAlmostEqual(-0.296735, best_result.affinity_mutated, places=5)
+ self.assertEqual(0.267446, best_result.rank_mutated)
self.assertEqual('HLA-A*02:01', best_result.allele_mhc_i.name)
def test_mixmhcpred_rare_aminoacid(self):
@@ -113,9 +114,9 @@ def test_mixmhcpred2_epitope_iedb(self):
uniprot=self.uniprot
)
best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
- self.assertEquals("DEVLGEPSQDILVT", best_result.mutated_peptide)
- self.assertEquals(3.06, best_result.rank_mutated)
- self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_result.isoform_mhc_i_i.name)
+ self.assertEquals("TDQTRLEATISPET", best_result.mutated_peptide)
+ self.assertEquals(0.913, best_result.rank_mutated)
+ self.assertEquals("HLA-DPA1*01:03-DPB1*13:01", best_result.isoform_mhc_i_i.name)
def test_mixmhcpred2_epitope_iedb_forcing_no_drb1(self):
# this is an epitope from IEDB of length 15
@@ -128,9 +129,9 @@ def test_mixmhcpred2_epitope_iedb_forcing_no_drb1(self):
uniprot=self.uniprot
)
best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
- self.assertEquals("DEVLGEPSQDILVT", best_result.mutated_peptide)
- self.assertEquals(3.06, best_result.rank_mutated)
- self.assertEquals("HLA-DPA1*01:03-DPB1*04:01", best_result.isoform_mhc_i_i.name)
+ self.assertEquals("TDQTRLEATISPET", best_result.mutated_peptide)
+ self.assertEquals(0.913, best_result.rank_mutated)
+ self.assertEquals("HLA-DPA1*01:03-DPB1*13:01", best_result.isoform_mhc_i_i.name)
def test_mixmhcpred2_too_small_epitope(self):
neoantigen = get_neoantigen(mutated_xmer="ENPVVHFF", wild_type_xmer="ENPVVHFF")
@@ -179,9 +180,9 @@ def test_mixmhc2pred_allele(self):
logger.info(alleles)
self.mixmhc2pred.run(neoantigen=neoantigen, mhc=MHC_TWO_NEW, uniprot=self.uniprot)
best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
- self.assertIsNone(best_result.mutated_peptide)
- self.assertIsNone(best_result.rank_mutated)
- self.assertIsNone(best_result.isoform_mhc_i_i.name)
+ self.assertIsNotNone(best_result.mutated_peptide)
+ self.assertIsNotNone(best_result.rank_mutated)
+ self.assertIsNotNone(best_result.isoform_mhc_i_i.name)
def test_generate_nmers(self):
neoantigen = get_neoantigen(mutated_xmer="DDDDDVDDD", wild_type_xmer="DDDDDDDDD")
diff --git a/neofox/tests/integration_tests/test_mixmhcpred_mouse.py b/neofox/tests/integration_tests/test_mixmhcpred_mouse.py
new file mode 100755
index 00000000..8f9bd029
--- /dev/null
+++ b/neofox/tests/integration_tests/test_mixmhcpred_mouse.py
@@ -0,0 +1,122 @@
+#
+# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH.
+#
+# This file is part of Neofox
+# (see https://github.com/tron-bioinformatics/neofox).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.#
+from unittest import TestCase
+from logzero import logger
+
+from neofox.model.factories import MhcFactory
+from neofox.model.mhc_parser import MhcParser
+from neofox.model.neoantigen import Mhc2Name
+from neofox.helpers.epitope_helper import EpitopeHelper
+import neofox.tests.integration_tests.integration_test_tools as integration_test_tools
+from neofox.MHC_predictors.MixMHCpred.mixmhc2pred import MixMHC2pred
+from neofox.helpers.runner import Runner
+from neofox.annotation_resources.uniprot.uniprot import Uniprot
+from neofox.tests.tools import get_neoantigen
+from neofox.references.references import ReferenceFolder, DependenciesConfiguration, ORGANISM_HOMO_SAPIENS, \
+ ORGANISM_MUS_MUSCULUS
+
+
+class TestMixMHCPredMouse(TestCase):
+ def setUp(self):
+ self.references, self.configuration = integration_test_tools.load_references(organism=ORGANISM_MUS_MUSCULUS)
+ self.runner = Runner()
+ mhc_parser = MhcParser.get_mhc_parser(self.references.get_mhc_database())
+ #self.mixmhcpred = MixMHCpred(
+ # runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser
+ #)
+ self.mixmhc2pred = MixMHC2pred(
+ runner=self.runner, configuration=self.configuration, mhc_parser=mhc_parser,
+ references=self.references
+ )
+ self.hla_database = self.references.get_mhc_database()
+ #self.test_mhc_one = integration_test_tools.get_hla_one_test(self.hla_database)
+ self.test_mhc_two = integration_test_tools.get_h2_two_test(self.hla_database)
+ self.test_mhc_two_b = integration_test_tools.get_h2_two_test_b(self.hla_database)
+ self.uniprot = Uniprot(self.references.uniprot_pickle)
+
+ def test_mixmhcpred2_antigen_iedb_b_haplotype(self):
+ # Test mixmhc2pred with H2Ab allele (C57BL/6 setting)
+ # this is an antigen from IEDB of length 27
+ neoantigen = get_neoantigen(
+ mutated_xmer="RQHSIKEGLQFIQPPLSYPGTQEQYAV",
+ wild_type_xmer= "RQHSIKEGLQFIQSPLSYPGTQEQYAV")
+ self.mixmhc2pred.run(
+ neoantigen=neoantigen, mhc=self.test_mhc_two_b,
+ uniprot=self.uniprot
+ )
+
+ best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
+
+ self.assertEquals("QPPLSYPGTQEQYAV", best_result.mutated_peptide)
+ self.assertEquals(9.43, best_result.rank_mutated)
+ self.assertEquals("H2Ab", best_result.isoform_mhc_i_i.name)
+
+ def test_mixmhcpred2_antigen_iedb(self):
+ # Test mixmhc2pred with the H2Ad and H2Ed alleles (BALB/c setting)
+ # this is an antigen from IEDB of length 27
+ neoantigen = get_neoantigen(
+ mutated_xmer="RQHSIKEGLQFIQPPLSYPGTQEQYAV",
+ wild_type_xmer= "RQHSIKEGLQFIQSPLSYPGTQEQYAV")
+ self.mixmhc2pred.run(
+ neoantigen=neoantigen, mhc=self.test_mhc_two,
+ uniprot=self.uniprot
+ )
+
+ best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
+
+ self.assertEquals("KEGLQFIQPPLSYPG", best_result.mutated_peptide)
+ self.assertEquals(11.9, best_result.rank_mutated)
+ self.assertEquals("H2Ad", best_result.isoform_mhc_i_i.name)
+
+ def test_mixmhcpred2_no_mutation(self):
+ neoantigen = get_neoantigen(
+ mutated_xmer="RQHSIKEGLQFIQSPLSYPGTQEQYAV",
+ wild_type_xmer= "RQHSIKEGLQFIQSPLSYPGTQEQYAV")
+ self.mixmhc2pred.run(
+ neoantigen=neoantigen, mhc=self.test_mhc_two,
+ uniprot=self.uniprot
+ )
+
+ best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
+
+ self.assertIsNone(best_result.mutated_peptide)
+ self.assertIsNone(best_result.rank_mutated)
+ self.assertIsNone(best_result.isoform_mhc_i_i.name)
+
+ def test_mixmhc2pred_allele(self):
+ neoantigen = get_neoantigen(mutated_xmer="RQHSIKEGLQFIQPPLSYPGTQEQYAV", wild_type_xmer="RQHSIKEGLQFIQSPLSYPGTQEQYAV")
+ # this is an MHC II genotype whose alleles are all available for MixMHC2pred
+ MHC_TWO_NEW = MhcFactory.build_mhc2_alleles(
+ [
+ "H2Ab",
+ "H2Ad",
+ "H2Ed"
+ # the mouse allele below is supported by MixMHC2pred but does not exist in the H2 database, so it is left out
+ #"H2Anb1"
+ ],
+ self.hla_database
+ )
+ alleles = self.mixmhc2pred.transform_h2_alleles_for_prediction(MHC_TWO_NEW)
+ logger.info(alleles)
+ self.assertListEqual(alleles, ['H2_Aa_b__H2_Ab_b', 'H2_Aa_d__H2_Ab_d', 'H2_Ea_d__H2_Eb_d'])
+ self.mixmhc2pred.run(neoantigen=neoantigen, mhc=MHC_TWO_NEW, uniprot=self.uniprot)
+ best_result = EpitopeHelper.select_best_by_rank(predictions=self.mixmhc2pred.results)
+ self.assertIsNotNone(best_result.mutated_peptide)
+ self.assertIsNotNone(best_result.rank_mutated)
+ self.assertIsNotNone(best_result.isoform_mhc_i_i.name)
diff --git a/neofox/tests/integration_tests/test_neoag.py b/neofox/tests/integration_tests/test_neoag.py
deleted file mode 100755
index 35caf85d..00000000
--- a/neofox/tests/integration_tests/test_neoag.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# Copyright (c) 2020-2030 Translational Oncology at the Medical Center of the Johannes Gutenberg-University Mainz gGmbH.
-#
-# This file is part of Neofox
-# (see https://github.com/tron-bioinformatics/neofox).
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .#
-from unittest import TestCase
-
-from neofox.model.neoantigen import Annotation, PredictedEpitope, MhcAllele
-from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator
-from neofox.helpers.runner import Runner
-import neofox.tests.integration_tests.integration_test_tools as integration_test_tools
-from neofox.tests.tools import get_neoantigen
-
-
-class TestNeoantigenFitness(TestCase):
- def setUp(self):
- self.references, self.configuration = integration_test_tools.load_references()
- self.fastafile = integration_test_tools.create_temp_aminoacid_fasta_file()
- self.runner = Runner()
-
- def test_neoag(self):
-
- mutation = get_neoantigen(
- mutated_xmer= "DEVLGEPSQDILVTDQTRLEATISPET",
- wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET"
- )
- result = NeoagCalculator(
- runner=self.runner, configuration=self.configuration
- ).get_annotation(
- epitope_mhci=PredictedEpitope(
- mutated_peptide="ILVTDQTRL", wild_type_peptide="ILVIDQTRL",
- affinity_mutated=0, position=0, allele_mhc_i=MhcAllele(name="hla"), rank_mutated=0
- ),
- neoantigen=mutation,
- )
- self.assertTrue(isinstance(result, Annotation))
- self.assertTrue(float(result.value) > 0)
-
- def test_affinity_threshold(self):
- mutation = get_neoantigen(
- mutated_xmer="DEVLGEPSQDILVTDQTRLEATISPET",
- wild_type_xmer="DEVLGEPSQDILVIDQTRLEATISPET",
- )
- result = NeoagCalculator(
- runner=self.runner, configuration=self.configuration
- ).get_annotation(
- epitope_mhci=PredictedEpitope(
- mutated_peptide="DDDDDV", affinity_mutated=10, position=0, allele_mhc_i=MhcAllele(name="hla"),
- rank_mutated=0
- ),
- neoantigen=mutation
- )
- self.assertEqual(result.value, "NA")
diff --git a/neofox/tests/integration_tests/test_neoantigen_fitness.py b/neofox/tests/integration_tests/test_neoantigen_fitness.py
index 24d6decb..7a2651e7 100755
--- a/neofox/tests/integration_tests/test_neoantigen_fitness.py
+++ b/neofox/tests/integration_tests/test_neoantigen_fitness.py
@@ -48,7 +48,7 @@ def test_pathogen_similarity(self):
result = self.neoantigen_fitness_calculator.get_pathogen_similarity(
peptide="FIAGDAAIV"
)
- self.assertEqual(0.9923850668756105, result)
+ self.assertEqual(0.9961779812291349, result)
# tests a non pathogen sequence and expects 0 similarity
result = self.neoantigen_fitness_calculator.get_pathogen_similarity(
peptide="DDDDDMMDD"
diff --git a/neofox/tests/integration_tests/test_neoepitope_annotator.py b/neofox/tests/integration_tests/test_neoepitope_annotator.py
index f5954344..01f0c10e 100644
--- a/neofox/tests/integration_tests/test_neoepitope_annotator.py
+++ b/neofox/tests/integration_tests/test_neoepitope_annotator.py
@@ -61,7 +61,7 @@ def test_neoepitope_mhci_9mer_with_frequencies_and_gene(self):
annotated_neoepitope = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope)
self.assert_neoepitope_mhci(original_neoepitope=neoepitope, annotated_neoepitope=annotated_neoepitope)
- self.assert_float_annotation(annotated_neoepitope, annotation_name="Priority_score")
+ self.assert_float_annotation(annotated_neoepitope, annotation_name="Priority_score_fromDNA")
self.assert_float_annotation(annotated_neoepitope, annotation_name="Tcell_predictor")
def test_neoepitope_mhci_10mer_no_tcell_predictor(self):
@@ -98,6 +98,7 @@ def test_neoepitope_mhci_without_dna_vaf(self):
mutated_peptide="DILVTDQTR",
wild_type_peptide="DILVIDQTR",
allele_mhc_i=self._get_test_mhci_allele('HLA-A*01:01'),
+ dna_variant_allele_frequency=None,
rna_variant_allele_frequency=0.1,
rna_expression=125,
gene='BRCA2'
@@ -107,15 +108,19 @@ def test_neoepitope_mhci_without_dna_vaf(self):
self.assert_neoepitope_mhci(original_neoepitope=neoepitope_with_dna_vaf,
annotated_neoepitope=annotated_neoepitope1)
+ print(neoepitope_with_dna_vaf)
+
annotated_neoepitope2 = self.annotator.get_annotated_neoepitope(neoepitope=neoepitope_without_dna_vaf)
self.assert_neoepitope_mhci(original_neoepitope=neoepitope_without_dna_vaf,
annotated_neoepitope=annotated_neoepitope2)
+ print(annotated_neoepitope2)
+
self.assertNotEqual(
EpitopeHelper.get_annotation_by_name(
- annotated_neoepitope1.neofox_annotations.annotations, "Priority_score"),
+ annotated_neoepitope1.neofox_annotations.annotations, "Priority_score_fromDNA"),
EpitopeHelper.get_annotation_by_name(
- annotated_neoepitope2.neofox_annotations.annotations, "Priority_score")
+ annotated_neoepitope2.neofox_annotations.annotations, "Priority_score_fromDNA")
)
def test_neoepitope_mhci_without_vaf(self):
@@ -137,7 +142,7 @@ def test_neoepitope_mhci_without_vaf(self):
self.assertEqual(
EpitopeHelper.get_annotation_by_name(
- annotated_neoepitope.neofox_annotations.annotations, "Priority_score"), "NA")
+ annotated_neoepitope.neofox_annotations.annotations, "Priority_score_fromDNA"), "NA")
def test_neoepitope_mhcii_annotation(self):
diff --git a/neofox/tests/integration_tests/test_neofox.py b/neofox/tests/integration_tests/test_neofox.py
index 4d885ec4..76a18bb9 100755
--- a/neofox/tests/integration_tests/test_neofox.py
+++ b/neofox/tests/integration_tests/test_neofox.py
@@ -204,7 +204,7 @@ def test_neofox_model_input(self):
).get_annotations()
self.assertEqual(5, len(annotations))
self.assertIsInstance(annotations[0], Neoantigen)
- self.assertTrue(len(annotations[0].neofox_annotations.annotations) == 82)
+ self.assertEqual(len(annotations[0].neofox_annotations.annotations), 104)
def test_neofox_without_mixmhcpreds(self):
"""
@@ -317,7 +317,7 @@ def test_neofox_with_config(self):
assert False
def test_neofox_without_mhc2(self):
- """"""
+
neoantigens, patients = self._get_test_data()
for p in patients:
p.mhc2 = []
@@ -328,7 +328,7 @@ def test_neofox_without_mhc2(self):
).get_annotations()
self.assertEqual(5, len(annotations))
self.assertIsInstance(annotations[0], Neoantigen)
- self.assertEqual(len(annotations[0].neofox_annotations.annotations), 63)
+ self.assertEqual(len(annotations[0].neofox_annotations.annotations), 79)
def test_neofox_without_mhc1(self):
neoantigens, patients = self._get_test_data()
@@ -341,7 +341,8 @@ def test_neofox_without_mhc1(self):
).get_annotations()
self.assertEqual(5, len(annotations))
self.assertIsInstance(annotations[0], Neoantigen)
- self.assertEqual(len(annotations[0].neofox_annotations.annotations), 39)
+ print(annotations[0].neofox_annotations.annotations)
+ self.assertEqual(len(annotations[0].neofox_annotations.annotations), 48)
def test_gene_expression_imputation(self):
neoantigens, patients = self._get_test_data()
@@ -408,6 +409,19 @@ def test_neoantigens_with_rna_expression(self):
for n in neofox.neoantigens:
self.assertEqual(n.rna_expression, 1.2)
+ def test_neoantigens_with_many_rna_expressions(self):
+ """"""
+ neoantigens, patients = self._get_test_data()
+ values = [1.2, 2, 3.1, 0.9, 4]
+ for n, val in zip(neoantigens, values):
+ n.rna_expression = val
+ neofox = NeoFox(
+ neoantigens=neoantigens,
+ patients=patients,
+ num_cpus=4
+ )
+ for n, val in zip(neofox.neoantigens, values):
+ self.assertEqual(n.rna_expression, val)
def test_patient_with_non_existing_allele_does_not_crash(self):
""""""
@@ -616,6 +630,7 @@ def test_with_all_neoepitopes(self):
self.assertTrue(found_recognition_potential)
+
df_epitopes_mhci = ModelConverter.annotations2epitopes_table(annotations, mhc=neofox.MHC_I)
self.assertFalse(any(c.startswith('isoformMhcII') for c in df_epitopes_mhci.columns))
diff --git a/neofox/tests/integration_tests/test_neofox_epitope.py b/neofox/tests/integration_tests/test_neofox_epitope.py
index 2316c3d5..059bf670 100755
--- a/neofox/tests/integration_tests/test_neofox_epitope.py
+++ b/neofox/tests/integration_tests/test_neofox_epitope.py
@@ -286,7 +286,9 @@ def test_with_expression_imputation(self):
for n, n2 in zip(neoepitopes, neofox_runner.neoepitopes):
self.assertIsNotNone(n2.imputed_gene_expression)
self.assertNotEqual(n2.imputed_gene_expression, 0)
- self.assertEqual(n2.imputed_gene_expression, n2.rna_expression)
+ self.assertNotEqual(n2.imputed_gene_expression, n2.rna_expression)
+ self.assertEqual(n.rna_expression, n2.rna_expression)
+ self.assertEqual(n.imputed_gene_expression, n2.imputed_gene_expression)
def _assert_neeoepitope(self, neoepitope: PredictedEpitope):
# netMHCpan or netMHC2pan annotations
diff --git a/neofox/tests/integration_tests/test_prime.py b/neofox/tests/integration_tests/test_prime.py
index fdeeb934..fd105575 100755
--- a/neofox/tests/integration_tests/test_prime.py
+++ b/neofox/tests/integration_tests/test_prime.py
@@ -45,8 +45,8 @@ def test_prime_epitope(self):
best_result = EpitopeHelper.select_best_by_affinity(
predictions=self.prime.results, maximum=True)
self.assertEquals("LVTDQTRL", best_result.mutated_peptide)
- self.assertAlmostEqual(0.163810, best_result.affinity_mutated, delta=0.00001)
- self.assertEquals(3.00, best_result.rank_mutated)
+ self.assertAlmostEqual(0.001858 , best_result.affinity_mutated, delta=0.00001)
+ self.assertEquals(18.992, best_result.rank_mutated)
self.assertEquals("HLA-C*05:01", best_result.allele_mhc_i.name)
def test_prime_too_small_epitope(self):
@@ -72,8 +72,8 @@ def test_prime_not_supported_allele(self):
best_result = EpitopeHelper.select_best_by_affinity(
predictions=self.prime.results, maximum=True)
self.assertEqual('SIYGGLVLI', best_result.mutated_peptide)
- self.assertEqual(0.186328, best_result.affinity_mutated)
- self.assertEqual(0.2, best_result.rank_mutated)
+ self.assertEqual(0.13728, best_result.affinity_mutated)
+ self.assertEqual(0.127, best_result.rank_mutated)
self.assertEqual('HLA-A*02:01', best_result.allele_mhc_i.name)
def test_prime_rare_aminoacid(self):
diff --git a/neofox/tests/synthetic_data/data_generator.py b/neofox/tests/synthetic_data/data_generator.py
index 1adb677a..caba2110 100644
--- a/neofox/tests/synthetic_data/data_generator.py
+++ b/neofox/tests/synthetic_data/data_generator.py
@@ -23,7 +23,7 @@ def __init__(self, reference_folder: ReferenceFolder, configuration: Dependencie
mhc1_alleles = mixmhcpred_alleles.union(netmhcpan_alleles)
mixmhc2pred_alleles = set(self.load_mhc2_alleles(
- MixMHC2pred(runner=None, configuration=configuration, mhc_parser=None).available_alleles))
+ MixMHC2pred(runner=None, configuration=configuration, mhc_parser=None, mhc_database=self.hla_database).available_alleles))
netmhc2pan_alleles = set(self.load_mhc2_alleles(
reference_folder.get_available_alleles().get_available_mhc_ii()))
mhc2_isoforms = mixmhc2pred_alleles.union(netmhc2pan_alleles)
diff --git a/neofox/tests/unit_tests/test_api.py b/neofox/tests/unit_tests/test_api.py
index 3a899f0a..d65373c8 100644
--- a/neofox/tests/unit_tests/test_api.py
+++ b/neofox/tests/unit_tests/test_api.py
@@ -75,19 +75,3 @@ def test_multiple_positions(self):
patient_identifier="123")
self.assertIsInstance(neoantigen, Neoantigen)
self.assertEqual(neoantigen.position, [7, 12])
-
- def test_insertion(self):
- neoantigen = NeoantigenFactory.build_neoantigen(
- mutated_xmer="aaaaaaaaaaaaaaa",
- wild_type_xmer="AAAAAAGAAAAgA",
- patient_identifier="123")
- self.assertIsInstance(neoantigen, Neoantigen)
- self.assertEqual(neoantigen.position, [7, 12])
-
- def test_deletion(self):
- neoantigen = NeoantigenFactory.build_neoantigen(
- mutated_xmer="aaaaaaaaaaaaa",
- wild_type_xmer="AAAAAAGAAAAgAAA",
- patient_identifier="123")
- self.assertIsInstance(neoantigen, Neoantigen)
- self.assertEqual(neoantigen.position, [7, 12])
diff --git a/neofox/tests/unit_tests/test_expression.py b/neofox/tests/unit_tests/test_expression.py
index dcd54f63..19e9f2e4 100755
--- a/neofox/tests/unit_tests/test_expression.py
+++ b/neofox/tests/unit_tests/test_expression.py
@@ -20,7 +20,6 @@
from unittest import TestCase
from neofox.model.factories import NOT_AVAILABLE_VALUE, NeoantigenFactory
-from neofox.model.neoantigen import Neoantigen
from neofox.published_features.expression import Expression
@@ -33,15 +32,20 @@ def test_calculate_expression_mutation(self):
neoantigen = NeoantigenFactory.build_neoantigen(
rna_expression=12.0, dna_variant_allele_frequency=0.2, patient_identifier="patient1",
mutated_xmer="DDDDD")
- result = self.expression.get_annotations(neoantigen=neoantigen)[0]
- self.assertGreater(float(result.value), 0.0)
+ results = self.expression.get_annotations(neoantigen=neoantigen)
+ result_rna = self.expression.get_annotations(neoantigen=neoantigen)[0]
+ result_dna = self.expression.get_annotations(neoantigen=neoantigen)[1]
+ self.assertEqual(result_rna.value, NOT_AVAILABLE_VALUE)
+ self.assertEqual(float(result_dna.value), 2.4)
# no reads for mut
neoantigen = NeoantigenFactory.build_neoantigen(
rna_expression=12.0, dna_variant_allele_frequency=0.0, patient_identifier="patient1",
mutated_xmer="DDDDD")
- result = self.expression.get_annotations(neoantigen=neoantigen)[0]
- self.assertEqual(result.value, "0")
+ result_rna = self.expression.get_annotations(neoantigen=neoantigen)[0]
+ result_dna = self.expression.get_annotations(neoantigen=neoantigen)[1]
+ self.assertEqual(float(result_dna.value), 0)
+ self.assertEqual(result_rna.value, NOT_AVAILABLE_VALUE)
# no reads for mut/wt
neoantigen = NeoantigenFactory.build_neoantigen(
@@ -49,9 +53,7 @@ def test_calculate_expression_mutation(self):
mutated_xmer="DDDDD")
result = self.expression.get_annotations(neoantigen=neoantigen)[0]
self.assertEqual(result.value, NOT_AVAILABLE_VALUE)
-
- neoantigen = NeoantigenFactory.build_neoantigen(
- rna_expression=None, dna_variant_allele_frequency=-1, rna_variant_allele_frequency=-1,
- patient_identifier="patient1", mutated_xmer="DDDDD")
- result = self.expression.get_annotations(neoantigen=neoantigen)[0]
+ result = self.expression.get_annotations(neoantigen=neoantigen)[1]
self.assertEqual(result.value, NOT_AVAILABLE_VALUE)
+
+
diff --git a/neofox/tests/unit_tests/test_model_converter.py b/neofox/tests/unit_tests/test_model_converter.py
index a79446b8..4d30a928 100755
--- a/neofox/tests/unit_tests/test_model_converter.py
+++ b/neofox/tests/unit_tests/test_model_converter.py
@@ -807,7 +807,7 @@ def test_candidate_neoepitopes2model(self):
)
with open(candidate_file) as f:
self.count_lines = len(f.readlines())
- neoepitopes = ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database)
+ neoepitopes = ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database, ORGANISM_HOMO_SAPIENS)
self.assertIsNotNone(neoepitopes)
self.assertEqual(self.count_lines -1, len(neoepitopes))
for n in neoepitopes:
@@ -821,7 +821,7 @@ def test_candidate_neoepitopes2model_with_patients(self):
with open(candidate_file) as f:
self.count_lines = len(f.readlines())
- neoepitopes = ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database)
+ neoepitopes = ModelConverter().parse_candidate_neoepitopes_file(candidate_file, self.hla_database, ORGANISM_HOMO_SAPIENS)
self.assertIsNotNone(neoepitopes)
self.assertEqual(self.count_lines -1, len(neoepitopes))
for n in neoepitopes:
diff --git a/neofox/tests/unit_tests/test_neofox.py b/neofox/tests/unit_tests/test_neofox.py
index f93f5d84..83eccc07 100755
--- a/neofox/tests/unit_tests/test_neofox.py
+++ b/neofox/tests/unit_tests/test_neofox.py
@@ -194,7 +194,9 @@ def test_with_expression_imputation(self):
for neoantigen, neoantigen_imputed in zip(original_neoantigens, neofox_runner.neoantigens):
self.assertIsNotNone(neoantigen_imputed.imputed_gene_expression)
if neoantigen.rna_expression is None:
- self.assertNotEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression)
+ #self.assertNotEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression)
+ self.assertTrue(neoantigen.rna_expression==neoantigen_imputed.rna_expression or
+ (neoantigen.rna_expression is None and neoantigen_imputed.rna_expression is None))
else:
self.assertEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression)
diff --git a/neofox/tests/unit_tests/test_priority_score.py b/neofox/tests/unit_tests/test_priority_score.py
index c6803443..c28530bf 100755
--- a/neofox/tests/unit_tests/test_priority_score.py
+++ b/neofox/tests/unit_tests/test_priority_score.py
@@ -19,46 +19,33 @@
from unittest import TestCase
from neofox.published_features.priority_score import PriorityScore
-
class TestPriorityScore(TestCase):
def setUp(self):
self.priority_calculator = PriorityScore()
def test_priority(self):
result = self.priority_calculator.calc_priority_score(
- vaf_dna=0.35,
- vaf_rna=0.33,
- transcript_expr=12,
- no_mismatch=1,
- score_mut=1.1,
- score_wt=10,
- mut_not_in_prot=True,
- )
- self.assertGreater(result, 0)
- result = self.priority_calculator.calc_priority_score(
- vaf_dna=None,
- vaf_rna=0.33,
- transcript_expr=12,
+ vaf=0.35,
+ transcript_gene_expr=12,
no_mismatch=1,
score_mut=1.1,
score_wt=10,
mut_not_in_prot=True,
)
self.assertGreater(result, 0)
+
result = self.priority_calculator.calc_priority_score(
- vaf_dna=0.35,
- vaf_rna=None,
- transcript_expr=12,
+ vaf=None,
+ transcript_gene_expr=12,
no_mismatch=1,
score_mut=1.1,
score_wt=10,
mut_not_in_prot=True,
)
- self.assertGreater(result, 0)
+ self.assertEqual(result, None)
result = self.priority_calculator.calc_priority_score(
- vaf_dna=None,
- vaf_rna=-1,
- transcript_expr=12,
+ vaf=-1,
+ transcript_gene_expr=12,
no_mismatch=1,
score_mut=1.1,
score_wt=10,
@@ -66,9 +53,8 @@ def test_priority(self):
)
self.assertEqual(result, None)
result = self.priority_calculator.calc_priority_score(
- vaf_dna=0.35,
- vaf_rna=0.33,
- transcript_expr=None,
+ vaf=0.35,
+ transcript_gene_expr=None,
no_mismatch=1,
score_mut=1.1,
score_wt=10,
@@ -76,12 +62,11 @@ def test_priority(self):
)
self.assertEqual(result, None)
result = self.priority_calculator.calc_priority_score(
- vaf_dna=0.35,
- vaf_rna=0.33,
- transcript_expr=None,
+ vaf=0.35,
+ transcript_gene_expr=500,
no_mismatch=1,
- score_mut=1.1,
+ score_mut=0.5,
score_wt=10,
mut_not_in_prot=True,
)
- self.assertEqual(result, None)
+ self.assertEqual(result, 0.34980652747707675)
diff --git a/setup.py b/setup.py
index ea4290f5..7643d65f 100755
--- a/setup.py
+++ b/setup.py
@@ -66,6 +66,6 @@
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: Unix"
],
- python_requires='>=3.6,<=3.8.12',
+ python_requires='>=3.6,<3.9',
license='GPLv3',
)