Skip to content

Commit

Permalink
Merge branch '306-removeNeoAg' into 'develop'
Browse files Browse the repository at this point in the history
Resolve "Remove NeoAg from NeoFox"

See merge request tron/addannot!258
  • Loading branch information
Nguyen-Hoang, Van committed Oct 19, 2023
2 parents d9ced09 + 1992828 commit 2a8843f
Show file tree
Hide file tree
Showing 17 changed files with 120 additions and 131 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
include neofox/published_features/neoag/neoag-master/*
include neofox/published_features/self_similarity/BLOSUM62-2.matrix.txt
include neofox/published_features/Tcell_predictor/amino-acids-features.pickle
include neofox/published_features/Tcell_predictor/genes-expression.pickle
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ NeoFox covers the following neoantigen features and prediction algorithms:
| Vaxrank | Rubinsteyn, 2017, Front Immunol | https://doi.org/10.3389/fimmu.2017.01807 |
| Priority score | Bjerregaard et al, 2017, Cancer Immunol Immunother. | https://doi.org/10.1007/s00262-017-2001-3 |
| Tcell predictor | Besser et al, 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z |
| neoag | Smith et al, 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 |
| PRIME § | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 |
| HEX § | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 |

Expand Down
1 change: 0 additions & 1 deletion docs/source/01_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ A list of implemented features and their references are given in Table 1. Please
| Vaxrank | Rubinsteyn, 2017, Front Immunol | https://doi.org/10.3389/fimmu.2017.01807 |
| Priority score | Bjerregaard et al., 2017, Cancer Immunol Immunother. | https://doi.org/10.1007/s00262-017-2001-3 |
| Tcell predictor | Besser et al., 2019, Journal for ImmunoTherapy of Cancer | https://doi.org/10.1186/s40425-019-0595-z |
| neoag | Smith et al., 2019, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-19-0155 |
| PRIME v2.0 § | Schmidt et al., 2021, Cell Reports Medicine | https://doi.org/10.1016/j.xcrm.2021.100194 |
| HEX § | Chiaro et al., 2021, Cancer Immunology Research | https://doi.org/10.1158/2326-6066.CIR-20-0814 |

Expand Down
1 change: 0 additions & 1 deletion docs/source/03_02_output_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ The following table describes each of the annotations in the output:
| Priority_score_fromRNA | combinatorial score of several features such as MHC binding, transcription expression and VAF in RNA | Priority score |
| Priority_score_imputed_fromDNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in DNA | Priority score |
| Priority_score_imputed_fromRNA | combinatorial score of several features such as MHC binding, imputed gene expression and VAF in RNA | Priority score |
| Neoag_immunogenicity | output score of neoag model | neoag |
| IEDB_Immunogenicity_MHCI | IEDB Immunogenicity score for `NetMHCpan_bestAffinity_peptide` | IEDB Immunogenicity |
| IEDB_Immunogenicity_MHCII | IEDB Immunogenicity score for `NetMHCIIpan_bestAffinity_peptide` | IEDB Immunogenicity |
| MixMHCpred_bestScore_peptide | MHC class I neoepitope candidate sequence with maximum MixMHCpred score over all neoepitope candidates (8-11mers) and MHC I alleles | MixMHCpred |
Expand Down
97 changes: 75 additions & 22 deletions neofox/MHC_predictors/MixMHCpred/mixmhc2pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from neofox.helpers.epitope_helper import EpitopeHelper
from neofox.model.mhc_parser import MhcParser, get_alleles_by_gene

from neofox.references.references import DependenciesConfiguration
from neofox.references.references import DependenciesConfiguration, MhcDatabase

from neofox.helpers.runner import Runner

Expand All @@ -46,23 +46,34 @@ class MixMHC2pred:
ANNOTATION_PREFIX = 'MixMHC2pred'
ANNOTATION_PREFIX_WT = 'MixMHC2pred_WT'

def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser):
def __init__(self, runner: Runner, configuration: DependenciesConfiguration, mhc_parser: MhcParser,
mhc_database: MhcDatabase):
self.runner = runner
self.configuration = configuration
self.available_alleles = self._load_available_alleles()
self.mhc_database = mhc_database
self.mhc_parser = mhc_parser
self.available_alleles = self._load_available_alleles(mhc_database)

self.results = None

def _load_available_alleles(self, mhc_database):
    """
    Loads the list of MHC II alleles available for MixMHC2pred prediction.

    The human alleles list is read when the MHC database reports Homo sapiens,
    otherwise the mouse alleles list is read.

    :param mhc_database: MHC database; its is_homo_sapiens() selects which alleles list is read
    :return: list with the values of the "AlleleName" column; an empty list when the
        mouse alleles list is not configured
    """
    if mhc_database.is_homo_sapiens():
        alleles_file = self.configuration.mix_mhc2_pred_human_alleles_list
    else:
        alleles_file = self.configuration.mix_mhc2_pred_mouse_alleles_list
        # the mouse alleles list is None when the PWMdef folder for mouse has not been downloaded
        if alleles_file is None:
            logger.warning("The PWMdef folder of mouse has not been downloaded.")
            # no allele is available: return an empty list instead of falling through
            # to the lookup below with nothing loaded
            return []

    # the first two rows of the alleles list are skipped; the next row holds the column names
    alleles = pd.read_csv(alleles_file, skiprows=2, sep="\t")
    return list(alleles["AlleleName"])


Expand All @@ -87,7 +98,8 @@ def _combine_dq_dp_alleles(alpha_alleles: List[str], beta_alleles: List[str]):
return alleles_pairs + alleles_triplets

@staticmethod
def _get_mixmhc2_allele_representation(hla_alleles: List[MhcAllele]):
def _get_mixmhc2_allele_human_representation(hla_alleles: List[MhcAllele]):
# alleles: hla_alleles
return list(
map(
lambda x: "{gene}_{group}_{protein}".format(
Expand All @@ -98,12 +110,12 @@ def _get_mixmhc2_allele_representation(hla_alleles: List[MhcAllele]):
)

@staticmethod
def _get_mixmhc2_isoform_representation(isoform: Mhc2Isoform):
def _get_mixmhc2_isoform_human_representation(isoform: Mhc2Isoform):

beta_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.beta_chain])[0]
beta_chain = MixMHC2pred._get_mixmhc2_allele_human_representation([isoform.beta_chain])[0]
if isoform.alpha_chain is not None and isoform.alpha_chain.name:
# for DR only beta chain is provided
alpha_chain = MixMHC2pred._get_mixmhc2_allele_representation([isoform.alpha_chain])[0]
alpha_chain = MixMHC2pred._get_mixmhc2_allele_human_representation([isoform.alpha_chain])[0]
return "{alpha}__{beta}".format(alpha=alpha_chain, beta=beta_chain)
return beta_chain

Expand All @@ -118,22 +130,49 @@ def transform_hla_ii_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]:
dqb1_alleles = get_alleles_by_gene(mhc, Mhc2GeneName.DQB1)

dp_allele_combinations = self._combine_dq_dp_alleles(
alpha_alleles=self._get_mixmhc2_allele_representation(dpa1_alleles),
beta_alleles=self._get_mixmhc2_allele_representation(dpb1_alleles)
alpha_alleles=self._get_mixmhc2_allele_human_representation(dpa1_alleles),
beta_alleles=self._get_mixmhc2_allele_human_representation(dpb1_alleles)
)
dq_allele_combinations = self._combine_dq_dp_alleles(
alpha_alleles=self._get_mixmhc2_allele_representation(dqa1_alleles),
beta_alleles=self._get_mixmhc2_allele_representation(dqb1_alleles)
alpha_alleles=self._get_mixmhc2_allele_human_representation(dqa1_alleles),
beta_alleles=self._get_mixmhc2_allele_human_representation(dqb1_alleles)
)

return [
a
for a in self._get_mixmhc2_allele_representation(drb1_alleles)
for a in self._get_mixmhc2_allele_human_representation(drb1_alleles)
+ dq_allele_combinations
+ dp_allele_combinations
if a in self.available_alleles
]

@staticmethod
def _get_mixmhc2_allele_mouse_representation(h2_alleles: List[MhcAllele]):
    """
    Formats every H2 allele as a paired alpha/beta chain string.

    The last character of the allele's gene and the allele's protein are used for
    both halves of the pair.

    :param h2_alleles: alleles exposing the attributes gene and protein
    :return: list with one formatted string per input allele, in input order
    """
    return [
        "H2_{gene}a_{protein}__H2_{gene}b_{protein}".format(
            gene=allele.gene[-1], protein=allele.protein
        )
        for allele in h2_alleles
    ]

@staticmethod
def _get_mixmhc2_isoform_mouse_representation(isoform: Mhc2Isoform):
    """
    Formats a single H2 isoform as a paired alpha/beta chain string.

    Declared as a static method so that it can be called through an instance
    (self._get_mixmhc2_isoform_mouse_representation(isoform)) as well as through the class.

    :param isoform: the isoform to format; it is indexed by position
    :return: the formatted string, or None when no isoform is given
    """
    if isoform is None:
        return None
    # NOTE(review): the gene is read from position -3 and the protein from position -1,
    # which requires a subscriptable value (e.g. a name string) - confirm that the
    # value passed by callers supports this
    return "H2_{gene}a_{protein}__H2_{gene}b_{protein}".format(gene=isoform[-3], protein=isoform[-1])

def transform_h2_alleles_for_prediction(self, mhc: List[Mhc2]) -> List[str]:
    """
    Prepares the list of H2 alleles for prediction in the required format.

    The H2A alleles are processed first, followed by the H2E alleles; only
    representations contained in self.available_alleles are kept.

    :param mhc: the MHC II data from which the alleles are taken
    :return: formatted alleles that are available for prediction
    """
    alleles_by_gene = [
        get_alleles_by_gene(mhc, Mhc2GeneName.H2A),
        get_alleles_by_gene(mhc, Mhc2GeneName.H2E),
    ]

    supported = []
    for gene_alleles in alleles_by_gene:
        for representation in self._get_mixmhc2_allele_mouse_representation(gene_alleles):
            if representation in self.available_alleles:
                supported.append(representation)
    return supported

def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]:

parsed_results = []
Expand All @@ -157,10 +196,15 @@ def _parse_mixmhc2pred_output(self, filename: str) -> List[PredictedEpitope]:
return parsed_results

def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: List[str]) -> List[PredictedEpitope]:


# TODO: define the pwm_path again because the mouse path is only defined by the config
tmptxt = intermediate_files.create_temp_mixmhc2pred(potential_ligand_sequences, prefix="tmp_sequence_")
outtmp = intermediate_files.create_temp_file(prefix="mixmhc2pred", suffix=".txt")

if self.mhc_database.is_homo_sapiens():
pwm_path = os.path.dirname(self.configuration.mix_mhc2_pred_human_alleles_list)
else:
#pwm_path = '/home/nguyenhv/code/MixMHC2pred/2.0/PWMdef/PWMdef_Mouse/' # reference folder
pwm_path = os.path.dirname(self.configuration.mix_mhc2_pred_mouse_alleles_list)
cmd = [
self.configuration.mix_mhc2_pred,
"-a",
Expand All @@ -169,6 +213,8 @@ def _mixmhc2prediction(self, isoforms: List[str], potential_ligand_sequences: Li
tmptxt,
"-o",
outtmp,
"-f",
pwm_path,
"--no_context"
]
self.runner.run_command(cmd)
Expand All @@ -187,10 +233,14 @@ def run(self, mhc: List[Mhc2], neoantigen: Neoantigen, uniprot):
self.results = None

potential_ligand_sequences = EpitopeHelper.generate_nmers(
neoantigen=neoantigen, lengths=[12,13, 14, 15, 16, 17, 18, 19, 20, 21], uniprot=uniprot)
neoantigen=neoantigen, lengths=[12, 13, 14, 15, 16, 17, 18, 19, 20, 21], uniprot=uniprot)

if len(potential_ligand_sequences) > 0:
mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc)
if self.mhc_database.is_homo_sapiens():
mhc2_alleles = self.transform_hla_ii_alleles_for_prediction(mhc)
else:
mhc2_alleles = self.transform_h2_alleles_for_prediction(mhc)

if len(mhc2_alleles) > 0:
self.results = self._mixmhc2prediction(
isoforms=mhc2_alleles, potential_ligand_sequences=potential_ligand_sequences)
Expand All @@ -203,7 +253,10 @@ def run_peptide(self, peptide: str, isoform: Mhc2Isoform) -> PredictedEpitope:
Performs MixMHC2pred prediction for desired hla allele and writes result to temporary file.
"""
result = None
isoform_representation = self._get_mixmhc2_isoform_representation(isoform)
if self.mhc_database.is_homo_sapiens():
isoform_representation = self._get_mixmhc2_isoform_human_representation(isoform)
else:
isoform_representation = self._get_mixmhc2_isoform_mouse_representation(isoform)
if isoform_representation in self.available_alleles:
results = self._mixmhc2prediction(
isoforms=[isoform_representation],
Expand Down
2 changes: 1 addition & 1 deletion neofox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.#


VERSION = "1.1.0b28"
VERSION = "1.1.0b29"

REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER"
NEOFOX_BLASTP_ENV = "NEOFOX_BLASTP"
Expand Down
10 changes: 0 additions & 10 deletions neofox/annotator/neoantigen_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from neofox.model.factories import AnnotationFactory
from neofox.model.mhc_parser import MhcParser
from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction
from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator
from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator
from neofox.published_features.expression import Expression
from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope
Expand All @@ -57,7 +56,6 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.rank_mhcii_threshold = rank_mhcii_threshold

# NOTE: these resources do not read any file thus can be initialised fast
self.neoag_calculator = NeoagCalculator(runner=self.runner, configuration=configuration)
self.expression_calculator = Expression()
self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)
Expand Down Expand Up @@ -193,14 +191,6 @@ def get_annotated_neoantigen(self, neoantigen: Neoantigen, patient: Patient, wit
)
)

# neoag immunogenicity model
if netmhcpan and netmhcpan.best_epitope_by_affinity:
neoantigen.neofox_annotations.annotations.append(
self.neoag_calculator.get_annotation(
epitope_mhci=netmhcpan.best_epitope_by_affinity,
neoantigen=neoantigen)
)

# IEDB immunogenicity
if self.organism == ORGANISM_HOMO_SAPIENS:
neoantigen.neofox_annotations.annotations.extend(
Expand Down
25 changes: 15 additions & 10 deletions neofox/annotator/neoantigen_mhc_binding_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from neofox.helpers.runner import Runner
from neofox.model.mhc_parser import MhcParser
from neofox.model.neoantigen import Neoantigen, Patient
from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, \
from neofox.references.references import DependenciesConfiguration, AvailableAlleles, ReferenceFolder, MhcDatabase, \
ORGANISM_HOMO_SAPIENS


Expand Down Expand Up @@ -56,16 +56,20 @@ def get_mhc_binding_annotations(self, neoantigen: Neoantigen, patient: Patient):
neoantigen,
patient
)

if self.configuration.mix_mhc2_pred is not None and has_mhc2:
mixmhc2pred = self._run_mixmhc2pred(
self.runner,
self.configuration,
self.mhc_parser,
neoantigen,
patient,
self.mhc_database
)

# avoids running MixMHCpred and PRIME for non human organisms
if self.organism == ORGANISM_HOMO_SAPIENS:
if self.configuration.mix_mhc2_pred is not None and has_mhc2:
mixmhc2pred = self._run_mixmhc2pred(
self.runner,
self.configuration,
self.mhc_parser,
neoantigen,
patient,
)

if self.configuration.mix_mhc_pred is not None and has_mhc1:
mixmhcpred = self._run_mixmhcpred(
self.runner,
Expand Down Expand Up @@ -155,7 +159,8 @@ def _run_mixmhc2pred(
mhc_parser: MhcParser,
neoantigen: Neoantigen,
patient: Patient,
mhc_database: MhcDatabase
):
mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser)
mixmhc2 = MixMHC2pred(runner, configuration, mhc_parser, mhc_database)
mixmhc2.run(mhc=patient.mhc2, neoantigen=neoantigen, uniprot=self.uniprot)
return mixmhc2
2 changes: 0 additions & 2 deletions neofox/annotator/neoepitope_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from neofox.model.factories import AnnotationFactory
from neofox.model.mhc_parser import MhcParser
from neofox.published_features.Tcell_predictor.tcellpredictor_wrapper import TcellPrediction
from neofox.published_features.neoag.neoag_gbm_model import NeoagCalculator
from neofox.published_features.self_similarity.self_similarity import SelfSimilarityCalculator
from neofox.published_features.expression import Expression
from neofox.model.neoantigen import Patient, Neoantigen, Annotations, PredictedEpitope
Expand All @@ -54,7 +53,6 @@ def __init__(self, references: ReferenceFolder, configuration: DependenciesConfi
self.available_alleles = references.get_available_alleles()

# NOTE: these resources do not read any file thus can be initialised fast
self.neoag_calculator = NeoagCalculator(runner=self.runner, configuration=configuration)
self.mhc_database = references.get_mhc_database()
self.mhc_parser = MhcParser.get_mhc_parser(self.mhc_database)

Expand Down
Loading

0 comments on commit 2a8843f

Please sign in to comment.