From 298cb39c06aafe38fa1bb1b96b0f8cc413d18da2 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 14:50:11 +0100 Subject: [PATCH 1/4] bump version --- neofox/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neofox/__init__.py b/neofox/__init__.py index 7f60eabd..ba3c6051 100755 --- a/neofox/__init__.py +++ b/neofox/__init__.py @@ -18,7 +18,7 @@ # along with this program. If not, see .# -VERSION = "1.0.4" +VERSION = "1.0.6" REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER" From d463234fc99b4b944a23c0a7cfc472755656f403 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 22:26:45 +0100 Subject: [PATCH 2/4] first hex implementation in python --- neofox/tests/integration_tests/test_hex.py | 26 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/neofox/tests/integration_tests/test_hex.py b/neofox/tests/integration_tests/test_hex.py index d6a24a96..c421b71d 100755 --- a/neofox/tests/integration_tests/test_hex.py +++ b/neofox/tests/integration_tests/test_hex.py @@ -22,7 +22,8 @@ from neofox.helpers.runner import Runner import neofox.tests.integration_tests.integration_test_tools as integration_test_tools - +from neofox.published_features.hex.pyhex import PyHex +from Bio.Alphabet.IUPAC import ExtendedIUPACProtein class TestHex(TestCase): @@ -30,15 +31,32 @@ def setUp(self): self.references, self.configuration = integration_test_tools.load_references() self.runner = Runner() - def test_hex(self): res = Hex( runner=self.runner, configuration=self.configuration, references=self.references ).apply_hex( mut_peptide="FGLAIDVDD" ) - logger.info(res) - self.assertEqual(float(res), 148) + self.assertEqual(int(res), 148) + + def test_pyhex(self): + pyhex = PyHex(iedb_fasta=self.references.get_iedb_fasta()) + res = pyhex.run("FGLAIDVDD") + self.assertEqual(res, 148) + + def test_comparison(self): + for i in range(100): + for k in range(9, 30): + peptide = integration_test_tools.get_random_kmer(k=k) + logger.info(peptide) + res = Hex( + runner=self.runner, configuration=self.configuration, references=self.references + ).apply_hex( + mut_peptide=peptide + ) + pyhex = PyHex(iedb_fasta=self.references.get_iedb_fasta()) + res_pyhex = pyhex.run(peptide) + self.assertEqual(float(res), res_pyhex, "Peptide: {}".format(peptide)) From 1b2e77eb8249d17f48c034fd22b1233f507e1021 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 22:26:52 +0100 Subject: [PATCH 3/4] first hex implementation in python --- neofox/published_features/hex/pyhex.py | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 neofox/published_features/hex/pyhex.py diff --git a/neofox/published_features/hex/pyhex.py b/neofox/published_features/hex/pyhex.py new file mode 100644 index 00000000..9db1e6d6 --- /dev/null +++ b/neofox/published_features/hex/pyhex.py @@ -0,0 +1,51 @@ +from math import ceil, floor + +from Bio import SeqIO +from Bio.Align import substitution_matrices +from Bio.Alphabet.IUPAC import ExtendedIUPACProtein + + +class PyHex: + + def __init__(self, iedb_fasta, magic_number=4): + self.iedb_sequences = self._read_fasta(iedb_fasta) + self.magic_number = magic_number + self.blosum = substitution_matrices.load("BLOSUM62") + + @staticmethod + def _read_fasta(fasta_file): + sequences = [] + # read fasta + with open(fasta_file, "r") as handle: + for record in SeqIO.parse(handle, "fasta"): + # include only records that do not contain non-standard amino acids + if not any([aa not in ExtendedIUPACProtein.letters for aa in record.seq]): + sequences.append(record) + return sequences + + def _align(self, sequence, mutated_sequence): + weights = self._get_sequence_weights(mutated_sequence) + score = sum([self.blosum[q, t] * w for q, t, w in zip(sequence, mutated_sequence, weights)]) + return score + + def _get_sequence_weights(self, mutated_sequence): + length_mutated_sequence = len(mutated_sequence) + mid_score = ceil(length_mutated_sequence / 2) * self.magic_number + weights = list(range(1, mid_score, self.magic_number)) + weights.extend(reversed(weights[0:floor(length_mutated_sequence / 2)])) + + top_floor = floor(length_mutated_sequence / 3) + weights[0:top_floor] = list(range(1, top_floor + 1)) + tail = length_mutated_sequence - top_floor + weights[tail:length_mutated_sequence] = list(reversed(range(1, top_floor + 1))) + + return weights + + def run(self, mutated_sequence): + # excludes sequences that have different length than the mutated sequence + sequences = [s for s in self.iedb_sequences if len(s.seq) == len(mutated_sequence)] + # align each of the sequences + alignment_scores = [self._align(s.seq, mutated_sequence) for s in sequences] + # gets the best score of all the alignments + best_score = max(alignment_scores) + return best_score From b879359efed5c758fab4187375ec326f3380c865 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 22:36:57 +0100 Subject: [PATCH 4/4] remove R code and dependencies for R --- neofox/annotator/abstract_annotator.py | 2 +- neofox/published_features/hex/BLOSUM62.rda | Bin 532 -> 0 bytes neofox/published_features/hex/hex.py | 19 ++++--------------- neofox/references/install_r_dependencies.R | 6 ------ neofox/tests/integration_tests/test_hex.py | 15 +++------------ 5 files changed, 8 insertions(+), 34 deletions(-) delete mode 100644 neofox/published_features/hex/BLOSUM62.rda diff --git a/neofox/annotator/abstract_annotator.py b/neofox/annotator/abstract_annotator.py index fb08f2ff..20569016 100644 --- a/neofox/annotator/abstract_annotator.py +++ b/neofox/annotator/abstract_annotator.py @@ -53,7 +53,7 @@ def __init__( self.priority_score_calculator = PriorityScore() self.iedb_immunogenicity = IEDBimmunogenicity() self.amplitude = Amplitude() - self.hex = Hex(runner=self.runner, configuration=configuration, references=references) + self.hex = Hex(references=references) def get_additional_annotations_neoepitope_mhci( self, epitope: PredictedEpitope, neoantigen: Neoantigen = None) -> PredictedEpitope: diff --git a/neofox/published_features/hex/BLOSUM62.rda b/neofox/published_features/hex/BLOSUM62.rda deleted file mode 100644 index 88991e871ceb3b9b95b660715aee3ea33548af20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 532 zcmV+v0_*)kT4*^jL0KkKS>d~Rg8%}F|A7Dg$RGd^5dc5`|L~?GC;$Klzyfl(bT9~~ zrlUqpLlL6_10cd=(@SKm$MkWElW6P-&_oKr|XO28=<8f@EN72vUke)Bpg} zO#o=xjj6Oi&Hz&D;2|6;AaE6FL%hZw_mEl2(+rY9*Z~?c8Fc1_C0JVt0o7w;!I1cJbV`~ zD4NirKo2LA5qsBWL{q~cY=z!xbDr~*{F_gi^*IlrwELS*V`;S7a~qC>oas7E=dtym z9Egkv2J4m%Ii%H))fVXqMos%I&KV$U)*l1XP+GSZkq~8B{!05>&L(E|M8E6sar~5>n1Jsn#-ES zcVTNy7!UVnmKKHO?7c^a9BjD&~&S8LD@}l%(Ds% z8pshRYYMEWkPB>#5H6EqnXsg2M3JJdO|p>$U}Q2(d#N-r*0#xqkc1AA266FR-xy4! Wj<*rn4T}U%_`8xR!i0wn+sqjCE8x`t diff --git a/neofox/published_features/hex/hex.py b/neofox/published_features/hex/hex.py index 116dfb5c..e1d42df9 100755 --- a/neofox/published_features/hex/hex.py +++ b/neofox/published_features/hex/hex.py @@ -18,33 +18,22 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see .# from typing import List -import os from neofox.model.neoantigen import Annotation, PredictedEpitope from neofox.model.factories import AnnotationFactory +from neofox.published_features.hex.pyhex import PyHex from neofox.references.references import ReferenceFolder class Hex(object): - def __init__(self, references: ReferenceFolder, runner, configuration): - """ - :type runner: neofox.helpers.runner.Runner - :type configuration: neofox.references.DependenciesConfiguration - """ - self.runner = runner - self.configuration = configuration + def __init__(self, references: ReferenceFolder): self.iedb_fasta = references.get_iedb_fasta() + self.pyhex = PyHex(self.iedb_fasta) def apply_hex(self, mut_peptide): """this function calls hex tool. this tool analyses the neoepitope candidate sequence for molecular mimicry to viral epitopes """ - my_path = os.path.abspath(os.path.dirname(__file__)) - tool_path = os.path.join(my_path, "hex.R") - cmd = [self.configuration.rscript, tool_path, mut_peptide, self.iedb_fasta, my_path] - output, _ = self.runner.run_command(cmd) - if output == "": - output = None - return output + return self.pyhex.run(mut_peptide) def get_annotation( self, mutated_peptide_mhci: PredictedEpitope, mutated_peptide_mhcii: PredictedEpitope) -> List[Annotation]: diff --git a/neofox/references/install_r_dependencies.R b/neofox/references/install_r_dependencies.R index 7b906a14..2d8f6bc4 100644 --- a/neofox/references/install_r_dependencies.R +++ b/neofox/references/install_r_dependencies.R @@ -1,9 +1,3 @@ -install.packages("lattice", repo="http://cran.rstudio.com/") -install.packages("ggplot2", repo="http://cran.rstudio.com/") install.packages("caret", repo="http://cran.rstudio.com/") install.packages("Peptides", repo="http://cran.rstudio.com/") install.packages("doParallel", repo="http://cran.rstudio.com/") -install.packages("gbm", repo="http://cran.rstudio.com/") -if (!requireNamespace("BiocManager", quietly = TRUE)) - install.packages("BiocManager") -BiocManager::install("Biostrings") \ No newline at end of file diff --git a/neofox/tests/integration_tests/test_hex.py b/neofox/tests/integration_tests/test_hex.py index c421b71d..7684b0b6 100755 --- a/neofox/tests/integration_tests/test_hex.py +++ b/neofox/tests/integration_tests/test_hex.py @@ -23,7 +23,6 @@ import neofox.tests.integration_tests.integration_test_tools as integration_test_tools from neofox.published_features.hex.pyhex import PyHex -from Bio.Alphabet.IUPAC import ExtendedIUPACProtein class TestHex(TestCase): @@ -32,11 +31,7 @@ def setUp(self): self.runner = Runner() def test_hex(self): - res = Hex( - runner=self.runner, configuration=self.configuration, references=self.references - ).apply_hex( - mut_peptide="FGLAIDVDD" - ) + res = Hex(references=self.references).apply_hex(mut_peptide="FGLAIDVDD") self.assertEqual(int(res), 148) def test_pyhex(self): @@ -45,15 +40,11 @@ def test_pyhex(self): self.assertEqual(res, 148) def test_comparison(self): - for i in range(100): + for i in range(10): for k in range(9, 30): peptide = integration_test_tools.get_random_kmer(k=k) logger.info(peptide) - res = Hex( - runner=self.runner, configuration=self.configuration, references=self.references - ).apply_hex( - mut_peptide=peptide - ) + res = Hex(references=self.references).apply_hex(mut_peptide=peptide) pyhex = PyHex(iedb_fasta=self.references.get_iedb_fasta()) res_pyhex = pyhex.run(peptide) self.assertEqual(float(res), res_pyhex, "Peptide: {}".format(peptide))