From 7dab3551e918dc447dba0a8c0995000e7059b707 Mon Sep 17 00:00:00 2001 From: priesgo Date: Thu, 19 Jan 2023 17:14:02 +0100 Subject: [PATCH 1/7] bump version --- neofox/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neofox/__init__.py b/neofox/__init__.py index 7f60eabd..1e059eb7 100755 --- a/neofox/__init__.py +++ b/neofox/__init__.py @@ -18,7 +18,7 @@ # along with this program. If not, see .# -VERSION = "1.0.4" +VERSION = "1.0.5" REFERENCE_FOLDER_ENV = "NEOFOX_REFERENCE_FOLDER" From 4a16bb46b9ac41e2e56029f87bbecae636a3ec89 Mon Sep 17 00:00:00 2001 From: priesgo Date: Thu, 19 Jan 2023 17:24:13 +0100 Subject: [PATCH 2/7] remove is_rna_available from patient model --- neofox/command_line.py | 20 ------------------- neofox/model/neoantigen.proto | 10 +++------- neofox/model/neoantigen.py | 8 +++----- neofox/neofox.py | 6 ------ neofox/neofox_epitope.py | 2 -- neofox/tests/integration_tests/test_neofox.py | 2 -- .../tests/unit_tests/test_model_converter.py | 6 ------ 7 files changed, 6 insertions(+), 48 deletions(-) diff --git a/neofox/command_line.py b/neofox/command_line.py index cd84df4a..79f6795c 100755 --- a/neofox/command_line.py +++ b/neofox/command_line.py @@ -213,16 +213,6 @@ def _read_data(input_file, patients_data, mhc_database: MhcDatabase) -> Tuple[Li else: raise ValueError('Not supported input file extension: {}'.format(input_file)) - patients_dict : Dict[str, Patient] - patients_dict = {p.identifier: p for p in patients} - - for n in neoantigens: - patient = patients_dict.get(n.patient_identifier) - if not patient.is_rna_available: - # removes RNA vaf if indicated in patient that this information is no good - # iCam legacy - n.rna_variant_allele_frequency = None - return neoantigens, patients @@ -386,16 +376,6 @@ def _read_data_epitopes( else: raise ValueError('Not supported input file extension: {}'.format(input_file)) - patients_dict : Dict[str, Patient] - patients_dict = {p.identifier: p for p in patients} - - for n in neoepitopes: - patient = patients_dict.get(n.patient_identifier) - if patient is not None and not patient.is_rna_available: - # removes RNA vaf if indicated in patient that this information is no good - # iCam legacy - n.rna_variant_allele_frequency = None - return neoepitopes, patients diff --git a/neofox/model/neoantigen.proto b/neofox/model/neoantigen.proto index 757c62c2..572fad93 100755 --- a/neofox/model/neoantigen.proto +++ b/neofox/model/neoantigen.proto @@ -78,21 +78,17 @@ message Patient { */ string identifier = 1; /** - Is RNA expression available? - */ - bool isRnaAvailable = 2; - /** Tumor entity in TCGA study abbrevation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations */ - string tumorType = 3; + string tumorType = 2; /** MHC I classic molecules */ - repeated Mhc1 mhc1 = 4; + repeated Mhc1 mhc1 = 3; /** MHC II classic molecules */ - repeated Mhc2 mhc2 = 5; + repeated Mhc2 mhc2 = 4; } /** diff --git a/neofox/model/neoantigen.py b/neofox/model/neoantigen.py index a0b253d3..b4b925fd 100755 --- a/neofox/model/neoantigen.py +++ b/neofox/model/neoantigen.py @@ -120,16 +120,14 @@ class Patient(betterproto.Message): # *Patient identifier identifier: str = betterproto.string_field(1) - # *Is RNA expression available? - is_rna_available: bool = betterproto.bool_field(2) # *Tumor entity in TCGA study abbrevation style as described here: # https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study- # abbreviations - tumor_type: str = betterproto.string_field(3) + tumor_type: str = betterproto.string_field(2) # *MHC I classic molecules - mhc1: List["Mhc1"] = betterproto.message_field(4) + mhc1: List["Mhc1"] = betterproto.message_field(3) # *MHC II classic molecules - mhc2: List["Mhc2"] = betterproto.message_field(5) + mhc2: List["Mhc2"] = betterproto.message_field(4) @dataclass diff --git a/neofox/neofox.py b/neofox/neofox.py index c805393b..30400f0c 100755 --- a/neofox/neofox.py +++ b/neofox/neofox.py @@ -110,10 +110,6 @@ def __init__( for neoantigen in self.neoantigens: expression_per_patient[neoantigen.patient_identifier].append(neoantigen.rna_expression) - for patient in self.patients: - self.patients[patient].is_rna_available = all(e is not None for e in - expression_per_patient[self.patients[patient].identifier]) - # only performs the expression imputation for humans if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS: # impute expresssion from TCGA, ONLY if isRNAavailable = False for given patient, @@ -137,8 +133,6 @@ def _conditional_expression_imputation(self) -> List[Neoantigen]: gene_expression = expression_annotator.get_gene_expression_annotation( gene_name=neoantigen.gene, tcga_cohort=patient.tumor_type ) - if not patient.is_rna_available and patient.tumor_type is not None and patient.tumor_type != "": - expression_value = gene_expression neoantigen_transformed.rna_expression = expression_value neoantigen.imputed_gene_expression = gene_expression neoantigens_transformed.append(neoantigen_transformed) diff --git a/neofox/neofox_epitope.py b/neofox/neofox_epitope.py index 26433cb2..37d7e79a 100755 --- a/neofox/neofox_epitope.py +++ b/neofox/neofox_epitope.py @@ -211,8 +211,6 @@ def _conditional_expression_imputation(self) -> List[PredictedEpitope]: neoepitope_transformed = neoepitope gene_expression = expression_annotator.get_gene_expression_annotation( gene_name=neoepitope.gene, tcga_cohort=patient.tumor_type) - if not patient.is_rna_available and patient.tumor_type is not None and patient.tumor_type != "": - neoepitope_transformed.rna_expression = gene_expression neoepitope.imputed_gene_expression = gene_expression neoepitopes_transformed.append(neoepitope_transformed) else: diff --git a/neofox/tests/integration_tests/test_neofox.py b/neofox/tests/integration_tests/test_neofox.py index 585f91d3..4d885ec4 100755 --- a/neofox/tests/integration_tests/test_neofox.py +++ b/neofox/tests/integration_tests/test_neofox.py @@ -345,8 +345,6 @@ def test_neofox_without_mhc1(self): def test_gene_expression_imputation(self): neoantigens, patients = self._get_test_data() - for p in patients: - p.is_rna_available = False neofox = NeoFox( neoantigens=neoantigens, patients=patients, diff --git a/neofox/tests/unit_tests/test_model_converter.py b/neofox/tests/unit_tests/test_model_converter.py index a69e1bf4..a79446b8 100755 --- a/neofox/tests/unit_tests/test_model_converter.py +++ b/neofox/tests/unit_tests/test_model_converter.py @@ -193,7 +193,6 @@ def test_patients_csv_file2model(self): self.assertEqual( 9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles]) ) - self.assertEqual(patients[0].is_rna_available, False) def test_patients_without_mhc2(self): patients_file = pkg_resources.resource_filename( @@ -208,7 +207,6 @@ def test_patients_without_mhc2(self): self.assertEqual(3, len(patients[0].mhc1)) self.assertEqual(6, len([a for m in patients[0].mhc1 for a in m.alleles])) self.assertEqual(0, len(patients[0].mhc2)) - self.assertEqual(patients[0].is_rna_available, False) def test_patients_csv_file2model_mouse(self): patients_file = pkg_resources.resource_filename( @@ -226,7 +224,6 @@ def test_patients_csv_file2model_mouse(self): self.assertEqual( 3, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles]) ) - self.assertEqual(patients[0].is_rna_available, False) def test_patients_csv_file2model2(self): patients_file = pkg_resources.resource_filename( @@ -244,7 +241,6 @@ def test_patients_csv_file2model2(self): self.assertEqual( 9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles]) ) - self.assertEqual(patients[0].is_rna_available, True) def test_patients_csv_file2model3(self): patients_file = pkg_resources.resource_filename( @@ -269,7 +265,6 @@ def test_patients_csv_file2model3(self): "HLA-DQA1*04:01" in [a.name for m in patients[0].mhc2 for g in m.genes for a in g.alleles] ) - self.assertTrue(patients[0].is_rna_available) def test_patients_csv_file2model_without_mhc1(self): patients_file = pkg_resources.resource_filename( @@ -287,7 +282,6 @@ def test_patients_csv_file2model_without_mhc1(self): self.assertEqual( 9, len([a for m in patients[0].mhc2 for g in m.genes for a in g.alleles]) ) - self.assertEqual(patients[0].is_rna_available, True) def test_patients_csv_file2model_without_mhc2(self): patients_file = pkg_resources.resource_filename( From 8044ed5c99649feac83816be7ad5b71b269be84f Mon Sep 17 00:00:00 2001 From: priesgo Date: Thu, 19 Jan 2023 17:43:50 +0100 Subject: [PATCH 3/7] remove references in tests --- neofox/model/conversion.py | 1 - neofox/model/factories.py | 3 +-- neofox/tests/synthetic_data/factories.py | 1 - neofox/tests/unit_tests/test_neofox.py | 2 +- neofox/tests/unit_tests/test_validation.py | 22 +--------------------- 5 files changed, 3 insertions(+), 26 deletions(-) mode change 100644 => 100755 neofox/tests/synthetic_data/factories.py mode change 100644 => 100755 neofox/tests/unit_tests/test_validation.py diff --git a/neofox/model/conversion.py b/neofox/model/conversion.py index f57f6c56..757efacf 100755 --- a/neofox/model/conversion.py +++ b/neofox/model/conversion.py @@ -111,7 +111,6 @@ def parse_patients_file(patients_file: str, mhc_database: MhcDatabase) -> List[P patient_dict = row.to_dict() patient = PatientFactory.build_patient( identifier=patient_dict.get("identifier"), - is_rna_available=patient_dict.get("isRnaAvailable", False), tumor_type=patient_dict.get("tumorType"), mhc_alleles=patient_dict.get("mhcIAlleles", []), mhc2_alleles=patient_dict.get("mhcIIAlleles", []), diff --git a/neofox/model/factories.py b/neofox/model/factories.py index b1da0522..6b6ce7ff 100755 --- a/neofox/model/factories.py +++ b/neofox/model/factories.py @@ -173,11 +173,10 @@ def build_neoepitope(mutated_peptide=None, wild_type_peptide=None, patient_ident class PatientFactory(object): @staticmethod - def build_patient(identifier, is_rna_available=False, tumor_type=None, mhc_alleles: List[str] = [], + def build_patient(identifier, tumor_type=None, mhc_alleles: List[str] = [], mhc2_alleles: List[str] = [], mhc_database: MhcDatabase =None): patient = Patient( identifier=identifier, - is_rna_available=is_rna_available, tumor_type=tumor_type, mhc1=MhcFactory.build_mhc1_alleles(mhc_alleles, mhc_database), mhc2=MhcFactory.build_mhc2_alleles(mhc2_alleles, mhc_database) diff --git a/neofox/tests/synthetic_data/factories.py b/neofox/tests/synthetic_data/factories.py old mode 100644 new mode 100755 index 233cf00e..0af11d2e --- a/neofox/tests/synthetic_data/factories.py +++ b/neofox/tests/synthetic_data/factories.py @@ -78,7 +78,6 @@ def patient(self) -> Patient: try: patient = Patient( identifier=self.generator.unique.uuid4(), - is_rna_available=True, tumor_type=self.random_elements(self.available_tumor_types, length=1)[0], # by setting unique=True we enforce that all patients are heterozygous mhc1=MhcFactory.build_mhc1_alleles( diff --git a/neofox/tests/unit_tests/test_neofox.py b/neofox/tests/unit_tests/test_neofox.py index ebf62169..d0ac1a85 100755 --- a/neofox/tests/unit_tests/test_neofox.py +++ b/neofox/tests/unit_tests/test_neofox.py @@ -207,7 +207,7 @@ def _get_test_neoantigen(self): ) def _get_test_patient(self): - return Patient(identifier="12345", is_rna_available=True) + return Patient(identifier="12345") if __name__ == "__main__": diff --git a/neofox/tests/unit_tests/test_validation.py b/neofox/tests/unit_tests/test_validation.py old mode 100644 new mode 100755 index 39f38d5b..311c3159 --- a/neofox/tests/unit_tests/test_validation.py +++ b/neofox/tests/unit_tests/test_validation.py @@ -42,12 +42,6 @@ def test_bad_type_raises_exception(self): Neoantigen(patient_identifier="1234", rna_expression="0.45"), ) # this should be a float) - self.assertRaises( - NeofoxDataValidationException, - ModelValidator.validate, - Patient(identifier="1234", is_rna_available="Richtig"), - ) # this should be a boolean) - # TODO: make validation capture this data types errors! ModelValidator.validate( Neoantigen( @@ -63,7 +57,7 @@ def test_good_data_does_not_raise_exceptions(self): neoantigen = Neoantigen(patient_identifier="1234", rna_expression=0.45) ModelValidator.validate(neoantigen) - patient = Patient(identifier="1234", is_rna_available=True) + patient = Patient(identifier="1234") ModelValidator.validate(patient) def test_enum_with_wrong_value(self): @@ -695,20 +689,6 @@ def test_empty_patient_identifier(self): NeofoxDataValidationException, ModelValidator.validate_patient, patient, ORGANISM_HOMO_SAPIENS ) - def test_bad_is_rna_available(self): - ModelValidator.validate_patient( - Patient(identifier="123", is_rna_available=True), ORGANISM_HOMO_SAPIENS - ) - ModelValidator.validate_patient( - Patient(identifier="123", is_rna_available=False), ORGANISM_HOMO_SAPIENS - ) - self.assertRaises( - NeofoxDataValidationException, - ModelValidator.validate_patient, - Patient(identifier="123", is_rna_available="False"), - ORGANISM_HOMO_SAPIENS - ) - def test_validate_neoepitope_mhci(self): neoepitope = PredictedEpitope( mutated_peptide="DILVTDQTR", From 06298633988e108a85a17407f6745a7bc1db1e54 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 12:54:50 +0100 Subject: [PATCH 4/7] update documentation --- docs/source/05_models.md | 1 - neofox/model/models.md | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/source/05_models.md b/docs/source/05_models.md index 6f257bc6..5d475081 100644 --- a/docs/source/05_models.md +++ b/docs/source/05_models.md @@ -195,7 +195,6 @@ The metadata required for analysis for a given patient + its patient identifier | Field | Type | Label | Description | | ----- | ---- | ----- | ----------- | | identifier | [string](#string) | | Patient identifier | -| isRnaAvailable | [bool](#bool) | | Is RNA expression available? | | tumorType | [string](#string) | | Tumor entity in TCGA study abbrevation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations | | mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules | | mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules | diff --git a/neofox/model/models.md b/neofox/model/models.md index 6f257bc6..5d475081 100644 --- a/neofox/model/models.md +++ b/neofox/model/models.md @@ -195,7 +195,6 @@ The metadata required for analysis for a given patient + its patient identifier | Field | Type | Label | Description | | ----- | ---- | ----- | ----------- | | identifier | [string](#string) | | Patient identifier | -| isRnaAvailable | [bool](#bool) | | Is RNA expression available? | | tumorType | [string](#string) | | Tumor entity in TCGA study abbrevation style as described here: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations | | mhc1 | [Mhc1](#neoantigen.Mhc1) | repeated | MHC I classic molecules | | mhc2 | [Mhc2](#neoantigen.Mhc2) | repeated | MHC II classic molecules | From 2080ebb79edcede857c9b788478d5ec8dbfe5dd6 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 12:56:36 +0100 Subject: [PATCH 5/7] add comment explaining deployment strategy --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8be0d4ae..2001a66b 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -67,4 +67,5 @@ publish_package: - python3 setup.py sdist bdist_wheel - TWINE_PASSWORD=${CI_JOB_TOKEN} TWINE_USERNAME=gitlab-ci-token python -m twine upload --repository-url https://gitlab.rlp.net/api/v4/projects/${CI_PROJECT_ID}/packages/pypi dist/* only: + # deploys in private gitlab package repository only the develop branch, the master branch is published in PyPI - develop From 31ac5d9efe5e3c62905eff87f1e7053a52bd3c34 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 13:08:56 +0100 Subject: [PATCH 6/7] update condition to impute RNA expression with gene expression --- neofox/neofox.py | 2 ++ neofox/tests/unit_tests/test_neofox.py | 11 ++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/neofox/neofox.py b/neofox/neofox.py index 30400f0c..40a698e2 100755 --- a/neofox/neofox.py +++ b/neofox/neofox.py @@ -133,6 +133,8 @@ def _conditional_expression_imputation(self) -> List[Neoantigen]: gene_expression = expression_annotator.get_gene_expression_annotation( gene_name=neoantigen.gene, tcga_cohort=patient.tumor_type ) + if expression_value is None and patient.tumor_type is not None and patient.tumor_type != "": + expression_value = gene_expression neoantigen_transformed.rna_expression = expression_value neoantigen.imputed_gene_expression = gene_expression neoantigens_transformed.append(neoantigen_transformed) diff --git a/neofox/tests/unit_tests/test_neofox.py b/neofox/tests/unit_tests/test_neofox.py index d0ac1a85..f93f5d84 100755 --- a/neofox/tests/unit_tests/test_neofox.py +++ b/neofox/tests/unit_tests/test_neofox.py @@ -191,11 +191,12 @@ def test_with_expression_imputation(self): reference_folder=FakeReferenceFolder(), configuration=FakeDependenciesConfiguration(), ) - for neoantigen in original_neoantigens: - for neoantigen_imputed in neofox_runner.neoantigens: - self.assertFalse( - neoantigen.rna_expression == neoantigen_imputed.rna_expression - ) + for neoantigen, neoantigen_imputed in zip(original_neoantigens, neofox_runner.neoantigens): + self.assertIsNotNone(neoantigen_imputed.imputed_gene_expression) + if neoantigen.rna_expression is None: + self.assertNotEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression) + else: + self.assertEqual(neoantigen.rna_expression, neoantigen_imputed.rna_expression) def _get_test_neoantigen(self): return Neoantigen( From 65d7c673ed738e0ae5e5b1eb76d556c05d922fa9 Mon Sep 17 00:00:00 2001 From: Pablo Riesgo Ferreiro Date: Fri, 20 Jan 2023 13:12:17 +0100 Subject: [PATCH 7/7] update MHC-II threshold in documentation --- docs/source/03_03_usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/03_03_usage.md b/docs/source/03_03_usage.md index ba9db639..2604c7e8 100755 --- a/docs/source/03_03_usage.md +++ b/docs/source/03_03_usage.md @@ -15,7 +15,7 @@ neofox --input-file neoantigens_candidates.tsv \ [--output-prefix out_prefix] \ [--organism human|mouse] \ [--rank-mhci-threshold 2.0] \ - [--rank-mhcii-threshold 4.0] \ + [--rank-mhcii-threshold 5.0] \ [--num-cpus] \ [--config] \ [--patient-id] \