From 9dd3044d394661e0a9ef9659e74c9ea659c63d4f Mon Sep 17 00:00:00 2001 From: korikuzma Date: Fri, 26 Feb 2021 14:15:13 -0500 Subject: [PATCH 1/2] Remove duplicates --- main.py | 4 +++- tests/fixtures/translators.yml | 20 +++++++++++++++++++ .../polypeptide_sequence_variant_base.py | 9 +++++++-- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index e694c334..2419d598 100644 --- a/main.py +++ b/main.py @@ -67,7 +67,9 @@ def translate(q: str = Query(..., description=q_description)): translations = [] for valid_variant in validations.valid_results: - translations.append(translator.perform(valid_variant)) + result = translator.perform(valid_variant) + if result not in translations: + translations.append(result) return TranslationResponseSchema( search_term=q, variants=translations diff --git a/tests/fixtures/translators.yml b/tests/fixtures/translators.yml index 9076dce3..5d657c4b 100644 --- a/tests/fixtures/translators.yml +++ b/tests/fixtures/translators.yml @@ -37,6 +37,26 @@ amino_acid_substitution: "type": "Allele" } ] + - query: NP_004324.2:p.Val600Glu + variants: [ + { + "id": "ga4gh:VA.mJbjSsW541oOsOtBoX36Mppr6hMjbjFr", + "location": { + "interval": { + "end": 600, + "start": 599, + "type": "SimpleInterval" + }, + "sequence_id": "ga4gh:SQ.cQvw4UsHHRRlogxbWCB8W-mKD4AraM9y", + "type": "SequenceLocation" + }, + "state": { + "sequence": "E", + "type": "SequenceState" + }, + "type": "Allele" + } + ] polypeptide_truncation: diff --git a/variant/validators/polypeptide_sequence_variant_base.py b/variant/validators/polypeptide_sequence_variant_base.py index 4fbc55f1..681f6605 100644 --- a/variant/validators/polypeptide_sequence_variant_base.py +++ b/variant/validators/polypeptide_sequence_variant_base.py @@ -99,7 +99,13 @@ def get_vrs_allele(self, sequence_id, s) -> dict: allele = models.Allele(location=seq_location, state=state) allele['_id'] = ga4gh_identify(allele) - return allele.as_dict() + allele = allele.as_dict() + if len(allele['state']['sequence']) == 3: + for one, three in \ + self._amino_acid_cache._amino_acid_code_conversion: + if three == allele['state']['sequence']: + allele['state']['sequence'] = one + return allele def get_hgvs_expr(self, classification) -> str: """Return HGVS expression for a classification. @@ -133,7 +139,6 @@ def get_valid_invalid_results(self, classification_tokens, transcripts, tokens :param list results: A list to store validation result objects """ - for s in classification_tokens: for t in transcripts: valid = True From 2cb0287188ede21abfcca84948b3345c1cb2a528 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Fri, 26 Feb 2021 15:23:45 -0500 Subject: [PATCH 2/2] Fix allele state --- tests/fixtures/validators.yml | 1 + .../polypeptide_sequence_variant_base.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/fixtures/validators.yml b/tests/fixtures/validators.yml index f56d3efc..b6ba78f5 100644 --- a/tests/fixtures/validators.yml +++ b/tests/fixtures/validators.yml @@ -3,6 +3,7 @@ amino_acid_substitution: - query: BRAF V600E - query: NP_004324.2:p.Val600Glu - query: NP_005219.2:p.Thr790Met + - query: EGFR Leu858Arg should_not_match: - query: NP_004324.2:p.Val600000000000Glu - query: NP_004324.2:p.Glu600Val diff --git a/variant/validators/polypeptide_sequence_variant_base.py b/variant/validators/polypeptide_sequence_variant_base.py index 681f6605..4e42d9e7 100644 --- a/variant/validators/polypeptide_sequence_variant_base.py +++ b/variant/validators/polypeptide_sequence_variant_base.py @@ -96,16 +96,17 @@ def get_vrs_allele(self, sequence_id, s) -> dict: ) state = models.SequenceState(sequence=s.alt_protein) + state_dict = state.as_dict() + if len(state_dict['sequence']) == 3: + for one, three in \ + self._amino_acid_cache._amino_acid_code_conversion.items(): + if three == state_dict['sequence']: + state.sequence = one + allele = models.Allele(location=seq_location, state=state) allele['_id'] = ga4gh_identify(allele) - allele = allele.as_dict() - if len(allele['state']['sequence']) == 3: - for one, three in \ - self._amino_acid_cache._amino_acid_code_conversion: - if three == allele['state']['sequence']: - allele['state']['sequence'] = one - return allele + return allele.as_dict() def get_hgvs_expr(self, classification) -> str: """Return HGVS expression for a classification.