Skip to content

Commit

Permalink
Add the CURIE ID query testing for the mygene instance
Browse files Browse the repository at this point in the history
  • Loading branch information
Johnathan Schaff committed Feb 8, 2024
1 parent b4ad216 commit 15bb9d8
Showing 1 changed file with 104 additions and 4 deletions.
108 changes: 104 additions & 4 deletions tests/gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import types
import unittest

import pytest

sys.path.insert(0, os.path.split(os.path.split(os.path.abspath(__file__))[0])[0])

try:
Expand Down Expand Up @@ -61,6 +63,106 @@ def test_getgene_with_fields(self):
self.assertTrue("refseq" in g)
self.assertFalse("summary" in g)

def test_curie_id_query(self):
"""
Tests the annotation endpoint support for the biolink CURIE ID.
If support is enabled then we should retrieve the exact same document for all the provided
queries
"""
curie_id_testing_collection = [
("1017", "entrezgene:1017", "NCBIgene:1017"),
(1017, "entrezgene:1017", "ncbigene:1017"),
("1017", "entrezgene:1017", "NCBIGENE:1017"),
("1018", "ensembl.gene:ENSG00000250506", "ENSEMBL:ENSG00000250506"),
(1018, "ensembl.gene:ENSG00000250506", "ensembl:ENSG00000250506"),
("5995", "uniprot.Swiss-Prot:P47804", "UniProtKB:P47804"),
(5995, "uniprot.Swiss-Prot:P47804", "UNIPROTKB:P47804"),
("5995", "uniprot.Swiss-Prot:P47804", "uniprotkb:P47804"),
]

results_aggregation = []
for id_query, biothings_query, biolink_query in curie_id_testing_collection:
id_query_result = self.mg.getgene(_id=id_query)
biothings_term_query_result = self.mg.getgene(_id=biothings_query)
biolink_term_query_result = self.mg.getgene(_id=biolink_query)
results_aggregation.append(
(
id_query_result == biothings_term_query_result,
id_query_result == biolink_term_query_result,
biothings_term_query_result == biolink_term_query_result,
)
)

results_validation = []
failure_messages = []
for result, test_query in zip(results_aggregation, curie_id_testing_collection):
cumulative_result = all(result)
if not cumulative_result:
failure_messages.append(f"Query Failure: {test_query} | Results: {result}")
results_validation.append(cumulative_result)

self.assertTrue(all(results_validation), msg="\n".join(failure_messages))

def test_multiple_curie_id_query(self):
"""
Tests the annotations endpoint support for the biolink CURIE ID.
Batch query testing against the POST endpoint to verify that the CURIE ID can work with
multiple
If support is enabled then we should retrieve the exact same document for all the provided
queries
"""
curie_id_testing_collection = [
("1017", "entrezgene:1017", "NCBIgene:1017"),
(1017, "entrezgene:1017", "ncbigene:1017"),
("1017", "entrezgene:1017", "NCBIGENE:1017"),
("1018", "ensembl.gene:ENSG00000250506", "ENSEMBL:ENSG00000250506"),
(1018, "ensembl.gene:ENSG00000250506", "ensembl:ENSG00000250506"),
("5995", "uniprot.Swiss-Prot:P47804", "UniProtKB:P47804"),
(5995, "uniprot.Swiss-Prot:P47804", "UNIPROTKB:P47804"),
("5995", "uniprot.Swiss-Prot:P47804", "uniprotkb:P47804"),
]

results_aggregation = []
for id_query, biothings_query, biolink_query in curie_id_testing_collection:
base_result = self.mg.getgene(_id=id_query)

batch_query = [id_query, biothings_query, biolink_query]
query_results = self.mg.getgenes(ids=batch_query)
assert len(query_results) == len(batch_query)

batch_id_query = query_results[0]
batch_biothings_query = query_results[1]
batch_biolink_query = query_results[2]

batch_id_query_return_value = batch_id_query.pop("query")
assert batch_id_query_return_value == str(id_query)

batch_biothings_query_return_value = batch_biothings_query.pop("query")
assert batch_biothings_query_return_value == str(biothings_query)

batch_biolink_query_return_value = batch_biolink_query.pop("query")
assert batch_biolink_query_return_value == str(biolink_query)

batch_result = (
base_result == batch_id_query,
base_result == batch_biothings_query,
base_result == batch_biolink_query,
)
results_aggregation.append(batch_result)

results_validation = []
failure_messages = []
for result, test_query in zip(results_aggregation, curie_id_testing_collection):
cumulative_result = all(result)
if not cumulative_result:
failure_messages.append(f"Query Failure: {test_query} | Results: {result}")
results_validation.append(cumulative_result)

self.assertTrue(all(results_validation), msg="\n".join(failure_messages))

def test_getgene_with_fields_as_list(self):
g1 = self.mg.getgene("1017", fields="name,symbol,refseq")
g2 = self.mg.getgene("1017", fields=["name", "symbol", "refseq"])
Expand Down Expand Up @@ -230,10 +332,8 @@ def _querymany():

self.assertTrue(
all(
[
x == pre_cache_r
for x in [pre_cache_r, cache_fill_r, cached_r, post_cache_r, recached_r, clear_cached_r]
]
x == pre_cache_r
for x in [pre_cache_r, cache_fill_r, cached_r, post_cache_r, recached_r, clear_cached_r]
)
)

Expand Down

0 comments on commit 15bb9d8

Please sign in to comment.