-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
VariantUtils and TestVariantUtils #272
Changes from all commits
c8b9e3e
dd23855
9ab2b63
c44cd80
c4f2caa
64fc917
3a92bae
c3eb6ef
7a4756b
7a4bbfb
fc68edb
c9dd2c6
1566f47
80c94df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,9 +47,9 @@ | |
}, | ||
"David Michaels": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]", | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"David Michaels", | ||
|
@@ -58,8 +58,8 @@ | |
}, | ||
"Douglas Rioux": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]", | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"Douglas Rioux", | ||
|
@@ -85,8 +85,8 @@ | |
}, | ||
"Kent M Pitman": { | ||
"emails": [ | ||
"[email protected]", | ||
"[email protected]" | ||
"[email protected]", | ||
"[email protected]" | ||
], | ||
"names": [ | ||
"Kent M Pitman", | ||
|
@@ -129,6 +129,16 @@ | |
"SooLee" | ||
] | ||
}, | ||
"Tom Duraisingh": { | ||
"emails": [ | ||
"[email protected]", | ||
"contributors.TomDuraisingh.emails.138792649+TomDuraisingh@users.noreply.github.com" | ||
], | ||
"names": [ | ||
"TomDuraisingh", | ||
"Tom Duraisingh" | ||
] | ||
}, | ||
"Will Ronchetti": { | ||
"emails": [ | ||
"[email protected]" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import json | ||
from dcicutils.ff_utils import get_metadata, search_metadata | ||
from dcicutils.creds_utils import CGAPKeyManager | ||
|
||
|
||
class VariantUtils:
    """Helpers for querying VariantSample items on a CGAP portal and summarising them per gene.

    Credentials are looked up once, at construction time, through ``CGAPKeyManager``;
    the portal queries go through ``search_metadata`` / ``get_metadata`` from
    ``dcicutils.ff_utils``. The JSON-file helpers are static and need no credentials.
    """

    # Search for VariantSample items whose most severe gene matches; the gene symbol is appended.
    # limit=1 because only the 'total' field of the response is read.
    SEARCH_VARIANTS_BY_GENE = ('/search/?type=VariantSample&limit=1'
                               '&variant.genes.genes_most_severe_gene.display_title=')
    # Same gene filter, restricted to proband samples with gnomAD popmax AF between 0 and 0.001.
    SEARCH_RARE_VARIANTS_BY_GENE = ('/search/?samplegeno.samplegeno_role=proband&type=VariantSample'
                                    '&variant.csq_gnomadg_af_popmax.from=0&variant.csq_gnomadg_af_popmax.to=0.001'
                                    '&variant.genes.genes_most_severe_gene.display_title=')

    # Default input files; these were previously hard-coded inside the methods that read them.
    DEFAULT_GENE_FILE = 'gene.json'
    DEFAULT_MUTATIONS_FILE = '10+sorted_msa_genes_and_mutations.json'

    def __init__(self, *, env_name) -> None:
        """Look up the key dictionary for ``env_name`` and remember its server URL.

        :param env_name: name of the environment whose credentials should be used.
        """
        self._key_manager = CGAPKeyManager()
        self.creds = self._key_manager.get_keydict_for_env(env=env_name)
        self.base_url = self.creds['server']

    def get_creds(self):
        """Return the key dictionary obtained at construction time."""
        return self.creds

    def get_rare_variants_by_gene(self, *, gene: str, sort: str, addon: str = ''):
        """Search for rare variants on a particular gene.

        :param gene: gene symbol appended to SEARCH_RARE_VARIANTS_BY_GENE.
        :param sort: field to sort on; it is sent as ``sort=-<field>``.
        :param addon: extra query-string text appended verbatim after the sort clause.
        :return: whatever ``search_metadata`` returns for the query.
        """
        # Adjacent literals instead of a backslash continuation inside the string, so the
        # indentation of the source line can never end up inside the URL.
        # NOTE(review): SEARCH_RARE_VARIANTS_BY_GENE already starts with '/', so the path
        # contains '//' after base_url; kept as-is -- confirm how search_metadata treats it.
        query = (f'{self.base_url}/{self.SEARCH_RARE_VARIANTS_BY_GENE}{gene}'
                 f'&sort=-{sort}{addon}')
        return search_metadata(query, key=self.creds)

    def find_number_of_sample_ids(self, gene: str) -> int:
        """Return the number of distinct CALL_INFO values among the rare variants of ``gene``."""
        variants = self.get_rare_variants_by_gene(gene=gene, sort='variant.ID')
        return len({variant.get('CALL_INFO') for variant in variants})

    def get_total_result_count_from_search(self, gene: str):
        """Return the 'total' field of the variant search for ``gene``."""
        res = get_metadata(self.SEARCH_VARIANTS_BY_GENE + gene, key=self.creds)
        return res['total']

    @staticmethod
    def sort_dict_in_descending_order(unsorted_dict: dict) -> dict:
        """Return a new dict with the same items ordered by value, largest first.

        The sort is stable, so items with equal values keep their original relative order.
        """
        return dict(sorted(unsorted_dict.items(), key=lambda item: item[1], reverse=True))

    def create_dict_of_mutations(self, gene: str, *, min_count: int = 10) -> dict:
        """Count rare variants of ``gene`` per position and keep the frequent positions.

        :param gene: gene symbol to search for.
        :param min_count: minimum number of variants a position needs to be kept (default 10).
        :return: ``{gene: {pos1: n_variants, pos2: n_variants, ...}}`` ordered by descending count.
        """
        counts = {}
        for variant in self.get_rare_variants_by_gene(gene=gene, sort='variant.ID'):
            pos = variant['variant']['POS']
            counts[pos] = counts.get(pos, 0) + 1
        frequent = {pos: count for pos, count in counts.items() if count >= min_count}
        return {gene: self.sort_dict_in_descending_order(frequent)}

    @staticmethod
    def return_json(file_name: str):
        """Parse the JSON file ``file_name`` and return the decoded content."""
        with open(file_name, 'r') as f:
            # json.load reads from a file object; json.loads only accepts str/bytes and
            # raised TypeError here before.
            return json.load(f)

    @staticmethod
    def create_dict_from_json_file(file_name: str):
        """Creates dictionary object from specified json file"""
        with open(file_name) as f:
            return json.load(f)

    def _gene_symbols_with_keywords(self, keywords, file_name: str) -> list:
        """Return 'gene_symbol' of every gene in ``file_name`` whose 'gene_summary' contains a keyword."""
        genes = self.return_json(file_name)
        return [gene['gene_symbol'] for gene in genes
                if any(keyword in gene.get('gene_summary', '') for keyword in keywords)]

    def create_list_of_msa_genes(self, file_name: str = DEFAULT_GENE_FILE) -> list:
        """Creates list of genes relating to the brain or nervous system
        (determined by whether keywords 'neur' or 'nerv' in summary)

        :param file_name: JSON file holding the gene records (defaults to 'gene.json').
        """
        return self._gene_symbols_with_keywords(('nerv', 'neur'), file_name)

    def create_url(self, gene: str, file_name: str = DEFAULT_MUTATIONS_FILE) -> str:
        """Return a search path for the variants at the first listed position of ``gene``.

        The position used is the first key of the gene's entry in ``file_name``;
        presumably the file is already sorted by descending count, so that key is the
        most commonly mutated position -- verify against whatever produces the file.

        :param gene: gene symbol; must be a key of the JSON file.
        :param file_name: JSON file of the form ``{gene: {pos: count, ...}, ...}``.
        :raises IndexError: if the gene's entry has no positions.
        """
        d = self.create_dict_from_json_file(file_name)
        pos = list(d[gene])[0]
        return self.SEARCH_RARE_VARIANTS_BY_GENE + gene + f'&variant.POS.from={pos}&variant.POS.to={pos}&sort=-DP'

    def create_list_of_als_park_genes(self, file_name: str = DEFAULT_GENE_FILE) -> list:
        """Creates list of genes relating to Parkinson's or ALS
        (determined by whether keywords 'Parkinson' or 'ALS' in summary)

        :param file_name: JSON file holding the gene records (defaults to 'gene.json').
        """
        return self._gene_symbols_with_keywords(('Parkinson', 'ALS'), file_name)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "dcicutils" | ||
version = "7.7.2" | ||
version = "7.8.0" | ||
description = "Utility package for interacting with the 4DN Data Portal and other 4DN resources" | ||
authors = ["4DN-DCIC Team <[email protected]>"] | ||
license = "MIT" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
import pytest | ||
from unittest import mock | ||
from contextlib import contextmanager | ||
from dcicutils import variant_utils | ||
from dcicutils.variant_utils import VariantUtils | ||
from unittest.mock import patch | ||
|
||
|
||
def create_dummy_keydict():
    """Return a fake key dictionary with a single 'cgap-dummy' entry for use in tests."""
    return {
        'cgap-dummy': {
            'key': 'dummy',
            'secret': 'dummy',
            'server': 'cgap-test.com',
        },
    }
|
||
|
||
class TestVariantUtils:
    """Unit tests for VariantUtils.

    The key manager and every portal/file access are replaced with mocks, so the
    tests exercise only the logic inside VariantUtils itself.
    """

    # Single env name for all tests (some tests previously used 'cgap_dummy').
    ENV_NAME = 'cgap-dummy'

    class CGAPKeyManager:
        """Stand-in for the real key manager; returns the dummy credentials for any env."""

        def get_keydict_for_env(self, *, env):
            return create_dummy_keydict()['cgap-dummy']

    @contextmanager
    def mock_key_manager(self):
        """Patch variant_utils.CGAPKeyManager with the stand-in for the duration of the block."""
        with mock.patch.object(variant_utils, 'CGAPKeyManager', new=self.CGAPKeyManager):
            yield

    def _make_variant_utils(self):
        """Build a VariantUtils whose credentials come from the stand-in key manager."""
        with self.mock_key_manager():
            return VariantUtils(env_name=self.ENV_NAME)

    def test_variant_utils_basic(self):
        """Tests the instantiation of a VariantUtils object """
        vu = self._make_variant_utils()
        assert isinstance(vu, VariantUtils)

    @pytest.mark.parametrize('total_value', [
        100,
        200,
        300,
        400
    ])
    @patch('dcicutils.variant_utils.get_metadata')
    def test_get_total_result_count_from_search(self, mock_get_metadata, total_value):
        """The 'total' field of the mocked search response is returned unchanged."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_metadata.return_value = {'total': total_value}
        result = vu.get_total_result_count_from_search(mock_gene)
        assert result == total_value
        mock_get_metadata.assert_called_once_with('/search/?type=VariantSample&limit=1'
                                                  '&variant.genes.genes_most_severe_gene.display_title='
                                                  f'{mock_gene}', key=vu.creds)

    @pytest.mark.parametrize('returned_variants, expected_length', [
        ([{'variant': {'POS': 100000}}], 8),
        ([{'variant': {'POS': 100000}}], 9),
        ([{'variant': {'POS': 100000}}], 10),
        ([{'variant': {'POS': 100000}}], 11),
    ])
    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_create_dict_of_mutations(self, mock_get_rare_variants_by_gene, returned_variants, expected_length):
        """Positions are reported only once they occur at least 10 times."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        # The same single-variant payload is repeated expected_length times.
        mock_get_rare_variants_by_gene.return_value = returned_variants * expected_length
        result = vu.create_dict_of_mutations(mock_gene)
        if expected_length >= 10:
            expected_result = {mock_gene: {100000: expected_length}}
        else:
            expected_result = {mock_gene: {}}
        assert result == expected_result
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_msa_genes(self, mock_return_json):
        """Only genes whose summary contains 'nerv' or 'neur' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...nerv...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '..........'},
            {'gene_symbol': 'GENE3', 'gene_summary': '...neur...'}
        ]
        result = vu.create_list_of_msa_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')

    @patch('dcicutils.variant_utils.VariantUtils.get_rare_variants_by_gene')
    def test_find_number_of_sample_ids(self, mock_get_rare_variants_by_gene):
        """Duplicate CALL_INFO values are counted once."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_get_rare_variants_by_gene.return_value = [
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'ABC123'},
            {'CALL_INFO': 'BCD234'},
            {'CALL_INFO': 'CDE345'}
        ]
        result = vu.find_number_of_sample_ids(mock_gene)
        assert result == 3
        mock_get_rare_variants_by_gene.assert_called_once_with(gene=mock_gene, sort='variant.ID')

    @pytest.mark.parametrize('pos', [
        '100000',
        '200000',
        '300000',
        '400000'
    ])
    @patch('dcicutils.variant_utils.VariantUtils.create_dict_from_json_file')
    def test_create_url(self, mock_create_dict_from_json_file, pos):
        """The URL is built from the first position listed for the gene."""
        vu = self._make_variant_utils()
        mock_gene = 'GENE'
        mock_create_dict_from_json_file.return_value = {
            'GENE': {pos: 20, '123456': 10},
            'OTHER_GENE': {pos: 10}
        }
        result = vu.create_url(gene=mock_gene)
        expected_result = vu.SEARCH_RARE_VARIANTS_BY_GENE + mock_gene + (f'&variant.POS.from={pos}'
                                                                         f'&variant.POS.to={pos}&sort=-DP')
        assert result == expected_result
        # NOTE(review): PR feedback flagged this hard-coded file name as likely to break
        # tests in the workflows; the file reader is mocked here, so the name is only
        # compared, never opened.
        mock_create_dict_from_json_file.assert_called_once_with('10+sorted_msa_genes_and_mutations.json')

    # NOTE(review): PR feedback suggested parametrizing this test together with
    # test_create_list_of_msa_genes, since the two differ only in keywords and method.
    @patch('dcicutils.variant_utils.VariantUtils.return_json')
    def test_create_list_of_als_park_genes(self, mock_return_json):
        """Only genes whose summary contains 'Parkinson' or 'ALS' are listed."""
        vu = self._make_variant_utils()
        mock_return_json.return_value = [
            {'gene_symbol': 'GENE1', 'gene_summary': '...Parkinson...'},
            {'gene_symbol': 'GENE2', 'gene_summary': '...............'},
            {'gene_symbol': 'GENE3', 'gene_summary': '.....ALS.......'}
        ]
        result = vu.create_list_of_als_park_genes()
        assert result == ['GENE1', 'GENE3']
        mock_return_json.assert_called_once_with('gene.json')
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think good to remove at this point