Skip to content

Commit

Permalink
Merge pull request #3 from NHS-NGS/dev
Browse files Browse the repository at this point in the history
v0.1.0
  • Loading branch information
Yu-jinKim authored Jul 2, 2020
2 parents d387af7 + 37fd891 commit 6a6bf43
Show file tree
Hide file tree
Showing 7 changed files with 276 additions and 109 deletions.
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,23 @@ BRAF
>> hgnc_queries.get_id("BRAF", verbose = False)
HGNC:1097
>>hgnc_queries.get_symbol_from_id("1097", verbose = False)
>> hgnc_queries.get_symbol_from_id("1097", verbose = False)
BRAF
# new function more for in-script use
# get the hgnc symbol if gene symbol not recognized by the process you're using
>>print(queries.get_hgnc_symbol("RN5S49"))
>> print(hgnc_queries.get_hgnc_symbol("RN5S49"))
RNA5SP49
>>print(queries.get_hgnc_symbol("BRAF1"))
>> print(hgnc_queries.get_hgnc_symbol("BRAF1"))
BRAF
>> hgnc_queries.get_refseq("BRCA1", False)
['NM_007294']
>> hgnc_queries.get_ensembl("BRCA1", False)
"ENSG00000012048"
>> hgnc_queries.convert_refseq2ensembl("NM_007294", False)
"ENSG00000012048"
>> hgnc_queries.convert_ensembl2refseq("ENSG00000012048", False)
['NM_007294']
```
10 changes: 7 additions & 3 deletions hgnc_queries/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from .api import get_api_response, URL
from .conversion import (
convert_ensembl2refseq, convert_refseq2ensembl
)
from .queries import (
get_new_symbol, get_gene_starting_with, get_alias,
get_main_symbol, get_prev_symbol, get_id,
get_symbol_from_id, get_hgnc_symbol
)
from .api import get_api_response
get_symbol_from_id, get_hgnc_symbol,
get_ensembl, get_refseq
)
5 changes: 3 additions & 2 deletions hgnc_queries/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import requests
import sys
import json
import sys

import requests

URL = "http://rest.genenames.org"

Expand Down
78 changes: 78 additions & 0 deletions hgnc_queries/conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from .api import get_api_response, URL


def convert_refseq2ensembl(refseq: str, verbose: bool = True):
    """ Convert refseq number to ensembl gene id

    Args:
        refseq (str): Refseq accession number. Surrounding whitespace is
            stripped and the value is upper-cased before the lookup.
        verbose (bool, optional): Print output. Defaults to True.

    Returns:
        None: if the accession doesn't start with "NM", if no record is
            found, or if the first record has no "ensembl_gene_id"
        ensembl_id (str): "ensembl_gene_id" of the first record returned
    """

    refseq = refseq.strip().upper()

    # Input validation: this message is printed regardless of `verbose`
    if not refseq.startswith("NM"):
        print("Refseq given: {} doesn't start with \"NM\"".format(refseq))
        return None

    ext = "fetch/refseq_accession/{}".format(refseq)
    data = get_api_response("{}/{}".format(URL, ext))
    res = data["response"]["docs"]

    if not res:
        if verbose:
            print("Refseq \"{}\" not found".format(refseq))

        return None

    # .get() rather than indexing: a record is not guaranteed to carry the
    # key (presumably the API omits unpopulated fields -- TODO confirm), and
    # a KeyError here would break the documented "None or id" contract
    ensembl_id = res[0].get("ensembl_gene_id")

    if ensembl_id is None:
        if verbose:
            print("Refseq \"{}\" has no Ensembl gene id".format(refseq))

        return None

    if verbose:
        print("Refseq \"{}\" -> Ensembl \"{}\"".format(refseq, ensembl_id))

    return ensembl_id


def convert_ensembl2refseq(ensembl_id: str, verbose: bool = True):
    """ Convert Ensembl id to refseq number

    Args:
        ensembl_id (str): Ensembl gene id. Surrounding whitespace is
            stripped and the value is upper-cased before the lookup.
        verbose (bool, optional): Prints the output. Defaults to True.

    Returns:
        None: if the id doesn't start with "ENSG", if no record is found,
            or if the first record has no "refseq_accession"
        refseq: "refseq_accession" of the first record returned, passed
            through as the API gives it (the README example shows a list
            such as ['NM_007294'])
    """

    ensembl_id = ensembl_id.strip().upper()

    # Input validation: this message is printed regardless of `verbose`.
    # The message names the prefix that is actually checked ("ENSG").
    if not ensembl_id.startswith("ENSG"):
        print("Ensembl_id given: {} doesn't start with \"ENSG\"".format(
            ensembl_id
        ))
        return None

    ext = "fetch/ensembl_gene_id/{}".format(ensembl_id)
    data = get_api_response("{}/{}".format(URL, ext))
    res = data["response"]["docs"]

    if not res:
        if verbose:
            print("Ensembl_id \"{}\" not found".format(ensembl_id))

        return None

    # .get() rather than indexing: a record is not guaranteed to carry the
    # key (presumably the API omits unpopulated fields -- TODO confirm), and
    # a KeyError here would break the documented "None or refseq" contract
    refseq = res[0].get("refseq_accession")

    if refseq is None:
        if verbose:
            print("Ensembl_id \"{}\" has no Refseq accession".format(
                ensembl_id
            ))

        return None

    if verbose:
        print("Ensembl_id \"{}\" -> Refseq \"{}\"".format(
            ensembl_id,
            refseq
        ))

    return refseq
149 changes: 72 additions & 77 deletions hgnc_queries/queries.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
""" hgnc_queries.py
Fetch data from HGNC api
"""

import argparse
from .api import get_api_response, URL


Expand All @@ -19,7 +13,7 @@ def get_new_symbol(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "search/prev_symbol/{}".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -61,7 +55,7 @@ def get_gene_starting_with(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "search/symbol/{}*".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -97,7 +91,7 @@ def get_alias(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "fetch/symbol/{}".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -146,7 +140,7 @@ def get_main_symbol(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "search/alias_symbol/{}".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -184,7 +178,7 @@ def get_prev_symbol(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "fetch/symbol/{}".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -225,7 +219,7 @@ def get_id(gene_symbol: str, verbose: bool = True):
- None
"""

gene_symbol = gene_symbol.upper()
gene_symbol = gene_symbol.strip().upper()

ext = "fetch/symbol/{}".format(gene_symbol)
data = get_api_response("{}/{}".format(URL, ext))
Expand Down Expand Up @@ -267,6 +261,8 @@ def get_symbol_from_id(gene_id: str, verbose: bool = True):

return

gene_id = gene_id.strip()

ext = "search/hgnc_id/{}".format(gene_id)
data = get_api_response("{}/{}".format(URL, ext))
res = data["response"]["docs"]
Expand Down Expand Up @@ -298,6 +294,8 @@ def get_hgnc_symbol(gene_symbol: str):
- None
"""

gene_symbol = gene_symbol.strip()

new_symbol = get_new_symbol(gene_symbol, False)

if new_symbol:
Expand All @@ -311,68 +309,65 @@ def get_hgnc_symbol(gene_symbol: str):
return


def main():
    """ Command line entry point for the HGNC queries

    Builds one subcommand per query function, parses the command line and
    calls the selected function with the given gene symbol (upper-cased) or
    gene id. If no subcommand is given, the help is printed instead of
    silently doing nothing.
    """

    parser = argparse.ArgumentParser(
        description="Script to interface with the HGNC api"
    )
    subparsers = parser.add_subparsers(help="Commands")

    # (subcommand, subcommand help, argument name, argument help, function)
    commands = (
        ("new_symbol", "Get the new symbol",
         "gene_symbol", "Gene symbol", get_new_symbol),
        ("alias", "Get the aliases of given symbol",
         "gene_symbol", "Gene symbol", get_alias),
        ("main_symbol", "Get the main symbol from alias",
         "gene_symbol", "Gene symbol", get_main_symbol),
        ("prev_symbol", "Get the previous symbol",
         "gene_symbol", "Gene symbol", get_prev_symbol),
        ("gene", "Get the gene symbols starting with",
         "gene_symbol", "Gene symbol", get_gene_starting_with),
        ("id", "Get the ID from a gene symbol",
         "gene_symbol", "Gene symbol", get_id),
        ("id2symbol", "Get the gene symbol from the id",
         "gene_id", "Gene ID", get_symbol_from_id),
    )

    for command, command_help, arg_name, arg_help, func in commands:
        subparser = subparsers.add_parser(command, help=command_help)
        subparser.add_argument(arg_name, help=arg_help)
        subparser.set_defaults(func=func)

    args = parser.parse_args()

    if hasattr(args, "gene_symbol"):
        args.func(args.gene_symbol.upper())
    elif hasattr(args, "gene_id"):
        args.func(args.gene_id)
    else:
        # No subcommand given: none of the defaults above were set
        parser.print_help()


if __name__ == "__main__":
    main()
def get_refseq(gene_symbol: str, verbose: bool = True):
    """ Get refseq given a gene symbol

    Args:
        gene_symbol (str): Gene symbol. Surrounding whitespace is stripped
            and the value is upper-cased before the lookup.
        verbose (bool, optional): Prints the output. Defaults to True.

    Returns:
        None: if the gene is not found or its record has no
            "refseq_accession"
        refseq: "refseq_accession" of the first record returned, passed
            through as the API gives it (the README example shows a list
            such as ['NM_007294'])
    """

    gene_symbol = gene_symbol.strip().upper()

    ext = "fetch/symbol/{}".format(gene_symbol)
    data = get_api_response("{}/{}".format(URL, ext))
    res = data["response"]["docs"]

    if not res:
        if verbose:
            print("Gene \"{}\" not found".format(gene_symbol))

        return None

    # .get() rather than indexing: a record is not guaranteed to carry the
    # key (presumably the API omits unpopulated fields -- TODO confirm), and
    # a KeyError here would break the documented "None or refseq" contract
    refseq = res[0].get("refseq_accession")

    if refseq is None:
        if verbose:
            print("No Refseq for \"{}\"".format(gene_symbol))

        return None

    if verbose:
        print("Refseq for \"{}\": {}".format(gene_symbol, refseq))

    return refseq


def get_ensembl(gene_symbol: str, verbose: bool = True):
    """ Get the ensembl id for given gene symbol

    Args:
        gene_symbol (str): Gene symbol. Surrounding whitespace is stripped
            and the value is upper-cased before the lookup.
        verbose (bool, optional): Prints the output. Defaults to True.

    Returns:
        None: if the gene is not found or its record has no
            "ensembl_gene_id"
        ensembl_id (str): "ensembl_gene_id" of the first record returned
    """

    gene_symbol = gene_symbol.strip().upper()

    ext = "fetch/symbol/{}".format(gene_symbol)
    data = get_api_response("{}/{}".format(URL, ext))
    res = data["response"]["docs"]

    if not res:
        if verbose:
            print("Gene \"{}\" not found".format(gene_symbol))

        return None

    # .get() rather than indexing: a record is not guaranteed to carry the
    # key (presumably the API omits unpopulated fields -- TODO confirm), and
    # a KeyError here would break the documented "None or id" contract
    ensembl_id = res[0].get("ensembl_gene_id")

    if ensembl_id is None:
        if verbose:
            print("No Ensembl_id for \"{}\"".format(gene_symbol))

        return None

    if verbose:
        print("Ensembl_id for \"{}\": {}".format(gene_symbol, ensembl_id))

    return ensembl_id
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="hgnc_queries",
version="0.0.3",
version="0.1.0",
author="Yujin Kim",
author_email="[email protected]",
description="Make HGNC queries for gene symbols",
Expand Down
Loading

0 comments on commit 6a6bf43

Please sign in to comment.