-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathterminology.py
150 lines (117 loc) · 4.66 KB
/
terminology.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import logging
import sqlite3
import ruamel.yaml
# module-level logger for this terminology module
logger = logging.getLogger('terminology')
# caches the SQL statements and root concepts read from terminology.yml;
# starts empty and is filled by load_terminology_data()
data = {}
def load_terminology_data():
    """Load ``terminology.yml`` into the module-level ``data`` cache.

    The file is opened relative to the current working directory and parsed
    with ``ruamel.yaml.safe_load``.

    Returns:
        The parsed YAML content, which is also stored in the global ``data``.
    """
    global data
    # The context manager releases the file handle even when reading or
    # parsing raises, which the manual open()/close() pair did not.
    with open('terminology.yml', 'r') as f:
        data = ruamel.yaml.safe_load(f.read())
    return data
def get_sql(query, is_dom):
    """Look up a SQL statement by name in the cached terminology data.

    The cache is loaded on first use. Statements live under ``data['dom_sql']``
    when ``is_dom`` is truthy and under ``data['sql']`` otherwise.

    Args:
        query: key of the statement inside the selected section.
        is_dom: selects the ``'dom_sql'`` section instead of ``'sql'``.

    Returns:
        The value stored under ``query`` in the selected section.
    """
    if len(data) == 0:
        load_terminology_data()
    section = 'dom_sql' if is_dom else 'sql'
    return data[section][query]
def get_root_concept(dom):
    """Return the root concept configured for the domain ``dom``.

    The terminology cache is loaded on first use. When ``dom`` has no entry
    (or an empty entry) under ``data['root_concept']``, the value of
    ``data['root_concept_default']`` is returned instead.

    Args:
        dom: domain key to look up under ``data['root_concept']``.

    Returns:
        The configured root concept for ``dom`` or the default root concept.
        Presumably a mapping with at least a ``'code'`` key, since
        check_for_root() indexes the result that way -- verify against
        terminology.yml.
    """
    if not data:
        load_terminology_data()
    # .get() rather than [] so that a domain missing from the configuration
    # reaches the default below instead of raising KeyError.
    concept = data['root_concept'].get(dom)
    if concept:
        return concept
    return data['root_concept_default']
def check_for_root(code, dom):
    """Resolve the placeholder concept id ``'root'`` to a real root code.

    Any other ``code`` is handed back unchanged; only the literal ``'root'``
    is replaced by the ``'code'`` entry of the root concept that
    get_root_concept() returns for ``dom``.
    """
    return get_root_concept(dom)['code'] if code == 'root' else code
def get_connection():
    """Open a new connection to the SQLite terminology database.

    The database file is ``db/terminology.db`` (relative path). The connection
    uses ``sqlite3.Row`` as its row factory, so result columns can be read by
    name as well as by index.
    """
    conn = sqlite3.connect('db/terminology.db')
    conn.row_factory = sqlite3.Row
    return conn
def get_code_name_list_result(sql, querytokens=None):
    """Run a query and return its rows as a list of code/name dictionaries.

    The submitted SQL must produce a result set with two fields named
    ``code`` and ``name``.

    Args:
        sql: the SQL statement to execute.
        querytokens: optional sequence of parameters bound to the statement;
            when falsy the statement is executed without parameters.

    Returns:
        ``[{'code': 'code1', 'name': 'name1'}, {'code': 'code2', ...}, ...]``.
        An empty list is returned when the query yields no rows or when a
        ``sqlite3.Error`` occurs (the error is logged, not raised).
    """
    results = []
    connection = get_connection()
    try:
        cursor = connection.cursor()
        if querytokens:
            cursor.execute(sql, querytokens)
        else:
            cursor.execute(sql)
        results = [{'code': row['code'], 'name': row['name']}
                   for row in cursor.fetchall()]
        cursor.close()
    except sqlite3.Error as e:
        # The format string needs a placeholder; without one the logging
        # module fails to render the record and the error text is lost.
        logger.error("An error occurred: %s", e)
    finally:
        # Every call opens its own connection, so always release it.
        connection.close()
    return results
def get_single_string_result(sql, querytokens=None):
    """Run a query and return the first column of its first row.

    Args:
        sql: the SQL statement to execute.
        querytokens: optional sequence of parameters bound to the statement;
            when falsy the statement is executed without parameters.

    Returns:
        The first field of the first result row, or ``None`` when the query
        yields no rows or a ``sqlite3.Error`` occurs (the error is logged,
        not raised).
    """
    result = None
    connection = get_connection()
    try:
        cursor = connection.cursor()
        if querytokens:
            cursor.execute(sql, querytokens)
        else:
            cursor.execute(sql)
        # cursor.arraysize is the fetchmany() batch size (default 1), not a
        # row count, so it cannot detect an empty result. fetchone() returns
        # None when there is no row; check that instead of indexing blindly.
        row = cursor.fetchone()
        if row is not None:
            result = row[0]
        cursor.close()
    except sqlite3.Error as e:
        # The format string needs a placeholder; without one the logging
        # module fails to render the record and the error text is lost.
        logger.error("An error occurred: %s", e)
    finally:
        # Every call opens its own connection, so always release it.
        connection.close()
    return result
def search_domain_by_substring(dom, query):
    """Search a domain for entries matching ``query`` as a substring.

    The statement stored for ``dom`` in the ``'dom_sql'`` section is executed
    with the pattern ``%query%`` supplied twice (presumably for two LIKE
    placeholders -- confirm against terminology.yml).

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql(dom, True)
    pattern = '%' + query + '%'
    return get_code_name_list_result(sql, [pattern, pattern])
def search_diseases_associated_with_anatomic_site(code):
    """Run the 'search_diseases_associated_with_anatomic_site' query for ``code``.

    A ``code`` of ``'root'`` is first replaced by the root concept code of the
    anatomic-site domain.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql('search_diseases_associated_with_anatomic_site', False)
    # NOTE(review): 'anantomicsite' looks like a misspelling of 'anatomicsite'.
    # It is the root_concept lookup key used when code == 'root', so it is kept
    # verbatim here -- confirm against the keys in terminology.yml.
    site_code = check_for_root(code, 'anantomicsite')
    return get_code_name_list_result(sql, [site_code])
def search_diseases_associated_with_finding(code):
    """Run the 'search_diseases_associated_with_finding' query for ``code``.

    A ``code`` of ``'root'`` is first replaced by the root concept code of the
    ``'finding'`` domain.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql('search_diseases_associated_with_finding', False)
    finding_code = check_for_root(code, 'finding')
    return get_code_name_list_result(sql, [finding_code])
def search_diseases_associated_with_gene(code):
    """Run the 'search_diseases_associated_with_gene' query for ``code``.

    A ``code`` of ``'root'`` is first replaced by the root concept code of the
    ``'gene'`` domain.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql('search_diseases_associated_with_gene', False)
    gene_code = check_for_root(code, 'gene')
    return get_code_name_list_result(sql, [gene_code])
def search_diseases_is_stage(code):
    """Run the 'search_diseases_is_stage' query for ``code``.

    A ``code`` of ``'root'`` is first replaced by the root concept code of the
    ``'disease'`` domain.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql('search_diseases_is_stage', False)
    disease_code = check_for_root(code, 'disease')
    return get_code_name_list_result(sql, [disease_code])
def search_diseases_is_grade(code):
    """Run the 'search_diseases_is_grade' query for ``code``.

    A ``code`` of ``'root'`` is first replaced by the root concept code of the
    ``'disease'`` domain.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    sql = get_sql('search_diseases_is_grade', False)
    disease_code = check_for_root(code, 'disease')
    return get_code_name_list_result(sql, [disease_code])
def get_subtree_codes(code, dom):
    """Run the 'get_subtree_codes' query and return a flat list of values.

    Args:
        code: concept code; ``'root'`` is replaced by the root concept code
            of ``dom``.
        dom: domain used to resolve the ``'root'`` placeholder.

    Returns:
        A list holding the first column of every result row. An empty list
        is returned when the query yields no rows or when a ``sqlite3.Error``
        occurs (the error is logged, not raised).
    """
    results = []
    # Reuse the shared connection helper so the database path lives in one
    # place, then swap the row factory so each row collapses to its first
    # column.
    connection = get_connection()
    connection.row_factory = lambda cursor, row: row[0]
    try:
        cursor = connection.cursor()
        cursor.execute(get_sql('get_subtree_codes', False),
                       [check_for_root(code, dom)])
        results = cursor.fetchall()
        cursor.close()
    except sqlite3.Error as e:
        # The format string needs a placeholder; without one the logging
        # module fails to render the record and the error text is lost.
        logger.error("An error occurred: %s", e)
    finally:
        # Every call opens its own connection, so always release it.
        connection.close()
    return results
def get_child_codes(code, dom):
    """Run the 'get_child_codes' query for ``code`` and return code/name dicts.

    A ``code`` of ``'root'`` is first replaced by the root concept code of
    ``dom``.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries.
    """
    # Original note: biomarkers are not marked up as such in ncit so we return
    # all flat. NOTE(review): nothing in this function special-cases
    # biomarkers -- confirm whether the note still applies.
    resolved = check_for_root(code, dom)
    sql = get_sql('get_child_codes', False)
    # Parent codes are stored as a pipe (|) delimited list. A single loose
    # substring pattern would give spurious hits (e.g. C8461 matching C84615),
    # so two patterns are passed: the code at the very end of the list, and
    # the code immediately followed by a pipe.
    at_end = '%' + resolved
    before_pipe = '%' + resolved + '|%'
    return get_code_name_list_result(sql, [at_end, before_pipe])
def get_parent_codes(code, dom):
    """Return the code/name entries of the parents of ``code``.

    The 'get_parent_codes' query yields the parents as a single pipe (|)
    delimited string; each code in it is then looked up with the
    'get_parent_codes_sub' query.

    Args:
        code: concept code; ``'root'`` is replaced by the root concept code
            of ``dom``.
        dom: domain used to resolve the ``'root'`` placeholder.

    Returns:
        A list of ``{'code': ..., 'name': ...}`` dictionaries, empty when no
        parent list could be retrieved.
    """
    ccode = check_for_root(code, dom)
    parent_codes = get_single_string_result(get_sql('get_parent_codes', False), [ccode])
    # get_single_string_result() returns None when nothing was retrieved;
    # calling .split() on that would raise AttributeError.
    if parent_codes is None:
        return []
    # The per-parent statement is the same for every iteration, so fetch it once.
    sub_sql = get_sql('get_parent_codes_sub', False)
    results = []
    # parent codes are in a pipe (|) delimited list
    for parent_code in parent_codes.split('|'):
        results.extend(get_code_name_list_result(sub_sql, [parent_code]))
    return results