-
Notifications
You must be signed in to change notification settings - Fork 2
/
OHDSI2RDF_mp.py
179 lines (164 loc) · 9.04 KB
/
OHDSI2RDF_mp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#! /usr/bin/env python
### Read from Athena CPT processed Vocabulary files and Ananke UMLS CUI mappings into RDF turtle graph
### Created by: Juan M. Banda - Panacea Lab - Georgia State University
### Version 0.9
### Created during Biomedical Linked Annotation Hackathon (BLAH5) in Kashiwa, Japan
### http://blah5.linkedannotation.org/
###
import csv
import multiprocessing as mp
import sys
def findCUI(lineOfText, mappingsList):
    """Print one CONCEPT row as a Turtle ``owl:Class`` record.

    The record is assembled completely and then written with a single
    ``print`` followed by a flush, so that records produced concurrently by
    several worker processes are not split across each other's output.

    Args:
        lineOfText: Sequence of at least ten strings. Indexes 0-9 are emitted
            as concept_id, concept_name, domain_id, vocabulary_id,
            concept_class_id, standard_concept, concept_code,
            valid_start_date, valid_end_date and invalid_reason.
        mappingsList: Sequence of dict-like rows with ``'concept_id'`` and
            ``'CUI'`` keys. The first row whose ``'concept_id'`` equals the
            concept id contributes a ``umls:cui`` line; if none matches, no
            such line is printed.

    Returns:
        None. The record is written to standard output.

    Raises:
        IndexError: If ``lineOfText`` has fewer than ten fields (raised before
            anything is printed).
    """
    row = lineOfText
    concept_id = row[0]

    def iri_part(text):
        # Spaces and slashes are replaced so the value can be embedded in an IRI.
        return text.replace(' ', '_').replace('/', '_')

    # Double quotes are escaped so the name cannot terminate the Turtle
    # long-string literal early. Backslashes are passed through unchanged.
    name = row[1].replace('"', '\\"')

    lines = [
        '<http://www.ohdsi.org/OHDSIVocab/' + concept_id + '> a owl:Class ;',
        ' skos:prefLabel """' + name + '"""@en ;',
        ' skos:concept """' + concept_id + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/concept_id> """' + concept_id + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/concept_name> """' + name + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/domain_id> <http://www.ohdsi.org/OHDSIVocab/Domain/' + iri_part(row[2]) + '> ;',
        ' <http://www.ohdsi.org/OHDSIVocab/vocabulary_id> <http://www.ohdsi.org/OHDSIVocab/Vocabulary/' + iri_part(row[3]) + '> ;',
        ' <http://www.ohdsi.org/OHDSIVocab/concept_class_id> <http://www.ohdsi.org/OHDSIVocab/Concept_class/' + iri_part(row[4]) + '> ;',
        ' <http://www.ohdsi.org/OHDSIVocab/standard_concept> """' + row[5] + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/concept_code> """' + row[6] + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/valid_start_date> """' + row[7] + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/valid_end_date> """' + row[8] + '"""^^xsd:string ;',
        ' <http://www.ohdsi.org/OHDSIVocab/invalid_reason> """' + row[9] + '"""^^xsd:string ;',
    ]

    ### Find the appropriate UMLS CUI mapping, if available.
    ### Every mapping is inspected, including the last one (the previous
    ### index-based loop stopped one element early). Still a linear scan.
    for mapping in mappingsList:
        if mapping['concept_id'] == concept_id:
            # NOTE(review): the space before the closing quotes becomes part of
            # the literal value; kept as-is so existing output does not change.
            lines.append(' umls:cui """' + mapping['CUI'] + ' """^^xsd:string ;')
            break

    lines.append(' .')

    # One print call per record, flushed immediately: the text is identical to
    # printing line by line, but a record is handed to the stream as a unit.
    print('\n'.join(lines))
    sys.stdout.flush()
# Turtle prefix declarations printed once at the start of the output. The
# prefixed names this script prints (skos:, owl:, rdfs:, xsd:, umls:) rely on
# these declarations.
HEADER = """
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix umls: <http://bioportal.bioontology.org/ontologies/umls/> .
"""
# Number of worker processes that render CONCEPT rows in parallel.
WORKER_COUNT = 4

# Number of CONCEPT rows handed to a worker per task; batching keeps the
# inter-process messaging overhead small.
CONCEPT_CHUNK_SIZE = 1000

# Ananke mappings held by each worker process; set once by _init_worker.
_worker_mappings = None

## Footer of the document: declarations of the OHDSIVocab properties ##
# NOTE(review): every property is declared as owl:ObjectProperty although
# findCUI emits most of them with literal objects; owl:DatatypeProperty may be
# what is intended. Left unchanged so the generated output stays the same.
FOOTER = '''
<http://www.ohdsi.org/OHDSIVocab/concept_id> a owl:ObjectProperty ;
 rdfs:label """Concept ID""";
 rdfs:comment """OHDSI Concept ID""" .
<http://www.ohdsi.org/OHDSIVocab/concept_name> a owl:ObjectProperty ;
 rdfs:label """Concept Name""";
 rdfs:comment """OHDSI Concept Name""" .
<http://www.ohdsi.org/OHDSIVocab/domain_id> a owl:ObjectProperty ;
 rdfs:label """Domain ID""";
 rdfs:comment """OHDSI Concept Domain ID""" .
<http://www.ohdsi.org/OHDSIVocab/domain_name> a owl:ObjectProperty ;
 rdfs:label """Domain Name""";
 rdfs:comment """OHDSI Domain Name""" .
<http://www.ohdsi.org/OHDSIVocab/domain_concept_id> a owl:ObjectProperty ;
 rdfs:label """Domain Concept ID""";
 rdfs:comment """OHDSI Domain Concept ID""" .
<http://www.ohdsi.org/OHDSIVocab/vocabulary_id> a owl:ObjectProperty ;
 rdfs:label """Vocabulary ID""";
 rdfs:comment """OHDSI Concept Vocabulary ID""" .
<http://www.ohdsi.org/OHDSIVocab/vocabulary_name> a owl:ObjectProperty ;
 rdfs:label """Vocabulary Name""";
 rdfs:comment """OHDSI Vocabulary Name""" .
<http://www.ohdsi.org/OHDSIVocab/vocabulary_reference> a owl:ObjectProperty ;
 rdfs:label """Vocabulary Reference""";
 rdfs:comment """OHDSI Vocabulary Reference""" .
<http://www.ohdsi.org/OHDSIVocab/vocabulary_version> a owl:ObjectProperty ;
 rdfs:label """Vocabulary Version""";
 rdfs:comment """OHDSI Vocabulary Version""" .
<http://www.ohdsi.org/OHDSIVocab/vocabulary_concept_id> a owl:ObjectProperty ;
 rdfs:label """Vocabulary Concept ID""";
 rdfs:comment """OHDSI Vocabulary Concept ID""" .
<http://www.ohdsi.org/OHDSIVocab/concept_class_id> a owl:ObjectProperty ;
 rdfs:label """Concept Class ID""";
 rdfs:comment """OHDSI Concept Class ID""" .
<http://www.ohdsi.org/OHDSIVocab/concept_class_name> a owl:ObjectProperty ;
 rdfs:label """Concept Class Name""";
 rdfs:comment """OHDSI Concept Class Name""" .
<http://www.ohdsi.org/OHDSIVocab/concept_class_concept_id> a owl:ObjectProperty ;
 rdfs:label """Concept Class Concept ID""";
 rdfs:comment """OHDSI Concept Class Concept ID""" .
<http://www.ohdsi.org/OHDSIVocab/standard_concept> a owl:ObjectProperty ;
 rdfs:label """Standard Concept""";
 rdfs:comment """OHDSI Standard Concept""" .
<http://www.ohdsi.org/OHDSIVocab/concept_code> a owl:ObjectProperty ;
 rdfs:label """Concept Code""";
 rdfs:comment """Source Vocabulary Concept Code""" .
<http://www.ohdsi.org/OHDSIVocab/valid_start_date> a owl:ObjectProperty ;
 rdfs:label """Valid Start Date""";
 rdfs:comment """OHDSI Concept Valid Start Date""" .
<http://www.ohdsi.org/OHDSIVocab/valid_end_date> a owl:ObjectProperty ;
 rdfs:label """Valid End Date""";
 rdfs:comment """OHDSI Concept Valid End Date""" .
<http://www.ohdsi.org/OHDSIVocab/invalid_reason> a owl:ObjectProperty ;
 rdfs:label """Invalid Reason""";
 rdfs:comment """OHDSI Concept Invalid Reason""" .
'''


def _init_worker(mappings):
    """Store the Ananke mappings in this worker process.

    Used as the pool initializer so the mappings are handed to each worker
    once, instead of being sent again with every CONCEPT row.
    """
    global _worker_mappings
    _worker_mappings = mappings


def _emit_concept(row):
    """Pool task: print one CONCEPT row using the mappings stored in this worker."""
    findCUI(row, _worker_mappings)


def _iri_token(value):
    """Return *value* with spaces and slashes replaced by underscores."""
    return value.replace(' ', '_').replace('/', '_')


def _read_tsv(path):
    """Yield the data rows of a tab-separated file quoted with single quotes.

    The first line is treated as a header and skipped; an empty file yields
    nothing.
    """
    with open(path) as fd:
        rd = csv.reader(fd, delimiter="\t", quotechar="'")
        next(rd, None)  ## Remove pesky header
        for row in rd:
            yield row


def _print_vocabularies():
    """Print the ontology declaration and one class per vocabulary.

    VOCABULARY.csv is read once. Rows whose first field is the text 'None'
    are printed as the ontology declaration; all other rows are printed
    afterwards as vocabulary classes.
    """
    rows = list(_read_tsv("VOCABULARY.csv"))
    for row in rows:
        if row[0] == 'None':
            print("<http://www.ohdsi.org/OHDSIVocab/OHDSIVocabulary>")
            print(" a owl:Ontology ;")
            print(' rdfs:comment "' + row[1] + '" ;')
            print(' rdfs:label "' + row[2] + '" ;')
            print(" owl:imports <http://www.w3.org/2004/02/skos/core> ;")
            print(' owl:versionInfo "' + row[3] + '"')
            print(".")
    ## Get the Vocabulary relations ##
    for row in rows:
        if row[0] != 'None':
            print('<http://www.ohdsi.org/OHDSIVocab/Vocabulary/' + _iri_token(row[0]) + '> a owl:Class ;')
            print(' skos:prefLabel """' + row[1] + '"""@en ;')
            print(' skos:concept """' + row[0] + '"""^^xsd:string ;')
            print(' <http://www.ohdsi.org/OHDSIVocab/vocabulary_name> """' + row[1] + '"""^^xsd:string ;')
            print(' <http://www.ohdsi.org/OHDSIVocab/vocabulary_reference> """' + row[2] + '"""^^xsd:string ;')
            print(' <http://www.ohdsi.org/OHDSIVocab/vocabulary_version> """' + row[3] + '"""^^xsd:string ;')
            print(' <http://www.ohdsi.org/OHDSIVocab/vocabulary_concept_id> """' + row[4] + '"""^^xsd:string ;')
            print(".")


def _print_domains():
    """Print one class per row of DOMAIN.csv."""
    ## Get the domain relations ##
    for row in _read_tsv("DOMAIN.csv"):
        print('<http://www.ohdsi.org/OHDSIVocab/Domain/' + _iri_token(row[0]) + '> a owl:Class ;')
        print(' skos:prefLabel """' + row[1] + '"""@en ;')
        print(' skos:concept """' + row[2] + '"""^^xsd:string ;')
        # Unlike the IRI above, this literal only has spaces replaced; slashes are kept.
        print(' <http://www.ohdsi.org/OHDSIVocab/domain_id> """' + row[0].replace(' ','_') + '"""^^xsd:string ;')
        print(' <http://www.ohdsi.org/OHDSIVocab/domain_name> """' + row[1] + '"""^^xsd:string ;')
        print(' <http://www.ohdsi.org/OHDSIVocab/domain_concept_id> """' + row[2] + '"""^^xsd:string ;')
        print('.')


def _print_concept_classes():
    """Print one class per row of CONCEPT_CLASS.csv."""
    ## Get the concept_class relations ##
    for row in _read_tsv("CONCEPT_CLASS.csv"):
        print('<http://www.ohdsi.org/OHDSIVocab/Concept_class/' + _iri_token(row[0]) + '> a owl:Class ;')
        print(' skos:prefLabel """' + row[1] + '"""@en ;')
        print(' skos:concept """' + row[2] + '"""^^xsd:string ;')
        print(' <http://www.ohdsi.org/OHDSIVocab/concept_class_id> """' + _iri_token(row[0]) + '"""^^xsd:string ;')
        print(' <http://www.ohdsi.org/OHDSIVocab/concept_class_name> """' + row[1] + '"""^^xsd:string ;')
        print(' <http://www.ohdsi.org/OHDSIVocab/concept_class_concept_id> """' + row[2] + '"""^^xsd:string ;')
        print('.')


def main():
    """Write the complete vocabulary graph to standard output as Turtle.

    Reads VOCABULARY.csv, DOMAIN.csv, CONCEPT_CLASS.csv, AnankeV2.csv and
    CONCEPT.csv from the current working directory. The CONCEPT rows are
    rendered by a pool of worker processes, so their order in the output is
    not fixed.
    """
    print (HEADER)
    _print_vocabularies()
    _print_domains()
    _print_concept_classes()

    ### We want to read the Ananke mappings in memory for faster searches of the corresponding CUI
    with open("AnankeV2.csv") as f:
        data = list(csv.DictReader(f, delimiter=",", quotechar="\""))

    # Flush before any worker exists: everything printed so far is written
    # out completely, and no worker can start with a copy of pending output.
    sys.stdout.flush()

    ## Now for the main concept mappings ##
    pool = mp.Pool(processes=WORKER_COUNT, initializer=_init_worker, initargs=(data,))
    try:
        # Draining the iterator waits for every row and re-raises the first
        # error reported by a worker.
        for _ in pool.imap_unordered(_emit_concept, _read_tsv("CONCEPT.csv"), chunksize=CONCEPT_CHUNK_SIZE):
            pass
    except BaseException:
        # Stop the remaining work instead of waiting for it to finish.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Wait for the workers to exit before continuing; after a normal
        # close() this lets them finish writing their output before the
        # footer is printed.
        pool.join()

    print(FOOTER)


# Guard the entry point: worker processes may import this module, and must
# not start the conversion (or another pool) again when they do.
if __name__ == "__main__":
    main()