diff --git a/.dockerignore b/.dockerignore
index 21181dc..c82a741 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -3,6 +3,7 @@ oxo-*
 .idea
 dataloading
 neo4jimport/*.csv
+neo4jdata
 db
 solr-config/data
 *.rtf
diff --git a/dataloading/oxo/.dockerignore b/dataloading/oxo/.dockerignore
new file mode 100644
index 0000000..449876a
--- /dev/null
+++ b/dataloading/oxo/.dockerignore
@@ -0,0 +1 @@
+#config_localdev.ini
\ No newline at end of file
diff --git a/dataloading/oxo/Dockerfile b/dataloading/oxo/Dockerfile
new file mode 100644
index 0000000..7b07098
--- /dev/null
+++ b/dataloading/oxo/Dockerfile
@@ -0,0 +1,12 @@
+FROM jfloff/alpine-python:2.7
+MAINTAINER Simon Jupp "jupp@ebi.ac.uk"
+
+RUN mkdir /app
+WORKDIR /app
+COPY *.py requirements.txt ./
+
+RUN apk add --no-cache mariadb-dev build-base
+#RUN apk --update add mysql mysql-client
+RUN pip install -r requirements.txt
+
+CMD bash
\ No newline at end of file
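The Dockerfile copies a requirements.txt that this diff does not include. Inferred purely from the imports used in the scripts below (package names are an educated guess, versions unknown), it would look roughly like:

MySQL-python         # import MySQLdb (needs the mariadb-dev/build-base packages installed above)
neo4j-driver         # from neo4j.v1 import GraphDatabase, basic_auth (1.x driver API)
PyYAML               # import yaml
requests             # import requests
python-Levenshtein   # import Levenshtein (paxo_internals.py)
Flask                # from flask import Flask, request (paxo_internals.py)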
diff --git a/dataloading/oxo/MappingLoader.py b/dataloading/oxo/MappingLoader.py
new file mode 100644
index 0000000..98ceaf7
--- /dev/null
+++ b/dataloading/oxo/MappingLoader.py
@@ -0,0 +1,484 @@
+import MySQLdb
+import OxoClient as OXO
+import urllib
+import json
+import xml.etree.ElementTree as ET
+import yaml
+import csv
+import sys
+import datetime
+from neo4j.v1 import GraphDatabase, basic_auth
+from ConfigParser import SafeConfigParser
+
+
+#Parse the input parameters. A config file is expected
+if len(sys.argv)!=2:
+    print "\nNot enough arguments! Please pass a (path to a) config file!"
+    raise Exception("Not enough arguments! Please pass in a config file!")
+else:
+    config = SafeConfigParser()
+    config.read(sys.argv[1])
+
+
+#config = SafeConfigParser()
+#config.read("../config/oxo_dataRelease_config.ini")
+
+OXO.oxoUrl=config.get("Basics","oxoUrl")
+OXO.apikey=config.get("Basics","oxoAPIkey")
+#OXO.olsurl=config.get("Basics","olsurl")
+olsurl=config.get("Basics","olsurl")
+
+solrBaseUrl=config.get("Basics","solrBaseUrl")
+getEfoAnnotationsUrl = solrBaseUrl+"/ontology/select?q=*%3A*&fq=ontology_name%3Aefo&rows=0&wt=csv&indent=true"
+efoSolrQueryUrl = solrBaseUrl+"/ontology/select?fq=ontology_name%3Aefo&q=*&wt=json"
+olsDbxerfSolrQuery = solrBaseUrl+"/ontology/select?q=hasDbXref_annotation%3A*+OR%0Adatabase_cross_reference_annotation%3A*+OR%0Ahas_alternative_id_annotation%3A*+OR%0Adefinition_citation_annotation%3A*&fl=iri%2Contology_name%2Clabel%2Cshort_form%2Cobo_id%2Cdatabase_cross_reference_annotation%2ChasDbXref_annotation%2C+definition_citation_annotation%2C+has_alternative_id_annotation+&wt=json&fq=!ontology_name%3Ancbitaxon&fq=!ontology_name%3Apr&fq=!ontology_name%3Avto&fq=!ontology_name%3Aogg"
+
+solrChunks=config.getint("Basics","solrChunks")
+uri=config.get("Basics","neoURL")
+
+exportFileTerms=config.get("Paths","exportFileTerms")
+exportFileMappings=config.get("Paths","exportFileMappings")
+
+user=config.get("SQLumls","user")
+password=config.get("SQLumls","password")
+host=config.get("SQLumls","host")
+sqldb=config.get("SQLumls","db")
+port=config.getint("SQLumls","port")
+
+
+driver = GraphDatabase.driver(uri, auth=basic_auth("neo4j", "dba"))
+session = driver.session()
+print "neo success no sql"
+db = MySQLdb.connect(user=user, passwd=password,
+                     host=host,
+                     db=sqldb, port=port)
+
+
+# OLS loader
+# get prefix data from OLS
+prefixToPreferred = {}
+termToIri = {}
+termToLabel = {}
+idorgNamespace = {}
+prefixToDatasource = {}
+
+print "Reading datasources from OxO..."
+
+for data in OXO.getOxODatasets():
+    del data['_links']
+    del data['description']
+    prefix = data["prefix"]
+    prefixToDatasource[prefix] = data
+    prefixToPreferred[prefix] = prefix
+    for altPrefix in data["alternatePrefix"]:
+        prefixToPreferred[altPrefix] = prefix
+        if "idorgNamespace" in data:
+            idorgNamespace[altPrefix.lower()] = data["idorgNamespace"]
+            idorgNamespace[prefix.lower()] = data["idorgNamespace"]
+# get total number of results
+
+knownAnnotations = [
+    "database_cross_reference_annotation",
+    "hasDbXref_annotation"
+]
+
+print "Reading datasources from OxO done"
+# hack to get EFO xref annotations
+
+response = urllib.urlopen(getEfoAnnotationsUrl)
+cr = csv.reader(response)
+for row in cr:
+    for p in row:
+        if 'definition_citation' in p:
+            knownAnnotations.append(p)
+
+print "\nknownAnnotations"
+print knownAnnotations
+
+
+unknownSource = {}
+
+terms = {}
+mappings = {}
+postMappings = []
+
+def processSolrDocs(url):
+    rows = solrChunks
+    initUrl = url + "&start=0&rows=" + str(rows)
+    reply = urllib.urlopen(initUrl)
+    anwser = json.load(reply)
+
+    size = anwser["response"]["numFound"]
+
+    for x in range(rows, size, rows):
+        for docs in anwser["response"]["docs"]:
+            fromPrefix = None
+            fromId = None
+
+            fromIri = docs["iri"]
+            fromShortForm = docs["short_form"]
+            fromOntology = docs["ontology_name"]
+            fromLabel = docs["label"]
+
+            if "obo_id" in docs:
+                fromOboId = docs["obo_id"]
+                fromPrefix = OXO.getPrefixFromCui(fromOboId)
+                fromId = OXO.getIdFromCui(fromOboId)
+
+            #if fromPrefix=="orphanet":
+            #    then use the OLS API to check if it is an exact match
+
+            if not fromPrefix and not fromId:
+                fromPrefix = OXO.getPrefixFromCui(fromShortForm)
+                fromId = OXO.getIdFromCui(fromShortForm)
+
+            if not fromPrefix:
+                print "Can't determine prefix for " + fromShortForm + " so skipping"
+                continue
+
+            if not fromId:
+                print "Can't determine id for " + fromShortForm + " so skipping"
+                continue
+
+            # do we know the source term from the prefix?
+            if fromPrefix not in prefixToPreferred:
+                print "unknown prefix " + fromPrefix + " so skipping"
+                continue
+
+            fromPrefix = prefixToPreferred[fromPrefix]
+            fromCurie = fromPrefix + ":" + fromId
+
+            if fromCurie not in terms:
+                terms[fromCurie] = {
+                    "prefix": fromPrefix,
+                    "id": fromId,
+                    "curie": fromCurie,
+                    "uri": fromIri,
+                    "label": fromLabel
+                }
+            else:
+                terms[fromCurie]["uri"] = fromIri
+                terms[fromCurie]["label"] = fromLabel
+
+            for anno in knownAnnotations:
+                if anno in docs:
+                    for xref in docs[anno]:
+                        if ":" in xref or "_" in xref:
+                            toPrefix = OXO.getPrefixFromCui(xref)
+                            toId = OXO.getIdFromCui(xref)
+
+                            if not toPrefix or not toId:
+                                print "Can't get prefix or id for " + xref.encode('utf-8')
+                                continue
+
+                            if toPrefix.lower() not in prefixToPreferred:
+                                unknownSource[toPrefix] = 1
+                                # print "Unknown prefix source for "+toPrefix+" so skipping"
+                                continue
+
+                            toPrefix = prefixToPreferred[toPrefix.lower()]
+                            toCurie = toPrefix + ":" + toId
+
+                            if toCurie not in terms:
+                                terms[toCurie] = {
+                                    "prefix": toPrefix,
+                                    "id": toId,
+                                    "curie": toCurie,
+                                    "uri": None,
+                                    "label": None
+                                }
+
+                            if fromCurie == toCurie:
+                                continue
+
+                            if fromOntology not in prefixToPreferred:
+                                print "mapping from unknown source " + fromOntology
+                                continue
+                            mapping = {
+                                "fromId": fromCurie,
+                                "toId": toCurie,
+                                "datasourcePrefix": prefixToPreferred[fromOntology],
+                                "sourceType": "ONTOLOGY",
+                                "scope": "RELATED"
+                            }
+
+                            postMappings.append(mapping)
+
+                            # if fromCurie not in termToIri:
+                            #     termToIri[fromCurie] = None
+                            # if fromCurie not in termToLabel:
+                            #     termToLabel[fromCurie] = None
+                            # if toCurie not in termToIri:
+                            #     termToIri[toCurie] = None
+                            # if toCurie not in termToLabel:
+                            #     termToLabel[toCurie] = None
+
+                            # if the to id is idorg, then mint the Uri
+                            if idorgNamespace.get(toPrefix.lower()) is not None:
+                                idorgUri = "http://identifiers.org/" + idorgNamespace[toPrefix.lower()] + "/" + toId
+                                terms[toCurie]["uri"] = idorgUri
+
+        print str(x)
+        # OXO.saveMappings(postMappings)
+        # postMappings = []
+        initUrl = url + "&start=" + str(x) + "&rows=" + str(rows)
+        reply = urllib.urlopen(initUrl)
+        anwser = json.load(reply)
+
+
+# do the query to get docs from solr and process
+
+processSolrDocs(efoSolrQueryUrl)
+print "Done processing EFO, starting to query OLS"
+processSolrDocs(olsDbxerfSolrQuery)
+print "Done processing OLS"
+
+#terms={ "DOID:0080184" :{"prefix": "DOID",
+#                         "id": "0080184",
+#                         "curie": "DOID:0080184",
+#                         "uri": None,
+#                         "label": None}
+#      }
+
+
+print "Looking for OLS terms with no labels..."
+for key, term in terms.iteritems():
+    if term["label"] is None:
+        prefix = OXO.getPrefixFromCui(key)
+        if prefixToDatasource[prefixToPreferred[prefix]]["source"] == "ONTOLOGY":
+            object = OXO.getIriAndLabelFromOls(term["curie"], olsurl)
+            if object is not None:
+                if term["uri"] is None:
+                    terms[key]["uri"] = object["uri"]
+                if term["label"] is None:
+                    terms[key]["label"] = object["label"]
+            else:
+                print "Object None!"
+                print object
+                print terms[key]
+
+
+#url = "http://www.ebi.ac.uk/ols/api/search?q=*&fieldList=iri,short_form,obo_id,database_cross_reference_annotation"
+#print "Updating term labels"
+# update URIs and labels for any terms we have seen
+#for id in termToIri:
+#    if id not in termToIri and id not in termToLabel:
+#        print "Can't determine iri or label for "+id
+#    else:
+#        OXO.updateTerm(id, termToIri[id], termToLabel[id])
+
+
+# dump out the list of unknown sources
+print "Finished, here are all the unknown sources"
+for key, value in unknownSource.iteritems():
+    # see if we can match prefix to db
+    print key.encode('utf-8', 'ignore')
+
+
+# print all the first cell of all the rows
+idToLabel = {}
+def getUMLSMappingFromRow(row):
+    cui = row[0]
+    source = row[1]
+    toid = row[2]
+    descId = row[3]
+    label = row[4]
+
+    if descId is not None:
+        toid = descId
+
+    if toid is None:
+        return None
+
+    if source == "HPO":
+        source = OXO.getPrefixFromCui(toid)
+        toid = OXO.getIdFromCui(toid)
+
+    fromCurie = "UMLS:" + cui
+
+    toCurie = prefixToPreferred[source] + ":" + toid
+
+    #### Do the if-else blocks here prevent empty labels?
+    if fromCurie not in terms:
+        terms[fromCurie] = {
+            "prefix": "UMLS",
+            "id": cui,
+            "curie": fromCurie,
+            "uri": "http://identifiers.org/umls/"+cui,
+            "label": label
+        }
+    else:
+        if label!="":
+            terms[fromCurie]["label"] = label
+        else:
+            print "FROM UMLS label is none for"
+            print fromCurie
+
+    if toCurie not in terms:
+        terms[toCurie] = {
+            "prefix": prefixToPreferred[source],
+            "id": toid,
+            "curie": toCurie,
+            "label": label,
+            "uri": None
+        }
+    else:
+        if label!="":
+            terms[toCurie]["label"] = label
+        else:
+            print "FROM UMLS - label is NONE for"
+            print toCurie
+    #### End empty labels
+
+    if idorgNamespace.get(source.lower()):
+        terms[toCurie]["uri"] = "http://identifiers.org/"+idorgNamespace[source.lower()]+"/"+toid
+
+    mapping = {
+        "fromId": fromCurie,
+        "toId": toCurie,
+        "datasourcePrefix": "UMLS",
+        "sourceType": "DATABASE",
+        "scope": "RELATED"
+    }
+    # idToLabel[source+":"+toid] = label
+    return mapping
+
+
+# umls loader
+cur = db.cursor()
+# Use all the SQL you like
+
+#cur.execute("select distinct cui,sab, scui, sdui, str from MRCONSO where stt = 'PF' and (ts = 'P' or tty='PT') and sab != 'src'")
+# --> missing Snomed labels 6613 (down from )
+
+# https://www.ncbi.nlm.nih.gov/books/NBK9685/
+# STT String type
+# TS Term status
+# SAB Abbreviated source name (SAB).
+
+cur.execute("select distinct cui,sab, scui, sdui, str from MRCONSO where stt = 'PF' and (ts = 'P' or ts='S') and sab != 'src'")
+
+fetched=cur.fetchall()
+
+# Previously, 'old sql query'
+#cur.execute("select distinct cui,sab, scui, sdui, str from MRCONSO where stt = 'PF' and tty = 'PT' and sab != 'src'")
+#fetched=cur.fetchall()
+
+#if len(fetched)==0:
+#    cur.execute("select distinct cui,sab, scui, sdui, str from MRCONSO where stt = 'PF' and tty = 'PT' and sab != 'src'")
+#    fetched=cur.fetchall()
+
+for row in fetched:
+    try:
+        mappingRow = getUMLSMappingFromRow(row)
+        if mappingRow is not None:
+            postMappings.append(mappingRow)
+    except Exception as e:
+        print e
+        print "Experienced a problem with"
+        print row
+        print "Caught it, trying to move on"
+        #Experienced a problem with ('C1180021', 'NCI', 'C33333', None, 'Plus End of the Microtubule')
+        #('C0796501', 'NCI', 'C11519', None, 'Asparaginase/Dexamethasone/Prednisone/Vincristine')
+
+db.close()
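MappingLoader.py leans on two OxoClient helpers, getPrefixFromCui and getIdFromCui, that this diff does not show. A minimal sketch of the splitting convention the calls above appear to assume (a hypothetical reimplementation, not the actual OxoClient code):

def getPrefixFromCui(cui):
    # assumption: a compact identifier is "PREFIX:ID" or "PREFIX_ID"
    for sep in (":", "_"):
        if sep in cui:
            return cui.split(sep)[0]
    return None

def getIdFromCui(cui):
    for sep in (":", "_"):
        if sep in cui:
            return cui.split(sep, 1)[1]
    return None

print getPrefixFromCui("DOID:0080184")  # DOID
print getIdFromCui("DOID_0080184")      # 0080184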
+
+
+print
+print "Generating CSV files for neo loading..."
+
+with open(exportFileTerms, 'w') as csvfile:
+    spamwriter = csv.writer(csvfile, delimiter=',',
+                            quoting=csv.QUOTE_ALL, escapechar='\\', doublequote=False)
+    spamwriter.writerow(['identifier', "curie", "label", "uri", "prefix"])
+    for key, term in terms.iteritems():
+        label = None
+        uri = None
+
+        try:
+            if term["label"] is not None:
+                label = term["label"].encode('utf-8', errors="ignore")
+        except:
+            pass
+
+        if term["uri"] is not None:
+            uri = term["uri"]
+
+        spamwriter.writerow([term["id"], term["curie"], label, uri, term["prefix"]])
+
+with open(exportFileMappings, 'w') as csvfile:
+    spamwriter = csv.writer(csvfile, delimiter=',',
+                            quoting=csv.QUOTE_ALL, escapechar='\\', doublequote=False)
+    spamwriter.writerow(['fromCurie', "toCurie", "datasourcePrefix", "datasource", "sourceType", "scope", "date"])
+    for mapping in postMappings:
+        datasource = prefixToDatasource[mapping["datasourcePrefix"]]
+        spamwriter.writerow([mapping["fromId"], mapping["toId"], mapping["datasourcePrefix"], json.dumps(datasource), mapping["sourceType"], mapping["scope"], datetime.datetime.now().strftime("%y-%m-%d")])
+
+print "Generating CSV files for neo loading done, now loading them..."
+
+# CREATE CONSTRAINT ON (i:Term) ASSERT i.curie IS UNIQUE
+# CREATE CONSTRAINT ON (i:Datasource) ASSERT i.prefix IS UNIQUE
+
+def deleteMappings():
+    result = session.run("match (t)-[m:MAPPING]->() WITH m LIMIT 50000 DETACH DELETE m RETURN count(*) as count")
+    for record in result:
+        return record["count"]
+print "Deleting mappings..."
+while deleteMappings() > 0:
+    print "Still deleting..."
+print "Mappings deleted!"
+
+print "Deleting previous has_source"
+def deleteSourceRels():
+    result = session.run("match (t)-[m:HAS_SOURCE]->() WITH m LIMIT 50000 DETACH DELETE m RETURN count(*) as count")
+    for record in result:
+        return record["count"]
+while deleteSourceRels() > 0:
+    print "Still deleting..."
+print "Source rels deleted!"
+
+print "Deleting previous terms"
+def deleteTerms():
+    result = session.run("match (t:Term) WITH t LIMIT 50000 DETACH DELETE t RETURN count(*) as count")
+    for record in result:
+        return record["count"]
+while deleteTerms() > 0:
+    print "Still deleting..."
+print "Terms deleted!"
+
+print "Loading terms.csv..."
+loadTermsCypher = "USING PERIODIC COMMIT 10000 LOAD CSV WITH HEADERS FROM 'file:///"+exportFileTerms+"""' AS line
+    MATCH (d:Datasource {prefix : line.prefix})
+    WITH d, line
+    MERGE (t:Term { id: line.identifier, curie: line.curie, label: line.label, uri: line.uri})
+    WITH t, d
+    CREATE (t)-[:HAS_SOURCE]->(d)"""
+result = session.run(loadTermsCypher)
+print result.summary()
+
+print "Loading mappings.csv..."
+loadMappingsCypher = "USING PERIODIC COMMIT 10000 LOAD CSV WITH HEADERS FROM 'file:///"+exportFileMappings+"""' AS line
+    MATCH (f:Term { curie: line.fromCurie}),(t:Term { curie: line.toCurie})
+    WITH f, t, line
+    CREATE (f)-[m:MAPPING { sourcePrefix: line.datasourcePrefix, datasource: line.datasource, sourceType: line.sourceType, scope: line.scope, date: line.date}]->(t)"""
+
+result = session.run(loadMappingsCypher)
+print result.summary()
+
+#After loading, update the indexes
+print "updating indexes"
+reply = urllib.urlopen(OXO.oxoUrl+"/api/search/rebuild?apikey="+OXO.apikey)
+print "Finished process!"
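The three delete helpers above repeat one pattern: remove relationships or nodes in batches of 50000 until nothing is left, so a full wipe never runs as a single huge transaction. A generic version of the same idea (a sketch; deleteInBatches is a hypothetical name, session is the driver session from the script):

def deleteInBatches(session, cypher):
    # cypher must delete at most one batch and end with RETURN count(*) as count
    while True:
        count = 0
        for record in session.run(cypher):
            count = record["count"]
        if count == 0:
            break
        print "Still deleting..."

deleteInBatches(session, "MATCH (t)-[m:MAPPING]->() WITH m LIMIT 50000 DETACH DELETE m RETURN count(*) as count")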
diff --git a/dataloading/oxo/OlsDatasetExtractor.py b/dataloading/oxo/OlsDatasetExtractor.py
index 5cba0f5..259a588 100755
--- a/dataloading/oxo/OlsDatasetExtractor.py
+++ b/dataloading/oxo/OlsDatasetExtractor.py
@@ -158,6 +158,8 @@
         prefixToPreferred[namespace.lower()] = prefPrefix
 
     if prefPrefix not in datasources:
+        print "New datasource " + namespace + " from GO db-xrefs file"
+
         datasources[prefPrefix] = OxoClient.Datasource (prefPrefix, None, title, None, "DATABASE",urlSyntax, altPrefixes, None, None)
diff --git a/dataloading/oxo/OlsDatasetLoader.py b/dataloading/oxo/OlsDatasetLoader.py
new file mode 100644
index 0000000..fcd5e54
--- /dev/null
+++ b/dataloading/oxo/OlsDatasetLoader.py
@@ -0,0 +1,141 @@
+import urllib
+import json
+import xml.etree.ElementTree as ET
+import yaml
+import OxoClient as OXO
+import csv
+from ConfigParser import SafeConfigParser
+import sys
+
+prefixToPreferred = {}
+idorgNamespace = {}
+
+unprocessedIds = {}
+termToIri = {}
+termToLabel = {}
+
+#Parse the input parameters. A config file is expected
+if len(sys.argv)!=2:
+    print "\nNot enough arguments! Please pass a (path to a) config file!"
+    raise Exception("Not enough arguments! Please pass in a config file!")
+else:
+    config = SafeConfigParser()
+    config.read(sys.argv[1])
+
+OXO.oxoUrl = config.get("Basics","oxoUrl")
+OXO.apikey = config.get("Basics", "oxoAPIkey")
+oboDbxrefUrl = config.get("Basics", "oboDbxrefUrl")
+
+olsurl=config.get("Basics", "olsurl")
+olsurl=olsurl+"/ontologies?size=1000"
+
+idorgDataLocation = config.get("Paths", "idorgDataLocation")
+
+reply = urllib.urlopen(olsurl)
+anwser = json.load(reply)
+
+ontologies = anwser["_embedded"]["ontologies"]
+
+for ontology in ontologies:
+    namespace = ontology["config"]["namespace"]
+    version = ontology["updated"]
+
+    if namespace == 'ordo':
+        prefPrefix = 'Orphanet'
+    else:
+        prefPrefix = ontology["config"]["preferredPrefix"]
+
+    title = ontology["config"]["title"]
+    desc = ontology["config"]["description"]
+    prefixToPreferred[prefPrefix.lower()] = prefPrefix
+    prefixToPreferred[namespace.lower()] = prefPrefix
+
+    OXO.saveDatasource(prefPrefix, None, title, desc, "ONTOLOGY", None, [namespace], "https://creativecommons.org/licenses/by/4.0/", "Last updated in the Ontology Lookup Service on "+version)
+
+# get namespaces from identifiers.org
+#urllib.urlopen('http://www.ebi.ac.uk/miriam/main/export/xml/')
+tree = ET.ElementTree(file=idorgDataLocation)
+
+# from id.org default to the namespace;
+# if there are no spaces in the title, that is usually a better option -
+# unless a preferred prefix is provided, then always use that
+
+rootElem = tree.getroot()
+for datatype in rootElem.findall('{http://www.biomodels.net/MIRIAM/}datatype'):
+    namespace = datatype.find('{http://www.biomodels.net/MIRIAM/}namespace').text
+    prefPrefix = namespace
+
+    title = datatype.find('{http://www.biomodels.net/MIRIAM/}name').text
+    desc = datatype.find('{http://www.biomodels.net/MIRIAM/}definition').text
+    licence = None
+    versionInfo = None
+
+    altPrefixes = [namespace]
+
+    if datatype.find('{http://www.biomodels.net/MIRIAM/}licence') is not None:
+        licence = datatype.find('{http://www.biomodels.net/MIRIAM/}licence').text
+    if datatype.find('{http://www.biomodels.net/MIRIAM/}versionInfo') is not None:
+        versionInfo = datatype.find('{http://www.biomodels.net/MIRIAM/}versionInfo').text
+
+    if datatype.find('{http://www.biomodels.net/MIRIAM/}preferredPrefix') is not None:
+        prefPrefix = datatype.find('{http://www.biomodels.net/MIRIAM/}preferredPrefix').text
+    elif ' ' not in title:
+        prefPrefix = title
+
+    # add the title to the alt prefixes if it contains no spaces
+    if ' ' not in title:
+        altPrefixes.append(title)
+
+    if datatype.find('{http://www.biomodels.net/MIRIAM/}alternatePrefixes') is not None:
+        for altPrefixs in datatype.find('{http://www.biomodels.net/MIRIAM/}alternatePrefixes'):
+            altPrefixes.append(altPrefixs.text)
+
+    if prefPrefix.lower() in prefixToPreferred:
+        print "Ignoring "+namespace+" from idorg as it is already registered as a datasource"
+    elif namespace.lower() in prefixToPreferred:
+        print "Ignoring " + namespace + " from idorg as it is already registered as a datasource"
+    else:
+        idorgNamespace[prefPrefix.lower()] = prefPrefix
+        idorgNamespace[namespace.lower()] = prefPrefix
+        idorgNamespace[title.lower()] = prefPrefix
+        prefixToPreferred[prefPrefix.lower()] = prefPrefix
+        prefixToPreferred[namespace.lower()] = prefPrefix
+        prefixToPreferred[title.lower()] = prefPrefix
+        OXO.saveDatasource(prefPrefix, namespace, title, desc, "DATABASE", None, altPrefixes, licence, versionInfo)
+
+
+#oboDbxrefUrl = 'https://raw.githubusercontent.com/geneontology/go-site/master/metadata/db-xrefs.yaml'
+# Read from OBO db-xrefs
+yamlData = yaml.load(urllib.urlopen(oboDbxrefUrl))
+
+for database in yamlData:
+    namespace = database["database"]
+    title = database["name"]
+    prefPrefix = namespace
+
+    altPrefixes = [namespace]
+    if namespace.lower() in prefixToPreferred:
+        print "Ignoring " + namespace + " from OBO as it is already registered as a datasource"
+    else:
+        urlSyntax = None
+        if "entity_types" in database:
+            if "url_syntax" in database["entity_types"][0]:
+                urlSyntax = database["entity_types"][0]["url_syntax"].replace("[example_id]", "")
+        prefixToPreferred[namespace.lower()] = prefPrefix
+
+        OXO.saveDatasource(prefPrefix, None, title, None, "DATABASE", urlSyntax, altPrefixes, None, None)
+
+
+# Create paxo as a datasource
+print "Save paxo as datasource"
+prefPrefix="paxo"
+namespace=None
+title="paxo"
+desc=None
+sourceType="DATABASE"
+urlSyntax=None
+altPrefixes=["paxo"]
+licence=None
+versionInfo=1
+OXO.saveDatasource(prefPrefix, namespace, title, desc, sourceType, urlSyntax, altPrefixes, licence, versionInfo)
diff --git a/dataloading/oxo/OlsMappingExtractor.py b/dataloading/oxo/OlsMappingExtractor.py
index 5dba3ab..f7de7bb 100755
--- a/dataloading/oxo/OlsMappingExtractor.py
+++ b/dataloading/oxo/OlsMappingExtractor.py
@@ -236,7 +236,7 @@ def processSolrDocs(url):
 
                             # if to id is idorg, then mint the Uri
                             if toPrefix.lower() in idorgNamespace:
-                                idorgUri = "http://identifiers.org/" + idorgNamespace[toPrefix.lower()] + "/" + toId
+                                idorgUri = "http://identifiers.org/" + toCurie
                                 terms[toCurie]["uri"] = idorgUri
 
         print(str(x))
diff --git a/dataloading/oxo/OxoClient.py b/dataloading/oxo/OxoClient.py
old mode 100755
new mode 100644
diff --git a/dataloading/oxo/OxoNeo4jLoader.py b/dataloading/oxo/OxoNeo4jLoader.py
index 3c8b572..809c43f 100644
--- a/dataloading/oxo/OxoNeo4jLoader.py
+++ b/dataloading/oxo/OxoNeo4jLoader.py
@@ -32,7 +32,11 @@ def __init__(self):
         config = ConfigParser()
         config.read(options.config)
 
-        uri = config.get("Basics", "neoURL")
+        try:
+            uri = config.get("Basics", "neoURL")
+        except:
+            print "No config found, please supply a config.ini using -c"
+            exit(1)
 
         neoUser = config.get("Basics", "neoUser")
         neoPass = config.get("Basics", "neoPass")
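The OlsMappingExtractor.py hunk above changes how identifiers.org URIs are minted: instead of the registered namespace plus the local id, the bare CURIE is appended. For a hypothetical xref MESH:D009202 whose registered namespace is "mesh" (example values, for illustration only), the two forms differ as follows:

toPrefix, toId, toCurie = "MESH", "D009202", "MESH:D009202"
idorgNamespace = {"mesh": "mesh"}  # assumed registry content, not from the diff
oldUri = "http://identifiers.org/" + idorgNamespace[toPrefix.lower()] + "/" + toId
newUri = "http://identifiers.org/" + toCurie
print oldUri  # http://identifiers.org/mesh/D009202
print newUri  # http://identifiers.org/MESH:D009202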
diff --git a/dataloading/oxo/UmlsMappingExtractor.py b/dataloading/oxo/UmlsMappingExtractor.py
index 240df89..c5fbfd9 100644
--- a/dataloading/oxo/UmlsMappingExtractor.py
+++ b/dataloading/oxo/UmlsMappingExtractor.py
@@ -127,7 +127,7 @@ def getUMLSMappingFromRow(row, terms, umlsMapping):
                 "uri": toUri
             }
 
-        if sourcePreferred == 'PT':
+        if sourcePreferred in ['PT', 'MH', 'OAP', 'NM']:
             terms[toCurie]["label"] = label
 
@@ -154,7 +154,7 @@ def getUMLSMappingFromRow(row, terms, umlsMapping):
 print("Fetching all source terms info from from UMLS...")
 # now get source term labels
-getPreferredLabelFromSource = "select distinct cui,sab, scui, sdui, str, tty, ts, stt, ispref from MRCONSO where tty = 'PT' and sab != 'src'"
+getPreferredLabelFromSource = "select distinct cui,sab, scui, sdui, str, tty, ts, stt, ispref from MRCONSO where (tty = 'PT' or tty = 'MH' or tty = 'OAP' or tty = 'NM') and sab != 'src'"
 cur.execute(getPreferredLabelFromSource)
 fetched=cur.fetchall()
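Both UmlsMappingExtractor.py hunks widen the accepted UMLS term types (TTY) from PT alone to PT, MH, OAP and NM - once in the Python label check and once in the SQL. Keeping the list in one place would stop the two from drifting apart; a sketch using MySQLdb's %s placeholders (ACCEPTED_TTYS is a hypothetical name, cur is the cursor from the script):

ACCEPTED_TTYS = ['PT', 'MH', 'OAP', 'NM']

query = ("select distinct cui, sab, scui, sdui, str, tty, ts, stt, ispref "
         "from MRCONSO where tty in (%s) and sab != 'src'"
         % ", ".join(["%s"] * len(ACCEPTED_TTYS)))
cur.execute(query, ACCEPTED_TTYS)

# ...and in getUMLSMappingFromRow the label check becomes:
# if sourcePreferred in ACCEPTED_TTYS:
#     terms[toCurie]["label"] = label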
diff --git a/dataloading/oxo/config.ini b/dataloading/oxo/config.ini
deleted file mode 100644
index 81c6cf2..0000000
--- a/dataloading/oxo/config.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-[Basics]
-oxoUrl=http://localhost:8080/oxo
-solrBaseUrl=http://localhost:8993/solr
-solrChunks=5000
-neoURL=bolt://localhost:7687
-neoUser=neo4j
-neoPass=dba
-olsurl=https://www.ebi.ac.uk/ols/api
-oboDbxrefUrl=https://raw.githubusercontent.com/geneontology/go-site/master/metadata/db-xrefs.yaml
-
-[Paths]
-exportFileDatasources=datasources.csv
-exportFileTerms=terms.csv
-exportFileMappings=mappings.csv
-idorgDataLocation =idorg.xml
-
-[SQLumls]
-user=
-password=
-host=localhost
-db=UMLS_2016AB
-port=
\ No newline at end of file
diff --git a/dataloading/oxo/config/oxo_config.ini b/dataloading/oxo/config/oxo_config.ini
new file mode 100644
index 0000000..19cb9ec
--- /dev/null
+++ b/dataloading/oxo/config/oxo_config.ini
@@ -0,0 +1,20 @@
+[Basics]
+oxoUrl=http://localhost:8080/oxo
+oxoAPIkey=key
+solrBaseUrl=http://url/solr
+solrChunks=5000
+neoURL=bolt://localhost:7687
+olsurl=http://www.ebi.ac.uk/ols/api
+oboDbxrefUrl=https://raw.githubusercontent.com/geneontology/go-site/master/metadata/db-xrefs.yaml
+
+[Paths]
+exportFileTerms=/path/terms.csv
+exportFileMappings=/path/mappings.csv
+idorgDataLocation = /path/idorg.xml
+
+[SQLumls]
+user=username
+password=password
+host=mysql-name
+db=dbName
+port=4570
diff --git a/dataloading/paxo/config/listprocessing_dummy_config.ini b/dataloading/paxo/config/listprocessing_dummy_config.ini
index df6a7ac..e62f818 100644
--- a/dataloading/paxo/config/listprocessing_dummy_config.ini
+++ b/dataloading/paxo/config/listprocessing_dummy_config.ini
@@ -1,6 +1,6 @@
 [Basics]
 olsAPIURL=https://www.ebi.ac.uk/ols/api/
-oxoURL=https://www.ebi.ac.uk/spot/oxo/api/search
+oxoURL=https://www.ebi.ac.uk/spot/oxo/api/
 inputFile=/path/input-file.csv
 resultFile=/path/output-file.csv
 logFile=listprocessing.log
diff --git a/dataloading/paxo/config/paxo_dummy_config.ini b/dataloading/paxo/config/paxo_dummy_config.ini
index 71c2e1c..3cb0372 100644
--- a/dataloading/paxo/config/paxo_dummy_config.ini
+++ b/dataloading/paxo/config/paxo_dummy_config.ini
@@ -1,6 +1,6 @@
 [Basics]
 olsAPIURL=https://www.ebi.ac.uk/ols/api/
-oxoURL=https://www.ebi.ac.uk/spot/oxo/api/search
+oxoURL=https://www.ebi.ac.uk/spot/oxo/api/
 logFile=../paxo.log
 neoURL=bolt://localhost:7687
 neoUser=neo4j
@@ -15,6 +15,8 @@ neoFolder=/path/path/neo_export/
 StopwordsList=of,the
 writeToDiscFlag=True
 uniqueMaps=False
+mapSmallest=True
+useLocalOnly=True
 
 [mp_hp]
 sourceOntology=mp
diff --git a/dataloading/paxo/listprocessing.py b/dataloading/paxo/listprocessing.py
index bdc5966..631433d 100644
--- a/dataloading/paxo/listprocessing.py
+++ b/dataloading/paxo/listprocessing.py
@@ -3,6 +3,7 @@
 import csv
 import requests
 import time
+import logging
 
 def runListProcessing(options, params, scoreParams):
@@ -11,6 +12,7 @@ def runListProcessing(options, params, scoreParams):
     delimiter=options["delimiter"]
     targetOntology=options["targetOntology"]
     detailLevel=options["detailLevel"]
+    synonymSplitChar=options["synonymSplitChar"]
 
     #Open the input file
     with open(inputFile) as csvfile:
@@ -25,12 +27,17 @@ def runListProcessing(options, params, scoreParams):
         counter=0
         #tmpReadCSV=readCSV
         #totalLength=len(list(tmpReadCSV))
-        print "Enumerate over csv now"
+        logging.info("Start going through input csv")
         for index,row in enumerate(readCSV):
             potentialReply=[]
             #Execute label in the first row
             prefLabel=row[1].encode(encoding='UTF-8')
-            synList=row[2].split("|")
+
+            if len(row)>2:
+                synList=row[2].split(synonymSplitChar)
+            else:
+                synList=[]
+
             #print prefLabel
             tmpReply=paxo_internals.scoreTermLabel(prefLabel, targetOntology, scoreParams, params)
@@ -63,13 +70,15 @@ def runListProcessing(options, params, scoreParams):
                 else:
                     print e
                     print "Problem getting results for "+prefLabel+" - the reply was "+potentialReply
-                    print potentialReply
+                    logging.error("Problem getting results for "+prefLabel+" - the reply was "+str(potentialReply))
+                    logging.error(e)
                     raise
 
             #This is just to print feedback - if we work on a large list
             counter=counter+1
             if counter%20==0:
                 print "Processed "+str(counter)+" entries"
+                logging.info("Processed "+str(counter)+" entries")
 
     ### annotating file
     print "Done processing input list, now annotate the result"
@@ -84,12 +93,13 @@ def runListProcessing(options, params, scoreParams):
             try:
                 row[3]=jsonReply['response']['docs'][0]['label'].encode(encoding='UTF-8')
             except Exception as e:
-                print "No label found"
                 row[3]="no label found"
-                print e
+                logging.error("No label found for "+row[2])
+                logging.error(e)
 
     ### write to file
     print "Done annotating file, now write result to file"
+    logging.info("Done annotating file, now write result to file")
     #Writing result to output file
     with open(resultFile, 'wb') as f:
         writer = csv.writer(f)
@@ -100,3 +110,5 @@ def runListProcessing(options, params, scoreParams):
     except Exception as e:
         print "Error while processing file"
         print e
+        logging.error("Error while processing file")
+        logging.error(e)
diff --git a/dataloading/paxo/listprocessing_config.ini b/dataloading/paxo/listprocessing_config.ini
new file mode 100644
index 0000000..17fa166
--- /dev/null
+++ b/dataloading/paxo/listprocessing_config.ini
@@ -0,0 +1,27 @@
+[Basics]
+olsAPIURL=https://www.ebi.ac.uk/ols/api/
+oxoURL=https://www.ebi.ac.uk/spot/oxo/api/search
+inputFile=/path/input-file.csv
+resultFile=/path/output-file.csv
+logFile=listprocessing.log
+detailLevel=0
+targetOntology=doid
+delimiter=,
+synonymSplitChar=|
+StopwordsList=of,the
+fuzzyUpperLimit=0.8
+fuzzyLowerLimit=0.6
+fuzzyUpperFactor=1
+fuzzyLowerFactor=0.6
+oxoDistanceOne=1
+oxoDistanceTwo=0.3
+oxoDistanceThree=0.1
+synFuzzyFactor=0.6
+synOxoFactor=0.4
+bridgeOxoFactor=1
+threshold=1
+
+[Params]
+StopwordsList=of,the
+writeToDiscFlag=True
+uniqueMaps=True
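With the listprocessing.py change above, the synonym column becomes optional and its delimiter configurable through synonymSplitChar. Given the column layout the script uses (label in row[1], synonyms in row[2]; the id column content is an assumption), a row is parsed like this:

synonymSplitChar = "|"  # from listprocessing_config.ini
row = ["SOME:ID", "Nuclear cataract", "cataract, nuclear|nuclear opacity"]

if len(row) > 2:
    synList = row[2].split(synonymSplitChar)
else:
    synList = []

print synList  # ['cataract, nuclear', 'nuclear opacity']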
'file:///"+termsFile+"""' AS line - # MATCH (d:Datasource {prefix : line.prefix}) - # WITH d, line - # MERGE (t:Term { curie: line.curie}) - # with t,d - # CREATE (t)-[:HAS_SOURCE]->(d)""" - loadMappingsCypher = "USING PERIODIC COMMIT 10000 LOAD CSV WITH HEADERS FROM 'file:///"+termsFile+"""' AS line MATCH (d:Datasource {prefix : line.prefix}) WITH d, line @@ -44,7 +37,7 @@ def writeMappingsToNeo(mappingsFile, session): def createNode(iri, ontology, olsURL): data={"q": iri, "ontology":ontology, "exact":True, "type":"class", "local":True, "fieldList":"label,ontology_prefix,obo_id"} - jsonReply=flaskMapping.apiCall(olsURL+"search", data) + jsonReply=paxo_internals .apiCall(olsURL+"search", data) try: jsonReply=jsonReply.json() @@ -83,9 +76,7 @@ def createNode(iri, ontology, olsURL): identifier=obo_id.split(':')[1] - - - + #if we couldn't retrieve doc from OLS we return empty line so it's not added to the csv if obo_id=='UNKNOWN:UNKNOWN': return [] @@ -110,7 +101,6 @@ def exportInNeo(onto1, onto2, predictedFolder, targetFolder, olsURL, neoURL, neo driver = GraphDatabase.driver(uri, auth=basic_auth(neoUser, neoPW)) session = driver.session() - paxo_term=[] paxo_mappings=[] line=list(['identifier','curie', 'label', 'uri', 'prefix']) @@ -135,13 +125,6 @@ def exportInNeo(onto1, onto2, predictedFolder, targetFolder, olsURL, neoURL, neo if firstRow!=[] and secondRow!=[]: paxo_mappings.append(createMap(firstRow[1],secondRow[1], row[2])) - #This is just for Testing, don't take more than 10 - #counter=counter+1 - #if counter%500: - # print "Processed "+str(counter)+" entries" - #break #This is just for Testing - - #print paxo_term with open(targetFolder+onto1+"_"+onto2+'_termsNeo.csv', 'wb') as f: writer = csv.writer(f) writer.writerows(paxo_term) diff --git a/dataloading/paxo/paxo.py b/dataloading/paxo/paxo.py index 23dc696..c4f432c 100644 --- a/dataloading/paxo/paxo.py +++ b/dataloading/paxo/paxo.py @@ -5,28 +5,49 @@ import time import requests import json +import os from ConfigParser import SafeConfigParser import ast import neoExporter import sys import listprocessing - #Compares to ontologies from the OLS. This process can take a while and procudes a csv with primary results -def scoreOntologies(sourceOntology, targetOntology, scoreParams, scoringtargetFolder): +def scoreOntologies(sourceOntology, targetOntology, scoreParams, scoringtargetFolder, mapSmallest, useLocalOnly): logging.info("Start scoring "+sourceOntology+" and "+targetOntology) - #Check for the smaller ontology - olsURL=config.get("Basics","olsAPIURL") oxoURL=config.get("Basics","oxoURL") - urls={"ols":olsURL, "oxo":oxoURL} + #Check if the olsURL is correct + if olsURL[-1:]!='/': + print "ols url is not ending with / but it is "+olsURL+" - I add the terminating /. Please make sure the URL is correct" + olsURL=olsURL+'/' + print "Set olsURL to "+olsURL + #Check if the oxoURL is correct + if oxoURL[-1:]!='/': + print "oxo url is not ending with / but it is "+oxoURL+" - I add the terminating /. 
Please make sure the URL is correct" + oxoURL=oxoURL+'/' + print "Set oxoURL to "+oxoURL + + urls={"ols":olsURL, "oxo":oxoURL} try: r = requests.get(olsURL+"ontologies/"+sourceOntology) numberOfTerms=r.json()['numberOfTerms'] + #Logging some meta data + logging.info("MetaData for "+sourceOntology+": ") + logging.info(" OLS update date: "+str(r.json()["updated"])) + logging.info(" OLS version field: "+str(r.json()["config"]["version"])) + logging.info(" OLS versionIRI field: "+str(r.json()["config"]["versionIri"])) + r = requests.get(olsURL+"ontologies/"+targetOntology) numberOfTerms2 = r.json()['numberOfTerms'] + #Logging some meta data + logging.info("MetaData for "+targetOntology+": ") + logging.info(" OLS update date: "+str(r.json()["updated"])) + logging.info(" OLS version field: "+str(r.json()["config"]["version"])) + logging.info(" OLS versionIRI field: "+str(r.json()["config"]["versionIri"])) + except: logging.error("Error getting number of terms throw webservice call!") logging.error(olsURL+"ontologies/"+sourceOntology) @@ -34,16 +55,17 @@ def scoreOntologies(sourceOntology, targetOntology, scoreParams, scoringtargetFo logging.error(r) raise - #In case the targetOntology is smaller than the source Ontology, switch the output - if (numberOfTerms>numberOfTerms2): - tmpOntology=sourceOntology - sourceOntology=targetOntology - targetOntology=tmpOntology + if mapSmallest==True: + #In case the targetOntology is smaller than the source Ontology, switch the output + if (numberOfTerms>numberOfTerms2): + tmpOntology=sourceOntology + sourceOntology=targetOntology + targetOntology=tmpOntology termsUrl=olsURL+"ontologies/"+sourceOntology+"/terms?size=500&fieldList=iri,label,synonym" results=[] - results.append(["sourceLabel","sourceIRI", "fuzzy", "oxo", "synFuzzy", "synOxo", "bridgeTerms"]) + results.append(["sourceLabel","sourceIRI", "fuzzy", "oxo", "synFuzzy", "bridgeTerms"]) counter=0 while True: try: @@ -60,53 +82,51 @@ def scoreOntologies(sourceOntology, targetOntology, scoreParams, scoringtargetFo originalLabel=term["label"] synonyms=term["synonyms"] - #Check if the term is actually defined in that ontology - if term['is_defining_ontology'] is True: - pscore=paxo_internals.scoreTermOLS(term["iri"], originalLabel, targetOntology, scoreParams, urls) - try: - calculatedMappings=paxo_internals.processPScore(pscore) - except Exception as e: - print "Exception in primary Scoring" - print e - print term["iri"] - print originalLabel - print targetOntology - logging.info("Exception in primary Scoring") - logging.info(term["iri"]+" "+originalLabel) - calculatedMappings={'sourceTerm':term["iri"]+"ERROR", "olsFuzzyScore": [], "oxoScore": [], "bridgeEvidence": []} - - #If synonyms are available, run through the same steps with synonyms to score an ontology - synCalculatedMappings={} - if synonyms!=None: - for synonym in synonyms: - try: - synPscore=paxo_internals.primaryScoreTerm('', synonym, targetOntology, scoreParams, urls) - synCalculatedMappings=paxo_internals.processPScore(synPscore) #Process the primaryScore for synonyms - synCalculatedMappings['sourceIRI']=term["iri"] - except Exception as e: - print "Exception in Synonym processPScore Term" - print e - synCalculatedMappings={'sourceTerm':term["iri"]+"ERROR", "olsFuzzyScore": [], "oxoScore": [], "bridgeEvidence": []} - logging.info("Exception in Synonym processPScore Term") - logging.info(term["iri"]+" "+synonym+" "+targetOntology) - synCalculatedMappings['olsFuzzyScore']=[{'fuzzyScore': 0, 'fuzzyMapping': 'UNKNOWN - ERROR', 'fuzzyIri': 
'UNKNOWN - ERROR'}] - synCalculatedMappings['oxoScore']=[{'distance': 0, 'oxoCurie': 'UNKNOWN', 'oxoScore': 0}] - synCalculatedMappings['sourceIRI']=term["iri"] - - else: - synCalculatedMappings['olsFuzzyScore']=[{'fuzzyScore': 0, 'fuzzyMapping': 'UNKNOWN', 'fuzzyIri': 'UNKNOWN'}] - synCalculatedMappings['oxoScore']=[{'distance': 0, 'oxoCurie': 'UNKNOWN', 'oxoScore': 0}] - - results.append([originalLabel.encode(encoding='UTF-8'), term["iri"].encode(encoding='UTF-8'), calculatedMappings['olsFuzzyScore'], calculatedMappings['oxoScore'], synCalculatedMappings['olsFuzzyScore'], synCalculatedMappings['oxoScore'], calculatedMappings['bridgeEvidence']]) - - + #Check if the term is actually defined in that ontology. Via flag it can be changed to process all terms + if term['is_defining_ontology'] is True or term['is_defining_ontology'] is useLocalOnly: + pscore=paxo_internals.scoreTermOLS(term["iri"], originalLabel, targetOntology, scoreParams, urls) + try: + calculatedMappings=paxo_internals.processPScore(pscore) + except Exception as e: + print "Exception in primary Scoring" + print e + print term["iri"] + print originalLabel + print targetOntology + logging.info("Exception in primary Scoring") + logging.info(term["iri"]+" "+originalLabel) + calculatedMappings={'sourceTerm':term["iri"]+"ERROR", "olsFuzzyScore": [], "oxoScore": [], "bridgeEvidence": []} + + #If synonyms are available, run through the same steps with synonyms to score an ontology + synCalculatedMappings={} + if synonyms!=None: + for synonym in synonyms: + try: + synPscore=paxo_internals.primaryScoreTerm('', synonym, targetOntology, scoreParams, urls) + synCalculatedMappings=paxo_internals.processPScore(synPscore) #Process the primaryScore for synonyms + synCalculatedMappings['sourceIRI']=term["iri"] + except Exception as e: + print "Exception in Synonym processPScore Term" + print e + synCalculatedMappings={'sourceTerm':term["iri"]+"ERROR", "olsFuzzyScore": [], "oxoScore": [], "bridgeEvidence": []} + logging.info("Exception in Synonym processPScore Term") + logging.info(term["iri"]+" "+synonym+" "+targetOntology) + synCalculatedMappings['olsFuzzyScore']=[{'fuzzyScore': 0, 'fuzzyMapping': 'UNKNOWN - ERROR', 'fuzzyIri': 'UNKNOWN - ERROR'}] + synCalculatedMappings['oxoScore']=[{'distance': 0, 'oxoCurie': 'UNKNOWN', 'oxoScore': 0}] + synCalculatedMappings['sourceIRI']=term["iri"] + + else: + synCalculatedMappings['olsFuzzyScore']=[{'fuzzyScore': 0, 'fuzzyMapping': 'UNKNOWN', 'fuzzyIri': 'UNKNOWN'}] + synCalculatedMappings['oxoScore']=[{'distance': 0, 'oxoCurie': 'UNKNOWN', 'oxoScore': 0}] + + results.append([originalLabel.encode(encoding='UTF-8'), term["iri"].encode(encoding='UTF-8'), calculatedMappings['olsFuzzyScore'], calculatedMappings['oxoScore'], synCalculatedMappings['olsFuzzyScore'], calculatedMappings['bridgeEvidence']]) try: termsUrl=r.json()['_links']['next']['href'] counter=counter+1 if counter%2==0: print "Processed "+str(counter)+" pages" logging.info("Processed "+str(counter)+" pages") - #break #Uncomment this for testing (to not parse the whole ontology) + #break #Uncomment this for testing the -s flag (so not the whole ontology is parsed but 2 pages) except: logging.info("Reached last page I recon") print "Reached last page I recon" @@ -136,28 +156,23 @@ def scoreOntologyPrimaryScore(name, scorefolder): tmp=row[4] synFuzzy=ast.literal_eval(tmp) tmp=row[5] - synOxo=ast.literal_eval(tmp) - tmp=row[6] bridgeEvidence=ast.literal_eval(tmp) for i in fuzzy: - obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri ,"iri": 
i['fuzzyIri'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} + #obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri ,"iri": i['fuzzyIri'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} + obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri ,"iri": i['fuzzyIri'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "bridgeEvidence":bridgeEvidence} scoreMatrix.append(obj) for i in oxo: - obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['oxoCurie'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} + obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['oxoCurie'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "bridgeEvidence":bridgeEvidence} scoreMatrix.append(obj) for i in synFuzzy: - obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['fuzzyIri'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} - scoreMatrix.append(obj) - - for i in synOxo: - obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['oxoCurie'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} + obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['fuzzyIri'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "bridgeEvidence":bridgeEvidence} scoreMatrix.append(obj) for i in bridgeEvidence: - obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['oxoCurie'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "synOxo":synOxo, "bridgeEvidence":bridgeEvidence} + obj={"sourceTerm":originalLabel, "sourceIRI":orginaliri, "iri": i['oxoCurie'], "olsFuzzyScore": fuzzy, "oxoScore": oxo, "synFuzzy": synFuzzy, "bridgeEvidence":bridgeEvidence} scoreMatrix.append(obj) @@ -182,8 +197,6 @@ def processOntologyPrimaryScore(pScore, params): for entry in result: if entry!=[]: tmp.append(entry) - #else: - # print "entry in results!" #SortScore tmp=sorted(tmp, key=lambda tmp:tmp[0]['finaleScore'], reverse=True) @@ -196,21 +209,28 @@ def scoreTermList(termList, targetOntology, scoreParams, params): result.append(paxo_internals.scoreTermLabel(term, targetOntology, scoreParams, params)) return result -# Process an IRI list via OLS instead of a termList -# def scoreIriList(IriList, targetOntology, params): - #Process scoredMatrix to prepare for validation or save to disc def writeOutPutScore(scoredMatrix, name, predictedTargetFolder, saveToDisc): result=[] - for line in scoredMatrix: - result.append([line[0]['sourceIRI'], line[0]['iri'], line[0]['finaleScore'], line[0]['sourceTerm']]) + try: + sourceTerm=line[0]['sourceTerm'] + targetLabel=str(line[0]['label'].encode('ascii','ignore')) + result.append([line[0]['sourceIRI'], line[0]['iri'], float(line[0]['finaleScore']), sourceTerm, targetLabel, float(line[0]['normalizedScore'])]) + except Exception as e: + print "Failed Unfortunatley, try to investigate why!" 
+ print e + targetLabel="Cound not be found temporarily" + result.append([line[0]['sourceIRI'], line[0]['iri'], float(line[0]['finaleScore']), sourceTerm, targetLabel, float(line[0]['normalizedScore'])]) + if saveToDisc==True: + result.insert(0,['sourceIRI','mappedIRI','score','sourceLabel', 'mappedLabel', 'NormalizedScore']) with open(predictedTargetFolder+'calculated_output_'+name+'.csv', 'w') as f: writer = csv.writer(f) writer.writerows(result) f.close() + result.pop(0) return result @@ -221,32 +241,21 @@ def curationOntologyFinalScore(scoredMatrix): doubleEntryCounter=0 replacedList=[] for counter, line in enumerate(scoredMatrix): - #print line if line[1] not in endmap: endmap.append(line[1]) unified.append(line) else: - #print "Double entry Found!!!" doubleEntryCounter=doubleEntryCounter+1 index=endmap.index(line[1]) - #print unified[index] - #print scoredMatrix[counter] + #Found higher score, so replace the lower! if unified[index][2]3: print helptext print "\nToo many arguments! Take exactly two, "+str(len(sys.argv)-1)+" given!" -else: - + raise +elif len(sys.argv)==3: config = SafeConfigParser() config.read(sys.argv[1]) logFile=config.get("Basics","logFile") logging.basicConfig(filename=logFile, level=logging.INFO, format='%(asctime)s - %(message)s') - writeToDiscFlag=config.getboolean("Params","writeToDiscFlag") uniqueMaps=config.getboolean("Params","uniqueMaps") @@ -456,86 +509,15 @@ def runListAnnotation(): elif sys.argv[2]=="-c": calculateListOntologies(sections, writeToDiscFlag, uniqueMaps) elif sys.argv[2]=="-cv": - calculateAndValidateListOntologies(sections, writeToDiscFlag, uniqueMaps) + print calculateAndValidateListOntologies(sections, writeToDiscFlag, uniqueMaps) elif sys.argv[2]=="-n": exportNeoList(sections) else: print "Could not recognize option. So I execute what's uncommented in the else branch. 
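The two trailing-slash checks added to scoreOntologies follow the same pattern for olsURL and oxoURL; a small helper would make the intent explicit (a sketch; ensureTrailingSlash is a hypothetical name, config is the parser read in the main section):

def ensureTrailingSlash(url, name):
    if url[-1:] != '/':
        print name + " url does not end with / - appending it. Please make sure " + url + "/ is correct"
        url = url + '/'
    return url

olsURL = ensureTrailingSlash(config.get("Basics", "olsAPIURL"), "ols")
oxoURL = ensureTrailingSlash(config.get("Basics", "oxoURL"), "oxo")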
-            #removeStopwordsList=['of', 'the']
-            #replaceTermList=[('cancer', 'carcinom'), ('cancer', 'neoplasm'), ('cancer','carcinoma'),('abnormality','disease')]
-            #scoreParams={"removeStopwordsList": removeStopwordsList, "replaceTermList" :replaceTermList}
-            #hp_doid_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : [('cancer', 'carcinom'), ('cancer', 'neoplasm'), ('cancer','carcinoma'),('abnormality','disease')]}
-
-
-            ### Primary score ontologies
-            #ordo_hp_scoreParams={"removeStopwordsList": ['of', 'the', 'Rare'], "replaceTermList" : [('cancer', 'carcinom'), ('cancer', 'neoplasm'), ('cancer','carcinoma'),('tumor', 'neoplasm'), ('tumor','cancer'), ('abnormality', 'disease'), ('decreased', 'reduced'), ('morphology', '')]}
-            #scoreOntologies("ordo","hp", ordo_hp_scoreParams, 'final_dec/scoring/')
-            #
-            #doid_mp_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : []}
-            #scoreOntologies("doid","mp", doid_mp_scoreParams, 'final_dec/scoring/')
-            # #
-            # doid_ordo_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : []}
-            # scoreOntologies("doid","ordo", doid_ordo_scoreParams, 'final_dec/scoring/')
-            # #
-            # hp_doid_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : [('cancer', 'neoplasm'), ('cancer','carcinoma'), ('abnormality','disease'), 'abnormality','disease']}
-            # scoreOntologies("hp","doid",hp_doid_scoreParams, 'final_dec/scoring/')
-            # #
-            # hp_mp_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : [('cancer', 'carcinom'), ('cancer', 'neoplasm'), ('cancer','carcinoma'),('abnormality','disease'), ('abnormal','Abnormality')]}
-            # scoreOntologies("hp","mp", hp_mp_scoreParams, 'final_dec/scoring/')
-            # #
-            # ordo_mp_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : []}
-            # scoreOntologies("ordo","mp", ordo_mp_scoreParams, 'final_dec/scoring/')
-
-
-            #mesh_scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : []}
-            #scoreOntologies("mesh","hp", mesh_scoreParams, 'final_dec/scoring/')
-            #scoreOntologies("mesh","doid", mesh_scoreParams, 'final_dec/scoring/')
-            #scoreOntologies("mesh","ordo", mesh_scoreParams, 'final_dec/scoring/')
-            #scoreOntologies("mesh","mp", mesh_scoreParams, 'final_dec/scoring/')
-
-
-            #Could/Should be changed so parameters come from the config file
-            params={"fuzzyUpperLimit": 0.8, "fuzzyLowerLimit": 0.6,"fuzzyUpperFactor": 1,"fuzzyLowerFactor":0.6, "oxoDistanceOne":1, "oxoDistanceTwo":0.3, "oxoDistanceThree":0.1, "synFuzzyFactor":0.6, "synOxoFactor": 0.4, "bridgeOxoFactor":1, "threshold":0.6}
-            #params={"fuzzyUpperLimit": 0.8, "fuzzyLowerLimit": 0.6,"fuzzyUpperFactor": 1,"fuzzyLowerFactor":0.6, "oxoDistanceOne":1, "oxoDistanceTwo":0.3, "oxoDistanceThree":0.1, "synFuzzyFactor":0.6, "synOxoFactor": 0.4, "bridgeOxoFactor":1, "threshold":0.8}
-
-            ### Execute Calculate and validate for a certain file
-            #print calculateAndValidateOntologyPrimaryScore('hp', 'doid', 'loom', 'Loom/DOID_HP_loom.csv', params, 'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/validation/')
-            #print calculateAndValidateOntologyPrimaryScore('hp','doid', 'silver','silver_nov/Consensus-3-hp-doid.tsv', params, 'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/',{'uri1':0, 'uri2':2, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/validation/')
-            #print calculateAndValidateOntologyPrimaryScore('ordo', 'hp', 'loom', 'Loom/ordo_hp_loom.csv', params,'final_dec/scoring/', writeToDisc, final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/validation/')
-
-
-            #params={"fuzzyUpperLimit": 0, "fuzzyLowerLimit": 0,"fuzzyUpperFactor": 0.65, "fuzzyLowerFactor":0, "oxoDistanceOne":0.00029, "oxoDistanceTwo":0.57, "oxoDistanceThree":0.027, "synFuzzyFactor":0.247, "synOxoFactor": 0.62, "bridgeOxoFactor":0.829, "threshold":0.6}
-
-            #print calculateAndValidateOntologyPrimaryScore('ordo', 'hp', 'silver','silver_nov/Consensus-3-hp-ordo.tsv', params,'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/',{'uri1':2, 'uri2':0, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/validation/')
-            # {'misses': 210, 'alternatives': 350}
-
-            # #
-            # print calculateAndValidateOntologyPrimaryScore('mp','hp', 'loom','Loom/MP_HP_loom.csv', params,'final_dec/scoring/', writeToDiscFag, 'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2 , 'delimiter':','}, uniqueMaps, 'final_dec/evaluation/')
-            #print calculateAndValidateOntologyPrimaryScore('mp','hp', 'silver','silver_nov/Consensus-3-hp-mp.tsv', params, 'final_dec/scoring/',writeToDiscFlag, 'final_dec/predicted/', {'uri1':2, 'uri2':0, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/evaluation/')
-            # print calculateAndValidateOntologyPrimaryScore('ordo','doid', 'loom' ,'Loom/DOID_ORDO_loom.csv', params, 'final_dec/scoring/',writeToDisc, 'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/evaluation/')
-            #print calculateAndValidateOntologyPrimaryScore('ordo','doid', 'silver','silver_nov/Consensus-3-doid-ordo.tsv', params, 'final_dec/scoring/', writeToDiscFlag,'final_dec/predicted/', {'uri1':2, 'uri2':0, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/evaluation/')
-            # print calculateAndValidateOntologyPrimaryScore('ordo','mp', 'loom', 'Loom/mp_ordo_loom.csv', params,'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/evaluation/')
-            #print calculateAndValidateOntologyPrimaryScore('ordo','mp', 'silver','silver_nov/Consensus-3-mp-ordo.tsv', params,'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/', {'uri1':2, 'uri2':0, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/evaluation/')
-            #print calculateAndValidateOntologyPrimaryScore('mp','doid', 'loom', 'Loom/DOID_MP_loom.csv', params, 'final_dec/scoring/',writeToDiscFlag, 'final_dec/predicted/', {'uri1':1, 'uri2':0, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/evaluation/')
-            #print calculateAndValidateOntologyPrimaryScore('mp','doid', 'silver','silver_nov/Consensus-3-mp-doid.tsv', params, 'final_dec/scoring/',writeToDiscFlag,'final_dec/predicted/', {'uri1':0, 'uri2':2, 'scorePosition':4 , 'delimiter':'\t'}, uniqueMaps, 'final_dec/evaluation/')
-            # #
-            #
-
-            # print calculateAndValidateOntologyPrimaryScore('mesh','doid', 'loom', 'Loom/DOID_MESH_loom_new.csv', params, 'final_dec/scoring/',writeToDisc,'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/validation/')
-            # print calculateAndValidateOntologyPrimaryScore('mesh','doid', 'silver', 'silver_nov/Consensus-3-doid-mesh3.tsv', 'final_dec/scoring/',params, writeToDisc, 'final_dec/predicted/', {'uri1':2, 'uri2':0, 'scorePosition':2, 'delimiter':'\t'}, uniqueMaps, 'final_dec/validation/')
-            # print calculateAndValidateOntologyPrimaryScore('mesh','hp', 'loom', 'Loom/mesh_hp_loom_new.csv', params, 'final_dec/scoring/',writeToDisc,'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/validation/')
-            # print calculateAndValidateOntologyPrimaryScore('mesh','hp', 'silver', 'silver_nov/Consensus-3-hp-mesh3.tsv', params, 'final_dec/scoring/',writeToDisc,'final_dec/predicted/', {'uri1':1, 'uri2':0, 'scorePosition':2, 'delimiter':'\t'}, uniqueMaps, 'final_dec/validation/')
-            # print calculateAndValidateOntologyPrimaryScore('mesh','mp', 'loom', 'Loom/mesh_mp_loom_new.csv', params, 'final_dec/scoring/',writeToDisc,'final_dec/predicted/', {'uri1':0, 'uri2':1, 'scorePosition':2, 'delimiter':','}, uniqueMaps, 'final_dec/validation/')
-            # print calculateAndValidateOntologyPrimaryScore('mesh','mp', 'silver', 'silver_nov/Consensus-3-mp-mesh3.tsv', params,'final_dec/scoring/', writeToDisc, 'final_dec/predicted/', {'uri1':1, 'uri2':0, 'scorePosition':2, 'delimiter':'\t'}, uniqueMaps, 'final_dec/validation/')
-
-
-            #Just run calculate without validation
-            #calculatePrimaryScore('ordo'+"_"+'doid', params, 'final_dec/scoring/', writeToDiscFlag, 'final_dec/predicted/', uniqueMaps)
-
             ###Execute functions for terms
             scoreParams={"removeStopwordsList": ['of', 'the'], "replaceTermList" : []}
             params={"fuzzyUpperLimit": 0.6, "fuzzyLowerLimit": 0.6,"fuzzyUpperFactor": 1,"fuzzyLowerFactor":0.6, "oxoDistanceOne":1, "oxoDistanceTwo":0.3, "oxoDistanceThree":0.1, "synFuzzyFactor":0.6, "synOxoFactor": 0.4, "bridgeOxoFactor":1, "threshold":0.6, "ols":"https://www.ebi.ac.uk/ols/api/", "oxo":"https://www.ebi.ac.uk/ols/api/"}
             print paxo_internals.scoreTermLabel("Nuclear cataract", "doid", scoreParams, params)
-            #print scoreTermList(["Asthma", "Dermatitis atopic"], "doid", scoreParams, params)
diff --git a/dataloading/paxo/paxo_config.ini b/dataloading/paxo/paxo_config.ini
new file mode 100644
index 0000000..5862304
--- /dev/null
+++ b/dataloading/paxo/paxo_config.ini
@@ -0,0 +1,38 @@
+[Basics]
+olsAPIURL=https://www.ebi.ac.uk/ols/api/
+oxoURL=https://www.ebi.ac.uk/spot/oxo/api/search
+logFile=../paxo.log
+neoURL=bolt://localhost:7687
+neoUser=neo4j
+neoPW=neopassword
+
+[Params]
+scoringTargetFolder=../data/scoring/
+predictedTargetFolder=../data/predicted/
+validationTargetFolder=../data/evaluation/
+; Neo needs the full path!
+neoFolder=/path/path/neo_export/
+StopwordsList=of,the
+writeToDiscFlag=True
+uniqueMaps=False
+
+[mp_hp]
+sourceOntology=mp
+targetOntology=hp
+standard=silver
+silver=standard/std_hp-mp.tsv
+uri1silver=2
+uri2silver=0
+scorePositionsilver=4
+delimitersilver=t
+fuzzyUpperLimit=0.8
+fuzzyLowerLimit=0.6
+fuzzyUpperFactor=1
+fuzzyLowerFactor=0.6
+oxoDistanceOne=1
+oxoDistanceTwo=0.3
+oxoDistanceThree=0.1
+synFuzzyFactor=0.6
+synOxoFactor=0.4
+bridgeOxoFactor=1
+threshold=0.6
diff --git a/dataloading/paxo/paxo_internals.py b/dataloading/paxo/paxo_internals.py
old mode 100755
new mode 100644
index def4715..3182e13
--- a/dataloading/paxo/paxo_internals.py
+++ b/dataloading/paxo/paxo_internals.py
@@ -2,7 +2,6 @@
 import logging
 import requests
 import Levenshtein
-#from ConfigParser import SafeConfigParser
 
 from flask import Flask
 from flask import request
@@ -63,55 +62,20 @@ def oxoMatch(termLabel, targetOntology, url):
         sourceCurie=jsonReply['curie']
         if len(jsonReply['mappingResponseList'])>0:
             for row in jsonReply['mappingResponseList']:
-
-                ##Additional webservice call to get the stupid long IRI out of oxo
-                #oxoMapURL="https://www.ebi.ac.uk/spot/oxo/api/mappings"
                 data={"fromId":row['curie']}
                 longId=apiCall(url+"mappings", data)
                 longId=longId.json()['_embedded']['mappings'][0]['fromTerm']['uri']
-                tmpList.append({"curie":longId, "distance":row['distance']})
-                #tmpList.append({"curie":row['curie'], "distance":row['distance']})
+                tmpList.append({"curie":longId, "distance":row['distance'], "oxoLabel":row['label']})
 
             sortedCurie=sorted(tmpList, key=lambda tmpList: tmpList['distance'], reverse=False)
         else:
-            sortedCurie=[{"curie":"UNKNOWN", "distance": 0}]
+            sortedCurie=[{"curie":"UNKNOWN", "distance": 0, "oxoLabel":"UNKNOWN"}]
+
         return sortedCurie
     except Exception as e:
         #In case there is NO oxo result, we find outselfs in this loop
-        #print "Problem with oxo:"
-        #print e
-        #print "Termlabel: "+termLabel
-        #print "TargetOntolgy: "+targetOntology
-        #print "Tried to reach "+oxoUrl+" with parameters "+str(data)
-        return [{"curie":"UNKNOWN", "distance": 0}]
-
-
-
-############################################################
-##List of terms that should be cut out of label before fuzzy match # Shall come from config file later
-#cutList=["abnormalityof", "syndrome", "disease", "cancer", "tumor", "abnormal"]
-
-# def stringProcess(term):
-#     processedTerm=term.lower() #Make sting lower case
-#     processedTerm=processedTerm.replace(" ", "") #Remove all spaces
-#
-#     #Simply cut some things from the label before calculating the levenstein distance
-#     for cut in cutList:
-#         tmpArray=term.split(cut) #Remove problematic terms
-#         if len(tmpArray[0])!=0:
-#             processedTerm=tmpArray[0]
-#             break
-#         elif len(tmpArray[1])!=0:
-#             processedTerm=tmpArray[1]
-#             break
-#         else:
-#             print "Something is wrong"
-#             break
-#
-#     return processedTerm
-
-############################################################
-#removeStopwordsList=['of', 'the']
-#replaceTermList=[('cancer', 'carcinom'), ('cancer', 'neoplasm'), ('cancer','carcinoma'),('abnormality','disease')]
+        return [{"curie":"UNKNOWN", "distance": 0, "oxoLabel":"UNKNOWN"}]
+
 
 def sortWords(term):
     term=term.lower()
@@ -123,60 +87,36 @@ def sortWords(term):
 def stringMatcher(sourceTerm, targetTerm, replaceTermList, removeStopwordsList):
     #First calculate Lev without changes
     lev=round(Levenshtein.ratio(sourceTerm, targetTerm), 5)
-    #print "Straight Lev: "+sourceTerm+" - "+targetTerm+" --> "+str(lev)
 
     sourceTerm=sortWords(sourceTerm)
     targetTerm=sortWords(targetTerm)
-
     replacementLev=round(Levenshtein.ratio(sourceTerm, targetTerm), 5)
-    #print "Sorted Lev: "+sourceTerm+" - "+targetTerm+" --> "+str(replacementLev)
     if replacementLev>lev:
         lev=replacementLev
-        #print " Score Higher, so replaced"
 
     #Remove stop words
     for stop in removeStopwordsList:
         sourceTerm=sourceTerm.replace(stop,'').strip().replace('  ', ' ')
         targetTerm=targetTerm.replace(stop, '').strip().replace('  ', ' ')
-        #print "Removed Stopwords Lev: "+sourceTerm+" - "+targetTerm+" --> "+str(round(Levenshtein.ratio(sourceTerm, targetTerm), 5))
-
-    #print "SourceReplacements:"
     #Replace terms in source to trying to find higher score
     for replacement in replaceTermList:
         tmpSource=sourceTerm.replace(replacement[0], replacement[1])
         tmpSource=sortWords(tmpSource)
         replacementLev=round(Levenshtein.ratio(tmpSource, targetTerm), 5)
-        #print " Replacements: "+tmpSource+" - "+targetTerm+" --> "+str(replacementLev)
         if replacementLev>lev:
             lev=replacementLev
-            #print " Score Higher, so replaced"
-#    tmpSource=tmpSource.split(' ')
-#    tmpSource.sort()
-#    tmpSource=' '.join(tmpSource)
-#    replacementLev=round(Levenshtein.ratio(tmpSource, targetTerm), 5)
-#    print " Replacements Source: "+tmpSource+" - "+targetTerm+" --> "+str(replacementLev)
-#    if replacementLev>lev:
-#        lev=replacementLev
 
-    #print "TargetReplacements:"
     #Replace terms in target to trying to find higher score
     for replacement in replaceTermList:
         tmpTarget=targetTerm.replace(replacement[0], replacement[1])
         tmpTarget=sortWords(tmpTarget)
         replacementLev=round(Levenshtein.ratio(sourceTerm, tmpTarget), 5)
-        #print " Replacements: "+sourceTerm+" - "+tmpTarget+" --> "+str(replacementLev)
         if replacementLev>lev:
             lev=replacementLev
-            #print " Score Higher, so replaced"
 
     return lev
-
-############################################################
#If lev score is higher for a synonym, replace lev score --> boost synonym label hits if "synonym" in reply.keys(): for synonym in reply["synonym"]: - #answerTerm=stringProcess(synonym.encode(encoding='UTF-8')) answerTerm=synonym.encode(encoding='UTF-8') - #tmpLev=round(Levenshtein.ratio(termLabel, answerTerm), 5) tmpLev=stringMatcher(termLabel, answerTerm, replaceTermList, removeStopwordsList) if tmpLev>lev: lev=tmpLev @@ -230,7 +165,6 @@ def olsFuzzyMatch(termLabel, targetOntology, replaceTermList, removeStopwordsLis sortedLev=sorted(levList, key=lambda levList:levList['lev'], reverse=True) else: - #print "No hits, therefore Add empty placeholder" sortedLev=[{"SourceLabel": termLabel, "SourceIRI": termLabel , "TargetIRI": "UNKNOWN", "TargetLabel": "UNKNOWN", "lev": 0}] @@ -248,8 +182,6 @@ def olsFuzzyMatch(termLabel, targetOntology, replaceTermList, removeStopwordsLis logging.error(data) logging.error(e) - - #jsonReply=jsonReply.json()['response'] try: oxoTargetList=[] if jsonReply['numFound']>0: @@ -276,14 +208,15 @@ def primaryScoreTerm(termIRI, termLabel, targetOntology, scoreParams, urls): if termIRI!='': oxoResults=oxoMatch(termIRI, targetOntology, urls["oxo"]) else: - oxoResults=[{"curie":"UNKNOWN", "distance": 0}] + #print "nononno you should not be executed" + #print termIRI + #print termLabel + #print targetOntology + oxoResults=[{"curie":"UNKNOWN", "distance": 0, "oxoLabel":"UNKNOWN"}] bridgeTerms=olsFuzzyResult['bridgeTerms'] olsFuzzyResult=olsFuzzyResult['fuzzyTerms'] - #if bridgeTerms!=[]: - #print "Found bridge terms, it is incredible!" - bridgeOxo=[] if len(bridgeTerms)>0: for bridgeTerm in bridgeTerms: @@ -292,15 +225,15 @@ def primaryScoreTerm(termIRI, termLabel, targetOntology, scoreParams, urls): if line['curie']!='UNKNOWN': bridgeOxo.append(tmp) else: - bridgeOxo=[[{"curie":"UNKNOWN", "distance": 0}]] + bridgeOxo=[[{"curie":"UNKNOWN", "distance": 0, "oxoLabel":"UNKNOWN"}]] else: - bridgeOxo=[[{"curie":"UNKNOWN", "distance": 0}]] + bridgeOxo=[[{"curie":"UNKNOWN", "distance": 0, "oxoLabel":"UNKNOWN"}]] try: bridgeOxo=bridgeOxo[0] except e as Exception: - print "Error with that stupid list in bridgeOxo" + print "Error with that list in bridgeOxo" print termIRI print termLabel print bridgeOxo @@ -319,11 +252,12 @@ def processPScore(pScore): for oxo in pScore['oxoScore']: tmpCurie=oxo['curie'] oxoScore=int(oxo['distance']) + oxoLabel=oxo['oxoLabel'] if int(oxo['distance'])==0: tmpCurie="UNKNOWN" - mapping['oxoScore'].append({'oxoCurie':tmpCurie, "distance": oxo['distance'] ,"oxoScore":oxoScore}) + mapping['oxoScore'].append({'oxoCurie':tmpCurie, "distance": oxo['distance'],"oxoLabel":oxoLabel, "oxoScore":oxoScore}) for oxo in pScore['bridgeEvidence']: tmpCurie=oxo['curie'] @@ -345,7 +279,7 @@ def simplifyProcessedPscore(mapping): for line in mapping['olsFuzzyScore']: if line['fuzzyScore']==[]: line['fuzzyScore']=0 - obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI,"iri":line['fuzzyIri'], "fuzzyScore": line['fuzzyScore'], "oxoScore": 0, "synFuzzy":0, "synOxo": 0, "bridgeOxoScore":0} + obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI,"iri":line['fuzzyIri'],"label":line['fuzzyMapping'], "fuzzyScore": line['fuzzyScore'], "oxoScore": 0, "synFuzzy":0, "synOxo": 0, "bridgeOxoScore":0} scoreMatrix.append(obj) flag=False @@ -356,7 +290,15 @@ def simplifyProcessedPscore(mapping): flag=True if flag==False: - obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI, "iri":line['oxoCurie'], "fuzzyScore": 0, "oxoScore": line['oxoScore'], "synFuzzy":0, 
"synOxo": 0, "bridgeOxoScore":0} + #This is ugly, just temporary until input is fixed# + try: + x=line['oxoLabel'] + except: + #print "Unknown til new data arrives" + line['oxoLabel']="UNKNOWN" + #print sourceIRI + + obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI, "iri":line['oxoCurie'], "label":line['oxoLabel'], "fuzzyScore": 0, "oxoScore": line['oxoScore'], "synFuzzy":0, "synOxo": 0, "bridgeOxoScore":0} scoreMatrix.append(obj) # Starting here we try to take care of synonyms! @@ -366,32 +308,13 @@ def simplifyProcessedPscore(mapping): for line in mapping['synFuzzy']: for s in scoreMatrix: if line["fuzzyIri"]==s["iri"]: - #s['fuzzyScore']=line['fuzzyScore'] s['synFuzzy']=line['fuzzyScore'] flag=True if flag==False: - obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI,"iri":line['fuzzyIri'], "fuzzyScore":0, "oxoScore": 0, "synFuzzy": line['fuzzyScore'], "synOxo":0, "bridgeOxoScore":0} - scoreMatrix.append(obj) - - # Oxo Synonyms Score - flag=False - for line in mapping['synOxo']: - for s in scoreMatrix: - if line["oxoCurie"]==s["iri"]: - #s['oxoScore']=line['oxoScore'] - s['synOxo']=line['oxoScore'] - flag=True - - if flag==False: - obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI, "iri":line['oxoCurie'], "fuzzyScore": 0, "oxoScore": 0, "synFuzzy":0, "synOxo": line['oxoScore'], "bridgeOxoScore":0} + obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI,"iri":line['fuzzyIri'], "label":line['fuzzyMapping'], "fuzzyScore":0, "oxoScore": 0, "synFuzzy": line['fuzzyScore'], "synOxo":0, "bridgeOxoScore":0} scoreMatrix.append(obj) - #else: - # print "No Synonyms here" - - - #Getting into bridge evidence flag=False for line in mapping['bridgeEvidence']: @@ -401,13 +324,9 @@ def simplifyProcessedPscore(mapping): flag=True if flag==False: - #obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI, "iri":line['bridgeOxoCurie'], "fuzzyScore": 0, "oxoScore": 0, "synFuzzy":0, "synOxo": 0, "bridgeOxoScore": line['oxoScore']} obj={"sourceTerm":mapping['sourceTerm'], "sourceIRI":sourceIRI, "iri":line['oxoCurie'], "fuzzyScore": 0, "oxoScore": 0, "synFuzzy":0, "synOxo": 0, "bridgeOxoScore": line['oxoScore']} - #oxoCurie scoreMatrix.append(obj) - - #print "made it to the end of simplifyProcessedPscore"1 return scoreMatrix #Simple Score mechanism for all subscores, returns a sorted list. Is Called after simplifyProcessedPscore @@ -424,17 +343,15 @@ def scoreSimple(scoreMatrix, params): fuzzyLowerFactor=params['fuzzyLowerFactor'] synFuzzyFactor=params['synFuzzyFactor'] - synOxoFactor=params['synOxoFactor'] synFuzzyFactor=params['synFuzzyFactor'] - synOxoFactor=params['synOxoFactor'] bridgeOxoFactor=params['bridgeOxoFactor'] resultMatrix=[] for i,score in enumerate(scoreMatrix): fFactor=0 if score['fuzzyScore']==1: #Exact match, we shall boost this by all means, so we take UpperFactor*2 for now - fFactor=2 + fFactor=3*fuzzyUpperFactor elif score['fuzzyScore']>=fuzzyUpperLimit: fFactor=fuzzyUpperFactor elif score['fuzzyScore']=fuzzyLowerLimit: @@ -442,6 +359,15 @@ def scoreSimple(scoreMatrix, params): elif score['fuzzyScore']=fuzzyUpperLimit: + synFuzzyFactor=fuzzyUpperFactor + elif score['synFuzzy']=fuzzyLowerLimit: + synFuzzyFactor=fuzzyLowerFactor + elif score['synFuzzy']0: - print "FOUND an incredible bridge Term, uhauha!" 
- print scoreMatrix[i] - - - score['finaleScore']=score['fuzzyScore']*fFactor+score['oxoScore']+score['synFuzzy']*synFuzzyFactor+score['synOxo']*synOxoFactor+score['bridgeOxoScore']*bridgeOxoFactor + #score['finaleScore']=score['fuzzyScore']*fFactor+score['oxoScore']+score['synFuzzy']*synFuzzyFactor+score['synOxo']*synOxoFactor+score['bridgeOxoScore']*bridgeOxoFactor + score['finaleScore']=score['fuzzyScore']*fFactor+score['oxoScore']+score['synFuzzy']*synFuzzyFactor+score['bridgeOxoScore']*bridgeOxoFactor ### Do we want unknown to be printed if score['finaleScore']>threshold: #This removes "unknown" and weak results from the output resultMatrix.append(scoreMatrix[i]) -# else: - #print "Failed to pass the threshold unfortunatley!" - #print score['finaleScore'] #Sort the list so the best score is on top resultMatrix=sorted(resultMatrix, key=lambda resultMatrix:resultMatrix['finaleScore'], reverse=True) return resultMatrix -#Simple Score mechanism for all subscores, returns a sorted list. Is Called after simplifyProcessedPscore -#def scoreComplex(scoreMatrix): - #Calls all necessary steps to get a result for a termLabel def scoreTermLabel(termLabel, targetOntology, scoreParams, params): pscore=primaryScoreTerm('', termLabel, targetOntology, scoreParams, params) #Executes the basic calls to OLS and OXO, delivers the primary score @@ -482,8 +404,6 @@ singleLineResult=scoreSimple(simplerMatrix, params) #Takes the simplified input and actually calculates the final score return singleLineResult - -# Synonymsearch for comparing Ontologies in OLS, should be called instead score Simple for these cases def scoreTermOLS(termIRI, termLabel, targetOntology, params, urls): pscore=primaryScoreTerm(termIRI, termLabel, targetOntology, params, urls) pscore['sourceIri']=termIRI diff --git a/dataloading/paxo/readme.md b/dataloading/paxo/readme.md old mode 100755 new mode 100644 index bfbe984..62522e5 --- a/dataloading/paxo/readme.md +++ b/dataloading/paxo/readme.md @@ -1,27 +1,74 @@ -### Installation +### Prerequisite -Suggested in a virtualenv as always: -> pip install -r requirements.txt +- In order to install dependencies, first ensure that you have Python 2.7 and a corresponding version of pip. +- With pip 2.7, you need to install the prerequisite Python modules listed in this directory: +> `pip install -r requirements.txt` +- Paxo tries to predict mappings between two ontologies that are present in the Ontology Lookup Service (OLS). It is also possible to map a list of terms against one target ontology in OLS. +### Usage (short) +1. First create a raw score with: +> python paxo.py paxo_config.ini -s -### Usage +2. Calculate a score with: +> python paxo.py paxo_config.ini -c -Edit and run clientOperations.py: +3. Calculate and validate a primary score: +> python paxo.py paxo_config.ini -cv -First create a raw score with -> scoreOntologies(sourceOntology, targetOntology) +4. Create a csv file that is compatible with OxO, based on a previously calculated score: +> python paxo.py paxo_config.ini -n -Calculate a calculatedScore with: -> calculatePrimaryScore(combinedOntologyName, params, writeToDisc) +5. Create a mapping file from a list of terms rather than a "real" ontology: +> python paxo.py listprocessing_config.ini -l
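For quick experiments the scoring internals can also be called directly, as the example kept in clientOperations.py earlier in this diff does. A minimal sketch (Python 2.7, matching the repo); the parameter values simply mirror that checked-in example and are not tuned recommendations:

```python
# Sketch only: direct use of the paxo scoring internals (Python 2.7).
# scoreParams/params mirror the example in clientOperations.py; the label
# "Nuclear cataract" and target ontology "doid" are illustrative.
import paxo_internals

scoreParams = {"removeStopwordsList": ['of', 'the'], "replaceTermList": []}
params = {"fuzzyUpperLimit": 0.6, "fuzzyLowerLimit": 0.6, "fuzzyUpperFactor": 1,
          "fuzzyLowerFactor": 0.6, "oxoDistanceOne": 1, "oxoDistanceTwo": 0.3,
          "oxoDistanceThree": 0.1, "synFuzzyFactor": 0.6, "synOxoFactor": 0.4,
          "bridgeOxoFactor": 1, "threshold": 0.6,
          "ols": "https://www.ebi.ac.uk/ols/api/",
          "oxo": "https://www.ebi.ac.uk/ols/api/"}  # URLs as checked in

# Prints the scored candidate mappings for one label, best score first.
print paxo_internals.scoreTermLabel("Nuclear cataract", "doid", scoreParams, params)
```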
-or calculate and validated a primary score with: -> calculateAndValidateOntologyPrimaryScore(combinedOntologyName, stdName, stdFile, params, writeToDisc, parseParms) +### More about usage +**About 1:** The primary, 'raw' score is the basis for the calculation of the mapping score. This step has to execute many calls to the OLS and OxO APIs and can take a long time; the files it creates therefore serve as a 'checkpoint'. +**About 2:** Reading in the raw score created by the -s option, this step calculates the actual score and tries to predict mappings. The final result is strongly influenced by the parameters defined in the config file (e.g. threshold). +**About 3:** If a validation file (a 'standard') is available, the calculated result can be evaluated against this standard using the -cv flag (first a score is calculated, then validated). -### Prerequisite: +**About 4:** The option -n creates a csv file that can be loaded into OxO. This option reads in the file created by the -c option. -* A Folder “pipeline_output” has to be present -* Within this folder there need to be a folder to put the calculated primary score (e.g. the folder “ordo_doid” containing the file “scoring_output_ordo_doid.csv”) -* A path to a validation file if you want to validate against a std file \ No newline at end of file +**About 5:** To create a mapping file between a list of terms and an ontology, start paxo with -l. + +### Parameter explanation +To run paxo, it is mandatory to provide a config file. The dummy config files in the config folder should provide an easy start for creating your own. The config files for ontology mapping (flags -s, -c, -cv) and for list processing (flag -l) are structured slightly differently, but most parameters are the same. Most parameters should be self-explanatory; the others are described here in a few words. + +#### Shared values for all configs +**StopwordsList** Words that are cut out before string comparison. Typical candidates are e.g. 'the' or 'of', but this can also be used for words you don't want to consider, e.g. 'abnormality'. + +**fuzzyUpperLimit** Upper limit of a fuzzy label score. A score above this limit is multiplied by the fuzzyUpperFactor. A fuzzy score of 1 is the highest and means the labels are identical (an exact match). + +**fuzzyUpperFactor** Factor for a fuzzy label score above the fuzzy upper limit. + +**fuzzyLowerLimit** A fuzzy label score between the upper and lower limit is multiplied by the fuzzyLowerFactor for the final score. A score below this limit is discarded and leads to a fuzzy score of 0. + +**fuzzyLowerFactor** Factor for a fuzzy label score between the upper and the lower limit. + +**oxoDistanceOne** Score for a connection of distance 1 in OxO. + +**oxoDistanceTwo** Score for a connection of distance 2 in OxO. + +**oxoDistanceThree** Score for a connection of distance 3 in OxO. + +**synFuzzyFactor** Factor to weight a fuzzy label match against a synonym. + +**synOxoFactor** Weight for a link that was found in OxO via a synonym rather than the preferred label. + +**bridgeOxoFactor** Weight for a bridge term. + +**threshold** Threshold for the mappings. Only candidates whose final score is above this threshold are reported as mappings and printed to the output file. A worked sketch of how these values combine follows below.
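Taken together, these shared parameters feed a single weighted sum in paxo_internals.scoreSimple. The following is a minimal sketch of that combination, not the shipped code; the candidate dict and the helper name final_score are illustrative:

```python
# Sketch of how scoreSimple combines the sub-scores (not the shipped code).
def final_score(candidate, params):
    fuzzy = candidate['fuzzyScore']
    if fuzzy == 1:                                # exact label match is boosted
        f_factor = 3 * params['fuzzyUpperFactor']
    elif fuzzy >= params['fuzzyUpperLimit']:
        f_factor = params['fuzzyUpperFactor']
    elif fuzzy >= params['fuzzyLowerLimit']:
        f_factor = params['fuzzyLowerFactor']
    else:
        f_factor = 0                              # below fuzzyLowerLimit: ignored

    score = (fuzzy * f_factor
             + candidate['oxoScore']              # oxoDistanceOne/Two/Three
             + candidate['synFuzzy'] * params['synFuzzyFactor']
             + candidate['bridgeOxoScore'] * params['bridgeOxoFactor'])
    # Only candidates above the threshold are kept as predicted mappings.
    return score if score > params['threshold'] else None
```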
+ + +#### Config for listprocessing +**inputFile** Path to the input file, consisting of 3 columns (ids, labels, optional synonyms). + +**resultFile** /path/output-file.csv + +**detailLevel** Value can be 0, 1 or 2, depending on how much detail should be printed to the final file. Value 2 is the verbose mode, where one can spot alternatives to the suggested mapping. + +**delimiter** Delimiter of the input file, in most cases e.g. *,* + +**synonymSplitChar** Delimiter of the synonyms that are located in the third column, could be e.g. *|* or *;* ... diff --git a/dataloading/paxo/requirements.txt b/dataloading/paxo/requirements.txt index f128713..f155e50 100755 --- a/dataloading/paxo/requirements.txt +++ b/dataloading/paxo/requirements.txt @@ -3,9 +3,3 @@ requests python-levenshtein flask neo4j-driver -pyyaml -mysql-python -spotpy -numpy -matplotlib -pandas diff --git a/dataloading/paxo/validation.py b/dataloading/paxo/validation.py old mode 100755 new mode 100644 index 4a3b6f3..8c04161 --- a/dataloading/paxo/validation.py +++ b/dataloading/paxo/validation.py @@ -2,9 +2,6 @@ import logging import requests import time -#from ConfigParser import SafeConfigParser -#config = SafeConfigParser() -#config.read("config.ini") def validateFinaleScore(onto1, onto2, stdNamed, inputFile, TargetFile, writeToDisc, params, parseParms, validationTargetFolder, url): uri1Position=parseParms['uri1'] @@ -12,20 +9,11 @@ counterPosition=parseParms['scorePosition'] delimiterChar=parseParms['delimiter'] - #url=config.get("Basics","olsURL") - - - print "Validate ... " - #logging.basicConfig(filename="flask.log", level=logging.INFO, format='%(asctime)s - %(message)s') - inputList=[] inputLongList=[] for row in inputFile: inputList.append([row[0], row[1]]) inputLongList.append(row) - #if row[2]=='': - # print "Oh No, we found a empty value!" - targetList=[] targetLongList=[] @@ -45,13 +33,11 @@ missing=[] matches=[] - #print inputList - #Now validate the computed mappings against the standard for counter, line in enumerate(targetList): - #NoMatch from std to the created mapping file, so this goes to the missing List + if line not in inputList: - missing.append([line[0], line[1], "NoScore", targetLongList[counter][counterPosition]]) + missing.append([line[0], line[1], "NoScore", targetLongList[counter][counterPosition], targetLongList[counter][1], targetLongList[counter][3]]) #Exact same Result for both, so this is a match.
Is added to the matches List else: @@ -59,13 +45,13 @@ def validateFinaleScore(onto1, onto2, stdNamed, inputFile, TargetFile, writeToDi if c[0]==line[0] and c[1]==line[1] or c[1]==line[0] and c[1]==line[1]: score=c[2] - matches.append([line[0], line[1], score, targetLongList[counter][counterPosition]]) + matches.append([line[0], line[1], score, targetLongList[counter][counterPosition],targetLongList[counter][1], targetLongList[counter][3]]) #Add those mappings that where no in the standard but calculated to the alternatives List alternatives=[] for counter, line in enumerate(inputList): if line not in targetList and line[1]!="UNKNOWN": - alternatives.append([line[0], line[1], inputLongList[counter][2], "noScore"]) + alternatives.append([line[0], line[1], inputLongList[counter][2], "noScore", inputLongList[counter][3], inputLongList[counter][4]]) #Alternative Counter @@ -85,65 +71,66 @@ def validateFinaleScore(onto1, onto2, stdNamed, inputFile, TargetFile, writeToDi result=matches+missing+alternatives#+discarted - we can also show the discarted terms or put them in an own file + #print result #If we write to disc, I get the labels of the parts that are NOT mapped to the standard if writeToDisc is True: - print "Try to save the result" + #print "Try to save the result" obsoleteScore=0 - for row in result: - #if row[2]=='NoScore' or row[3]=='noScore': - #print "Need to annotate "+row[0]+" and "+row[1] - - data={'q':row[0],'queryFields':'iri', 'fieldList': 'label', "ontology":onto1, "type":"class", "local":True} - - try: - r = requests.get(url, data) - except: - time.sleep(60) - logging.info("API exception, try again after 5 second delay") - print "API exception, try again after 5 second delay" - try: - r = requests.get(url, data) - logging.info("Success") - print "Success!" - except: - logging.info("Error with second try") - logging.info(r.status_code) - logging.info(r.request.url) - #raise - - - try: - jsonReply=r.json() - row.append(jsonReply['response']['docs'][0]['label'].encode(encoding='UTF-8')) - except: - row.append('NoLabel Found') - obsoleteScore=obsoleteScore+1 - print "No Label found in the first row" - - data={'q':row[1],'queryFields':'iri', 'fieldList': 'label', "ontology":onto2, "type":"class", "local":True} - try: - r = requests.get(url, data) - except: - time.sleep(60) - logging.info("API exception, try again after 5 second delay") - print "API exception, try again after 5 second delay" - try: - r = requests.get(url, data) - logging.info("Success") - print "Success" - except: - logging.info("Error with second try") - logging.info(r.status_code) - logging.info(r.request.url) - - - try: - jsonReply=r.json() - row.append(jsonReply['response']['docs'][0]['label'].encode(encoding='UTF-8')) - except: - row.append('NoLabel Found') - obsoleteScore=obsoleteScore+1 - print "No Label found in the second row" + # for row in result: + # #if row[2]=='NoScore' or row[3]=='noScore': + # #print "Need to annotate "+row[0]+" and "+row[1] + # + # data={'q':row[0],'queryFields':'iri', 'fieldList': 'label', "ontology":onto1, "type":"class", "local":True} + # + # try: + # r = requests.get(url, data) + # except: + # time.sleep(60) + # logging.info("API exception, try again after 5 second delay") + # print "API exception, try again after 5 second delay" + # try: + # r = requests.get(url, data) + # logging.info("Success") + # print "Success!" 
+ # except: + # logging.info("Error with second try") + # logging.info(r.status_code) + # logging.info(r.request.url) + # #raise + # + # + # try: + # jsonReply=r.json() + # row.append(jsonReply['response']['docs'][0]['label'].encode(encoding='UTF-8')) + # except: + # row.append('NoLabel Found') + # obsoleteScore=obsoleteScore+1 + # print "No Label found in the first row" + # + # data={'q':row[1],'queryFields':'iri', 'fieldList': 'label', "ontology":onto2, "type":"class", "local":True} + # try: + # r = requests.get(url, data) + # except: + # time.sleep(60) + # logging.info("API exception, try again after 5 second delay") + # print "API exception, try again after 5 second delay" + # try: + # r = requests.get(url, data) + # logging.info("Success") + # print "Success" + # except: + # logging.info("Error with second try") + # logging.info(r.status_code) + # logging.info(r.request.url) + # + # + # try: + # jsonReply=r.json() + # row.append(jsonReply['response']['docs'][0]['label'].encode(encoding='UTF-8')) + # except: + # row.append('NoLabel Found') + # obsoleteScore=obsoleteScore+1 + # print "No Label found in the second row" with open(validationTargetFolder+onto1+"_"+onto2+'_'+stdNamed+'_validate.csv', 'wb') as f: @@ -162,7 +149,7 @@ logging.info("oxoDistanceTwo, "+str(params["oxoDistanceTwo"])) logging.info("oxoDistanceThree, "+str(params["oxoDistanceThree"])) logging.info("synFuzzyFactor, "+str(params["synFuzzyFactor"])) - logging.info("synOxoFactor, "+str(params["synOxoFactor"])) + #logging.info("synOxoFactor, "+str(params["synOxoFactor"])) msg="Stats for "+str(onto1)+"_"+str(onto2)+" validation "+stdNamed+"\n" msg=msg+"Number of std mappings, "+str(len(targetList))+"\n" diff --git a/dataloading/readme.md b/dataloading/readme.md new file mode 100755 index 0000000..c11749a --- /dev/null +++ b/dataloading/readme.md @@ -0,0 +1,6 @@ + +### Project structure + +- The *oxo folder* contains the scripts to load data into OxO. +- The *paxo folder* contains the scripts to generate predicted mappings. + - You can evaluate paxo using a gold standard set of mappings. The *standard folder* contains an example of how the gold standard mappings need to be formatted.
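The expected layout of such a gold standard file is shown by the dummy-standard.csv added in the next hunk. A minimal sketch of reading it (Python 2.7, matching the repo); the helper name read_standard is illustrative:

```python
import csv

# Sketch: parse a gold standard file with the five-column layout of
# dummy-standard.csv (URI 1, Label 1, URI 2, Label 2, Confidence);
# lines starting with '#' are treated as comments.
def read_standard(path):
    mappings = []
    with open(path, 'rb') as f:          # 'rb' for the Python 2 csv module
        for row in csv.reader(f):
            if not row or row[0].startswith('#'):
                continue
            uri1, label1, uri2, label2, confidence = row
            mappings.append((uri1, uri2, float(confidence)))
    return mappings
```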
diff --git a/dataloading/standard/dummy-standard.csv b/dataloading/standard/dummy-standard.csv new file mode 100644 index 0000000..5e647b4 --- /dev/null +++ b/dataloading/standard/dummy-standard.csv @@ -0,0 +1,2 @@ +#URI 1,Label 1,URI 2,Label 2,Confidence +http://purl.obolibrary.org/obo/DOID_0060263,porencephaly,http://id.nlm.nih.gov/mesh/2017/D065708,familial porencephalic white matter disease,1 \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 1dbdc46..7a3a521 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,12 +8,13 @@ services: - 8983:8983 volumes: - ./solr-config:/home/mysolrhome -# - ./solr-config/data:/home/mysolrhome/mapping/data + - ./solr-config/data:/home/mysolrhome/mapping/data command: ["solr", "-f"] neo4j: image: neo4j:3.1.1 environment: - - NEO4J_HEAP_MEMORY=2048 # configure the heap memory + - NEO4J_HEAP_MEMORY=10g # configure the heap memory + - NEO4J_dbms_memory_heap_maxSize=8g - NEO4J_AUTH=neo4j/dba cap_add: - SYS_RESOURCE diff --git a/oxo-model/src/main/java/uk/ac/ebi/spot/model/Scope.java b/oxo-model/src/main/java/uk/ac/ebi/spot/model/Scope.java index 0dd8a1b..5ceb0d4 100755 --- a/oxo-model/src/main/java/uk/ac/ebi/spot/model/Scope.java +++ b/oxo-model/src/main/java/uk/ac/ebi/spot/model/Scope.java @@ -6,6 +6,7 @@ * Samples, Phenotypes and Ontologies Team, EMBL-EBI */ public enum Scope { + EXACT, NARROWER, BROADER, diff --git a/oxo-model/src/main/java/uk/ac/ebi/spot/service/CypherQueryService.java b/oxo-model/src/main/java/uk/ac/ebi/spot/service/CypherQueryService.java index c5be467..df41013 100644 --- a/oxo-model/src/main/java/uk/ac/ebi/spot/service/CypherQueryService.java +++ b/oxo-model/src/main/java/uk/ac/ebi/spot/service/CypherQueryService.java @@ -220,7 +220,7 @@ public Object getMappingSummaryQuery(String query, Map params) { "UNWIND rels(path) as r\n" + "WITH n, r\n" + "MATCH (n)-[HAS_SOURCE]-(d:Datasource)\n" + - "RETURN {nodes: collect( distinct {id: n.curie, group : d.prefix}), links: collect (distinct {source: startNode(r).curie, target: endNode(r).curie} )} as result"; + "RETURN {nodes: collect( distinct {id: n.curie, text: n.label, group : d.prefix}), links: collect (distinct {source: startNode(r).curie, target: endNode(r).curie, scope: r.scope} )} as result"; @Override public Object getMappingSummaryGraph(String curie, int distance) { HashMap params = new HashMap(); diff --git a/oxo-model/src/main/java/uk/ac/ebi/spot/service/TermService.java b/oxo-model/src/main/java/uk/ac/ebi/spot/service/TermService.java index c412b42..432c065 100644 --- a/oxo-model/src/main/java/uk/ac/ebi/spot/service/TermService.java +++ b/oxo-model/src/main/java/uk/ac/ebi/spot/service/TermService.java @@ -244,7 +244,7 @@ public void rebuildIndexes(String source) { log.info("Reading " +x + " terms from term repository..."); for (IndexableTermInfo t : termGraphRepository.getAllIndexableTerms(x, chunks)) { chunk.add(DocumentBuilder.getDocumentFromTerm(t)); - // save solr in chuncks of 10000 + // save solr in chunks of 10000 if (chunk.size() == 10000) { log.info("Saving " +chunk.size() + " documents..."); documentRepository.save(chunk); diff --git a/oxo-web/src/main/asciidoc/api.adoc b/oxo-web/src/main/asciidoc/api.adoc index 3895ca1..2f0c8e9 100644 --- a/oxo-web/src/main/asciidoc/api.adoc +++ b/oxo-web/src/main/asciidoc/api.adoc @@ -316,3 +316,29 @@ You can also set the response format using HTTP headers of either `Accept: text/ include::{snippets}/search-by-ids-to-json/http-response.adoc[] 
+include::{snippets}/api-example/http-response.adoc[] + + + + +=== Datasource +Example request +include::{snippets}/datasources-list-example/curl-request.adoc[] +include::{snippets}/datasources-list-example/http-request.adoc[] +Example reply +include::{snippets}/datasources-list-example/http-response.adoc[] + + +=== mappings +Example request +include::{snippets}/mappings-list-example/curl-request.adoc[] +include::{snippets}/mappings-list-example/http-request.adoc[] +Example reply +include::{snippets}/mappings-list-example/http-response.adoc[] + +=== terms +Example request +include::{snippets}/terms-list-example/curl-request.adoc[] +include::{snippets}/terms-list-example/http-request.adoc[] +Example reply +include::{snippets}/terms-list-example/http-response.adoc[] diff --git a/oxo-web/src/main/asciidoc/developer.adoc b/oxo-web/src/main/asciidoc/developer.adoc index 1831ec9..ab64e1a 100644 --- a/oxo-web/src/main/asciidoc/developer.adoc +++ b/oxo-web/src/main/asciidoc/developer.adoc @@ -1,11 +1,9 @@ == Developer Resources === OxO source code - -* You can obtain the source code at https://github.com/EBISPOT/oxo +* You can obtain the source code at https://github.com/EBISPOT/OxO === OxO REST API -* Information about the API access to our services can be found at https://www.ebi.ac.uk/spot/oxo/docs/api +* Information about the API access to our services can be found link:../docs/api[here] + -//=== Local OxO installation -//* Details on how to build a local OLS can be found link:../docs/installation-guide[here] diff --git a/oxo-web/src/main/java/uk/ac/ebi/spot/controller/api/TermAssembler.java b/oxo-web/src/main/java/uk/ac/ebi/spot/controller/api/TermAssembler.java index d6f3eaa..198351e 100644 --- a/oxo-web/src/main/java/uk/ac/ebi/spot/controller/api/TermAssembler.java +++ b/oxo-web/src/main/java/uk/ac/ebi/spot/controller/api/TermAssembler.java @@ -28,7 +28,8 @@ public class TermAssembler implements ResourceAssembler> { @Autowired private TermService termService; - private static String olsBase = "https://www.ebi.ac.uk/ols/api/terms?id="; + //private static String olsBase = "https://www.ebi.ac.uk/ols/api/terms?id="; + private static String olsBase = "https://www.ebi.ac.uk/ols/api/terms?obo_id="; @Override public Resource toResource(Term term) { Resource resource = new Resource(term); diff --git a/oxo-web/src/main/java/uk/ac/ebi/spot/controller/ui/IndexController.java b/oxo-web/src/main/java/uk/ac/ebi/spot/controller/ui/IndexController.java index 9736303..e77b435 100644 --- a/oxo-web/src/main/java/uk/ac/ebi/spot/controller/ui/IndexController.java +++ b/oxo-web/src/main/java/uk/ac/ebi/spot/controller/ui/IndexController.java @@ -49,21 +49,25 @@ public String home(Model model) { return "index"; } + /* @RequestMapping({"docs"}) public String showDocsIndex(Model model) { return "redirect:docs/"; } // ok, this is bad, need to find a way to deal with trailing slashes and constructing relative URLs in the thymeleaf template... 
@RequestMapping({"docs/"}) - public String showDocsIndex2(Model model) { + public String showDocsIndex2(Model model) {*/ + /* + @RequestMapping(path = "docs") + public String docs(Model model) { return "docs"; - } + } @RequestMapping({"docs/{page}"}) public String showDocs(@PathVariable("page") String pageName, Model model) { model.addAttribute("page", pageName); return "docs-template"; - } + }*/ @RequestMapping(path = "about") public String about(Model model) { @@ -92,4 +96,31 @@ public String myAccount(Model model, @AuthenticationPrincipal OrcidPrinciple pri return "myaccount"; } + + + + + + @RequestMapping({"docs"}) + public String showDocsIndex(Model model) { + return "redirect:docs/index"; + } + // ok, this is bad, need to find a way to deal with trailing slashes and constructing relative URLs in the thymeleaf template... + @RequestMapping({"docs/"}) + public String showDocsIndex2(Model model) { + return "redirect:index"; + } + + @RequestMapping({"docs/{page}"}) + public String showDocs(@PathVariable("page") String pageName, Model model) { + model.addAttribute("page", pageName); + return "docs-template"; + } + + + + + + + } diff --git a/oxo-web/src/main/resources/static/css/oxo.css b/oxo-web/src/main/resources/static/css/oxo.css index ecc1cc6..6e02c4f 100644 --- a/oxo-web/src/main/resources/static/css/oxo.css +++ b/oxo-web/src/main/resources/static/css/oxo.css @@ -10,17 +10,13 @@ margin-top: 1.1em; padding:2px; padding-right:4px; color: white; - /*font-size: larger;*/ - border-radius: 3px; + /*font-size: larger; + border-radius: 3px;*/ display: inline-block; margin-right: 4px; vertical-align: middle; } -#ex1Slider .slider-selection { - background: #BABABA; -} - .context-help-wrapper { position: relative; @@ -42,6 +38,7 @@ margin-top: 1.1em; color: inherit; } + .context-help-label:after { font-family: 'EBI-Generic'; font-size: 70%; @@ -55,3 +52,107 @@ margin-top: 1.1em; cursor: pointer; } +/* Here we introduce new css for the new version*/ +.alert-warning{ + background-color: #faebcc; + margin-bottom: 10px; + margin-top: 5px; + padding: 10px 10px 10px 10px; + border-radius: 0px; +} + +.grayBackground{ +background-color: #f2f2f2; +/*border-radius: 25px;*/ +padding: 10px 10px 10px 10px; +margin: 10px; +} + + +.marginTop{ + margin-top:60px; +} + +.marginBottom{ + margin-bottom:80px; +} + +#network{ + height:500px; + width: 100%; + border: 0px black solid; +} + + + +/*Migrated fomr fomrs.lens.css*/ +label { + display: inline-block !important; + max-width: 100%; + margin-bottom: 5px; + font-weight: 700; + +} + + +/* Migrated from OLS css (used previously by OxO) to the oxo.css*/ +.term-source { + background-color: #ffac1b; + padding: 2px; + padding-right: 2px; + padding-right: 4px; + color: white; + font-size: larger; + border-radius: 3px; + display: inline-block; + margin-right: 4px; + vertical-align: middle; +} + +a .ontology-source { + background-color: #5FBDCE; + padding: 2px; + padding-right: 2px; + padding-right: 4px; + color: white; + font-size: larger; + border-radius: 3px; + display: inline-block; + margin-right: 4px; + vertical-align: middle; + border-bottom: none; +} + + +/* datatable customizing */ +.dataTables_length { + white-space: nowrap; +} + +.dataTables_info{ + margin-top: 30px; +} + +#example_length select{ + width:60px; +} + + +/*Help Pages*/ + +/* API doc*/ + +.listingblock{ + background: #ededed; + color: #fff; + text-shadow: none; + padding: 3%; + border-width: 1px; + border-style: dotted; + overflow: auto; +} + +.listingblock .content .highlight{ + white-space: pre 
!important; + +} \ No newline at end of file diff --git a/oxo-web/src/main/resources/static/img/OXO_logo_2017_colour_background.png b/oxo-web/src/main/resources/static/img/OXO_logo_2017_colour_background.png new file mode 100644 index 0000000..0b2ed6c Binary files /dev/null and b/oxo-web/src/main/resources/static/img/OXO_logo_2017_colour_background.png differ diff --git a/oxo-web/src/main/resources/static/js/oxo-graph.js b/oxo-web/src/main/resources/static/js/oxo-graph.js index 327d517..5b99fa4 100644 --- a/oxo-web/src/main/resources/static/js/oxo-graph.js +++ b/oxo-web/src/main/resources/static/js/oxo-graph.js @@ -1,5 +1,96 @@ +var colorMap={"efo": "blue", "uberon":"green", "Wikipedia": "red", "BTO": "yellow", "XAO":"black", "ZFA":"gray"} + + +$(document).ready(function() { + drawGraph(); +}) ; + +function drawGraph () { + $("#mapping-vis-spinner").show(); + + var curie = $("div[data-get-mapping-vis]").data('get-mapping-vis'); + var relativePath = $("div[data-get-mapping-vis]").data("api-path") ? $("div[data-get-mapping-vis]").data("api-path") : ''; + var distance = $("input[name=distance]").val() ? $("input[name=distance]").val() : 1; + + $.getJSON(relativePath+"api/terms/"+curie+"/graph?distance="+distance, function(json) {}) + .success(function(json){ + var container = document.getElementById('network'); + console.log(json) + + for(var i=0;i 0) { $('#unmapped').text(noMappings) @@ -301,16 +303,28 @@ function getApiPath(element) { return $(element).data("api-path") ? $(element).data("api-path") : ''; } + + +/* */ function progressComplete() { + $("#searching_bar").hide(); if (withProgress) { $( ".progress-label" ).text( "Complete!" ); } } function addProgressBar() { + //console.log("In add Progress Bar, but nothing happens here anymore?") + /* var progressbar = $( "#progressbar" ), progressLabel = $( ".progress-label" ); + var progressbar = $( "#progressbar" ), + progressLabel = $( ".aria-valuetext" ); + + + var progressbar = $('

25%

') + progressbar.progressbar({ value: false, @@ -319,13 +333,15 @@ function addProgressBar() { }, complete: function() { progressLabel.text( "Complete!" ); + + $("#searching_bar").hide(); } - }); + });*/ } function updateProgress(value) { - if (withProgress) { - $("#progressbar").progressbar( "value", value) - } + //console.log("In Add progress bar, but that is useless now isn't it? ") + //if (withProgress) { $("#progressbar").progressbar( "value", value) } + } diff --git a/oxo-web/src/main/resources/templates/about.html b/oxo-web/src/main/resources/templates/about.html index 1798af4..8dbae8f 100644 --- a/oxo-web/src/main/resources/templates/about.html +++ b/oxo-web/src/main/resources/templates/about.html @@ -4,261 +4,55 @@ - - - + - - - Docs < Ontology Xref Service < EMBL-EBI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Ontology Xref Service < EMBL-EBI + - - - - -
- -
- -
- - - EMBL European Bioinformatics Institute - - - -
- -
- - - - -
-

Ontology Xref Service

-
- - - - -
- -
- - - -
-
- -
-
- - -
- - - - -
- -

OxO is an database of ontology cross-references (xrefs) extracted from public ontologies and databases. Most of these cross-references have been extracted from ontologies in the - Ontology Lookup Service by searching for database cross-reference annotations on terms. We have supplemented these cross-references with - mappings from a subset of vocabularies in the UMLS.

- -

- The semantics of a cross-reference are weakly specified, in most cases they mean some kind of operational equivalence, but there is no guarantee. - Sometimes cross-references are used to indicate other types of relationships such as parent/child or that the terms are related in some other way (such as linking a disease concept to a pathway accession that is somehow related to that disease). - OxO aims to provide simple and convenient access to cross-references, but is not a mapping prediction service, so always treat these xrefs with caution, especially if you are seeking true equivalence between two ontologies. -

- -

- OxO gives you access to existing mappings, you can also explore the neighbourhood of a mapping using the distance controller. By default OxO shows you direct asserted mappings, but you can use the slider on various pages to look for mappings that are up to - three hops away. You may see some terms that don't have labels associated to them, we are doing our best to find labels for all of these terms, but sometimes the labels are missing from the sources that we extract mappings from. -

- -

- OxO is developed by the Samples, Phenotypes and Ontologies team. If you have any questions about OxO please contact us. -

- -

Privacy Policy

- -

The General Data Protection Regulation (GDPR) will apply in the UK from 25 May 2018. It will replace the 1998 Data Protection Act and introduce new rules on privacy notices, as well as processing and safeguarding personal data.

- -

- This website requires cookies, and the limited processing of your personal data in order to function. By using the site you are agreeing to this as outlined in our Privacy Notice and Terms of Use. -

- -

- OXO Submission Service applies to the data submitted to OXO (eg. Ontology mappings) or the data pulled out from other data providers (such as the OBO foundry ontologies). All requests must come through the oxo-submission@ebi.ac.uk e-mail. -

- -

- OLS Mail Service applies to our public e-mail lists; ols-support [at] ebi.ac.uk, ols-announce [at] ebi.ac.uk and ontology-tools-support [at] ebi.ac.uk. -

- -
- -
- -
-
- - +
+

OxO is a database of ontology cross-references (xrefs) extracted from public ontologies and databases. Most of these cross-references have been extracted from ontologies in the + Ontology Lookup Service by searching for database cross-reference annotations on terms. We have supplemented these cross-references with + mappings from a subset of vocabularies in the UMLS.

-
+

+ The semantics of a cross-reference are weakly specified; in most cases they mean some kind of operational equivalence, but there is no guarantee. + Sometimes cross-references are used to indicate other types of relationships, such as parent/child, or that the terms are related in some other way (such as linking a disease concept to a pathway accession that is somehow related to that disease). + OxO aims to provide simple and convenient access to cross-references, but it is not a mapping prediction service, so always treat these xrefs with caution, especially if you are seeking true equivalence between two ontologies. -

+

+ OxO gives you access to existing mappings; you can also explore the neighbourhood of a mapping using the distance controller. By default OxO shows you direct asserted mappings, but you can use the slider on various pages to look for mappings that are up to + three hops away. You may see some terms that don't have labels associated with them; we are doing our best to find labels for all of these terms, but sometimes the labels are missing from the sources that we extract mappings from. -

+

+ OxO is developed by the Samples, Phenotypes and Ontologies team. If you have any questions about OxO please contact us. +

+ - - - \ No newline at end of file diff --git a/oxo-web/src/main/resources/templates/contact.html b/oxo-web/src/main/resources/templates/contact.html index 0a26709..8e0120f 100644 --- a/oxo-web/src/main/resources/templates/contact.html +++ b/oxo-web/src/main/resources/templates/contact.html @@ -34,8 +34,8 @@ - - + + diff --git a/oxo-web/src/main/resources/templates/datasource.html b/oxo-web/src/main/resources/templates/datasource.html index a2a32cf..e7a1988 100644 --- a/oxo-web/src/main/resources/templates/datasource.html +++ b/oxo-web/src/main/resources/templates/datasource.html @@ -4,323 +4,137 @@ - - - + - - - Datasource < Ontology Xref Service < EMBL-EBI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Ontology Xref Service < EMBL-EBI + - - -
+ -
+ -
+
+
- - EMBL European Bioinformatics Institute - - + +
+ Success message goes here.
-
- - - - -
-

Ontology Xref Service

-
- - - - -
- -
- - - - + +
+ Error message goes here.
-
+
-
-
+ - -
- Success message goes here. -
- - -
- Error message goes here. -
- - -
- - - -
-
-

Datasource

+
+
+ +

id...

+
+
+
+ id...
-
-
- id... -
-
- id... -
-
- Version info: id... -
-
- Licence info: id... -
-
- Prefix: id... -
-
- +
+ Version info: id... +
+
+ Licence info: id... +
+
+ Prefix: id... +
+
+ + Type: id... + +
+
- - Type: id... - + class="button secondary clickable">View in OLS -
- -
+
-
-
-

Mappings

-
-
+
+
+

Mappings

+
+
-
- - - Mapping Distance: -
- -
-
-
+
+ + + + + + + +
-
- -
-
+
+
+
-
+
+
+
-
-
-
- -
- -
+ - - + - - - - - - - - - - \ No newline at end of file + + + diff --git a/oxo-web/src/main/resources/templates/docs-template.html b/oxo-web/src/main/resources/templates/docs-template.html index ae2201f..069667b 100644 --- a/oxo-web/src/main/resources/templates/docs-template.html +++ b/oxo-web/src/main/resources/templates/docs-template.html @@ -4,316 +4,70 @@ - - - - - - - - Docs < Ontology Xref Service < EMBL-EBI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + Ontology Xref Service < EMBL-EBI + - - - - -
- -
- -
- - - EMBL European Bioinformatics Institute - - + + -
- -
- - - - -
-

Ontology Xref Service

-
+
+
- + - -
- -
+
- - - - - - - - - - - - - - - - - - - - - - - -
+ -
- -
-
- -
- -
-
- -
- - - - - - - -
+ -
- -
- - - -
- -
+ - - - - - - - + + - - - + - - diff --git a/oxo-web/src/main/resources/templates/login.html b/oxo-web/src/main/resources/templates/login.html index 68d5a9c..ea57898 100644 --- a/oxo-web/src/main/resources/templates/login.html +++ b/oxo-web/src/main/resources/templates/login.html @@ -4,7 +4,7 @@ - + @@ -21,7 +21,7 @@ - + @@ -34,8 +34,8 @@ - - + + diff --git a/oxo-web/src/main/resources/templates/mapping.html b/oxo-web/src/main/resources/templates/mapping.html index bacd44a..e9911ba 100644 --- a/oxo-web/src/main/resources/templates/mapping.html +++ b/oxo-web/src/main/resources/templates/mapping.html @@ -4,173 +4,31 @@ - - - + - - - Mapping < Ontology Xref Service < EMBL-EBI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Ontology Xref Service < EMBL-EBI + - - - - -
- -
- -
- - - EMBL European Bioinformatics Institute - -
- -
- - - - -
-

Ontology Xref Service

-
- - - - -
- -
- - - - -
- - -
+ -
-
+ -
+
- +
Success message goes here.
@@ -181,23 +39,20 @@
- -
-
-

Mapping info

+
+
+

Mapping info

-
+
- From term: + From term cxxxxx (id...) -
-
- Scope: id... + Scope: id...
- Created date: id... + Created date: id...
- Mapping source: - id... + Mapping source: + id... id...
- Source type: id... + Source type: id...
@@ -227,75 +82,10 @@

Mapping info

-
-
-
- - - -
- - - - - - - + - - - \ No newline at end of file diff --git a/oxo-web/src/main/resources/templates/mappings.html b/oxo-web/src/main/resources/templates/mappings.html index d413724..1146c07 100644 --- a/oxo-web/src/main/resources/templates/mappings.html +++ b/oxo-web/src/main/resources/templates/mappings.html @@ -4,171 +4,33 @@ - - - + - - - Mapping < Ontology Xref Service < EMBL-EBI - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + Ontology Xref Service < EMBL-EBI + - - - - -
- -
- -
- - - EMBL European Bioinformatics Institute - - - -
- -
- - - -
-

Ontology Xref Service

-
- - - -
- - -
- - - - -
+ -
+ -
-
-
+
- +
Success message goes here.
@@ -187,13 +49,13 @@ -
-
-

Direct mappings

+
+
+

Direct mappings

-
+
-
+
@@ -233,7 +95,7 @@

Direct mappings

id... - id... + (id...) @@ -246,13 +108,12 @@

Direct mappings

-
-
-

Derived mapping provenance

+
+
+

Derived mapping provenance

-
- -
+
+
@@ -292,7 +153,7 @@

Derived mapping provenance

id... - id... + (id...) @@ -305,70 +166,10 @@

Derived mapping provenance

+ - - - - - - - - - - - - \ No newline at end of file diff --git a/oxo-web/src/main/resources/templates/myaccount.html b/oxo-web/src/main/resources/templates/myaccount.html index 5048cbe..d0d34b3 100644 --- a/oxo-web/src/main/resources/templates/myaccount.html +++ b/oxo-web/src/main/resources/templates/myaccount.html @@ -4,7 +4,7 @@ - + @@ -21,7 +21,7 @@ - + @@ -34,8 +34,8 @@ - - + + diff --git a/oxo-web/src/main/resources/templates/search.html b/oxo-web/src/main/resources/templates/search.html index 90fbee6..d12b6bf 100644 --- a/oxo-web/src/main/resources/templates/search.html +++ b/oxo-web/src/main/resources/templates/search.html @@ -5,221 +5,106 @@ - - - - - Search < Ontology Xref Service < EMBL-EBI - - - + + Ontology Xref Service < EMBL-EBI + - + + + + - - - - - - - - + - - + #ex1Slider .slider-selection { + background: #BABABA; + } - - - + tr.group, + tr.group:hover { + background-color: #ddd !important; + } - - - + .ui-progressbar { + position: relative; + } + .progress-label { + position: absolute; + left: 45%; + top: 4px; + font-weight: bold; + text-shadow: 1px 1px 0 #fff; + } - - + .ui-progressbar .ui-progressbar-value { + margin: 0px; + height: 100%; + } - - - - + + --> - + .dataTables_filter { + width:30%; + background: red; + } + + - - - - - - - - - + - - -
- -
- -
- - - EMBL European Bioinformatics Institute - - - -
- -
- - - - -
-

Ontology Xref Service

-
- - - - -
- -
- - - - -
- - -
- -
-
- - -
- +
+
-
-

Mapping results

- -
- -
-
-
-
Fetching mappings...
-
+

Mapping results

+
+
- -
- - -
-
- -
- - -
- -
-
-
-
+

Select a term to see more information. The evidence column tells you how many times we have seen this mapping, and the distance is how many hops across other mappings you need to go to find this mapping. Distance 1 is a direct mapping; the greater the distance, the less likely it is that the mapping holds true. Max distance is set to . @@ -230,16 +115,14 @@

Mapping results

You are trying to view over 50,000 mappings! Please use the download link.
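The distance control described above drives the same endpoint that oxo-graph.js fetches (api/terms/{curie}/graph?distance=N), whose payload shape is built by the Cypher query in CypherQueryService earlier in this diff. A minimal sketch of querying it outside the UI; the curie, the distance value and the response unwrapping are assumptions:

```python
import requests

# Sketch: fetch the mapping neighbourhood of a term up to two hops away,
# assuming the JSON mirrors the {nodes, links} result of the Cypher query.
base = "https://www.ebi.ac.uk/spot/oxo/"
reply = requests.get(base + "api/terms/EFO:0001360/graph", params={"distance": 2})
graph = reply.json()
for node in graph.get("nodes", []):
    # nodes carry id (curie), text (label) and group (datasource prefix)
    print node["id"], node.get("text")
for link in graph.get("links", []):
    # links carry source, target and the mapping scope (e.g. EXACT)
    print link["source"], "->", link["target"], link.get("scope")
```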
- -
-
+
- - -
+ + + + - - + + + - - + + Ontology Xref Service < EMBL-EBI + + -
-
-
+ - - EMBL European Bioinformatics Institute + - +
+ +
+ Success message goes here.
-
- - - - -
-

Ontology Xref Service

-
- - - - -
- - -
- - - - + +
+ Error message goes here.
+ -
- -
-
- -
+
+ - -
- Success message goes here. -
- - -
- Error message goes here. -
- - - -
- -
- -
-
-

Term info

-
-
+
+
+
+
Term info
+
+
-
- id... +
+ id... (id...) -
-
- URI: id... -
-
- Datasource: - id... - id... -
-
- Type: id... -
- - - - -
- -
- - - - -
- -
-
-

Add new Mapping

-
-
- -
- - - -
- -
- -
- - -
-
- - - - - - -
-
-
+
+ URI: id...
- -
- - -
- -
-
-

Network

-
-
-
-
- -
-
-
-
+
+ Datasource: + id... + id...
+
+ Type: id... +
+ + + - +
+ +
-
-
-
-
-

Mappings

-
-
- -
+
+
+
Add new Mapping
+
+
-
+
-
- Mapping Distance: -
- -
- - - -
+ +
-
- -
+
+ +
+ +
- - -
+ + + + + +
+
+
+
-
+
+
+
+
Network
+
+
+
+
+ +
+
-
-
- - -
+ - - - - - - + - - - - - - - - - - - + + + - - - \ No newline at end of file + diff --git a/oxo-web/src/test/java/ApiDocumentation.java b/oxo-web/src/test/java/ApiDocumentation.java index d925300..412173d 100644 --- a/oxo-web/src/test/java/ApiDocumentation.java +++ b/oxo-web/src/test/java/ApiDocumentation.java @@ -11,6 +11,8 @@ import org.neo4j.ogm.response.model.QueryResultModel; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.SpringApplicationConfiguration; +import org.springframework.boot.test.autoconfigure.restdocs.AutoConfigureRestDocs; +import org.springframework.boot.test.autoconfigure.web.servlet.WebMvcTest; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.data.domain.Page; @@ -30,8 +32,14 @@ import org.springframework.test.web.servlet.setup.MockMvcBuilders; import org.springframework.web.context.WebApplicationContext; import uk.ac.ebi.spot.OxoWebApp; -import uk.ac.ebi.spot.model.*; -import uk.ac.ebi.spot.service.*; +import uk.ac.ebi.spot.controller.api.MappingController; +import uk.ac.ebi.spot.model.Datasource; +import uk.ac.ebi.spot.model.Mapping; +import uk.ac.ebi.spot.model.SourceType; +import uk.ac.ebi.spot.model.Term; +import uk.ac.ebi.spot.service.DatasourceService; +import uk.ac.ebi.spot.service.MappingService; +import uk.ac.ebi.spot.service.TermService; import javax.servlet.RequestDispatcher; @@ -62,6 +70,7 @@ @RunWith(SpringJUnit4ClassRunner.class) @SpringBootTest(classes = OxoWebApp.class) @WebAppConfiguration +/*@Ignore*/ public class ApiDocumentation { @Rule @@ -94,8 +103,7 @@ public class ApiDocumentation { @Before public void setUp() { - Mockito.when(neo4jTemplate.query(Mockito.anyString(), Mockito.anyMap(), Mockito.anyBoolean())).thenReturn(new QueryResultModel(null, null)); - + System.out.print("Start the Tests"); this.document = document("{method-name}" , preprocessRequest(prettyPrint()), @@ -104,16 +112,16 @@ public void setUp() { this.mockMvc = MockMvcBuilders.webAppContextSetup(this.context) .apply(documentationConfiguration(this.restDocumentation).uris() - .withScheme("https") - .withHost("www.ebi.ac.uk") - .withPort(443) + .withScheme("https") + .withHost("www.ebi.ac.uk/spot/oxo") + .withPort(80) ) .alwaysDo(this.document) .build(); } - + /* @Test public void pageExample () throws Exception { @@ -145,8 +153,7 @@ public void pageExample () throws Exception { this.mockMvc.perform(requestBuilder) .andExpect(status().isOk()); - - } + }*/ @Test @@ -172,6 +179,8 @@ public void errorExample() throws Exception { ; } + + /* @Test public void apiExample () throws Exception { @@ -188,7 +197,7 @@ public void apiExample () throws Exception { ); this.mockMvc.perform(get("/spot/oxo/api").contextPath("/spot/oxo").accept(MediaType.APPLICATION_JSON)) .andExpect(status().isOk()); - } + }*/ @Test @@ -225,6 +234,7 @@ public void mappingsListExample () throws Exception { } + /* @Test public void mappingExample () throws Exception { @@ -364,5 +374,5 @@ public void searchByIdsToJson () throws Exception { ObjectMapper mapper = new ObjectMapper(); this.mockMvc.perform(post("/spot/oxo/api/search").contextPath("/spot/oxo").contentType(MediaType.APPLICATION_JSON).accept(MediaType.APPLICATION_JSON).content(mapper.writeValueAsString(searchRequest))) .andExpect(status().isOk()); - } + }*/ }