diff --git a/abstractor b/abstractor index f84c548..0308f0b 100644 --- a/abstractor +++ b/abstractor @@ -1,5 +1,6 @@ #! /usr/bin/python3 +import logging import argparse from libabstractor.SparqlQuery import SparqlQuery from libabstractor.QueryLibrary import QueryLibrary @@ -19,35 +20,61 @@ class Abstractor(object): parser.add_argument("-s", "--source", type=str, help="RDF data source (SPARQL endpoint url or path to RDF file)", required=True) parser.add_argument("-t", "--source-type", choices=['sparql', 'xml', 'turtle', 'nt'], help="Source format", default="sparql") - parser.add_argument("--askomics-prefix", type=str, help="AskOmics prefix", default="http://www.semanticweb.org/user/ontologies/2018/1#") + parser.add_argument("--askomics-internal-namespace", type=str, help="AskOmics internal namespace", default="http://askomics.org/internal/") parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.rdf") parser.add_argument("-f", "--output-format", choices=['xml', 'turtle', 'nt'], help="RDF format", default="turtle") parser.add_argument("--owl", default=False, action='store_true', help="Use OWL ontology") + parser.add_argument("-v", "--verbosity", action="count", default=0, help="increase output verbosity") + self.args = parser.parse_args() + logging_level = logging.CRITICAL + if self.args.verbosity == 1: + logging_level = logging.ERROR + if self.args.verbosity == 2: + logging_level = logging.WARNING + if self.args.verbosity == 3: + logging_level = logging.INFO + if self.args.verbosity > 3: + logging_level = logging.DEBUG + + logging.basicConfig(level=logging_level) + def main(self): """main""" - sparql = SparqlQuery(self.args.source, self.args.source_type, self.args.askomics_prefix) + sparql = SparqlQuery(self.args.source, self.args.source_type) library = QueryLibrary() - rdf = RdfGraph(self.args.askomics_prefix) + rdf = RdfGraph(self.args.askomics_internal_namespace) + + if self.args.source_type == "sparql": + rdf.add_location(self.args.source) 
# Use owl ontology if self.args.owl: + logging.debug("Use OWL Ontology") result = sparql.process_query(library.ontologies) for res in result: + logging.debug(res["ontology"]) + logging.debug("Get entities and relation") rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations_with_ontology(res["ontology"]))) + logging.debug("Get decimal attributes") rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes_with_ontology(res["ontology"]))) + logging.debug("Get text attributes") rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes_with_ontology(res["ontology"]))) # All relations else: + logging.debug("Get entities and relation") rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations)) + logging.debug("Get decimal attributes") rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes)) + logging.debug("Get text attributes") rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes)) + logging.debug("Write RDF ({}) into {}".format(self.args.output_format, self.args.output)) rdf.graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None) diff --git a/libabstractor/RdfGraph.py b/libabstractor/RdfGraph.py index 202790e..4cc4a91 100644 --- a/libabstractor/RdfGraph.py +++ b/libabstractor/RdfGraph.py @@ -13,18 +13,19 @@ class RdfGraph(object): The RDF graph """ - def __init__(self, askomics_prefix): + def __init__(self, namespace_internal): """init Parameters ---------- - askomics_prefix : str - AskOmics prefix + namespace_internal : str + AskOmics internal namespace """ - self.gprefix = rdflib.namespace.Namespace(askomics_prefix) + self.namespace_internal = rdflib.namespace.Namespace(namespace_internal) self.graph = rdflib.Graph() - self.graph.bind('askomics', askomics_prefix) + self.graph.bind('askomics', namespace_internal) 
+ self.prov = rdflib.Namespace('http://www.w3.org/ns/prov#') def check_entity(self, entity): """Check if entity is correct (not rdf rdfs owl or virtuoso thing) @@ -39,18 +40,25 @@ def check_entity(self, entity): bool True if entity is a true one """ - excluded_prefixes = ( - "http://www.w3.org/2002/07/owl#", - "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "http://www.w3.org/2000/01/rdf-schema#", - "http://www.openlinksw.com/schemas/virtrdf#", - "http://www.w3.org/ns/sparql-service-description#" + excluded_namespaces = ( + "http://www.w3.org", + "http://www.openlinksw.com" ) - if entity.lower().startswith(excluded_prefixes): + if entity.lower().startswith(excluded_namespaces): return False return True + def add_location(self, location): + """Add location of the data + + Parameters + ---------- + location : str + URL of distant endpoint + """ + self.graph.add((rdflib.BNode("graph"), self.prov.atLocation, rdflib.Literal(location))) + def add_entities_and_relations(self, sparql_result): """Add entities and relation in the rdf graph @@ -70,25 +78,25 @@ def add_entities_and_relations(self, sparql_result): # Source entity if self.check_entity(source_entity) and source_entity not in entities: entities.append(source_entity) - self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.gprefix["entity"])) - self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.gprefix["startPoint"])) + self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.namespace_internal["entity"])) + self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.namespace_internal["startPoint"])) self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, rdflib.OWL.Class)) - self.graph.add((rdflib.URIRef(source_entity), self.gprefix["instancesHaveNoLabels"], rdflib.Literal(True))) + self.graph.add((rdflib.URIRef(source_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True))) self.graph.add((rdflib.URIRef(source_entity), 
rdflib.RDFS.label, rdflib.Literal(self.get_label(source_entity)))) # Target entity if self.check_entity(target_entity) and target_entity not in entities: entities.append(target_entity) - self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.gprefix["entity"])) - self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.gprefix["startPoint"])) + self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.namespace_internal["entity"])) + self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.namespace_internal["startPoint"])) self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, rdflib.OWL.Class)) - self.graph.add((rdflib.URIRef(target_entity), self.gprefix["instancesHaveNoLabels"], rdflib.Literal(True))) + self.graph.add((rdflib.URIRef(target_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True))) self.graph.add((rdflib.URIRef(target_entity), rdflib.RDFS.label, rdflib.Literal(self.get_label(target_entity)))) # Relation if self.check_entity(relation): self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, rdflib.OWL.ObjectProperty)) - self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, self.gprefix["AskomicsRelation"])) + self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, self.namespace_internal["AskomicsRelation"])) self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.label, rdflib.Literal(self.get_label(relation)))) self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.domain, rdflib.URIRef(source_entity))) self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.range, rdflib.URIRef(target_entity))) @@ -105,8 +113,7 @@ def add_decimal_attributes(self, sparql_result): entity = result["entity"] attribute = result["attribute"] - if self.check_entity(entity) and self.check_entity(attribute): - """""" + if self.check_entity(entity): self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty)) self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, 
rdflib.Literal(self.get_label(attribute)))) self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity))) @@ -124,12 +131,14 @@ def add_text_attributes(self, sparql_result): entity = result["entity"] attribute = result["attribute"] - if self.check_entity(entity) and self.check_entity(attribute): - - self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty)) - self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(self.get_label(attribute)))) - self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity))) - self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string)) + if self.check_entity(entity): + if attribute == "http://www.w3.org/2000/01/rdf-schema#label": + self.graph.remove((rdflib.URIRef(entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True))) + else: + self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty)) + self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(self.get_label(attribute)))) + self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity))) + self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string)) def get_label(self, uri): """Get a label from an URI diff --git a/libabstractor/SparqlQuery.py b/libabstractor/SparqlQuery.py index 8986221..218878f 100644 --- a/libabstractor/SparqlQuery.py +++ b/libabstractor/SparqlQuery.py @@ -1,3 +1,4 @@ +import logging import rdflib from SPARQLWrapper import SPARQLWrapper, JSON @@ -19,7 +20,7 @@ class SparqlQuery(object): Description """ - def __init__(self, source, source_type, prefix): + def __init__(self, source, source_type): """Init Parameters @@ -28,12 +29,9 @@ def __init__(self, source, source_type, prefix): Description source_type : TYPE Description - prefix : string - Prefix URI """ self.source = source self.source_type = source_type - self.prefix = prefix 
self.prefixes = { "owl:": "http://www.w3.org/2002/07/owl#", "rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", @@ -43,8 +41,7 @@ def __init__(self, source, source_type, prefix): "xsd:": "http://www.w3.org/2001/XMLSchema#", "skos:": "http://www.w3.org/2004/02/skos/core#", "chebi:": "http://purl.obolibrary.org/obo/", - "drugbankdrugs:": "http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/", - "askomics:": self.prefix + "drugbankdrugs:": "http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/" } # if source is a file, load it in a rdflib graph @@ -178,7 +175,7 @@ def process_query(self, query): Parsed results """ # prefixed_query = self.get_sparl_prefix() + query - # print(query) + logging.debug(query) if self.source_type == "sparql": return self.parse_sparql_results(self.execute_sparql_query(query)) else: diff --git a/setup.py b/setup.py index 0b66d7d..3dcd5df 100644 --- a/setup.py +++ b/setup.py @@ -2,12 +2,12 @@ setup( name='abstractor', - version='2.0.0', + version='4.0.0', description='Abstraction generator for AskOmics, from a distant SPARQL endpoint', author='Xavier Garnier', author_email='xavier.garnier@irisa.fr', url='https://github.com/askomics/abstractor', - download_url='https://github.com/askomics/abstractor/archive/2.0.0.tar.gz', + download_url='https://github.com/askomics/abstractor/archive/4.0.0.tar.gz', install_requires=['SPARQLWrapper'], packages=find_packages(), license='AGPL',