Skip to content

Commit

Permalink
Merge pull request #5 from xgaia/4.1.0
Browse files Browse the repository at this point in the history
Release 4.1.0
  • Loading branch information
xgaia authored Sep 28, 2020
2 parents 3c3c9ea + c83daf4 commit 0396c25
Show file tree
Hide file tree
Showing 6 changed files with 224 additions and 24 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ Example with [NeXtProt](https://sparql.nextprot.org):
abstractor -s https://sparql.nextprot.org -o nextprot_abstraction.ttl
```


Example with [Uniprot](https://sparql.uniprot.org), using OWL:

```bash
abstractor -s https://sparql.uniprot.org -o uniprot_abstraction.ttl -m owl
```

#### With a RDF file

```bash
Expand Down
56 changes: 40 additions & 16 deletions abstractor
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,22 @@ class Abstractor(object):

parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.rdf")
parser.add_argument("-f", "--output-format", choices=['xml', 'turtle', 'nt'], help="RDF format", default="turtle")
parser.add_argument("--owl", default=False, action='store_true', help="Use OWL ontology")

parser.add_argument("-m", "--mode", choices=["all", "batch", "owl", "askomics"], help="Scan mode: all: 3 queries to get all entities,\
relation and attributes. batch: 3 queries for each entity. owl: 3 queries using existant owl ontology. askomics: 3 queries using askomics ontology (NOT IMPLEMENTED YET)", default="all")

parser.add_argument("-v", "--verbosity", action="count", help="increase output verbosity")

self.args = parser.parse_args()

logging_level = logging.CRITICAL
if self.args.verbosity == 1:
if self.args.verbosity is None or self.args.verbosity == 1:
logging_level = logging.ERROR
if self.args.verbosity == 2:
elif self.args.verbosity == 2:
logging_level = logging.WARNING
if self.args.verbosity == 3:
elif self.args.verbosity == 3:
logging_level = logging.INFO
if self.args.verbosity > 3:
elif self.args.verbosity > 3:
logging_level = logging.DEBUG

logging.basicConfig(level=logging_level)
Expand All @@ -52,9 +54,36 @@ class Abstractor(object):
if self.args.source_type == "sparql":
rdf.add_location(self.args.source)

# Use owl ontology
if self.args.owl:
logging.debug("Use OWL Ontology")
if self.args.mode == "all":
logging.debug("Get entities and relation")
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations))
logging.debug("Get decimal attributes")
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes))
logging.debug("Get text attributes")
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes))

elif self.args.mode == "batch":
logging.debug("Get all entities, then, get relations and attributes for each entity")
entities = sparql.process_query(library.get_entities)
rdf.add_entities(entities)
for entity_dict in entities:
entity = entity_dict["entity"]
if rdf.check_entity(entity):
# relation
relations = sparql.process_query(library.get_relation_for_entity(entity))
for relation_dict in relations:
rdf.add_relation(entity, relation_dict["relation"], relation_dict["target_entity"])
# numeric attribute
attributes = sparql.process_query(library.get_numeric_attribute_for_entity(entity))
for attribute_dict in attributes:
rdf.add_attribute(entity, attribute_dict["attribute"])

text_attributes = sparql.process_query(library.get_text_attribute_for_entity(entity))
for attribute_dict in text_attributes:
rdf.add_attribute(entity, attribute_dict["attribute"], decimal=False)

elif self.args.mode == "owl":
logging.debug("Use OWL ontology")
result = sparql.process_query(library.ontologies)
for res in result:
logging.debug(res["ontology"])
Expand All @@ -65,14 +94,9 @@ class Abstractor(object):
logging.debug("Get text attributes")
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes_with_ontology(res["ontology"])))

# All relations
else:
logging.debug("Get entities and relation")
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations))
logging.debug("Get decimal attributes")
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes))
logging.debug("Get text attributes")
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes))
elif self.args.mode == "askomics":
logging.debug("Use AskOmics ontology")
raise NotImplementedError

logging.debug("Write RDF ({}) into {}".format(self.args.output_format, self.args.output))
rdf.graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None)
Expand Down
121 changes: 116 additions & 5 deletions libabstractor/QueryLibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,92 @@ def __init__(self):
"""init"""
pass

@property
def get_entities(self):
    """Build the SPARQL query listing every entity type.

    The query selects each distinct resource that appears as the
    object of an ``a`` (``rdf:type``) triple.

    Returns
    -------
    str
        SPARQL query
    """
    query = '''
    SELECT DISTINCT ?entity
    WHERE {
    ?instance a ?entity .
    }
    '''
    return textwrap.dedent(query)

def get_relation_for_entity(self, entity):
    """Build the SPARQL query listing the relations of an entity.

    A relation is a predicate linking an instance of ``entity`` to an
    instance of another typed entity. The query returns each distinct
    ``?relation`` together with the ``?target_entity`` type it points to.

    Parameters
    ----------
    entity : str
        URI of the source entity (a class URI, without angle brackets)

    Returns
    -------
    str
        SPARQL query
    """
    # ``entity`` is a class URI: the relations are carried by its
    # instances, not by the class resource itself. Matching
    # ``<entity> ?relation ?target`` would only return triples asserted
    # on the class, so go through ``?instance_of_source`` the same way
    # the attribute queries do.
    query = '''
    SELECT DISTINCT ?relation ?target_entity
    WHERE {{
    # Get entities
    ?instance_of_source a <{}> .
    ?instance_of_target a ?target_entity .
    # Relations
    ?instance_of_source ?relation ?instance_of_target .
    }}
    '''.format(entity)
    return textwrap.dedent(query)

def get_numeric_attribute_for_entity(self, entity):
    """Build the SPARQL query listing the numeric attributes of an entity.

    An attribute is selected when at least one instance of ``entity``
    carries it with a value for which ``isNumeric`` holds.

    Parameters
    ----------
    entity : str
        URI of the source entity (without angle brackets)

    Returns
    -------
    str
        SPARQL query
    """
    template = '''
    SELECT DISTINCT ?attribute
    WHERE {{
    # Get entities
    ?instance_of_entity a <{}> .
    # Attributes
    ?instance_of_entity ?attribute ?value .
    FILTER (isNumeric(?value))
    }}
    '''
    # Fill in the entity URI first, then strip the literal's indentation.
    return textwrap.dedent(template.format(entity))

def get_text_attribute_for_entity(self, entity):
    """Build the SPARQL query listing the text attributes of an entity.

    An attribute is selected when at least one instance of ``entity``
    carries it with a literal value that is not numeric.

    Parameters
    ----------
    entity : str
        URI of the source entity (without angle brackets)

    Returns
    -------
    str
        SPARQL query
    """
    template = '''
    SELECT DISTINCT ?attribute
    WHERE {{
    # Get entities
    ?instance_of_entity a <{}> .
    # Attributes
    ?instance_of_entity ?attribute ?value .
    FILTER (isLiteral(?value))
    FILTER (!isNumeric(?value))
    }}
    '''
    # Fill in the entity URI first, then strip the literal's indentation.
    return textwrap.dedent(template.format(entity))

@property
def entities_and_relations(self):
"""Sparql query to get entities and relations
Expand All @@ -18,13 +104,20 @@ def entities_and_relations(self):
SPARQL query
"""
return textwrap.dedent('''
SELECT DISTINCT ?source_entity ?relation ?target_entity
SELECT DISTINCT ?source_entity ?relation ?target_entity ?mother_source ?mother_target
WHERE {
# Get entities
?instance_of_source a ?source_entity .
?instance_of_target a ?target_entity .
# Relations
?instance_of_source ?relation ?instance_of_target .
OPTIONAL {{
?source_entity rdfs:subClassOf ?mother_source .
}}
OPTIONAL {{
?target_entity rdfs:subClassOf ?mother_target .
}}
}
''')

Expand Down Expand Up @@ -101,7 +194,7 @@ def entities_and_relations_with_ontology(ontology):
SPARQL query
"""
return textwrap.dedent('''
SELECT DISTINCT ?source_entity ?relation ?target_entity
SELECT DISTINCT ?source_entity ?relation ?target_entity ?mother_source ?mother_target
WHERE {{
?source_entity a owl:Class .
?source_entity rdfs:isDefinedBy <{ontology}> .
Expand All @@ -110,8 +203,18 @@ def entities_and_relations_with_ontology(ontology):
?target_entity rdfs:isDefinedBy <{ontology}> .
?relation a owl:ObjectProperty .
?relation rdfs:domain ?source_entity .
?relation rdfs:range ?target_entity .
{{
?relation rdfs:domain/(owl:unionOf/(rdf:rest*)/rdf:first) ?source_entity .
}} UNION {{
?relation rdfs:domain ?source_entity .
}}
OPTIONAL {{
?source_entity rdfs:subClassOf ?mother_source .
}}
OPTIONAL {{
?target_entity rdfs:subClassOf ?mother_target .
}}
}}
'''.format(ontology=ontology))

Expand All @@ -132,9 +235,13 @@ def entities_and_numeric_attributes_with_ontology(ontology):
?entity rdfs:isDefinedBy <{ontology}> .
# Attribute
?attribute a owl:DatatypeProperty .
?attribute rdfs:domain ?entity .
?attribute rdfs:range ?range .
VALUES ?range {{ xsd:float xsd:int }} .
{{
?attribute rdfs:domain/(owl:unionOf/(rdf:rest*)/rdf:first) ?entity .
}} UNION {{
?attribute rdfs:domain ?entity .
}}
}}
'''.format(ontology=ontology))

Expand All @@ -155,8 +262,12 @@ def entities_and_text_attributes_with_ontology(ontology):
?entity rdfs:isDefinedBy <{ontology}> .
# Attribute
?attribute a owl:DatatypeProperty .
?attribute rdfs:domain ?entity .
?attribute rdfs:range ?range .
VALUES ?range {{ xsd:string }} .
{{
?attribute rdfs:domain/(owl:unionOf/(rdf:rest*)/rdf:first) ?entity .
}} UNION {{
?attribute rdfs:domain ?entity .
}}
}}
'''.format(ontology=ontology))
58 changes: 58 additions & 0 deletions libabstractor/RdfGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,58 @@ def add_location(self, location):
"""
self.graph.add((rdflib.BNode("graph"), self.prov.atLocation, rdflib.Literal(location)))

def add_entities(self, sparql_result):
    """Declare every accepted entity of a SPARQL result in the graph.

    Each row whose ``entity`` value passes ``self.check_entity`` is
    typed as an internal ``entity``, an internal ``startPoint`` and an
    ``owl:Class``, flagged with ``instancesHaveNoLabels`` and given an
    ``rdfs:label`` computed by ``self.get_label``.

    Parameters
    ----------
    sparql_result : list
        Sparql result; each row is read through its ``"entity"`` key
    """
    for row in sparql_result:
        entity = row["entity"]
        if not self.check_entity(entity):
            # Rejected by check_entity: nothing is written for this row
            continue

        subject = rdflib.URIRef(entity)
        self.graph.add((subject, rdflib.RDF.type, self.namespace_internal["entity"]))
        self.graph.add((subject, rdflib.RDF.type, self.namespace_internal["startPoint"]))
        self.graph.add((subject, rdflib.RDF.type, rdflib.OWL.Class))
        self.graph.add((subject, self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
        label = rdflib.Literal(self.get_label(entity))
        self.graph.add((subject, rdflib.RDFS.label, label))

def add_relation(self, source_entity, relation, target_entity):
    """Declare a relation between two entities in the graph.

    Nothing is written unless ``self.check_entity`` accepts the
    relation URI. Otherwise the relation is typed as an
    ``owl:ObjectProperty`` and an internal ``AskomicsRelation``,
    labelled through ``self.get_label``, and linked to its source
    (``rdfs:domain``) and target (``rdfs:range``) entities.

    Parameters
    ----------
    source_entity : str
        Source URI
    relation : str
        Relation URI
    target_entity : str
        Target URI
    """
    if not self.check_entity(relation):
        return

    predicate = rdflib.URIRef(relation)
    self.graph.add((predicate, rdflib.RDF.type, rdflib.OWL.ObjectProperty))
    self.graph.add((predicate, rdflib.RDF.type, self.namespace_internal["AskomicsRelation"]))
    label = rdflib.Literal(self.get_label(relation))
    self.graph.add((predicate, rdflib.RDFS.label, label))
    self.graph.add((predicate, rdflib.RDFS.domain, rdflib.URIRef(source_entity)))
    self.graph.add((predicate, rdflib.RDFS.range, rdflib.URIRef(target_entity)))

def add_attribute(self, entity, attribute, decimal=True):
    """Declare an attribute of an entity in the graph.

    Nothing is written unless ``self.check_entity`` accepts the entity
    URI. Otherwise the attribute is typed as an
    ``owl:DatatypeProperty``, labelled through ``self.get_label``,
    attached to the entity (``rdfs:domain``) and given a range.

    Parameters
    ----------
    entity : str
        Source URI
    attribute : str
        Attribute URI
    decimal : bool, optional
        When true (default) the range is ``xsd:decimal``, otherwise
        ``xsd:string``
    """
    if not self.check_entity(entity):
        return

    subject = rdflib.URIRef(attribute)
    self.graph.add((subject, rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
    label = rdflib.Literal(self.get_label(attribute))
    self.graph.add((subject, rdflib.RDFS.label, label))
    self.graph.add((subject, rdflib.RDFS.domain, rdflib.URIRef(entity)))

    if decimal:
        value_range = rdflib.XSD.decimal
    else:
        value_range = rdflib.XSD.string
    self.graph.add((subject, rdflib.RDFS.range, value_range))

def add_entities_and_relations(self, sparql_result):
"""Add entities and relation in the rdf graph
Expand All @@ -74,6 +126,8 @@ def add_entities_and_relations(self, sparql_result):
source_entity = result["source_entity"]
target_entity = result["target_entity"]
relation = result["relation"]
mother_source = result["mother_source"] if "mother_source" in result else None
mother_target = result["mother_target"] if "mother_target" in result else None

# Source entity
if self.check_entity(source_entity) and source_entity not in entities:
Expand All @@ -83,6 +137,8 @@ def add_entities_and_relations(self, sparql_result):
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, rdflib.OWL.Class))
self.graph.add((rdflib.URIRef(source_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDFS.label, rdflib.Literal(self.get_label(source_entity))))
if mother_source:
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDFS.subClassOf, rdflib.URIRef(mother_source)))

# Target entity
if self.check_entity(target_entity) and target_entity not in entities:
Expand All @@ -92,6 +148,8 @@ def add_entities_and_relations(self, sparql_result):
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, rdflib.OWL.Class))
self.graph.add((rdflib.URIRef(target_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDFS.label, rdflib.Literal(self.get_label(target_entity))))
if mother_target:
    # This branch describes the target entity, so the parent class found
    # for the target must be attached to target_entity (not source_entity).
    self.graph.add((rdflib.URIRef(target_entity), rdflib.RDFS.subClassOf, rdflib.URIRef(mother_target)))

# Relation
if self.check_entity(relation):
Expand Down
2 changes: 1 addition & 1 deletion libabstractor/SparqlQuery.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def process_query(self, query):
list
Parsed results
"""
# prefixed_query = self.get_sparl_prefix() + query
# query = self.get_sparl_prefix() + query
logging.debug(query)
if self.source_type == "sparql":
return self.parse_sparql_results(self.execute_sparql_query(query))
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

setup(
name='abstractor',
version='4.0.0',
version='4.1.0',
description='Abstraction generator for AskOmics, from a distant SPARQL endpoint',
author='Xavier Garnier',
author_email='[email protected]',
url='https://github.com/askomics/abstractor',
download_url='https://github.com/askomics/abstractor/archive/4.0.0.tar.gz',
download_url='https://github.com/askomics/abstractor/archive/4.1.0.tar.gz',
install_requires=['SPARQLWrapper'],
packages=find_packages(),
license='AGPL',
Expand Down

0 comments on commit 0396c25

Please sign in to comment.