Skip to content

Commit

Permalink
Merge pull request #3 from xgaia/owl
Browse files Browse the repository at this point in the history
Abstractor 2.0.0
  • Loading branch information
xgaia authored Jan 30, 2020
2 parents d717c76 + 99ba82c commit 12f0ad5
Show file tree
Hide file tree
Showing 7 changed files with 465 additions and 221 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
*.ttl
*.xml
*.nt

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
27 changes: 21 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Abstractor

Abstraction generator: Generate AskOmics abstraction from a distant endpoint
Abstraction generator: Generate AskOmics abstraction from a distant SPARQL endpoint or an RDF file

## Installation

Expand Down Expand Up @@ -58,15 +58,30 @@ abstractor -h

### General usage

Use `abstractor --help` to get all available options.

#### With a SPARQL endpoint

```bash
abstractor -e <endpoint_url> -p <entity_prefix> -o <output_file>
abstractor -s <endpoint_url> -o <output_file>
```

### Example with NeXtProt
Example with [NeXtProt](https://sparql.nextprot.org):

```bash
# Generate the NeXtProt abstraction
abstractor -e "https://sparql.nextprot.org" -p "http://nextprot.org/rdf#" -n nextprot -o "abstraction.ttl"
abstractor -s https://sparql.nextprot.org -o nextprot_abstraction.ttl
```

#### With an RDF file

```bash
abstractor -s <path> -t <type> -o <output_file>
```

Example with a file `data.xml`. Both the input and the output files are in XML format.

```bash
abstractor -s ~/me/data.xml -t xml -o data_abstraction.xml -f xml
```

Obtained TTL file can be used with [AskOmics](https://github.com/askomics/flaskomics)
Obtained TTL file can be used with [AskOmics](https://github.com/askomics/flaskomics)
203 changes: 29 additions & 174 deletions abstractor
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#! /usr/bin/python3

import argparse
import rdflib
import textwrap
from libabstractor.SparqlQuery import SparqlQuery
from libabstractor.QueryLibrary import QueryLibrary
from libabstractor.RdfGraph import RdfGraph


class Abstractor(object):
Expand All @@ -16,184 +16,39 @@ class Abstractor(object):
"""
parser = argparse.ArgumentParser(description="Generate AskOmics abstraction from a SPARQL endpoint")

parser.add_argument("-e", "--endpoint", type=str, help="SPARQL enpoint url", required=True)
parser.add_argument("-n", "--name", type=str, help="Endpoint prefix short name", default="external")
parser.add_argument("-p", "--endpoint-prefix", type=str, help="Endpoint prefix url", required=True)
parser.add_argument("--askomics-prefix", type=str, help="AskOmics prefix", default="http://www.semanticweb.org/user/ontologies/2018/1#")
parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.ttl")
parser.add_argument("-f", "--output-format", type=str, help="RDF format", default="turtle")
parser.add_argument("-s", "--source", type=str, help="RDF data source (SPARQL endpoint url or path to RDF file)", required=True)
parser.add_argument("-t", "--source-type", choices=['sparql', 'xml', 'turtle', 'nt'], help="Source format", default="sparql")

self.args = parser.parse_args()
parser.add_argument("--askomics-prefix", type=str, help="AskOmics prefix", default="http://www.semanticweb.org/user/ontologies/2018/1#")

def get_entities_and_relations(self):
"""Get all entities and relations
parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.rdf")
parser.add_argument("-f", "--output-format", choices=['xml', 'turtle', 'nt'], help="RDF format", default="turtle")
parser.add_argument("--owl", default=False, action='store_true', help="Use OWL ontology")

Returns
-------
list, list
header and results
"""
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

query = textwrap.dedent('''
SELECT DISTINCT ?source_entity ?relation ?target_entity
WHERE {
# Get entities
?instance_of_source a ?source_entity .
?instance_of_target a ?target_entity .
# Relations
?instance_of_source ?relation ?instance_of_target .
}
''')

return sparql.process_query(query)

def get_entities_and_numeric_attributes(self):
    """Query the endpoint for entity classes and their numeric attributes.

    Selects every distinct (?entity, ?attribute) pair for which some
    instance typed as ?entity carries ?attribute with a value accepted by
    the SPARQL ``isNumeric`` test.

    Returns
    -------
    object
        Whatever ``SparqlQuery.process_query`` returns for this query
        (its exact shape is defined in libabstractor, not in this method).
    """
    # Query text is prepared first; dedent only strips common leading
    # whitespace, so the SPARQL sent to the endpoint is unchanged.
    numeric_attributes_query = textwrap.dedent('''
SELECT DISTINCT ?entity ?attribute
WHERE {
# Get entities
?instance_of_entity a ?entity .
# Attributes
?instance_of_entity ?attribute ?value .
FILTER (isNumeric(?value))
}
''')

    endpoint = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)
    return endpoint.process_query(numeric_attributes_query)

def get_entities_and_text_attributes(self):
    """Query the endpoint for entity classes and their textual attributes.

    Selects every distinct (?entity, ?attribute) pair for which some
    instance typed as ?entity carries ?attribute with a value that is a
    literal (``isLiteral``) but not numeric (``!isNumeric``).

    Returns
    -------
    object
        Whatever ``SparqlQuery.process_query`` returns for this query
        (its exact shape is defined in libabstractor, not in this method).
    """
    # Query text is prepared first; dedent only strips common leading
    # whitespace, so the SPARQL sent to the endpoint is unchanged.
    text_attributes_query = textwrap.dedent('''
SELECT DISTINCT ?entity ?attribute
WHERE {
# Get entities
?instance_of_entity a ?entity .
# Attributes
?instance_of_entity ?attribute ?value .
FILTER (isLiteral(?value))
FILTER (!isNumeric(?value))
}
''')

    endpoint = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)
    return endpoint.process_query(text_attributes_query)
self.args = parser.parse_args()

def main(self):
"""main"""
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

# launch query
try:
result_entities = self.get_entities_and_relations()
except Exception as e:
raise e

entities = []

# RDF graphs
gprefix = rdflib.namespace.Namespace(self.args.askomics_prefix)

gentities = rdflib.Graph()
gentities.bind('', self.args.askomics_prefix)
gentities.bind(self.args.name, self.args.endpoint_prefix)

grelations = rdflib.Graph()
grelations.bind('', self.args.askomics_prefix)
grelations.bind(self.args.name, self.args.endpoint_prefix)

gattributes = rdflib.Graph()
gattributes.bind('', self.args.askomics_prefix)
gattributes.bind(self.args.name, self.args.endpoint_prefix)

# Entities and relations
for result in result_entities:
source_entity = result["source_entity"]
target_entity = result["target_entity"]
relation = result["relation"]

# Source entity
if source_entity.startswith(self.args.endpoint_prefix) and source_entity not in entities:
entities.append(source_entity)
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, gprefix["entity"]))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, gprefix["startPoint"]))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, rdflib.OWL.Class))
gentities.add((rdflib.URIRef(source_entity), gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(source_entity))))

# Target entity
if target_entity.startswith(self.args.endpoint_prefix) and target_entity not in entities:
entities.append(target_entity)
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, gprefix["entity"]))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, gprefix["startPoint"]))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, rdflib.OWL.Class))
gentities.add((rdflib.URIRef(target_entity), gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(target_entity))))

# Relation
if relation.startswith(self.args.endpoint_prefix):
grelations.add((rdflib.URIRef(relation), rdflib.RDF.type, rdflib.OWL.ObjectProperty))
grelations.add((rdflib.URIRef(relation), rdflib.RDF.type, gprefix["AskomicsRelation"]))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(relation))))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.domain, rdflib.URIRef(source_entity)))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.range, rdflib.URIRef(target_entity)))

# launch query
try:
result_numeric_attr = self.get_entities_and_numeric_attributes()
except Exception as e:
raise e

# Numeric attributes
for result in result_numeric_attr:
entity = result["entity"]
attribute = result["attribute"]

if not entity.startswith(self.args.endpoint_prefix) or not attribute.startswith(self.args.endpoint_prefix):
continue

gattributes.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(attribute))))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.decimal))

# launch query
try:
result_text_attr = self.get_entities_and_text_attributes()
except Exception as e:
raise e

for result in result_text_attr:
entity = result["entity"]
attribute = result["attribute"]

if not entity.startswith(self.args.endpoint_prefix) or not attribute.startswith(self.args.endpoint_prefix):
continue

gattributes.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(attribute))))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string))

# Serialize
full_graph = gentities + grelations + gattributes
full_graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None)
sparql = SparqlQuery(self.args.source, self.args.source_type, self.args.askomics_prefix)
library = QueryLibrary()

rdf = RdfGraph(self.args.askomics_prefix)

# Use owl ontology
if self.args.owl:
result = sparql.process_query(library.ontologies)
for res in result:
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations_with_ontology(res["ontology"])))
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes_with_ontology(res["ontology"])))
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes_with_ontology(res["ontology"])))

# All relations
else:
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations))
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes))
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes))

rdf.graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None)


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 12f0ad5

Please sign in to comment.