Skip to content

Commit

Permalink
Merge pull request #2 from xgaia/release_1.1.0_federated
Browse files Browse the repository at this point in the history
Release 1.2.0
  • Loading branch information
xgaia authored Nov 27, 2019
2 parents 3a659f1 + 697e891 commit d717c76
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 134 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,14 @@ abstractor -h
### General usage

```bash
# Generate an abstraction from a SPARQL endpoint
abstractor -e <endpoint_url> -p <entity_prefix> -o <output_file>
```

### Example with NeXtProt

```bash
# Get help
abstractor -e "https://sparql.nextprot.org" -p "http://nextprot.org/rdf#" -o "abstraction.ttl"
abstractor -e "https://sparql.nextprot.org" -p "http://nextprot.org/rdf#" -n nextprot -o "abstraction.ttl"
```

The resulting TTL file can be used with [AskOmics](https://github.com/askomics/flaskomics).
241 changes: 111 additions & 130 deletions abstractor
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#! /usr/bin/python3

import argparse
import rdflib
import textwrap
from libabstractor.SparqlQuery import SparqlQuery

Expand All @@ -11,14 +12,16 @@ class Abstractor(object):
def __init__(self):
"""Init
Parse args and get prefixex
Parse args and get prefixes
"""
parser = argparse.ArgumentParser(description="Generate AskOmics abstraction from a SPARQL endpoint")

parser.add_argument("-e", "--endpoint", type=str, help="SPARQL enpoint url", required=True)
parser.add_argument("-p", "--endpoint-prefix", type=str, help="Endpoint prefix", required=True)
parser.add_argument("-n", "--name", type=str, help="Endpoint prefix short name", default="external")
parser.add_argument("-p", "--endpoint-prefix", type=str, help="Endpoint prefix url", required=True)
parser.add_argument("--askomics-prefix", type=str, help="AskOmics prefix", default="http://www.semanticweb.org/user/ontologies/2018/1#")
parser.add_argument("-o", "--output", type=str, help="Output ttl file", default="abstraction.ttl")
parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.ttl")
parser.add_argument("-f", "--output-format", type=str, help="RDF format", default="turtle")

self.args = parser.parse_args()

Expand All @@ -33,16 +36,13 @@ class Abstractor(object):
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

query = textwrap.dedent('''
SELECT DISTINCT ?entity ?rel ?valueType
SELECT DISTINCT ?source_entity ?relation ?target_entity
WHERE {
# Get entities
?entity a ?type1 .
?instance_of_source a ?source_entity .
?instance_of_target a ?target_entity .
# Relations
?s a ?entity .
?s ?rel ?value .
?value a ?valueType .
?valueType a ?type2 .
?instance_of_source ?relation ?instance_of_target .
}
''')

Expand All @@ -59,13 +59,12 @@ class Abstractor(object):
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

query = textwrap.dedent('''
SELECT DISTINCT ?entity ?attr
SELECT DISTINCT ?entity ?attribute
WHERE {
# Get entities
?entity a ?type1 .
?instance_of_entity a ?entity .
# Attributes
?subject a ?entity .
?subject ?attr ?value .
?instance_of_entity ?attribute ?value .
FILTER (isNumeric(?value))
}
''')
Expand All @@ -83,13 +82,12 @@ class Abstractor(object):
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

query = textwrap.dedent('''
SELECT DISTINCT ?entity ?attr
SELECT DISTINCT ?entity ?attribute
WHERE {
# Get entities
?entity a ?type1 .
?instance_of_entity a ?entity .
# Attributes
?subject a ?entity .
?subject ?attr ?value .
?instance_of_entity ?attribute ?value .
FILTER (isLiteral(?value))
FILTER (!isNumeric(?value))
}
Expand All @@ -101,118 +99,101 @@ class Abstractor(object):
"""main"""
sparql = SparqlQuery(self.args.endpoint, self.args.askomics_prefix)

with open(self.args.output, "w") as file:

# Insert prefix
file.write(sparql.get_ttl_prefix())

# launch query
try:
result_entities = self.get_entities_and_relations()
except Exception as e:
raise e

entities = []

# Entities and relations
for result in result_entities:
entity = result["entity"]
relation = result["rel"] if "rel" in result else None
relation_range = result["valueType"] if "valueType" in result else None

if not entity.startswith(self.args.endpoint_prefix):
continue

# Write ttl for entities
if entity not in entities:
entities.append(entity)
ttl = textwrap.dedent('''
<{}> a :entity ,
:startPoint ,
owl:Class ;
:instancesHaveNoLabels true ;
rdfs:label "{}" .
'''.format(
entity,
sparql.get_label(entity)
))

file.write(ttl)

if not relation.startswith(self.args.endpoint_prefix):
continue

# write ttl for relations
if relation and relation_range:
ttl = textwrap.dedent('''
<{}> a owl:ObjectProperty ,
:AskomicsRelation ;
rdfs:label "{}" ;
rdfs:domain <{}> ;
rdfs:range <{}> .
'''.format(
relation,
sparql.get_label(relation),
entity,
relation_range
))

file.write(ttl)

# launch query
try:
result_numeric_attr = self.get_entities_and_numeric_attributes()
except Exception as e:
raise e

# Numeric attributes
for result in result_numeric_attr:
entity = result["entity"]
attribute = result["attr"] if "attr" in result else None

if not entity.startswith(self.args.endpoint_prefix) and attribute.startswith(self.args.endpoint_prefix):
continue

if attribute:
ttl = textwrap.dedent('''
<{}> a owl:DatatypeProperty ;
rdfs:label "{}" ;
rdfs:domain <{}> ;
rdfs:range xsd:decimal .
'''.format(
attribute,
sparql.get_label(attribute),
entity
))

file.write(ttl)

# launch query
try:
result_text_attr = self.get_entities_and_text_attributes()
except Exception as e:
raise e

for result in result_text_attr:
entity = result["entity"]
attribute = result["attr"] if "attr" in result else None

if not entity.startswith(self.args.endpoint_prefix) and attribute.startswith(self.args.endpoint_prefix):
continue

if attribute:
ttl = '''
<{}> a owl:DatatypeProperty ;
rdfs:label "{}" ;
rdfs:domain <{}> ;
rdfs:range xsd:string .
'''.format(
attribute,
sparql.get_label(attribute),
entity
)

file.write(ttl)
# launch query
try:
result_entities = self.get_entities_and_relations()
except Exception as e:
raise e

entities = []

# RDF graphs
gprefix = rdflib.namespace.Namespace(self.args.askomics_prefix)

gentities = rdflib.Graph()
gentities.bind('', self.args.askomics_prefix)
gentities.bind(self.args.name, self.args.endpoint_prefix)

grelations = rdflib.Graph()
grelations.bind('', self.args.askomics_prefix)
grelations.bind(self.args.name, self.args.endpoint_prefix)

gattributes = rdflib.Graph()
gattributes.bind('', self.args.askomics_prefix)
gattributes.bind(self.args.name, self.args.endpoint_prefix)

# Entities and relations
for result in result_entities:
source_entity = result["source_entity"]
target_entity = result["target_entity"]
relation = result["relation"]

# Source entity
if source_entity.startswith(self.args.endpoint_prefix) and source_entity not in entities:
entities.append(source_entity)
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, gprefix["entity"]))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, gprefix["startPoint"]))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDF.type, rdflib.OWL.Class))
gentities.add((rdflib.URIRef(source_entity), gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
gentities.add((rdflib.URIRef(source_entity), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(source_entity))))

# Target entity
if target_entity.startswith(self.args.endpoint_prefix) and target_entity not in entities:
entities.append(target_entity)
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, gprefix["entity"]))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, gprefix["startPoint"]))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDF.type, rdflib.OWL.Class))
gentities.add((rdflib.URIRef(target_entity), gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
gentities.add((rdflib.URIRef(target_entity), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(target_entity))))

# Relation
if relation.startswith(self.args.endpoint_prefix):
grelations.add((rdflib.URIRef(relation), rdflib.RDF.type, rdflib.OWL.ObjectProperty))
grelations.add((rdflib.URIRef(relation), rdflib.RDF.type, gprefix["AskomicsRelation"]))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(relation))))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.domain, rdflib.URIRef(source_entity)))
grelations.add((rdflib.URIRef(relation), rdflib.RDFS.range, rdflib.URIRef(target_entity)))

# launch query
try:
result_numeric_attr = self.get_entities_and_numeric_attributes()
except Exception as e:
raise e

# Numeric attributes
for result in result_numeric_attr:
entity = result["entity"]
attribute = result["attribute"]

if not entity.startswith(self.args.endpoint_prefix) or not attribute.startswith(self.args.endpoint_prefix):
continue

gattributes.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(attribute))))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.decimal))

# launch query
try:
result_text_attr = self.get_entities_and_text_attributes()
except Exception as e:
raise e

for result in result_text_attr:
entity = result["entity"]
attribute = result["attribute"]

if not entity.startswith(self.args.endpoint_prefix) or not attribute.startswith(self.args.endpoint_prefix):
continue

gattributes.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(sparql.get_label(attribute))))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
gattributes.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string))

# Serialize
full_graph = gentities + grelations + gattributes
full_graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None)


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

setup(
name='abstractor',
version='1.1.0',
version='1.2.0',
description='Abstraction generator for AskOmics, from a distant SPARQL endpoint',
author='Xavier Garnier',
author_email='[email protected]',
url='https://github.com/askomics/abstractor',
download_url='https://github.com/askomics/abstractor/archive/1.1.0.tar.gz',
download_url='https://github.com/askomics/abstractor/archive/1.2.0.tar.gz',
install_requires=['SPARQLWrapper'],
packages=find_packages(),
license='AGPL',
Expand Down

0 comments on commit d717c76

Please sign in to comment.