Skip to content

Commit

Permalink
Merge pull request #291 from ckan/shacl-validation-range
Browse files Browse the repository at this point in the history
Add support for defining ranges/classes when generating a graph
  • Loading branch information
amercader authored Aug 13, 2024
2 parents 51d6513 + 75c5451 commit 7c6339d
Show file tree
Hide file tree
Showing 8 changed files with 790 additions and 52 deletions.
21 changes: 18 additions & 3 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,8 +858,13 @@ def _add_list_triples_from_dict(self, _dict, subject, items):
def _add_triples_from_dict(
self, _dict, subject, items, list_value=False, date_value=False
):

for item in items:
key, predicate, fallbacks, _type = item
try:
key, predicate, fallbacks, _type, _class = item
except ValueError:
key, predicate, fallbacks, _type = item
_class = None
self._add_triple_from_dict(
_dict,
subject,
Expand All @@ -869,6 +874,7 @@ def _add_triples_from_dict(
list_value=list_value,
date_value=date_value,
_type=_type,
_class=_class,
)

def _add_triple_from_dict(
Expand All @@ -882,6 +888,7 @@ def _add_triple_from_dict(
date_value=False,
_type=Literal,
_datatype=None,
_class=None,
value_modifier=None,
):
"""
Expand All @@ -896,6 +903,8 @@ def _add_triple_from_dict(
returning a modified value can be passed.
If a value was found, the modifier is applied before adding the value.
`_class` is the optional RDF class of the entity being added.
If `list_value` or `date_value` are True, then the value is treated as
a list or a date respectively (see `_add_list_triple` and
`_add_date_triple` for details).
Expand All @@ -912,7 +921,7 @@ def _add_triple_from_dict(
value = value_modifier(value)

if value and list_value:
self._add_list_triple(subject, predicate, value, _type, _datatype)
self._add_list_triple(subject, predicate, value, _type, _datatype, _class)
elif value and date_value:
self._add_date_triple(subject, predicate, value, _type)
elif value:
Expand All @@ -926,8 +935,11 @@ def _add_triple_from_dict(
object = _type(value)
self.g.add((subject, predicate, object))

if _class and isinstance(object, URIRef):
self.g.add((object, RDF.type, _class))

def _add_list_triple(
self, subject, predicate, value, _type=Literal, _datatype=None
self, subject, predicate, value, _type=Literal, _datatype=None, _class=None
):
"""
Adds as many triples to the graph as values
Expand All @@ -948,6 +960,9 @@ def _add_list_triple(
object = _type(item)
self.g.add((subject, predicate, object))

if _class and isinstance(object, URIRef):
self.g.add((object, RDF.type, _class))

def _add_date_triple(self, subject, predicate, value, _type=Literal):
"""
Adds a new triple with a date object
Expand Down
58 changes: 37 additions & 21 deletions ckanext/dcat/profiles/euro_dcat_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,14 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
items = [
("title", DCT.title, None, Literal),
("notes", DCT.description, None, Literal),
("url", DCAT.landingPage, None, URIRef),
("url", DCAT.landingPage, None, URIRef, FOAF.Document),
("identifier", DCT.identifier, ["guid", "id"], URIRefOrLiteral),
("version", OWL.versionInfo, ["dcat_version"], Literal),
("version_notes", ADMS.versionNotes, None, Literal),
("frequency", DCT.accrualPeriodicity, None, URIRefOrLiteral),
("access_rights", DCT.accessRights, None, URIRefOrLiteral),
("dcat_type", DCT.type, None, Literal),
("provenance", DCT.provenance, None, Literal),
("frequency", DCT.accrualPeriodicity, None, URIRefOrLiteral, DCT.Frequency),
("access_rights", DCT.accessRights, None, URIRefOrLiteral, DCT.AccessRights),
("dcat_type", DCT.type, None, URIRefOrLiteral),
("provenance", DCT.provenance, None, URIRefOrLiteral, DCT.ProvenanceStatement),
]
self._add_triples_from_dict(dataset_dict, dataset_ref, items)

Expand All @@ -299,16 +299,16 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

# Lists
items = [
("language", DCT.language, None, URIRefOrLiteral),
("language", DCT.language, None, URIRefOrLiteral, DCT.LinguisticSystem),
("theme", DCAT.theme, None, URIRef),
("conforms_to", DCT.conformsTo, None, Literal),
("alternate_identifier", ADMS.identifier, None, URIRefOrLiteral),
("documentation", FOAF.page, None, URIRefOrLiteral),
("related_resource", DCT.relation, None, URIRefOrLiteral),
("conforms_to", DCT.conformsTo, None, URIRefOrLiteral, DCT.Standard),
("alternate_identifier", ADMS.identifier, None, URIRefOrLiteral, ADMS.Identifier),
("documentation", FOAF.page, None, URIRefOrLiteral, FOAF.Document),
("related_resource", DCT.relation, None, URIRefOrLiteral, RDFS.Resource),
("has_version", DCT.hasVersion, None, URIRefOrLiteral),
("is_version_of", DCT.isVersionOf, None, URIRefOrLiteral),
("source", DCT.source, None, URIRefOrLiteral),
("sample", ADMS.sample, None, URIRefOrLiteral),
("sample", ADMS.sample, None, URIRefOrLiteral, DCAT.Distribution),
]
self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

Expand Down Expand Up @@ -404,7 +404,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
}
# Add to graph
if publisher_ref:
g.add((publisher_ref, RDF.type, FOAF.Organization))
g.add((publisher_ref, RDF.type, FOAF.Agent))
g.add((dataset_ref, DCT.publisher, publisher_ref))
items = [
("name", FOAF.name, None, Literal),
Expand Down Expand Up @@ -468,23 +468,24 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
("name", DCT.title, None, Literal),
("description", DCT.description, None, Literal),
("status", ADMS.status, None, URIRefOrLiteral),
("rights", DCT.rights, None, URIRefOrLiteral),
("license", DCT.license, None, URIRefOrLiteral),
("access_url", DCAT.accessURL, None, URIRef),
("download_url", DCAT.downloadURL, None, URIRef),
("rights", DCT.rights, None, URIRefOrLiteral, DCT.RightsStatement),
("license", DCT.license, None, URIRefOrLiteral, DCT.LicenseDocument),
("access_url", DCAT.accessURL, None, URIRef, RDFS.Resource),
("download_url", DCAT.downloadURL, None, URIRef, RDFS.Resource),
]

self._add_triples_from_dict(resource_dict, distribution, items)

# Lists
items = [
("documentation", FOAF.page, None, URIRefOrLiteral),
("language", DCT.language, None, URIRefOrLiteral),
("conforms_to", DCT.conformsTo, None, Literal),
("documentation", FOAF.page, None, URIRefOrLiteral, FOAF.Document),
("language", DCT.language, None, URIRefOrLiteral, DCT.LinguisticSystem),
("conforms_to", DCT.conformsTo, None, URIRefOrLiteral, DCT.Standard),
]
self._add_list_triples_from_dict(resource_dict, distribution, items)

# Set default license for distribution if needed and available

if resource_license_fallback and not (distribution, DCT.license, None) in g:
g.add(
(
Expand All @@ -493,6 +494,15 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
URIRefOrLiteral(resource_license_fallback),
)
)
# TODO: add an actual field to manage this
if (distribution, DCT.license, None) in g:
g.add(
(
list(g.objects(distribution, DCT.license))[0],
DCT.type,
URIRef("http://purl.org/adms/licencetype/UnknownIPR")
)
)

# Format
mimetype = resource_dict.get("mimetype")
Expand All @@ -515,10 +525,16 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
mimetype = None

if mimetype:
g.add((distribution, DCAT.mediaType, URIRefOrLiteral(mimetype)))
mimetype = URIRefOrLiteral(mimetype)
g.add((distribution, DCAT.mediaType, mimetype))
if isinstance(mimetype, URIRef):
g.add((mimetype, RDF.type, DCT.MediaType))

if fmt:
g.add((distribution, DCT["format"], URIRefOrLiteral(fmt)))
fmt = URIRefOrLiteral(fmt)
g.add((distribution, DCT["format"], fmt))
if isinstance(fmt, URIRef):
g.add((fmt, RDF.type, DCT.MediaTypeOrExtent))

# URL fallback and old behavior
url = resource_dict.get("url")
Expand Down
59 changes: 49 additions & 10 deletions ckanext/dcat/profiles/euro_dcat_ap_2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from decimal import Decimal, DecimalException

from rdflib import URIRef, BNode, Literal
from rdflib import URIRef, BNode, Literal, Namespace
from ckanext.dcat.utils import resource_uri

from .base import URIRefOrLiteral, CleanedURIRef
Expand All @@ -13,11 +13,15 @@
DCT,
XSD,
SCHEMA,
RDFS,
)

from .euro_dcat_ap import EuropeanDCATAPProfile


ELI = Namespace("http://data.europa.eu/eli/ontology#")


class EuropeanDCATAP2Profile(EuropeanDCATAPProfile):
"""
An RDF profile based on the DCAT-AP 2 for data portals in Europe
Expand All @@ -36,7 +40,9 @@ def parse_dataset(self, dataset_dict, dataset_ref):
# Standard values
value = self._object_value(dataset_ref, DCAT.temporalResolution)
if value:
dataset_dict["extras"].append({"key": "temporal_resolution", "value": value})
dataset_dict["extras"].append(
{"key": "temporal_resolution", "value": value}
)

# Lists
for key, predicate in (
Expand Down Expand Up @@ -67,7 +73,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
# For some reason we incorrectly allowed lists in this property at some point;
# keep support for them, but default to a single value
value = (
spatial_resolution[0] if len(spatial_resolution) == 1
spatial_resolution[0]
if len(spatial_resolution) == 1
else json.dumps(spatial_resolution)
)
dataset_dict["extras"].append(
Expand Down Expand Up @@ -169,16 +176,24 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
)

# Lists
for key, predicate, fallbacks, type, datatype in (
("is_referenced_by", DCT.isReferencedBy, None, URIRefOrLiteral, None),
for key, predicate, fallbacks, type, datatype, _class in (
(
"is_referenced_by",
DCT.isReferencedBy,
None,
URIRefOrLiteral,
None,
RDFS.Resource,
),
(
"applicable_legislation",
DCATAP.applicableLegislation,
None,
URIRefOrLiteral,
None,
ELI.LegalResource,
),
("hvd_category", DCATAP.hvdCategory, None, URIRefOrLiteral, None),
("hvd_category", DCATAP.hvdCategory, None, URIRefOrLiteral, None, None),
):
self._add_triple_from_dict(
dataset_dict,
Expand Down Expand Up @@ -254,8 +269,20 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
# Simple values
items = [
("availability", DCATAP.availability, None, URIRefOrLiteral),
("compress_format", DCAT.compressFormat, None, URIRefOrLiteral),
("package_format", DCAT.packageFormat, None, URIRefOrLiteral),
(
"compress_format",
DCAT.compressFormat,
None,
URIRefOrLiteral,
DCT.MediaType,
),
(
"package_format",
DCAT.packageFormat,
None,
URIRefOrLiteral,
DCT.MediaType,
),
]

self._add_triples_from_dict(resource_dict, distribution, items)
Expand All @@ -267,6 +294,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
DCATAP.applicableLegislation,
None,
URIRefOrLiteral,
ELI.LegalResource,
),
]
self._add_list_triples_from_dict(resource_dict, distribution, items)
Expand Down Expand Up @@ -300,7 +328,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
("license", DCT.license, None, URIRefOrLiteral),
("access_rights", DCT.accessRights, None, URIRefOrLiteral),
("title", DCT.title, None, Literal),
("endpoint_description", DCAT.endpointDescription, None, URIRefOrLiteral),
(
"endpoint_description",
DCAT.endpointDescription,
None,
URIRefOrLiteral,
),
("description", DCT.description, None, Literal),
]

Expand All @@ -310,7 +343,13 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

# Lists
items = [
("endpoint_url", DCAT.endpointURL, None, URIRefOrLiteral),
(
"endpoint_url",
DCAT.endpointURL,
None,
URIRefOrLiteral,
RDFS.Resource,
),
("serves_dataset", DCAT.servesDataset, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(
Expand Down
27 changes: 19 additions & 8 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,7 @@ def _parse_list_value(data_dict, field_name):
_parse_list_value(resource_dict, field_name)

# Repeating subfields
new_fields_mapping = {
"temporal_coverage": "temporal"
}
new_fields_mapping = {"temporal_coverage": "temporal"}
for schema_field in self._dataset_schema["dataset_fields"]:
if "repeating_subfields" in schema_field:
# Check if existing extras need to be migrated
Expand Down Expand Up @@ -132,7 +130,7 @@ def _not_empty_dict(data_dict):
else:
contact_details = BNode()

self.g.add((contact_details, RDF.type, VCARD.Organization))
self.g.add((contact_details, RDF.type, VCARD.Kind))
self.g.add((dataset_ref, DCAT.contactPoint, contact_details))

self._add_triple_from_dict(item, contact_details, VCARD.fn, "name")
Expand All @@ -147,23 +145,32 @@ def _not_empty_dict(data_dict):
)

publisher = dataset_dict.get("publisher")
if isinstance(publisher, list) and len(publisher) and _not_empty_dict(publisher[0]):
if (
isinstance(publisher, list)
and len(publisher)
and _not_empty_dict(publisher[0])
):
publisher = publisher[0]
publisher_uri = publisher.get("uri")
if publisher_uri:
publisher_ref = CleanedURIRef(publisher_uri)
else:
publisher_ref = BNode()

self.g.add((publisher_ref, RDF.type, FOAF.Organization))
self.g.add((publisher_ref, RDF.type, FOAF.Agent))
self.g.add((dataset_ref, DCT.publisher, publisher_ref))

self._add_triple_from_dict(publisher, publisher_ref, FOAF.name, "name")
self._add_triple_from_dict(
publisher, publisher_ref, FOAF.homepage, "url", _type=URIRef
)
self._add_triple_from_dict(
publisher, publisher_ref, DCT.type, "type", _type=URIRefOrLiteral
publisher,
publisher_ref,
DCT.type,
"type",
_type=URIRefOrLiteral,
_class=SKOS.Concept,
)
self._add_triple_from_dict(
publisher,
Expand All @@ -175,7 +182,11 @@ def _not_empty_dict(data_dict):
)

temporal = dataset_dict.get("temporal_coverage")
if isinstance(temporal, list) and len(temporal) and _not_empty_dict(temporal[0]):
if (
isinstance(temporal, list)
and len(temporal)
and _not_empty_dict(temporal[0])
):
for item in temporal:
temporal_ref = BNode()
self.g.add((temporal_ref, RDF.type, DCT.PeriodOfTime))
Expand Down
Loading

0 comments on commit 7c6339d

Please sign in to comment.