diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 26001285..02bd395c 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -1752,47 +1752,53 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): ] self._add_list_triples_from_dict(resource_dict, distribution, items) - try: - access_service_list = json.loads(resource_dict.get('access_services', '[]')) - # Access service - for access_service_dict in access_service_list: - - access_service_uri = access_service_dict.get('uri') - if access_service_uri: - access_service_node = CleanedURIRef(access_service_uri) - else: - access_service_node = BNode() - # Remember the (internal) access service reference for referencing in - # further profiles - access_service_dict['access_service_ref'] = str(access_service_node) - - self.g.add((distribution, DCAT.accessService, access_service_node)) - - self.g.add((access_service_node, RDF.type, DCAT.DataService)) - - # Simple values - items = [ - ('availability', DCATAP.availability, None, URIRefOrLiteral), - ('license', DCT.license, None, URIRefOrLiteral), - ('access_rights', DCT.accessRights, None, URIRefOrLiteral), - ('title', DCT.title, None, Literal), - ('endpoint_description', DCAT.endpointDescription, None, Literal), - ('description', DCT.description, None, Literal), - ] - - self._add_triples_from_dict(access_service_dict, access_service_node, items) + # TODO: this will go into a separate profile - # Lists - items = [ - ('endpoint_url', DCAT.endpointURL, None, URIRefOrLiteral), - ('serves_dataset', DCAT.servesDataset, None, URIRefOrLiteral), - ] - self._add_list_triples_from_dict(access_service_dict, access_service_node, items) - - if access_service_list: - resource_dict['access_services'] = json.dumps(access_service_list) - except ValueError: - pass + access_service_list = resource_dict.get('access_services', []) + if isinstance(access_service_list, str): + try: + access_service_list = json.loads(access_service_list) + except ValueError: + access_service_list = [] + + # Access service + for access_service_dict in access_service_list: + + access_service_uri = access_service_dict.get('uri') + if access_service_uri: + access_service_node = CleanedURIRef(access_service_uri) + else: + access_service_node = BNode() + # Remember the (internal) access service reference for referencing in + # further profiles + access_service_dict['access_service_ref'] = str(access_service_node) + + self.g.add((distribution, DCAT.accessService, access_service_node)) + + self.g.add((access_service_node, RDF.type, DCAT.DataService)) + + # Simple values + items = [ + ('availability', DCATAP.availability, None, URIRefOrLiteral), + ('license', DCT.license, None, URIRefOrLiteral), + ('access_rights', DCT.accessRights, None, URIRefOrLiteral), + ('title', DCT.title, None, Literal), + ('endpoint_description', DCAT.endpointDescription, None, Literal), + ('description', DCT.description, None, Literal), + ] + + self._add_triples_from_dict(access_service_dict, access_service_node, items) + + # Lists + items = [ + ('endpoint_url', DCAT.endpointURL, None, URIRefOrLiteral), + ('serves_dataset', DCAT.servesDataset, None, URIRefOrLiteral), + ] + self._add_list_triples_from_dict(access_service_dict, access_service_node, items) + + # TODO: re-enable when separating into a profile + # if access_service_list: + # resource_dict['access_services'] = json.dumps(access_service_list) def graph_from_catalog(self, catalog_dict, catalog_ref): diff --git a/ckanext/dcat/schemas/dcat_ap_2.1.yaml b/ckanext/dcat/schemas/dcat_ap_2.1.yaml index aff9ecce..20edc599 100644 --- a/ckanext/dcat/schemas/dcat_ap_2.1.yaml +++ b/ckanext/dcat/schemas/dcat_ap_2.1.yaml @@ -94,6 +94,7 @@ dataset_fields: - field_name: conforms_to label: Conforms to preset: multiple_text + validators: ignore_missing scheming_multiple_text resource_fields: @@ -114,3 +115,27 @@ resource_fields: - field_name: format label: Format preset: resource_format_autocomplete + +- field_name: rights + label: Rights + form_snippet: markdown.html + form_placeholder: Some statement about the rights associated with the resource + +- field_name: language + label: Language + preset: multiple_text + +- field_name: access_services + label: Access services + repeating_label: Access service + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: title + label: Title + + - field_name: endpoint_url + label: Endpoint URL + preset: multiple_text diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py index 3769f361..aa49551a 100644 --- a/ckanext/dcat/tests/test_scheming_support.py +++ b/ckanext/dcat/tests/test_scheming_support.py @@ -55,7 +55,24 @@ def test_e2e_ckan_to_dcat(self): {"name": "Contact 1", "email": "contact1@example.org"}, {"name": "Contact 2", "email": "contact2@example.org"}, ], - # TODO: resources + "resources": [ + { + "name": "Resource 1", + "url": "https://example.com/data.csv", + "format": "CSV", + "rights": "Some stament about rights", + "language": ["en", "ca", "es"], + "access_services": [ + { + "title": "Access Service 1", + "endpoint_url": [ + "https://example.org/access_service/1", + "https://example.org/access_service/2", + ], + } + ], + } + ], } dataset = call_action("package_create", **dataset_dict) @@ -64,7 +81,7 @@ def test_e2e_ckan_to_dcat(self): assert dataset["conforms_to"][0] == "Standard 1" assert dataset["contact"][0]["name"] == "Contact 1" - s = RDFSerializer(profiles=["euro_dcat_ap"]) + s = RDFSerializer(profiles=["euro_dcat_ap_2"]) g = s.g dataset_ref = s.graph_from_dataset(dataset) @@ -81,17 +98,68 @@ def test_e2e_ckan_to_dcat(self): # List fields # TODO helper function - conforms = [t for t in g.triples((dataset_ref, DCT.conformsTo, None))] - assert len(conforms) == len(dataset["conforms_to"]) - for index, item in enumerate(conforms): - assert str(item[2]) == dataset["conforms_to"][index] + conforms_to = [ + str(t[2]) for t in g.triples((dataset_ref, DCT.conformsTo, None)) + ] + assert conforms_to == dataset["conforms_to"] # Repeating subfields contact_details = [t for t in g.triples((dataset_ref, DCAT.contactPoint, None))] assert len(contact_details) == len(dataset["contact"]) - self._triple(g, contact_details[0][2], VCARD.fn, dataset_dict["contact"][0]["name"]) - self._triple(g, contact_details[0][2], VCARD.hasEmail, dataset_dict["contact"][0]["email"]) - self._triple(g, contact_details[1][2], VCARD.fn, dataset_dict["contact"][1]["name"]) - self._triple(g, contact_details[1][2], VCARD.hasEmail, dataset_dict["contact"][1]["email"]) + self._triple( + g, contact_details[0][2], VCARD.fn, dataset_dict["contact"][0]["name"] + ) + self._triple( + g, + contact_details[0][2], + VCARD.hasEmail, + dataset_dict["contact"][0]["email"], + ) + self._triple( + g, contact_details[1][2], VCARD.fn, dataset_dict["contact"][1]["name"] + ) + self._triple( + g, + contact_details[1][2], + VCARD.hasEmail, + dataset_dict["contact"][1]["email"], + ) + + distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2] + + # Resources: standard fields + + assert self._triple( + g, distribution_ref, DCT.rights, dataset_dict["resources"][0]["rights"] + ) + + # Resources: list fields + + language = [ + str(t[2]) for t in g.triples((distribution_ref, DCT.language, None)) + ] + assert language == dataset_dict["resources"][0]["language"] + + # Resource: repeating subfields + access_services = [ + t for t in g.triples((distribution_ref, DCAT.accessService, None)) + ] + + assert len(access_services) == len(dataset["resources"][0]["access_services"]) + self._triple( + g, + access_services[0][2], + DCT.title, + dataset_dict["resources"][0]["access_services"][0]["title"], + ) + + endpoint_urls = [ + str(t[2]) + for t in g.triples((access_services[0][2], DCAT.endpointURL, None)) + ] + assert ( + endpoint_urls + == dataset_dict["resources"][0]["access_services"][0]["endpoint_url"] + )