From ed9b9d0bb4fa4c325f10e1959330227607ce223f Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Thu, 28 Mar 2024 15:57:28 +0100 Subject: [PATCH] Add support for the new Hydra vocabulary --- CHANGELOG.md | 1 + ckanext/dcat/processors.py | 29 +++++--- ckanext/dcat/tests/test_base_parser.py | 46 +++++++++++- .../test_euro_dcatap_profile_serialize.py | 73 ++++++++++++++++++- 4 files changed, 136 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da24aed5..efae64cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...HEAD) +* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated. ## [v1.6.0](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...v1.6.0) - 2024-02-29 diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index e11df3c8..864f734c 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -116,11 +116,15 @@ def next_page(self): Returns the URL of the next page or None if there is no next page ''' for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection): + # Try to find HYDRA.next first + for o in self.g.objects(pagination_node, HYDRA.next): + return str(o) + + # If HYDRA.next is not found, try HYDRA.nextPage (deprecated) for o in self.g.objects(pagination_node, HYDRA.nextPage): return str(o) return None - def parse(self, data, _format=None): ''' Parses and RDF graph serialization and into the class graph @@ -178,7 +182,6 @@ def datasets(self): yield dataset_dict - class RDFSerializer(RDFProcessor): ''' A CKAN to RDF serializer based on rdflib @@ -209,19 +212,23 @@ def _add_pagination_triples(self, paging_info): pagination_ref = BNode() self.g.add((pagination_ref, RDF.type, HYDRA.PagedCollection)) + # The predicates `nextPage`, `previousPage`, `firstPage`, `lastPage` + # and `itemsPerPage` are deprecated and will be removed in the future items = [ - ('next', HYDRA.nextPage), - ('previous', HYDRA.previousPage), - ('first', HYDRA.firstPage), - ('last', HYDRA.lastPage), - ('count', HYDRA.totalItems), - ('items_per_page', HYDRA.itemsPerPage), + ('next', [HYDRA.nextPage, HYDRA.next]), + ('previous', [HYDRA.previousPage, HYDRA.previous]), + ('first', [HYDRA.firstPage, HYDRA.first]), + ('last', [HYDRA.lastPage, HYDRA.last]), + ('count', [HYDRA.totalItems]), + ('items_per_page', [HYDRA.itemsPerPage]), ] + for item in items: - key, predicate = item + key, predicates = item if paging_info.get(key): - self.g.add((pagination_ref, predicate, - Literal(paging_info[key]))) + for predicate in predicates: + self.g.add((pagination_ref, predicate, + Literal(paging_info[key]))) return pagination_ref diff --git a/ckanext/dcat/tests/test_base_parser.py b/ckanext/dcat/tests/test_base_parser.py index 819ed111..46aa0e3c 100644 --- a/ckanext/dcat/tests/test_base_parser.py +++ b/ckanext/dcat/tests/test_base_parser.py @@ -140,7 +140,7 @@ def test_parse_data(self): assert len(p.g) == 2 - def test_parse_pagination_next_page(self): + def test_parse_pagination_next_page_deprecated_vocabulary_only(self): data = ''' + + + 245 + http://example.com/catalog.xml?page=3 + http://example.com/catalog.xml?page=2 + http://example.com/catalog.xml?page=1 + + + ''' + + p = RDFParser() + + p.parse(data) + + assert p.next_page() == 'http://example.com/catalog.xml?page=2' + + def test_parse_pagination_next_page_both_vocabularies(self): + + data = ''' + + + http://example.com/catalog.xml?page=3 + http://example.com/catalog.xml?page=next + http://example.com/catalog.xml?page=nextPage + http://example.com/catalog.xml?page=1 + + + ''' + + p = RDFParser() + + p.parse(data) + + assert p.next_page() == 'http://example.com/catalog.xml?page=next' + def test_parse_without_pagination(self): data = ''' diff --git a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py index e0f7d3c0..a389acfd 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py @@ -15,7 +15,7 @@ from ckantoolkit.tests import helpers, factories from ckanext.dcat import utils -from ckanext.dcat.processors import RDFSerializer +from ckanext.dcat.processors import RDFSerializer, HYDRA from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA, SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT, DISTRIBUTION_LICENSE_FALLBACK_CONFIG) @@ -1250,6 +1250,77 @@ def test_subcatalog(self): assert len(dataset_title) == 1 assert str(dataset_title[0]) == dataset['title'] + def test_catalog_pagination(self): + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'test dataset', + 'extras': [ + {'key': 'source_catalog_title', 'value': 'Subcatalog example'}, + {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'}, + {'key': 'source_catalog_description', 'value': 'Subcatalog example description'} + ] + } + catalog_dict = { + 'title': 'My Catalog', + 'description': 'An Open Data Catalog', + 'homepage': 'http://example.com', + 'language': 'de', + } + + expected_first = 'http://subcatalog.example?page=1' + expected_next = 'http://subcatalog.example?page=2' + expected_last = 'http://subcatalog.example?page=3' + + pagination = { + 'count': 12, + 'items_per_page': 5, + 'current':expected_first, + 'first':expected_first, + 'last':expected_last, + 'next':expected_next, + } + + s = RDFSerializer(profiles=['euro_dcat_ap']) + g = s.g + + s.serialize_catalog(catalog_dict, dataset_dicts=[dataset], pagination_info=pagination) + + paged_collection = list(g.subjects(RDF.type, HYDRA.PagedCollection)) + assert len(paged_collection) == 1 + + # Pagination item: next + next = list(g.objects(paged_collection[0], HYDRA.next)) + assert len(next) == 1 + assert str(next[0]) == expected_next + next_page = list(g.objects(paged_collection[0], HYDRA.nextPage)) + assert len(next_page) == 1 + assert str(next_page[0]) == expected_next + + # Pagination item: previous + previous_page = list(g.objects(paged_collection[0], HYDRA.previousPage)) + assert len(previous_page) == 0 + previous = list(g.objects(paged_collection[0], HYDRA.previous)) + assert len(previous) == 0 + + # Pagination item: last + last = list(g.objects(paged_collection[0], HYDRA.last)) + assert len(last) == 1 + assert str(last[0]) == expected_last + last_page = list(g.objects(paged_collection[0], HYDRA.lastPage)) + assert len(last_page) == 1 + assert str(last_page[0]) == expected_last + + # Pagination item: count + total_items = list(g.objects(paged_collection[0], HYDRA.totalItems)) + assert len(total_items) == 1 + assert str(total_items[0]) == "12" + + # Pagination item: items_per_page + items_per_page = list(g.objects(paged_collection[0], HYDRA.itemsPerPage)) + assert len(items_per_page) == 1 + assert str(items_per_page[0]) == "5" + @pytest.mark.ckan_config(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, 'true') def test_set_missing_license_for_resource(self): ''' Check the behavior if param in config is set: Add license_id to the resource'''