From bebd7960481aff5cc8c88201856cbc59166e2df0 Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Tue, 3 Sep 2024 13:42:06 +0200 Subject: [PATCH] Add support for Hydra collection type PartialCollectionView --- CHANGELOG.md | 2 +- ckanext/dcat/processors.py | 19 ++++++----- .../tests/profiles/base/test_base_parser.py | 32 ++++++++++++++++++- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 059dcbfa..6fd439a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD) -* ... +* Add support for Hydra collection type PartialCollectionView ## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30 diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index ef3230f6..acf5af57 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -35,6 +35,8 @@ def _get_default_rdf_profiles(): """Helper function used fo documenting the rdf profiles config option""" return " ".join(DEFAULT_RDF_PROFILES) +SUPPORTED_PAGINATION_COLLECTION_DESIGNS = [HYDRA.PartialCollectionView, HYDRA.PagedCollection] + class RDFProcessor(object): @@ -125,14 +127,15 @@ def next_page(self): ''' Returns the URL of the next page or None if there is no next page ''' - for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection): - # Try to find HYDRA.next first - for o in self.g.objects(pagination_node, HYDRA.next): - return str(o) - - # If HYDRA.next is not found, try HYDRA.nextPage (deprecated) - for o in self.g.objects(pagination_node, HYDRA.nextPage): - return str(o) + for supported_collection_type in SUPPORTED_PAGINATION_COLLECTION_DESIGNS: + for pagination_node in self.g.subjects(RDF.type, supported_collection_type): + # Try to find HYDRA.next first + for o in self.g.objects(pagination_node, HYDRA.next): + return str(o) + + # If HYDRA.next is not found, try HYDRA.nextPage (deprecated) + for o in
self.g.objects(pagination_node, HYDRA.nextPage): + return str(o) return None def parse(self, data, _format=None): diff --git a/ckanext/dcat/tests/profiles/base/test_base_parser.py b/ckanext/dcat/tests/profiles/base/test_base_parser.py index 46aa0e3c..20a409ac 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_parser.py +++ b/ckanext/dcat/tests/profiles/base/test_base_parser.py @@ -13,13 +13,15 @@ RDFParserException, RDFProfileException, DEFAULT_RDF_PROFILES, - RDF_PROFILES_CONFIG_OPTION + RDF_PROFILES_CONFIG_OPTION, + SUPPORTED_PAGINATION_COLLECTION_DESIGNS ) from ckanext.dcat.profiles import RDFProfile DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") +HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') def _default_graph(): @@ -207,6 +209,34 @@ def test_parse_pagination_next_page_both_vocabularies(self): assert p.next_page() == 'http://example.com/catalog.xml?page=next' + @pytest.mark.parametrize("collection_design", SUPPORTED_PAGINATION_COLLECTION_DESIGNS + [HYDRA.Unsupported]) + def test_parse_pagination_next_page_different_collection_designs(self, collection_design): + design = collection_design.lstrip().split('#')[1] + + data = f''' + + + 245 + http://example.com/catalog.xml?page=3 + 100 + http://example.com/catalog.xml?page=2 + http://example.com/catalog.xml?page=1 + + + ''' + + p = RDFParser() + + p.parse(data) + + if collection_design in SUPPORTED_PAGINATION_COLLECTION_DESIGNS: + assert p.next_page() == 'http://example.com/catalog.xml?page=2' + else: + assert p.next_page() == None + def test_parse_without_pagination(self): data = '''