diff --git a/CHANGELOG.md b/CHANGELOG.md index da0748b4..a4222837 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD) * Fix DCAT date validator on empty values ([#297](https://github.com/ckan/ckanext-dcat/pull/297)) +* Add support for hydra collection type PartialCollectionView ([#299](https://github.com/ckan/ckanext-dcat/pull/299)) ## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30 @@ -117,7 +118,7 @@ ## [v1.1.0](https://github.com/ckan/ckanext-dcat/compare/v1.0.0...v1.1.0) - 2020-03-12 -* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174)) +* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174)) * Fix `after_show - set_titles` in plugins.py ([#172](https://github.com/ckan/ckanext-dcat/pull/172)) * Add support for DCT.rightsStatement in DCT.accessRights and DCT.rights ([#177](https://github.com/ckan/ckanext-dcat/pull/177)) * Add support for additional vcard representations ([#178](https://github.com/ckan/ckanext-dcat/pull/178)) diff --git a/ckanext/dcat/harvesters/_json.py b/ckanext/dcat/harvesters/_json.py index 5eea7ac0..c058bc54 100644 --- a/ckanext/dcat/harvesters/_json.py +++ b/ckanext/dcat/harvesters/_json.py @@ -6,6 +6,7 @@ import uuid import requests +import sqlalchemy as sa from ckan import model from ckan import logic @@ -273,7 +274,8 @@ def import_stage(self, harvest_object): # the harvest object id (on the after_show hook from the harvester # plugin) model.Session.execute( - 'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + sa.text('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + ) model.Session.flush() elif status == 'change': diff --git a/ckanext/dcat/harvesters/rdf.py b/ckanext/dcat/harvesters/rdf.py index 564ba1af..7bc402e3 100644 --- a/ckanext/dcat/harvesters/rdf.py +++ 
b/ckanext/dcat/harvesters/rdf.py @@ -6,6 +6,8 @@ import hashlib import traceback +import sqlalchemy as sa + import ckan.plugins as p import ckan.model as model @@ -278,7 +280,7 @@ def import_stage(self, harvest_object): harvest_object.guid)) except p.toolkit.ObjectNotFound: log.info('Package {0} already deleted.'.format(harvest_object.package_id)) - + return True if harvest_object.content is None: @@ -392,7 +394,9 @@ def import_stage(self, harvest_object): # Defer constraints and flush so the dataset can be indexed with # the harvest object id (on the after_show hook from the harvester # plugin) - model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.execute( + sa.text('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + ) model.Session.flush() p.toolkit.get_action('package_create')(context, dataset) diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index ef3230f6..acf5af57 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -35,6 +35,8 @@ def _get_default_rdf_profiles(): """Helper function used fo documenting the rdf profiles config option""" return " ".join(DEFAULT_RDF_PROFILES) +SUPPORTED_PAGINATION_COLLECTION_DESIGNS = [HYDRA.PartialCollectionView, HYDRA.PagedCollection] + class RDFProcessor(object): @@ -125,14 +127,15 @@ def next_page(self): ''' Returns the URL of the next page or None if there is no next page ''' - for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection): - # Try to find HYDRA.next first - for o in self.g.objects(pagination_node, HYDRA.next): - return str(o) - - # If HYDRA.next is not found, try HYDRA.nextPage (deprecated) - for o in self.g.objects(pagination_node, HYDRA.nextPage): - return str(o) + for supported_collection_type in SUPPORTED_PAGINATION_COLLECTION_DESIGNS: + for pagination_node in self.g.subjects(RDF.type, supported_collection_type): + # Try to find HYDRA.next first + for o in self.g.objects(pagination_node, 
HYDRA.next): + return str(o) + + # If HYDRA.next is not found, try HYDRA.nextPage (deprecated) + for o in self.g.objects(pagination_node, HYDRA.nextPage): + return str(o) return None def parse(self, data, _format=None): diff --git a/ckanext/dcat/tests/profiles/base/test_base_parser.py b/ckanext/dcat/tests/profiles/base/test_base_parser.py index 46aa0e3c..20a409ac 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_parser.py +++ b/ckanext/dcat/tests/profiles/base/test_base_parser.py @@ -13,13 +13,15 @@ RDFParserException, RDFProfileException, DEFAULT_RDF_PROFILES, - RDF_PROFILES_CONFIG_OPTION + RDF_PROFILES_CONFIG_OPTION, + SUPPORTED_PAGINATION_COLLECTION_DESIGNS ) from ckanext.dcat.profiles import RDFProfile DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") +HYDRA = Namespace('http://www.w3.org/ns/hydra/core#') def _default_graph(): @@ -207,6 +209,34 @@ def test_parse_pagination_next_page_both_vocabularies(self): assert p.next_page() == 'http://example.com/catalog.xml?page=next' + @pytest.mark.parametrize("collection_design", SUPPORTED_PAGINATION_COLLECTION_DESIGNS + [HYDRA.Unsupported]) + def test_parse_pagination_next_page_different_collection_designs(self, collection_design): + design = collection_design.lstrip().split('#')[1] + + data = f''' + + + 245 + http://example.com/catalog.xml?page=3 + 100 + http://example.com/catalog.xml?page=2 + http://example.com/catalog.xml?page=1 + + + ''' + + p = RDFParser() + + p.parse(data) + + if collection_design in SUPPORTED_PAGINATION_COLLECTION_DESIGNS: + assert p.next_page() == 'http://example.com/catalog.xml?page=2' + else: + assert p.next_page() == None + def test_parse_without_pagination(self): data = '''