Merge branch 'master' into fix-date-validator-empty-values

ckan · Sep 12, 2024 · d974c3b · d974c3b
2 parents 748b23f + cb25389
commit d974c3b
Show file tree

Hide file tree

Showing 5 changed files with 53 additions and 13 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD)
 
 * Fix DCAT date validator on empty values ([#297](https://github.com/ckan/ckanext-dcat/pull/297))
+* Add support for hydra collection type PartialCollectionView ([#299](https://github.com/ckan/ckanext-dcat/pull/299))
 
 ## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30
 
@@ -117,7 +118,7 @@
 
 ## [v1.1.0](https://github.com/ckan/ckanext-dcat/compare/v1.0.0...v1.1.0) - 2020-03-12
 
-* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
+* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
 * Fix `after_show - set_titles` in plugins.py ([#172](https://github.com/ckan/ckanext-dcat/pull/172))
 * Add support for DCT.rightsStatement in DCT.accessRights and DCT.rights ([#177](https://github.com/ckan/ckanext-dcat/pull/177))
 * Add support for additional vcard representations ([#178](https://github.com/ckan/ckanext-dcat/pull/178))

diff --git a/ckanext/dcat/harvesters/_json.py b/ckanext/dcat/harvesters/_json.py
@@ -6,6 +6,7 @@
 import uuid
 
 import requests
+import sqlalchemy as sa
 
 from ckan import model
 from ckan import logic
@@ -273,7 +274,8 @@ def import_stage(self, harvest_object):
                 # the harvest object id (on the after_show hook from the harvester
                 # plugin)
                 model.Session.execute(
-                    'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
+                    sa.text('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
+                )
                 model.Session.flush()
 
             elif status == 'change':

diff --git a/ckanext/dcat/harvesters/rdf.py b/ckanext/dcat/harvesters/rdf.py
@@ -6,6 +6,8 @@
 import hashlib
 import traceback
 
+import sqlalchemy as sa
+
 import ckan.plugins as p
 import ckan.model as model
 
@@ -278,7 +280,7 @@ def import_stage(self, harvest_object):
                                                                     harvest_object.guid))
             except p.toolkit.ObjectNotFound:
                 log.info('Package {0} already deleted.'.format(harvest_object.package_id))
-            
+
             return True
 
         if harvest_object.content is None:
@@ -392,7 +394,9 @@ def import_stage(self, harvest_object):
                         # Defer constraints and flush so the dataset can be indexed with
                         # the harvest object id (on the after_show hook from the harvester
                         # plugin)
-                        model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
+                        model.Session.execute(
+                            sa.text('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
+                        )
                         model.Session.flush()
 
                         p.toolkit.get_action('package_create')(context, dataset)

diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py
@@ -35,6 +35,8 @@ def _get_default_rdf_profiles():
     """Helper function used fo documenting the rdf profiles config option"""
     return " ".join(DEFAULT_RDF_PROFILES)
 
+SUPPORTED_PAGINATION_COLLECTION_DESIGNS = [HYDRA.PartialCollectionView, HYDRA.PagedCollection]
+
 
 class RDFProcessor(object):
 
@@ -125,14 +127,15 @@ def next_page(self):
         '''
         Returns the URL of the next page or None if there is no next page
         '''
-        for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
-            # Try to find HYDRA.next first
-            for o in self.g.objects(pagination_node, HYDRA.next):
-                return str(o)
-
-            # If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
-            for o in self.g.objects(pagination_node, HYDRA.nextPage):
-                return str(o)
+        for supported_collection_type in SUPPORTED_PAGINATION_COLLECTION_DESIGNS:
+            for pagination_node in self.g.subjects(RDF.type, supported_collection_type):
+                # Try to find HYDRA.next first
+                for o in self.g.objects(pagination_node, HYDRA.next):
+                    return str(o)
+
+                # If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
+                for o in self.g.objects(pagination_node, HYDRA.nextPage):
+                    return str(o)
         return None
 
     def parse(self, data, _format=None):

diff --git a/ckanext/dcat/tests/profiles/base/test_base_parser.py b/ckanext/dcat/tests/profiles/base/test_base_parser.py
@@ -13,13 +13,15 @@
     RDFParserException,
     RDFProfileException,
     DEFAULT_RDF_PROFILES,
-    RDF_PROFILES_CONFIG_OPTION
+    RDF_PROFILES_CONFIG_OPTION,
+    SUPPORTED_PAGINATION_COLLECTION_DESIGNS
 )
 
 from ckanext.dcat.profiles import RDFProfile
 
 DCT = Namespace("http://purl.org/dc/terms/")
 DCAT = Namespace("http://www.w3.org/ns/dcat#")
+HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
 
 
 def _default_graph():
@@ -207,6 +209,34 @@ def test_parse_pagination_next_page_both_vocabularies(self):
 
         assert p.next_page() == 'http://example.com/catalog.xml?page=next'
 
+    @pytest.mark.parametrize("collection_design", SUPPORTED_PAGINATION_COLLECTION_DESIGNS + [HYDRA.Unsupported])
+    def test_parse_pagination_next_page_different_collection_designs(self, collection_design):
+        design = collection_design.lstrip().split('#')[1]
+
+        data = f'''<?xml version="1.0" encoding="utf-8" ?>
+        <rdf:RDF
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns:hydra="http://www.w3.org/ns/hydra/core#">
+         <hydra:{design} rdf:about="http://example.com/catalog.xml?page=1">
+            <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
+            <hydra:lastPage>http://example.com/catalog.xml?page=3</hydra:lastPage>
+            <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">100</hydra:itemsPerPage>
+            <hydra:nextPage>http://example.com/catalog.xml?page=2</hydra:nextPage>
+            <hydra:firstPage>http://example.com/catalog.xml?page=1</hydra:firstPage>
+        </hydra:{design}>
+        </rdf:RDF>
+        '''
+
+        p = RDFParser()
+
+        p.parse(data)
+
+        if collection_design in SUPPORTED_PAGINATION_COLLECTION_DESIGNS:
+            assert p.next_page() == 'http://example.com/catalog.xml?page=2'
+        else:
+            assert p.next_page() == None
+
     def test_parse_without_pagination(self):
 
         data = '''<?xml version="1.0" encoding="utf-8" ?>