From bebd7960481aff5cc8c88201856cbc59166e2df0 Mon Sep 17 00:00:00 2001
From: seitenbau-govdata <govdata@seitenbau.com>
Date: Tue, 3 Sep 2024 13:42:06 +0200
Subject: [PATCH] Add support for Hydra collection type PartialCollectionView

---
 CHANGELOG.md                                  |  2 +-
 ckanext/dcat/processors.py                    | 19 ++++++-----
 .../tests/profiles/base/test_base_parser.py   | 32 ++++++++++++++++++-
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 059dcbfa..6fd439a9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD)
 
-* ...
+* Add support for the Hydra collection type `PartialCollectionView`
 
 ## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30
 
diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py
index ef3230f6..acf5af57 100644
--- a/ckanext/dcat/processors.py
+++ b/ckanext/dcat/processors.py
@@ -35,6 +35,8 @@ def _get_default_rdf_profiles():
     """Helper function used fo documenting the rdf profiles config option"""
     return " ".join(DEFAULT_RDF_PROFILES)
 
+# rdf:type values that RDFParser.next_page() scans, in this order, for a next-page link
+SUPPORTED_PAGINATION_COLLECTION_DESIGNS = [HYDRA.PartialCollectionView, HYDRA.PagedCollection]
 
 class RDFProcessor(object):
 
@@ -125,14 +127,15 @@ def next_page(self):
         '''
         Returns the URL of the next page or None if there is no next page
         '''
-        for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
-            # Try to find HYDRA.next first
-            for o in self.g.objects(pagination_node, HYDRA.next):
-                return str(o)
-
-            # If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
-            for o in self.g.objects(pagination_node, HYDRA.nextPage):
-                return str(o)
+        for supported_collection_type in SUPPORTED_PAGINATION_COLLECTION_DESIGNS:
+            for pagination_node in self.g.subjects(RDF.type, supported_collection_type):
+                # Try to find HYDRA.next first
+                for o in self.g.objects(pagination_node, HYDRA.next):
+                    return str(o)
+
+                # If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
+                for o in self.g.objects(pagination_node, HYDRA.nextPage):
+                    return str(o)
         return None
 
     def parse(self, data, _format=None):
diff --git a/ckanext/dcat/tests/profiles/base/test_base_parser.py b/ckanext/dcat/tests/profiles/base/test_base_parser.py
index 46aa0e3c..20a409ac 100644
--- a/ckanext/dcat/tests/profiles/base/test_base_parser.py
+++ b/ckanext/dcat/tests/profiles/base/test_base_parser.py
@@ -13,13 +13,15 @@
     RDFParserException,
     RDFProfileException,
     DEFAULT_RDF_PROFILES,
-    RDF_PROFILES_CONFIG_OPTION
+    RDF_PROFILES_CONFIG_OPTION,
+    SUPPORTED_PAGINATION_COLLECTION_DESIGNS
 )
 
 from ckanext.dcat.profiles import RDFProfile
 
 DCT = Namespace("http://purl.org/dc/terms/")
 DCAT = Namespace("http://www.w3.org/ns/dcat#")
+HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
 
 
 def _default_graph():
@@ -207,6 +209,34 @@ def test_parse_pagination_next_page_both_vocabularies(self):
 
         assert p.next_page() == 'http://example.com/catalog.xml?page=next'
 
+    @pytest.mark.parametrize("collection_design", SUPPORTED_PAGINATION_COLLECTION_DESIGNS + [HYDRA.Unsupported])
+    def test_parse_pagination_next_page_different_collection_designs(self, collection_design):
+        """next_page() returns the next page URL only for supported Hydra collection types."""
+        design = collection_design.split('#')[1]
+
+        data = f'''<?xml version="1.0" encoding="utf-8" ?>
+        <rdf:RDF
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns:hydra="http://www.w3.org/ns/hydra/core#">
+         <hydra:{design} rdf:about="http://example.com/catalog.xml?page=1">
+            <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
+            <hydra:lastPage>http://example.com/catalog.xml?page=3</hydra:lastPage>
+            <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">100</hydra:itemsPerPage>
+            <hydra:nextPage>http://example.com/catalog.xml?page=2</hydra:nextPage>
+            <hydra:firstPage>http://example.com/catalog.xml?page=1</hydra:firstPage>
+        </hydra:{design}>
+        </rdf:RDF>
+        '''
+
+        p = RDFParser()
+        p.parse(data)
+
+        if collection_design in SUPPORTED_PAGINATION_COLLECTION_DESIGNS:
+            assert p.next_page() == 'http://example.com/catalog.xml?page=2'
+        else:
+            assert p.next_page() is None
+
     def test_parse_without_pagination(self):
 
         data = '''<?xml version="1.0" encoding="utf-8" ?>