Merge pull request #18 from mjanez/feature/hydra-vocab

Feature/hydra vocab
mjanez · Apr 5, 2024 · 87b640a · 87b640a
2 parents 138c341 + 20a9f73
commit 87b640a
Show file tree

Hide file tree

Showing 6 changed files with 144 additions and 20 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,10 @@
 # Changelog
 
-## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...HEAD)
+## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...HEAD)
 
+## [v1.7.0](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...v1.7.0) - 2024-04-04
+
+* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated. (#267)
 
 ## [v1.6.0](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...v1.6.0) - 2024-02-29
 

diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ This extension provides plugins that allow CKAN to expose and consume metadata f
 
 It also offers other features related to Semantic Data like exposing the necessary markup to get your datasets indexed in [Google Dataset Search](https://toolbox.google.com/datasetsearch).
 
->**Warning**
+> [!IMPORTANT]
 > * Custom extension designed for use with: [`mjanez/ckanext-schemingdcat`](https://github.com/mjanez/ckanext-schemingdcat)
 > * Contains [custom profiles](#custom-profiles) such as `euro_dcat_ap_2`, `spain_dcat` or `spain_dcat_ap` to be used with [Spanish context for some codelists and metadata properties (GeoDCAT-AP ES)](https://github.com/mjanez/ckanext-schemingdcat#geodcat-ap-es) or [GeoDCAT-AP EU version](https://github.com/mjanez/ckanext-schemingdcat#geodcat-ap-eu). All schema information is available in the [README](https://github.com/mjanez/ckanext-schemingdcat#schemas).
 
@@ -182,10 +182,9 @@ The number of datasets returned is limited. The response will include paging inf
     @prefix hydra: <http://www.w3.org/ns/hydra/core#> .
 
     <http://example.com/catalog.ttl?page=1> a hydra:PagedCollection ;
-        hydra:firstPage "http://example.com/catalog.ttl?page=1" ;
-        hydra:itemsPerPage 100 ;
-        hydra:lastPage "http://example.com/catalog.ttl?page=3" ;
-        hydra:nextPage "http://example.com/catalog.ttl?page=2" ;
+        hydra:first "http://example.com/catalog.ttl?page=1" ;
+        hydra:last "http://example.com/catalog.ttl?page=3" ;
+        hydra:next "http://example.com/catalog.ttl?page=2" ;
         hydra:totalItems 283 .
 
 The default number of datasets returned (100) can be modified by CKAN site maintainers using the following configuration option on your ini file:

diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py
@@ -116,11 +116,15 @@ def next_page(self):
         Returns the URL of the next page or None if there is no next page
         '''
         for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
+            # Try to find HYDRA.next first
+            for o in self.g.objects(pagination_node, HYDRA.next):
+                return str(o)
+
+            # If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
             for o in self.g.objects(pagination_node, HYDRA.nextPage):
                 return str(o)
         return None
 
-
     def parse(self, data, _format=None):
         '''
         Parses an RDF graph serialization into the class graph
@@ -178,7 +182,6 @@ def datasets(self):
 
             yield dataset_dict
 
-
 class RDFSerializer(RDFProcessor):
     '''
     A CKAN to RDF serializer based on rdflib
@@ -209,19 +212,23 @@ def _add_pagination_triples(self, paging_info):
             pagination_ref = BNode()
         self.g.add((pagination_ref, RDF.type, HYDRA.PagedCollection))
 
+        #  The predicates `nextPage`, `previousPage`, `firstPage`, `lastPage`
+        #  and `itemsPerPage` are deprecated and will be removed in the future
         items = [
-            ('next', HYDRA.nextPage),
-            ('previous', HYDRA.previousPage),
-            ('first', HYDRA.firstPage),
-            ('last', HYDRA.lastPage),
-            ('count', HYDRA.totalItems),
-            ('items_per_page', HYDRA.itemsPerPage),
+            ('next', [HYDRA.nextPage, HYDRA.next]),
+            ('previous', [HYDRA.previousPage, HYDRA.previous]),
+            ('first', [HYDRA.firstPage, HYDRA.first]),
+            ('last', [HYDRA.lastPage, HYDRA.last]),
+            ('count', [HYDRA.totalItems]),
+            ('items_per_page', [HYDRA.itemsPerPage]),
         ]
+
         for item in items:
-            key, predicate = item
+            key, predicates = item
             if paging_info.get(key):
-                self.g.add((pagination_ref, predicate,
-                            Literal(paging_info[key])))
+                for predicate in predicates:
+                    self.g.add((pagination_ref, predicate,
+                                Literal(paging_info[key])))
 
         return pagination_ref
 

diff --git a/ckanext/dcat/tests/test_base_parser.py b/ckanext/dcat/tests/test_base_parser.py
@@ -140,7 +140,7 @@ def test_parse_data(self):
 
         assert len(p.g) == 2
 
-    def test_parse_pagination_next_page(self):
+    def test_parse_pagination_next_page_deprecated_vocabulary_only(self):
 
         data = '''<?xml version="1.0" encoding="utf-8" ?>
         <rdf:RDF
@@ -163,6 +163,50 @@ def test_parse_pagination_next_page(self):
 
         assert p.next_page() == 'http://example.com/catalog.xml?page=2'
 
+    def test_parse_pagination_next_page_updated_vocabulary_only(self):
+
+        data = '''<?xml version="1.0" encoding="utf-8" ?>
+        <rdf:RDF
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns:hydra="http://www.w3.org/ns/hydra/core#">
+         <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
+            <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
+            <hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
+            <hydra:next>http://example.com/catalog.xml?page=2</hydra:next>
+            <hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
+        </hydra:PagedCollection>
+        </rdf:RDF>
+        '''
+
+        p = RDFParser()
+
+        p.parse(data)
+
+        assert p.next_page() == 'http://example.com/catalog.xml?page=2'
+
+    def test_parse_pagination_next_page_both_vocabularies(self):
+
+        data = '''<?xml version="1.0" encoding="utf-8" ?>
+        <rdf:RDF
+         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+         xmlns:hydra="http://www.w3.org/ns/hydra/core#">
+         <hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
+            <hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
+            <hydra:next>http://example.com/catalog.xml?page=next</hydra:next>
+            <hydra:nextPage>http://example.com/catalog.xml?page=nextPage</hydra:nextPage>
+            <hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
+        </hydra:PagedCollection>
+        </rdf:RDF>
+        '''
+
+        p = RDFParser()
+
+        p.parse(data)
+
+        assert p.next_page() == 'http://example.com/catalog.xml?page=next'
+
     def test_parse_without_pagination(self):
 
         data = '''<?xml version="1.0" encoding="utf-8" ?>

diff --git a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
@@ -15,7 +15,7 @@
 from ckantoolkit.tests import helpers, factories
 
 from ckanext.dcat import utils
-from ckanext.dcat.processors import RDFSerializer
+from ckanext.dcat.processors import RDFSerializer, HYDRA
 from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
                                    SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT, 
                                    DISTRIBUTION_LICENSE_FALLBACK_CONFIG)
@@ -1250,6 +1250,77 @@ def test_subcatalog(self):
         assert len(dataset_title) == 1
         assert str(dataset_title[0]) == dataset['title']
 
+    def test_catalog_pagination(self):
+        dataset = {
+            'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
+            'name': 'test-dataset',
+            'title': 'test dataset',
+            'extras': [
+                {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
+                {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
+                {'key': 'source_catalog_description', 'value': 'Subcatalog example description'}
+            ]
+        }
+        catalog_dict = {
+            'title': 'My Catalog',
+            'description': 'An Open Data Catalog',
+            'homepage': 'http://example.com',
+            'language': 'de',
+        }
+
+        expected_first = 'http://subcatalog.example?page=1'
+        expected_next = 'http://subcatalog.example?page=2'
+        expected_last = 'http://subcatalog.example?page=3'
+
+        pagination = {
+            'count': 12,
+            'items_per_page': 5,
+            'current':expected_first,
+            'first':expected_first,
+            'last':expected_last,
+            'next':expected_next,
+        }
+
+        s = RDFSerializer(profiles=['euro_dcat_ap'])
+        g = s.g
+
+        s.serialize_catalog(catalog_dict, dataset_dicts=[dataset], pagination_info=pagination)
+
+        paged_collection = list(g.subjects(RDF.type, HYDRA.PagedCollection))
+        assert len(paged_collection) == 1
+
+        # Pagination item: next
+        next = list(g.objects(paged_collection[0], HYDRA.next))
+        assert len(next) == 1
+        assert str(next[0]) == expected_next
+        next_page = list(g.objects(paged_collection[0], HYDRA.nextPage))
+        assert len(next_page) == 1
+        assert str(next_page[0]) == expected_next
+
+        # Pagination item: previous
+        previous_page = list(g.objects(paged_collection[0], HYDRA.previousPage))
+        assert len(previous_page) == 0
+        previous = list(g.objects(paged_collection[0], HYDRA.previous))
+        assert len(previous) == 0
+
+        # Pagination item: last
+        last = list(g.objects(paged_collection[0], HYDRA.last))
+        assert len(last) == 1
+        assert str(last[0]) == expected_last
+        last_page = list(g.objects(paged_collection[0], HYDRA.lastPage))
+        assert len(last_page) == 1
+        assert str(last_page[0]) == expected_last
+
+        # Pagination item: count
+        total_items = list(g.objects(paged_collection[0], HYDRA.totalItems))
+        assert len(total_items) == 1
+        assert str(total_items[0]) == "12"
+
+        # Pagination item: items_per_page
+        items_per_page = list(g.objects(paged_collection[0], HYDRA.itemsPerPage))
+        assert len(items_per_page) == 1
+        assert str(items_per_page[0]) == "5"
+
     @pytest.mark.ckan_config(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, 'true')
     def test_set_missing_license_for_resource(self):
         ''' Check the behavior if param in config is set: Add license_id to the resource'''

diff --git a/setup.py b/setup.py
@@ -1,6 +1,6 @@
 from setuptools import setup, find_packages
 
-version = '1.6.0'
+version = '1.7.0'
 
 setup(
     name='ckanext-dcat',