Skip to content

Commit

Permalink
Merge pull request #18 from mjanez/feature/hydra-vocab
Browse files Browse the repository at this point in the history
Feature/hydra vocab
  • Loading branch information
mjanez authored Apr 5, 2024
2 parents 138c341 + 20a9f73 commit 87b640a
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 20 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# Changelog

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...HEAD)
## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...HEAD)

## [v1.7.0](https://github.com/ckan/ckanext-dcat/compare/v1.6.0...v1.7.0) - 2024-04-04

* Adds support for the latest Hydra vocabulary. For backward compatibility, the old properties are still supported but marked as deprecated. (#267)

## [v1.6.0](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...v1.6.0) - 2024-02-29

Expand Down
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This extension provides plugins that allow CKAN to expose and consume metadata f

It also offers other features related to Semantic Data like exposing the necessary markup to get your datasets indexed in [Google Dataset Search](https://toolbox.google.com/datasetsearch).

>**Warning**
> [!IMPORTANT]
> * Custom extension designed for use with: [`mjanez/ckanext-schemingdcat`](https://github.com/mjanez/ckanext-schemingdcat)
> * Contains [custom profiles](#custom-profiles) such as `euro_dcat_ap_2`, `spain_dcat` or `spain_dcat_ap`, to be used with the [Spanish context for some codelists and metadata properties (GeoDCAT-AP ES)](https://github.com/mjanez/ckanext-schemingdcat#geodcat-ap-es) or the [GeoDCAT-AP EU version](https://github.com/mjanez/ckanext-schemingdcat#geodcat-ap-eu). All schema information is available in the [README](https://github.com/mjanez/ckanext-schemingdcat#schemas).
Expand Down Expand Up @@ -182,10 +182,9 @@ The number of datasets returned is limited. The response will include paging inf
@prefix hydra: <http://www.w3.org/ns/hydra/core#> .

<http://example.com/catalog.ttl?page=1> a hydra:PagedCollection ;
hydra:firstPage "http://example.com/catalog.ttl?page=1" ;
hydra:itemsPerPage 100 ;
hydra:lastPage "http://example.com/catalog.ttl?page=3" ;
hydra:nextPage "http://example.com/catalog.ttl?page=2" ;
hydra:first "http://example.com/catalog.ttl?page=1" ;
hydra:last "http://example.com/catalog.ttl?page=3" ;
hydra:next "http://example.com/catalog.ttl?page=2" ;
hydra:totalItems 283 .

The default number of datasets returned (100) can be modified by CKAN site maintainers using the following configuration option on your ini file:
Expand Down
29 changes: 18 additions & 11 deletions ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,15 @@ def next_page(self):
Returns the URL of the next page or None if there is no next page
'''
for pagination_node in self.g.subjects(RDF.type, HYDRA.PagedCollection):
# Try to find HYDRA.next first
for o in self.g.objects(pagination_node, HYDRA.next):
return str(o)

# If HYDRA.next is not found, try HYDRA.nextPage (deprecated)
for o in self.g.objects(pagination_node, HYDRA.nextPage):
return str(o)
return None


def parse(self, data, _format=None):
'''
Parses an RDF graph serialization into the class graph
Expand Down Expand Up @@ -178,7 +182,6 @@ def datasets(self):

yield dataset_dict


class RDFSerializer(RDFProcessor):
'''
A CKAN to RDF serializer based on rdflib
Expand Down Expand Up @@ -209,19 +212,23 @@ def _add_pagination_triples(self, paging_info):
pagination_ref = BNode()
self.g.add((pagination_ref, RDF.type, HYDRA.PagedCollection))

# The predicates `nextPage`, `previousPage`, `firstPage`, `lastPage`
# and `itemsPerPage` are deprecated and will be removed in the future
items = [
('next', HYDRA.nextPage),
('previous', HYDRA.previousPage),
('first', HYDRA.firstPage),
('last', HYDRA.lastPage),
('count', HYDRA.totalItems),
('items_per_page', HYDRA.itemsPerPage),
('next', [HYDRA.nextPage, HYDRA.next]),
('previous', [HYDRA.previousPage, HYDRA.previous]),
('first', [HYDRA.firstPage, HYDRA.first]),
('last', [HYDRA.lastPage, HYDRA.last]),
('count', [HYDRA.totalItems]),
('items_per_page', [HYDRA.itemsPerPage]),
]

for item in items:
key, predicate = item
key, predicates = item
if paging_info.get(key):
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))
for predicate in predicates:
self.g.add((pagination_ref, predicate,
Literal(paging_info[key])))

return pagination_ref

Expand Down
46 changes: 45 additions & 1 deletion ckanext/dcat/tests/test_base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_parse_data(self):

assert len(p.g) == 2

def test_parse_pagination_next_page(self):
def test_parse_pagination_next_page_deprecated_vocabulary_only(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
Expand All @@ -163,6 +163,50 @@ def test_parse_pagination_next_page(self):

assert p.next_page() == 'http://example.com/catalog.xml?page=2'

def test_parse_pagination_next_page_updated_vocabulary_only(self):
    '''
    A paged collection that only uses the current Hydra term
    (hydra:next) exposes its next page URL through next_page()
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">245</hydra:totalItems>
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=2</hydra:next>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    expected_url = 'http://example.com/catalog.xml?page=2'
    assert parser.next_page() == expected_url

def test_parse_pagination_next_page_both_vocabularies(self):
    '''
    When a paged collection carries both hydra:next and the deprecated
    hydra:nextPage, next_page() returns the hydra:next value
    '''
    rdf_xml = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:hydra="http://www.w3.org/ns/hydra/core#">
<hydra:PagedCollection rdf:about="http://example.com/catalog.xml?page=1">
<hydra:last>http://example.com/catalog.xml?page=3</hydra:last>
<hydra:next>http://example.com/catalog.xml?page=next</hydra:next>
<hydra:nextPage>http://example.com/catalog.xml?page=nextPage</hydra:nextPage>
<hydra:first>http://example.com/catalog.xml?page=1</hydra:first>
</hydra:PagedCollection>
</rdf:RDF>
'''

    parser = RDFParser()
    parser.parse(rdf_xml)

    # The two predicates point at different URLs on purpose, so the
    # assertion tells which one was picked.
    expected_url = 'http://example.com/catalog.xml?page=next'
    assert parser.next_page() == expected_url

def test_parse_without_pagination(self):

data = '''<?xml version="1.0" encoding="utf-8" ?>
Expand Down
73 changes: 72 additions & 1 deletion ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ckantoolkit.tests import helpers, factories

from ckanext.dcat import utils
from ckanext.dcat.processors import RDFSerializer
from ckanext.dcat.processors import RDFSerializer, HYDRA
from ckanext.dcat.profiles import (DCAT, DCT, ADMS, XSD, VCARD, FOAF, SCHEMA,
SKOS, LOCN, GSP, OWL, SPDX, GEOJSON_IMT,
DISTRIBUTION_LICENSE_FALLBACK_CONFIG)
Expand Down Expand Up @@ -1250,6 +1250,77 @@ def test_subcatalog(self):
assert len(dataset_title) == 1
assert str(dataset_title[0]) == dataset['title']

def test_catalog_pagination(self):
    '''
    Serializes a catalog together with pagination info and checks the
    Hydra pagination triples attached to the hydra:PagedCollection node,
    for both the current predicates (`next`, `first`, `last`, ...) and
    the deprecated ones (`nextPage`, `firstPage`, `lastPage`, ...)
    '''
    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'test dataset',
        'extras': [
            {'key': 'source_catalog_title', 'value': 'Subcatalog example'},
            {'key': 'source_catalog_homepage', 'value': 'http://subcatalog.example'},
            {'key': 'source_catalog_description', 'value': 'Subcatalog example description'}
        ]
    }
    catalog_dict = {
        'title': 'My Catalog',
        'description': 'An Open Data Catalog',
        'homepage': 'http://example.com',
        'language': 'de',
    }

    expected_first = 'http://subcatalog.example?page=1'
    expected_next = 'http://subcatalog.example?page=2'
    expected_last = 'http://subcatalog.example?page=3'

    # No 'previous' key on purpose: the first page has no previous page
    pagination = {
        'count': 12,
        'items_per_page': 5,
        'current': expected_first,
        'first': expected_first,
        'last': expected_last,
        'next': expected_next,
    }

    s = RDFSerializer(profiles=['euro_dcat_ap'])
    g = s.g

    s.serialize_catalog(catalog_dict, dataset_dicts=[dataset], pagination_info=pagination)

    paged_collection = list(g.subjects(RDF.type, HYDRA.PagedCollection))
    assert len(paged_collection) == 1

    def values(predicate):
        # String form of every object linked to the paged collection
        # through `predicate`; comparing against a list checks both the
        # number of triples and their values in one assertion.
        return [str(o) for o in g.objects(paged_collection[0], predicate)]

    # Pagination item: next
    assert values(HYDRA.next) == [expected_next]
    assert values(HYDRA.nextPage) == [expected_next]

    # Pagination item: previous (absent from the pagination info)
    assert values(HYDRA.previous) == []
    assert values(HYDRA.previousPage) == []

    # Pagination item: first
    assert values(HYDRA.first) == [expected_first]
    assert values(HYDRA.firstPage) == [expected_first]

    # Pagination item: last
    assert values(HYDRA.last) == [expected_last]
    assert values(HYDRA.lastPage) == [expected_last]

    # Pagination item: count
    assert values(HYDRA.totalItems) == ["12"]

    # Pagination item: items_per_page
    assert values(HYDRA.itemsPerPage) == ["5"]

@pytest.mark.ckan_config(DISTRIBUTION_LICENSE_FALLBACK_CONFIG, 'true')
def test_set_missing_license_for_resource(self):
''' Check the behavior if param in config is set: Add license_id to the resource'''
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

version = '1.6.0'
version = '1.7.0'

setup(
name='ckanext-dcat',
Expand Down

0 comments on commit 87b640a

Please sign in to comment.