Skip to content

Commit

Permalink
Add high value datasets to EuropeanDCATAP2Profile
Browse files Browse the repository at this point in the history
  • Loading branch information
seitenbau-govdata committed Jan 19, 2024
1 parent 33498df commit 1701094
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...HEAD)

* Add support for `DCATAP.applicableLegislation` and `DCATAP.hvdCategory` to the `euro_dcat_ap_2` profile
* Improve access service tests (#258)
* Fix missing access service items when parsing dataset (#256)

Expand Down
20 changes: 19 additions & 1 deletion ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,6 +1503,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
for key, predicate in (
('temporal_resolution', DCAT.temporalResolution),
('is_referenced_by', DCT.isReferencedBy),
('applicableLegislation', DCATAP.applicableLegislation),
('hvdCategory', DCATAP.hvdCategory),
):
values = self._object_value_list(dataset_ref, predicate)
if values:
Expand Down Expand Up @@ -1543,6 +1545,14 @@ def parse_dataset(self, dataset_dict, dataset_ref):
if value:
resource_dict[key] = value

# Lists
for key, predicate in (
('applicableLegislation', DCATAP.applicableLegislation),
):
values = self._object_value_list(distribution, predicate)
if values:
resource_dict[key] = json.dumps(values)

# Access services
access_service_list = []

Expand Down Expand Up @@ -1594,7 +1604,9 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
# Lists
for key, predicate, fallbacks, type, datatype in (
('temporal_resolution', DCAT.temporalResolution, None, Literal, XSD.duration),
('is_referenced_by', DCT.isReferencedBy, None, URIRefOrLiteral, None)
('is_referenced_by', DCT.isReferencedBy, None, URIRefOrLiteral, None),
('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral, None),
('hvdCategory', DCATAP.hvdCategory, None, URIRefOrLiteral, None),
):
self._add_triple_from_dict(dataset_dict, dataset_ref, predicate, key, list_value=True,
fallbacks=fallbacks, _type=type, _datatype=datatype)
Expand Down Expand Up @@ -1650,6 +1662,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):

self._add_triples_from_dict(resource_dict, distribution, items)

# Lists
items = [
('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(resource_dict, distribution, items)

try:
access_service_list = json.loads(resource_dict.get('access_services', '[]'))
# Access service
Expand Down
83 changes: 81 additions & 2 deletions ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def test_dataset_all_fields(self):
dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"
compress_format = "http://www.iana.org/assignments/media-types/application/gzip"
package_format = 'http://publications.europa.eu/resource/authority/file-type/TAR'
applicable_legislation = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
hvd_category = 'http://data.europa.eu/bna/c_164e0bf5'

data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
Expand All @@ -54,6 +56,8 @@ def test_dataset_all_fields(self):
<dcat:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">{end}</dcat:endDate>
</dct:PeriodOfTime>
</dct:temporal>
<dcatap:applicableLegislation rdf:resource="{applicable_legislation}"/>
<dcatap:hvdCategory rdf:resource="{hvd_category}"/>
<dcat:temporalResolution rdf:datatype="http://www.w3.org/2001/XMLSchema#duration">{temp_res}</dcat:temporalResolution>
<dcat:spatialResolutionInMeters rdf:datatype="http://www.w3.org/2001/XMLSchema#decimal">{spatial_res}</dcat:spatialResolutionInMeters>
<dct:isReferencedBy rdf:resource="{referenced_by}"/>
Expand All @@ -67,6 +71,7 @@ def test_dataset_all_fields(self):
<dcatap:availability rdf:resource="{availability}"/>
<dcat:compressFormat rdf:resource="{compressFormat}"/>
<dcat:packageFormat rdf:resource="{packageFormat}"/>
<dcatap:applicableLegislation rdf:resource="{applicable_legislation}"/>
<dcat:accessService>
<dcat:DataService>
<dcatap:availability rdf:resource="http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE"/>
Expand All @@ -85,7 +90,8 @@ def test_dataset_all_fields(self):
'''.format(start=temporal_start, end=temporal_end, temp_res=temporal_resolution,
spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri,
availability=dist_availability, compressFormat=compress_format,
packageFormat=package_format)
packageFormat=package_format, applicable_legislation=applicable_legislation,
hvd_category=hvd_category)

p = RDFParser(profiles=DCAT_AP_PROFILES)

Expand Down Expand Up @@ -115,16 +121,28 @@ def test_dataset_all_fields(self):
assert extras['temporal_start'] == temporal_start
assert extras['temporal_end'] == temporal_end

applicable_legislation_list = json.loads(extras['applicableLegislation'])
assert len(applicable_legislation_list) == 1
assert applicable_legislation in applicable_legislation_list

hvd_category_list = json.loads(extras['hvdCategory'])
assert len(hvd_category_list) == 1
assert hvd_category in hvd_category_list

# Resources
assert len(dataset['resources']) == 1

resource = dataset['resources'][0]

# Simple values
# Simple values
assert resource['availability'] == dist_availability
assert resource['compress_format'] == compress_format
assert resource['package_format'] == package_format

# List values
dist_applicable_legislation_list = json.loads(resource.get('applicableLegislation'))
assert dist_applicable_legislation_list == applicable_legislation_list

# Access services
access_service_list = json.loads(resource.get('access_services'))
assert len(access_service_list) == 1
Expand Down Expand Up @@ -379,6 +397,67 @@ def test_isreferencedby_multiple(self):
assert isreferencedby_uri in isreferencedby_list
assert isreferencedby_uri_2 in isreferencedby_list

def test_high_value_datasets(self):
    """
    Tests that the HVD properties are parsed as lists of values.

    The dataset carries two ``applicableLegislation`` and two
    ``hvdCategory`` values and the distribution carries two
    ``applicableLegislation`` values, each given once as an
    ``rdf:resource`` reference and once as a plain literal.
    """
    applicable_legislation = 'http://data.europa.eu/eli/reg_impl/2023/138/oj'
    applicable_legislation_alt = 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt'
    hvd_category = 'http://data.europa.eu/bna/c_164e0bf5'
    hvd_category_alt = 'http://data.europa.eu/bna/c_ac64a52d'

    data = '''<?xml version="1.0" encoding="utf-8" ?>
<rdf:RDF
xmlns:dct="http://purl.org/dc/terms/"
xmlns:dcat="http://www.w3.org/ns/dcat#"
xmlns:dcatap="http://data.europa.eu/r5r/"
xmlns:schema="http://schema.org/"
xmlns:time="http://www.w3.org/2006/time"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<dcat:Dataset rdf:about="http://example.org">
<dcatap:applicableLegislation rdf:resource="{applicable_legislation}"/>
<dcatap:applicableLegislation>{applicable_legislation_alt}</dcatap:applicableLegislation>
<dcatap:hvdCategory rdf:resource="{hvd_category}"/>
<dcatap:hvdCategory>{hvd_category_alt}</dcatap:hvdCategory>
<dcat:distribution>
<dcat:Distribution rdf:about="https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/1">
<dcatap:applicableLegislation rdf:resource="{applicable_legislation}"/>
<dcatap:applicableLegislation>{applicable_legislation_alt}</dcatap:applicableLegislation>
</dcat:Distribution>
</dcat:distribution>
</dcat:Dataset>
</rdf:RDF>
'''.format(applicable_legislation=applicable_legislation, applicable_legislation_alt=applicable_legislation_alt,
           hvd_category=hvd_category, hvd_category_alt=hvd_category_alt)

    p = RDFParser(profiles=DCAT_AP_PROFILES)

    p.parse(data)

    datasets = list(p.datasets())
    assert len(datasets) == 1

    dataset = datasets[0]

    # Dataset
    extras = self._extras(dataset)

    applicable_legislation_list = json.loads(extras['applicableLegislation'])
    assert len(applicable_legislation_list) == 2
    assert applicable_legislation in applicable_legislation_list
    assert applicable_legislation_alt in applicable_legislation_list

    hvd_category_list = json.loads(extras['hvdCategory'])
    assert len(hvd_category_list) == 2
    assert hvd_category in hvd_category_list
    assert hvd_category_alt in hvd_category_list

    # Resources
    assert len(dataset['resources']) == 1

    resource = dataset['resources'][0]

    # Check the list that was actually parsed for the distribution. The
    # checks mirror the dataset-level ones (size and membership) so the
    # test does not depend on the order in which the values come back.
    dist_applicable_legislation_list = json.loads(resource['applicableLegislation'])
    assert len(dist_applicable_legislation_list) == 2
    assert applicable_legislation in dist_applicable_legislation_list
    assert applicable_legislation_alt in dist_applicable_legislation_list

def test_parse_distribution_access_service(self):

expected_access_services = [{
Expand Down
43 changes: 43 additions & 0 deletions ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,49 @@ def test_temporal(self):
triples.extend(self._triples(g, temporal_obj, predicate, parse_date(extras['temporal_end']).isoformat(), XSD.dateTime))
assert len(triples) == 1

def test_high_value_datasets(self):
    """
    Tests that the HVD information properties are included in the graph.

    Every value of the multi-valued dataset properties is checked for
    presence, along with the triple count, and the distribution's
    ``applicableLegislation`` values are checked as a list.
    """

    resource = {
        'id': 'c041c635-054f-4431-b647-f9186926d021',
        'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'Distribution name',
        'applicableLegislation': json.dumps(['http://data.europa.eu/eli/reg_impl/2023/138/oj', 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt']),
    }

    dataset = {
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'extras': [
            {'key': 'applicableLegislation', 'value': '[\"http://data.europa.eu/eli/reg_impl/2023/138/oj\", \"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt\"]'},
            {'key': 'hvdCategory', 'value': '[\"http://data.europa.eu/bna/c_164e0bf5\", \"http://data.europa.eu/bna/c_ac64a52d\"]'},
        ],
        'resources': [
            resource
        ]
    }
    extras = self._extras(dataset)

    s = RDFSerializer(profiles=DCAT_AP_PROFILES)
    g = s.g

    dataset_ref = s.graph_from_dataset(dataset)

    # Dataset-level list properties: the number of triples must match the
    # number of input values, and each value must be present as a URIRef
    # (not only the first one).
    for key, predicate in (
        ('applicableLegislation', DCATAP.applicableLegislation),
        ('hvdCategory', DCATAP.hvdCategory),
    ):
        values = json.loads(extras[key])
        assert len(list(g.triples((dataset_ref, predicate, None)))) == len(values)
        for value in values:
            assert self._triple(g, dataset_ref, predicate, URIRef(value))

    # Distribution-level list property
    distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
    self._assert_values_list(g, distribution, DCATAP.applicableLegislation,
                             self._get_typed_list(json.loads(resource['applicableLegislation']), URIRef))


def test_distribution_fields(self):

resource = {
Expand Down

0 comments on commit 1701094

Please sign in to comment.