From ccb5ab5ac33665e9a5d4810ec0a22ec5b6be2191 Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Fri, 19 Jan 2024 15:21:49 +0100 Subject: [PATCH 1/2] Add high value datasets to EuropeanDCATAP2Profile --- CHANGELOG.md | 1 + ckanext/dcat/profiles.py | 20 ++++- .../tests/test_euro_dcatap_2_profile_parse.py | 83 ++++++++++++++++++- .../test_euro_dcatap_2_profile_serialize.py | 43 ++++++++++ 4 files changed, 144 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 995d61dc..98287ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v1.5.1...HEAD) +* Add support for `DCATAP.applicableLegislation` and `DCATAP.hvdCategory` to the `euro_dcat_ap_2` profile * Improve access service tests (#258) * Fix missing access service items when parsing dataset (#256) diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 1f5632e4..fb9e9640 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -1503,6 +1503,8 @@ def parse_dataset(self, dataset_dict, dataset_ref): for key, predicate in ( ('temporal_resolution', DCAT.temporalResolution), ('is_referenced_by', DCT.isReferencedBy), + ('applicableLegislation', DCATAP.applicableLegislation), + ('hvdCategory', DCATAP.hvdCategory), ): values = self._object_value_list(dataset_ref, predicate) if values: @@ -1543,6 +1545,14 @@ def parse_dataset(self, dataset_dict, dataset_ref): if value: resource_dict[key] = value + # Lists + for key, predicate in ( + ('applicableLegislation', DCATAP.applicableLegislation), + ): + values = self._object_value_list(distribution, predicate) + if values: + resource_dict[key] = json.dumps(values) + # Access services access_service_list = [] @@ -1594,7 +1604,9 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # Lists for key, predicate, fallbacks, type, datatype in ( ('temporal_resolution', DCAT.temporalResolution, None, Literal, XSD.duration), - ('is_referenced_by', DCT.isReferencedBy, None, URIRefOrLiteral, None) + ('is_referenced_by', DCT.isReferencedBy, None, URIRefOrLiteral, None), + ('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral, None), + ('hvdCategory', DCATAP.hvdCategory, None, URIRefOrLiteral, None), ): self._add_triple_from_dict(dataset_dict, dataset_ref, predicate, key, list_value=True, fallbacks=fallbacks, _type=type, _datatype=datatype) @@ -1650,6 +1662,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._add_triples_from_dict(resource_dict, distribution, items) + # Lists + items = [ + ('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral), + ] + self._add_list_triples_from_dict(resource_dict, distribution, items) + try: access_service_list = json.loads(resource_dict.get('access_services', '[]')) # Access service diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py index 52f6a54c..965c03ba 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py @@ -37,6 +37,8 @@ def test_dataset_all_fields(self): dist_availability = "http://publications.europa.eu/resource/authority/planned-availability/AVAILABLE" compress_format = "http://www.iana.org/assignments/media-types/application/gzip" package_format = 'http://publications.europa.eu/resource/authority/file-type/TAR' + applicable_legislation = 'http://data.europa.eu/eli/reg_impl/2023/138/oj' + hvd_category = 'http://data.europa.eu/bna/c_164e0bf5' data = ''' {end} + + {temp_res} {spatial_res} @@ -67,6 +71,7 @@ def test_dataset_all_fields(self): + @@ -85,7 +90,8 @@ def test_dataset_all_fields(self): '''.format(start=temporal_start, end=temporal_end, temp_res=temporal_resolution, spatial_res=spatial_resolution_in_meters, referenced_by=isreferencedby_uri, availability=dist_availability, compressFormat=compress_format, - packageFormat=package_format) + packageFormat=package_format, applicable_legislation=applicable_legislation, + hvd_category=hvd_category) p = RDFParser(profiles=DCAT_AP_PROFILES) @@ -115,16 +121,28 @@ def test_dataset_all_fields(self): assert extras['temporal_start'] == temporal_start assert extras['temporal_end'] == temporal_end + applicable_legislation_list = json.loads(extras['applicableLegislation']) + assert len(applicable_legislation_list) == 1 + assert applicable_legislation in applicable_legislation_list + + hvd_category_list = json.loads(extras['hvdCategory']) + assert len(hvd_category_list) == 1 + assert hvd_category in hvd_category_list + # Resources assert len(dataset['resources']) == 1 resource = dataset['resources'][0] - # Simple values + # Simple values assert resource['availability'] == dist_availability assert resource['compress_format'] == compress_format assert resource['package_format'] == package_format + # List values + dist_applicable_legislation_list = json.loads(resource.get('applicableLegislation')) + assert dist_applicable_legislation_list == applicable_legislation_list + # Access services access_service_list = json.loads(resource.get('access_services')) assert len(access_service_list) == 1 @@ -379,6 +397,67 @@ def test_isreferencedby_multiple(self): assert isreferencedby_uri in isreferencedby_list assert isreferencedby_uri_2 in isreferencedby_list + def test_high_value_datasets(self): + applicable_legislation = 'http://data.europa.eu/eli/reg_impl/2023/138/oj' + applicable_legislation_alt = 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt' + hvd_category = 'http://data.europa.eu/bna/c_164e0bf5' + hvd_category_alt = 'http://data.europa.eu/bna/c_ac64a52d' + + data = ''' + + + + {applicable_legislation_alt} + + {hvd_category_alt} + + + + {applicable_legislation_alt} + + + + + '''.format(applicable_legislation=applicable_legislation, applicable_legislation_alt=applicable_legislation_alt, + hvd_category=hvd_category, hvd_category_alt=hvd_category_alt) + + p = RDFParser(profiles=DCAT_AP_PROFILES) + + p.parse(data) + + datasets = [d for d in p.datasets()] + assert len(datasets) == 1 + + dataset = datasets[0] + + # Dataset + extras = self._extras(dataset) + + applicable_legislation_list = json.loads(extras['applicableLegislation']) + assert len(applicable_legislation_list) == 2 + assert applicable_legislation in applicable_legislation_list + assert applicable_legislation_alt in applicable_legislation_list + + hvd_category_list = json.loads(extras['hvdCategory']) + assert len(hvd_category_list) == 2 + assert hvd_category in hvd_category_list + assert hvd_category_alt in hvd_category_list + + # Resources + assert len(dataset['resources']) == 1 + + resource = dataset['resources'][0] + + dist_applicable_legislation_list = [applicable_legislation, applicable_legislation_alt] + assert dist_applicable_legislation_list == applicable_legislation_list + def test_parse_distribution_access_service(self): expected_access_services = [{ diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py index 4d016e23..c91f59ee 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py @@ -307,6 +307,49 @@ def test_temporal(self): triples.extend(self._triples(g, temporal_obj, predicate, parse_date(extras['temporal_end']).isoformat(), XSD.dateTime)) assert len(triples) == 1 + def test_high_value_datasets(self): + """ + Tests that the HVD information properties are included in the graph. + """ + + resource = { + 'id': 'c041c635-054f-4431-b647-f9186926d021', + 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'Distribution name', + 'applicableLegislation': json.dumps(['http://data.europa.eu/eli/reg_impl/2023/138/oj', 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt']), + } + + dataset = { + 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'extras': [ + {'key': 'applicableLegislation', 'value': '[\"http://data.europa.eu/eli/reg_impl/2023/138/oj\", \"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt\"]'}, + {'key': 'hvdCategory', 'value': '[\"http://data.europa.eu/bna/c_164e0bf5\", \"http://data.europa.eu/bna/c_ac64a52d\"]'}, + ], + 'resources': [ + resource + ] + } + extras = self._extras(dataset) + + s = RDFSerializer(profiles=DCAT_AP_PROFILES) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + values = json.loads(extras['applicableLegislation']) + assert len([t for t in g.triples((dataset_ref, DCATAP.applicableLegislation, None))]) == len(values) + assert self._triple(g, dataset_ref, DCATAP.applicableLegislation, URIRef(values[0])) + + values = json.loads(extras['hvdCategory']) + assert len([t for t in g.triples((dataset_ref, DCATAP.hvdCategory, None))]) == len(values) + assert self._triple(g, dataset_ref, DCATAP.hvdCategory, URIRef(values[0])) + + distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2] + self._assert_values_list(g, distribution, DCATAP.applicableLegislation, + self._get_typed_list(json.loads(resource['applicableLegislation']), URIRef)) + + def test_distribution_fields(self): resource = { From cfc4acea81d973c36021a3f44dfbfda4cad5c03b Mon Sep 17 00:00:00 2001 From: seitenbau-govdata Date: Mon, 29 Jan 2024 09:35:41 +0100 Subject: [PATCH 2/2] Standardize field names --- ckanext/dcat/profiles.py | 12 ++++++------ .../dcat/tests/test_euro_dcatap_2_profile_parse.py | 10 +++++----- .../tests/test_euro_dcatap_2_profile_serialize.py | 12 ++++++------ 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index fb9e9640..450166b0 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -1503,8 +1503,8 @@ def parse_dataset(self, dataset_dict, dataset_ref): for key, predicate in ( ('temporal_resolution', DCAT.temporalResolution), ('is_referenced_by', DCT.isReferencedBy), - ('applicableLegislation', DCATAP.applicableLegislation), - ('hvdCategory', DCATAP.hvdCategory), + ('applicable_legislation', DCATAP.applicableLegislation), + ('hvd_category', DCATAP.hvdCategory), ): values = self._object_value_list(dataset_ref, predicate) if values: @@ -1547,7 +1547,7 @@ def parse_dataset(self, dataset_dict, dataset_ref): # Lists for key, predicate in ( - ('applicableLegislation', DCATAP.applicableLegislation), + ('applicable_legislation', DCATAP.applicableLegislation), ): values = self._object_value_list(distribution, predicate) if values: @@ -1605,8 +1605,8 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): for key, predicate, fallbacks, type, datatype in ( ('temporal_resolution', DCAT.temporalResolution, None, Literal, XSD.duration), ('is_referenced_by', DCT.isReferencedBy, None, URIRefOrLiteral, None), - ('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral, None), - ('hvdCategory', DCATAP.hvdCategory, None, URIRefOrLiteral, None), + ('applicable_legislation', DCATAP.applicableLegislation, None, URIRefOrLiteral, None), + ('hvd_category', DCATAP.hvdCategory, None, URIRefOrLiteral, None), ): self._add_triple_from_dict(dataset_dict, dataset_ref, predicate, key, list_value=True, fallbacks=fallbacks, _type=type, _datatype=datatype) @@ -1664,7 +1664,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # Lists items = [ - ('applicableLegislation', DCATAP.applicableLegislation, None, URIRefOrLiteral), + ('applicable_legislation', DCATAP.applicableLegislation, None, URIRefOrLiteral), ] self._add_list_triples_from_dict(resource_dict, distribution, items) diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py index 965c03ba..eb964824 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_parse.py @@ -121,11 +121,11 @@ def test_dataset_all_fields(self): assert extras['temporal_start'] == temporal_start assert extras['temporal_end'] == temporal_end - applicable_legislation_list = json.loads(extras['applicableLegislation']) + applicable_legislation_list = json.loads(extras['applicable_legislation']) assert len(applicable_legislation_list) == 1 assert applicable_legislation in applicable_legislation_list - hvd_category_list = json.loads(extras['hvdCategory']) + hvd_category_list = json.loads(extras['hvd_category']) assert len(hvd_category_list) == 1 assert hvd_category in hvd_category_list @@ -140,7 +140,7 @@ def test_dataset_all_fields(self): assert resource['package_format'] == package_format # List values - dist_applicable_legislation_list = json.loads(resource.get('applicableLegislation')) + dist_applicable_legislation_list = json.loads(resource.get('applicable_legislation')) assert dist_applicable_legislation_list == applicable_legislation_list # Access services @@ -440,12 +440,12 @@ def test_high_value_datasets(self): # Dataset extras = self._extras(dataset) - applicable_legislation_list = json.loads(extras['applicableLegislation']) + applicable_legislation_list = json.loads(extras['applicable_legislation']) assert len(applicable_legislation_list) == 2 assert applicable_legislation in applicable_legislation_list assert applicable_legislation_alt in applicable_legislation_list - hvd_category_list = json.loads(extras['hvdCategory']) + hvd_category_list = json.loads(extras['hvd_category']) assert len(hvd_category_list) == 2 assert hvd_category in hvd_category_list assert hvd_category_alt in hvd_category_list diff --git a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py index c91f59ee..114dc602 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_2_profile_serialize.py @@ -316,15 +316,15 @@ def test_high_value_datasets(self): 'id': 'c041c635-054f-4431-b647-f9186926d021', 'package_id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'Distribution name', - 'applicableLegislation': json.dumps(['http://data.europa.eu/eli/reg_impl/2023/138/oj', 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt']), + 'applicable_legislation': json.dumps(['http://data.europa.eu/eli/reg_impl/2023/138/oj', 'http://data.europa.eu/eli/reg_impl/2023/138/oj_alt']), } dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'test-dataset', 'extras': [ - {'key': 'applicableLegislation', 'value': '[\"http://data.europa.eu/eli/reg_impl/2023/138/oj\", \"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt\"]'}, - {'key': 'hvdCategory', 'value': '[\"http://data.europa.eu/bna/c_164e0bf5\", \"http://data.europa.eu/bna/c_ac64a52d\"]'}, + {'key': 'applicable_legislation', 'value': '[\"http://data.europa.eu/eli/reg_impl/2023/138/oj\", \"http://data.europa.eu/eli/reg_impl/2023/138/oj_alt\"]'}, + {'key': 'hvd_category', 'value': '[\"http://data.europa.eu/bna/c_164e0bf5\", \"http://data.europa.eu/bna/c_ac64a52d\"]'}, ], 'resources': [ resource @@ -337,17 +337,17 @@ def test_high_value_datasets(self): dataset_ref = s.graph_from_dataset(dataset) - values = json.loads(extras['applicableLegislation']) + values = json.loads(extras['applicable_legislation']) assert len([t for t in g.triples((dataset_ref, DCATAP.applicableLegislation, None))]) == len(values) assert self._triple(g, dataset_ref, DCATAP.applicableLegislation, URIRef(values[0])) - values = json.loads(extras['hvdCategory']) + values = json.loads(extras['hvd_category']) assert len([t for t in g.triples((dataset_ref, DCATAP.hvdCategory, None))]) == len(values) assert self._triple(g, dataset_ref, DCATAP.hvdCategory, URIRef(values[0])) distribution = self._triple(g, dataset_ref, DCAT.distribution, None)[2] self._assert_values_list(g, distribution, DCATAP.applicableLegislation, - self._get_typed_list(json.loads(resource['applicableLegislation']), URIRef)) + self._get_typed_list(json.loads(resource['applicable_legislation']), URIRef)) def test_distribution_fields(self):