diff --git a/ckanext/dcat/plugins/__init__.py b/ckanext/dcat/plugins/__init__.py index 27d02df8..291b7663 100644 --- a/ckanext/dcat/plugins/__init__.py +++ b/ckanext/dcat/plugins/__init__.py @@ -147,14 +147,16 @@ def before_dataset_index(self, dataset_dict): pass if schema: + # TODO: https://github.com/ckan/ckanext-dcat/pull/281#discussion_r1610549936 for field in schema['dataset_fields']: if field['field_name'] in dataset_dict and 'repeating_subfields' in field: for index, item in enumerate(dataset_dict[field['field_name']]): for key in item: - # Index a flattened version - new_key = f'{field["field_name"]}_{index}_{key}' - - dataset_dict[new_key] = dataset_dict[field['field_name']][index][key] + value = dataset_dict[field['field_name']][index][key] + if not isinstance(value, dict): + # Index a flattened version + new_key = f'{field["field_name"]}_{index}_{key}' + dataset_dict[new_key] = value dataset_dict.pop(field['field_name'], None) return dataset_dict diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index c91a1e8e..8299b718 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -702,17 +702,19 @@ def _add_spatial_value_to_graph(self, spatial_ref, predicate, value): self.g.add((spatial_ref, predicate, Literal(value, datatype=GEOJSON_IMT))) # WKT, because GeoDCAT-AP says so try: + if isinstance(value, str): + value = json.loads(value) self.g.add( ( spatial_ref, predicate, Literal( - wkt.dumps(json.loads(value), decimals=4), + wkt.dumps(value, decimals=4), datatype=GSP.wktLiteral, ), ) ) - except (TypeError, ValueError, InvalidGeoJSONException): + except (TypeError, ValueError, InvalidGeoJSONException) as e: pass def _add_spatial_to_dict(self, dataset_dict, key, spatial): diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index 4353d2a7..6ff50a39 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -1,6 +1,6 @@ import json -from rdflib import URIRef, BNode +from rdflib import URIRef, BNode, Literal from .base import RDFProfile, CleanedURIRef, URIRefOrLiteral from .base import ( RDF, @@ -74,7 +74,7 @@ def _parse_list_value(data_dict, field_name): field_name = schema_field["field_name"] new_extras = [] new_dict = {} - check_name = new_fields_mappings.get(field_name, field_name) + check_name = new_fields_mapping.get(field_name, field_name) for extra in dataset_dict.get("extras", []): if extra["key"].startswith(f"{check_name}_"): subfield = extra["key"][extra["key"].index("_") + 1 :] @@ -173,6 +173,29 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._add_date_triple(temporal_ref, SCHEMA.endDate, item["end"]) self.g.add((dataset_ref, DCT.temporal, temporal_ref)) + spatial = dataset_dict.get("spatial_coverage") + if isinstance(spatial, list) and len(spatial): + for item in spatial: + if item.get("uri"): + spatial_ref = CleanedURIRef(item["uri"]) + else: + spatial_ref = BNode() + self.g.add((spatial_ref, RDF.type, DCT.Location)) + self.g.add((dataset_ref, DCT.spatial, spatial_ref)) + + if item.get("text"): + self.g.add((spatial_ref, SKOS.prefLabel, Literal(item["text"]))) + + for field in [ + ("geom", LOCN.geometry), + ("bbox", DCAT.bbox), + ("centroid", DCAT.centroid), + ]: + if item.get(field[0]): + self._add_spatial_value_to_graph( + spatial_ref, field[1], item[field[0]] + ) + resources = dataset_dict.get("resources", []) for resource in resources: if resource.get("access_services"): diff --git a/ckanext/dcat/schemas/dcat_ap_2.1.yaml b/ckanext/dcat/schemas/dcat_ap_2.1.yaml index 3a848751..3c4b7232 100644 --- a/ckanext/dcat/schemas/dcat_ap_2.1.yaml +++ b/ckanext/dcat/schemas/dcat_ap_2.1.yaml @@ -120,6 +120,25 @@ dataset_fields: label: End # TODO: dcat_date preset +- field_name: spatial_coverage + label: Spatial coverage + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: text + label: Label + + - field_name: geom + label: Geometry + + - field_name: bbox + label: Bounding Box + + - field_name: centroid + label: Centroid + - field_name: access_rights label: Access rights validators: ignore_missing unicode_safe diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py index f37d57f0..ae21f7b9 100644 --- a/ckanext/dcat/tests/test_scheming_support.py +++ b/ckanext/dcat/tests/test_scheming_support.py @@ -1,7 +1,7 @@ import pytest - from rdflib.namespace import RDF from rdflib.term import URIRef +from geomet import wkt from ckan.tests import factories from ckan.tests.helpers import call_action @@ -20,10 +20,15 @@ LOCN, GSP, OWL, + GEOJSON_IMT, ) from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest +# TODO: tests for spatial coverage +# TODO: index "spatial" extra + + @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @pytest.mark.ckan_config( @@ -84,6 +89,37 @@ def test_e2e_ckan_to_dcat(self): {"start": "1905-03-01", "end": "2013-01-05"}, {"start": "2024-04-10", "end": "2024-05-29"}, ], + "spatial_coverage": [ + { + "geom": { + "type": "Polygon", + "coordinates": [ + [ + [11.9936, 54.0486], + [11.9936, 54.2466], + [12.3045, 54.2466], + [12.3045, 54.0486], + [11.9936, 54.0486], + ] + ], + }, + "text": "Tarragona", + "uri": "https://sws.geonames.org/6361390/", + "bbox": { + "type": "Polygon", + "coordinates": [ + [ + [-2.1604, 42.7611], + [-2.0938, 42.7611], + [-2.0938, 42.7931], + [-2.1604, 42.7931], + [-2.1604, 42.7611], + ] + ], + }, + "centroid": {"type": "Point", "coordinates": [1.26639, 41.12386]}, + } + ], "resources": [ { "name": "Resource 1", @@ -257,6 +293,29 @@ def test_e2e_ckan_to_dcat(self): data_type=XSD.dateTime, ) + spatial = [t for t in g.triples((dataset_ref, DCT.spatial, None))] + assert len(spatial) == len(dataset["spatial_coverage"]) + assert str(spatial[0][2]) == dataset["spatial_coverage"][0]["uri"] + assert self._triple(g, spatial[0][2], RDF.type, DCT.Location) + assert self._triple( + g, spatial[0][2], SKOS.prefLabel, dataset["spatial_coverage"][0]["text"] + ) + + assert len([t for t in g.triples((spatial[0][2], LOCN.geometry, None))]) == 2 + # Geometry in GeoJSON + assert self._triple( + g, + spatial[0][2], + LOCN.geometry, + dataset["spatial_coverage"][0]["geom"], + GEOJSON_IMT, + ) + # Geometry in WKT + wkt_geom = wkt.dumps( + dataset["spatial_coverage"][0]["geom"], decimals=4 + ) + assert self._triple(g, spatial[0][2], LOCN.geometry, wkt_geom, GSP.wktLiteral) + distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2] # Resources: core fields