From a8a3f255ec18d822712e0b0d900e90e7d5ed6949 Mon Sep 17 00:00:00 2001
From: amercader
Date: Wed, 19 Jun 2024 12:23:16 +0200
Subject: [PATCH] [#56] Don't serialize empty repeating subfields

Scheming adds a dict with empty keys when empty repeating subfields are
submitted from the form. Check that there's an actual value before
creating the triples when serializing
---
 .../dcat/profiles/euro_dcat_ap_scheming.py    | 11 ++++++----
 ckanext/dcat/tests/test_scheming_support.py   | 22 +++++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
index c945deff..12eb540e 100644
--- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
+++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
@@ -120,8 +120,11 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
         Add triples to the graph from new repeating subfields
         """
 
+        def _not_empty_dict(data_dict):
+            return any(data_dict.values())
+
         contact = dataset_dict.get("contact")
-        if isinstance(contact, list) and len(contact):
+        if isinstance(contact, list) and len(contact) and _not_empty_dict(contact[0]):
             for item in contact:
                 contact_uri = item.get("uri")
                 if contact_uri:
@@ -144,7 +147,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
                 )
 
         publisher = dataset_dict.get("publisher")
-        if isinstance(publisher, list) and len(publisher):
+        if isinstance(publisher, list) and len(publisher) and _not_empty_dict(publisher[0]):
             publisher = publisher[0]
             publisher_uri = publisher.get("uri")
             if publisher_uri:
@@ -172,7 +175,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
             )
 
         temporal = dataset_dict.get("temporal_coverage")
-        if isinstance(temporal, list) and len(temporal):
+        if isinstance(temporal, list) and len(temporal) and _not_empty_dict(temporal[0]):
             for item in temporal:
                 temporal_ref = BNode()
                 self.g.add((temporal_ref, RDF.type, DCT.PeriodOfTime))
@@ -183,7 +186,7 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
                 self.g.add((dataset_ref, DCT.temporal, temporal_ref))
 
         spatial = dataset_dict.get("spatial_coverage")
-        if isinstance(spatial, list) and len(spatial):
+        if isinstance(spatial, list) and len(spatial) and _not_empty_dict(spatial[0]):
             for item in spatial:
                 if item.get("uri"):
                     spatial_ref = CleanedURIRef(item["uri"])
diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py
index d8770e2f..3779780d 100644
--- a/ckanext/dcat/tests/test_scheming_support.py
+++ b/ckanext/dcat/tests/test_scheming_support.py
@@ -514,6 +514,28 @@ def test_publisher_fallback_org_ignored_if_publisher_field_present(self):
             g, publisher[0][2], FOAF.name, dataset_dict["publisher"][0]["name"]
         )
 
+    def test_empty_repeating_subfields_not_serialized(self):
+
+        dataset_dict = {
+            "name": "test-dataset-3",
+            "title": "Test DCAT dataset 3",
+            "notes": "Lorem ipsum",
+            "spatial_coverage": [
+                {
+                    "uri": "",
+                    "geom": "",
+                },
+            ],
+        }
+
+        dataset = call_action("package_create", **dataset_dict)
+
+        s = RDFSerializer()
+        g = s.g
+
+        dataset_ref = s.graph_from_dataset(dataset)
+        assert not [t for t in g.triples((dataset_ref, DCT.spatial, None))]
+
     def test_legacy_fields(self):
 
         dataset_dict = {