From cd1d3f0822208e1e8291cdec787710cbeb4ba038 Mon Sep 17 00:00:00 2001
From: amercader
Date: Thu, 30 May 2024 10:36:54 +0200
Subject: [PATCH] [#56] Add temporal extent

Add a `temporal_coverage` field with repeating `start` / `end` subfields
to the DCAT AP 2.1 scheming schema.

When serializing, each `temporal_coverage` item becomes a blank node typed
`dct:PeriodOfTime`, carrying `schema:startDate` / `schema:endDate` for the
values that are present, and is linked from the dataset via `dct:temporal`.

When migrating legacy extras into repeating subfields, the extras prefix
checked for `temporal_coverage` is `temporal_` (via a field name mapping);
every other field keeps using its own name as the prefix.

---
 .../dcat/profiles/euro_dcat_ap_scheming.py    | 21 +++++-
 ckanext/dcat/schemas/dcat_ap_2.1.yaml         | 14 +++-
 ckanext/dcat/tests/test_scheming_support.py   | 68 +++++++++++++++++++
 3 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
index 6bd570a9..4353d2a7 100644
--- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
+++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
@@ -9,6 +9,9 @@
     DCT,
     VCARD,
     FOAF,
+    SCHEMA,
+    SKOS,
+    LOCN,
 )
 
 
@@ -62,14 +65,18 @@ def _parse_list_value(data_dict, field_name):
                 _parse_list_value(resource_dict, field_name)
 
         # Repeating subfields
+        new_fields_mapping = {
+            "temporal_coverage": "temporal"
+        }
         for schema_field in self._dataset_schema["dataset_fields"]:
             if "repeating_subfields" in schema_field:
                 # Check if existing extras need to be migrated
                 field_name = schema_field["field_name"]
                 new_extras = []
                 new_dict = {}
+                check_name = new_fields_mapping.get(field_name, field_name)
                 for extra in dataset_dict.get("extras", []):
-                    if extra["key"].startswith(f"{field_name}_"):
+                    if extra["key"].startswith(f"{check_name}_"):
                         subfield = extra["key"][extra["key"].index("_") + 1 :]
                         if subfield in [
                             f["field_name"] for f in schema_field["repeating_subfields"]
@@ -83,6 +90,7 @@ def _parse_list_value(data_dict, field_name):
                     dataset_dict[field_name] = [new_dict]
                     dataset_dict["extras"] = new_extras
 
+        # Repeating subfields: resources
         for schema_field in self._dataset_schema["resource_fields"]:
             if "repeating_subfields" in schema_field:
                 # Check if value needs to be load from JSON
@@ -154,6 +162,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
                 value_modifier=self._add_mailto,
             )
 
+        temporal = dataset_dict.get("temporal_coverage")
+        if isinstance(temporal, list) and len(temporal):
+            for item in temporal:
+                temporal_ref = BNode()
+                self.g.add((temporal_ref, RDF.type, DCT.PeriodOfTime))
+                if item.get("start"):
+                    self._add_date_triple(temporal_ref, SCHEMA.startDate, item["start"])
+                if item.get("end"):
+                    self._add_date_triple(temporal_ref, SCHEMA.endDate, item["end"])
+                self.g.add((dataset_ref, DCT.temporal, temporal_ref))
+
         resources = dataset_dict.get("resources", [])
         for resource in resources:
             if resource.get("access_services"):
diff --git a/ckanext/dcat/schemas/dcat_ap_2.1.yaml b/ckanext/dcat/schemas/dcat_ap_2.1.yaml
index f5373c3a..3a848751 100644
--- a/ckanext/dcat/schemas/dcat_ap_2.1.yaml
+++ b/ckanext/dcat/schemas/dcat_ap_2.1.yaml
@@ -107,7 +107,19 @@ dataset_fields:
 - field_name: dcat_type
   label: Type
   # TODO: controlled vocabulary?
-  #
+
+- field_name: temporal_coverage
+  label: Temporal coverage
+  repeating_subfields:
+
+    - field_name: start
+      label: Start
+      # TODO: dcat_date preset
+
+    - field_name: end
+      label: End
+      # TODO: dcat_date preset
+
 - field_name: access_rights
   label: Access rights
   validators: ignore_missing unicode_safe
diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py
index e2508e0f..f37d57f0 100644
--- a/ckanext/dcat/tests/test_scheming_support.py
+++ b/ckanext/dcat/tests/test_scheming_support.py
@@ -80,6 +80,10 @@ def test_e2e_ckan_to_dcat(self):
                     "type": "public_body",
                 },
             ],
+            "temporal_coverage": [
+                {"start": "1905-03-01", "end": "2013-01-05"},
+                {"start": "2024-04-10", "end": "2024-05-29"},
+            ],
             "resources": [
                 {
                     "name": "Resource 1",
@@ -221,6 +225,38 @@ def test_e2e_ckan_to_dcat(self):
             dataset_dict["publisher"][0]["type"],
         )
 
+        temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]
+
+        assert len(temporal) == len(dataset["temporal_coverage"])
+        assert self._triple(
+            g,
+            temporal[0][2],
+            SCHEMA.startDate,
+            dataset_dict["temporal_coverage"][0]["start"] + "T00:00:00",
+            data_type=XSD.dateTime,
+        )
+        assert self._triple(
+            g,
+            temporal[0][2],
+            SCHEMA.endDate,
+            dataset_dict["temporal_coverage"][0]["end"] + "T00:00:00",
+            data_type=XSD.dateTime,
+        )
+        assert self._triple(
+            g,
+            temporal[1][2],
+            SCHEMA.startDate,
+            dataset_dict["temporal_coverage"][1]["start"] + "T00:00:00",
+            data_type=XSD.dateTime,
+        )
+        assert self._triple(
+            g,
+            temporal[1][2],
+            SCHEMA.endDate,
+            dataset_dict["temporal_coverage"][1]["end"] + "T00:00:00",
+            data_type=XSD.dateTime,
+        )
+
         distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]
 
         # Resources: core fields
@@ -355,6 +391,36 @@ def test_publisher_fallback_org_ignored_if_publisher_field_present(self):
             g, publisher[0][2], FOAF.name, dataset_dict["publisher"][0]["name"]
         )
 
+    def test_legacy_fields(self):
+
+        dataset_dict = {
+            "name": "test-dataset-2",
+            "title": "Test DCAT dataset 2",
+            "notes": "Lorem ipsum",
+            "extras": [
+                {"key": "contact_name", "value": "Test Contact"},
+                {"key": "contact_email", "value": "contact@example.org"},
+                {"key": "publisher_name", "value": "Test Publisher"},
+                {"key": "publisher_email", "value": "publisher@example.org"},
+                {"key": "publisher_url", "value": "https://example.org"},
+                {"key": "publisher_type", "value": "public_body"},
+            ],
+        }
+
+        dataset = call_action("package_create", **dataset_dict)
+
+        s = RDFSerializer()
+        g = s.g
+
+        dataset_ref = s.graph_from_dataset(dataset)
+        contact_details = [t for t in g.triples((dataset_ref, DCAT.contactPoint, None))]
+        assert len(contact_details) == 1
+        assert self._triple(g, contact_details[0][2], VCARD.fn, "Test Contact")
+
+        publisher = [t for t in g.triples((dataset_ref, DCT.publisher, None))]
+        assert len(publisher) == 1
+        assert self._triple(g, publisher[0][2], FOAF.name, "Test Publisher")
+
 
 @pytest.mark.usefixtures("with_plugins", "clean_db")
 @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@@ -444,6 +510,8 @@ def test_e2e_dcat_to_ckan(self):
             dataset["publisher"][0]["type"]
             == "http://purl.org/adms/publishertype/NonProfitOrganisation"
         )
+        assert dataset["temporal_coverage"][0]["start"] == "1905-03-01"
+        assert dataset["temporal_coverage"][0]["end"] == "2013-01-05"
 
         resource = dataset["resources"][0]
 