diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 36b44c00..a2eddd71 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -11,6 +11,7 @@ from ckan.model.license import LicenseRegister from ckan.lib.helpers import resource_formats from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS +from ckanext.dcat.validators import is_year, is_year_month, is_date DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") @@ -934,22 +935,31 @@ def _add_date_triple(self, subject, predicate, value, _type=Literal): """ Adds a new triple with a date object - Dates are parsed using dateutil, and if the date obtained is correct, - added to the graph as an XSD.dateTime value. + Values that are a valid xsd:gYear, xsd:gYearMonth or xsd:date are added + with that datatype. Otherwise the value is parsed using dateutil, and if + the date obtained is correct, added to the graph as an xsd:dateTime value. If there are parsing errors, the literal string value is added. 
""" if not value: return - try: - default_datetime = datetime.datetime(1, 1, 1, 0, 0, 0) - _date = parse_date(value, default=default_datetime) - self.g.add( - (subject, predicate, _type(_date.isoformat(), datatype=XSD.dateTime)) - ) - except ValueError: - self.g.add((subject, predicate, _type(value))) + if is_year(value): + self.g.add((subject, predicate, _type(value, datatype=XSD.gYear))) + elif is_year_month(value): + self.g.add((subject, predicate, _type(value, datatype=XSD.gYearMonth))) + elif is_date(value): + self.g.add((subject, predicate, _type(value, datatype=XSD.date))) + else: + try: + default_datetime = datetime.datetime(1, 1, 1, 0, 0, 0) + _date = parse_date(value, default=default_datetime) + + self.g.add( + (subject, predicate, _type(_date.isoformat(), datatype=XSD.dateTime)) + ) + except ValueError: + self.g.add((subject, predicate, _type(value))) def _last_catalog_modification(self): """ diff --git a/ckanext/dcat/schemas/dcat_ap_2.1.yaml b/ckanext/dcat/schemas/dcat_ap_2.1.yaml index 5ff95fab..350e359c 100644 --- a/ckanext/dcat/schemas/dcat_ap_2.1.yaml +++ b/ckanext/dcat/schemas/dcat_ap_2.1.yaml @@ -88,12 +88,13 @@ dataset_fields: # Note: this will fall back to metadata_created if not present - field_name: issued label: Release date - # TODO: dcat_date preset + preset: dcat_date + # Note: this will fall back to metadata_modified if not present - field_name: modified label: Modification date - # TODO: dcat_date preset + preset: dcat_date - field_name: identifier label: Identifier @@ -114,11 +115,11 @@ dataset_fields: - field_name: start label: Start - # TODO: dcat_date preset + preset: dcat_date - field_name: end label: End - # TODO: dcat_date preset + preset: dcat_date - field_name: temporal_resolution label: Temporal resolution @@ -275,11 +276,11 @@ resource_fields: - field_name: issued label: Release date - # TODO: dcat_date preset + preset: dcat_date - field_name: modified label: Modification date - # TODO: dcat_date preset + preset: dcat_date - 
field_name: language label: Language diff --git a/ckanext/dcat/schemas/presets.yaml b/ckanext/dcat/schemas/presets.yaml new file mode 100644 index 00000000..88be7b0c --- /dev/null +++ b/ckanext/dcat/schemas/presets.yaml @@ -0,0 +1,12 @@ +scheming_presets_version: 1 +about: Presets for the ckanext-dcat extension +about_url: "http://github.com/ckan/ckanext-dcat" + +presets: + +- preset_name: dcat_date + values: + # Note: use datetime.html or datetime_tz.html if you want to include an input for time + form_snippet: date.html + display_snippet: dcat_date.html + validators: ignore_missing dcat_date convert_to_json_if_datetime diff --git a/ckanext/dcat/templates/scheming/display_snippets/dcat_date.html b/ckanext/dcat/templates/scheming/display_snippets/dcat_date.html new file mode 100644 index 00000000..3e7f7ec6 --- /dev/null +++ b/ckanext/dcat/templates/scheming/display_snippets/dcat_date.html @@ -0,0 +1,4 @@ +{{ h.render_datetime(data[field.field_name]) }} + +{# Use the following if you want to include the time as well #} +{# h.render_datetime(data[field.field_name], with_hours=True) #} diff --git a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py index 1a389df4..bfed0160 100644 --- a/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py @@ -1128,6 +1128,30 @@ def test_hash_algorithm_not_uri(self): assert self._triple(g, checksum, SPDX.checksumValue, resource['hash'], data_type='http://www.w3.org/2001/XMLSchema#hexBinary') assert self._triple(g, checksum, SPDX.algorithm, resource['hash_algorithm']) + @pytest.mark.parametrize("value,data_type", [ + ("2024", XSD.gYear), + ("2024-05", XSD.gYearMonth), + ("2024-05-31", XSD.date), + ("2024-05-31T00:00:00", XSD.dateTime), + ("2024-05-31T12:30:01", XSD.dateTime), + ("2024-05-31T12:30:01.451243", XSD.dateTime), + ]) + def test_dates_data_types(self, value, data_type): + dataset = { + 'id': 
'4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', + 'name': 'test-dataset', + 'title': 'Test DCAT dataset', + 'issued': value, + } + + s = RDFSerializer(profiles=['euro_dcat_ap']) + g = s.g + + dataset_ref = s.graph_from_dataset(dataset) + + assert str(self._triple(g, dataset_ref, DCT.issued, None)[2]) == value + assert self._triple(g, dataset_ref, DCT.issued, None)[2].datatype == data_type + class TestEuroDCATAPProfileSerializeCatalog(BaseSerializeTest): diff --git a/ckanext/dcat/tests/test_scheming_support.py b/ckanext/dcat/tests/test_scheming_support.py index d9ec2efc..38e4da4d 100644 --- a/ckanext/dcat/tests/test_scheming_support.py +++ b/ckanext/dcat/tests/test_scheming_support.py @@ -35,7 +35,10 @@ @pytest.mark.ckan_config( "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml" ) -@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json") +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) @pytest.mark.ckan_config( "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" ) @@ -547,7 +550,10 @@ def test_legacy_fields(self): @pytest.mark.ckan_config( "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml" ) -@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json") +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) @pytest.mark.ckan_config( "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" ) @@ -576,7 +582,10 @@ def test_mimetype_is_guessed(self): @pytest.mark.ckan_config( "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml" ) -@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json") +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) @pytest.mark.ckan_config( "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" ) @@ -733,7 
+742,10 @@ def test_e2e_dcat_to_ckan(self): @pytest.mark.ckan_config( "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml" ) -@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json") +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) @pytest.mark.ckan_config( "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" ) @@ -801,3 +813,55 @@ def test_spatial_field(self): assert search_dict["spatial"] == json.dumps( dataset_dict["spatial_coverage"][0]["centroid"] ) + + +@pytest.mark.usefixtures("with_plugins", "clean_db") +@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") +@pytest.mark.ckan_config( + "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml" +) +@pytest.mark.ckan_config( + "scheming.presets", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", +) +@pytest.mark.ckan_config( + "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming" +) +class TestSchemingPresets: + def test_dcat_date(self): + dataset_dict = { + # Core fields + "name": "test-dataset", + "title": "Test DCAT dataset", + "notes": "Some notes", + "issued": "2024", + "modified": "2024-10", + "temporal_coverage": [ + {"start": "1905-03-01T10:07:31.182680", "end": "2013-01-05"}, + {"start": "2024-04-10T10:07:31", "end": "2024-05-29"}, + ], + } + + dataset = call_action("package_create", **dataset_dict) + + # Year + assert dataset["issued"] == dataset_dict["issued"] + + # Year-month + assert dataset["modified"] == dataset_dict["modified"] + + # Date + assert ( + dataset["temporal_coverage"][0]["end"] + == dataset_dict["temporal_coverage"][0]["end"] + ) + + # Datetime + assert ( + dataset["temporal_coverage"][0]["start"] + == dataset_dict["temporal_coverage"][0]["start"] + ) + assert ( + dataset["temporal_coverage"][1]["start"] + == dataset_dict["temporal_coverage"][1]["start"] + ) diff --git a/ckanext/dcat/validators.py 
b/ckanext/dcat/validators.py
index 4db20cdb..6dae17af 100644
--- a/ckanext/dcat/validators.py
+++ b/ckanext/dcat/validators.py
@@ -1,12 +1,84 @@
+import datetime
 import json
+import re
 
 from ckantoolkit import (
     missing,
     StopOnError,
+    get_validator,
+    Invalid,
     _,
 )
 from ckanext.scheming.validation import scheming_validator
 
+# Lexical patterns for the XSD partial-date types. They are raw strings so
+# that "\+" is passed to the regex engine instead of being an invalid string
+# escape.
+
+# https://www.w3.org/TR/xmlschema11-2/#gYear
+regexp_xsd_year = re.compile(
+    r"-?([1-9][0-9]{3,}|0[0-9]{3})(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
+)
+
+# https://www.w3.org/TR/xmlschema11-2/#gYearMonth
+regexp_xsd_year_month = re.compile(
+    r"-?([1-9][0-9]{3,}|0[0-9]{3})-(0[1-9]|1[0-2])(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
+)
+
+# https://www.w3.org/TR/xmlschema11-2/#date
+regexp_xsd_date = re.compile(
+    r"-?([1-9][0-9]{3,}|0[0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
+)
+
+
+def is_year(value):
+    """Return a match object if value is an xsd:gYear string, else None."""
+    if not isinstance(value, str):
+        return None
+    return regexp_xsd_year.fullmatch(value)
+
+
+def is_year_month(value):
+    """Return a match object if value is an xsd:gYearMonth string, else None."""
+    if not isinstance(value, str):
+        return None
+    return regexp_xsd_year_month.fullmatch(value)
+
+
+def is_date(value):
+    """Return a match object if value is an xsd:date string, else None."""
+    if not isinstance(value, str):
+        return None
+    return regexp_xsd_date.fullmatch(value)
+
+
+def dcat_date(key, data, errors, context):
+    """Validate a date given as YYYY, YYYY-MM, YYYY-MM-DD or a datetime.
+
+    datetime objects and strings matching the xsd:gYear, xsd:gYearMonth or
+    xsd:date patterns are accepted as they are. Any other value is handed to
+    the "scheming_isodatetime" validator; if that raises Invalid, the error
+    is re-raised with a message listing the supported formats.
+    """
+    value = data[key]
+
+    if isinstance(value, datetime.datetime):
+        return
+
+    if is_year(value) or is_year_month(value) or is_date(value):
+        return
+
+    # Only look up the fallback validator once it is actually needed.
+    scheming_isodatetime = get_validator("scheming_isodatetime")
+    try:
+        scheming_isodatetime({}, {})(key, data, errors, context)
+    except Invalid:
+        raise Invalid(
+            _(
+                "Date format incorrect. Supported formats are YYYY, YYYY-MM, YYYY-MM-DD and YYYY-MM-DDTHH:MM:SS"
+            )
+        )
+
 
 @scheming_validator
 def scheming_multiple_number(field, schema):
@@ -77,4 +149,5 @@ def _scheming_multiple_number(key, data, errors, context):
 
 dcat_validators = {
     "scheming_multiple_number": scheming_multiple_number,
+    "dcat_date": dcat_date,
 }