Skip to content

Commit

Permalink
[#56] Common preset for DCAT date-based fields
Browse files Browse the repository at this point in the history
Support at the validator level for year, year-month, date and datetime
values, which are correctly typed in the RDF serialization.
At the UI level a date input is used by default as it was difficult to
provide one that supported all inputs.
  • Loading branch information
amercader committed Jun 6, 2024
1 parent d456c00 commit b1e1718
Show file tree
Hide file tree
Showing 7 changed files with 189 additions and 20 deletions.
30 changes: 20 additions & 10 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ckan.model.license import LicenseRegister
from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
from ckanext.dcat.validators import is_year, is_year_month, is_date

DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
Expand Down Expand Up @@ -934,22 +935,31 @@ def _add_date_triple(self, subject, predicate, value, _type=Literal):
"""
Adds a new triple with a date object
Dates are parsed using dateutil, and if the date obtained is correct,
added to the graph as an XSD.dateTime value.
If the value is a valid xsd:gYear, xsd:gYearMonth or xsd:date, it is added
to the graph with that datatype. If not, the value will be parsed using
dateutil, and if the date obtained is correct, added to the graph as an
xsd:dateTime value.
If there are parsing errors, the literal string value is added.
"""
if not value:
return
try:
default_datetime = datetime.datetime(1, 1, 1, 0, 0, 0)
_date = parse_date(value, default=default_datetime)

self.g.add(
(subject, predicate, _type(_date.isoformat(), datatype=XSD.dateTime))
)
except ValueError:
self.g.add((subject, predicate, _type(value)))
if is_year(value):
self.g.add((subject, predicate, _type(value, datatype=XSD.gYear)))
elif is_year_month(value):
self.g.add((subject, predicate, _type(value, datatype=XSD.gYearMonth)))
elif is_date(value):
self.g.add((subject, predicate, _type(value, datatype=XSD.date)))
else:
try:
default_datetime = datetime.datetime(1, 1, 1, 0, 0, 0)
_date = parse_date(value, default=default_datetime)

self.g.add(
(subject, predicate, _type(_date.isoformat(), datatype=XSD.dateTime))
)
except ValueError:
self.g.add((subject, predicate, _type(value)))

def _last_catalog_modification(self):
"""
Expand Down
13 changes: 7 additions & 6 deletions ckanext/dcat/schemas/dcat_ap_2.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,13 @@ dataset_fields:
# Note: this will fall back to metadata_created if not present
- field_name: issued
label: Release date
# TODO: dcat_date preset
preset: dcat_date


# Note: this will fall back to metadata_modified if not present
- field_name: modified
label: Modification date
# TODO: dcat_date preset
preset: dcat_date

- field_name: identifier
label: Identifier
Expand All @@ -114,11 +115,11 @@ dataset_fields:

- field_name: start
label: Start
# TODO: dcat_date preset
preset: dcat_date

- field_name: end
label: End
# TODO: dcat_date preset
preset: dcat_date

- field_name: temporal_resolution
label: Temporal resolution
Expand Down Expand Up @@ -275,11 +276,11 @@ resource_fields:

- field_name: issued
label: Release date
# TODO: dcat_date preset
preset: dcat_date

- field_name: modified
label: Modification date
# TODO: dcat_date preset
preset: dcat_date

- field_name: language
label: Language
Expand Down
12 changes: 12 additions & 0 deletions ckanext/dcat/schemas/presets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
scheming_presets_version: 1
about: Presets for the ckanext-dcat extension
about_url: "http://github.com/ckan/ckanext-dcat"

presets:

- preset_name: dcat_date
values:
# Note: use datetime.html or datetime_tz.html if you want to include an input for time
form_snippet: date.html
display_snippet: dcat_date.html
validators: ignore_missing dcat_date convert_to_json_if_datetime
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{ h.render_datetime(data[field.field_name]) }}

{# Use the following if you want to include the time as well #}
{# h.render_datetime(data[field.field_name], with_hours=True) #}
24 changes: 24 additions & 0 deletions ckanext/dcat/tests/test_euro_dcatap_profile_serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,30 @@ def test_hash_algorithm_not_uri(self):
assert self._triple(g, checksum, SPDX.checksumValue, resource['hash'], data_type='http://www.w3.org/2001/XMLSchema#hexBinary')
assert self._triple(g, checksum, SPDX.algorithm, resource['hash_algorithm'])

@pytest.mark.parametrize("value,data_type", [
    ("2024", XSD.gYear),
    ("2024-05", XSD.gYearMonth),
    ("2024-05-31", XSD.date),
    ("2024-05-31T00:00:00", XSD.dateTime),
    ("2024-05-31T12:30:01", XSD.dateTime),
    ("2024-05-31T12:30:01.451243", XSD.dateTime),
])
def test_dates_data_types(self, value, data_type):
    """
    The `issued` value is serialized unchanged and typed with the XSD
    datatype matching its precision (year, year-month, date or datetime).
    """
    serializer = RDFSerializer(profiles=['euro_dcat_ap'])
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset({
        'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
        'name': 'test-dataset',
        'title': 'Test DCAT dataset',
        'issued': value,
    })

    # Object (third element) of the dct:issued triple for the dataset
    issued = self._triple(graph, dataset_ref, DCT.issued, None)[2]

    assert str(issued) == value
    assert issued.datatype == data_type


class TestEuroDCATAPProfileSerializeCatalog(BaseSerializeTest):

Expand Down
72 changes: 68 additions & 4 deletions ckanext/dcat/tests/test_scheming_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json")
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config(
"ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
Expand Down Expand Up @@ -547,7 +550,10 @@ def test_legacy_fields(self):
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json")
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config(
"ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
Expand Down Expand Up @@ -576,7 +582,10 @@ def test_mimetype_is_guessed(self):
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json")
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config(
"ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
Expand Down Expand Up @@ -733,7 +742,10 @@ def test_e2e_dcat_to_ckan(self):
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config("scheming.presets", "ckanext.scheming:presets.json")
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config(
"ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
Expand Down Expand Up @@ -801,3 +813,55 @@ def test_spatial_field(self):
assert search_dict["spatial"] == json.dumps(
dataset_dict["spatial_coverage"][0]["centroid"]
)


@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
    "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_2.1.yaml"
)
@pytest.mark.ckan_config(
    "scheming.presets",
    "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config(
    "ckanext.dcat.rdf.profiles", "euro_dcat_ap_2 euro_dcat_ap_scheming"
)
class TestSchemingPresets:
    """Tests for the presets shipped in ckanext.dcat.schemas:presets.yaml."""

    def test_dcat_date(self):
        """
        Year, year-month, date and datetime values come back from
        package_create exactly as they were submitted.
        """
        payload = {
            # Core fields
            "name": "test-dataset",
            "title": "Test DCAT dataset",
            "notes": "Some notes",
            "issued": "2024",
            "modified": "2024-10",
            "temporal_coverage": [
                {"start": "1905-03-01T10:07:31.182680", "end": "2013-01-05"},
                {"start": "2024-04-10T10:07:31", "end": "2024-05-29"},
            ],
        }

        created = call_action("package_create", **payload)

        # Year ("issued") and year-month ("modified")
        for field_name in ("issued", "modified"):
            assert created[field_name] == payload[field_name]

        submitted_periods = payload["temporal_coverage"]
        stored_periods = created["temporal_coverage"]

        # Date
        assert stored_periods[0]["end"] == submitted_periods[0]["end"]

        # Datetime, with and without microseconds
        for index in (0, 1):
            assert (
                stored_periods[index]["start"]
                == submitted_periods[index]["start"]
            )
54 changes: 54 additions & 0 deletions ckanext/dcat/validators.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import datetime
import json
import re

from ckantoolkit import (
missing,
StopOnError,
get_validator,
Invalid,
_,
)
from ckanext.scheming.validation import scheming_validator

# Patterns for the lexical forms of the XSD date-related datatypes. They are
# raw strings so that "\+" reaches the regex engine as written instead of
# being treated as an (invalid) string escape sequence.
# All of them accept an optional leading "-" on the year and an optional
# timezone suffix ("Z" or a +/-hh:mm offset up to 14:00).

# https://www.w3.org/TR/xmlschema11-2/#gYear
regexp_xsd_year = re.compile(
    r"-?([1-9][0-9]{3,}|0[0-9]{3})(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
)

# https://www.w3.org/TR/xmlschema11-2/#gYearMonth
regexp_xsd_year_month = re.compile(
    r"-?([1-9][0-9]{3,}|0[0-9]{3})-(0[1-9]|1[0-2])(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
)

# https://www.w3.org/TR/xmlschema11-2/#date
# Note: this is a purely lexical check, the day is not validated against the
# month (e.g. "2024-02-31" matches).
regexp_xsd_date = re.compile(
    r"-?([1-9][0-9]{3,}|0[0-9]{3})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])(Z|(\+|-)((0[0-9]|1[0-3]):[0-5][0-9]|14:00))?"
)


def is_year(value):
    """
    Check whether ``value`` is a string in the xsd:gYear lexical form.

    Returns the match object (truthy) if the whole string matches
    ``regexp_xsd_year`` and ``None`` otherwise. Non-string values never
    match.
    """
    # fullmatch() raises TypeError on non-string input (e.g. an integer year
    # or a datetime object), so report those as "not a year" instead
    if not isinstance(value, str):
        return None
    return regexp_xsd_year.fullmatch(value)


def is_year_month(value):
    """
    Check whether ``value`` is a string in the xsd:gYearMonth lexical form.

    Returns the match object (truthy) if the whole string matches
    ``regexp_xsd_year_month`` and ``None`` otherwise. Non-string values
    never match.
    """
    # fullmatch() raises TypeError on non-string input, so report those as
    # "not a year-month" instead
    if not isinstance(value, str):
        return None
    return regexp_xsd_year_month.fullmatch(value)


def is_date(value):
    """
    Check whether ``value`` is a string in the xsd:date lexical form.

    Returns the match object (truthy) if the whole string matches
    ``regexp_xsd_date`` and ``None`` otherwise. Non-string values never
    match.
    """
    # fullmatch() raises TypeError on non-string input (e.g. a date object),
    # so report those as "not a date" instead
    if not isinstance(value, str):
        return None
    return regexp_xsd_date.fullmatch(value)


def dcat_date(key, data, errors, context):
    """
    Validate a value given as a year, year-month, date or datetime.

    ``datetime.datetime`` instances and strings matching one of the
    xsd:gYear, xsd:gYearMonth or xsd:date patterns are accepted as they are.
    Any other value is handed over to the ``scheming_isodatetime`` validator.

    Returns ``None`` when the value was accepted as is, or the original
    value once ``scheming_isodatetime`` has run without errors.

    Raises ``Invalid`` with the list of supported formats if
    ``scheming_isodatetime`` rejects the value.
    """
    value = data[key]

    if isinstance(value, datetime.datetime):
        return

    # Only strings can be matched against the XSD patterns (the regex helpers
    # would raise TypeError on e.g. an integer year), anything else goes
    # straight to scheming_isodatetime below
    if isinstance(value, str) and (
        is_year(value) or is_year_month(value) or is_date(value)
    ):
        return

    # Looked up lazily, it is only needed for values not accepted above
    scheming_isodatetime = get_validator("scheming_isodatetime")

    try:
        # The validator factory is called with an empty field and schema
        scheming_isodatetime({}, {})(key, data, errors, context)
    except Invalid:
        # Replace the generic message with one listing the supported formats
        raise Invalid(
            _(
                "Date format incorrect. Supported formats are YYYY, YYYY-MM, YYYY-MM-DD and YYYY-MM-DDTHH:MM:SS"
            )
        )

    return value


@scheming_validator
def scheming_multiple_number(field, schema):
Expand Down Expand Up @@ -77,4 +130,5 @@ def _scheming_multiple_number(key, data, errors, context):

# Maps each validator name to the validator callable defined in this module
dcat_validators = {
"scheming_multiple_number": scheming_multiple_number,
"dcat_date": dcat_date,
}

0 comments on commit b1e1718

Please sign in to comment.