Commit
[#56] Add temporal extent
amercader committed May 30, 2024
1 parent 1bce834 commit cd1d3f0
Showing 3 changed files with 101 additions and 2 deletions.
21 changes: 20 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_scheming.py
@@ -9,6 +9,9 @@
DCT,
VCARD,
FOAF,
SCHEMA,
SKOS,
LOCN,
)


@@ -62,14 +65,18 @@ def _parse_list_value(data_dict, field_name):
_parse_list_value(resource_dict, field_name)

# Repeating subfields
new_fields_mapping = {
"temporal_coverage": "temporal"
}
for schema_field in self._dataset_schema["dataset_fields"]:
if "repeating_subfields" in schema_field:
# Check if existing extras need to be migrated
field_name = schema_field["field_name"]
new_extras = []
new_dict = {}
check_name = new_fields_mapping.get(field_name, field_name)
for extra in dataset_dict.get("extras", []):
if extra["key"].startswith(f"{field_name}_"):
if extra["key"].startswith(f"{check_name}_"):
subfield = extra["key"][extra["key"].index("_") + 1 :]
if subfield in [
f["field_name"] for f in schema_field["repeating_subfields"]
@@ -83,6 +90,7 @@ def _parse_list_value(data_dict, field_name)
dataset_dict[field_name] = [new_dict]
dataset_dict["extras"] = new_extras

# Repeating subfields: resources
for schema_field in self._dataset_schema["resource_fields"]:
if "repeating_subfields" in schema_field:
# Check if value needs to be loaded from JSON
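
For reference, the dataset-level loop above folds flat extras produced by the legacy euro_dcat_ap profile into the new repeating subfields, and new_fields_mapping lets temporal_coverage pick up the legacy temporal_* keys. A minimal sketch of the expected transformation (the values are illustrative, not taken from this commit):

# Before: legacy profile output, with the temporal extent stored as flat extras
dataset_dict = {
    "name": "example",
    "extras": [
        {"key": "temporal_start", "value": "1905-03-01"},
        {"key": "temporal_end", "value": "2013-01-05"},
    ],
}

# After the migration loop runs, check_name is "temporal" for the
# "temporal_coverage" field, so both extras are consumed and the dataset
# dict carries the repeating subfield instead:
#
#   dataset_dict["temporal_coverage"] == [{"start": "1905-03-01", "end": "2013-01-05"}]
#   dataset_dict["extras"] == []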
@@ -154,6 +162,17 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
value_modifier=self._add_mailto,
)

temporal = dataset_dict.get("temporal_coverage")
if isinstance(temporal, list) and len(temporal):
for item in temporal:
temporal_ref = BNode()
self.g.add((temporal_ref, RDF.type, DCT.PeriodOfTime))
if item.get("start"):
self._add_date_triple(temporal_ref, SCHEMA.startDate, item["start"])
if item.get("end"):
self._add_date_triple(temporal_ref, SCHEMA.endDate, item["end"])
self.g.add((dataset_ref, DCT.temporal, temporal_ref))

resources = dataset_dict.get("resources", [])
for resource in resources:
if resource.get("access_services"):
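The graph_from_dataset addition above emits one dct:PeriodOfTime blank node per temporal_coverage item, using schema:startDate and schema:endDate; date-only values are promoted to xsd:dateTime with a T00:00:00 time, as the tests below check. A rough sketch of how that surfaces through the serializer, using the same RDFSerializer entry point as the tests (the dataset values and the Turtle in the comment are indicative only):

from ckanext.dcat.processors import RDFSerializer

dataset_dict = {
    "id": "xxxx",  # illustrative; normally the dict comes from package_show
    "name": "example",
    "title": "Example",
    "notes": "Example dataset",
    "temporal_coverage": [{"start": "1905-03-01", "end": "2013-01-05"}],
}

s = RDFSerializer()
dataset_ref = s.graph_from_dataset(dataset_dict)

# The serialized graph should then contain something along these lines:
#
#   <dataset_ref> dct:temporal [
#       a dct:PeriodOfTime ;
#       schema:startDate "1905-03-01T00:00:00"^^xsd:dateTime ;
#       schema:endDate   "2013-01-05T00:00:00"^^xsd:dateTime
#   ] .
print(s.g.serialize(format="turtle"))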
14 changes: 13 additions & 1 deletion ckanext/dcat/schemas/dcat_ap_2.1.yaml
@@ -107,7 +107,19 @@ dataset_fields:
- field_name: dcat_type
label: Type
# TODO: controlled vocabulary?
#

- field_name: temporal_coverage
label: Temporal coverage
repeating_subfields:

- field_name: start
label: Start
# TODO: dcat_date preset

- field_name: end
label: End
# TODO: dcat_date preset

- field_name: access_rights
label: Access rights
validators: ignore_missing unicode_safe
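On the schema side, temporal_coverage is now an ordinary repeating subfield, so it can be supplied directly when creating or updating a dataset through the action API. A hedged example using the third-party ckanapi client (the instance URL, API key and dates are placeholders; any other action API client works the same way):

import ckanapi

ckan = ckanapi.RemoteCKAN("https://ckan.example.org", apikey="XXX")
ckan.action.package_create(
    name="temporal-extent-example",
    title="Temporal extent example",
    notes="Dataset with a temporal extent",
    temporal_coverage=[
        {"start": "1905-03-01", "end": "2013-01-05"},
        {"start": "2024-04-10", "end": "2024-05-29"},
    ],
)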
68 changes: 68 additions & 0 deletions ckanext/dcat/tests/test_scheming_support.py
@@ -80,6 +80,10 @@ def test_e2e_ckan_to_dcat(self):
"type": "public_body",
},
],
"temporal_coverage": [
{"start": "1905-03-01", "end": "2013-01-05"},
{"start": "2024-04-10", "end": "2024-05-29"},
],
"resources": [
{
"name": "Resource 1",
@@ -221,6 +225,38 @@ def test_e2e_ckan_to_dcat(self):
dataset_dict["publisher"][0]["type"],
)

temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]

assert len(temporal) == len(dataset["temporal_coverage"])
assert self._triple(
g,
temporal[0][2],
SCHEMA.startDate,
dataset_dict["temporal_coverage"][0]["start"] + "T00:00:00",
data_type=XSD.dateTime,
)
assert self._triple(
g,
temporal[0][2],
SCHEMA.endDate,
dataset_dict["temporal_coverage"][0]["end"] + "T00:00:00",
data_type=XSD.dateTime,
)
assert self._triple(
g,
temporal[1][2],
SCHEMA.startDate,
dataset_dict["temporal_coverage"][1]["start"] + "T00:00:00",
data_type=XSD.dateTime,
)
assert self._triple(
g,
temporal[1][2],
SCHEMA.endDate,
dataset_dict["temporal_coverage"][1]["end"] + "T00:00:00",
data_type=XSD.dateTime,
)

distribution_ref = self._triple(g, dataset_ref, DCAT.distribution, None)[2]

# Resources: core fields
@@ -355,6 +391,36 @@ def test_publisher_fallback_org_ignored_if_publisher_field_present(self):
g, publisher[0][2], FOAF.name, dataset_dict["publisher"][0]["name"]
)

def test_legacy_fields(self):

dataset_dict = {
"name": "test-dataset-2",
"title": "Test DCAT dataset 2",
"notes": "Lorem ipsum",
"extras": [
{"key": "contact_name", "value": "Test Contact"},
{"key": "contact_email", "value": "contact@example.org"},
{"key": "publisher_name", "value": "Test Publisher"},
{"key": "publisher_email", "value": "publisher@example.org"},
{"key": "publisher_url", "value": "https://example.org"},
{"key": "publisher_type", "value": "public_body"},
],
}

dataset = call_action("package_create", **dataset_dict)

s = RDFSerializer()
g = s.g

dataset_ref = s.graph_from_dataset(dataset)
contact_details = [t for t in g.triples((dataset_ref, DCAT.contactPoint, None))]
assert len(contact_details) == 1
assert self._triple(g, contact_details[0][2], VCARD.fn, "Test Contact")

publisher = [t for t in g.triples((dataset_ref, DCT.publisher, None))]
assert len(publisher) == 1
assert self._triple(g, publisher[0][2], FOAF.name, "Test Publisher")


@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@@ -444,6 +510,8 @@ def test_e2e_dcat_to_ckan(self):
dataset["publisher"][0]["type"]
== "http://purl.org/adms/publishertype/NonProfitOrganisation"
)
assert dataset["temporal_coverage"][0]["start"] == "1905-03-01"
assert dataset["temporal_coverage"][0]["end"] == "2013-01-05"

resource = dataset["resources"][0]

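In the opposite direction, test_e2e_dcat_to_ckan parses a DCAT fixture that is not part of this diff, and the assertions above show that a dct:temporal period ends up in the temporal_coverage subfields. A minimal sketch of that parse, assuming the euro_dcat_ap_scheming profile is configured as in these tests and that the period uses schema:startDate / schema:endDate (the Turtle here is illustrative, not the actual fixture):

from ckanext.dcat.processors import RDFParser

ttl = """
@prefix dcat:   <http://www.w3.org/ns/dcat#> .
@prefix dct:    <http://purl.org/dc/terms/> .
@prefix schema: <http://schema.org/> .
@prefix xsd:    <http://www.w3.org/2001/XMLSchema#> .

<https://example.org/dataset/1> a dcat:Dataset ;
    dct:title "Example" ;
    dct:temporal [
        a dct:PeriodOfTime ;
        schema:startDate "1905-03-01"^^xsd:date ;
        schema:endDate   "2013-01-05"^^xsd:date
    ] .
"""

p = RDFParser()
p.parse(ttl, _format="turtle")
dataset_dict = next(p.datasets())

# Per the assertions above, the parsed dict should carry:
#   dataset_dict["temporal_coverage"][0]["start"] == "1905-03-01"
#   dataset_dict["temporal_coverage"][0]["end"] == "2013-01-05"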
