From aefa22c91b97899d18017645ac59a2959d73f1e4 Mon Sep 17 00:00:00 2001 From: Hans-Chrstian Date: Mon, 9 Sep 2024 09:34:38 +0200 Subject: [PATCH] feat: add support for dct:identifier in publisher details --- ckanext/dcat/converters.py | 13 +- ckanext/dcat/processors.py | 3 +- ckanext/dcat/profiles/base.py | 2 + ckanext/dcat/profiles/euro_dcat_ap_base.py | 2 +- ckanext/dcat/profiles/schemaorg.py | 4 + ckanext/dcat/schemas/dcat_ap_recommended.yaml | 5 + .../tests/profiles/base/test_base_profile.py | 2 + .../dcat_ap/test_euro_dcatap_profile_parse.py | 1 + .../test_schemaorg_profile_serialize.py | 4 + docs/mapping.md | 137 +++++++++--------- examples/dcat/dataset.rdf | 1 + 11 files changed, 102 insertions(+), 72 deletions(-) diff --git a/ckanext/dcat/converters.py b/ckanext/dcat/converters.py index ddce1723..2e27a0ee 100644 --- a/ckanext/dcat/converters.py +++ b/ckanext/dcat/converters.py @@ -27,8 +27,17 @@ def dcat_to_ckan(dcat_dict): if isinstance(dcat_publisher, basestring): package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher}) elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'): - package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) - package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) + if dcat_publisher.get('name'): + package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) + + if dcat_publisher.get('mbox'): + package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) + + if dcat_publisher.get('identifier'): + package_dict['extras'].append({ + 'key': 'dcat_publisher_id', + 'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734 + }) package_dict['extras'].append({ 'key': 'language', diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index acf5af57..932d58f5 100644 --- 
a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -407,7 +407,8 @@ def _get_from_extra(key): ('name', Literal, FOAF.name, True,), ('email', Literal, FOAF.mbox, False,), ('url', URIRef, FOAF.homepage,False,), - ('type', Literal, DCT.type, False,)) + ('type', Literal, DCT.type, False,), + ('identifier', URIRef, DCT.identifier, False,)) _pub = _get_from_extra('source_catalog_publisher') if _pub: diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 396aa152..fd5af492 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -468,6 +468,8 @@ def _publisher(self, subject, predicate): publisher["type"] = self._object_value(agent, DCT.type) + publisher['identifier'] = self._object_value(agent, DCT.identifier) + return publisher def _contact_details(self, subject, predicate): diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index 56525a28..110e04dd 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -123,7 +123,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): # Publisher publisher = self._publisher(dataset_ref, DCT.publisher) - for key in ("uri", "name", "email", "url", "type"): + for key in ("uri", "name", "email", "url", "type", "identifier"): if publisher.get(key): dataset_dict["extras"].append( {"key": "publisher_{0}".format(key), "value": publisher.get(key)} diff --git a/ckanext/dcat/profiles/schemaorg.py b/ckanext/dcat/profiles/schemaorg.py index 3b3ec3b0..88e30be6 100644 --- a/ckanext/dcat/profiles/schemaorg.py +++ b/ckanext/dcat/profiles/schemaorg.py @@ -221,6 +221,10 @@ def _publisher_graph(self, dataset_ref, dataset_dict): self._add_triples_from_dict(dataset_dict, contact_point, items) + publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier") + if publisher_identifier: + self.g.add((publisher_details, SCHEMA.identifier, 
Literal(publisher_identifier))) + def _temporal_graph(self, dataset_ref, dataset_dict): start = self._get_dataset_value(dataset_dict, "temporal_start") end = self._get_dataset_value(dataset_dict, "temporal_end") diff --git a/ckanext/dcat/schemas/dcat_ap_recommended.yaml b/ckanext/dcat/schemas/dcat_ap_recommended.yaml index ed386d67..883f337d 100644 --- a/ckanext/dcat/schemas/dcat_ap_recommended.yaml +++ b/ckanext/dcat/schemas/dcat_ap_recommended.yaml @@ -66,6 +66,11 @@ dataset_fields: - field_name: type label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the dataset available. - field_name: license_id diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 235b001f..221c772c 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -647,6 +647,7 @@ def test_publisher_foaf(self): contact@some.org http://some.org + @@ -666,6 +667,7 @@ def test_publisher_foaf(self): assert publisher['email'] == 'contact@some.org' assert publisher['url'] == 'http://some.org' assert publisher['type'] == 'http://purl.org/adms/publishertype/NonProfitOrganisation' + assert publisher['identifier'] == 'https://ror.org/05wg1m734' def test_publisher_ref(self): diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py index b9ecc880..d2b84ae4 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py @@ -113,6 +113,7 @@ def _get_extra_value_as_list(key): assert _get_extra_value('publisher_email') == 'contact@some.org' assert _get_extra_value('publisher_url') == 'http://some.org' assert _get_extra_value('publisher_type') == 
'http://purl.org/adms/publishertype/NonProfitOrganisation' + assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734' assert _get_extra_value('contact_name') == 'Point of Contact' # mailto gets removed for storage and is added again on output assert _get_extra_value('contact_email') == 'contact@some.org' diff --git a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py index 0d1949d5..eb9a5eb4 100644 --- a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py @@ -105,6 +105,7 @@ def test_publisher_extras(self): {'key': 'publisher_email', 'value': 'publisher@example.com'}, {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'}, {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'}, + {'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'}, ] @@ -121,6 +122,7 @@ def test_publisher_extras(self): assert str(publisher) == extras['publisher_uri'] assert self._triple(g, publisher, RDF.type, SCHEMA.Organization) assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name']) + assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier']) contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] assert contact_point @@ -144,6 +146,7 @@ def test_publisher_no_uri(self): {'key': 'publisher_email', 'value': 'publisher@example.com'}, {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'}, {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'}, + {'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'}, ] } extras = self._extras(dataset) @@ -158,6 +161,7 @@ def test_publisher_no_uri(self): assert isinstance(publisher, BNode) assert self._triple(g, publisher, RDF.type, SCHEMA.Organization) assert 
self._triple(g, publisher, SCHEMA.name, extras['publisher_name']) + assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier']) contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] assert contact_point diff --git a/docs/mapping.md b/docs/mapping.md index 823635a3..89057ab2 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -18,75 +18,76 @@ some cases the way metadata is stored internally and presented at the CKAN API l fields are properly validated, can use the scheming snippets etc. See [Schemas](getting-started.md#schemas) for more details. -| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | -|-------------------|------------------------|-------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| -| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Dataset | dct:title | title | | text | | -| dcat:Dataset | dct:description | notes | | text | | -| dcat:Dataset | dcat:keyword | tags | | text | | -| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | -| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | -| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | -| dcat:Dataset | dct:issued | custom:issued | metadata_created | text | | -| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | -| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | -| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | -| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | -| dcat:Dataset | dcat:landingPage | url | | text | | -| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | -| dcat:Dataset | dct:conformsTo | 
custom:conforms_to | | list | See [Lists](#lists) | -| dcat:Dataset | dct:accessRights | custom:access_rights | | text | | -| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | -| dcat:Dataset | dct:provenance | custom:provenance | | text | | -| dcat:Dataset | dct:type | custom:dcat_type | | text | | -| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | -| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to dcat:Distribution instances | -| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | +| DCAT class | DCAT property | CKAN dataset field | CKAN fallback fields | Stored as | | +|-------------------|------------------------|---------------------------------------------|--------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| dcat:Dataset | - | custom:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Dataset | dct:title | title | | text | | +| dcat:Dataset | dct:description | notes | | text | | +| dcat:Dataset | dcat:keyword | tags | | text | | +| dcat:Dataset | dcat:theme | custom:theme | | list | See [Lists](#lists) | +| dcat:Dataset | dct:identifier | custom:identifier | custom:guid, id | text | | +| dcat:Dataset | adms:identifier | custom:alternate_identifier | | text | | +| dcat:Dataset | dct:issued | 
custom:issued | metadata_created | text | | +| dcat:Dataset | dct:modified | custom:modified | metadata_modified | text | | +| dcat:Dataset | owl:versionInfo | version | custom:dcat_version | text | | +| dcat:Dataset | adms:versionNotes | custom:version_notes | | text | | +| dcat:Dataset | dct:language | custom:language | | list | See [Lists](#lists) | +| dcat:Dataset | dcat:landingPage | url | | text | | +| dcat:Dataset | dct:accrualPeriodicity | custom:frequency | | text | | +| dcat:Dataset | dct:conformsTo | custom:conforms_to | | list | See [Lists](#lists) | +| dcat:Dataset | dct:accessRights | custom:access_rights | | text | | +| dcat:Dataset | foaf:page | custom:documentation | | list | See [Lists](#lists) | +| dcat:Dataset | dct:provenance | custom:provenance | | text | | +| dcat:Dataset | dct:type | custom:dcat_type | | text | | +| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). 
It is assumed that these are one or more URIs referring to dcat:Distribution instances | +| dcat:Dataset | dct:spatial | custom:spatial_uri | | text | See [Spatial coverage](#spatial-coverage) | | dcat:Dataset | dct:temporal | custom:temporal_start + custom:temporal_end | | text | None, one or both extras can be present | -| dcat:Dataset | dcat:temporalResolution| custom:temporal_resolution | | list | | -| dcat:Dataset | dcat:spatialResolutionInMeters| custom:spatial_resolution_in_meters | | list | | -| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | -| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | -| foaf:Agent | foaf:name | custom:publisher_name | | text | | -| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | -| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | -| foaf:Agent | dct:type | custom:publisher_type | | text | | -| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | -| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | -| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | -| dcat:Dataset | dcat:distribution | resources | | text | | -| dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | -| dcat:Distribution | dct:title | resource:name | | text | | -| dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | -| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | -| dcat:Distribution | dct:description | resource:description | | text | | -| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | -| dcat:Distribution | dct:format | 
resource:format | | text | | -| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | -| dcat:Distribution | adms:status | resource:status | | text | | -| dcat:Distribution | dcat:byteSize | resource:size | | number | | -| dcat:Distribution | dct:issued | resource:issued | created | text | | -| dcat:Distribution | dct:modified | resource:modified | metadata_modified | text | | -| dcat:Distribution | dct:rights | resource:rights | | text | | -| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | -| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | -| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | -| dcat:Distribution | dcatap:availability | resource:availability | | text | | -| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | -| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | -| dcat:Distribution | dcat:accessService | resource:access_services | | text | | -| dcat:DataService | dct:title | access_service:title | | text | | -| dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | -| dcat:DataService | dcat:endpointDescription| access_service:endpoint_description | | text | | -| dcat:DataService | dcatap:availability | access_service:availability | | text | | -| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | -| dcat:DataService | dct:description | access_service:description | | text | | -| dcat:DataService | dct:license | access_service:license | | text | | -| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | -| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | -| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | +| dcat:Dataset | dcat:temporalResolution| custom:temporal_resolution | | list | | +| dcat:Dataset | 
dcat:spatialResolutionInMeters| custom:spatial_resolution_in_meters | | list | | +| dcat:Dataset | dct:isReferencedBy | custom:is_referenced_by | | list | | +| dcat:Dataset | dct:publisher | custom:publisher_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | +| foaf:Agent | foaf:name | custom:publisher_name | | text | | +| foaf:Agent | foaf:mbox | custom:publisher_email | organization:title | text | | +| foaf:Agent | foaf:homepage | custom:publisher_url | | text | | +| foaf:Agent | dct:type | custom:publisher_type | | text | | +| foaf:Agent | dct:identifier | custom:publisher_identifier | | text | | +| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | +| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | +| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | +| dcat:Dataset | dcat:distribution | resources | | text | | +| dcat:Distribution | - | resource:uri | | text | See [URIs](mapping.md#uris) | +| dcat:Distribution | dct:title | resource:name | | text | | +| dcat:Distribution | dcat:accessURL | resource:access_url | resource:url | text | If downloadURL is not present, accessURL will be used as resource url | +| dcat:Distribution | dcat:downloadURL | resource:download_url | | text | If present, downloadURL will be used as resource url | +| dcat:Distribution | dct:description | resource:description | | text | | +| dcat:Distribution | dcat:mediaType | resource:mimetype | | text | | +| dcat:Distribution | dct:format | resource:format | | text | | +| dcat:Distribution | dct:license | resource:license | | text | See [Licenses](#licenses) | +| dcat:Distribution | adms:status | resource:status | | text | | +| dcat:Distribution | dcat:byteSize | resource:size | | number | | +| dcat:Distribution | dct:issued | resource:issued | created | text | | +| dcat:Distribution |
dct:modified | resource:modified | metadata_modified | text | | +| dcat:Distribution | dct:rights | resource:rights | | text | | +| dcat:Distribution | foaf:page | resource:documentation | | list | See [Lists](#lists) | +| dcat:Distribution | dct:language | resource:language | | list | See [Lists](#lists) | +| dcat:Distribution | dct:conformsTo | resource:conforms_to | | list | See [Lists](#lists) | +| dcat:Distribution | dcatap:availability | resource:availability | | text | | +| dcat:Distribution | dcat:compressFormat | resource:compress_format | | text | | +| dcat:Distribution | dcat:packageFormat | resource:package_format | | text | | +| dcat:Distribution | dcat:accessService | resource:access_services | | text | | +| dcat:DataService | dct:title | access_service:title | | text | | +| dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | +| dcat:DataService | dcat:endpointDescription| access_service:endpoint_description | | text | | +| dcat:DataService | dcatap:availability | access_service:availability | | text | | +| dcat:DataService | dcat:servesDataset | access_service:serves_dataset | | list | | +| dcat:DataService | dct:description | access_service:description | | text | | +| dcat:DataService | dct:license | access_service:license | | text | | +| dcat:DataService | dct:accessRights | access_service:access_rights | | text | | +| spdx:Checksum | spdx:checksumValue | resource:hash | | text | | +| spdx:Checksum | spdx:algorithm | resource:hash_algorithm | | text | | ### Custom fields diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf index b2f925c8..9e117752 100644 --- a/examples/dcat/dataset.rdf +++ b/examples/dcat/dataset.rdf @@ -75,6 +75,7 @@ contact@some.org http://some.org +