diff --git a/ckanext/dcat/converters.py b/ckanext/dcat/converters.py index afb2b773..8129f7ad 100644 --- a/ckanext/dcat/converters.py +++ b/ckanext/dcat/converters.py @@ -29,8 +29,8 @@ def dcat_to_ckan(dcat_dict): elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'): package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')}) - if dcat_publisher.get('mbox'): - package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')}) + if dcat_publisher.get('email'): + package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('email')}) if dcat_publisher.get('identifier'): package_dict['extras'].append({ @@ -38,6 +38,22 @@ def dcat_to_ckan(dcat_dict): 'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734 }) + dcat_creator = dcat_dict.get('creator') + if isinstance(dcat_creator, basestring): + package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator}) + elif isinstance(dcat_creator, dict) and dcat_creator.get('name'): + if dcat_creator.get('name'): + package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator.get('name')}) + + if dcat_creator.get('email'): + package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('email')}) + + if dcat_creator.get('identifier'): + package_dict['extras'].append({ + 'key': 'dcat_creator_id', + 'value': dcat_creator.get('identifier') + }) + package_dict['extras'].append({ 'key': 'language', 'value': ','.join(dcat_dict.get('language', [])) @@ -63,20 +79,20 @@ def dcat_to_ckan(dcat_dict): def ckan_to_dcat(package_dict): - dcat_dict = {} dcat_dict['title'] = package_dict.get('title') dcat_dict['description'] = package_dict.get('notes') dcat_dict['landingPage'] = package_dict.get('url') - + # Keywords dcat_dict['keyword'] = [] for tag in package_dict.get('tags', []): dcat_dict['keyword'].append(tag['name']) - + # 
Publisher dcat_dict['publisher'] = {} + dcat_dict['creator'] = {} for extra in package_dict.get('extras', []): if extra['key'] in ['dcat_issued', 'dcat_modified']: @@ -85,19 +101,41 @@ def ckan_to_dcat(package_dict): elif extra['key'] == 'language': dcat_dict['language'] = extra['value'].split(',') + # Publisher fields elif extra['key'] == 'dcat_publisher_name': dcat_dict['publisher']['name'] = extra['value'] elif extra['key'] == 'dcat_publisher_email': - dcat_dict['publisher']['mbox'] = extra['value'] + dcat_dict['publisher']['email'] = extra['value'] + + elif extra['key'] == 'dcat_publisher_id': + dcat_dict['publisher']['identifier'] = extra['value'] + + # Creator fields + elif extra['key'] == 'dcat_creator_name': + dcat_dict['creator']['name'] = extra['value'] + elif extra['key'] == 'dcat_creator_email': + dcat_dict['creator']['email'] = extra['value'] + + elif extra['key'] == 'dcat_creator_id': + dcat_dict['creator']['identifier'] = extra['value'] + + # Identifier elif extra['key'] == 'guid': dcat_dict['identifier'] = extra['value'] + # Fallback for publisher (if no name in extras, use maintainer) if not dcat_dict['publisher'].get('name') and package_dict.get('maintainer'): dcat_dict['publisher']['name'] = package_dict.get('maintainer') if package_dict.get('maintainer_email'): - dcat_dict['publisher']['mbox'] = package_dict.get('maintainer_email') + dcat_dict['publisher']['email'] = package_dict.get('maintainer_email') + + # Fallback for creator (if no name in extras, optionally use author) + if not dcat_dict['creator'].get('name') and package_dict.get('author'): + dcat_dict['creator']['name'] = package_dict.get('author') + if package_dict.get('author_email'): + dcat_dict['creator']['email'] = package_dict.get('author_email') dcat_dict['distribution'] = [] for resource in package_dict.get('resources', []): diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index fd5af492..9169abbf 100644 --- a/ckanext/dcat/profiles/base.py +++ 
b/ckanext/dcat/profiles/base.py @@ -419,58 +419,37 @@ def _insert_or_update_temporal(self, dataset_dict, key, value): else: dataset_dict["extras"].append({"key": key, "value": value}) - def _publisher(self, subject, predicate): + def _agent_details(self, subject, predicate): """ - Returns a dict with details about a dct:publisher entity, a foaf:Agent + Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent Both subject and predicate must be rdflib URIRef or BNode objects Examples: - + or Publishing Organization for dataset 1 contact@some.org http://some.org - - - { - 'uri': 'http://orgs.vocab.org/some-org', - 'name': 'Publishing Organization for dataset 1', - 'email': 'contact@some.org', - 'url': 'http://some.org', - 'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation', - } - - - - { - 'uri': 'http://publications.europa.eu/resource/authority/corporate-body/EURCOU' - } - Returns keys for uri, name, email, url and type with the values set to - an empty string if they could not be found + Returns keys for uri, name, email, url, type, and identifier with the values set to + an empty string if they could not be found. 
""" - publisher = {} + agent_details = {} for agent in self.g.objects(subject, predicate): + agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else "" + agent_details["name"] = self._object_value(agent, FOAF.name) + agent_details["email"] = self._object_value(agent, FOAF.mbox) + agent_details["url"] = self._object_value(agent, FOAF.homepage) + agent_details["type"] = self._object_value(agent, DCT.type) + agent_details['identifier'] = self._object_value(agent, DCT.identifier) - publisher["uri"] = str(agent) if isinstance(agent, term.URIRef) else "" - - publisher["name"] = self._object_value(agent, FOAF.name) - - publisher["email"] = self._object_value(agent, FOAF.mbox) - - publisher["url"] = self._object_value(agent, FOAF.homepage) - - publisher["type"] = self._object_value(agent, DCT.type) - - publisher['identifier'] = self._object_value(agent, DCT.identifier) - - return publisher + return agent_details def _contact_details(self, subject, predicate): """ @@ -1136,7 +1115,7 @@ def _extract_catalog_dict(self, catalog_ref): out.append( { "key": "source_catalog_publisher", - "value": json.dumps(self._publisher(catalog_ref, DCT.publisher)), + "value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)), } ) return out diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index 28b476bb..f1db48b6 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -34,7 +34,6 @@ config = toolkit.config - DISTRIBUTION_LICENSE_FALLBACK_CONFIG = "ckanext.dcat.resource.inherit.license" @@ -122,13 +121,21 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): ) # Publisher - publisher = self._publisher(dataset_ref, DCT.publisher) + publisher = self._agent_details(dataset_ref, DCT.publisher) for key in ("uri", "name", "email", "url", "type", "identifier"): if publisher.get(key): dataset_dict["extras"].append( {"key": "publisher_{0}".format(key), "value": 
publisher.get(key)} ) + # Creator + creator = self._agent_details(dataset_ref, DCT.creator) + for key in ("uri", "name", "email", "url", "type", "identifier"): + if creator.get(key): + dataset_dict["extras"].append( + {"key": "creator_{0}".format(key), "value": creator.get(key)} + ) + # Temporal start, end = self._time_interval(dataset_ref, DCT.temporal) if start: @@ -250,7 +257,6 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): "publisher_name", "publisher_email", ): - extra["key"] = "dcat_" + extra["key"] if extra["key"] == "language": @@ -412,6 +418,48 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref): ] self._add_triples_from_dict(publisher_details, publisher_ref, items) + # Creator + creator_ref = None + + if dataset_dict.get("creator"): + # Scheming creator field: will be handled in a separate profile + pass + elif any( + [ + self._get_dataset_value(dataset_dict, "creator_uri"), + self._get_dataset_value(dataset_dict, "creator_name"), + ] + ): + # Legacy creator_* extras + creator_uri = self._get_dataset_value(dataset_dict, "creator_uri") + creator_name = self._get_dataset_value(dataset_dict, "creator_name") + if creator_uri: + creator_ref = CleanedURIRef(creator_uri) + else: + # No creator_uri + creator_ref = BNode() + + creator_details = { + "name": creator_name, + "email": self._get_dataset_value(dataset_dict, "creator_email"), + "url": self._get_dataset_value(dataset_dict, "creator_url"), + "type": self._get_dataset_value(dataset_dict, "creator_type"), + "identifier": self._get_dataset_value(dataset_dict, "creator_identifier"), + } + + # Add to graph + if creator_ref: + g.add((creator_ref, RDF.type, FOAF.Agent)) + g.add((dataset_ref, DCT.creator, creator_ref)) # Use DCT.creator for creator + items = [ + ("name", FOAF.name, None, Literal), + ("email", FOAF.mbox, None, Literal), + ("url", FOAF.homepage, None, URIRef), + ("type", DCT.type, None, URIRefOrLiteral), + ("identifier", DCT.identifier, None, URIRefOrLiteral), + ] +
self._add_triples_from_dict(creator_details, creator_ref, items) + # Temporal start = self._get_dataset_value(dataset_dict, "temporal_start") end = self._get_dataset_value(dataset_dict, "temporal_end") diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index 8d0ffb79..62dcf61d 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -123,12 +123,8 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): """ Add triples to the graph from new repeating subfields """ - - def _not_empty_dict(data_dict): - return any(data_dict.values()) - contact = dataset_dict.get("contact") - if isinstance(contact, list) and len(contact) and _not_empty_dict(contact[0]): + if isinstance(contact, list) and len(contact) and self._not_empty_dict(contact[0]): for item in contact: contact_uri = item.get("uri") if contact_uri: @@ -150,56 +146,14 @@ def _not_empty_dict(data_dict): value_modifier=self._add_mailto, ) - publisher = dataset_dict.get("publisher") - if ( - isinstance(publisher, list) - and len(publisher) - and _not_empty_dict(publisher[0]) - ): - publisher = publisher[0] - publisher_uri = publisher.get("uri") - if publisher_uri: - publisher_ref = CleanedURIRef(publisher_uri) - else: - publisher_ref = BNode() - - self.g.add((publisher_ref, RDF.type, FOAF.Agent)) - self.g.add((dataset_ref, DCT.publisher, publisher_ref)) - - self._add_triple_from_dict(publisher, publisher_ref, FOAF.name, "name") - self._add_triple_from_dict( - publisher, publisher_ref, FOAF.homepage, "url", _type=URIRef - ) - self._add_triple_from_dict( - publisher, - publisher_ref, - DCT.type, - "type", - _type=URIRefOrLiteral, - # TODO: fix prefLabel stuff - # _class=SKOS.Concept, - ) - self._add_triple_from_dict( - publisher, - publisher_ref, - VCARD.hasEmail, - "email", - _type=URIRef, - value_modifier=self._add_mailto, - ) - self._add_triple_from_dict( - publisher, - publisher_ref, 
- DCT.identifier, - "identifier", - _type=URIRefOrLiteral - ) + self._add_agent(dataset_ref, dataset_dict, "publisher", DCT.publisher) + self._add_agent(dataset_ref, dataset_dict, "creator", DCT.creator) temporal = dataset_dict.get("temporal_coverage") if ( isinstance(temporal, list) and len(temporal) - and _not_empty_dict(temporal[0]) + and self._not_empty_dict(temporal[0]) ): for item in temporal: temporal_ref = BNode() @@ -211,7 +165,7 @@ def _not_empty_dict(data_dict): self.g.add((dataset_ref, DCT.temporal, temporal_ref)) spatial = dataset_dict.get("spatial_coverage") - if isinstance(spatial, list) and len(spatial) and _not_empty_dict(spatial[0]): + if isinstance(spatial, list) and len(spatial) and self._not_empty_dict(spatial[0]): for item in spatial: if item.get("uri"): spatial_ref = CleanedURIRef(item["uri"]) @@ -243,3 +197,57 @@ def _not_empty_dict(data_dict): ) except ValueError: pass + + def _add_agent(self, dataset_ref, dataset_dict, agent_key, rdf_predicate): + """ + Adds an agent (publisher or creator) to the RDF graph. 
+ + :param dataset_ref: The RDF reference of the dataset + :param dataset_dict: The dataset dictionary containing agent information + :param agent_key: 'publisher' or 'creator' to specify the agent + :param rdf_predicate: The RDF predicate (DCT.publisher or DCT.creator) + """ + agent = dataset_dict.get(agent_key) + if ( + isinstance(agent, list) + and len(agent) + and self._not_empty_dict(agent[0]) + ): + agent = agent[0] + agent_uri = agent.get("uri") + if agent_uri: + agent_ref = CleanedURIRef(agent_uri) + else: + agent_ref = BNode() + + self.g.add((agent_ref, RDF.type, FOAF.Agent)) + self.g.add((dataset_ref, rdf_predicate, agent_ref)) + + self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name") + self._add_triple_from_dict(agent, agent_ref, FOAF.homepage, "url", _type=URIRef) + self._add_triple_from_dict( + agent, + agent_ref, + DCT.type, + "type", + _type=URIRefOrLiteral, + ) + self._add_triple_from_dict( + agent, + agent_ref, + VCARD.hasEmail, + "email", + _type=URIRef, + value_modifier=self._add_mailto, + ) + self._add_triple_from_dict( + agent, + agent_ref, + DCT.identifier, + "identifier", + _type=URIRefOrLiteral + ) + + @staticmethod + def _not_empty_dict(data_dict): + return any(data_dict.values()) diff --git a/ckanext/dcat/profiles/schemaorg.py b/ckanext/dcat/profiles/schemaorg.py index 88e30be6..b397f44d 100644 --- a/ckanext/dcat/profiles/schemaorg.py +++ b/ckanext/dcat/profiles/schemaorg.py @@ -50,7 +50,10 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._list_fields_graph(dataset_ref, dataset_dict) # Publisher - self._publisher_graph(dataset_ref, dataset_dict) + self._agent_graph(dataset_ref, dataset_dict, SCHEMA.publisher, "publisher") + + # Creator + self._agent_graph(dataset_ref, dataset_dict, SCHEMA.creator, "creator") # Temporal self._temporal_graph(dataset_ref, dataset_dict) @@ -156,74 +159,73 @@ def _list_fields_graph(self, dataset_ref, dataset_dict): ] self._add_list_triples_from_dict(dataset_dict, dataset_ref, items) - 
def _publisher_graph(self, dataset_ref, dataset_dict): + def _agent_graph(self, dataset_ref, dataset_dict, agent_type, schema_property_prefix): + uri_key = f"{schema_property_prefix}_uri" + name_key = f"{schema_property_prefix}_name" + url_key = f"{schema_property_prefix}_url" + email_key = f"{schema_property_prefix}_email" + identifier_key = f"{schema_property_prefix}_identifier" + if any( [ - self._get_dataset_value(dataset_dict, "publisher_uri"), - self._get_dataset_value(dataset_dict, "publisher_name"), + self._get_dataset_value(dataset_dict, uri_key), + self._get_dataset_value(dataset_dict, name_key), dataset_dict.get("organization"), ] ): - - publisher_uri = self._get_dataset_value(dataset_dict, "publisher_uri") - publisher_uri_fallback = publisher_uri_organization_fallback(dataset_dict) - publisher_name = self._get_dataset_value(dataset_dict, "publisher_name") - if publisher_uri: - publisher_details = CleanedURIRef(publisher_uri) - elif not publisher_name and publisher_uri_fallback: - # neither URI nor name are available, use organization as fallback - publisher_details = CleanedURIRef(publisher_uri_fallback) + agent_uri = self._get_dataset_value(dataset_dict, uri_key) + agent_uri_fallback = publisher_uri_organization_fallback(dataset_dict) + agent_name = self._get_dataset_value(dataset_dict, name_key) + + if agent_uri: + agent_details = CleanedURIRef(agent_uri) + elif not agent_name and agent_uri_fallback: + agent_details = CleanedURIRef(agent_uri_fallback) else: - # No publisher_uri - publisher_details = BNode() + agent_details = BNode() - self.g.add((publisher_details, RDF.type, SCHEMA.Organization)) - self.g.add((dataset_ref, SCHEMA.publisher, publisher_details)) + self.g.add((agent_details, RDF.type, SCHEMA.Organization)) + self.g.add((dataset_ref, agent_type, agent_details)) - # In case no name and URI are available, again fall back to organization. 
- # If no name but an URI is available, the name literal remains empty to - # avoid mixing organization and dataset values. if ( - not publisher_name - and not publisher_uri + not agent_name + and not agent_uri and dataset_dict.get("organization") ): - publisher_name = dataset_dict["organization"]["title"] - self.g.add((publisher_details, SCHEMA.name, Literal(publisher_name))) + agent_name = dataset_dict["organization"]["title"] + self.g.add((agent_details, SCHEMA.name, Literal(agent_name))) contact_point = BNode() self.g.add((contact_point, RDF.type, SCHEMA.ContactPoint)) - self.g.add((publisher_details, SCHEMA.contactPoint, contact_point)) - + self.g.add((agent_details, SCHEMA.contactPoint, contact_point)) self.g.add((contact_point, SCHEMA.contactType, Literal("customer service"))) - publisher_url = self._get_dataset_value(dataset_dict, "publisher_url") - if not publisher_url and dataset_dict.get("organization"): - publisher_url = dataset_dict["organization"].get("url") or config.get( + agent_url = self._get_dataset_value(dataset_dict, url_key) + if not agent_url and dataset_dict.get("organization"): + agent_url = dataset_dict["organization"].get("url") or config.get( "ckan.site_url" ) + self.g.add((contact_point, SCHEMA.url, Literal(agent_url))) - self.g.add((contact_point, SCHEMA.url, Literal(publisher_url))) items = [ ( - "publisher_email", + email_key, SCHEMA.email, ["contact_email", "maintainer_email", "author_email"], Literal, ), ( - "publisher_name", + name_key, SCHEMA.name, ["contact_name", "maintainer", "author"], Literal, ), ] - self._add_triples_from_dict(dataset_dict, contact_point, items) - publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier") - if publisher_identifier: - self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier))) + agent_identifier = self._get_dataset_value(dataset_dict, identifier_key) + if agent_identifier: + self.g.add((agent_details, SCHEMA.identifier, 
Literal(agent_identifier))) def _temporal_graph(self, dataset_ref, dataset_dict): start = self._get_dataset_value(dataset_dict, "temporal_start") diff --git a/ckanext/dcat/schemas/dcat_ap_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml index 36508d9a..8a831852 100644 --- a/ckanext/dcat/schemas/dcat_ap_full.yaml +++ b/ckanext/dcat/schemas/dcat_ap_full.yaml @@ -72,6 +72,38 @@ dataset_fields: help_text: Unique identifier for the publisher, such as a ROR ID. help_text: Entity responsible for making the dataset available. +- field_name: creator + label: Creator + repeating_label: Creator + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + help_text: URI of the creator, if available. + + - field_name: name + label: Name + help_text: Name of the entity or person who created the dataset. + + - field_name: email + label: Email + display_snippet: email.html + help_text: Contact email of the creator. + + - field_name: url + label: URL + display_snippet: link.html + help_text: URL for more information about the creator. + + - field_name: type + label: Type + help_text: Type of creator (e.g., Organization, Person). + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the creator, such as an ORCID or ROR ID. + - field_name: license_id label: License form_snippet: license.html diff --git a/ckanext/dcat/schemas/dcat_ap_recommended.yaml b/ckanext/dcat/schemas/dcat_ap_recommended.yaml index 883f337d..daa7bce4 100644 --- a/ckanext/dcat/schemas/dcat_ap_recommended.yaml +++ b/ckanext/dcat/schemas/dcat_ap_recommended.yaml @@ -70,7 +70,6 @@ dataset_fields: - field_name: identifier label: Identifier help_text: Unique identifier for the publisher, such as a ROR ID. - help_text: Entity responsible for making the dataset available. 
- field_name: license_id diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py index 221c772c..52bf8bc1 100644 --- a/ckanext/dcat/tests/profiles/base/test_base_profile.py +++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py @@ -660,7 +660,7 @@ def test_publisher_foaf(self): p = RDFProfile(g) - publisher = p._publisher(URIRef('http://example.org'), DCT.publisher) + publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher) assert publisher['uri'] == 'http://orgs.vocab.org/some-org' assert publisher['name'] == 'Publishing Organization for dataset 1' @@ -688,7 +688,7 @@ def test_publisher_ref(self): p = RDFProfile(g) - publisher = p._publisher(URIRef('http://example.org'), DCT.publisher) + publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher) assert publisher['uri'] == 'http://orgs.vocab.org/some-org' diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py index d2b84ae4..3300f8c3 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py @@ -114,6 +114,12 @@ def _get_extra_value_as_list(key): assert _get_extra_value('publisher_url') == 'http://some.org' assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation' assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734' + assert _get_extra_value('creator_uri') == 'http://example.org/creator-org' + assert _get_extra_value('creator_name') == 'Creating Organization for dataset 1' + assert _get_extra_value('creator_email') == 'creator@example.org' + assert _get_extra_value('creator_url') == 'http://example.org' + assert _get_extra_value('creator_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation' + assert 
_get_extra_value('creator_identifier') == 'https://ror.org/05wg1m735' assert _get_extra_value('contact_name') == 'Point of Contact' # mailto gets removed for storage and is added again on output assert _get_extra_value('contact_email') == 'contact@some.org' diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py index b249d600..8095202f 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py @@ -97,6 +97,15 @@ def test_e2e_ckan_to_dcat(self): "identifier": "http://example.org/publisher-id", }, ], + "creator": [ + { + "name": "Test Creator", + "email": "creator@example.org", + "url": "https://example.org/creator", + "type": "person", + "identifier": "http://example.org/creator-id", + } + ], "temporal_coverage": [ {"start": "1905-03-01", "end": "2013-01-05"}, {"start": "2024-04-10", "end": "2024-05-29"}, @@ -309,6 +318,38 @@ def test_e2e_ckan_to_dcat(self): URIRef(dataset_dict["publisher"][0]["identifier"]) ) + creator = [t for t in g.triples((dataset_ref, DCT.creator, None))] + + assert len(creator) == 1 + assert self._triple( + g, creator[0][2], FOAF.name, dataset_dict["creator"][0]["name"] + ) + assert self._triple( + g, + creator[0][2], + VCARD.hasEmail, + URIRef("mailto:" + dataset_dict["creator"][0]["email"]), + ) + assert self._triple( + g, + creator[0][2], + FOAF.homepage, + URIRef(dataset_dict["creator"][0]["url"]), + ) + assert self._triple( + g, + creator[0][2], + DCT.type, + dataset_dict["creator"][0]["type"], + ) + assert self._triple( + g, + creator[0][2], + DCT.identifier, + URIRef(dataset_dict["creator"][0]["identifier"]) + ) + + temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))] assert len(temporal) == len(dataset["temporal_coverage"]) diff --git a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py 
b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py index eb9a5eb4..dec37644 100644 --- a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py @@ -90,7 +90,7 @@ def test_graph_from_dataset(self): for value in values: assert self._triple(g, dataset_ref, item[1], item[2](value)) - def test_publisher_extras(self): + def test_publisher_and_creator_extras(self): dataset = { 'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6', 'name': 'test-dataset', @@ -100,16 +100,24 @@ def test_publisher_extras(self): 'title': 'Example Publisher from Org', }, 'extras': [ + # Publisher fields {'key': 'publisher_uri', 'value': 'http://example.com/publisher'}, {'key': 'publisher_name', 'value': 'Example Publisher'}, {'key': 'publisher_email', 'value': 'publisher@example.com'}, {'key': 'publisher_url', 'value': 'http://example.com/publisher/home'}, {'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'}, {'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'}, - ] - + # Creator fields + {'key': 'creator_uri', 'value': 'http://example.com/creator'}, + {'key': 'creator_name', 'value': 'Example Creator'}, + {'key': 'creator_email', 'value': 'creator@example.com'}, + {'key': 'creator_url', 'value': 'http://example.com/creator/home'}, + {'key': 'creator_type', 'value': 'http://purl.org/adms/publishertype/NonProfitOrganisation'}, + {'key': 'creator_identifier', 'value': 'https://ror.org/05wg1m735'}, + ] } + extras = self._extras(dataset) s = RDFSerializer(profiles=['schemaorg']) @@ -117,6 +125,7 @@ def test_publisher_extras(self): dataset_ref = s.graph_from_dataset(dataset) + # Publisher validation publisher = self._triple(g, dataset_ref, SCHEMA.publisher, None)[2] assert publisher assert str(publisher) == extras['publisher_uri'] @@ -124,13 +133,29 @@ def test_publisher_extras(self): assert self._triple(g, publisher, SCHEMA.name, 
extras['publisher_name']) assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier']) - contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] - assert contact_point - assert self._triple(g, contact_point, RDF.type, SCHEMA.ContactPoint) - assert self._triple(g, contact_point, SCHEMA.name, extras['publisher_name']) - assert self._triple(g, contact_point, SCHEMA.email, extras['publisher_email']) - assert self._triple(g, contact_point, SCHEMA.url, extras['publisher_url']) - assert self._triple(g, contact_point, SCHEMA.contactType, 'customer service') + contact_point_publisher = self._triple(g, publisher, SCHEMA.contactPoint, None)[2] + assert contact_point_publisher + assert self._triple(g, contact_point_publisher, RDF.type, SCHEMA.ContactPoint) + assert self._triple(g, contact_point_publisher, SCHEMA.name, extras['publisher_name']) + assert self._triple(g, contact_point_publisher, SCHEMA.email, extras['publisher_email']) + assert self._triple(g, contact_point_publisher, SCHEMA.url, extras['publisher_url']) + assert self._triple(g, contact_point_publisher, SCHEMA.contactType, 'customer service') + + # Creator validation + creator = self._triple(g, dataset_ref, SCHEMA.creator, None)[2] + assert creator + assert str(creator) == extras['creator_uri'] + assert self._triple(g, creator, RDF.type, SCHEMA.Organization) + assert self._triple(g, creator, SCHEMA.name, extras['creator_name']) + assert self._triple(g, creator, SCHEMA.identifier, extras['creator_identifier']) + + contact_point_creator = self._triple(g, creator, SCHEMA.contactPoint, None)[2] + assert contact_point_creator + assert self._triple(g, contact_point_creator, RDF.type, SCHEMA.ContactPoint) + assert self._triple(g, contact_point_creator, SCHEMA.name, extras['creator_name']) + assert self._triple(g, contact_point_creator, SCHEMA.email, extras['creator_email']) + assert self._triple(g, contact_point_creator, SCHEMA.url, extras['creator_url']) + assert self._triple(g, 
contact_point_creator, SCHEMA.contactType, 'customer service') def test_publisher_no_uri(self): dataset = { diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py index de17e9ad..d5fd1749 100644 --- a/ckanext/dcat/utils.py +++ b/ckanext/dcat/utils.py @@ -79,6 +79,12 @@ def field_labels(): 'publisher_url': _('Publisher URL'), 'publisher_type': _('Publisher type'), 'publisher_identifier': _('Publisher identifier'), + 'creator_uri': _('Creator URI'), + 'creator_name': _('Creator name'), + 'creator_email': _('Creator email'), + 'creator_url': _('Creator URL'), + 'creator_type': _('Creator type'), + 'creator_identifier': _('Creator identifier'), 'contact_name': _('Contact name'), 'contact_email': _('Contact email'), 'contact_uri': _('Contact URI'), diff --git a/docs/mapping.md b/docs/mapping.md index 89057ab2..ce4048c4 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -54,6 +54,12 @@ some cases the way metadata is stored internally and presented at the CKAN API l | foaf:Agent | foaf:homepage | custom:publisher_url | | text | | | foaf:Agent | dct:type | custom:publisher_type | | text | | | foaf:Agent | dct:identifier | custom:publisher_id | | text | +| dcat:Dataset | dct:creator | custom:creator_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) | +| foaf:Agent | foaf:name | custom:creator_name | | text | | +| foaf:Agent | foaf:mbox | custom:creator_email | organization:title | text | | +| foaf:Agent | foaf:homepage | custom:creator_url | | text | | +| foaf:Agent | dct:type | custom:creator_type | | text | | +| foaf:Agent | dct:identifier | custom:creator_identifier | | text | | | dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) | | vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | | | vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | | diff --git
a/examples/ckan/ckan_dataset.json b/examples/ckan/ckan_dataset.json index a05f989a..ccdcb2d9 100644 --- a/examples/ckan/ckan_dataset.json +++ b/examples/ckan/ckan_dataset.json @@ -8,6 +8,8 @@ {"key": "guid", "value": "9df8df51-63db-37a8-e044-0003ba9b0d98"}, {"key": "dcat_publisher_name", "value": "Geological Society"}, {"key": "dcat_publisher_email", "value": "info@gs.org"}, + {"key": "dcat_creator_name", "value": "John Doe"}, + {"key": "dcat_creator_email", "value": "johndoe@example.com"}, {"key": "language", "value": "en,es,ca"} ], "resources": [{"id": "b1e0b666-b7f4-44c1-9b16-56c78e86b66a", diff --git a/examples/ckan/dataset.json b/examples/ckan/dataset.json index c1d887d1..ebcc1055 100644 --- a/examples/ckan/dataset.json +++ b/examples/ckan/dataset.json @@ -9,11 +9,17 @@ "keyword" : ["exploration", "geochemical-exploration", "geochemical-maps", "geochemistry", "geology", "nercddc", "regional-geology"], "publisher": { "name": "Geological Society", - "mbox": "info@gs.org" + "email": "info@gs.org" }, - "distribution": [{"accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html", - "byteSize": null, - "description": "Resource locator", - "format": "text/html", - "title": ""}] + "creator": { + "name": "John Doe", + "email": "johndoe@example.com" + }, + "distribution": [{ + "accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html", + "byteSize": null, + "description": "Resource locator", + "format": "text/html", + "title": "" + }] } diff --git a/examples/ckan/full_ckan_dataset.json b/examples/ckan/full_ckan_dataset.json index 24a17bcc..e62927c7 100644 --- a/examples/ckan/full_ckan_dataset.json +++ b/examples/ckan/full_ckan_dataset.json @@ -1,6 +1,6 @@ { - "author": null, - "author_email": null, + "author": "John Doe", + "author_email": "johndoe@example.com", "extras": [ { "__extras": { diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf index 9e117752..8cd9619f 100644 --- a/examples/dcat/dataset.rdf +++ b/examples/dcat/dataset.rdf @@ -78,6 
+78,15 @@ + + + Creating Organization for dataset 1 + creator@example.org + http://example.org + + + + Some website