diff --git a/ckanext/dcat/converters.py b/ckanext/dcat/converters.py
index afb2b773..8129f7ad 100644
--- a/ckanext/dcat/converters.py
+++ b/ckanext/dcat/converters.py
@@ -29,8 +29,8 @@ def dcat_to_ckan(dcat_dict):
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})
- if dcat_publisher.get('mbox'):
- package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})
+ if dcat_publisher.get('email'):
+ package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('email')})
if dcat_publisher.get('identifier'):
package_dict['extras'].append({
@@ -38,6 +38,22 @@ def dcat_to_ckan(dcat_dict):
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})
+ dcat_creator = dcat_dict.get('creator')
+ if isinstance(dcat_creator, basestring):
+ package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator})
+ elif isinstance(dcat_creator, dict) and dcat_creator.get('name'):
+ if dcat_creator.get('name'):
+ package_dict['extras'].append({'key': 'dcat_creator_name', 'value': dcat_creator.get('name')})
+
+ if dcat_creator.get('email'):
+ package_dict['extras'].append({'key': 'dcat_creator_email', 'value': dcat_creator.get('email')})
+
+ if dcat_creator.get('identifier'):
+ package_dict['extras'].append({
+ 'key': 'dcat_creator_id',
+ 'value': dcat_creator.get('identifier')
+ })
+
package_dict['extras'].append({
'key': 'language',
'value': ','.join(dcat_dict.get('language', []))
@@ -63,20 +79,20 @@ def dcat_to_ckan(dcat_dict):
def ckan_to_dcat(package_dict):
-
dcat_dict = {}
dcat_dict['title'] = package_dict.get('title')
dcat_dict['description'] = package_dict.get('notes')
dcat_dict['landingPage'] = package_dict.get('url')
-
+ # Keywords
dcat_dict['keyword'] = []
for tag in package_dict.get('tags', []):
dcat_dict['keyword'].append(tag['name'])
-
+ # Publisher and creator
dcat_dict['publisher'] = {}
+ dcat_dict['creator'] = {}
for extra in package_dict.get('extras', []):
if extra['key'] in ['dcat_issued', 'dcat_modified']:
@@ -85,19 +101,41 @@ def ckan_to_dcat(package_dict):
elif extra['key'] == 'language':
dcat_dict['language'] = extra['value'].split(',')
+ # Publisher fields
elif extra['key'] == 'dcat_publisher_name':
dcat_dict['publisher']['name'] = extra['value']
elif extra['key'] == 'dcat_publisher_email':
- dcat_dict['publisher']['mbox'] = extra['value']
+ dcat_dict['publisher']['email'] = extra['value']
+
+ elif extra['key'] == 'dcat_publisher_id':
+ dcat_dict['publisher']['identifier'] = extra['value']
+
+ # Creator fields
+ elif extra['key'] == 'dcat_creator_name':
+ dcat_dict['creator']['name'] = extra['value']
+ elif extra['key'] == 'dcat_creator_email':
+ dcat_dict['creator']['email'] = extra['value']
+
+ elif extra['key'] == 'dcat_creator_id':
+ dcat_dict['creator']['identifier'] = extra['value']
+
+ # Identifier
elif extra['key'] == 'guid':
dcat_dict['identifier'] = extra['value']
+ # Fallback for publisher (if no name in extras, use maintainer)
if not dcat_dict['publisher'].get('name') and package_dict.get('maintainer'):
dcat_dict['publisher']['name'] = package_dict.get('maintainer')
if package_dict.get('maintainer_email'):
- dcat_dict['publisher']['mbox'] = package_dict.get('maintainer_email')
+ dcat_dict['publisher']['email'] = package_dict.get('maintainer_email')
+
+ # Fallback for creator (if no name in extras, optionally use author)
+ if not dcat_dict['creator'].get('name') and package_dict.get('author'):
+ dcat_dict['creator']['name'] = package_dict.get('author')
+ if package_dict.get('author_email'):
+ dcat_dict['creator']['email'] = package_dict.get('author_email')
dcat_dict['distribution'] = []
for resource in package_dict.get('resources', []):
diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py
index fd5af492..9169abbf 100644
--- a/ckanext/dcat/profiles/base.py
+++ b/ckanext/dcat/profiles/base.py
@@ -419,58 +419,37 @@ def _insert_or_update_temporal(self, dataset_dict, key, value):
else:
dataset_dict["extras"].append({"key": key, "value": value})
- def _publisher(self, subject, predicate):
+ def _agent_details(self, subject, predicate):
"""
- Returns a dict with details about a dct:publisher entity, a foaf:Agent
+ Returns a dict with details about a dct:publisher or dct:creator entity, a foaf:Agent
Both subject and predicate must be rdflib URIRef or BNode objects
Examples:
-
+ or
Publishing Organization for dataset 1
contact@some.org
http://some.org
-
-
- {
- 'uri': 'http://orgs.vocab.org/some-org',
- 'name': 'Publishing Organization for dataset 1',
- 'email': 'contact@some.org',
- 'url': 'http://some.org',
- 'type': 'http://purl.org/adms/publishertype/NonProfitOrganisation',
- }
-
-
-
- {
- 'uri': 'http://publications.europa.eu/resource/authority/corporate-body/EURCOU'
- }
- Returns keys for uri, name, email, url and type with the values set to
- an empty string if they could not be found
+ Returns keys for uri, name, email, url, type, and identifier with the values set to
+ an empty string if they could not be found.
"""
- publisher = {}
+ agent_details = {}
for agent in self.g.objects(subject, predicate):
+ agent_details["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""
+ agent_details["name"] = self._object_value(agent, FOAF.name)
+ agent_details["email"] = self._object_value(agent, FOAF.mbox)
+ agent_details["url"] = self._object_value(agent, FOAF.homepage)
+ agent_details["type"] = self._object_value(agent, DCT.type)
+ agent_details['identifier'] = self._object_value(agent, DCT.identifier)
- publisher["uri"] = str(agent) if isinstance(agent, term.URIRef) else ""
-
- publisher["name"] = self._object_value(agent, FOAF.name)
-
- publisher["email"] = self._object_value(agent, FOAF.mbox)
-
- publisher["url"] = self._object_value(agent, FOAF.homepage)
-
- publisher["type"] = self._object_value(agent, DCT.type)
-
- publisher['identifier'] = self._object_value(agent, DCT.identifier)
-
- return publisher
+ return agent_details
def _contact_details(self, subject, predicate):
"""
@@ -1136,7 +1115,7 @@ def _extract_catalog_dict(self, catalog_ref):
out.append(
{
"key": "source_catalog_publisher",
- "value": json.dumps(self._publisher(catalog_ref, DCT.publisher)),
+ "value": json.dumps(self._agent_details(catalog_ref, DCT.publisher)),
}
)
return out
diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py
index 28b476bb..f1db48b6 100644
--- a/ckanext/dcat/profiles/euro_dcat_ap_base.py
+++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py
@@ -34,7 +34,6 @@
config = toolkit.config
-
DISTRIBUTION_LICENSE_FALLBACK_CONFIG = "ckanext.dcat.resource.inherit.license"
@@ -122,13 +121,21 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
)
# Publisher
- publisher = self._publisher(dataset_ref, DCT.publisher)
+ publisher = self._agent_details(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
)
+ # Creator
+ creator = self._agent_details(dataset_ref, DCT.creator)
+ for key in ("uri", "name", "email", "url", "type", "identifier"):
+ if creator.get(key):
+ dataset_dict["extras"].append(
+ {"key": "creator_{0}".format(key), "value": creator.get(key)}
+ )
+
# Temporal
start, end = self._time_interval(dataset_ref, DCT.temporal)
if start:
@@ -250,7 +257,6 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):
"publisher_name",
"publisher_email",
):
-
extra["key"] = "dcat_" + extra["key"]
if extra["key"] == "language":
@@ -412,6 +418,48 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)
+ # Creator
+ creator_ref = None
+
+ if dataset_dict.get("creator"):
+ # Scheming creator field: will be handled in a separate profile
+ pass
+ elif any(
+ [
+ self._get_dataset_value(dataset_dict, "creator_uri"),
+ self._get_dataset_value(dataset_dict, "creator_name"),
+ ]
+ ):
+ # Legacy creator_* extras
+ creator_uri = self._get_dataset_value(dataset_dict, "creator_uri")
+ creator_name = self._get_dataset_value(dataset_dict, "creator_name")
+ if creator_uri:
+ creator_ref = CleanedURIRef(creator_uri)
+ else:
+ # No creator_uri
+ creator_ref = BNode()
+
+ creator_details = {
+ "name": creator_name,
+ "email": self._get_dataset_value(dataset_dict, "creator_email"),
+ "url": self._get_dataset_value(dataset_dict, "creator_url"),
+ "type": self._get_dataset_value(dataset_dict, "creator_type"),
+ "identifier": self._get_dataset_value(dataset_dict, "creator_identifier"),
+ }
+
+ # Add to graph
+ if creator_ref:
+ g.add((creator_ref, RDF.type, FOAF.Agent))
+ g.add((dataset_ref, DCT.creator, creator_ref)) # Use DCT.creator for creator
+ items = [
+ ("name", FOAF.name, None, Literal),
+ ("email", FOAF.mbox, None, Literal),
+ ("url", FOAF.homepage, None, URIRef),
+ ("type", DCT.type, None, URIRefOrLiteral),
+ ("identifier", DCT.identifier, None, URIRefOrLiteral),
+ ]
+ self._add_triples_from_dict(creator_details, creator_ref, items)
+
# Temporal
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
index 8d0ffb79..62dcf61d 100644
--- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
+++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py
@@ -123,12 +123,8 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref):
"""
Add triples to the graph from new repeating subfields
"""
-
- def _not_empty_dict(data_dict):
- return any(data_dict.values())
-
contact = dataset_dict.get("contact")
- if isinstance(contact, list) and len(contact) and _not_empty_dict(contact[0]):
+ if isinstance(contact, list) and len(contact) and self._not_empty_dict(contact[0]):
for item in contact:
contact_uri = item.get("uri")
if contact_uri:
@@ -150,56 +146,14 @@ def _not_empty_dict(data_dict):
value_modifier=self._add_mailto,
)
- publisher = dataset_dict.get("publisher")
- if (
- isinstance(publisher, list)
- and len(publisher)
- and _not_empty_dict(publisher[0])
- ):
- publisher = publisher[0]
- publisher_uri = publisher.get("uri")
- if publisher_uri:
- publisher_ref = CleanedURIRef(publisher_uri)
- else:
- publisher_ref = BNode()
-
- self.g.add((publisher_ref, RDF.type, FOAF.Agent))
- self.g.add((dataset_ref, DCT.publisher, publisher_ref))
-
- self._add_triple_from_dict(publisher, publisher_ref, FOAF.name, "name")
- self._add_triple_from_dict(
- publisher, publisher_ref, FOAF.homepage, "url", _type=URIRef
- )
- self._add_triple_from_dict(
- publisher,
- publisher_ref,
- DCT.type,
- "type",
- _type=URIRefOrLiteral,
- # TODO: fix prefLabel stuff
- # _class=SKOS.Concept,
- )
- self._add_triple_from_dict(
- publisher,
- publisher_ref,
- VCARD.hasEmail,
- "email",
- _type=URIRef,
- value_modifier=self._add_mailto,
- )
- self._add_triple_from_dict(
- publisher,
- publisher_ref,
- DCT.identifier,
- "identifier",
- _type=URIRefOrLiteral
- )
+ self._add_agent(dataset_ref, dataset_dict, "publisher", DCT.publisher)
+ self._add_agent(dataset_ref, dataset_dict, "creator", DCT.creator)
temporal = dataset_dict.get("temporal_coverage")
if (
isinstance(temporal, list)
and len(temporal)
- and _not_empty_dict(temporal[0])
+ and self._not_empty_dict(temporal[0])
):
for item in temporal:
temporal_ref = BNode()
@@ -211,7 +165,7 @@ def _not_empty_dict(data_dict):
self.g.add((dataset_ref, DCT.temporal, temporal_ref))
spatial = dataset_dict.get("spatial_coverage")
- if isinstance(spatial, list) and len(spatial) and _not_empty_dict(spatial[0]):
+ if isinstance(spatial, list) and len(spatial) and self._not_empty_dict(spatial[0]):
for item in spatial:
if item.get("uri"):
spatial_ref = CleanedURIRef(item["uri"])
@@ -243,3 +197,57 @@ def _not_empty_dict(data_dict):
)
except ValueError:
pass
+
+ def _add_agent(self, dataset_ref, dataset_dict, agent_key, rdf_predicate):
+ """
+ Adds an agent (publisher or creator) to the RDF graph.
+
+ :param dataset_ref: The RDF reference of the dataset
+ :param dataset_dict: The dataset dictionary containing agent information
+ :param agent_key: 'publisher' or 'creator' to specify the agent
+ :param rdf_predicate: The RDF predicate (DCT.publisher or DCT.creator)
+ """
+ agent = dataset_dict.get(agent_key)
+ if (
+ isinstance(agent, list)
+ and len(agent)
+ and self._not_empty_dict(agent[0])
+ ):
+ agent = agent[0]
+ agent_uri = agent.get("uri")
+ if agent_uri:
+ agent_ref = CleanedURIRef(agent_uri)
+ else:
+ agent_ref = BNode()
+
+ self.g.add((agent_ref, RDF.type, FOAF.Agent))
+ self.g.add((dataset_ref, rdf_predicate, agent_ref))
+
+ self._add_triple_from_dict(agent, agent_ref, FOAF.name, "name")
+ self._add_triple_from_dict(agent, agent_ref, FOAF.homepage, "url", _type=URIRef)
+ self._add_triple_from_dict(
+ agent,
+ agent_ref,
+ DCT.type,
+ "type",
+ _type=URIRefOrLiteral,
+ )
+ self._add_triple_from_dict(
+ agent,
+ agent_ref,
+ VCARD.hasEmail,
+ "email",
+ _type=URIRef,
+ value_modifier=self._add_mailto,
+ )
+ self._add_triple_from_dict(
+ agent,
+ agent_ref,
+ DCT.identifier,
+ "identifier",
+ _type=URIRefOrLiteral
+ )
+
+ @staticmethod
+ def _not_empty_dict(data_dict):
+ return any(data_dict.values())
diff --git a/ckanext/dcat/profiles/schemaorg.py b/ckanext/dcat/profiles/schemaorg.py
index 88e30be6..b397f44d 100644
--- a/ckanext/dcat/profiles/schemaorg.py
+++ b/ckanext/dcat/profiles/schemaorg.py
@@ -50,7 +50,10 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
self._list_fields_graph(dataset_ref, dataset_dict)
# Publisher
- self._publisher_graph(dataset_ref, dataset_dict)
+ self._agent_graph(dataset_ref, dataset_dict, SCHEMA.publisher, "publisher")
+
+ # Creator
+ self._agent_graph(dataset_ref, dataset_dict, SCHEMA.creator, "creator")
# Temporal
self._temporal_graph(dataset_ref, dataset_dict)
@@ -156,74 +159,73 @@ def _list_fields_graph(self, dataset_ref, dataset_dict):
]
self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)
- def _publisher_graph(self, dataset_ref, dataset_dict):
+ def _agent_graph(self, dataset_ref, dataset_dict, agent_type, schema_property_prefix):
+ uri_key = f"{schema_property_prefix}_uri"
+ name_key = f"{schema_property_prefix}_name"
+ url_key = f"{schema_property_prefix}_url"
+ email_key = f"{schema_property_prefix}_email"
+ identifier_key = f"{schema_property_prefix}_identifier"
+
if any(
[
- self._get_dataset_value(dataset_dict, "publisher_uri"),
- self._get_dataset_value(dataset_dict, "publisher_name"),
+ self._get_dataset_value(dataset_dict, uri_key),
+ self._get_dataset_value(dataset_dict, name_key),
dataset_dict.get("organization"),
]
):
-
- publisher_uri = self._get_dataset_value(dataset_dict, "publisher_uri")
- publisher_uri_fallback = publisher_uri_organization_fallback(dataset_dict)
- publisher_name = self._get_dataset_value(dataset_dict, "publisher_name")
- if publisher_uri:
- publisher_details = CleanedURIRef(publisher_uri)
- elif not publisher_name and publisher_uri_fallback:
- # neither URI nor name are available, use organization as fallback
- publisher_details = CleanedURIRef(publisher_uri_fallback)
+ agent_uri = self._get_dataset_value(dataset_dict, uri_key)
+ agent_uri_fallback = publisher_uri_organization_fallback(dataset_dict)
+ agent_name = self._get_dataset_value(dataset_dict, name_key)
+
+ if agent_uri:
+ agent_details = CleanedURIRef(agent_uri)
+ elif not agent_name and agent_uri_fallback:
+ agent_details = CleanedURIRef(agent_uri_fallback)
else:
- # No publisher_uri
- publisher_details = BNode()
+ agent_details = BNode()
- self.g.add((publisher_details, RDF.type, SCHEMA.Organization))
- self.g.add((dataset_ref, SCHEMA.publisher, publisher_details))
+ self.g.add((agent_details, RDF.type, SCHEMA.Organization))
+ self.g.add((dataset_ref, agent_type, agent_details))
- # In case no name and URI are available, again fall back to organization.
- # If no name but an URI is available, the name literal remains empty to
- # avoid mixing organization and dataset values.
if (
- not publisher_name
- and not publisher_uri
+ not agent_name
+ and not agent_uri
and dataset_dict.get("organization")
):
- publisher_name = dataset_dict["organization"]["title"]
- self.g.add((publisher_details, SCHEMA.name, Literal(publisher_name)))
+ agent_name = dataset_dict["organization"]["title"]
+ self.g.add((agent_details, SCHEMA.name, Literal(agent_name)))
contact_point = BNode()
self.g.add((contact_point, RDF.type, SCHEMA.ContactPoint))
- self.g.add((publisher_details, SCHEMA.contactPoint, contact_point))
-
+ self.g.add((agent_details, SCHEMA.contactPoint, contact_point))
self.g.add((contact_point, SCHEMA.contactType, Literal("customer service")))
- publisher_url = self._get_dataset_value(dataset_dict, "publisher_url")
- if not publisher_url and dataset_dict.get("organization"):
- publisher_url = dataset_dict["organization"].get("url") or config.get(
+ agent_url = self._get_dataset_value(dataset_dict, url_key)
+ if not agent_url and dataset_dict.get("organization"):
+ agent_url = dataset_dict["organization"].get("url") or config.get(
"ckan.site_url"
)
+ self.g.add((contact_point, SCHEMA.url, Literal(agent_url)))
- self.g.add((contact_point, SCHEMA.url, Literal(publisher_url)))
items = [
(
- "publisher_email",
+ email_key,
SCHEMA.email,
["contact_email", "maintainer_email", "author_email"],
Literal,
),
(
- "publisher_name",
+ name_key,
SCHEMA.name,
["contact_name", "maintainer", "author"],
Literal,
),
]
-
self._add_triples_from_dict(dataset_dict, contact_point, items)
- publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier")
- if publisher_identifier:
- self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier)))
+ agent_identifier = self._get_dataset_value(dataset_dict, identifier_key)
+ if agent_identifier:
+ self.g.add((agent_details, SCHEMA.identifier, Literal(agent_identifier)))
def _temporal_graph(self, dataset_ref, dataset_dict):
start = self._get_dataset_value(dataset_dict, "temporal_start")
diff --git a/ckanext/dcat/schemas/dcat_ap_full.yaml b/ckanext/dcat/schemas/dcat_ap_full.yaml
index 36508d9a..8a831852 100644
--- a/ckanext/dcat/schemas/dcat_ap_full.yaml
+++ b/ckanext/dcat/schemas/dcat_ap_full.yaml
@@ -72,6 +72,38 @@ dataset_fields:
help_text: Unique identifier for the publisher, such as a ROR ID.
help_text: Entity responsible for making the dataset available.
+- field_name: creator
+ label: Creator
+ repeating_label: Creator
+ repeating_once: true
+ repeating_subfields:
+
+ - field_name: uri
+ label: URI
+ help_text: URI of the creator, if available.
+
+ - field_name: name
+ label: Name
+ help_text: Name of the entity or person who created the dataset.
+
+ - field_name: email
+ label: Email
+ display_snippet: email.html
+ help_text: Contact email of the creator.
+
+ - field_name: url
+ label: URL
+ display_snippet: link.html
+ help_text: URL for more information about the creator.
+
+ - field_name: type
+ label: Type
+ help_text: Type of creator (e.g., Organization, Person).
+
+ - field_name: identifier
+ label: Identifier
+ help_text: Unique identifier for the creator, such as an ORCID or ROR ID.
+
- field_name: license_id
label: License
form_snippet: license.html
diff --git a/ckanext/dcat/schemas/dcat_ap_recommended.yaml b/ckanext/dcat/schemas/dcat_ap_recommended.yaml
index 883f337d..daa7bce4 100644
--- a/ckanext/dcat/schemas/dcat_ap_recommended.yaml
+++ b/ckanext/dcat/schemas/dcat_ap_recommended.yaml
@@ -70,7 +70,6 @@ dataset_fields:
- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.
-
help_text: Entity responsible for making the dataset available.
- field_name: license_id
diff --git a/ckanext/dcat/tests/profiles/base/test_base_profile.py b/ckanext/dcat/tests/profiles/base/test_base_profile.py
index 221c772c..52bf8bc1 100644
--- a/ckanext/dcat/tests/profiles/base/test_base_profile.py
+++ b/ckanext/dcat/tests/profiles/base/test_base_profile.py
@@ -660,7 +660,7 @@ def test_publisher_foaf(self):
p = RDFProfile(g)
- publisher = p._publisher(URIRef('http://example.org'), DCT.publisher)
+ publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)
assert publisher['uri'] == 'http://orgs.vocab.org/some-org'
assert publisher['name'] == 'Publishing Organization for dataset 1'
@@ -688,7 +688,7 @@ def test_publisher_ref(self):
p = RDFProfile(g)
- publisher = p._publisher(URIRef('http://example.org'), DCT.publisher)
+ publisher = p._agent_details(URIRef('http://example.org'), DCT.publisher)
assert publisher['uri'] == 'http://orgs.vocab.org/some-org'
diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py
index d2b84ae4..3300f8c3 100644
--- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py
+++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py
@@ -114,6 +114,12 @@ def _get_extra_value_as_list(key):
assert _get_extra_value('publisher_url') == 'http://some.org'
assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734'
+ assert _get_extra_value('creator_uri') == 'http://example.org/creator-org'
+ assert _get_extra_value('creator_name') == 'Creating Organization for dataset 1'
+ assert _get_extra_value('creator_email') == 'creator@example.org'
+ assert _get_extra_value('creator_url') == 'http://example.org'
+ assert _get_extra_value('creator_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
+ assert _get_extra_value('creator_identifier') == 'https://ror.org/05wg1m735'
assert _get_extra_value('contact_name') == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert _get_extra_value('contact_email') == 'contact@some.org'
diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
index b249d600..8095202f 100644
--- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
+++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_scheming_support.py
@@ -97,6 +97,15 @@ def test_e2e_ckan_to_dcat(self):
"identifier": "http://example.org/publisher-id",
},
],
+ "creator": [
+ {
+ "name": "Test Creator",
+ "email": "creator@example.org",
+ "url": "https://example.org/creator",
+ "type": "person",
+ "identifier": "http://example.org/creator-id",
+ }
+ ],
"temporal_coverage": [
{"start": "1905-03-01", "end": "2013-01-05"},
{"start": "2024-04-10", "end": "2024-05-29"},
@@ -309,6 +318,38 @@ def test_e2e_ckan_to_dcat(self):
URIRef(dataset_dict["publisher"][0]["identifier"])
)
+ creator = [t for t in g.triples((dataset_ref, DCT.creator, None))]
+
+ assert len(creator) == 1
+ assert self._triple(
+ g, creator[0][2], FOAF.name, dataset_dict["creator"][0]["name"]
+ )
+ assert self._triple(
+ g,
+ creator[0][2],
+ VCARD.hasEmail,
+ URIRef("mailto:" + dataset_dict["creator"][0]["email"]),
+ )
+ assert self._triple(
+ g,
+ creator[0][2],
+ FOAF.homepage,
+ URIRef(dataset_dict["creator"][0]["url"]),
+ )
+ assert self._triple(
+ g,
+ creator[0][2],
+ DCT.type,
+ dataset_dict["creator"][0]["type"],
+ )
+ assert self._triple(
+ g,
+ creator[0][2],
+ DCT.identifier,
+ URIRef(dataset_dict["creator"][0]["identifier"])
+ )
+
+
temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]
assert len(temporal) == len(dataset["temporal_coverage"])
diff --git a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py
index eb9a5eb4..dec37644 100644
--- a/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py
+++ b/ckanext/dcat/tests/profiles/schemaorg/test_schemaorg_profile_serialize.py
@@ -90,7 +90,7 @@ def test_graph_from_dataset(self):
for value in values:
assert self._triple(g, dataset_ref, item[1], item[2](value))
- def test_publisher_extras(self):
+ def test_publisher_and_creator_extras(self):
dataset = {
'id': '4b6fe9ca-dc77-4cec-92a4-55c6624a5bd6',
'name': 'test-dataset',
@@ -100,16 +100,24 @@ def test_publisher_extras(self):
'title': 'Example Publisher from Org',
},
'extras': [
+ # Publisher fields
{'key': 'publisher_uri', 'value': 'http://example.com/publisher'},
{'key': 'publisher_name', 'value': 'Example Publisher'},
{'key': 'publisher_email', 'value': 'publisher@example.com'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
- ]
-
+ # Creator fields
+ {'key': 'creator_uri', 'value': 'http://example.com/creator'},
+ {'key': 'creator_name', 'value': 'Example Creator'},
+ {'key': 'creator_email', 'value': 'creator@example.com'},
+ {'key': 'creator_url', 'value': 'http://example.com/creator/home'},
+ {'key': 'creator_type', 'value': 'http://purl.org/adms/publishertype/NonProfitOrganisation'},
+ {'key': 'creator_identifier', 'value': 'https://ror.org/05wg1m735'},
+ ]
}
+
extras = self._extras(dataset)
s = RDFSerializer(profiles=['schemaorg'])
@@ -117,6 +125,7 @@ def test_publisher_extras(self):
dataset_ref = s.graph_from_dataset(dataset)
+ # Publisher validation
publisher = self._triple(g, dataset_ref, SCHEMA.publisher, None)[2]
assert publisher
assert str(publisher) == extras['publisher_uri']
@@ -124,13 +133,29 @@ def test_publisher_extras(self):
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])
- contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
- assert contact_point
- assert self._triple(g, contact_point, RDF.type, SCHEMA.ContactPoint)
- assert self._triple(g, contact_point, SCHEMA.name, extras['publisher_name'])
- assert self._triple(g, contact_point, SCHEMA.email, extras['publisher_email'])
- assert self._triple(g, contact_point, SCHEMA.url, extras['publisher_url'])
- assert self._triple(g, contact_point, SCHEMA.contactType, 'customer service')
+ contact_point_publisher = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
+ assert contact_point_publisher
+ assert self._triple(g, contact_point_publisher, RDF.type, SCHEMA.ContactPoint)
+ assert self._triple(g, contact_point_publisher, SCHEMA.name, extras['publisher_name'])
+ assert self._triple(g, contact_point_publisher, SCHEMA.email, extras['publisher_email'])
+ assert self._triple(g, contact_point_publisher, SCHEMA.url, extras['publisher_url'])
+ assert self._triple(g, contact_point_publisher, SCHEMA.contactType, 'customer service')
+
+ # Creator validation
+ creator = self._triple(g, dataset_ref, SCHEMA.creator, None)[2]
+ assert creator
+ assert str(creator) == extras['creator_uri']
+ assert self._triple(g, creator, RDF.type, SCHEMA.Organization)
+ assert self._triple(g, creator, SCHEMA.name, extras['creator_name'])
+ assert self._triple(g, creator, SCHEMA.identifier, extras['creator_identifier'])
+
+ contact_point_creator = self._triple(g, creator, SCHEMA.contactPoint, None)[2]
+ assert contact_point_creator
+ assert self._triple(g, contact_point_creator, RDF.type, SCHEMA.ContactPoint)
+ assert self._triple(g, contact_point_creator, SCHEMA.name, extras['creator_name'])
+ assert self._triple(g, contact_point_creator, SCHEMA.email, extras['creator_email'])
+ assert self._triple(g, contact_point_creator, SCHEMA.url, extras['creator_url'])
+ assert self._triple(g, contact_point_creator, SCHEMA.contactType, 'customer service')
def test_publisher_no_uri(self):
dataset = {
diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py
index de17e9ad..d5fd1749 100644
--- a/ckanext/dcat/utils.py
+++ b/ckanext/dcat/utils.py
@@ -79,6 +79,12 @@ def field_labels():
'publisher_url': _('Publisher URL'),
'publisher_type': _('Publisher type'),
'publisher_identifier': _('Publisher identifier'),
+ 'creator_uri': _('Creator URI'),
+ 'creator_name': _('Creator name'),
+ 'creator_email': _('Creator email'),
+ 'creator_url': _('Creator URL'),
+ 'creator_type': _('Creator type'),
+ 'creator_identifier': _('Creator identifier'),
'contact_name': _('Contact name'),
'contact_email': _('Contact email'),
'contact_uri': _('Contact URI'),
diff --git a/docs/mapping.md b/docs/mapping.md
index 89057ab2..ce4048c4 100644
--- a/docs/mapping.md
+++ b/docs/mapping.md
@@ -54,6 +54,12 @@ some cases the way metadata is stored internally and presented at the CKAN API l
| foaf:Agent | foaf:homepage | custom:publisher_url | | text | |
| foaf:Agent | dct:type | custom:publisher_type | | text | |
| foaf:Agent | dct:identifier | custom:publisher_id | | text |
+| dcat:Dataset | dct:creator | custom:creator_uri | | text | See [URIs](mapping.md#uris) and [Publisher](#contact-points-and-publisher) |
+| foaf:Agent | foaf:name | custom:creator_name | | text | |
+| foaf:Agent | foaf:mbox | custom:creator_email | | text | |
+| foaf:Agent | foaf:homepage | custom:creator_url | | text | |
+| foaf:Agent | dct:type | custom:creator_type | | text | |
+| foaf:Agent | dct:identifier | custom:creator_identifier | | text | |
| dcat:Dataset | dcat:contactPoint | custom:contact_uri | | text | See [URIs](mapping.md#uris) and [Contact points](#contact-points-and-publisher) |
| vcard:Kind | vcard:fn | custom:contact_name | maintainer, author | text | |
| vcard:Kind | vcard:hasEmail | custom:contact_email | maintainer_email, author_email | text | |
diff --git a/examples/ckan/ckan_dataset.json b/examples/ckan/ckan_dataset.json
index a05f989a..ccdcb2d9 100644
--- a/examples/ckan/ckan_dataset.json
+++ b/examples/ckan/ckan_dataset.json
@@ -8,6 +8,8 @@
{"key": "guid", "value": "9df8df51-63db-37a8-e044-0003ba9b0d98"},
{"key": "dcat_publisher_name", "value": "Geological Society"},
{"key": "dcat_publisher_email", "value": "info@gs.org"},
+ {"key": "dcat_creator_name", "value": "John Doe"},
+ {"key": "dcat_creator_email", "value": "johndoe@example.com"},
{"key": "language", "value": "en,es,ca"}
],
"resources": [{"id": "b1e0b666-b7f4-44c1-9b16-56c78e86b66a",
diff --git a/examples/ckan/dataset.json b/examples/ckan/dataset.json
index c1d887d1..ebcc1055 100644
--- a/examples/ckan/dataset.json
+++ b/examples/ckan/dataset.json
@@ -9,11 +9,17 @@
"keyword" : ["exploration", "geochemical-exploration", "geochemical-maps", "geochemistry", "geology", "nercddc", "regional-geology"],
"publisher": {
"name": "Geological Society",
- "mbox": "info@gs.org"
+ "email": "info@gs.org"
},
- "distribution": [{"accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html",
- "byteSize": null,
- "description": "Resource locator",
- "format": "text/html",
- "title": ""}]
+ "creator": {
+ "name": "John Doe",
+ "email": "johndoe@example.com"
+ },
+ "distribution": [{
+ "accessURL": "http://www.bgs.ac.uk/gbase/geochemcd/home.html",
+ "byteSize": null,
+ "description": "Resource locator",
+ "format": "text/html",
+ "title": ""
+ }]
}
diff --git a/examples/ckan/full_ckan_dataset.json b/examples/ckan/full_ckan_dataset.json
index 24a17bcc..e62927c7 100644
--- a/examples/ckan/full_ckan_dataset.json
+++ b/examples/ckan/full_ckan_dataset.json
@@ -1,6 +1,6 @@
{
- "author": null,
- "author_email": null,
+ "author": "John Doe",
+ "author_email": "johndoe@example.com",
"extras": [
{
"__extras": {
diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf
index 9e117752..8cd9619f 100644
--- a/examples/dcat/dataset.rdf
+++ b/examples/dcat/dataset.rdf
@@ -78,6 +78,15 @@
+
+
+ Creating Organization for dataset 1
+ creator@example.org
+ http://example.org
+
+
+
+
Some website