Skip to content

Commit

Permalink
Merge pull request #301 from Health-RI/add-dct-identifier-to-publisher
Browse files: browse the repository at this point in the history
Add dct identifier to publisher
  • Loading branch information
amercader authored Sep 12, 2024
2 parents eb83433 + 6bd994a commit d4dfab8
Show file tree
Hide file tree
Showing 15 changed files with 122 additions and 71 deletions.
10 changes: 9 additions & 1 deletion ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ def dcat_to_ckan(dcat_dict):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

package_dict['extras'].append({
'key': 'language',
Expand Down
3 changes: 2 additions & 1 deletion ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ def _get_from_extra(key):
('name', Literal, FOAF.name, True,),
('email', Literal, FOAF.mbox, False,),
('url', URIRef, FOAF.homepage,False,),
('type', Literal, DCT.type, False,))
('type', Literal, DCT.type, False,),
('identifier', URIRef, DCT.identifier, False,))

_pub = _get_from_extra('source_catalog_publisher')
if _pub:
Expand Down
2 changes: 2 additions & 0 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ def _publisher(self, subject, predicate):

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)

return publisher

def _contact_details(self, subject, predicate):
Expand Down
5 changes: 4 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type"):
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
Expand Down Expand Up @@ -372,6 +372,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": self._get_dataset_value(dataset_dict, "publisher_email"),
"url": self._get_dataset_value(dataset_dict, "publisher_url"),
"type": self._get_dataset_value(dataset_dict, "publisher_type"),
"identifier": self._get_dataset_value(dataset_dict, "publisher_identifier"),
}
elif dataset_dict.get("organization"):
# Fall back to dataset org
Expand All @@ -396,6 +397,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": org_dict.get("email"),
"url": org_dict.get("url"),
"type": org_dict.get("dcat_type"),
"identifier": org_dict.get("identifier"),
}
# Add to graph
if publisher_ref:
Expand All @@ -406,6 +408,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
("identifier", DCT.identifier, None, URIRefOrLiteral),
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)

Expand Down
7 changes: 7 additions & 0 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,13 @@ def _not_empty_dict(data_dict):
_type=URIRef,
value_modifier=self._add_mailto,
)
self._add_triple_from_dict(
publisher,
publisher_ref,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral
)

temporal = dataset_dict.get("temporal_coverage")
if (
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/profiles/schemaorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ def _publisher_graph(self, dataset_ref, dataset_dict):

self._add_triples_from_dict(dataset_dict, contact_point, items)

publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier")
if publisher_identifier:
self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier)))

def _temporal_graph(self, dataset_ref, dataset_dict):
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.
help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
5 changes: 5 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_recommended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.

help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
2 changes: 2 additions & 0 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ def test_publisher_foaf(self):
<foaf:mbox>contact@some.org</foaf:mbox>
<foaf:homepage>http://some.org</foaf:homepage>
<dct:type rdf:resource="http://purl.org/adms/publishertype/NonProfitOrganisation"/>
<dct:identifier rdf:resource="https://ror.org/05wg1m734"/>
</foaf:Organization>
</dct:publisher>
</rdfs:SomeClass>
Expand All @@ -666,6 +667,7 @@ def test_publisher_foaf(self):
assert publisher['email'] == 'contact@some.org'
assert publisher['url'] == 'http://some.org'
assert publisher['type'] == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert publisher['identifier'] == 'https://ror.org/05wg1m734'

def test_publisher_ref(self):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def _get_extra_value_as_list(key):
assert _get_extra_value('publisher_email') == 'contact@some.org'
assert _get_extra_value('publisher_url') == 'http://some.org'
assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734'
assert _get_extra_value('contact_name') == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert _get_extra_value('contact_email') == 'contact@some.org'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_e2e_ckan_to_dcat(self):
"email": "publisher@example.org",
"url": "https://example.org",
"type": "public_body",
"identifier": "http://example.org/publisher-id",
},
],
"temporal_coverage": [
Expand Down Expand Up @@ -301,6 +302,12 @@ def test_e2e_ckan_to_dcat(self):
DCT.type,
dataset_dict["publisher"][0]["type"],
)
assert self._triple(
g,
publisher[0][2],
DCT.identifier,
URIRef(dataset_dict["publisher"][0]["identifier"])
)

temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def test_publisher_extras(self):
{'key': 'publisher_email', 'value': 'publisher@example.com'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]


Expand All @@ -121,6 +122,7 @@ def test_publisher_extras(self):
assert str(publisher) == extras['publisher_uri']
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand All @@ -144,6 +146,7 @@ def test_publisher_no_uri(self):
{'key': 'publisher_email', 'value': 'publisher@example.com'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]
}
extras = self._extras(dataset)
Expand All @@ -158,6 +161,7 @@ def test_publisher_no_uri(self):
assert isinstance(publisher, BNode)
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand Down
1 change: 1 addition & 0 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def field_labels():
'publisher_email': _('Publisher email'),
'publisher_url': _('Publisher URL'),
'publisher_type': _('Publisher type'),
'publisher_identifier': _('Publisher identifier'),
'contact_name': _('Contact name'),
'contact_email': _('Contact email'),
'contact_uri': _('Contact URI'),
Expand Down
Loading

0 comments on commit d4dfab8

Please sign in to comment.