Skip to content

Commit

Permalink
Fix merge errors
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed May 29, 2024
1 parent 214d853 commit 1bce834
Show file tree
Hide file tree
Showing 6 changed files with 388 additions and 146 deletions.
1 change: 1 addition & 0 deletions ckanext/dcat/profiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@

from .euro_dcat_ap import EuropeanDCATAPProfile
from .euro_dcat_ap_2 import EuropeanDCATAP2Profile
from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
from .schemaorg import SchemaOrgProfile
93 changes: 84 additions & 9 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rdflib.namespace import Namespace, RDF, XSD, SKOS, RDFS
from geomet import wkt, InvalidGeoJSONException

from ckantoolkit import config, url_for, asbool, get_action
from ckantoolkit import config, url_for, asbool, get_action, ObjectNotFound
from ckan.model.license import LicenseRegister
from ckan.lib.helpers import resource_formats
from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
Expand Down Expand Up @@ -41,7 +41,7 @@
"spdx": SPDX,
}

PREFIX_MAILTO = u"mailto:"
PREFIX_MAILTO = "mailto:"

GEOJSON_IMT = "https://www.iana.org/assignments/media-types/application/vnd.geo+json"

Expand Down Expand Up @@ -105,11 +105,20 @@ class RDFProfile(object):
custom profiles
"""

def __init__(self, graph, compatibility_mode=False):
"""Class constructor
_dataset_schema = None

Graph is an rdflib.Graph instance.
# Cache for mappings of licenses URL/title to ID built when needed in
# _license().
_licenceregister_cache = None

# Cache for organization_show details (used for publisher fallback)
_org_cache: dict = {}

def __init__(self, graph, dataset_type="dataset", compatibility_mode=False):
"""Class constructor
Graph is an rdflib.Graph instance.
A scheming dataset type can be provided, in which case the scheming schema
will be loaded so it can be used by profiles.
In compatibility mode, some fields are modified to maintain
compatibility with previous versions of the ckanext-dcat parsers
(eg adding the `dcat_` prefix or storing comma separated lists instead
Expand All @@ -120,9 +129,17 @@ def __init__(self, graph, compatibility_mode=False):

self.compatibility_mode = compatibility_mode

# Cache for mappings of licenses URL/title to ID built when needed in
# _license().
self._licenceregister_cache = None
try:
schema_show = get_action("scheming_dataset_schema_show")
try:
schema = schema_show({}, {"type": dataset_type})
except ObjectNotFound:
raise ObjectNotFound(f"Unknown dataset schema: {dataset_type}")

self._dataset_schema = schema

except KeyError:
pass

def _datasets(self):
"""
Expand Down Expand Up @@ -707,6 +724,64 @@ def _add_spatial_to_dict(self, dataset_dict, key, spatial):
}
)

def _schema_field(self, key):
    """
    Look up a dataset-level field definition in the loaded dataset schema.

    Scans the `dataset_fields` list of `self._dataset_schema` and returns the
    first entry whose `field_name` equals `key`.

    Returns None when no schema is loaded or when no entry matches.
    """
    schema = self._dataset_schema
    if not schema:
        return None

    # First matching entry wins; None when nothing matches
    return next(
        (entry for entry in schema["dataset_fields"] if entry["field_name"] == key),
        None,
    )

def _schema_resource_field(self, key):
    """
    Look up a resource-level field definition in the loaded dataset schema.

    Scans the `resource_fields` list of `self._dataset_schema` and returns the
    first entry whose `field_name` equals `key`.

    Returns None when no schema is loaded or when no entry matches.
    """
    schema = self._dataset_schema
    if not schema:
        return None

    # First matching entry wins; None when nothing matches
    return next(
        (entry for entry in schema["resource_fields"] if entry["field_name"] == key),
        None,
    )

def _set_dataset_value(self, dataset_dict, key, value):
    """
    Store `value` under `key` in a CKAN dataset dict and return the dict.

    The value is written at the root level of the dict when the key matches a
    field of the dataset schema (see `_schema_field()`) or is listed in
    ROOT_DATASET_FIELDS. In any other case it is appended to the `extras`
    list as a `{"key": ..., "value": ...}` item, creating the list if it is
    missing or empty.

    The dict is modified in place.
    """
    stored_at_root = bool(self._schema_field(key)) or key in ROOT_DATASET_FIELDS
    if stored_at_root:
        dataset_dict[key] = value
        return dataset_dict

    # Not a known field: keep it as a CKAN extra
    extras = dataset_dict.get("extras") or []
    extras.append({"key": key, "value": value})
    dataset_dict["extras"] = extras
    return dataset_dict

def _set_list_dataset_value(self, dataset_dict, key, value):
    """
    Store a list value under `key` in a CKAN dataset dict.

    If the dataset schema has a field for `key` whose `validators` include
    `scheming_multiple_text`, the list is stored unchanged. Otherwise
    (no schema, no such field, or a field without that validator) the list is
    serialized to a JSON string before being stored.

    Storage itself is delegated to `_set_dataset_value()`, whose return value
    is returned.
    """
    schema_field = self._schema_field(key)

    # Schema fields are not required to declare `validators`, so a missing
    # (or empty) entry must be treated as "not a multiple text field" rather
    # than raising a KeyError.
    validators = (schema_field or {}).get("validators") or ""

    if "scheming_multiple_text" not in validators:
        value = json.dumps(value)

    return self._set_dataset_value(dataset_dict, key, value)

def _set_list_resource_value(self, resource_dict, key, value):
    """
    Store a list value under `key` in a CKAN resource dict and return the dict.

    If the dataset schema has a resource field for `key` whose `validators`
    include `scheming_multiple_text`, the list is stored unchanged. Otherwise
    (no schema, no such field, or a field without that validator) the list is
    serialized to a JSON string before being stored.

    The dict is modified in place.
    """
    schema_field = self._schema_resource_field(key)

    # Schema fields are not required to declare `validators`, so a missing
    # (or empty) entry must be treated as "not a multiple text field" rather
    # than raising a KeyError.
    validators = (schema_field or {}).get("validators") or ""

    if "scheming_multiple_text" in validators:
        resource_dict[key] = value
    else:
        resource_dict[key] = json.dumps(value)

    return resource_dict

def _get_dataset_value(self, dataset_dict, key, default=None):
"""
Returns the value for the given key on a CKAN dict
Expand Down Expand Up @@ -880,7 +955,7 @@ def _without_mailto(self, mail_addr):
Ensures that the mail address string has no mailto: prefix.
"""
if mail_addr:
return str(mail_addr).replace(PREFIX_MAILTO, u"")
return str(mail_addr).replace(PREFIX_MAILTO, "")
else:
return mail_addr

Expand Down
83 changes: 48 additions & 35 deletions ckanext/dcat/profiles/euro_dcat_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@
DCAT,
DCT,
ADMS,
XSD,
VCARD,
FOAF,
SCHEMA,
SKOS,
LOCN,
GSP,
OWL,
Expand Down Expand Up @@ -354,51 +352,66 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
)

# Publisher
if any(
publisher_ref = None

if dataset_dict.get("publisher"):
# Scheming publisher field: will be handled in a separate profile
pass
elif any(
[
self._get_dataset_value(dataset_dict, "publisher_uri"),
self._get_dataset_value(dataset_dict, "publisher_name"),
dataset_dict.get("organization"),
]
):

# Legacy publisher_* extras
publisher_uri = self._get_dataset_value(dataset_dict, "publisher_uri")
publisher_uri_fallback = publisher_uri_organization_fallback(dataset_dict)
publisher_name = self._get_dataset_value(dataset_dict, "publisher_name")
if publisher_uri:
publisher_details = CleanedURIRef(publisher_uri)
elif not publisher_name and publisher_uri_fallback:
# neither URI nor name are available, use organization as fallback
publisher_details = CleanedURIRef(publisher_uri_fallback)
publisher_ref = CleanedURIRef(publisher_uri)
else:
# No publisher_uri
publisher_details = BNode()

g.add((publisher_details, RDF.type, FOAF.Organization))
g.add((dataset_ref, DCT.publisher, publisher_details))

# In case no name and URI are available, again fall back to organization.
# If no name but an URI is available, the name literal remains empty to
# avoid mixing organization and dataset values.
if (
not publisher_name
and not publisher_uri
and dataset_dict.get("organization")
):
publisher_name = dataset_dict["organization"]["title"]

g.add((publisher_details, FOAF.name, Literal(publisher_name)))
# TODO: It would make sense to fallback these to organization
# fields but they are not in the default schema and the
# `organization` object in the dataset_dict does not include
# custom fields
publisher_ref = BNode()
publisher_details = {
"name": publisher_name,
"email": self._get_dataset_value(dataset_dict, "publisher_email"),
"url": self._get_dataset_value(dataset_dict, "publisher_url"),
"type": self._get_dataset_value(dataset_dict, "publisher_type"),
}
elif dataset_dict.get("organization"):
# Fall back to dataset org
org_id = dataset_dict["organization"]["id"]
org_dict = None
if org_id in self._org_cache:
org_dict = self._org_cache[org_id]
else:
try:
org_dict = toolkit.get_action("organization_show")(
{"ignore_auth": True}, {"id": org_id}
)
self._org_cache[org_id] = org_dict
except toolkit.ObjectNotFound:
pass
if org_dict:
publisher_ref = CleanedURIRef(
publisher_uri_organization_fallback(dataset_dict)
)
publisher_details = {
"name": org_dict.get("title"),
"email": org_dict.get("email"),
"url": org_dict.get("url"),
"type": org_dict.get("dcat_type"),
}
# Add to graph
if publisher_ref:
g.add((publisher_ref, RDF.type, FOAF.Organization))
g.add((dataset_ref, DCT.publisher, publisher_ref))
items = [
("publisher_email", FOAF.mbox, None, Literal),
("publisher_url", FOAF.homepage, None, URIRef),
("publisher_type", DCT.type, None, URIRefOrLiteral),
("name", FOAF.name, None, Literal),
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
]

self._add_triples_from_dict(dataset_dict, publisher_details, items)
self._add_triples_from_dict(publisher_details, publisher_ref, items)

# Temporal
start = self._get_dataset_value(dataset_dict, "temporal_start")
Expand Down
Loading

0 comments on commit 1bce834

Please sign in to comment.