Skip to content

Commit

Permalink
Merge branch 'master' into more-robust-rdf-pagination
Browse files Browse the repository at this point in the history
  • Loading branch information
thorge committed Sep 16, 2024
2 parents 9396b25 + f18ba7a commit cae29ff
Show file tree
Hide file tree
Showing 100 changed files with 12,763 additions and 3,268 deletions.
44 changes: 26 additions & 18 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
name: Tests
env:
COLUMNS: 120
on: [push, pull_request]
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.7'
python-version: '3.9'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax
Expand All @@ -17,16 +19,22 @@ jobs:
needs: lint
strategy:
matrix:
ckan-version: ["2.10", 2.9]
include:
- ckan-version: "2.11"
ckan-image: "ckan/ckan-dev:2.11-py3.10"
- ckan-version: "2.10"
ckan-image: "ckan/ckan-dev:2.10-py3.10"
- ckan-version: "2.9"
ckan-image: "ckan/ckan-dev:2.9-py3.9"
fail-fast: false

name: CKAN ${{ matrix.ckan-version }}
runs-on: ubuntu-latest
container:
image: openknowledge/ckan-dev:${{ matrix.ckan-version }}
image: ${{ matrix.ckan-image }}
services:
solr:
image: ckan/ckan-solr:${{ matrix.ckan-version }}
image: ckan/ckan-solr:${{ matrix.ckan-version }}-solr9
postgres:
image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }}
env:
Expand All @@ -44,28 +52,28 @@ jobs:
CKAN_REDIS_URL: redis://redis:6379/1

steps:
- uses: actions/checkout@v3
- name: Install requirements
- uses: actions/checkout@v4
- name: Install requirements (common)
run: |
pip install -r requirements.txt
pip install -r dev-requirements.txt
- name: Install requirements (common)
run: |
pip install -e .
# Replace default path to CKAN core config file with the one on the container
sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
- name: Setup harvest extension
- name: Install requirements (2.9)
run: |
pip install -U pytest-rerunfailures
if: ${{ matrix.ckan-version == '2.9' }}
- name: Setup other extensions
run: |
git clone https://github.com/ckan/ckanext-harvest
pip install -e ckanext-harvest
pip install -r ckanext-harvest/pip-requirements.txt
pip install -r ckanext-harvest/requirements.txt
git clone https://github.com/ckan/ckanext-scheming
pip install -e ckanext-scheming
- name: Setup extension
run: |
ckan -c test.ini db init
ckan -c test.ini harvester initdb
ckan -c test.ini db pending-migrations --apply
- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=xml --cov-append --disable-warnings ckanext/dcat/tests
- name: Upload coverage report to codecov
uses: codecov/codecov-action@v1
with:
file: ./coverage.xml
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
17 changes: 17 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Read the Docs configuration file for MkDocs projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

build:
os: ubuntu-22.04
tools:
python: "3.12"

mkdocs:
configuration: mkdocs.yml

python:
install:
- requirements: docs/requirements.txt
212 changes: 128 additions & 84 deletions CHANGELOG.md

Large diffs are not rendered by default.

938 changes: 15 additions & 923 deletions README.md

Large diffs are not rendered by default.

140 changes: 130 additions & 10 deletions ckanext/dcat/cli.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,145 @@
# -*- coding: utf-8 -*-
import json

import click

import ckan.plugins.toolkit as tk

import ckanext.dcat.utils as utils
from ckanext.dcat.processors import (
RDFParser,
RDFSerializer,
DEFAULT_RDF_PROFILES,
RDF_PROFILES_CONFIG_OPTION,
)

@click.group()
def generate_static():
"""Generates static files containing all datasets.

"""
@click.group()
def dcat():
"""DCAT utilities for CKAN"""
pass

@generate_static.command()
@click.argument('output', type=click.File(mode="w"))
def json(output):
"""The generate command will generate a static file containing all of
the datasets in the catalog in JSON format.

@dcat.command()
@click.argument("output", type=click.File(mode="w"))
def generate_static(output):
"""[Deprecated] Generate a static datasets file in JSON format
(requires the dcat_json_interface plugin) .
"""
utils.generate_static_json(output)


def _get_profiles(profiles):
if profiles:
profiles = profiles.split()
elif tk.config.get(RDF_PROFILES_CONFIG_OPTION):
profiles = tk.aslist(tk.config[RDF_PROFILES_CONFIG_OPTION])
else:
profiles = None

return profiles


@dcat.command(context_settings={"show_default": True})
@click.argument("input", type=click.File(mode="r"))
@click.option(
    "-o",
    "--output",
    type=click.File(mode="w"),
    default="-",
    # "-" on a write-mode click.File means stdout, not stdin.
    help="By default the command will output the result to stdout, "
    "alternatively you can provide a file path with this option",
)
@click.option(
    "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)"
)
@click.option(
    "-p",
    "--profiles",
    # The f prefix must be on the literal that contains the placeholder,
    # otherwise "{DEFAULT_RDF_PROFILES}" is printed verbatim in the help.
    help="RDF profiles to use. If not provided will be read from config, "
    f"if not present there, the default will be used: {DEFAULT_RDF_PROFILES}",
)
@click.option(
    "-P", "--pretty", is_flag=True, help="Make the output more human readable"
)
@click.option(
    "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
)
def consume(input, output, format, profiles, pretty, compat_mode):
    """
    Parses DCAT RDF graphs into CKAN dataset JSON objects.
    The input serializations can be provided as a path to a file, e.g.:
    ckan dcat consume examples/dcat/dataset.ttl
    Or be read from stdin:
    ckan dcat consume -
    """
    # Parameters (all supplied by click from the decorators above):
    #   input       -- readable file object holding the RDF serialization
    #   output      -- writable file object that receives the JSON
    #   format      -- serialization format name passed to the parser
    #   profiles    -- whitespace separated profile names, or None
    #   pretty      -- when set, indent the JSON output by 4 spaces
    #   compat_mode -- forwarded to RDFParser as compatibility_mode
    contents = input.read()

    profiles = _get_profiles(profiles)

    parser = RDFParser(profiles=profiles, compatibility_mode=compat_mode)
    parser.parse(contents, _format=format)

    # Materialise the parsed datasets so they can be dumped as one JSON array.
    ckan_datasets = list(parser.datasets())

    indent = 4 if pretty else None
    out = json.dumps(ckan_datasets, indent=indent)

    output.write(out)


@dcat.command(context_settings={"show_default": True})
@click.argument("input", type=click.File(mode="r"))
@click.option(
    "-o",
    "--output",
    type=click.File(mode="w"),
    default="-",
    # "-" on a write-mode click.File means stdout, not stdin.
    help="By default the command will output the result to stdout, "
    "alternatively you can provide a file path with this option",
)
@click.option(
    "-f", "--format", default="xml", help="Serialization format (eg ttl, jsonld)"
)
@click.option(
    "-p",
    "--profiles",
    # The f prefix must be on the literal that contains the placeholder,
    # otherwise "{DEFAULT_RDF_PROFILES}" is printed verbatim in the help.
    help="RDF profiles to use. If not provided will be read from config, "
    f"if not present there, the default will be used: {DEFAULT_RDF_PROFILES}",
)
@click.option(
    "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
)
def produce(input, output, format, profiles, compat_mode):
    """
    Transforms CKAN dataset JSON objects into DCAT RDF serializations.
    The input datasets can be provided as a path to a file, e.g.:
    ckan dcat produce examples/ckan/ckan_dataset.json
    Or be read from stdin:
    ckan dcat produce -
    """
    # Parameters (all supplied by click from the decorators above):
    #   input       -- readable file object holding the CKAN dataset JSON
    #   output      -- writable file object that receives the serialization
    #   format      -- serialization format name passed to the serializer
    #   profiles    -- whitespace separated profile names, or None
    #   compat_mode -- forwarded to RDFSerializer as compatibility_mode
    contents = input.read()

    profiles = _get_profiles(profiles)

    serializer = RDFSerializer(profiles=profiles, compatibility_mode=compat_mode)

    # The input may hold either a single dataset object or a list of them;
    # each shape has its own serializer entry point.
    dataset = json.loads(contents)
    if isinstance(dataset, list):
        out = serializer.serialize_datasets(dataset, _format=format)
    else:
        out = serializer.serialize_dataset(dataset, _format=format)

    output.write(out)


def get_commands():
return [generate_static]
return [dcat]
120 changes: 120 additions & 0 deletions ckanext/dcat/config_declaration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
version: 1
groups:
- annotation: General settings
options:

- key: ckanext.dcat.rdf.profiles
default_callable: 'ckanext.dcat.processors:_get_default_rdf_profiles'
description: |
RDF profiles to use when parsing and serializing. See https://github.com/ckan/ckanext-dcat#profiles
for more details.
example: 'euro_dcat_ap_2 my_local_ap'

- key: ckanext.dcat.translate_keys
type: bool
default: True
description: |
If set to True, the plugin will automatically translate the keys of the DCAT
fields used in the frontend (at least those present in the `ckanext/dcat/i18n`
po files).
- annotation: Parsers / Serializers settings
options:

- key: ckanext.dcat.output_spatial_format
type: list
default:
- 'wkt'
description: |
Format to use for geometries when serializing RDF documents. The default is
recommended as it is the format expected by GeoDCAT; alternatively you can
use `geojson` (or both, which will make SHACL validation fail).
- key: ckanext.dcat.resource.inherit.license
type: bool
default: False
description: |
If there is no license defined for a resource / distribution, inherit it from
the dataset.
- key: ckanext.dcat.normalize_ckan_format
type: bool
default: True
description: |
When true, an attempt is made to match the resource label against the standard
list of CKAN formats (https://github.com/ckan/ckan/blob/master/ckan/config/resource_formats.json).
This allows for instance to populate the CKAN resource format field
with a value that view plugins, etc will understand (`csv`, `xml`, etc.)
- key: ckanext.dcat.clean_tags
type: bool
default: False
description: |
Remove special characters from keywords (use the old munge_tag() CKAN function).
This is generally not needed.
- annotation: Endpoints settings
options:

- key: ckanext.dcat.enable_rdf_endpoints
default: True
description: |
Whether to expose the catalog and dataset endpoints with the RDF DCAT
serializations.
type: bool

- key: ckanext.dcat.base_uri
description: |
Base URI to use when generating URIs for all entities. It needs to be a valid URI value.
example: 'https://my-site.org/uri/'

- key: ckanext.dcat.catalog_endpoint
default: '/catalog.{_format}'
description: |
Custom route for the catalog endpoint. It should start with `/` and include the
`{_format}` placeholder.
example: '/dcat/catalog/{_format}'

- key: ckanext.dcat.datasets_per_page
default: 100
type: int
description: |
Default number of datasets returned by the catalog endpoint.
- key: ckanext.dcat.enable_content_negotiation
default: False
type: bool
description: |
Enable content negotiation in the main catalog and dataset endpoints. Note that
setting this to True overrides the core `home.index` and `dataset.read` endpoints.
- annotation: Harvester settings
options:

- key: ckanext.dcat.max_file_size
type: int
default: 50
description: |
Maximum file size that will be downloaded for parsing by the harvesters
- key: ckanext.dcat.expose_subcatalogs
type: bool
default: false
description: |
Store information about the origin catalog when harvesting datasets.
See https://github.com/ckan/ckanext-dcat#transitive-harvesting for more details.
- annotation: Deprecated options (will be removed in future versions)
options:

- key: ckanext.dcat.compatibility_mode
type: bool
default: False
description: |
Whether to modify some fields to maintain compatibility with previous versions
of the ckanext-dcat parsers.
- key: ckanext.dcat.json_endpoint
default: '/dcat.json'
description: |
Custom route to expose the legacy JSON endpoint
10 changes: 9 additions & 1 deletion ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ def dcat_to_ckan(dcat_dict):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

package_dict['extras'].append({
'key': 'language',
Expand Down
Loading

0 comments on commit cae29ff

Please sign in to comment.