diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 858b21f..21f75c2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - ckan-version: ["2.10", 2.9] + ckan-version: ["2.11", "2.10", 2.9] fail-fast: false runs-on: ubuntu-latest @@ -15,7 +15,7 @@ jobs: image: openknowledge/ckan-dev:${{ matrix.ckan-version }} services: solr: - image: ckan/ckan-solr:${{ matrix.ckan-version }} + image: ckan/ckan-solr:${{ matrix.ckan-version }}-solr9 postgres: image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} env: diff --git a/.gitignore b/.gitignore index 8570dc5..b9d3c90 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ coverage.xml # Sphinx documentation docs/_build/ +site/ diff --git a/Makefile b/Makefile index e8d9704..e333bc0 100644 --- a/Makefile +++ b/Makefile @@ -23,3 +23,6 @@ changelog: ## compile changelog vendor: cp node_modules/htmx.org/dist/htmx.js ckanext/collection/assets/vendor + +deploy-docs: ## build and publish documentation + mkdocs gh-deploy diff --git a/README.md b/README.md index 13b9363..89a9d0e 100644 --- a/README.md +++ b/README.md @@ -2,869 +2,99 @@ # ckanext-collection -Base classes for viewing data series from CKAN. 
- -## Content - -* [Requirements](#requirements) -* [Installation](#installation) -* [Usage](#usage) -* [Documentation](#documentation) - * [Overview](#overview) - * [Collection intialization](#collection-intialization) - * [Services](#services) - * [Common logic](#common-logic) - * [Data service](#data-service) - * [Pager service](#pager-service) - * [Serializer service](#serializer-service) - * [Columns service](#columns-service) - * [Filters service](#filters-service) - * [Core classes and usage examples](#core-classes-and-usage-examples) - * [Collection](#collection) - * [DbCollection](#dbcollection) - * [Data](#data) - * [StaticData](#staticdata) - * [BaseSaData](#basesadata) - * [StatementSaData](#statementsadata) - * [UnionSaData](#unionsadata) - * [ModelData](#modeldata) - * [TableData](#tabledata) - * [ApiData](#apidata) - * [ApiSearchData](#apisearchdata) - * [ApiListData](#apilistdata) - * [Pager](#pager) - * [ClassicPager](#classicpager) - * [Columns](#columns) - * [TableColumns](#tablecolumns) - * [Filters](#filters) - * [Serializer](#serializer) - * [CsvSerializer](#csvserializer) - * [JsonlSerializer](#jsonlserializer) - * [JsonSerializer](#jsonserializer) - * [HtmlSerializer](#htmlserializer) - * [TableSerializer](#tableserializer) - * [HtmxTableSerializer](#htmxtableserializer) -* [Config settings](#config-settings) -* [Integrations](#integrations) - * [ckanext-admin-panel](#ckanext-admin-panel) -* [License](#license) - -## Requirements - -Compatibility with core CKAN versions: - -| CKAN version | Compatible? | -|--------------|-------------| -| 2.9 | no | -| 2.10 | yes | -| master | yes | - -## Installation - -To install ckanext-collection: - -1. Install the extension: - ```sh - pip install ckanext-collection - ``` - -1. Add `collection` to the `ckan.plugins` setting in your CKAN - config file . - -## Usage - -Collections can be registered via `ckanext.collection.interfaces.ICollection` -or via CKAN signals. 
Registered collection can be initialized anywhere in code -using helper and can be used in a number of generic endpoints that render -collection as HTML of export it into different formats. - -Registration via interface: - -```python -from ckanext.collection.interfaces import CollectionFactory, ICollection +Tools for building interfaces for data collections. +This extension simplifies describing series of items, such as datasets from +search page, users registered on portal, rows of CSV file, tables in DB, +etc. Once you have defined the way items are obtained from the data source, you'll get +a generic interface for pagination, search and displaying data in any format: +HTML page, CSV document, JSON list, or any other custom format that you can +describe. -class MyPlugin(p.SingletonPlugin): - p.implements(ICollection, inherit=True) +Read the [documentation](https://datashades.github.io/ckanext-collection/) for +a full user guide. - def get_collection_factories(self) -> dict[str, CollectionFactory]: - return { - "my-collection": MyCollection, - } -``` +## Quickstart -`get_collection_factories` returns a dictionary with collection names(letters, -digits, underscores and hyphens are allowed) as keys, and collection factories -as values. In most generic case, collection factory is just a collection -class. But you can use any function with signature `(str, dict[str, Any], -**Any) -> Collection` as a factory. 
For example, the following function is a -valid collection factory and it can be returned from `get_collection_factories` +Install the extension -```python -def my_factory(name: str, params: dict[str, Any], **kwargs: Any): - """Collection that shows 100 numbers per page""" - params.setdefault("rows_per_page", 100) - return MyCollection(name, params, **kwargs) +```sh +pip install ckanext-collection ``` -If you want to register a collection only if collection plugin is enabled, you -can use CKAN signals instead of wrapping import from ckanext-collection into -try except block: - -```python - -class MyPlugin(p.SingletonPlugin): - p.implements(p.ISignal) - - def get_signal_subscriptions(self) -> types.SignalMapping: - return { - tk.signals.ckanext.signal("collection:register_collections"): [ - self.collect_collection_factories, - ], - } - - def collect_collection_factories(self, sender: None): - return { - "my-collection": MyCollection, - } - -``` - -Data returned from the signal subscription is exactly the same as from -`ICollection.get_collection_factories`. The only difference, signal -subscription accepts `sender` argument which is always `None`, due to internal -implementation of signals. - +Add `collection` to the `ckan.plugins` setting in your CKAN config file -## Documentation - -### Overview - -The goal of this plugin is to supply you with generic classes for processing -collections of data. As result, it doesn't do much out of the box and you have -to write some code to see a result. - -Majority of useful classes are available inside `ckanext.collection.utils` -module and all examples bellow require the following line in the beginning of -the script: `from ckanext.collection.utils import *`. - -Let's start with the basics. `ckanext-collection` defines a few collections for -different puproses. 
The most basic collection is `Collection`, but it has no -value without customization, so we'll start from `StaticCollection`: - -```python -col = StaticCollection("name", {}) -``` - -Constructor of any collection has two mandatory arguments: name and -parameters. Name is mostly used internally and consists of any combination of -letters, digits, hyphens and underscores. Parameters are passed inside the -dictionary and they change the content of the collection. - -In the most basic scenario, collection represents a number of similar items: -datasets, users, organizations, dictionaries, numbers, etc. As result, it can -be transformed into list or iterated over: +Define the collection ```python -list(col) - -for item in col: - print(item) -``` - -Our test collection is empty at the moment, so you will not see anything just -yet. Usually, `StaticCollection` contains static data, specified when -collection is created. But because we haven't specified any data, collection -contains nothing. +from ckan import model +from ckanext.collection.utils import * -To fix this problem, we have to configure a part of the collection responsible -for data production using its **settings**. Collection divides its internal -logic between a number of configurable *services*, and service that we need is -called **data** service. To modify it, we can pass a named argument called -`data_settings` to the collection's constructor: - -```python -col = StaticCollection( - "name", {}, - data_settings={"data": [1,2,3]} -) -``` - -Now try again iterating over the collection and now you'll see the result: - -```python -for item in col: - print(item) -``` - -It's not very impressive, but you didn't expect much from **static** -collection, right? There are other collections that are more smart, but we have -to learn more concepts of this extension to use them, so for now we'll only -take a brief look on them. - -**Note**: collections have certain restrictions when it comes to amount of -data. 
By default, you'll see only around 10 records, even if you have more. The -same is true for `StaticCollection` - you can see it if you set `data` -attribute of its data-service to `range(1, 100)`. We'll learn how to control -these restrictions later. - -`StaticCollection` works with static data. It can be used for tests or as a -placeholder for a collection that is not yet implemented. In rare cases it can -be used with arbitrary iterable to create a standard interface for data -interaction. - -`ModelCollection` works with SQLAlchemy models. We are going to use two -attributes of its data-service: `model` and `is_scalar`. The former sets actual -model that collection processes, while the latter controls, how we work with -every individual record. By default, `ModelCollection` returns every record as -a number of columns, but we'll set `is_scalar=True` and receive model instance -for every record instead: - -```python -col = ModelCollection( - "", {}, - data_settings={"is_scalar": True, "model": model.User} -) - -for user in col: - assert isinstance(user, model.User) - print(f"{user.name}, {user.email}") -``` - -`ApiSearchCollection` works with API actions similar to `package_search`. They -have to use `rows` and `start` parameters for pagination and their result must -contain `count` and `results` keys. Its data-service accepts `action` attribute -with the name of API action that produces the data: - -```python -col = ApiSearchCollection( - "", {}, - data_settings={"action": "package_search"} -) - -for pkg in col: - print(f"{pkg['id']}: {pkg['title']}") -``` - -`ApiListCollection` works with API actions similar to `package_list`. They have -to use `limit` and `offset` parameters for pagination and their result must be -represented by a list. - -```python -col = ApiListCollection( - "", {}, - data_settings={"action": "package_list"} -) - -for name in col: - print(name) -``` - -`ApiCollection` works with API actions similar to `user_list`. 
They have to -return all records at once, as list. - -```python -col = ApiCollection( - "", {}, - data_settings={"action": "user_list"} -) -for user in col: - print(user["name"]) -``` - -### Collection intialization - -Collection constructor has two mandatory arguments: name and parameters. - -Name is used as collection identifier and it's better to keep this value unique -accross collections. For example, name is used for computing HTML table `id` -attribute when serializing collection as an HTML table. If you render two -collections with the same name, you'll get two identical IDs on the page. - -Params are usually used by data and pager service for searching, sorting, -etc. Collection does not keep all the params. Instead, it stores only items -with key prefixed by `:`. I.e, if collection has name `hello`, and you -pass `{"hello:a": 1, "b": 2, "world:c": 3}`, collection will remove `b`(because -it has no collection name plus colon prefix) and `world:c` members(because it -uses `world` instead of `hello` in prefix). As for `hello:a`, collection strips -`:` prefix from it. So, in the end, collection stores `{"a": 1}`. You -can check params of the collection using `params` attribute: - -```python -col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) -assert col.params == {"a": 1} - -col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) -assert col.params == {"c": 3} -``` - -It allows you rendering and processing multiple collections simultaneously on -the same page. Imagine that you have collection `users` and collection -`packages`. You want to see second page of `users` and fifth of -`packages`. 
Submit the query string `?users:page=2&packages:page=5` and -initialize collections using the following code: - -```python -from ckan.logic import parse_params -from ckan.plugins import toolkit as tk - -params = parse_params(tk.request.args) - -users = ModelCollection( - "users", params, - data_settings={"model": model.User} -) -packages = ModelCollection( - "packages", params, - data_settings={"model": model.Package} -) - -assert users.pager.page == 2 -assert packages.pager.page == 5 -``` - -### Services - -Collection itself contains just a bare minimum of logic, and all the -heavy-lifting is delegated to *services*. Collection knows how to initialize -services and usually the only difference between all your collections, is the -way all their services are configured. - -Collection contains the following services: -* `data`: controls the exact data that can be received from - collection. Contains logic for searching, filters, sorting, etc. -* `pager`: defines restrictions for data iteration. Exactly this service shows - only 10 records when you iterating over static collection -* `serializer`: specifies how collection can be transformed into desired - form. Using correct serializer you'll be able to dump the whole collection as - CSV, JSON, YAML or render it as HTML table. -* `columns`: contains configuration of specific data columns used by other - services. It may define model attributes that are dumped into CSV, names of - the transformation functions that are applied to the certain attribute, names - of the columns that are available for sorting in HTML representation of data. -* `filters`: contains configuration of additional widgets produced during data - serialization. For example, when data is serialized into an HTML table, - filters can define configuration of dropdowns and input fields from the data - search form. - -**Note**: You can define more services in custom collections. 
The list above -enumerates all the services that are available in the base collection and in -all collections shipped with the current extension. For example, one of -built-in collections, `DbCollection` has additional service called -`db_connection` that can communicate with DB. - - -When a collection is created, it creates an instance of each service using -service factories and service settings. Base collection and all collections -that extend it already have all details for initializing every service: - -```python -col = Collection("name", {}) -print(f"""Services: - {col.data=}, - {col.pager=}, - {col.serializer=}, - {col.columns=}, - {col.filters=}""") - -assert list(col) == [] -``` - -This collection has no data. We can initialize an instance of `StaticData` and -replace the existing data service of the collection with new `StaticData` -instance. - -Every service has one required argument: collection that owns the service. All -other arguments are used as a service settings and must be passed by -name. Remember, all the classes used in this manual are available inside -`ckanext.collection.utils`: - -```python -static_data = StaticData(col, data=[1,2,3]) -col.replace_service(static_data) - -assert list(col) == [1, 2, 3] -``` - -Look at `Colletion.replace_service`. It accepts only service instance. There is -no need to pass the name of the service that must be replaced - collection can -understand it without help. And pay attention to the first argument of service -constructor. It must be the collection that is going to use the service. Some -services may work even if you pass a random value as the first argument, but -it's an exceptional situation and one shouldn't rely on it. - -If existing collection is no longer used and you are going to create a new one, -you sometimes want to reuse a service from an existing collection. Just to -avoid creating the service and calling `Collection.replace_service`, which will -save you two lines of code. 
In this case, use `_instance` parameter of -the collection constructor: - -```python -another_col = Collection("another-name", {}, data_instance=col.data) -assert list(another_col) == [1, 2, 3] -``` - -If you do such thing, make sure you are not using old collection anymore. You -just transfered one of its services to another collection, so there is no -guarantees that old collection with detached service will function properly. - -It's usually better to customize service factory, instead of passing existing -customized instance of the service around. You can tell which class to use for -making an instance of a service using `_factory` parameter of the -collection contstructor: - -```python -col = Collection("name", {}, data_factory=StaticData) -assert list(col) == [] -``` - -But in this way we cannot specify the `data` attribute of the `data` factory! -No worries, there are multiple ways to overcome this problem. First of all, all -the settings of the service are available as its attributes. It means that -`data` setting is the same as `data` attribute of the service. If you can do -`StaticData(..., data=...)`, you can as well do `service = StaticData(...); -service.data = ...`: - -```python -col = Collection("name", {}, data_factory=StaticData) -col.data.data = [1, 2, 3] -assert list(col) == [1, 2, 3] -``` - -**Note**: `data` service caches its data. If you already accessed data property -from the `StaticData`, assigning an new value doesn't have any effect because -of the cache. You have to call `col.data.refresh_data()` after assigning to -rebuild the cache. - -But there is a better way. 
You can pass `_settings` dictionary to the -collection constructor and it will be passed down into corresponding service -factory: - -```python -col = Collection( - "name", {}, - data_factory=StaticData, - data_settings={"data": [1, 2, 3]} -) -assert list(col) == [1, 2, 3] -``` - - -It works well for individual scenarios, but when you are creating a lot of -collections with the static data, you want to omit some standard parameters. In -this case you should define a new class that extends Collection and declares -`Factory` attribute: - -```python +## collection of all resources class MyCollection(Collection): - DataFactory = StaticData - -col = MyCollection( - "name", {}, - data_settings={"data": [1, 2, 3]} -) -assert list(col) == [1, 2, 3] -``` + DataFactory = ModelData.with_attributes(model=model.Resource) + # `names` controls names of fields exported by serializer + # further in this guide + ColumnsFactory = Columns.with_attributes(names=["name", "size"]) -You still can pass `data_factory` into `MyCollection` constructor to override -data service factory. But now, by default, `StaticData` is used when it's not -specified explicitly. - -Finally, if you want to create a subclass of service, that has a specific value -of certain attributes, i.e something like this: - -```python -class OneTwoThreeData(StaticData): - data = [1, 2, 3] -``` - -you can use `Service.with_attributes(attr_name=attr_value)` factory method. It -produce a new service class(factory) with specified attributes bound to a -static value. For example, that's how we can define a collection, that always -contains `[1, 2, 3]`: - -```python +## collection of all packages available via search API class MyCollection(Collection): - DataFactory = StaticData.with_attributes(data=[1, 2, 3]) - -col = MyCollection("name", {}) -assert list(col) == [1, 2, 3] -``` - -Now you don't have to specify `data_factory` or `data_settings` when creating a -collection. 
It will always use `StaticData` with `data` set to `[1, 2, 3]` -. Make sure you mean it, because now you cannot override the data using -`data_settings`. - - -#### Common logic - -All services share a few common features. First of all, all services contain a -reference to the collection that uses/owns the service. Only one collection can -own the service. If you move service from one collection to another, you must -never use the old collection, that no longer owns the service. Depending on -internal implementation of the service, it may work without changes, but we -recommend removing such collections. At any point you can get the collection -that owns the service via `attached` attribute of the service: - -```python -col = Collection("name", {}) -assert col.data.attached is col -assert col.pager.attached is col -assert col.columns.attached is col - -another_col = Collection( - "another-name", {}, - data_instance=col.data -) -assert col.data.attached is not col -assert col.data.attached is another_col -assert col.data is another_col.data -``` - -Second common point of services is **settings**. Let's use `StaticData` for -tests. It has one configurable attribute(setting) - `data`. We can specify it -directly when creating data service instance: `StaticData(..., data=DATA)`. Or -we can specify it via `data_settings` when creating a collection: -`StaticCollection("name", {}, data_settings={"data": DATA})`. In both cases -`DATA` will be available as a `data` attribute of the data service. But it -doesn't mean that we can pass just any attribute in this way: - -```python -data = StaticData(col, data=[], not_real=True) -assert hasattr(data, "data") -assert not hasattr(data, "not_real") -``` - -To allow overriding the value of attribute via settings, we have to define this -attribute as a **configurable attribute**. 
For this we need -`configurable_attribute` function from `ckanext.collection.shared`: - -```python -class MyData(StaticData): - i_am_real = configurable_attribute(False) - -data = MyData(col, data=[], i_am_real=True) -assert hasattr(data, "data") -assert hasattr(data, "i_am_real") -assert data.i_am_real is True -``` + DataFactory = ApiSearchData.with_attributes(action="package_search") + ColumnsFactory = Columns.with_attributes(names=["name", "title"]) -`configurable_attribute` accepts either positional default value of the -attribute, or named `default_factory` function that generated default value -every time new instance of the service is created. `default_factory` must -accept a single argument - a new service that is instantiated at the moment: - -```python -class MyData(StaticData): - ref = 42 - i_am_real = shared.configurable_attribute(default_factory=lambda self: self.ref * 10) - -data = MyData(col, data=[]) -assert data.i_am_real == 420 -``` - -Never use another configurable attributes in the `default_factory` - order in -which configurable attributes are initialized is not strictly defined. At the -moment of writing this manual, configurable attributes were initialized in -alphabetical order, but this implementation detail may change in future without -notice. - -TODO: with_attributes - -#### Data service - -This service produces the data for collection. Every data service must: - -* be Iterable and iterate over all available records by default -* define `total` property, that reflects number of available records so that - `len(list(data)) == data.total` -* define `range(start: Any, end: Any)` method that returns slice of the data - -Base class for data services - `Data` - already contains a simple version of -this logic. You need to define only one method to make you custom -implementations: `compute_data()`. When data if accessed for the first time, -`compute_data` is called. 
Its result cached and used for iteration in -for-loops, slicing via `range` method and size measurement via `total` -property. - - -```python -class CustomData(Data): - def compute_data(self) -> Any: - return "abcdefghijklmnopqrstuvwxyz" - -col = Collection("name", {}, data_factory=CustomData) -assert list(col) == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] -assert col.data.total == 26 -assert col.data.range(-3, None) == "xyz" +## collection of all records from CSV file +class MyCollection(Collection): + DataFactory = CsvData.with_attributes(source="/path/to/file.csv") + ColumnsFactory = Columns.with_attributes(names=["a", "b"]) ``` -If you need more complex data source, make sure you defined `__iter__`, -`total`, and `range`: +Initialize collection object and work with data: ```python -class CustomData(Data): - names = configurable_attribute(default_factory=["Anna", "Henry", "Mary"]) - - @property - def total(self): - return len(self.names) - - def __iter__(self): - yield from sorted(self.names) - - def range(self, start: Any, end: Any): - if not isinstance(start, str) or not isinstance(end, str): - return [] - for name in self: - if name < start: - continue - if name > end: - break - yield name +# collection with first page of results(1st-10th items) +col = MyCollection("", {}) +items = list(col) -``` +# collection with third page of results(21st-30th items) +col = MyCollection("", {"page": 3}) +items = list(col) -#### Pager service +# alternatively, read all the items into memory at once, without pagination. +# It may be quite an expensive operation depending on the number of items +col = MyCollection("", {}) +items = list(col.data) -Pager service sets the upper and lower bounds on data used by -collection. Default pager used by collection relies on numeric `start`/`end` -values. But it's possible to define custom pager that uses alphabetical or -temporal bounds, as long as `range` method of your custom data service supports -these bounds. 
+# or get the slice of data from 2nd till 5th(not including 5th, +# just like in python slices) +items = col.data.range(2, 5) -Standard pager(`ClassicPager`) has two configurable attributes: `page`(default: -1) and `rows_per_page`(default: 10). +# check total number of items in collection +print(col.data.total) -```python -col = StaticCollection("name", {}) -assert col.pager.page == 1 -assert col.pager.rows_per_page == 10 ``` -Because of these values you see only first 10 records from data when iterating -the collection. Let's change pager settings: +Serialize data using `Serializer` service: ```python -col = StaticCollection( - "name", {}, - data_settings={"data": range(1, 100)}, - pager_settings={"page": 3, "rows_per_page": 6} -) -assert list(col) == [13, 14, 15, 16, 17, 18] -``` -Pagination details are often passed with search parameters and have huge -implact on the required data frame. Because of it, if `pager_settings` are -missing, `ClassicPager` will look for settings inside collection -parameters(second argument of the collection constructor). But in this case, -pager will use only items that has `:` prefix: +# serialize into JSON string +serializer = JsonSerializer(col) -```python -col = StaticCollection( - "xxx", - {"xxx:page": 3, "xxx:rows_per_page": 6}, - data_settings={"data": range(1, 100)} -) -assert list(col) == [13, 14, 15, 16, 17, 18] - -col = StaticCollection( - "xxx", - {"page": 3, "rows_per_page": 6}, - data_settings={"data": range(1, 100)} -) -assert list(col) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - -``` - -#### Serializer service - -Serializer converts data into textual, binary or any other alternative -representation. For example, if you want to compute records produced by the -`data` service of the collection into pandas' DataFrame, you should probably -use serializer. - -Serializers are main users of columns service, because it contains details -about specific data columns. 
And serializers often iterate data service -directly(ignoring `range` method), to serialize all available records. - -The only required method for serializer is `serialize`. This method must return -an data from `data` service transformed into format provided by serializer. For -example, `JsonSerializer` returns string with JSON-encoded data. - -You are not restricted by textual or binary formats. Serializer that transforms -data into pandas' DataFrame is completely valid version of the serializer. - -```python -class NewLineSerializer(Serializer): - def serialize(self): - result = "" - for item in self.attached.data: - result += str(item) + "\n" - - return result - -col = StaticCollection( - "name", {}, - serializer_factory=NewLineSerializer, - data_settings={"data": [1, 2, 3]} -) -assert "".join(col.serializer.serialize()) == "1\n2\n3\n" -``` - -#### Columns service - -This service contains additional information about separate columns of data -records. It defines following settings: - -* names: all available column names. Used by other settings of columns service -* hidden: columns that should not be shown by serializer. Used by serializer - services -* visible: columns that must be shown by serializer. Used by serializer - services -* sortable: columns that support sorting. Used by data services -* filterable: columns that support filtration/facetting. Used by data services -* searchable: columns that support search by partial match. Used by data - services -* labels: human readable labels for columns. Used by serializer services - -This service contains information used by other service, so defining additional -attributes here is completely normal. For example, some custom serializer, that -serializes data into ORC, can expect `orc_format` attribute in the `columns` -service to be available. So you can add as much additional column related -details as required into this service. 
- -#### Filters service - -This service used only by HTML table serializers at the moment. It has two -configurable attributes `static_filters` and `static_actions`. `static_filters` -are used for building search form for the data table. `static_actions` are not -used, but you can put into it details about batch or record-level actions and -use these details to extend one of standard serializers. For example, -ckanext-admin-panel defines allowed actions (remove, restore, hide) for content -and creates custom templates that are referring these actions. - - -### Core classes and usage examples - -TBA - -#### Data -TBA - -#### StaticData -TBA - -#### BaseSaData -TBA - -#### StatementSaData -TBA - -#### UnionSaData -TBA - -#### ModelData -TBA - -#### ApiData -TBA - -#### ApiSearchData -TBA - -#### ApiListData -TBA - -#### Pager -TBA - -#### ClassicPager -TBA - -#### Columns -TBA - -#### Filters -TBA - -#### Serializer -TBA - -#### CsvSerializer -TBA - -#### JsonlSerializer -TBA - -#### JsonSerializer -TBA - -#### HtmlSerializer -TBA - -#### TableSerializer -TBA - -#### HtmxTableSerializer -TBA - -## Config settings - -```ini -# Names of registered collections that are viewable by any visitor, including -# anonymous. -# (optional, default: ) -ckanext.collection.auth.anonymous_collections = - -# Names of registered collections that are viewable by any authenticated -# user. -# (optional, default: ) -ckanext.collection.auth.authenticated_collections = - -# Add HTMX asset to pages. Enable this option if you are using CKAN v2.10 -# (optional, default: false) -ckanext.collection.include_htmx_asset = false - -# Initialize CKAN JS modules every time HTMX fetches HTML from the server. -# (optional, default: false) -ckanext.collection.htmx_init_modules = false - -# Import path for serializer used by CSV export endpoint. 
-# (optional, default: ckanext.collection.utils.serialize:CsvSerializer) -ckanext.collection.export.csv.serializer = ckanext.collection.utils.serialize:CsvSerializer - -# Import path for serializer used by JSON export endpoint. -# (optional, default: ckanext.collection.utils.serialize:JsonSerializer) -ckanext.collection.export.json.serializer = ckanext.collection.utils.serialize:JsonSerializer - -# Import path for serializer used by JSONl export endpoint. -# (optional, default: ckanext.collection.utils.serialize:JsonlSerializer) -ckanext.collection.export.jsonl.serializer = ckanext.collection.utils.serialize:JsonlSerializer - -# Import path for serializer used by `format`-export endpoint. -# (optional, default: ) -ckanext.collection.export..serializer = - -``` +# or serialize into CSV string +serializer = CsvSerializer(col) -## Integrations +# or serialize into list of dictionaries +serializer = DictListSerializer(col) -### [ckanext-admin-panel](https://github.com/mutantsan/ckanext-admin-panel) -To enable configuration form of ckanext-collection in the admin panel, enable -the following arbitrary schema +print(serializer.serialize()) -```ini -scheming.arbitrary_schemas = - ckanext.collection:ap_config.yaml ``` ## License diff --git a/ckanext/collection/plugin.py b/ckanext/collection/plugin.py index 2285fb4..487a895 100644 --- a/ckanext/collection/plugin.py +++ b/ckanext/collection/plugin.py @@ -2,13 +2,13 @@ import operator from dataclasses import is_dataclass -from typing import Any +from typing import Any, cast import ckan.plugins as p import ckan.plugins.toolkit as tk from ckan.common import CKANConfig -from . import shared +from . 
import shared, signals from .interfaces import CollectionFactory, ICollection try: @@ -40,10 +40,7 @@ def register_config_sections( break else: config_list.append( - SectionConfig( # type: ignore - name="Basic site settings", - configs=[config_page], - ), + SectionConfig(name="Basic site settings", configs=[config_page]), ) return config_list @@ -54,11 +51,6 @@ class ApImplementation(p.SingletonPlugin): pass -register_collection_signal = tk.signals.ckanext.signal( - "collection:register_collections", -) - - @tk.blanket.blueprints @tk.blanket.auth_functions @tk.blanket.config_declarations @@ -67,7 +59,6 @@ class ApImplementation(p.SingletonPlugin): class CollectionPlugin(ApImplementation, p.SingletonPlugin): p.implements(p.IConfigurer) p.implements(p.IConfigurable) - # p.implements(p.ISignal) p.implements(ICollection, inherit=True) # IConfigurer @@ -79,15 +70,7 @@ def update_config(self, config_: CKANConfig): # IConfigurable def configure(self, config_: CKANConfig): - shared.collection_registry.reset() - - for plugin in p.PluginImplementations(ICollection): - for name, factory in plugin.get_collection_factories().items(): - shared.collection_registry.register(name, factory) - - for _, factories in register_collection_signal.send(): - for name, factory in factories.items(): - shared.collection_registry.register(name, factory) + _register_collections() def get_collection_factories(self) -> dict[str, CollectionFactory]: if tk.config["debug"]: @@ -113,3 +96,19 @@ def get_collection_factories(self) -> dict[str, CollectionFactory]: } return {} + + +def _register_collections(): + shared.collection_registry.reset() + + for plugin in p.PluginImplementations(ICollection): + for name, factory in plugin.get_collection_factories().items(): + shared.collection_registry.register(name, factory) + + results = cast( + "list[tuple[Any, dict[str, CollectionFactory]]]", + signals.register_collection_signal.send(), + ) + for _, factories in results: + for name, factory in 
factories.items(): + shared.collection_registry.register(name, factory) diff --git a/ckanext/collection/signals.py b/ckanext/collection/signals.py new file mode 100644 index 0000000..95780ac --- /dev/null +++ b/ckanext/collection/signals.py @@ -0,0 +1,5 @@ +import ckan.plugins.toolkit as tk + +register_collection_signal = tk.signals.ckanext.signal( + "collection:register_collections", +) diff --git a/ckanext/collection/tests/data/file.csv b/ckanext/collection/tests/data/file.csv new file mode 100644 index 0000000..35097e0 --- /dev/null +++ b/ckanext/collection/tests/data/file.csv @@ -0,0 +1,17 @@ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12 +1,2,3 +4,5,6 +7,8,9 +10,11,12 +1,2,3 +4,5,6 +7,8,9 +10,11,12 +1,2,3 +4,5,6 +7,8,9 +10,11,12 diff --git a/ckanext/collection/tests/test_dive.py b/ckanext/collection/tests/test_dive.py new file mode 100644 index 0000000..4802196 --- /dev/null +++ b/ckanext/collection/tests/test_dive.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import pytest + +from ckan import model + +from ckanext.collection import shared +from ckanext.collection.utils import * + + +class TestOverview: + def test_static(self): + col = StaticCollection("name", {}) + assert list(col) == [] + + col = StaticCollection("name", {}, data_settings={"data": [1, 2, 3]}) + assert list(col) == [1, 2, 3] + + @pytest.mark.usefixtures("clean_db") + def test_model(self): + col = ModelCollection( + "", + {}, + data_settings={"is_scalar": True, "model": model.User}, + ) + + for user in col: + assert isinstance(user, model.User) + + @pytest.mark.usefixtures("clean_db", "clean_index", "package") + def test_api_search(self): + col = ApiSearchCollection("", {}, data_settings={"action": "package_search"}) + + for pkg in col: + assert isinstance(pkg, dict) + + @pytest.mark.usefixtures("clean_db", "clean_index", "package") + def test_api_list(self): + col = ApiListCollection("", {}, data_settings={"action": "package_list"}) + for pkg in col: + assert isinstance(pkg, str) + + 
@pytest.mark.usefixtures("clean_db", "user") + def test_api(self): + col = ApiCollection("", {}, data_settings={"action": "user_list"}) + + for user in col: + assert isinstance(user, dict) + + +class TestInitialization: + def test_params(self): + col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) + assert col.params == {"a": 1} + + col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) + assert col.params == {"c": 3} + + def test_multi(self): + params = {"users:page": 2, "packages:page": 5} + + users = ModelCollection("users", params, data_settings={"model": model.User}) + packages = ModelCollection( + "packages", + params, + data_settings={"model": model.Package}, + ) + + assert isinstance(users.pager, ClassicPager) + assert isinstance(packages.pager, ClassicPager) + + assert users.pager.page == 2 + assert packages.pager.page == 5 + + +class TestServices: + def test_default_initialization(self): + col = Collection("name", {}) + assert isinstance(col.data, Data) + assert col.pager + assert col.serializer + assert col.columns + assert col.filters + + def test_service_replacement(self): + col = Collection("name", {}) + static_data = StaticData(col, data=[1, 2, 3]) + col.replace_service(static_data) + + assert list(col) == [1, 2, 3] + + def test_factory(self): + col = Collection("name", {}, data_factory=StaticData) + assert list(col) == [] + + col.data.data = [1, 2, 3] + col.data.refresh_data() + assert list(col) == [1, 2, 3] + + def test_custom_settings(self): + col = Collection( + "name", + {}, + data_factory=StaticData, + data_settings={"data": [1, 2, 3]}, + ) + assert list(col) == [1, 2, 3] + + def test_custom_service_factory(self): + class MyCollection(Collection): + DataFactory = StaticData + + col = MyCollection("name", {}, data_settings={"data": [1, 2, 3]}) + assert list(col) == [1, 2, 3] + + def test_with_attributes(self): + class MyCollection(Collection): + DataFactory = StaticData.with_attributes(data=[1, 2, 3]) + + col = 
MyCollection("name", {}) + assert list(col) == [1, 2, 3] + + +class TestCommonLogic: + def test_attached(self): + col = Collection("name", {}) + assert col.data.attached is col + assert col.pager.attached is col + assert col.columns.attached is col + + another_col = Collection("another-name", {}, data_instance=col.data) + assert col.data.attached is not col + assert col.data.attached is another_col + assert col.data is another_col.data + + def test_settings(self): + col = Collection("name", {}) + data = StaticData(col, data=[], not_real=True) + assert hasattr(data, "data") + assert not hasattr(data, "not_real") + + def test_configurable_attributes(self): + col = Collection("name", {}) + + class MyData(StaticData): + i_am_real = shared.configurable_attribute(False) + + data = MyData(col, data=[], i_am_real=True) + assert hasattr(data, "data") + assert hasattr(data, "i_am_real") + assert data.i_am_real is True + + def test_configurable_attribute_default_factory(self): + col = Collection("name", {}) + + class MyData(StaticData): + ref = 42 + i_am_real = shared.configurable_attribute( + default_factory=lambda self: self.ref * 10, + ) + + data = MyData(col, data=[]) + assert data.i_am_real == 420 diff --git a/ckanext/collection/tests/test_readme.py b/ckanext/collection/tests/test_readme.py index 4802196..aeb2972 100644 --- a/ckanext/collection/tests/test_readme.py +++ b/ckanext/collection/tests/test_readme.py @@ -1,163 +1,111 @@ from __future__ import annotations +import csv +import json +import os +from io import StringIO +from typing import Any, cast + import pytest from ckan import model +from ckan.tests.factories import CKANFactory -from ckanext.collection import shared -from ckanext.collection.utils import * - +import ckanext.collection.utils as cu -class TestOverview: - def test_static(self): - col = StaticCollection("name", {}) - assert list(col) == [] - col = StaticCollection("name", {}, data_settings={"data": [1, 2, 3]}) - assert list(col) == [1, 2, 3] +class 
TestModelCollection(cu.Collection): + DataFactory = cu.ModelData.with_attributes(model=model.Resource) + ColumnsFactory = cu.Columns.with_attributes(names=["name", "size"]) - @pytest.mark.usefixtures("clean_db") - def test_model(self): - col = ModelCollection( - "", - {}, - data_settings={"is_scalar": True, "model": model.User}, - ) - for user in col: - assert isinstance(user, model.User) +## collection of all packages available via search API +class TestApiCollection(cu.Collection): + DataFactory = cu.ApiSearchData.with_attributes(action="package_search") + ColumnsFactory = cu.Columns.with_attributes(names=["name", "title"]) - @pytest.mark.usefixtures("clean_db", "clean_index", "package") - def test_api_search(self): - col = ApiSearchCollection("", {}, data_settings={"action": "package_search"}) - for pkg in col: - assert isinstance(pkg, dict) +## collection of all records from CSV file +class TestCsvCollection(cu.Collection): + DataFactory = cu.CsvData.with_attributes( + source=os.path.join(os.path.dirname(__file__), "data/file.csv"), + ) + ColumnsFactory = cu.Columns.with_attributes(names=["a", "b"]) - @pytest.mark.usefixtures("clean_db", "clean_index", "package") - def test_api_list(self): - col = ApiListCollection("", {}, data_settings={"action": "package_list"}) - for pkg in col: - assert isinstance(pkg, str) - @pytest.mark.usefixtures("clean_db", "user") - def test_api(self): - col = ApiCollection("", {}, data_settings={"action": "user_list"}) +@pytest.mark.usefixtures("with_plugins", "clean_db", "clean_index") +class TestQuickstart: + def test_model(self, resource_factory: type[CKANFactory]): + col = TestModelCollection("", {}) + assert col.data.total == 0 - for user in col: - assert isinstance(user, dict) + resource_factory.create_batch(3) + col = TestModelCollection("", {}) + assert col.data.total == 3 -class TestInitialization: - def test_params(self): - col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) - assert col.params == {"a": 1} + 
serializer = cu.JsonSerializer(col) + expected = [{"name": r.name, "size": r.size} for r in col.data] + assert json.loads(serializer.serialize()) == expected - col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) - assert col.params == {"c": 3} + serializer = cu.CsvSerializer(col) + output = StringIO() + writer = csv.writer(output) + writer.writerow(["name", "size"]) + writer.writerows([[r.name, r.size] for r in col.data]) + assert serializer.serialize() == output.getvalue() - def test_multi(self): - params = {"users:page": 2, "packages:page": 5} + serializer = cu.DictListSerializer(col) + expected = [{"name": r.name, "size": r.size} for r in col.data] + assert serializer.serialize() == expected - users = ModelCollection("users", params, data_settings={"model": model.User}) - packages = ModelCollection( - "packages", - params, - data_settings={"model": model.Package}, - ) + def test_api(self, package_factory: type[CKANFactory]): + col = TestApiCollection("", {}) + assert col.data.total == 0 - assert isinstance(users.pager, ClassicPager) - assert isinstance(packages.pager, ClassicPager) + package_factory.create_batch(3) - assert users.pager.page == 2 - assert packages.pager.page == 5 + col = TestApiCollection("", {}) + assert col.data.total == 3 + serializer = cu.JsonSerializer(col) + expected = [{"name": r["name"], "title": r["title"]} for r in col.data] + assert json.loads(serializer.serialize()) == expected -class TestServices: - def test_default_initialization(self): - col = Collection("name", {}) - assert isinstance(col.data, Data) - assert col.pager - assert col.serializer - assert col.columns - assert col.filters + serializer = cu.CsvSerializer(col) + output = StringIO() + writer = csv.writer(output) + writer.writerow(["name", "title"]) + writer.writerows([[r["name"], r["title"]] for r in col.data]) + assert serializer.serialize() == output.getvalue() - def test_service_replacement(self): - col = Collection("name", {}) - static_data = 
StaticData(col, data=[1, 2, 3]) - col.replace_service(static_data) + serializer = cu.DictListSerializer(col) + expected = [{"name": r["name"], "title": r["title"]} for r in col.data] + assert serializer.serialize() == expected - assert list(col) == [1, 2, 3] + def test_csv(self, package_factory: type[CKANFactory]): + filename = cast(Any, TestCsvCollection.DataFactory).source + col = TestCsvCollection("", {}) + assert col.data.total == 16 - def test_factory(self): - col = Collection("name", {}, data_factory=StaticData) - assert list(col) == [] + assert len(list(col)) == 10 + assert len(list(col.data)) == 16 + assert len(list(col.data.range(2, 5))) == 3 - col.data.data = [1, 2, 3] - col.data.refresh_data() - assert list(col) == [1, 2, 3] + serializer = cu.JsonSerializer(col) + with open(filename) as src: + expected = [{"a": r["a"], "b": r["b"]} for r in csv.DictReader(src)] + assert json.loads(serializer.serialize()) == expected - def test_custom_settings(self): - col = Collection( - "name", - {}, - data_factory=StaticData, - data_settings={"data": [1, 2, 3]}, - ) - assert list(col) == [1, 2, 3] - - def test_custom_service_factory(self): - class MyCollection(Collection): - DataFactory = StaticData - - col = MyCollection("name", {}, data_settings={"data": [1, 2, 3]}) - assert list(col) == [1, 2, 3] - - def test_with_attributes(self): - class MyCollection(Collection): - DataFactory = StaticData.with_attributes(data=[1, 2, 3]) - - col = MyCollection("name", {}) - assert list(col) == [1, 2, 3] - - -class TestCommonLogic: - def test_attached(self): - col = Collection("name", {}) - assert col.data.attached is col - assert col.pager.attached is col - assert col.columns.attached is col - - another_col = Collection("another-name", {}, data_instance=col.data) - assert col.data.attached is not col - assert col.data.attached is another_col - assert col.data is another_col.data - - def test_settings(self): - col = Collection("name", {}) - data = StaticData(col, data=[], 
not_real=True) - assert hasattr(data, "data") - assert not hasattr(data, "not_real") + serializer = cu.CsvSerializer(col) + output = StringIO() + with open(filename) as src: + writer = csv.writer(output) + for row in csv.reader(src): + writer.writerow(row[:-1]) + assert serializer.serialize() == output.getvalue() - def test_configurable_attributes(self): - col = Collection("name", {}) - - class MyData(StaticData): - i_am_real = shared.configurable_attribute(False) - - data = MyData(col, data=[], i_am_real=True) - assert hasattr(data, "data") - assert hasattr(data, "i_am_real") - assert data.i_am_real is True - - def test_configurable_attribute_default_factory(self): - col = Collection("name", {}) - - class MyData(StaticData): - ref = 42 - i_am_real = shared.configurable_attribute( - default_factory=lambda self: self.ref * 10, - ) - - data = MyData(col, data=[]) - assert data.i_am_real == 420 + serializer = cu.DictListSerializer(col) + with open(filename) as src: + expected = [{"a": r["a"], "b": r["b"]} for (r) in csv.DictReader(src)] + assert serializer.serialize() == expected diff --git a/ckanext/collection/types.py b/ckanext/collection/types.py index e9635c6..1631245 100644 --- a/ckanext/collection/types.py +++ b/ckanext/collection/types.py @@ -17,8 +17,7 @@ def __call__( params: dict[str, Any], /, **kwargs: Any, - ) -> BaseCollection: - ... + ) -> BaseCollection: ... TCollection = TypeVar("TCollection", bound="BaseCollection") @@ -32,7 +31,11 @@ def __call__( class Service: """Marker for service classes used by collection.""" - @abc.abstractproperty + def __init__(self, obj: Any, /, **kwargs: Any): + pass + + @property + @abc.abstractmethod def service_name(self) -> str: """Name of the service instance used by collection.""" ... 
@@ -57,7 +60,8 @@ def service_name(self): class BaseData(abc.ABC, Sized, Iterable[Any], Service): """Declaration of data properties.""" - @abc.abstractproperty + @property + @abc.abstractmethod def total(self) -> int: """Total number of data records.""" ... @@ -77,7 +81,8 @@ class BasePager(abc.ABC, Service): params: dict[str, Any] - @abc.abstractproperty + @property + @abc.abstractmethod def size(self) -> Any: """Range of the pager. @@ -87,7 +92,8 @@ def size(self) -> Any: """ ... - @abc.abstractproperty + @property + @abc.abstractmethod def start(self) -> Any: """Inclusive lower bound of the page. @@ -97,7 +103,8 @@ def start(self) -> Any: """ ... - @abc.abstractproperty + @property + @abc.abstractmethod def end(self) -> Any: """Exclusive upper bound of the page. @@ -114,8 +121,7 @@ def service_name(self): class BaseSerializer(abc.ABC, Service): @abc.abstractmethod - def serialize(self) -> Any: - ... + def serialize(self) -> Any: ... @property def service_name(self): diff --git a/ckanext/collection/utils/__init__.py b/ckanext/collection/utils/__init__.py index 511697f..ea2ec89 100644 --- a/ckanext/collection/utils/__init__.py +++ b/ckanext/collection/utils/__init__.py @@ -16,6 +16,7 @@ ApiSearchData, BaseModelData, BaseSaData, + CsvData, Data, DbData, ModelData, @@ -37,6 +38,7 @@ from .serialize import ( ChartJsSerializer, CsvSerializer, + DictListSerializer, HtmlSerializer, HtmxTableSerializer, JsonlSerializer, @@ -62,6 +64,7 @@ "BaseModelData", "BaseSaData", "ChartJsSerializer", + "DictListSerializer", "ClassicPager", "Collection", "CollectionExplorer", @@ -88,4 +91,5 @@ "TableSerializer", "UnionModelData", "UnionSaData", + "CsvData", ] diff --git a/ckanext/collection/utils/collection/explorer.py b/ckanext/collection/utils/collection/explorer.py index 13e2edb..a0d56df 100644 --- a/ckanext/collection/utils/collection/explorer.py +++ b/ckanext/collection/utils/collection/explorer.py @@ -102,20 +102,27 @@ def compute_data(self) -> Iterable[Any]: if 
isinstance(tables, str): tables = [tables] - hidden = self.attached.params.get("hidden", "").split(",") - hidden = set(filter(None, map(str.strip, hidden))) + _hidden = self.attached.params.get("hidden", "").split(",") + hidden: set[str] = set(filter(None, map(str.strip, _hidden))) - visible = self.attached.params.get("visible", "").split(",") - visible = set(filter(None, map(str.strip, visible))) - hidden + _visible = self.attached.params.get("visible", "").split(",") + visible: set[str] = set(filter(None, map(str.strip, _visible))) - hidden - allowed_filters = self.attached.params.get("allowed_filters", "").split(",") - allowed_filters = set(filter(None, map(str.strip, allowed_filters))) + _allowed_filters = self.attached.params.get("allowed_filters", "").split( + ",", + ) + allowed_filters: set[str] = set( + filter(None, map(str.strip, _allowed_filters)), + ) - searchable_fields = self.attached.params.get("searchable_fields", "").split( + _searchable_fields = self.attached.params.get( + "searchable_fields", + "", + ).split( ",", ) - searchable_fields = ( - set(filter(None, map(str.strip, searchable_fields))) - allowed_filters + searchable_fields: set[str] = ( + set(filter(None, map(str.strip, _searchable_fields))) - allowed_filters ) return [ diff --git a/ckanext/collection/utils/columns.py b/ckanext/collection/utils/columns.py index d28b65e..00b0277 100644 --- a/ckanext/collection/utils/columns.py +++ b/ckanext/collection/utils/columns.py @@ -74,7 +74,11 @@ def _compute_set(self, value: Default | set[str]): return value def get_primary_order(self, name: str) -> str: - """Format column name for usage as a primary order value.""" + """Format column name for usage as a primary order value. 
+ + Args: + name: the name of sorted column + """ return name def get_secondary_order(self, name: str) -> str: diff --git a/ckanext/collection/utils/data/__init__.py b/ckanext/collection/utils/data/__init__.py index 7b53203..5a632ad 100644 --- a/ckanext/collection/utils/data/__init__.py +++ b/ckanext/collection/utils/data/__init__.py @@ -9,9 +9,11 @@ from .base import Data from .db import DbData, TableData from .model import BaseSaData, ModelData, StatementSaData, UnionSaData +from .stream import CsvData __all__ = [ "Data", + "CsvData", "TableData", "ApiData", "ApiListData", diff --git a/ckanext/collection/utils/data/stream.py b/ckanext/collection/utils/data/stream.py new file mode 100644 index 0000000..819a377 --- /dev/null +++ b/ckanext/collection/utils/data/stream.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import csv +import logging + +from ckanext.collection import shared, types + +from .base import Data + +log = logging.getLogger(__name__) + + +class CsvData(Data[types.TData, types.TDataCollection]): + source = shared.configurable_attribute() + + def compute_data(self): + with open(self.source) as src: + reader = csv.DictReader(src) + return list(reader) diff --git a/ckanext/collection/utils/serialize/__init__.py b/ckanext/collection/utils/serialize/__init__.py index e178222..66a4ae3 100644 --- a/ckanext/collection/utils/serialize/__init__.py +++ b/ckanext/collection/utils/serialize/__init__.py @@ -22,6 +22,7 @@ "StreamingSerializer", "RenderableSerializer", "CsvSerializer", + "DictListSerializer", "JsonlSerializer", "JsonSerializer", "ChartJsSerializer", @@ -100,20 +101,43 @@ def dictize_row(self, row: Any) -> dict[str, Any]: """Transform single data record into serializable dictionary.""" result = self.row_dictizer(row) - return {k: self.serialize_value(v, k, row) for k, v in result.items()} + return { + field: self.serialize_value(result[field], field, row) + for field in self.attached.columns.visible + if field in result + } -class 
StreamingSerializer(Serializer[types.TSerialized, types.TDataCollection]): +class StreamingSerializer( + Serializer[types.TSerialized, types.TDataCollection], +): + @abc.abstractmethod def stream(self) -> Iterable[types.TSerialized]: - """Iterate over fragments of the content.""" + """Iterate over fragments of the content. + + Type of the stream fragment must be the same as type of serialized + content. For example, serializer that produces list of dictionaries, + must yield `[dict(...)]`, not just `dict(...)` + """ raise NotImplementedError - def serialize(self): + def serialize(self) -> types.TSerialized: return reduce(operator.add, self.stream()) +class DictListSerializer( + StreamingSerializer["list[dict[str, Any]]", types.TDataCollection], +): + + def stream(self): + """Iterate over fragments of the content.""" + for item in self.attached.data: + yield [self.dictize_row(item)] + + class RenderableSerializer(StreamingSerializer[str, types.TDataCollection]): + def stream(self) -> Iterable[str]: """Iterate over fragments of the content.""" yield "" diff --git a/docs/detailed.md b/docs/detailed.md new file mode 100644 index 0000000..453ed30 --- /dev/null +++ b/docs/detailed.md @@ -0,0 +1,868 @@ +# Deep dive + +## Content + +* [Requirements](#requirements) +* [Installation](#installation) +* [Usage](#usage) +* [Documentation](#documentation) + * [Overview](#overview) + * [Collection intialization](#collection-intialization) + * [Services](#services) + * [Common logic](#common-logic) + * [Data service](#data-service) + * [Pager service](#pager-service) + * [Serializer service](#serializer-service) + * [Columns service](#columns-service) + * [Filters service](#filters-service) + * [Core classes and usage examples](#core-classes-and-usage-examples) + * [Collection](#collection) + * [DbCollection](#dbcollection) + * [Data](#data) + * [StaticData](#staticdata) + * [BaseSaData](#basesadata) + * [StatementSaData](#statementsadata) + * [UnionSaData](#unionsadata) + * 
[ModelData](#modeldata) + * [TableData](#tabledata) + * [ApiData](#apidata) + * [ApiSearchData](#apisearchdata) + * [ApiListData](#apilistdata) + * [Pager](#pager) + * [ClassicPager](#classicpager) + * [Columns](#columns) + * [TableColumns](#tablecolumns) + * [Filters](#filters) + * [Serializer](#serializer) + * [CsvSerializer](#csvserializer) + * [JsonlSerializer](#jsonlserializer) + * [JsonSerializer](#jsonserializer) + * [HtmlSerializer](#htmlserializer) + * [TableSerializer](#tableserializer) + * [HtmxTableSerializer](#htmxtableserializer) +* [Config settings](#config-settings) +* [Integrations](#integrations) + * [ckanext-admin-panel](#ckanext-admin-panel) +* [License](#license) + +## Requirements + +Compatibility with core CKAN versions: + +| CKAN version | Compatible? | +|--------------|-------------| +| 2.9 | no | +| 2.10 | yes | +| master | yes | + +## Installation + +To install ckanext-collection: + +1. Install the extension: + ```sh + pip install ckanext-collection + ``` + +1. Add `collection` to the `ckan.plugins` setting in your CKAN + config file . + +## Usage + +Collections can be registered via `ckanext.collection.interfaces.ICollection` +or via CKAN signals. Registered collection can be initialized anywhere in code +using helper and can be used in a number of generic endpoints that render +collection as HTML or export it into different formats. + +Registration via interface: + +```python +from ckanext.collection.interfaces import CollectionFactory, ICollection + + +class MyPlugin(p.SingletonPlugin): + p.implements(ICollection, inherit=True) + + def get_collection_factories(self) -> dict[str, CollectionFactory]: + return { + "my-collection": MyCollection, + } + +``` + +`get_collection_factories` returns a dictionary with collection names(letters, +digits, underscores and hyphens are allowed) as keys, and collection factories +as values. In most generic case, collection factory is just a collection +class.
But you can use any function with signature `(str, dict[str, Any], +**Any) -> Collection` as a factory. For example, the following function is a +valid collection factory and it can be returned from `get_collection_factories` + +```python +def my_factory(name: str, params: dict[str, Any], **kwargs: Any): + """Collection that shows 100 numbers per page""" + params.setdefault("rows_per_page", 100) + return MyCollection(name, params, **kwargs) +``` + +If you want to register a collection only if collection plugin is enabled, you +can use CKAN signals instead of wrapping import from ckanext-collection into +try except block: + +```python + +class MyPlugin(p.SingletonPlugin): + p.implements(p.ISignal) + + def get_signal_subscriptions(self) -> types.SignalMapping: + return { + tk.signals.ckanext.signal("collection:register_collections"): [ + self.collect_collection_factories, + ], + } + + def collect_collection_factories(self, sender: None): + return { + "my-collection": MyCollection, + } + +``` + +Data returned from the signal subscription is exactly the same as from +`ICollection.get_collection_factories`. The only difference, signal +subscription accepts `sender` argument which is always `None`, due to internal +implementation of signals. + + +## Documentation + +### Overview + +The goal of this plugin is to supply you with generic classes for processing +collections of data. As result, it doesn't do much out of the box and you have +to write some code to see a result. + +Majority of useful classes are available inside `ckanext.collection.utils` +module and all examples below require the following line in the beginning of +the script: `from ckanext.collection.utils import *`. + +Let's start with the basics. `ckanext-collection` defines a few collections for +different purposes.
The most basic collection is `Collection`, but it has no +value without customization, so we'll start from `StaticCollection`: + +```python +col = StaticCollection("name", {}) +``` + +Constructor of any collection has two mandatory arguments: name and +parameters. Name is mostly used internally and consists of any combination of +letters, digits, hyphens and underscores. Parameters are passed inside the +dictionary and they change the content of the collection. + +In the most basic scenario, collection represents a number of similar items: +datasets, users, organizations, dictionaries, numbers, etc. As result, it can +be transformed into list or iterated over: + +```python +list(col) + +for item in col: + print(item) +``` + +Our test collection is empty at the moment, so you will not see anything just +yet. Usually, `StaticCollection` contains static data, specified when +collection is created. But because we haven't specified any data, collection +contains nothing. + + +To fix this problem, we have to configure a part of the collection responsible +for data production using its **settings**. Collection divides its internal +logic between a number of configurable *services*, and service that we need is +called **data** service. To modify it, we can pass a named argument called +`data_settings` to the collection's constructor: + +```python +col = StaticCollection( + "name", {}, + data_settings={"data": [1,2,3]} +) +``` + +Now try again iterating over the collection and now you'll see the result: + +```python +for item in col: + print(item) +``` + +It's not very impressive, but you didn't expect much from **static** +collection, right? There are other collections that are more smart, but we have +to learn more concepts of this extension to use them, so for now we'll only +take a brief look on them. + +**Note**: collections have certain restrictions when it comes to amount of +data. By default, you'll see only around 10 records, even if you have more. 
The +same is true for `StaticCollection` - you can see it if you set `data` +attribute of its data-service to `range(1, 100)`. We'll learn how to control +these restrictions later. + +`StaticCollection` works with static data. It can be used for tests or as a +placeholder for a collection that is not yet implemented. In rare cases it can +be used with arbitrary iterable to create a standard interface for data +interaction. + +`ModelCollection` works with SQLAlchemy models. We are going to use two +attributes of its data-service: `model` and `is_scalar`. The former sets actual +model that collection processes, while the latter controls, how we work with +every individual record. By default, `ModelCollection` returns every record as +a number of columns, but we'll set `is_scalar=True` and receive model instance +for every record instead: + +```python +col = ModelCollection( + "", {}, + data_settings={"is_scalar": True, "model": model.User} +) + +for user in col: + assert isinstance(user, model.User) + print(f"{user.name}, {user.email}") +``` + +`ApiSearchCollection` works with API actions similar to `package_search`. They +have to use `rows` and `start` parameters for pagination and their result must +contain `count` and `results` keys. Its data-service accepts `action` attribute +with the name of API action that produces the data: + +```python +col = ApiSearchCollection( + "", {}, + data_settings={"action": "package_search"} +) + +for pkg in col: + print(f"{pkg['id']}: {pkg['title']}") +``` + +`ApiListCollection` works with API actions similar to `package_list`. They have +to use `limit` and `offset` parameters for pagination and their result must be +represented by a list. + +```python +col = ApiListCollection( + "", {}, + data_settings={"action": "package_list"} +) + +for name in col: + print(name) +``` + +`ApiCollection` works with API actions similar to `user_list`. They have to +return all records at once, as list. 
+ +```python +col = ApiCollection( + "", {}, + data_settings={"action": "user_list"} +) + +for user in col: + print(user["name"]) +``` + +### Collection intialization + +Collection constructor has two mandatory arguments: name and parameters. + +Name is used as collection identifier and it's better to keep this value unique +across collections. For example, name is used for computing HTML table `id` +attribute when serializing collection as an HTML table. If you render two +collections with the same name, you'll get two identical IDs on the page. + +Params are usually used by data and pager service for searching, sorting, +etc. Collection does not keep all the params. Instead, it stores only items +with key prefixed by `<name>:`. I.e, if collection has name `hello`, and you +pass `{"hello:a": 1, "b": 2, "world:c": 3}`, collection will remove `b`(because +it has no collection name plus colon prefix) and `world:c` members(because it +uses `world` instead of `hello` in prefix). As for `hello:a`, collection strips +`<name>:` prefix from it. So, in the end, collection stores `{"a": 1}`. You +can check params of the collection using `params` attribute: + +```python +col = Collection("hello", {"hello:a": 1, "b": 2, "world:c": 3}) +assert col.params == {"a": 1} + +col = Collection("world", {"hello:a": 1, "b": 2, "world:c": 3}) +assert col.params == {"c": 3} +``` + +It allows you rendering and processing multiple collections simultaneously on +the same page. Imagine that you have collection `users` and collection +`packages`. You want to see second page of `users` and fifth of +`packages`.
Submit the query string `?users:page=2&packages:page=5` and +initialize collections using the following code: + +```python +from ckan.logic import parse_params +from ckan.plugins import toolkit as tk + +params = parse_params(tk.request.args) + +users = ModelCollection( + "users", params, + data_settings={"model": model.User} +) +packages = ModelCollection( + "packages", params, + data_settings={"model": model.Package} +) + +assert users.pager.page == 2 +assert packages.pager.page == 5 +``` + +### Services + +Collection itself contains just a bare minimum of logic, and all the +heavy-lifting is delegated to *services*. Collection knows how to initialize +services and usually the only difference between all your collections is the +way all their services are configured. + +Collection contains the following services: +* `data`: controls the exact data that can be received from + collection. Contains logic for searching, filters, sorting, etc. +* `pager`: defines restrictions for data iteration. Exactly this service shows + only 10 records when you are iterating over a static collection +* `serializer`: specifies how collection can be transformed into desired + form. Using correct serializer you'll be able to dump the whole collection as + CSV, JSON, YAML or render it as HTML table. +* `columns`: contains configuration of specific data columns used by other + services. It may define model attributes that are dumped into CSV, names of + the transformation functions that are applied to the certain attribute, names + of the columns that are available for sorting in HTML representation of data. +* `filters`: contains configuration of additional widgets produced during data + serialization. For example, when data is serialized into an HTML table, + filters can define configuration of dropdowns and input fields from the data + search form. + +**Note**: You can define more services in custom collections. 
The list above +enumerates all the services that are available in the base collection and in +all collections shipped with the current extension. For example, one of +built-in collections, `DbCollection` has additional service called +`db_connection` that can communicate with DB. + + +When a collection is created, it creates an instance of each service using +service factories and service settings. Base collection and all collections +that extend it already have all details for initializing every service: + +```python +col = Collection("name", {}) +print(f"""Services: + {col.data=}, + {col.pager=}, + {col.serializer=}, + {col.columns=}, + {col.filters=}""") + +assert list(col) == [] +``` + +This collection has no data. We can initialize an instance of `StaticData` and +replace the existing data service of the collection with new `StaticData` +instance. + +Every service has one required argument: collection that owns the service. All +other arguments are used as service settings and must be passed by +name. Remember, all the classes used in this manual are available inside +`ckanext.collection.utils`: + +```python +static_data = StaticData(col, data=[1,2,3]) +col.replace_service(static_data) + +assert list(col) == [1, 2, 3] +``` + +Look at `Collection.replace_service`. It accepts only service instance. There is +no need to pass the name of the service that must be replaced - collection can +understand it without help. And pay attention to the first argument of service +constructor. It must be the collection that is going to use the service. Some +services may work even if you pass a random value as the first argument, but +it's an exceptional situation and one shouldn't rely on it. + +If existing collection is no longer used and you are going to create a new one, +you sometimes want to reuse a service from an existing collection. Just to +avoid creating the service and calling `Collection.replace_service`, which will +save you two lines of code. 
In this case, use `<service>_instance` parameter of +the collection constructor: + +```python +another_col = Collection("another-name", {}, data_instance=col.data) +assert list(another_col) == [1, 2, 3] +``` + +If you do such thing, make sure you are not using old collection anymore. You +just transferred one of its services to another collection, so there are no +guarantees that old collection with detached service will function properly. + +It's usually better to customize service factory, instead of passing existing +customized instance of the service around. You can tell which class to use for +making an instance of a service using `<service>_factory` parameter of the +collection constructor: + +```python +col = Collection("name", {}, data_factory=StaticData) +assert list(col) == [] +``` + +But in this way we cannot specify the `data` attribute of the `data` factory! +No worries, there are multiple ways to overcome this problem. First of all, all +the settings of the service are available as its attributes. It means that +`data` setting is the same as `data` attribute of the service. If you can do +`StaticData(..., data=...)`, you can as well do `service = StaticData(...); +service.data = ...`: + +```python +col = Collection("name", {}, data_factory=StaticData) +col.data.data = [1, 2, 3] +assert list(col) == [1, 2, 3] +``` + +**Note**: `data` service caches its data. If you already accessed data property +from the `StaticData`, assigning a new value doesn't have any effect because +of the cache. You have to call `col.data.refresh_data()` after assigning to +rebuild the cache. + +But there is a better way. 
You can pass `<service>_settings` dictionary to the +collection constructor and it will be passed down into corresponding service +factory: + +```python +col = Collection( + "name", {}, + data_factory=StaticData, + data_settings={"data": [1, 2, 3]} +) +assert list(col) == [1, 2, 3] +``` + + +It works well for individual scenarios, but when you are creating a lot of +collections with the static data, you want to omit some standard parameters. In +this case you should define a new class that extends Collection and declares +`<Service>Factory` attribute: + +```python +class MyCollection(Collection): + DataFactory = StaticData + +col = MyCollection( + "name", {}, + data_settings={"data": [1, 2, 3]} +) +assert list(col) == [1, 2, 3] +``` + +You still can pass `data_factory` into `MyCollection` constructor to override +data service factory. But now, by default, `StaticData` is used when it's not +specified explicitly. + +Finally, if you want to create a subclass of service, that has a specific value +of certain attributes, i.e something like this: + +```python +class OneTwoThreeData(StaticData): + data = [1, 2, 3] +``` + +you can use `Service.with_attributes(attr_name=attr_value)` factory method. It +produces a new service class(factory) with specified attributes bound to a +static value. For example, that's how we can define a collection, that always +contains `[1, 2, 3]`: + +```python +class MyCollection(Collection): + DataFactory = StaticData.with_attributes(data=[1, 2, 3]) + +col = MyCollection("name", {}) +assert list(col) == [1, 2, 3] +``` + +Now you don't have to specify `data_factory` or `data_settings` when creating a +collection. It will always use `StaticData` with `data` set to `[1, 2, 3]`. +Make sure you mean it, because now you cannot override the data using +`data_settings`. + + +#### Common logic + +All services share a few common features. First of all, all services contain a +reference to the collection that uses/owns the service. 
Only one collection can +own the service. If you move service from one collection to another, you must +never use the old collection, that no longer owns the service. Depending on +internal implementation of the service, it may work without changes, but we +recommend removing such collections. At any point you can get the collection +that owns the service via `attached` attribute of the service: + +```python +col = Collection("name", {}) +assert col.data.attached is col +assert col.pager.attached is col +assert col.columns.attached is col + +another_col = Collection( + "another-name", {}, + data_instance=col.data +) +assert col.data.attached is not col +assert col.data.attached is another_col +assert col.data is another_col.data +``` + +Second common point of services is **settings**. Let's use `StaticData` for +tests. It has one configurable attribute(setting) - `data`. We can specify it +directly when creating data service instance: `StaticData(..., data=DATA)`. Or +we can specify it via `data_settings` when creating a collection: +`StaticCollection("name", {}, data_settings={"data": DATA})`. In both cases +`DATA` will be available as a `data` attribute of the data service. But it +doesn't mean that we can pass just any attribute in this way: + +```python +data = StaticData(col, data=[], not_real=True) +assert hasattr(data, "data") +assert not hasattr(data, "not_real") +``` + +To allow overriding the value of attribute via settings, we have to define this +attribute as a **configurable attribute**. 
For this we need +`configurable_attribute` function from `ckanext.collection.shared`: + +```python +class MyData(StaticData): + i_am_real = configurable_attribute(False) + +data = MyData(col, data=[], i_am_real=True) +assert hasattr(data, "data") +assert hasattr(data, "i_am_real") +assert data.i_am_real is True +``` + +`configurable_attribute` accepts either positional default value of the +attribute, or named `default_factory` function that generates default value +every time new instance of the service is created. `default_factory` must +accept a single argument - a new service that is instantiated at the moment: + +```python +class MyData(StaticData): + ref = 42 + i_am_real = shared.configurable_attribute(default_factory=lambda self: self.ref * 10) + +data = MyData(col, data=[]) +assert data.i_am_real == 420 +``` + +Never use other configurable attributes in the `default_factory` - order in +which configurable attributes are initialized is not strictly defined. At the +moment of writing this manual, configurable attributes were initialized in +alphabetical order, but this implementation detail may change in future without +notice. + +TODO: with_attributes + +#### Data service + +This service produces the data for collection. Every data service must: + +* be Iterable and iterate over all available records by default +* define `total` property, that reflects number of available records so that + `len(list(data)) == data.total` +* define `range(start: Any, end: Any)` method that returns slice of the data + +Base class for data services - `Data` - already contains a simple version of +this logic. You need to define only one method to make your custom +implementation: `compute_data()`. When data is accessed for the first time, +`compute_data` is called. Its result is cached and used for iteration in +for-loops, slicing via `range` method and size measurement via `total` +property. 
+ + +```python +class CustomData(Data): + def compute_data(self) -> Any: + return "abcdefghijklmnopqrstuvwxyz" + +col = Collection("name", {}, data_factory=CustomData) +assert list(col) == ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] +assert col.data.total == 26 +assert col.data.range(-3, None) == "xyz" + +``` + +If you need more complex data source, make sure you defined `__iter__`, +`total`, and `range`: + +```python +class CustomData(Data): + names = configurable_attribute(default_factory=["Anna", "Henry", "Mary"]) + + @property + def total(self): + return len(self.names) + + def __iter__(self): + yield from sorted(self.names) + + def range(self, start: Any, end: Any): + if not isinstance(start, str) or not isinstance(end, str): + return [] + + for name in self: + if name < start: + continue + if name > end: + break + yield name + +``` + + +#### Pager service + +Pager service sets the upper and lower bounds on data used by +collection. Default pager used by collection relies on numeric `start`/`end` +values. But it's possible to define custom pager that uses alphabetical or +temporal bounds, as long as `range` method of your custom data service supports +these bounds. + +Standard pager(`ClassicPager`) has two configurable attributes: `page`(default: +1) and `rows_per_page`(default: 10). + +```python +col = StaticCollection("name", {}) +assert col.pager.page == 1 +assert col.pager.rows_per_page == 10 +``` + +Because of these values you see only first 10 records from data when iterating +the collection. Let's change pager settings: + +```python +col = StaticCollection( + "name", {}, + data_settings={"data": range(1, 100)}, + pager_settings={"page": 3, "rows_per_page": 6} +) +assert list(col) == [13, 14, 15, 16, 17, 18] +``` + +Pagination details are often passed with search parameters and have huge +impact on the required data frame. 
Because of it, if `pager_settings` are +missing, `ClassicPager` will look for settings inside collection +parameters(second argument of the collection constructor). But in this case, +pager will use only items that have `<name>:` prefix: + +```python +col = StaticCollection( + "xxx", + {"xxx:page": 3, "xxx:rows_per_page": 6}, + data_settings={"data": range(1, 100)} +) +assert list(col) == [13, 14, 15, 16, 17, 18] + +col = StaticCollection( + "xxx", + {"page": 3, "rows_per_page": 6}, + data_settings={"data": range(1, 100)} +) +assert list(col) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +``` + +#### Serializer service + +Serializer converts data into textual, binary or any other alternative +representation. For example, if you want to convert records produced by the +`data` service of the collection into pandas' DataFrame, you should probably +use serializer. + +Serializers are main users of columns service, because it contains details +about specific data columns. And serializers often iterate data service +directly(ignoring `range` method), to serialize all available records. + +The only required method for serializer is `serialize`. This method must return +the data from `data` service transformed into format provided by serializer. For +example, `JsonSerializer` returns string with JSON-encoded data. + +You are not restricted by textual or binary formats. Serializer that transforms +data into pandas' DataFrame is completely valid version of the serializer. + +```python +class NewLineSerializer(Serializer): + def serialize(self): + result = "" + for item in self.attached.data: + result += str(item) + "\n" + + return result + +col = StaticCollection( + "name", {}, + serializer_factory=NewLineSerializer, + data_settings={"data": [1, 2, 3]} +) +assert "".join(col.serializer.serialize()) == "1\n2\n3\n" +``` + +#### Columns service + +This service contains additional information about separate columns of data +records. 
It defines the following settings: + +* names: all available column names. Used by other settings of columns service +* hidden: columns that should not be shown by serializer. Used by serializer + services +* visible: columns that must be shown by serializer. Used by serializer + services +* sortable: columns that support sorting. Used by data services +* filterable: columns that support filtration/faceting. Used by data services +* searchable: columns that support search by partial match. Used by data + services +* labels: human readable labels for columns. Used by serializer services + +This service contains information used by other services, so defining additional +attributes here is completely normal. For example, some custom serializer, that +serializes data into ORC, can expect `orc_format` attribute in the `columns` +service to be available. So you can add as many additional column-related +details as required into this service. + +#### Filters service + +This service is used only by HTML table serializers at the moment. It has two +configurable attributes `static_filters` and `static_actions`. `static_filters` +are used for building search form for the data table. `static_actions` are not +used, but you can put into it details about batch or record-level actions and +use these details to extend one of standard serializers. For example, +ckanext-admin-panel defines allowed actions (remove, restore, hide) for content +and creates custom templates that are referring to these actions. 
+ + +### Core classes and usage examples + +TBA + +#### Data +TBA + +#### StaticData +TBA + +#### BaseSaData +TBA + +#### StatementSaData +TBA + +#### UnionSaData +TBA + +#### ModelData +TBA + +#### ApiData +TBA + +#### ApiSearchData +TBA + +#### ApiListData +TBA + +#### Pager +TBA + +#### ClassicPager +TBA + +#### Columns +TBA + +#### Filters +TBA + +#### Serializer +TBA + +#### CsvSerializer +TBA + +#### JsonlSerializer +TBA + +#### JsonSerializer +TBA + +#### HtmlSerializer +TBA + +#### TableSerializer +TBA + +#### HtmxTableSerializer +TBA + +## Config settings + +```ini +# Names of registered collections that are viewable by any visitor, including +# anonymous. +# (optional, default: ) +ckanext.collection.auth.anonymous_collections = + +# Names of registered collections that are viewable by any authenticated +# user. +# (optional, default: ) +ckanext.collection.auth.authenticated_collections = + +# Add HTMX asset to pages. Enable this option if you are using CKAN v2.10 +# (optional, default: false) +ckanext.collection.include_htmx_asset = false + +# Initialize CKAN JS modules every time HTMX fetches HTML from the server. +# (optional, default: false) +ckanext.collection.htmx_init_modules = false + +# Import path for serializer used by CSV export endpoint. +# (optional, default: ckanext.collection.utils.serialize:CsvSerializer) +ckanext.collection.export.csv.serializer = ckanext.collection.utils.serialize:CsvSerializer + +# Import path for serializer used by JSON export endpoint. +# (optional, default: ckanext.collection.utils.serialize:JsonSerializer) +ckanext.collection.export.json.serializer = ckanext.collection.utils.serialize:JsonSerializer + +# Import path for serializer used by JSONl export endpoint. +# (optional, default: ckanext.collection.utils.serialize:JsonlSerializer) +ckanext.collection.export.jsonl.serializer = ckanext.collection.utils.serialize:JsonlSerializer + +# Import path for serializer used by `format`-export endpoint. 
+# (optional, default: ) +ckanext.collection.export.<format>.serializer = + +``` + +## Integrations + +### [ckanext-admin-panel](https://github.com/mutantsan/ckanext-admin-panel) + +To enable configuration form of ckanext-collection in the admin panel, enable +the following arbitrary schema + +```ini +scheming.arbitrary_schemas = + ckanext.collection:ap_config.yaml +``` + +## License + +[AGPL](https://www.gnu.org/licenses/agpl-3.0.en.html) diff --git a/docs/img/favicon.ico b/docs/img/favicon.ico new file mode 100644 index 0000000..0d9295c Binary files /dev/null and b/docs/img/favicon.ico differ diff --git a/docs/img/logo.png b/docs/img/logo.png new file mode 100644 index 0000000..3f631bf Binary files /dev/null and b/docs/img/logo.png differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..612c7a5 --- /dev/null +++ b/docs/index.md @@ -0,0 +1 @@ +--8<-- "README.md" diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..e339248 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,54 @@ +site_name: ckanext-collection +site_url: https://datashades.github.io/ckanext-collection/ + +repo_url: https://github.com/DataShades/ckanext-collection + +markdown_extensions: + - pymdownx.snippets + - pymdownx.blocks.admonition + - pymdownx.blocks.details + - pymdownx.blocks.tab + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + +plugins: + - mkdocstrings: + handlers: + python: + options: + show_symbol_type_heading: true + show_source: false + + - search: + separator: '[\s\-\.\_]+' + +watch: + - docs/ + - ckanext/collection/utils + +theme: + name: material + logo: img/logo.png + favicon: img/favicon.ico + palette: + primary: blue + features: + - navigation.instant + - navigation.instant.prefetch + - 
navigation.instant.progress + - navigation.footer + - navigation.indexes + - navigation.top + - content.code.copy + - content.code.select + - content.code.annotate + +nav: + - Overview: index.md + - detailed.md diff --git a/pyproject.toml b/pyproject.toml index 2f018d0..04d9403 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ lint.ignore = [ [tool.ruff.lint.per-file-ignores] "ckanext/collection/tests*" = ["S", "PL"] -"ckanext/collection/tests/test_readme.py" = ["F403", "F405"] +"ckanext/collection/tests/test_dive.py" = ["F403", "F405"] [tool.isort] known_ckan = "ckan" known_ckanext = "ckanext" diff --git a/setup.cfg b/setup.cfg index 8dbff56..688139d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = ckanext-collection -version = 0.1.21 +version = 0.2.0a0 description = long_description = file: README.md long_description_content_type = text/markdown @@ -15,6 +15,7 @@ classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 keywords = CKAN [options] @@ -37,8 +38,14 @@ test = pytest-ckan ckanext-toolbelt +docs = + mkdocs + mkdocs-material + pymdown-extensions + mkdocstrings[python] dev = %(test)s + %(docs)s [extract_messages] keywords = translate isPlural @@ -61,10 +68,3 @@ previous = true domain = ckanext-collection directory = ckanext/collection/i18n statistics = true - -[tool:pytest] -filterwarnings = - ignore::sqlalchemy.exc.SADeprecationWarning - ignore::sqlalchemy.exc.SAWarning - ignore::DeprecationWarning -addopts = --ckan-ini test.ini