From 70d1a1e5a0a6514158485d8df452fa3d0758515d Mon Sep 17 00:00:00 2001 From: Lord-Mallam Date: Thu, 20 Feb 2020 13:00:57 +0100 Subject: [PATCH 1/2] refactor: using aether consumer sdk (#2) * refactor: setup artifacts, resources with sdk * fix: db connection test * feat: ckan resources * fix: travis scripts * test: travis fix * feat: add target_options to subscription schema * chore: include release branch build * fix: lint * chore: indent to space * fix: travis release node * fix: untrack .coverage * fix: setup * chore: remove ckan-test * feat: push messages to ckan * fix: lint * test: include remote ckan for integration test * chore: update readme * fix: typo --- .dockerignore | 1 - .env | 6 - .gitignore | 2 + .gitmodules | 3 - .travis.yml | 46 +- Dockerfile | 21 - Dockerfile.test | 28 - README.md | 286 +-- bin/travis-build.bash | 10 - bin/travis-run.sh | 5 - config/config.json | 22 - config/config.schema | 182 -- config/dataset_metadata.json | 12 - config/example_config.json | 45 - consumer/.dockerignore | 3 + consumer/Dockerfile | 36 + consumer/__init__.py | 0 .../run_gather.sh => consumer/app/__init__.py | 10 +- consumer/app/artifacts.py | 584 ++++++ consumer/app/config.py | 78 + consumer/app/consumer.py | 33 + .../app/fixtures/__init__.py | 13 +- consumer/app/fixtures/examples.py | 57 + consumer/app/fixtures/schemas.py | 369 ++++ consumer/app/utils.py | 137 ++ consumer/conf/consumer/consumer.json | 15 + consumer/conf/consumer/kafka.json | 12 + consumer/conf/extras/flake8.cfg | 16 + consumer/conf/extras/good_job.txt | 9 + consumer/conf/pip/primary-requirements.txt | 31 + consumer/conf/pip/requirements.txt | 73 + consumer/conf/pip/requirements_header.txt | 10 + consumer/config.py | 100 - consumer/core/__init__.py | 0 consumer/core/dataset_manager.py | 126 -- consumer/core/process_manager.py | 97 - consumer/core/resource_manager.py | 345 ---- consumer/core/server_manager.py | 266 --- consumer/core/topic_manager.py | 237 --- consumer/db.py | 98 - 
consumer/entrypoint.sh | 99 + consumer/main.py | 34 - consumer/manage.py | 34 + consumer/setup.cfg | 27 + consumer/setup.py | 52 + consumer/tests/.dockerignore | 1 + consumer/tests/.gitignore | 1 + consumer/tests/__init__.py | 1625 +++++++++++++++++ {example => consumer/tests/ckan}/.gitignore | 0 {example => consumer/tests}/ckan/Dockerfile | 8 +- consumer/tests/ckan/ckan-entrypoint.sh | 86 + .../tests}/ckan/ckan/production.ini | 4 +- {example => consumer/tests}/ckan/ckan/who.ini | 0 .../tests}/ckan/my_init.d/50_configure | 0 .../tests}/ckan/my_init.d/70_initdb | 0 .../tests}/ckan/postgresql/Dockerfile | 0 .../00_create_datastore.sh | 0 .../10_datastore-setup.sql | 0 .../20_postgis_permissions.sql | 0 .../tests/ckan/setup.sh | 33 +- .../tests}/ckan/solr/Dockerfile | 0 consumer/tests/ckan/solr/schema.xml | 188 ++ consumer/tests/ckan/solr/solrconfig.xml | 343 ++++ .../tests/ckan/start.sh | 4 +- .../tests/ckan/stop.sh | 7 +- {example => consumer/tests}/ckan/svc/ckan/run | 0 .../tests}/ckan/svc/nginx/run | 0 .../tests}/ckan/svc/postfix/run | 0 .../tests/ckan/wipe.sh | 11 +- consumer/tests/conf/consumer.json | 5 + consumer/tests/conf/kafka.json | 12 + consumer/tests/test_job_consumer.py | 164 ++ consumer/tests/test_unit.py | 85 + dev-requirements.txt | 4 - docker-compose-base.yml | 12 + docker-compose-test.yml | 85 + docker-compose.test.yml | 8 - docker-compose.yml | 46 +- example/aether-bootstrap | 1 - .../assets/aether-walkthrough-microcensus.xls | Bin 23040 -> 0 bytes example/assets/all_entitites.json | 212 --- example/assets/mapping.json | 106 -- example/assets/sample.json | 106 -- example/ckan-consumer/.env | 6 - example/ckan-consumer/.gitignore | 1 - example/ckan-consumer/config/config.json | 22 - example/ckan-consumer/config/config.schema | 182 -- .../config/dataset_metadata.json | 12 - example/ckan-consumer/config/old_config.json | 73 - example/ckan-consumer/db/.gitignore | 1 - example/ckan-consumer/docker-compose.yml | 24 - example/ckan/.env | 44 - 
example/ckan/ckan-entrypoint.sh | 68 - example/ckan/docker-compose.yml | 86 - example/ckan/solr/schema.xml | 188 -- example/ckan/solr/solrconfig.xml | 343 ---- example/gather/docker-compose-base.yml | 47 - example/gather/docker-compose.yml | 21 - example/scripts/stop_aether.sh | 25 - example/scripts/stop_ckan.sh | 24 - example/scripts/wipe_aether.sh | 30 - example/scripts/wipe_ckan.sh | 32 - requirements.txt | 7 - scripts/build_local.sh | 35 + scripts/release.sh | 11 +- scripts/run_integration_tests.sh | 25 + scripts/run_travis.sh | 26 + .../run_unit_tests.sh | 6 +- tests/fixtures/config.json | 3 - tests/fixtures/config.schema | 6 - tests/fixtures/config_malformed.json | 3 - tests/fixtures/config_not_valid.json | 3 - tests/test_config.py | 71 - tests/test_core/test_dataset_manager.py | 62 - tests/test_core/test_process_manager.py | 85 - tests/test_core/test_resource_manager.py | 123 -- tests/test_core/test_server_manager.py | 117 -- tests/test_core/test_topic_manager.py | 247 --- tests/test_db.py | 117 -- 119 files changed, 4656 insertions(+), 4347 deletions(-) delete mode 100644 .dockerignore delete mode 100644 .env delete mode 100644 .gitmodules delete mode 100644 Dockerfile delete mode 100644 Dockerfile.test delete mode 100644 bin/travis-build.bash delete mode 100644 bin/travis-run.sh delete mode 100644 config/config.json delete mode 100644 config/config.schema delete mode 100644 config/dataset_metadata.json delete mode 100644 config/example_config.json create mode 100644 consumer/.dockerignore create mode 100644 consumer/Dockerfile delete mode 100644 consumer/__init__.py rename example/scripts/run_gather.sh => consumer/app/__init__.py (82%) mode change 100755 => 100644 create mode 100644 consumer/app/artifacts.py create mode 100644 consumer/app/config.py create mode 100644 consumer/app/consumer.py rename example/scripts/start_goa.sh => consumer/app/fixtures/__init__.py (73%) mode change 100755 => 100644 create mode 100644 consumer/app/fixtures/examples.py 
create mode 100644 consumer/app/fixtures/schemas.py create mode 100644 consumer/app/utils.py create mode 100644 consumer/conf/consumer/consumer.json create mode 100644 consumer/conf/consumer/kafka.json create mode 100644 consumer/conf/extras/flake8.cfg create mode 100644 consumer/conf/extras/good_job.txt create mode 100644 consumer/conf/pip/primary-requirements.txt create mode 100644 consumer/conf/pip/requirements.txt create mode 100644 consumer/conf/pip/requirements_header.txt delete mode 100644 consumer/config.py delete mode 100644 consumer/core/__init__.py delete mode 100644 consumer/core/dataset_manager.py delete mode 100644 consumer/core/process_manager.py delete mode 100644 consumer/core/resource_manager.py delete mode 100644 consumer/core/server_manager.py delete mode 100644 consumer/core/topic_manager.py delete mode 100644 consumer/db.py create mode 100755 consumer/entrypoint.sh delete mode 100644 consumer/main.py create mode 100644 consumer/manage.py create mode 100644 consumer/setup.cfg create mode 100644 consumer/setup.py create mode 100644 consumer/tests/.dockerignore create mode 100644 consumer/tests/.gitignore create mode 100644 consumer/tests/__init__.py rename {example => consumer/tests/ckan}/.gitignore (100%) rename {example => consumer/tests}/ckan/Dockerfile (88%) create mode 100755 consumer/tests/ckan/ckan-entrypoint.sh rename {example => consumer/tests}/ckan/ckan/production.ini (98%) rename {example => consumer/tests}/ckan/ckan/who.ini (100%) rename {example => consumer/tests}/ckan/my_init.d/50_configure (100%) rename {example => consumer/tests}/ckan/my_init.d/70_initdb (100%) rename {example => consumer/tests}/ckan/postgresql/Dockerfile (100%) rename {example => consumer/tests}/ckan/postgresql/docker-entrypoint-initdb.d/00_create_datastore.sh (100%) rename {example => consumer/tests}/ckan/postgresql/docker-entrypoint-initdb.d/10_datastore-setup.sql (100%) rename {example => 
consumer/tests}/ckan/postgresql/docker-entrypoint-initdb.d/20_postgis_permissions.sql (100%) rename example/scripts/setup_ckan.sh => consumer/tests/ckan/setup.sh (53%) rename {example => consumer/tests}/ckan/solr/Dockerfile (100%) create mode 100644 consumer/tests/ckan/solr/schema.xml create mode 100644 consumer/tests/ckan/solr/solrconfig.xml rename example/scripts/run_ckan.sh => consumer/tests/ckan/start.sh (89%) rename example/scripts/rebuild_ckan.sh => consumer/tests/ckan/stop.sh (86%) rename {example => consumer/tests}/ckan/svc/ckan/run (100%) rename {example => consumer/tests}/ckan/svc/nginx/run (100%) rename {example => consumer/tests}/ckan/svc/postfix/run (100%) rename example/scripts/run_aether.sh => consumer/tests/ckan/wipe.sh (79%) create mode 100644 consumer/tests/conf/consumer.json create mode 100644 consumer/tests/conf/kafka.json create mode 100644 consumer/tests/test_job_consumer.py create mode 100644 consumer/tests/test_unit.py delete mode 100644 dev-requirements.txt create mode 100644 docker-compose-base.yml create mode 100644 docker-compose-test.yml delete mode 100644 docker-compose.test.yml delete mode 160000 example/aether-bootstrap delete mode 100644 example/assets/aether-walkthrough-microcensus.xls delete mode 100644 example/assets/all_entitites.json delete mode 100644 example/assets/mapping.json delete mode 100644 example/assets/sample.json delete mode 100644 example/ckan-consumer/.env delete mode 100644 example/ckan-consumer/.gitignore delete mode 100644 example/ckan-consumer/config/config.json delete mode 100644 example/ckan-consumer/config/config.schema delete mode 100644 example/ckan-consumer/config/dataset_metadata.json delete mode 100644 example/ckan-consumer/config/old_config.json delete mode 100644 example/ckan-consumer/db/.gitignore delete mode 100644 example/ckan-consumer/docker-compose.yml delete mode 100644 example/ckan/.env delete mode 100755 example/ckan/ckan-entrypoint.sh delete mode 100644 example/ckan/docker-compose.yml delete 
mode 100644 example/ckan/solr/schema.xml delete mode 100644 example/ckan/solr/solrconfig.xml delete mode 100644 example/gather/docker-compose-base.yml delete mode 100644 example/gather/docker-compose.yml delete mode 100755 example/scripts/stop_aether.sh delete mode 100755 example/scripts/stop_ckan.sh delete mode 100755 example/scripts/wipe_aether.sh delete mode 100755 example/scripts/wipe_ckan.sh delete mode 100644 requirements.txt create mode 100755 scripts/build_local.sh create mode 100755 scripts/run_integration_tests.sh create mode 100755 scripts/run_travis.sh rename example/scripts/run_consumer.sh => scripts/run_unit_tests.sh (82%) delete mode 100644 tests/fixtures/config.json delete mode 100644 tests/fixtures/config.schema delete mode 100644 tests/fixtures/config_malformed.json delete mode 100644 tests/fixtures/config_not_valid.json delete mode 100644 tests/test_config.py delete mode 100644 tests/test_core/test_dataset_manager.py delete mode 100644 tests/test_core/test_process_manager.py delete mode 100644 tests/test_core/test_resource_manager.py delete mode 100644 tests/test_core/test_server_manager.py delete mode 100644 tests/test_core/test_topic_manager.py delete mode 100644 tests/test_db.py diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 90c978b..0000000 --- a/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -example/ diff --git a/.env b/.env deleted file mode 100644 index 1f475f0..0000000 --- a/.env +++ /dev/null @@ -1,6 +0,0 @@ -# Forces stdin, stdout and stderr to be totally unbuffered. Useful for -# development. -PYTHONUNBUFFERED=1 - -# Can be set to test, development or production. 
-ENVIRONMENT=development diff --git a/.gitignore b/.gitignore index 1f94ee8..996f94c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc /db .vscode +/ckan +*.coverage diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 72b6c5e..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "example/aether-bootstrap"] - path = example/aether-bootstrap - url = git@github.com:eHealthAfrica/aether-bootstrap.git diff --git a/.travis.yml b/.travis.yml index 42ca5ef..1b88408 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,18 +1,44 @@ language: python python: - - "2.7" + - "3.7" services: - docker cache: pip before_script: - docker login -u ehealthafricadevops -p $DOCKER_HUB_PASSWORD -install: bash bin/travis-build.bash -script: sh bin/travis-run.sh -deploy: -- provider: script - script: "./scripts/release.sh" - skip_cleanup: true - on: - all_branches: true - condition: ($TRAVIS_BRANCH =~ ^release\-[0-9]+\.[0-9]+[\.0-9]*$) || ($TRAVIS_TAG =~ ^[0-9]+\.[0-9]+[\.0-9]*$) +# build only the master branch or tags like #.#.# +branches: + only: + - master + - /^[0-9]+(\.[0-9]+){2}$/ + - /^release\-[0-9]+\.[0-9]+$/ + +# define stages and their execution order +stages: + - name: test + + # release only in: + # - branch master + # - tag #.#.# + # - never in forks or pull requests + - name: release + if: | + fork IS false AND \ + type != pull_request AND \ + ((branch = master) OR \ + (tag =~ ^[0-9]+(\.[0-9]+){2}$)) + +jobs: + fast_finish: true + include: + + - name: "Test" + stage: test + script: "./scripts/run_travis.sh" + + - name: "Release" + stage: release + script: "./scripts/release.sh" + +install: true diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index ddab185..0000000 --- a/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -FROM python:2.7-alpine - -RUN apk update && \ - apk add --no-cache \ - bash \ - gcc \ - libffi-dev \ - linux-headers \ - musl-dev \ - openssl-dev - -WORKDIR /srv/app - -# This avoids reinstalling Python packages 
each time the image is rebuilt. -ADD ./requirements.txt /srv/app/requirements.txt - -RUN pip install -r requirements.txt - -ADD . /srv/app - -CMD ["python", "-m", "consumer.main"] diff --git a/Dockerfile.test b/Dockerfile.test deleted file mode 100644 index bd15ce1..0000000 --- a/Dockerfile.test +++ /dev/null @@ -1,28 +0,0 @@ -FROM python:2.7-alpine - -ENV ENVIRONMENT=test - -RUN apk update && \ - apk add --no-cache \ - bash \ - gcc \ - libffi-dev \ - linux-headers \ - musl-dev \ - openssl-dev - -WORKDIR /srv/app - -# This avoids reinstalling Python packages each time the image is rebuilt. -ADD ./requirements.txt /srv/app/requirements.txt -ADD ./dev-requirements.txt /srv/app/dev-requirements.txt - -RUN pip install -r requirements.txt -RUN pip install -r dev-requirements.txt - -ADD . /srv/app - -RUN pycodestyle ./consumer - -CMD ["nosetests", "./tests", "--nocapture"] - diff --git a/README.md b/README.md index 0048bde..69e97ab 100644 --- a/README.md +++ b/README.md @@ -7,144 +7,194 @@ CKAN portals with data in the Datastore. ### Running the app -The project is setup with Docker. It will spin up an environment with -Python 2.7 based on Alpine and install the required dependencies. +A CKAN portal has to be available for the consumer to connect to. You can find how to set up a CKAN portal [here](https://docs.ckan.org/en/2.8/maintaining/installing/index.html), or check our example of a docker-compose CKAN container `./consumer/tests/ckan` -To run the app just type: +After setting up CKAN and a CKAN user, get your [api-key](https://docs.ckan.org/en/ckan-2.7.3/api/#authentication-and-api-keys) for later use. -``` -docker-compose up --timeout 60 +To start the consumer run + +```bash +docker-compose up ``` -The `--timeout` argument tells Docker how many seconds to wait before it kills -the app when it is gracefully stopped. The default is 10 seconds. This should -be specified based on how many CKAN portals are configured, and how many -datasets need to be feed with data. 
Because when the app is gracefully stopped, - it will send all data processed from Kafka. +The consumer api should now be accessible on [http://localhost:9009](http://localhost:9009) ### Configuration -#### `config.json` +#### `/conf/consumer/kafka.json` + +To setup the main consumer, the kafka.json file should match your preferred Kafka settings. This is not user facing. The consumer running assumes that you have employed topic level access control. -The Consumer can be configured via `config.json`. This is a sample of its shape -and data: +You can also set the default masking and message filtering settings here, but if specified, the user's rules will take precedence. ```json { - "database": { - "url": "sqlite:////srv/app/db/consumer.db" - }, - "kafka": { - "url": "localhost:9092" + "auto_offset_reset" : "earliest", + "aether_emit_flag_required" : false, + "aether_masking_schema_levels" : ["false", "true"], + "aether_masking_schema_emit_level": "false", + "heartbeat_interval_ms": 2500, + "session_timeout_ms": 18000, + "request_timeout_ms": 20000, + "consumer_timeout_ms": 17000 +} +``` + +#### `/conf/consumer/consumer.json` + +The consumer takes data from kafka and groups them into `datasets` on CKAN. This file defines the defaults for datasets if none is provided by the user during subscription. + +```json +{ + "name": "CKAN_CONSUMER", + "metadata": { + "author": "eHealth Africa", + "author_email": "info@ehealthafrica.org", + "maintainer": "eHealth Africa", + "maintainer_email": "info@ehealthafrica.org", + "license_id": "cc-by", + "url": "https://www.ehealthafrica.org", + "version": "1.0", + "owner_org": "eHA", + "name": "demo-dataset-1", + "title": "Demo Dataset" + } +} +``` + +### Usage + +As with all consumers built on the SDK, tasks are driven by a Job which has a set of Resources. In this case, a Job has a `subscription` to a topic (or wildcard) on Kafka, and sends data to a `ckan` instance. 
All resource examples and schemas can be found in `/consumer/app/fixtures` + +Using the consumer API usually on `http://localhost:9009`, register the following artefacts. + +#### CKAN + +(post to `/ckan/add` as `json`) + +```json +{ + "id": "ckan-id", + "name": "CKAN Instance", + "url": "http://ckan:5000", + "key": `[your-ckan-api-key]` +} +``` + +#### Subscription + +(post to `/subscription/add` as `json`) + +```json +{ + "id": "sub-id", + "name": "Demo Subscription", + "topic_pattern": "*", + "topic_options": { + "masking_annotation": "@aether_masking", + "masking_levels": ["public", "private"], + "masking_emit_level": "public", + "filter_required": false }, - "ckan_servers": [ - { - "title": "Local CKAN portal", - "url": "http://localhost:5000", - "api_key": "2ef3752c-b615-405d-9627-2bf7321d4rty", - "datasets": [ - { - "metadata": { - "title": "Dataset title", - "name": "dataset-title", - "owner_org": "demo-org", - "notes": "Sample data" - }, - "resources": [ - { - "metadata": { - "title": "Sensor data", - "description": "Sensor data from wind turbines", - "name": "sensor-data" - }, - "topics": [ - { - "name": "test", - "number_of_consumers": 1 - } - ] - } - ] - } - ] + "target_options": { + "dataset_metadata": { + "title": "Pollution in Nigeria", + "name": "pollution-in-nigeria111", + "owner_org": "eHA", + "notes": "Some description", + "author": "eHealth Africa", + "private": false } - ] + } } ``` -Available options are: - -- `database`: Object storing information for local database. -- `database.url`: URL where the database is stored. -- `kafka`: Object storing information for Kafka. -- `kafka.url`: The URL where Kafka is running. -- `ckan_servers`: Array of CKAN server instances. -- `ckan_servers.title`: Title of the CKAN server instance. -- `ckan_servers.url`: The URL where the CKAN server instance is running. -- `ckan_servers.api_key`: The API key used for making API calls to a CKAN -server instance. 
The API key should be associated with a user that has -privileges to create a dataset in CKAN. -- `ckan_servers.datasets`: Array of datasets to feed data with for the CKAN -server instance. -- `ckan_servers.datasets.metadata`: Metadata information for the dataset that -will be created in CKAN. -- `ckan_servers.datasets.metadata.title`: Title of the dataset. -- `ckan_servers.datasets.metadata.name`: Name of the dataset. Create a unique -name for the dataset. The name should be unique per CKAN instance. For -instance, if you want to give the name *demo-dataset* for the dataset, check -the portal at `https://example.com/dataset/demo-dataset` to see if one already -exists. Also, keep in mind that datasets you don't have access to (private), -will be shown as "Not found" on the CKAN portal. So make sure the name is -trully unique for your purpose. -- `ckan_servers.datasets.metadata.owner_org`: Name of the organization. For -instance, for this organization `https://example.com/organization/org-name`, -the name of the organization is *org-name*. This name is unique per CKAN -instance. Note that this organization must be previously created in CKAN from -the UI. -- `ckan_servers.datasets.metadata.notes`: Description of the dataset. -- `ckan_servers.datasets.resources`: List of resources that should be feed with -data. -- `ckan_servers.datasets.resources.metadata`: Metadata information for the -resource in CKAN. -- `ckan_servers.datasets.resources.metadata.title`: Title of the resource. -- `ckan_servers.datasets.resources.metadata.description`: Description of the -resource. -- `ckan_servers.datasets.resources.metadata.name`: Unique name of the resource. -This name should be unique only for the dataset where it is specified, meaning -the same name can be used when used in other dataset. -- `ckan_servers.datasets.resources.topics`: Array of topics to pull data from -for a dataset. -- `ckan_servers.datasets.resources.topics.name`: Name of the topic in Kafka. 
-`ckan_servers.datasets.resources.topics.number_of_consumers`: Number of -consumers to instantiate for the specified topic. Usually this should be set to - 1, but if the volume of data that comes from a topic increases, it should be -increased. (Default: 1) - -This configuration file is validated against `config.schema`, which is a JSON -Schema file. This makes sure that data in the `config.json` file is valid, as -well as its shape. - -#### `dataset_metadata.json` - -All datasets specified in `config.json` are created in CKAN with default -metadata fields obtainted from `dataset_metadata.json`. They can be overriden -per dataset through the `config.json` file, in the -`ckan_servers.datasets.resources.metadata` object. - -### Environment variables - -In `.env`, the following variables can be changed: - -- `PYTHONUNBUFFERED`: Useful for debugging in development. It forces stdin, -stdout and stderr to be totally unbuffered. It should be set to any value in -order to work. -- `ENVIRONMENT`: Can be set to *development* or *production*. Default is -*development*. +#### Job + +Finally, we tie it together with a Job that references the above artifacts by ID. (post to `/job/add` as `json`) + +```json +{ + "id": "job-id", + "name": "CKAN Consumer Job", + "ckan": "ckan-id", + "subscription": ["sub-id"] +} +``` + +### Environment Variables + +The following settings can be changed in `docker-compose.yml > services > ckan-consumer > environment`: + - `CONSUMER_NAME`: The name of the consumer + - `EXPOSE_PORT`: Port to access consumer API + - `ADMIN_USER`: Username for API authentication + - `ADMIN_PW`: Password for API authentication + + - `REDIS_DB`: Redis database name + - `REDIS_HOST`: Host to redis instance + - `REDIS_PORT`: Redis port + - `REDIS_PASSWORD`: Redis password + + +### Control and Artifact Functions + +The Aether Consumer SDK allows exposure of functionality on a per Job or per Resource basis. 
You can query for a list of available functions on any of the artifacts by hitting its describe endpoint. For example; /job/describe yields: + +```json +[ + { + "doc": "Described the available methods exposed by this resource type", + "method": "describe", + "signature": "(*args, **kwargs)" + }, + { + "doc": "Returns the schema for instances of this resource", + "method": "get_schema", + "signature": "(*args, **kwargs)" + }, + { + "doc": "Return a lengthy validations.\n{'valid': True} on success\n{'valid': False, 'validation_errors': [errors...]} on failure", + "method": "validate_pretty", + "signature": "(definition, *args, **kwargs)" + }, + { + "doc": "Temporarily Pause a job execution.\nWill restart if the system resets. For a longer pause, remove the job via DELETE", + "method": "pause", + "signature": "(self, *args, **kwargs)" + }, + { + "doc": "Resume the job after pausing it.", + "method": "resume", + "signature": "(self, *args, **kwargs)" + }, + { + "doc": null, + "method": "get_status", + "signature": "(self, *args, **kwargs) -> Union[Dict[str, Any], str]" + }, + { + "doc": "A list of the last 100 log entries from this job in format\n[\n (timestamp, log_level, message),\n (timestamp, log_level, message),\n ...\n]", + "method": "get_logs", + "signature": "(self, *arg, **kwargs)" + }, + { + "doc": "Get a list of topics to which the job can subscribe.\nYou can also use a wildcard at the end of names like:\nName* which would capture both Name1 && Name2, etc", + "method": "list_topics", + "signature": "(self, *args, **kwargs)" + }, + { + "doc": "A List of topics currently subscribed to by this job", + "method": "list_subscribed_topics", + "signature": "(self, *arg, **kwargs)" + } +] +``` ### Running the tests To run the tests type the following command which also checks for PEP8 errors: ``` -docker-compose -f docker-compose.test.yml up --build +./scripts/run_unit_tests.sh +./scripts/run_integration_tests.sh ``` diff --git a/bin/travis-build.bash 
b/bin/travis-build.bash deleted file mode 100644 index 7cb8eaa..0000000 --- a/bin/travis-build.bash +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e - -echo "This is travis-build.bash..." - -echo "Installing project's requirements." -pip install -r requirements.txt -pip install -r dev-requirements.txt - -echo "travis-build.bash is done." diff --git a/bin/travis-run.sh b/bin/travis-run.sh deleted file mode 100644 index 9882219..0000000 --- a/bin/travis-run.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -e - -docker-compose -f ./docker-compose.test.yml run test -pycodestyle ./consumer -pycodestyle ./tests diff --git a/config/config.json b/config/config.json deleted file mode 100644 index 0eb9318..0000000 --- a/config/config.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "database": { - "url": "sqlite:////srv/app/db/consumer.db" - }, - "kafka": { - "bootstrap_servers": "kafka:29092", - "aether_emit_flag_required": false, - "aether_emit_flag_field_path": "$.approved", - "aether_emit_flag_values": [true], - "aether_masking_schema_levels" : ["false", "true"], - "aether_masking_schema_emit_level": "true" - }, - "ckan_servers": [ - { - "title": "CKAN Demo portal", - "url": "http://ckan:5000", - "api_key": "49d4cd30-85ab-467f-8710-676f0ce55df1", - "autoconfig_datasets" : true, - "autoconfig_owner_org": "ehademo" - } - ] -} diff --git a/config/config.schema b/config/config.schema deleted file mode 100644 index 311dd96..0000000 --- a/config/config.schema +++ /dev/null @@ -1,182 +0,0 @@ -{ - "type": "object", - "properties": { - "database": { - "type": "object", - "properties": { - "url": { - "type": "string" - } - }, - "required": ["url"] - }, - "kafka": { - "type": "object", - "properties": { - "bootstrap_servers": { - "type": "string" - }, - "aether_emit_flag_required": { - "type": "boolean" - }, - "aether_emit_flag_field_path": { - "type": "string" - }, - "aether_emit_flag_values": { - "anyOf": [{ - "type": "string" - }, { - "type": "boolean" - }, - { - "type": "array", - 
"anyOf": [{ - "items": { - "type": "boolean" - } - }, - { - "items": { - "type": "string" - } - }, { - "items": { - "type": "integer" - } - } - ] - } - ] - }, - "aether_masking_schema_levels": { - "anyOf": [{ - "type": "string" - }, { - "type": "boolean" - }, - { - "type": "array", - "anyOf": [{ - "items": { - "type": "boolean" - } - }, - { - "items": { - "type": "string" - } - }, { - "items": { - "type": "integer" - } - } - ] - } - ] - }, - "aether_masking_schema_emit_level": { - "anyOf": [{ - "type": "string" - }, { - "type": "boolean" - }, { - "type": "integer" - }] - } - - }, - "required": ["bootstrap_servers", "aether_emit_flag_required"] - }, - "ckan_servers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "url": { - "type": "string" - }, - "api_key": { - "type": "string" - }, - "autoconfig_datasets": { - "type": "boolean" - }, - "autoconfig_owner_org": { - "type": "string" - }, - "datasets": { - "type": "array", - "items": { - "type": "object", - "properties": { - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "name": { - "type": "string" - }, - "notes": { - "type": "string" - }, - "owner_org": { - "type": "string" - } - }, - "required": ["title", "name", "notes", "owner_org"] - }, - "resources": { - "type": "array", - "items": { - "type": "object", - "properties": { - "metadata": { - "type": "object", - "properties": { - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "name": { - "type": "string" - } - }, - "required": ["title", "description", "name"] - }, - "topics": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "number_of_consumers": { - "type": "number" - } - }, - "required": ["name", "number_of_consumers"] - } - } - } - }, - "required": ["metadata", "topics"] - } - }, - "required": ["metadata", "resources"] - } - } - }, - "required": ["title", "url", 
"api_key", "autoconfig_datasets"] - } - } - }, - "required": ["kafka"] -} diff --git a/config/dataset_metadata.json b/config/dataset_metadata.json deleted file mode 100644 index 4d8dd17..0000000 --- a/config/dataset_metadata.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "private": false, - "author": "Aleksandar Jovanov", - "author_email": "ajovanov@viderum.com", - "maintainer": "John Smith", - "maintainer_email": "john.smith@email.com", - "license_id": "cc-by", - "url": "https://google.com", - "version": "1.0", - "tags": [{"name": "pollution"}, {"name": "population"}], - "extras": [{"key": "custom_field", "value": "Some text here"}] -} diff --git a/config/example_config.json b/config/example_config.json deleted file mode 100644 index 5a7d31f..0000000 --- a/config/example_config.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "database": { - "url": "sqlite:////srv/app/db/consumer.db" - }, - "kafka": { - "url": "localhost:9092", - "aether_emit_flag_required": true, - "aether_emit_flag_field_path": "$.approved", - "aether_emit_flag_values": [true] - }, - "ckan_servers": [ - { - "title": "CKAN Demo portal", - "url": "http://localhost:5000", - "api_key": "e7b4d292-f282-44d2-b40e-34d753fd22b3", - "autoconfig_datasets" : false, - "datasets": [ - { - "metadata": { - "title": "Pollution in Germany", - "name": "pollution-in-germany111", - "owner_org": "dsvre", - "notes": "Some description", - "author": "Field overriden from default_metadata.json" - }, - "resources": [ - { - "metadata": { - "title": "Sensor data", - "description": "Sensor data from wind turbines", - "name": "sensor-data" - }, - "topics": [ - { - "name": "test-1", - "number_of_consumers": 1 - } - ] - } - ] - } - ] - } - ] -} diff --git a/consumer/.dockerignore b/consumer/.dockerignore new file mode 100644 index 0000000..4a9d6c3 --- /dev/null +++ b/consumer/.dockerignore @@ -0,0 +1,3 @@ +.pytest* +tests/__pycache__ + diff --git a/consumer/Dockerfile b/consumer/Dockerfile new file mode 100644 index 0000000..967f1c7 --- 
/dev/null +++ b/consumer/Dockerfile @@ -0,0 +1,36 @@ +FROM python:3.7-slim-buster + +################################################################################ +## setup container +################################################################################ + +################################################################################ +## install app +## copy files one by one and split commands to use docker cache +################################################################################ + +RUN apt-get update -qq && \ + apt-get -qq \ + --yes \ + --allow-downgrades \ + --allow-remove-essential \ + --allow-change-held-packages \ + install gcc && \ + pip install -q --upgrade pip + +WORKDIR /code + +COPY ./conf/pip/requirements.txt /code/conf/pip/requirements.txt +RUN pip3 install -r /code/conf/pip/requirements.txt + +COPY ./ /code + +################################################################################ +## last setup steps +################################################################################ + +# create user to run container (avoid root user) +RUN useradd -ms /bin/false aether +RUN chown -R aether: /code + +ENTRYPOINT ["/code/entrypoint.sh"] diff --git a/consumer/__init__.py b/consumer/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/example/scripts/run_gather.sh b/consumer/app/__init__.py old mode 100755 new mode 100644 similarity index 82% rename from example/scripts/run_gather.sh rename to consumer/app/__init__.py index 6947adf..9f916c4 --- a/example/scripts/run_gather.sh +++ b/consumer/app/__init__.py @@ -1,6 +1,4 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. @@ -17,9 +15,3 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. -# -set -Eeuo pipefail - -pushd gather -docker-compose up -popd diff --git a/consumer/app/artifacts.py b/consumer/app/artifacts.py new file mode 100644 index 0000000..6e7604a --- /dev/null +++ b/consumer/app/artifacts.py @@ -0,0 +1,584 @@ +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import fnmatch +import requests +import json +from time import sleep +from typing import ( + Callable, + List +) +from uuid import uuid4 +from ast import literal_eval + +from confluent_kafka import KafkaException +from ckanapi import RemoteCKAN +from ckanapi import errors as ckanapi_errors + +# Consumer SDK +from aet.exceptions import ConsumerHttpException +from aet.job import BaseJob, JobStatus +from aet.kafka import KafkaConsumer, FilterConfig, MaskConfig +from aet.logger import callback_logger, get_logger +from aet.resource import BaseResource + +from app.config import get_kafka_config, get_consumer_config +from app.fixtures import schemas +from app.utils import extract_fields_from_schema, prepare_fields_for_resource + + +LOG = get_logger('artifacts') +KAFKA_CONFIG = get_kafka_config() +CONSUMER_CONFIG = get_consumer_config() + + +class CKANInstance(BaseResource): + schema = schemas.CKAN_INSTANCE + jobs_path = '$.ckan' + name = 'ckan' + public_actions = BaseResource.public_actions + [ + 'test_connection' + ] + session: requests.Session() = None + + def get_session(self): + if self.session: + return self.session + self.session = requests.Session() + try: + self.session.auth = ( + self.definition.user, + self.definition.password + ) + except AttributeError: + pass # may not need creds + # add an _id so we can check the instance + setattr(self.session, 'instance_id', str(uuid4())) + return self.session + + def request(self, method, url, **kwargs): + try: + session = self.get_session() + except Exception as err: + raise ConsumerHttpException(str(err), 500) + full_url = f'{self.definition.url}{url}' + res = session.request(method, full_url, **kwargs) + try: + res.raise_for_status() + except Exception: + raise ConsumerHttpException(str(res.content), res.status_code) + return res + + def test_connection(self, *args, **kwargs) -> bool: + try: + res = self.request('get', '/api/action/status_show') + except requests.exceptions.ConnectionError as her: + raise 
ConsumerHttpException(str(her), 500) + except Exception as err: + LOG.debug(f'Error testing ckan connection {err}, {type(err)}') + raise ConsumerHttpException(err, 500) + try: + res.raise_for_status() + except requests.exceptions.HTTPError as her: + LOG.debug(f'Error testing ckan connection {her}') + raise ConsumerHttpException(her, her.response.status_code) + return res.json() + + +class Subscription(BaseResource): + schema = schemas.SUBSCRIPTION + jobs_path = '$.subscription' + name = 'subscription' + + def _handles_topic(self, topic, tenant): + topic_str = self.definition.topic_pattern + # remove tenant information + no_tenant = topic.lstrip(f'{tenant}.') + return fnmatch.fnmatch(no_tenant, topic_str) + + +class CKANJob(BaseJob): + name = 'job' + _resources = [CKANInstance, Subscription] + schema = schemas.CKAN_JOB + + public_actions = BaseJob.public_actions + [ + 'get_logs', + 'list_topics', + 'list_subscribed_topics' + ] + # publicly available list of topics + subscribed_topics: dict + log_stack: list + log: Callable # each job instance has it's own log object to keep log_stacks -> user reportable + + consumer: KafkaConsumer = None + # processing artifacts + _schemas: dict + _doc_types: dict + _routes: dict + _previous_topics: list + _ckan: CKANInstance + _subscriptions: List[Subscription] + + def _setup(self): + self.subscribed_topics = {} + self._schemas = {} + self._topic_fields = {} + self._subscriptions = [] + self._previous_topics = [] + self.log_stack = [] + self.log = callback_logger('JOB', self.log_stack, 100) + self.group_name = f'{self.tenant}.{self._id}' + self.sleep_delay: float = 0.5 + self.report_interval: int = 100 + args = {k.lower(): v for k, v in KAFKA_CONFIG.copy().items()} + args['group.id'] = self.group_name + LOG.debug(args) + self.consumer = KafkaConsumer(**args) + self.rename_fields = {} + self.bad_terms = [] + + def _job_ckan(self, config=None) -> CKANInstance: + if config: + ckan = self.get_resources('ckan', config) + if not ckan: + 
raise ConsumerHttpException('No CKAN instance associated with Job', 400) + self._ckan = ckan[0] + return self._ckan + + def _job_subscriptions(self, config=None) -> List[Subscription]: + if config: + subs = self.get_resources('subscription', config) + if not subs: + raise ConsumerHttpException('No Subscriptions associated with Job', 400) + self._subscriptions = subs + return self._subscriptions + + def _job_subscription_for_topic(self, topic): + return next(iter( + sorted([ + i for i in self._job_subscriptions() + if i._handles_topic(topic, self.tenant) + ])), + None) + + def _test_connections(self, config): + self._job_subscriptions(config) + self._job_ckan(config).test_connection() # raises CHE + return True + + def _get_messages(self, config): + try: + self.log.debug(f'{self._id} checking configurations...') + self._test_connections(config) + subs = self._job_subscriptions() + self._handle_new_subscriptions(subs) + self.log.debug(f'Job {self._id} getting messages') + return self.consumer.poll_and_deserialize( + timeout=5, + num_messages=1) # max + except ConsumerHttpException as cer: + # don't fetch messages if we can't post them + self.log.debug(f'Job not ready: {cer}') + self.status = JobStatus.RECONFIGURE + sleep(self.sleep_delay * 10) + return [] + except Exception as err: + import traceback + traceback_str = ''.join(traceback.format_tb(err.__traceback__)) + self.log.critical(f'unhandled error: {str(err)} | {traceback_str}') + raise err + sleep(self.sleep_delay) + return [] + + def _handle_new_subscriptions(self, subs): + old_subs = list(sorted(set(self.subscribed_topics.values()))) + for sub in subs: + pattern = sub.definition.topic_pattern + # only allow regex on the end of patterns + if pattern.endswith('*'): + self.subscribed_topics[sub.id] = f'^{self.tenant}.{pattern}' + else: + self.subscribed_topics[sub.id] = f'{self.tenant}.{pattern}' + new_subs = list(sorted(set(self.subscribed_topics.values()))) + _diff = 
list(set(old_subs).symmetric_difference(set(new_subs))) + if _diff: + self.log.info(f'{self.tenant} added subs to topics: {_diff}') + self.consumer.subscribe(new_subs, on_assign=self._on_assign) + + def _handle_messages(self, config, messages): + self.log.debug(f'{self.group_name} | reading {len(messages)} messages') + ckan_instance = self._job_ckan(config=config) + server_url = ckan_instance.definition.get('url') + api_key = ckan_instance.definition.get('key') + ckan_remote = RemoteCKAN(server_url, apikey=api_key) + count = 0 + records = [] + topic = None + for msg in messages: + topic = msg.topic + schema = msg.schema + if schema != self._schemas.get(topic): + self.log.info(f'{self._id} Schema change on {topic}') + self._schemas[topic] = schema + fields, definition_names = extract_fields_from_schema(schema) + fields = prepare_fields_for_resource(fields, definition_names) + self._topic_fields[topic] = fields + else: + self.log.debug('Schema unchanged.') + records.append(msg.value) + resource = self.submit_artefacts( + topic, + schema, + ckan_remote + ) + count += 1 + + if resource: + self._create_resource_in_datastore(resource, ckan_remote) + self.send_data_to_datastore(self._topic_fields[topic], records, resource, ckan_remote) + self.log.info(f'processed {count} {topic} docs') + + def submit_artefacts(self, topic, schema, ckan_remote): + subscription = self._job_subscription_for_topic(topic) + target_options = subscription.definition.get('target_options') + target_dataset_metadata = CONSUMER_CONFIG.get('metadata', {}) + target_dataset_metadata.update(target_options.get('dataset_metadata')) + dataset = self._create_dataset_in_ckan(target_dataset_metadata, ckan_remote) + if dataset: + resource_name = schema.get('name') + return self._create_resource_in_ckan(resource_name, dataset, ckan_remote) + + # called when a subscription causes a new + # assignment to be given to the consumer + def _on_assign(self, *args, **kwargs): + assignment = args[1] + for _part in 
assignment: + if _part.topic not in self._previous_topics: + self.log.info(f'New topic to configure: {_part.topic}') + self._apply_consumer_filters(_part.topic) + self._previous_topics.append(_part.topic) + + def _apply_consumer_filters(self, topic): + self.log.debug(f'{self._id} applying filter for new topic {topic}') + subscription = self._job_subscription_for_topic(topic) + if not subscription: + self.log.error(f'Could not find subscription for topic {topic}') + return + try: + opts = subscription.definition.topic_options + _flt = opts.get('filter_required', False) + if _flt: + _filter_options = { + 'check_condition_path': opts.get('filter_field_path', ''), + 'pass_conditions': opts.get('filter_pass_values', []), + 'requires_approval': _flt + } + self.log.info(_filter_options) + self.consumer.set_topic_filter_config( + topic, + FilterConfig(**_filter_options) + ) + mask_annotation = opts.get('masking_annotation', None) + if mask_annotation: + _mask_options = { + 'mask_query': mask_annotation, + 'mask_levels': opts.get('masking_levels', []), + 'emit_level': opts.get('masking_emit_level') + } + self.log.info(_mask_options) + self.consumer.set_topic_mask_config( + topic, + MaskConfig(**_mask_options) + ) + self.log.info(f'Filters applied for topic {topic}') + except AttributeError as aer: + self.log.error(f'No topic options for {subscription.id}| {aer}') + + def _create_dataset_in_ckan(self, dataset, ckan): + dataset_name = dataset.get('name').lower() + org_name = dataset.get('owner_org').lower() + # ckan allows only lower case dataset names + dataset.update({ + 'name': dataset_name, + 'owner_org': org_name + }) + + try: + ckan.action.organization_show(id=org_name) + except ckanapi_errors.NotFound: + self.log.debug(f'Creating {org_name} organization') + try: + org = { + 'name': org_name, + 'state': 'active', + } + ckan.action.organization_create(**org) + self.log.debug(f'Successfully created {org_name} organization') + except ckanapi_errors.ValidationError as e: + 
self.log.error(f'Cannot create organization {org_name} \ + because of the following errors: {json.dumps(e.error_dict)}') + return + except ckanapi_errors.ValidationError as e: + self.log.error( + f'Could not find {org_name} organization. {json.dumps(e.error_dict)}' + ) + return + + try: + return ckan.action.package_show(id=dataset_name) + except ckanapi_errors.NotFound: + # Dataset does not exist, so continue with execution to create it. + pass + + try: + new_dataset = ckan.action.package_create(**dataset) + self.log.debug(f'Dataset {dataset_name} created in CKAN portal.') + return new_dataset + except ckanapi_errors.NotAuthorized as e: + self.log.error( + f'Cannot create dataset {dataset_name}. {str(e)}' + ) + except ckanapi_errors.ValidationError as e: + self.log.error( + f'Cannot create dataset {dataset_name}. Payload is not valid. \ + Check the following errors: {json.dumps(e.error_dict)}' + ) + + def _create_resource_in_ckan(self, resource_name, dataset, ckan): + + try: + resources = ckan.action.resource_search(query=f'name:{resource_name}') + # todo: filter resource on dataset too + if resources['count']: + return resources['results'][0] + except Exception: + pass + + try: + self.log.debug(f'Creating {resource_name} resource') + resource = { + 'package_id': dataset.get('name'), + 'name': resource_name, + 'url_type': 'datastore', + } + new_resource = ckan.action.resource_create(**resource) + self.log.debug(f'Successfully created {resource_name} resource') + return new_resource + except ckanapi_errors.NotAuthorized as e: + self.log.error(f'Cannot create resource {resource_name}. {str(e)}') + except ckanapi_errors.ValidationError as e: + self.log.error( + f'Cannot create resource {resource_name}. Payload is not valid. 
\ + Check the following errors: {json.dumps(e.error_dict)}' + ) + + def _create_resource_in_datastore(self, resource, ckan): + payload = { + 'resource_id': resource.get('id'), + } + + try: + ckan.action.datastore_create(**payload) + except ckanapi_errors.CKANAPIError as e: + self.log.error( + f'An error occurred while creating resource \ + {resource.get("name")} in Datastore. {str(e)}' + ) + + def send_data_to_datastore(self, fields, records, resource, ckan): + resource_id = resource.get('id') + resource_name = resource.get('name') + payload = { + 'id': resource_id, + 'limit': 1, + } + + try: + response = ckan.action.datastore_search(**payload) + except ckanapi_errors.CKANAPIError as e: + self.log.error( + f'An error occurred while getting Datastore fields for resource \ + {resource_id}. {str(e)}' + ) + return + + new_fields = response.get('fields') + new_fields[:] = [ + field for field in new_fields if field.get('id') != '_id' + ] + + schema_changes = self.get_schema_changes(new_fields, fields) + + if len(new_fields) == 0 or len(schema_changes) > 0: + self.log.info('Datastore detected schema changes') + for new_field in schema_changes: + new_fields.append(new_field) + + payload = { + 'resource_id': resource_id, + 'fields': new_fields, + } + + try: + ckan.action.datastore_create(**payload) + except ckanapi_errors.CKANAPIError as cke: + self.log.error( + f'An error occurred while adding new fields for resource \ + {resource_name} in Datastore.' 
+ ) + label = str(cke) + self.log.error( + 'ResourceType: {0} Error: {1}' + .format(resource_name, label) + ) + bad_fields = literal_eval(label).get('fields', None) + if not isinstance(bad_fields, list): + raise ValueError('Bad field could not be identified.') + issue = bad_fields[0] + bad_term = str(issue.split(' ')[0]).strip("'").strip('"') + self.bad_terms.append(bad_term) + self.log.info( + 'Recovery from error: bad field name %s' % bad_term) + self.log.info('Reverting %s' % (schema_changes,)) + for new_field in schema_changes: + new_fields.remove(new_field) + return self.send_data_to_datastore(fields, records, resource, ckan) + + records = self.convert_item_to_array(records, new_fields) + + payload = { + 'resource_id': resource_id, + 'method': 'insert', + 'records': records, + } + + try: + ckan.action.datastore_upsert(**payload) + self.log.info(f'Updated resource {resource_id} in {ckan.address}.') + except ckanapi_errors.CKANAPIError as cke: + self.log.error( + f'An error occurred while inserting data into resource {resource_name}' + ) + self.log.error( + f'ResourceType: {resource} Error: {str(cke)}' + ) + + def get_schema_changes(self, schema, fields): + ''' Only check if new field has been added. ''' + + new_fields = [] + + for field in fields: + field_found = False + + for schema_field in schema: + if field.get('id') == schema_field.get('id'): + field_found = True + break + + if not field_found: + if field.get('id') in self.bad_terms: + new_fields.append(self.rename_field(field)) + else: + new_fields.append(field) + + return new_fields + + def rename_field(self, field): + bad_name = field.get('id') + new_name = 'ae' + bad_name + self.rename_fields[bad_name] = new_name + field['id'] = new_name + return field + + def convert_item_to_array(self, records, new_fields): + ''' If a field is of type array, and the value for it contains a + primitive type, then convert it to an array of that primitive type. 
+ + This mutation is required for all records, otherwise CKAN will raise + an exception. + + Example: + For given field which is of type array of integers + {'type': '_int', 'id': 'scores'} + Original record {'scores': 10} + Changed record {'scores': [10]} + ''' + + array_fields = [] + records = records[:] + + for field in new_fields: + if field.get('type').startswith('_'): + array_fields.append(field.get('id')) + + for record in records: + for key, value in record.items(): + if self.bad_terms: + name = self.rename_fields.get(key, key) + if name != key: + del record[key] + else: + name = key + if key in array_fields: + record[name] = [value] + else: + record[name] = value + + return records + + # public + def list_topics(self, *args, **kwargs): + ''' + Get a list of topics to which the job can subscribe. + You can also use a wildcard at the end of names like: + Name* which would capture both Name1 && Name2, etc + ''' + timeout = 5 + try: + md = self.consumer.list_topics(timeout=timeout) + except (KafkaException) as ker: + raise ConsumerHttpException(str(ker) + f'@timeout: {timeout}', 500) + topics = [ + str(t).split(f'{self.tenant}.')[1] + for t in iter(md.topics.values()) + if str(t).startswith(self.tenant) + ] + return topics + + # public + def list_subscribed_topics(self, *arg, **kwargs): + ''' + A List of topics currently subscribed to by this job + ''' + return list(self.subscribed_topics.values()) + + # public + def get_logs(self, *arg, **kwargs): + ''' + A list of the last 100 log entries from this job in format + [ + (timestamp, log_level, message), + (timestamp, log_level, message), + ... 
+ ] + ''' + return self.log_stack[:] diff --git a/consumer/app/config.py b/consumer/app/config.py new file mode 100644 index 0000000..00e2462 --- /dev/null +++ b/consumer/app/config.py @@ -0,0 +1,78 @@ +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + +from aet.settings import Settings + +consumer_config = None +kafka_config = None + +kafka_admin_uses = [ + 'bootstrap.servers', + 'security.protocol', + 'sasl.mechanism', + 'sasl.username', + 'sasl.password' +] + + +def load_config(): + CONSUMER_CONFIG_PATH = os.environ.get('CKAN_CONSUMER_CONFIG_PATH', None) + KAFKA_CONFIG_PATH = os.environ.get('CKAN_CONSUMER_KAFKA_CONFIG_PATH', None) + global consumer_config + consumer_config = Settings(file_path=CONSUMER_CONFIG_PATH) + global kafka_config + kafka_config = Settings( + file_path=KAFKA_CONFIG_PATH, + alias={'BOOTSTRAP.SERVERS': 'KAFKA_URL'}, + exclude=['KAFKA_URL'] + ) + + +def get_kafka_config(): + # load security settings in from environment + # if the security protocol is set + if kafka_config.get('SECURITY.PROTOCOL'): + for i in [ + 'SECURITY.PROTOCOL', + 'SASL.MECHANISM', + 'SASL.USERNAME', + 'SASL.PASSWORD' + ]: + kafka_config[i] = kafka_config.get(i) + return kafka_config + + +def get_kafka_admin_config(): + kafka_security = get_kafka_config().copy() + ks_keys = 
list(kafka_security.keys()) + for i in ks_keys: + if i.lower() not in kafka_admin_uses: + del kafka_security[i] + else: + kafka_security[i.lower()] = kafka_security[i] + del kafka_security[i] + return kafka_security + + +def get_consumer_config(): + return consumer_config + + +load_config() diff --git a/consumer/app/consumer.py b/consumer/app/consumer.py new file mode 100644 index 0000000..71db265 --- /dev/null +++ b/consumer/app/consumer.py @@ -0,0 +1,33 @@ +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from aet.consumer import BaseConsumer + +from app import artifacts + + +class CKANConsumer(BaseConsumer): + + def __init__(self, CON_CONF, KAFKA_CONF, redis_instance=None): + self.job_class = artifacts.CKANJob + super(CKANConsumer, self).__init__( + CON_CONF, + KAFKA_CONF, + self.job_class, + redis_instance=redis_instance + ) diff --git a/example/scripts/start_goa.sh b/consumer/app/fixtures/__init__.py old mode 100755 new mode 100644 similarity index 73% rename from example/scripts/start_goa.sh rename to consumer/app/fixtures/__init__.py index a4171bc..3ef024f --- a/example/scripts/start_goa.sh +++ b/consumer/app/fixtures/__init__.py @@ -1,11 +1,11 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. # -# Licensed under the Apache License, Version 2.0 (the "License"); +# Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with # the License. You may obtain a copy of the License at # @@ -17,8 +17,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# -set -Eeuo pipefail - -scripts/run_aether.sh & -scripts/run_gather.sh & diff --git a/consumer/app/fixtures/examples.py b/consumer/app/fixtures/examples.py new file mode 100644 index 0000000..7e37aff --- /dev/null +++ b/consumer/app/fixtures/examples.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +CKAN_INSTANCE = { + 'id': 'ckan-id', + 'name': 'Test CKAN Instance', + 'url': 'http://ckan:5000', + 'key': 'ckan_api_key' +} + +SUBSCRIPTION = { + 'id': 'sub-id', + 'name': 'Test Subscription', + 'topic_pattern': '*', + 'topic_options': { + 'masking_annotation': '@aether_masking', # schema key for mask level of a field + 'masking_levels': ['public', 'private'], # classifications + 'masking_emit_level': 'public', # emit from this level -> + 'filter_required': True, # filter on a message value? + 'filter_field_path': 'operational_status', # which field? + 'filter_pass_values': ['operational'], # what are the passing values? + }, + 'target_options': { + 'dataset_metadata': { + 'title': 'Pollution in Nigeria', + 'name': 'pollution-in-nigeria111', + 'owner_org': 'eHA ', + 'notes': 'Some description', + 'author': 'eHealth Africa', + 'private': False + } + } +} + +JOB = { + 'id': 'job-id', + 'name': 'CKAN Consumer Job', + 'ckan': 'ckan-id', + 'subscription': ['sub-id'] +} diff --git a/consumer/app/fixtures/schemas.py b/consumer/app/fixtures/schemas.py new file mode 100644 index 0000000..110fd3a --- /dev/null +++ b/consumer/app/fixtures/schemas.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +CKAN_INSTANCE = ''' +{ + "definitions": {}, + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://example.com/root.json", + "type": "object", + "title": "The Root Schema", + "required": [ + "id", + "name", + "url" + ], + "properties": { + "id": { + "$id": "#/properties/id", + "type": "string", + "title": "The Id Schema", + "default": "", + "examples": [ + "the id for this resource" + ], + "pattern": "^(.*)$" + }, + "name": { + "$id": "#/properties/name", + "type": "string", + "title": "The Name Schema", + "default": "", + "examples": [ + "a nice name for this resource" + ], + "pattern": "^(.*)$" + }, + "url": { + "$id": "#/properties/url", + "type": "string", + "title": "The Url Schema", + "default": "", + "examples": [ + "url of the resource" + ], + "pattern": "^(.*)$" + }, + "key": { + "$id": "#/properties/key", + "type": "string", + "title": "The CKAN API Key", + "default": "", + "examples": [ + "api key for auth" + ], + "pattern": "^(.*)$" + } + } +} +''' + +SUBSCRIPTION = ''' +{ + "definitions": {}, + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://example.com/root.json", + "type": "object", + "title": "The Root Schema", + "required": [ + "id", + "name", + "topic_pattern", + "target_options" + ], + "properties": { + "id": { + "$id": "#/properties/id", + "type": "string", + "title": "The Id Schema", + "default": "", + "examples": [ + "the id for this resource" + ], + "pattern": "^(.*)$" + }, + "name": { + "$id": "#/properties/name", + "type": "string", + "title": "The Name Schema", + "default": "", + 
"examples": [ + "a nice name for this resource" + ], + "pattern": "^(.*)$" + }, + "topic_pattern": { + "$id": "#/properties/topic_pattern", + "type": "string", + "title": "The Topic_pattern Schema", + "default": "", + "examples": [ + "source topic for data i.e. gather*" + ], + "pattern": "^(.*)$" + }, + "topic_options": { + "$id": "#/properties/topic_options", + "type": "object", + "title": "The Topic_options Schema", + "anyOf": [ + {"required": [ + "masking_annotation" + ]}, + {"required": [ + "filter_required" + ]} + ], + "dependencies":{ + "filter_required": ["filter_field_path", "filter_pass_values"], + "masking_annotation": ["masking_levels", "masking_emit_level"] + }, + "properties": { + "masking_annotation": { + "$id": "#/properties/topic_options/properties/masking_annotation", + "type": "string", + "title": "The Masking_annotation Schema", + "default": "", + "examples": [ + "@aether_masking" + ], + "pattern": "^(.*)$" + }, + "masking_levels": { + "$id": "#/properties/topic_options/properties/masking_levels", + "type": "array", + "title": "The Masking_levels Schema", + "items": { + "$id": "#/properties/topic_options/properties/masking_levels/items", + "title": "The Items Schema", + "examples": [ + "private", + "public" + ], + "pattern": "^(.*)$" + } + }, + "masking_emit_level": { + "$id": "#/properties/topic_options/properties/masking_emit_level", + "type": "string", + "title": "The Masking_emit_level Schema", + "default": "", + "examples": [ + "public" + ], + "pattern": "^(.*)$" + }, + "filter_required": { + "$id": "#/properties/topic_options/properties/filter_required", + "type": "boolean", + "title": "The Filter_required Schema", + "default": false, + "examples": [ + false + ] + }, + "filter_field_path": { + "$id": "#/properties/topic_options/properties/filter_field_path", + "type": "string", + "title": "The Filter_field_path Schema", + "default": "", + "examples": [ + "some.json.path" + ], + "pattern": "^(.*)$" + }, + "filter_pass_values": { + "$id": 
"#/properties/topic_options/properties/filter_pass_values", + "type": "array", + "title": "The Filter_pass_values Schema", + "items": { + "$id": "#/properties/topic_options/properties/filter_pass_values/items", + "title": "The Items Schema", + "examples": [ + false + ] + } + } + } + }, + "target_options": { + "$id": "#/properties/target_options", + "type": "object", + "title": "The Target_options Schema", + "anyOf": [ + {"required": [ + "dataset_metadata", + "resource_metadata" + ]} + ], + "dependencies":{ + "dataset_name": ["default_read_level"] + }, + "properties": { + "dataset_metadata": { + "$id": "#/properties/target_options/properties/dataset_metadata", + "type": "object", + "title": "The Dataset_metadata Schema", + "anyOf": [ + {"required": [ + "title", + "name", + "owner_org", + "private" + ]} + ], + "examples": { + "title": "Pollution in Nigeria", + "name": "pollution-in-nigeria111", + "owner_org": "eHA ", + "notes": "Some description", + "author": "eHealth Africa", + "private": false + }, + "properties": { + "title": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/title", + "type": "string", + "title": "The dataset_metadata title Schema", + "default": "", + "examples": [ + "dataset title" + ], + "pattern": "^(.*)$" + }, + "name": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/name", + "type": "string", + "title": "The dataset_metadata name Schema", + "default": "", + "examples": [ + "dataset name" + ], + "pattern": "^(.*)$" + }, + "owner_org": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/owner_org", + "type": "string", + "title": "The dataset_metadata owner's org Schema", + "default": "", + "examples": [ + "dataset owner's organization" + ], + "pattern": "^(.*)$" + }, + "notes": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/notes", + "type": "string", + "title": "The dataset_metadata notes Schema", + "default": "", + "examples": [ 
+ "dataset notes" + ], + "pattern": "^(.*)$" + }, + "author": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/author", + "type": "string", + "title": "The dataset_metadata author Schema", + "default": "", + "examples": [ + "dataset author" + ], + "pattern": "^(.*)$" + }, + "private": { +"$id": "#/properties/target_options/properties/dataset_metadata/properties/private", + "type": "boolean", + "title": "The dataset_metadata private Schema", + "default": "", + "examples": [ + false + ] + } + } + } + } + } + } +} +''' + +CKAN_JOB = ''' +{ + "definitions": {}, + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "http://example.com/root.json", + "type": "object", + "title": "The Root Schema", + "oneOf": [{ + "required": [ + "id", + "name", + "ckan" + ], + "properties": { + "id": { + "$id": "#/properties/id", + "type": "string", + "title": "The Id Schema", + "default": "", + "examples": [ + "the id for this resource" + ], + "pattern": "^(.*)$" + }, + "name": { + "$id": "#/properties/name", + "type": "string", + "title": "The Name Schema", + "default": "", + "examples": [ + "a nice name for this resource" + ], + "pattern": "^(.*)$" + }, + "ckan": { + "$id": "#/properties/ckan", + "type": "string", + "title": "The CKAN Schema", + "default": "", + "examples": [ + "id of the ckan instance to use" + ], + "pattern": "^(.*)$" + }, + "subscription": { + "$id": "#/properties/subscription", + "type": "array", + "title": "The Subscriptions Schema", + "items": { + "$id": "#/properties/subscription/items", + "type": "string", + "title": "The Items Schema", + "default": "", + "examples": [ + "id-of-sub" + ], + "pattern": "^(.*)$" + } + }} + }] +} +''' diff --git a/consumer/app/utils.py b/consumer/app/utils.py new file mode 100644 index 0000000..a5e29c6 --- /dev/null +++ b/consumer/app/utils.py @@ -0,0 +1,137 @@ +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for 
additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +avroToPostgresPrimitiveTypes = { + 'string': 'text', + 'boolean': 'bool', + 'int': 'int4', + 'long': 'int8', + 'float': 'float4', + 'double': 'float8', + 'bytes': 'bytea', +} + + +def extract_fields_from_schema(schema): + fields = [] + definition_names = [] + if isinstance(schema, list): + for definition in schema: + is_base_schema = definition.get('aetherBaseSchema') + + if is_base_schema: + for field in definition.get('fields'): + fields.append({ + 'name': field.get('name'), + 'type': field.get('type'), + }) + else: + definition_names.append(definition.get('name')) + else: + for field in schema.get('fields'): + fields.append({ + 'name': field.get('name'), + 'type': field.get('type'), + }) + return fields, definition_names + + +def prepare_fields_for_resource(fields, definition_names): + resource_fields = [] + + for field in fields: + resource_field_type = None + if is_field_primitive_type(field): + resource_field_type = \ + avroToPostgresPrimitiveTypes.get(field.get('type')) + elif type(field.get('type')) is dict: + field_type = field.get('type').get('type') + + if field_type == 'record' or field_type == 'map': + resource_field_type = 'json' + elif field_type == 'array': + field_type = field.get('type').get('items') + resource_field_type = '_{0}'.format( + avroToPostgresPrimitiveTypes.get(field_type) + ) + elif field_type == 'enum': + resource_field_type = \ 
+ avroToPostgresPrimitiveTypes.get('string') + elif type(field.get('type')) is list: + union_types = field.get('type') + + for union_type in union_types: + if union_type in definition_names or \ + (type(union_type) is dict and + union_type.get('type') == 'map'): + resource_field_type = 'json' + break + + if resource_field_type: + resource_fields.append({ + 'type': resource_field_type, + 'id': field.get('name'), + }) + continue + + for union_type in union_types: + if (type(union_type) is dict and + union_type.get('type') == 'array'): + field_type = union_type.get('items') + if not isinstance(field_type, dict): + resource_field_type = '_{0}'.format( + avroToPostgresPrimitiveTypes.get(field_type) + ) + else: + resource_field_type = 'json' + break + + if resource_field_type: + resource_fields.append({ + 'type': resource_field_type, + 'id': field.get('name'), + }) + continue + + if 'bytes' in union_types: + resource_field_type = \ + avroToPostgresPrimitiveTypes.get('bytes') + elif 'string' in union_types: + resource_field_type = \ + avroToPostgresPrimitiveTypes.get('string') + + elif isinstance(union_types[1], dict): + resource_field_type = 'json' + else: + resource_field_type = \ + avroToPostgresPrimitiveTypes.get(union_types[1]) + + if resource_field_type: + resource_fields.append({ + 'type': resource_field_type, + 'id': field.get('name'), + }) + + return resource_fields + + +def is_field_primitive_type(field): + field_type = field.get('type') + if isinstance(field_type, str) and field_type in avroToPostgresPrimitiveTypes.keys(): + return True + return False diff --git a/consumer/conf/consumer/consumer.json b/consumer/conf/consumer/consumer.json new file mode 100644 index 0000000..5e37596 --- /dev/null +++ b/consumer/conf/consumer/consumer.json @@ -0,0 +1,15 @@ +{ + "name": "CKAN_CONSUMER", + "metadata": { + "author": "eHealth Africa", + "author_email": "info@ehealthafrica.org", + "maintainer": "eHealth Africa", + "maintainer_email": "info@ehealthafrica.org", + 
"license_id": "cc-by", + "url": "https://www.ehealthafrica.org", + "version": "1.0", + "owner_org": "eHA", + "name": "demo-dataset-1", + "title": "Demo Dataset" + } +} diff --git a/consumer/conf/consumer/kafka.json b/consumer/conf/consumer/kafka.json new file mode 100644 index 0000000..41ac2f1 --- /dev/null +++ b/consumer/conf/consumer/kafka.json @@ -0,0 +1,12 @@ +{ + "auto.offset.reset" : "latest", + "group.id": "ckan-metadataonly", + "enable.auto.commit": true, + "auto.commit.interval.ms": 2500, + "aether_emit_flag_required" : false, + "aether_masking_schema_levels" : ["false", "true"], + "aether_masking_schema_emit_level": "false", + "heartbeat.interval.ms": 2500, + "session.timeout.ms": 18000, + "request.timeout.ms": 20000 +} diff --git a/consumer/conf/extras/flake8.cfg b/consumer/conf/extras/flake8.cfg new file mode 100644 index 0000000..6f54a77 --- /dev/null +++ b/consumer/conf/extras/flake8.cfg @@ -0,0 +1,16 @@ +[flake8] +max-line-length = 100 +ignore = + F403, + F405, + W503, + W504, + E731 +exclude = + /code/tests/* + /code/./.eggs/* + */__pycache__/* + +inline-quotes = single +multiline-quotes = ''' +docstring-quotes = ''' diff --git a/consumer/conf/extras/good_job.txt b/consumer/conf/extras/good_job.txt new file mode 100644 index 0000000..a4fd0e1 --- /dev/null +++ b/consumer/conf/extras/good_job.txt @@ -0,0 +1,9 @@ + + + ____ _ _ _ _ + / ___| ___ ___ __| | (_) ___ | |__ | | + | | _ / _ \ / _ \ / _` | | |/ _ \| '_ \| | + | |_| | (_) | (_) | (_| | | | (_) | |_) |_| + \____|\___/ \___/ \__,_| _/ |\___/|_.__/(_) + |__/ + diff --git a/consumer/conf/pip/primary-requirements.txt b/consumer/conf/pip/primary-requirements.txt new file mode 100644 index 0000000..576d43a --- /dev/null +++ b/consumer/conf/pip/primary-requirements.txt @@ -0,0 +1,31 @@ +################################################################################################# +# # +# Primary requirements # +# # +# CHANGE ME !!! 
# +# # +# # +# To check the outdated dependencies run # +# docker-compose run --rm --no-deps ckan-consumer eval pip list --outdated # +# Update me and then run # +# docker-compose run --rm ckan-consumer pip_freeze # +# # +################################################################################################# + + +# Consumer +aet.consumer >= 3.4.1 +aether-python +ckanapi +deepmerge +eha_jsonpath +jsonschema +requests +responses + +# Tests +flake8 +flake8-quotes +pytest +pytest-cov +pytest-runner diff --git a/consumer/conf/pip/requirements.txt b/consumer/conf/pip/requirements.txt new file mode 100644 index 0000000..6ae62fc --- /dev/null +++ b/consumer/conf/pip/requirements.txt @@ -0,0 +1,73 @@ +######################################################################################### +# # +# Frozen requirements # +# # +# DO NOT CHANGE !!! # +# # +# To update this first update 'primary-requirements.txt' # +# then run 'docker-compose run ckan-consumer pip_freeze' # +# # +######################################################################################### +aet.consumer==3.4.1 +aether.python==1.0.17 +atomicwrites==1.1.5 +attrs==18.1.0 +beautifulsoup4==4.8.0 +birdisle==0.1.3 +blessings==1.7 +certifi==2019.3.9 +cffi==1.13.0 +chardet==3.0.4 +ckanapi==4.3 +Click==7.0 +confluent-kafka==1.1.0 +coverage==5.0 +decorator==4.3.0 +deepmerge==0.1.0 +docopt==0.6.2 +eha-jsonpath==0.5.1 +elasticsearch==7.5.1 +entrypoints==0.3 +flake8==3.7.9 +flake8-quotes==2.1.1 +Flask==1.1.1 +idna==2.7 +importlib-metadata==0.17 +itsdangerous==1.1.0 +Jinja2==2.10.1 +jsonpath-ng==1.4.3 +jsonschema==3.2.0 +kafka-python==1.4.6 +MarkupSafe==1.1.1 +mccabe==0.6.1 +mock==3.0.5 +more-itertools==4.1.0 +packaging==19.0 +pbr==4.0.3 +pluggy==0.12.0 +ply==3.11 +py==1.5.3 +pycodestyle==2.5.0 +pycparser==2.19 +pyflakes==2.1.1 +pyparsing==2.4.0 +pyrsistent==0.15.4 +pytest==5.3.5 +pytest-cov==2.8.1 +pytest-runner==5.2 +python-slugify==4.0.0 +redis==3.3.8 +requests==2.22.0 +responses==0.10.9 +six==1.11.0 
+soupsieve==1.9.3 +spavro==1.1.17 +SQLAlchemy==1.3.13 +text-unidecode==1.3 +urllib3==1.23 +waitress==1.3.1 +wcwidth==0.1.7 +WebOb==1.8.5 +WebTest==2.0.33 +Werkzeug==0.15.6 +zipp==0.5.1 diff --git a/consumer/conf/pip/requirements_header.txt b/consumer/conf/pip/requirements_header.txt new file mode 100644 index 0000000..c2ff6d0 --- /dev/null +++ b/consumer/conf/pip/requirements_header.txt @@ -0,0 +1,10 @@ +######################################################################################### +# # +# Frozen requirements # +# # +# DO NOT CHANGE !!! # +# # +# To update this first update 'primary-requirements.txt' # +# then run 'docker-compose run ckan-consumer pip_freeze' # +# # +######################################################################################### diff --git a/consumer/config.py b/consumer/config.py deleted file mode 100644 index 55c11cb..0000000 --- a/consumer/config.py +++ /dev/null @@ -1,100 +0,0 @@ -import os -import json -import logging -import sys - -from jsonschema import validate -from jsonschema.exceptions import ValidationError - - -logger = logging.getLogger(__name__) -config = None - - -def read_file(dir_path, file_name): - ''' Read file from local filesystem. - - :param dir_path: The directory path where the file is located. - :type dir_path: string - - :param file_name: The name of the file to read from. - :type file_name: string - - :raises SystemExit: If the file cannot be found from provided directory - path and file name. - - :returns: The contents of the file. - :rtype: string - - ''' - - file_path = os.path.join(dir_path, file_name) - - try: - with open(file_path, 'r') as f: - return f.read() - except IOError: - logger.error('{0} does not exist in directory {1}.' - .format(file_name, dir_path)) - sys.exit(1) - - -def parse_json_from_file(dir_path, file_name): - ''' Parses JSON file from local filesystem. - - :param dir_path: The directory path where the file is located. 
- :type dir_path: string - - :param file_name: The name of the file to read from. - :type file_name: string - - :raises SystemExit: If the file cannot be parsed as JSON. - - :returns: The contents of the file as JSON. - :rtype: dictionary - - ''' - - content = read_file(dir_path, file_name) - - try: - return json.loads(content) - except (ValueError, TypeError): - logger.error('{0} is not a valid JSON file.' - .format(file_name)) - sys.exit(1) - - -def validate_config(dir_path, config_file, schema_file): - ''' Validates the config file if it conforms to JSON Schema. - - :param dir_path: The directory path where the file is located. - :type dir_path: string - - :param config_file: The config file to validate. - :type config_file: string - - :param schema_file: The JSON Schema file to use for validation. - :type schema_file: string - - :raises SystemExit: If the config file is not valid according to - the JSON Schema file. - - ''' - - config_file = parse_json_from_file(dir_path, config_file) - schema_file = parse_json_from_file(dir_path, schema_file) - - try: - validate(config_file, schema_file) - global config - config = config_file - except ValidationError as e: - logger.error('Error while validating config.json. 
' - 'Please fix the following error:') - logger.error(e) - sys.exit(1) - - -def get_config(): - return config diff --git a/consumer/core/__init__.py b/consumer/core/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/consumer/core/dataset_manager.py b/consumer/core/dataset_manager.py deleted file mode 100644 index 01e7343..0000000 --- a/consumer/core/dataset_manager.py +++ /dev/null @@ -1,126 +0,0 @@ -import logging -from threading import Thread -import json -import sys -import os - -from ckanapi import RemoteCKAN -from ckanapi import errors as ckanapi_errors - -from consumer.core.resource_manager import ResourceManager -from consumer.config import parse_json_from_file - - -class DatasetManager(Thread): - - def __init__(self, server_manager, config): - super(DatasetManager, self).__init__() - - self.logger = logging.getLogger(__name__) - self.config = config - self.title = config.get( - "dataset", {}).get( - "metadata", {}).get( - 'title') - self.server_manager = server_manager - - def run(self): - self.create_dataset_in_ckan() - self.spawn_resource_managers() - - def spawn_resource_managers(self): - dataset = self.config.get('dataset') - dataset_name = dataset.get('metadata').get('name') - resources = dataset.get('resources') - self.resource_managers = [] - - for resource in resources: - config = { - 'dataset': dataset, - 'resource': resource, - 'server_name': self.config.get('server_name'), - 'ckan_url': self.config.get('ckan_url'), - 'api_key': self.config.get('api_key'), - } - resource_manager = ResourceManager(self, config) - self.resource_managers.append(resource_manager) - - if len(self.resource_managers) == 0: - self.logger.info('No Resource Managers spawned.') - else: - self.logger.info( - 'Spawned {0} Resource manager(s) for dataset {1}.' 
- .format(len(self.resource_managers), dataset_name) - ) - - for resource_manager in self.resource_managers: - resource_manager.start() - - def create_dataset_in_ckan(self): - server_url = self.config.get('ckan_url') - server_title = self.config.get('server_name') - api_key = self.config.get('api_key') - metadata = self.config.get('dataset').get('metadata') - dataset_name = metadata.get('name') - - ckan = RemoteCKAN(server_url, apikey=api_key) - - try: - ckan.action.package_show(id=dataset_name) - return - except ckanapi_errors.NotFound: - # Dataset does not exist, so continue with execution to create it. - pass - - try: - data = self.update_metadata_from_defaults(metadata) - dataset = ckan.action.package_create(**data) - dataset_url = '{0}/dataset/{1}'.format(server_url, dataset_name) - self.logger.info('Dataset {0} created in CKAN portal {1}: {2}.' - .format( - dataset_name, - server_title, - dataset_url - )) - except ckanapi_errors.NotAuthorized as e: - self.logger.error( - 'Cannot create dataset {0}. {1}'.format( - dataset_name, - str(e), - ) - ) - sys.exit(1) - except ckanapi_errors.ValidationError as e: - self.logger.error( - 'Cannot create dataset {0}. Payload is not valid. 
Check the ' - 'following errors: {1}'.format( - dataset_name, - json.dumps(e.error_dict), - ) - ) - sys.exit(1) - - def update_metadata_from_defaults(self, overwritten_data): - dir_path = os.getcwd() - file_name = os.path.join('config', 'dataset_metadata.json') - - default_metadata = parse_json_from_file(dir_path, file_name) - - updated_metadata = {} - updated_metadata.update(default_metadata) - updated_metadata.update(overwritten_data) - - return updated_metadata - - def stop(self): - for resource_manager in self.resource_managers: - resource_manager.stop() - - def on_resource_exit(self, resource_name): - for resource_manager in self.resource_managers: - if resource_manager.name == resource_name: - self.resource_managers.remove(resource_manager) - break - - if len(self.resource_managers) == 0: - self.server_manager.on_dataset_exit(self.name) diff --git a/consumer/core/process_manager.py b/consumer/core/process_manager.py deleted file mode 100644 index 7139854..0000000 --- a/consumer/core/process_manager.py +++ /dev/null @@ -1,97 +0,0 @@ -import signal -import sys -import logging -from time import sleep -import os - -from consumer.core.server_manager import ServerManager -from consumer.config import get_config -from consumer.db import CkanServer - - -class ProcessManager(object): - ''' Responsible for managing the internal lifecycle of the application. ''' - - def __init__(self): - self.logger = logging.getLogger(__name__) - self.stopped = False - - def run(self): - self.listen_stop_signal() - config = get_config() - self.spawn_server_managers(config) - environment = os.getenv('ENVIRONMENT') - - if environment != 'test': - self.logger.debug('Live environment detected') - while True: - if self.stopped: - self.logger.info('App gracefully stopped.') - break - - sleep(1) - else: - self.logger.debug('stopping for test completion') - - def on_stop_handler(self, signum, frame): - ''' Called when the application needs to be gracefully stopped. 
''' - - self.logger.info('Gracefully stopping...') - - for server_manager in self.server_managers: - server_manager.stop() - - def listen_stop_signal(self): - ''' Listens for the SIGTERM signal so that the application can be - gracefully stopped. ''' - - # Catches Ctrl+C and docker stop - signal.signal(signal.SIGTERM, self.on_stop_handler) - - def spawn_server_managers(self, config): - ''' Spawns Server Managers based on the config. - - :param config: Configuration based on config.json. - :type config: dictionary - - ''' - - servers = config.get('ckan_servers') - self.server_managers = [] - - for server_config in servers: - server_manager = ServerManager(self, server_config) - - server_available = server_manager.check_server_availability( - server_config - ) - - if server_available: - ckan_server_url = server_config.get('url') - ckan_server = CkanServer.get_by_url( - ckan_server_url=ckan_server_url - ) - - if not ckan_server: - CkanServer.create(ckan_server_url=ckan_server_url) - - self.server_managers.append(server_manager) - server_manager.spawn_dataset_managers(server_config) - else: - sys.exit(1) - - if len(self.server_managers) == 0: - self.logger.error('No CKAN servers available.') - sys.exit(1) - else: - self.logger.info('Spawned {0} Server manager(s).' 
- .format(len(self.server_managers))) - - def on_server_exit(self, server_url): - for server_manager in self.server_managers: - if server_manager.server_config.get('url') == server_url: - self.server_managers.remove(server_manager) - break - - if len(self.server_managers) == 0: - self.stopped = True diff --git a/consumer/core/resource_manager.py b/consumer/core/resource_manager.py deleted file mode 100644 index d816d6c..0000000 --- a/consumer/core/resource_manager.py +++ /dev/null @@ -1,345 +0,0 @@ -from ast import literal_eval -import logging -from threading import Thread, Lock -import sys -import json - -from ckanapi import RemoteCKAN -from ckanapi import errors as ckanapi_errors - -from consumer.core.topic_manager import TopicManager -from consumer.db import Resource, CkanServer - -# Lock is created when creating a resource in CKAN, because there is a bug if -# multiple resources are created concurrently. It sets the resource metadata -# field "state" to "deleted". See https://github.com/ckan/ckan/issues/4217 -resource_create_lock = Lock() - - -class ResourceManager(Thread): - - def __init__(self, dataset_manager, config): - super(ResourceManager, self).__init__() - - self.logger = logging.getLogger(__name__) - self.config = config - self.schema = None - self.rename_fields = {} - self.bad_terms = [] - self.dataset_manager = dataset_manager - - def run(self): - resource = self.config.get('resource') - dataset_name = self.config.get('dataset').get('metadata').get('name') - - with resource_create_lock: - self.create_resource_in_ckan(resource, dataset_name) - self.create_resource_in_datastore() - self.create_resource_in_db(resource, dataset_name) - - self.spawn_topic_managers() - - def spawn_topic_managers(self): - dataset = self.config.get('dataset') - dataset_name = dataset.get('metadata').get('name') - resource = self.config.get('resource') - self.topic_managers = [] - - topics = resource.get('topics') - - for topic_config in topics: - number_of_consumers = 
topic_config.get('number_of_consumers', 1) - - for i in range(number_of_consumers): - config = { - 'server_name': self.config.get('server_name'), - 'dataset_name': dataset_name, - 'topic': topic_config, - 'resource_name': resource.get('metadata').get('name') - } - topic_manager = TopicManager(self, config) - self.topic_managers.append(topic_manager) - - if len(self.topic_managers) == 0: - self.logger.info('No Topic Managers spawned.') - else: - self.logger.info( - 'Spawned {0} Topic manager(s) for dataset {1}.' - .format(len(self.topic_managers), dataset_name) - ) - - for topic_manager in self.topic_managers: - topic_manager.start() - - def create_resource_in_db(self, resource, dataset_name): - metadata = resource.get('metadata') - resource_name = metadata.get('name') - ckan_server_url = self.config.get('ckan_url') - ckan_server = CkanServer.get_by_url( - ckan_server_url=ckan_server_url - ) - resource = Resource.get( - resource_name=resource_name, - ckan_server_id=ckan_server.ckan_server_id, - dataset_name=dataset_name - ) - - if not resource: - data = { - 'resource_name': resource_name, - 'dataset_name': dataset_name, - 'ckan_server_id': ckan_server.ckan_server_id, - 'resource_id': self.resource_id - } - - Resource.create(**data) - - def create_resource_in_ckan(self, resource, dataset_name): - server_url = self.config.get('ckan_url') - server_title = self.config.get('server_name') - api_key = self.config.get('api_key') - title = resource.get('metadata').get('title') - resource_name = resource.get('metadata').get('name') - resource_description = resource.get('metadata').get('description') - - payload = { - 'package_id': dataset_name, - 'name': title, - 'description': resource_description, - 'url_type': 'datastore', - } - - self.ckan = RemoteCKAN(server_url, apikey=api_key) - - try: - ckan_server = CkanServer.get_by_url( - ckan_server_url=server_url - ) - db_resource = Resource.get( - resource_name=resource_name, - dataset_name=dataset_name, - 
ckan_server_id=ckan_server.ckan_server_id - ) - - if db_resource: - self.resource_id = db_resource.resource_id - return - except ckanapi_errors.NotFound: - # Resource does not exist, so continue with execution to create it. - pass - - try: - response = self.ckan.action.resource_create(**payload) - self.resource_id = response.get('id') - resource_url = '{0}/dataset/{1}/resource/{2}'.format( - server_url, dataset_name, response.get('id') - ) - self.logger.info('Resource {0} created in CKAN portal {1}: {2}.' - .format( - resource_name, - server_title, - resource_url - )) - except ckanapi_errors.NotAuthorized as e: - self.logger.error( - 'Cannot create resource {0}. {1}'.format( - resource_name, - str(e), - ) - ) - sys.exit(1) - except ckanapi_errors.ValidationError as e: - self.logger.error( - 'Cannot create resource {0}. Payload is not valid. Check the ' - 'following errors: {1}'.format( - resource_name, - json.dumps(e.error_dict), - ) - ) - sys.exit(1) - - def send_data_to_datastore(self, fields, records): - if not self.schema: - payload = { - 'id': self.resource_id, - 'limit': 1, - } - - try: - response = self.ckan.action.datastore_search(**payload) - except ckanapi_errors.CKANAPIError: - self.logger.error( - 'An error occured while getting Datastore fields for ' - 'resource {0}' - .format(self.get_resource_url()) - ) - return - - new_fields = response.get('fields') - - new_fields[:] = [field for field in new_fields - if field.get('id') != '_id'] - - self.schema = new_fields - - schema_changes = self.get_schema_changes(self.schema, fields) - - if len(self.schema) == 0 or len(schema_changes) > 0: - self.logger.info('Detected Schema Changes') - for new_field in schema_changes: - self.schema.append(new_field) - - payload = { - 'resource_id': self.resource_id, - 'fields': self.schema, - } - - try: - self.ckan.action.datastore_create(**payload) - except ckanapi_errors.CKANAPIError as cke: - self.logger.error( - 'An error occured while adding new fields for resource {0}' 
- ' in Datastore' - .format(self.get_resource_url()) - ) - label = str(cke) - self.logger.error( - 'ResourceType: {0}' - ' Error: {1}' - .format(self.name, label) - ) - bad_fields = literal_eval(label).get('fields', None) - if not isinstance(bad_fields, list): - raise ValueError('Bad field could not be identified.') - issue = bad_fields[0] - bad_term = str(issue.split(" ")[0]).strip("'").strip('"') - self.bad_terms.append(bad_term) - self.logger.info( - 'Recovery from error: bad field name %s' % bad_term) - self.logger.info('Reverting %s' % (schema_changes,)) - for new_field in schema_changes: - self.schema.remove(new_field) - return self.send_data_to_datastore(fields, records) - - records = self.convert_item_to_array(records) - - payload = { - 'resource_id': self.resource_id, - 'method': 'insert', - 'records': records, - } - - try: - self.ckan.action.datastore_upsert(**payload) - self.logger.info('Updated resource {0} in {1}.'.format( - self.resource_id, self.ckan.address - )) - except ckanapi_errors.CKANAPIError as cke: - self.logger.error( - 'An error occured while inserting data into resource {0}' - .format(self.get_resource_url()) - ) - self.logger.error( - 'ResourceType: {0}' - ' Error: {1}' - .format(self.name, cke) - ) - - def create_resource_in_datastore(self): - payload = { - 'resource_id': self.resource_id, - } - - try: - self.ckan.action.datastore_create(**payload) - except ckanapi_errors.CKANAPIError: - self.logger.error( - 'An error occured while creating resource {0} in Datastore' - .format(self.get_resource_url()) - ) - - def get_schema_changes(self, schema, fields): - """ Only check if new field has been added. 
""" - - new_fields = [] - - for field in fields: - field_found = False - - for schema_field in schema: - if field.get('id') == schema_field.get('id'): - field_found = True - break - - if not field_found: - if field.get('id') in self.bad_terms: - new_fields.append(self.rename_field(field)) - else: - new_fields.append(field) - - return new_fields - - def rename_field(self, field): - bad_name = field.get('id') - new_name = "ae" + bad_name - self.rename_fields[bad_name] = new_name - field['id'] = new_name - return field - - def convert_item_to_array(self, records): - """ If a field is of type array, and the value for it contains a - primitive type, then convert it to an array of that primtive type. - - This mutation is required for all records, otherwise CKAN will raise - an exception. - - Example: - For given field which is of type array of integers - {'type': '_int', 'id': 'scores'} - Original record {'scores': 10} - Changed record {'scores': [10]} - """ - - array_fields = [] - records = records[:] - - for field in self.schema: - if field.get('type').startswith('_'): - array_fields.append(field.get('id')) - - for record in records: - for key, value in record.items(): - if self.bad_terms: - name = self.rename_fields.get(key, key) - if name != key: - del record[key] - else: - name = key - if key in array_fields: - record[name] = [value] - else: - record[name] = value - - return records - - def stop(self): - for topic_manager in self.topic_managers: - topic_manager.stop() - - def on_topic_exit(self, topic_name): - for topic_manager in self.topic_managers: - if topic_manager.name == topic_name: - self.topic_managers.remove(topic_manager) - break - - if len(self.topic_managers) == 0: - self.dataset_manager.on_resource_exit(self.name) - - def get_resource_url(self): - server_url = self.config.get('ckan_url') - dataset_name = self.config.get('dataset').get('metadata').get('name') - resource_url = '{0}/dataset/{1}/resource/{2}'.format( - server_url, dataset_name, 
self.resource_id - ) - - return resource_url diff --git a/consumer/core/server_manager.py b/consumer/core/server_manager.py deleted file mode 100644 index 4db5450..0000000 --- a/consumer/core/server_manager.py +++ /dev/null @@ -1,266 +0,0 @@ -from time import sleep -import logging -import re -import requests -from requests.exceptions import ConnectionError -from threading import Thread - -from aet.consumer import KafkaConsumer -from kafka.consumer.fetcher import NoOffsetForPartitionError - -from consumer.core.dataset_manager import DatasetManager -from consumer.config import get_config - -CONN_RETRY = 3 -CONN_RETRY_WAIT_TIME = 2 - - -class ServerManager(object): - - def __init__(self, process_manager, server_config): - self.logger = logging.getLogger(__name__) - self.server_config = server_config - self.process_manager = process_manager - self.dataset_managers = [] - self.ignored_topics = [] - self.autoconfig_watcher = None - - def check_server_availability(self, server_config): - ''' Checks the server availability using CKAN's API action - "status_show". Retry logic exist with a wait time between retries. - - :param server_config: The configuration for the server. - :type server_config: dictionary - - :returns: Is server available. - :rtype: boolean - - ''' - - server_url = server_config.get('url') - url = '{0}/api/action/status_show'.format(server_url) - response = None - - for i in range(CONN_RETRY): - try: - response = requests.get(url) - break - except ConnectionError: - self.logger.error('Server {0} not available. Retrying...' - .format(url)) - sleep(CONN_RETRY_WAIT_TIME) - - if response is None: - self.logger.error('Server {0} not available.' 
- .format(url)) - return False - - if response.status_code != 200: - self.logger.error('Response for {0} not successful.'.format(url)) - return False - - try: - data = response.json() - except (ValueError, TypeError): - self.logger.error('Expected JSON response for {0}.'.format(url)) - return False - - if data.get('success') is True: - self.logger.info('Server {0} available.'.format(url)) - return True - - return False - - def get_datasets_from_kafka(self, server_config): - ''' Gets dataset configurations from looking at metadata available - in all Kafka Topics. - - :param server_config: The configuration for the server. - :type server_config: dictionary - - ''' - self.logger.info('Looking for new topics to auto configure') - kafka_settings = get_config().get('kafka') - kafka_url = kafka_settings.get('bootstrap_servers') - consumer = KafkaConsumer(bootstrap_servers=[kafka_url]) - topics = consumer.topics() - consumer.close() - datasets = [] - existing_datasets = [ - i.title for i in self.dataset_managers if hasattr(i, 'title')] - for topic in topics: - if topic in self.ignored_topics: - continue - if topic in existing_datasets: - self.logger.debug( - 'Dataset for topic {0} already exists.'.format(topic)) - continue - self.logger.info('Creating dataset for topic {0}.'.format(topic)) - consumer = KafkaConsumer( - auto_offset_reset='earliest', - **kafka_settings) - dataset = self.get_dataset_from_topic( - consumer, topic, server_config) - consumer.close() - if dataset: - self.logger.info('Dataset {0} created.'.format(topic)) - datasets.append(dataset) - else: - self.logger.info( - 'Dataset {0} failed to be created.'.format(topic)) - self.ignored_topics.append(topic) - return datasets - - def get_dataset_from_topic(self, consumer, topic, server_config): - ''' Gets dataset configurations from looking at metadata available - in all Kafka Topics. 
- - :param consumer: A KafkaConsumer attached to a Kafka Instance - :type consumer: KafkaConsumer - :param topic: The name of the topic - :type topic: string - :param server_config: The configuration for the server. - :type server_config: dictionary - - ''' - - try: - consumer.subscribe(topic) - retry = 30 - for x in range(retry): - res = consumer.poll(timeout_ms=1000) - if res: - break - sleep(1) - try: - consumer.seek_to_beginning() - except AssertionError: - raise IOError('Could not connect to Kafka partition for topic %s' % topic) - poll_result = consumer.poll_and_deserialize( - timeout_ms=1000, - max_records=1) - for parition_key, packages in poll_result.items(): - for package in packages: - schema = package.get('schema') - if not schema: - raise AttributeError('Topic %s has no schema.' % topic) - self.logger.info("Schema: %s" % schema) - - except NoOffsetForPartitionError as nofpe: - self.logger.error( - "Error on dataset creation for topic {0}; {1}".format( - topic, - nofpe)) - return None - except AttributeError as aer: - self.logger.error( - "Error on dataset creation for topic {0}; {1}".format( - topic, - aer)) - return None - safe_name = re.sub(r'\W+', '', topic).lower() - tmp = { - "metadata": { - "title": topic, - "name": safe_name, - "owner_org": server_config.get('autoconfig_owner_org'), - "notes": None, - "author": None - }, - "resources": [ - { - "metadata": { - "title": topic, - "description": None, - "name": safe_name+"-resource" - }, - "topics": [ - { - "name": topic, - "number_of_consumers": 1 - } - ] - } - ] - } - return tmp - - def spawn_dataset_managers(self, server_config): - ''' Spawns Server Managers based on the config. - - :param server_config: The configuration for the server. 
- :type server_config: dictionary - - ''' - auto_config = server_config.get('autoconfig_datasets') - - if auto_config: - if not self.autoconfig_watcher: - self.autoconfig_watcher = AutoconfigWatcher( - self, server_config) - self.autoconfig_watcher.start() - # Delegating to the threaded / repeatable process - return - else: - datasets = self.get_datasets_from_kafka(server_config) - - else: - datasets = server_config.get('datasets') - - new_dataset_managers = [] - - for dataset in datasets: - config = { - 'ckan_url': server_config.get('url'), - 'server_name': server_config.get('title'), - 'api_key': server_config.get('api_key'), - 'dataset': dataset, - } - dataset_manager = DatasetManager(self, config) - new_dataset_managers.append(dataset_manager) - self.dataset_managers.append(dataset_manager) - - if len(new_dataset_managers) == 0: - self.logger.info('No new Dataset Managers spawned.') - else: - self.logger.info( - 'Spawned {0} Dataset manager(s) for server {1}.' - .format(len(new_dataset_managers), server_config.get('title')) - ) - - for dataset_manager in new_dataset_managers: - dataset_manager.start() - - def stop(self): - if self.autoconfig_watcher: - self.autoconfig_watcher.stop() - for dataset_manager in self.dataset_managers: - dataset_manager.stop() - - def on_dataset_exit(self, dataset_name): - for dataset_manager in self.dataset_managers: - if dataset_manager.name == dataset_name: - self.dataset_managers.remove(dataset_manager) - break - - if len(self.dataset_managers) == 0: - self.process_manager.on_server_exit(self.server_config.get('url')) - - -class AutoconfigWatcher(Thread): - def __init__(self, server_manager, server_config): - super(AutoconfigWatcher, self).__init__() - self.stopped = False - self.server_manager = server_manager - self.server_config = server_config - - def run(self): - while not self.stopped: - self.server_manager.spawn_dataset_managers(self.server_config) - for tick in range(30): - sleep(1) - if self.stopped: - return - - def 
stop(self): - self.stopped = True diff --git a/consumer/core/topic_manager.py b/consumer/core/topic_manager.py deleted file mode 100644 index e9a8792..0000000 --- a/consumer/core/topic_manager.py +++ /dev/null @@ -1,237 +0,0 @@ -import ast -import logging -from threading import Thread -from time import sleep -import io -import json -import sys - -from aet.consumer import KafkaConsumer -from kafka import errors as KafkaErrors -from spavro.datafile import DataFileReader -from spavro.io import DatumReader - -from consumer.config import get_config - -CONN_RETRY = 3 -CONN_RETRY_WAIT_TIME = 2 - -avroToPostgresPrimitiveTypes = { - 'string': 'text', - 'boolean': 'bool', - 'int': 'int4', - 'long': 'int8', - 'float': 'float4', - 'double': 'float8', - 'bytes': 'bytea', -} - -# Temporary set the log level for Kafka to ERROR during development, so that -# stdout is not bloated with messages -logger = logging.getLogger('kafka') -logger.addHandler(logging.StreamHandler(sys.stdout)) -logger.setLevel(logging.ERROR) - - -class TopicManager(Thread): - - def __init__(self, resource_manager, topic_config): - super(TopicManager, self).__init__() - - self.logger = logging.getLogger(__name__) - self.topic_config = topic_config - self.definition_names = [] - self.resource_manager = resource_manager - self.stopped = False - - def run(self): - self.create_kafka_consumer() - - if self.consumer: - topic_name = self.topic_config.get('topic').get('name') - self.consumer.subscribe([topic_name]) - self.logger.info( - 'Subscribed to topic "{0}" from Topic Manager "{1}"' - .format(topic_name, self.getName()) - ) - self.read_messages() - - def create_kafka_consumer(self): - consumer_settings = get_config().get('kafka') - server_name = self.topic_config.get('server_name') - dataset_name = self.topic_config.get('dataset_name') - topic_name = self.topic_config.get('topic').get('name') - resource_name = self.topic_config.get('resource_name') - group_id = 'CKAN_{0}_{1}_{2}_{3}'.format( - 
'-'.join(server_name.split(' ')), - dataset_name, - resource_name, - topic_name - ) - - for i in range(CONN_RETRY): - try: - consumer_settings['group_id'] = group_id - self.consumer = KafkaConsumer( - auto_offset_reset='earliest', - **consumer_settings) - return True - except KafkaErrors.NoBrokersAvailable: - self.logger.error('Kafka not available. Retrying...') - sleep(CONN_RETRY_WAIT_TIME) - - self.logger.error('Could not connect to Kafka.') - - return False - - def poll_messages(self): - messages = self.consumer.poll(timeout_ms=1000) - - return messages - - def read_messages(self): - schema = None - last_schema = None - while True: - if self.stopped: - self.consumer.close() - self.resource_manager.on_topic_exit(self.name) - break - new_records = self.consumer.poll_and_deserialize(timeout_ms=1000) - for parition_key, packages in new_records.items(): - for package in packages: - schema = package.get('schema') - messages = package.get('messages') - if schema != last_schema: - fields = self.extract_fields_from_schema(schema) - fields = self.prepare_fields_for_resource(fields) - records = [] - for msg in messages: - records.append(msg) - if records: # Emit rules may parse out messages. 
- self.resource_manager.send_data_to_datastore( - fields, - records - ) - last_schema = schema - sleep(1) - - def extract_schema(self, reader): - raw_schema = ast.literal_eval(str(reader.meta)) - schema = json.loads(raw_schema.get("avro.schema")) - return schema - - def extract_fields_from_schema(self, schema): - fields = [] - if isinstance(schema, list): - for definition in schema: - is_base_schema = definition.get('aetherBaseSchema') - - if is_base_schema: - for field in definition.get('fields'): - fields.append({ - 'name': field.get('name'), - 'type': field.get('type'), - }) - - else: - self.definition_names.append(definition.get('name')) - else: - for field in schema.get('fields'): - fields.append({ - 'name': field.get('name'), - 'type': field.get('type'), - }) - - return fields - - def prepare_fields_for_resource(self, fields): - resource_fields = [] - - for field in fields: - resource_field_type = None - - if self.is_field_primitive_type(field): - resource_field_type = \ - avroToPostgresPrimitiveTypes.get(field.get('type')) - elif type(field.get('type')) is dict: - field_type = field.get('type').get('type') - - if field_type == 'record' or field_type == 'map': - resource_field_type = 'json' - elif field_type == 'array': - field_type = field.get('type').get('items') - resource_field_type = '_{0}'.format( - avroToPostgresPrimitiveTypes.get(field_type) - ) - elif field_type == 'enum': - resource_field_type = \ - avroToPostgresPrimitiveTypes.get('string') - elif type(field.get('type')) is list: - union_types = field.get('type') - - for union_type in union_types: - if union_type in self.definition_names or \ - (type(union_type) is dict and - union_type.get('type') == 'map'): - resource_field_type = 'json' - break - - if resource_field_type: - resource_fields.append({ - 'type': resource_field_type, - 'id': field.get('name'), - }) - continue - - for union_type in union_types: - if (type(union_type) is dict and - union_type.get('type') == 'array'): - field_type = 
union_type.get('items') - if not isinstance(field_type, dict): - resource_field_type = '_{0}'.format( - avroToPostgresPrimitiveTypes.get(field_type) - ) - else: - resource_field_type = 'json' - break - - if resource_field_type: - resource_fields.append({ - 'type': resource_field_type, - 'id': field.get('name'), - }) - continue - - if 'bytes' in union_types: - resource_field_type = \ - avroToPostgresPrimitiveTypes.get('bytes') - elif 'string' in union_types: - resource_field_type = \ - avroToPostgresPrimitiveTypes.get('string') - - elif isinstance(union_types[1], dict): - resource_field_type = 'json' - else: - self.logger.info(union_types) - resource_field_type = \ - avroToPostgresPrimitiveTypes.get(union_types[1]) - - if resource_field_type: - resource_fields.append({ - 'type': resource_field_type, - 'id': field.get('name'), - }) - - return resource_fields - - def is_field_primitive_type(self, field): - field_type = field.get('type') - - if field_type in avroToPostgresPrimitiveTypes.keys(): - return True - - return False - - def stop(self): - self.stopped = True diff --git a/consumer/db.py b/consumer/db.py deleted file mode 100644 index 1d35374..0000000 --- a/consumer/db.py +++ /dev/null @@ -1,98 +0,0 @@ -from datetime import datetime -import logging -import sys -import os -import uuid - -from sqlalchemy import Column, ForeignKey, String, DateTime, inspect -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy import create_engine -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm import sessionmaker - -Base = declarative_base() -logger = logging.getLogger(__name__) -engine = None -Session = None - - -def init(url): - global engine - engine = create_engine(url, connect_args={'check_same_thread': False}) - - try: - logger.info('Connecting to DB @ %s' % url) - Base.metadata.create_all(engine) - - global Session - Session = sessionmaker(bind=engine) - - logger.info('Database initialized.') - except SQLAlchemyError as sar: - 
logger.error('Database could not be initialized: %s' % sar) - sys.exit(1) - - -def get_session(): - return Session() - - -def get_engine(): - return engine - - -def make_uuid(): - return str(uuid.uuid4()) - - -class Resource(Base): - __tablename__ = 'resource' - - resource_id = Column(String, primary_key=True) - resource_name = Column(String, nullable=False) - ckan_server_id = Column(String, ForeignKey('ckan_server.ckan_server_id')) - dataset_name = Column(String, nullable=False) - created_at = Column(DateTime, default=datetime.utcnow) - - @classmethod - def create(cls, **kwargs): - resource = cls(**kwargs) - session = get_session() - - session.add(resource) - session.commit() - - return resource - - @classmethod - def get(cls, **kwargs): - session = get_session() - resource = session.query(cls).filter_by(**kwargs).first() - - return resource - - -class CkanServer(Base): - __tablename__ = 'ckan_server' - - ckan_server_id = Column(String, primary_key=True, default=make_uuid) - ckan_server_url = Column(String, nullable=False) - - @classmethod - def create(cls, ckan_server_url): - ckan_server = cls(ckan_server_url=ckan_server_url) - - session = get_session() - session.add(ckan_server) - session.commit() - - return ckan_server - - @classmethod - def get_by_url(cls, ckan_server_url): - session = get_session() - ckan_server = session.query(cls).filter_by( - ckan_server_url=ckan_server_url - ).first() - - return ckan_server diff --git a/consumer/entrypoint.sh b/consumer/entrypoint.sh new file mode 100755 index 0000000..d661f4c --- /dev/null +++ b/consumer/entrypoint.sh @@ -0,0 +1,99 @@ +#!/bin/bash +set -Eeuo pipefail + + +# Define help message +show_help() { + echo """ + Commands + ---------------------------------------------------------------------------- + bash : run bash + build : build python wheel of library in /dist + eval : eval shell command + pip_freeze : freeze pip dependencies and write to requirements.txt + start : run application + test_unit : run tests + 
test_lint : run flake8 tests + test_integration : run tests with coverage output + + """ +} + +PYTEST="pytest --cov-report term-missing --cov=app --cov-append -p no:cacheprovider" + +test_flake8() { + flake8 /code/. --config=/code/conf/extras/flake8.cfg +} + +test_unit() { + $PYTEST -m unit + cat /code/conf/extras/good_job.txt + rm -rf tests/__pycache__ || true +} + +test_integration() { + $PYTEST -m integration + cat /code/conf/extras/good_job.txt + rm -rf tests/__pycache__ || true +} + +case "$1" in + bash ) + bash + ;; + + eval ) + eval "${@:2}" + ;; + + + pip_freeze ) + + rm -rf /tmp/env + pip3 install -r ./conf/pip/primary-requirements.txt --upgrade + + cat /code/conf/pip/requirements_header.txt | tee conf/pip/requirements.txt + pip freeze --local | grep -v appdir | tee -a conf/pip/requirements.txt + ;; + + start ) + python manage.py "${@:2}" + ;; + + test_unit) + test_flake8 + test_unit "${@:2}" + ;; + + test_lint) + test_flake8 + ;; + + test_integration) + test_flake8 + test_integration "${@:2}" + ;; + + build) + # remove previous build if needed + rm -rf dist + rm -rf build + rm -rf .eggs + rm -rf aether-sdk-example.egg-info + + # create the distribution + python setup.py bdist_wheel --universal + + # remove useless content + rm -rf build + rm -rf myconsumer.egg-info + ;; + + help) + show_help + ;; + + *) + show_help + ;; +esac diff --git a/consumer/main.py b/consumer/main.py deleted file mode 100644 index 21f83d6..0000000 --- a/consumer/main.py +++ /dev/null @@ -1,34 +0,0 @@ -import logging -import os - -from consumer.core.process_manager import ProcessManager -from consumer.config import validate_config, get_config -from consumer import db - - -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' -) - -logger = logging.getLogger(__name__) - -if __name__ == '__main__': - logger.info('Starting application...') - - if not os.environ.get('CONSUMER_CONF_DIR'): - dir_path = os.getcwd() - else: - dir_path = 
os.environ.get('CONSUMER_CONF_DIR') - - # Located in the config directory - config_file = os.path.join('config', 'config.json') - schema_file = os.path.join('config', 'config.schema') - - validate_config(dir_path, config_file, schema_file) - - url = get_config().get('database').get('url') - db.init(url) - - processManager = ProcessManager() - processManager.run() diff --git a/consumer/manage.py b/consumer/manage.py new file mode 100644 index 0000000..27ad31f --- /dev/null +++ b/consumer/manage.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from time import sleep +from app.consumer import CKANConsumer +from app.config import consumer_config, kafka_config + +if __name__ == '__main__': + manager = CKANConsumer(consumer_config, kafka_config) + while True: + try: + for x in range(10): + sleep(1) + else: + break + except KeyboardInterrupt: + manager.stop() diff --git a/consumer/setup.cfg b/consumer/setup.cfg new file mode 100644 index 0000000..aa77026 --- /dev/null +++ b/consumer/setup.cfg @@ -0,0 +1,27 @@ +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[aliases] +test=pytest + +[metadata] +description-file = README.md + +[tool:pytest] +python_files = tests/test*.py +addopts = --maxfail=100 -s --capture=sys -p no:warnings diff --git a/consumer/setup.py b/consumer/setup.py new file mode 100644 index 0000000..30d04c1 --- /dev/null +++ b/consumer/setup.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+import os
+from setuptools import setup
+
+VERSION = os.environ.get('VERSION', '0.0.0')
+
+setup(
+    name='aether_ckan_consumer',
+    author='eHealth Africa',
+    author_email='aether@ehealthafrica.org',
+    description='Aether CKAN Kafka consumer',
+    version=VERSION,
+    install_requires=[
+        'aet.consumer>=3.4.1',
+        'aether-python',
+        'ckanapi',
+        'deepmerge',
+        'eha_jsonpath',
+        'jsonschema',
+        'requests',
+        'responses',
+    ],
+    tests_require=[
+        'flake8',
+        'flake8-quotes',
+        'pytest',
+        'pytest-cov',
+        'pytest-runner',
+    ],
+    url='https://github.com/eHealthAfrica/aether-ckan-consumer',
+    keywords=['aet', 'aether', 'kafka', 'consumer', 'ckan'],
+    classifiers=[]
+)
diff --git a/consumer/tests/.dockerignore b/consumer/tests/.dockerignore
new file mode 100644
index 0000000..96403d3
--- /dev/null
+++ b/consumer/tests/.dockerignore
@@ -0,0 +1 @@
+__pycache__/*
diff --git a/consumer/tests/.gitignore b/consumer/tests/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/consumer/tests/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/consumer/tests/__init__.py b/consumer/tests/__init__.py
new file mode 100644
index 0000000..7f1f7c7
--- /dev/null
+++ b/consumer/tests/__init__.py
@@ -0,0 +1,1625 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org
+#
+# See the NOTICE file distributed with this work for additional information
+# regarding copyright ownership.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+ +import json +import pytest +import os +import requests +from time import sleep +from uuid import uuid4 + +import birdisle +import birdisle.redis +from spavro.schema import parse + +from aet.kafka_utils import ( + create_topic, + delete_topic, + get_producer, + get_admin_client, + get_broker_info, + is_kafka_available, + produce +) +from aet.helpers import chunk_iterable +from aet.logger import get_logger +from aet.jsonpath import CachedParser + +from aether.python.avro import generation +from aether.python.avro.schema import Node + +from app import config +from app.fixtures import examples + +from app import consumer + +CONSUMER_CONFIG = config.consumer_config +KAFKA_CONFIG = config.get_kafka_config() +KAFKA_ADMIN_CONFIG = config.get_kafka_admin_config() + +LOG = get_logger('FIXTURE') + +# Some of the fixtures are non-compliant so we don't QA this file. +# flake8: noqa + +URL = 'http://localhost:9013' + +TENANT = 'test' +TEST_TOPIC = 'ckan_test_topic' + +# instances of samples pushed to Kafka +GENERATED_SAMPLES = {} + + +# convenience function for jsonpath (used in test_index_handler) +def first(path, obj): + m = CachedParser.find(path, obj) + return [i.value for i in m][0] + + +@pytest.mark.unit +@pytest.fixture(scope='session') +def birdisle_server(): + password = config.get_consumer_config().get('REDIS_PASSWORD') + server = birdisle.Server(f'requirepass {password}') + yield server + server.close() + + +@pytest.mark.unit +@pytest.fixture(scope='session') +def Birdisle(birdisle_server): + birdisle.redis.LocalSocketConnection.health_check_interval = 0 + password = config.get_consumer_config().get('REDIS_PASSWORD') + r = birdisle.redis.StrictRedis(server=birdisle_server, password=password) + r.config_set('notify-keyspace-events', 'KEA') + return r + + +@pytest.mark.unit +@pytest.mark.integration +@pytest.fixture(scope='session') +def CKANConsumer(birdisle_server, Birdisle): + settings = config.get_consumer_config() + c = consumer.CKANConsumer(settings, None, 
Birdisle) + yield c + c.stop() + + +# @pytest.mark.integration +@pytest.fixture(scope='session', autouse=True) +def create_remote_kafka_assets(request, sample_generator, *args): + # @mark annotation does not work with autouse=True. + if 'integration' not in request.config.invocation_params.args: + LOG.debug(f'NOT creating Kafka Assets') + # return + LOG.debug(f'Creating Kafka Assets') + kafka_security = config.get_kafka_admin_config() + kadmin = get_admin_client(kafka_security) + new_topic = f'{TENANT}.{TEST_TOPIC}' + create_topic(kadmin, new_topic) + GENERATED_SAMPLES[new_topic] = [] + producer = get_producer(kafka_security) + schema = parse(json.dumps(ANNOTATED_SCHEMA)) + for subset in sample_generator(max=100, chunk=10): + GENERATED_SAMPLES[new_topic].extend(subset) + produce(subset, schema, new_topic, producer) + yield None # end of work before clean-up + LOG.debug(f'deleting topic: {new_topic}') + delete_topic(kadmin, new_topic) + + +@pytest.fixture(scope='session', autouse=True) +def check_ckan_readyness(request, *args): + # @mark annotation does not work with autouse=True + if 'integration' not in request.config.invocation_params.args: + LOG.debug(f'NOT Checking for CKAN') + return + LOG.debug('Waiting for CKAN') + CC = config.get_consumer_config() + url = CC.get('url') + for _ in range(120): + try: + res = requests.get(f'http://{url}') + res.raise_for_status() + return + except Exception: + sleep(.5) + raise TimeoutError('Could not connect to ckan for integration test') + +@pytest.mark.unit +@pytest.mark.integration +@pytest.fixture(scope='session') +def sample_generator(): + t = generation.SampleGenerator(ANNOTATED_SCHEMA) + t.set_overrides('geometry.latitude', {'min': 44.754512, 'max': 53.048971}) + t.set_overrides('geometry.longitude', {'min': 8.013135, 'max': 28.456375}) + t.set_overrides('url', {'constant': 'http://ehealthafrica.org'}) + for field in ['beds', 'staff_doctors', 'staff_nurses']: + t.set_overrides(field, {'min': 0, 'max': 50}) + + def 
_gen(max=None, chunk=None): + + def _single(max): + if not max: + while True: + yield t.make_sample() + for x in range(max): + yield t.make_sample() + + def _chunked(max, chunk): + return chunk_iterable(_single(max), chunk) + + if chunk: + yield from _chunked(max, chunk) + else: + yield from _single(max) + yield _gen + + +@pytest.mark.unit +@pytest.mark.integration +@pytest.fixture(scope='session') +def RequestClientT1(): + s = requests.Session() + s.headers.update({'x-oauth-realm': TENANT}) + yield s + + +@pytest.mark.unit +@pytest.fixture(scope='session') +def RequestClientT2(): + s = requests.Session() + s.headers.update({'x-oauth-realm': f'{TENANT}-2'}) + yield s + + +# We can use 'mark' distinctions to chose which tests are run and which assets are built +# @pytest.mark.integration +# @pytest.mark.unit +# When possible use fixtures for reusable test assets +# @pytest.fixture(scope='session') + + +@pytest.mark.unit +@pytest.mark.integration +@pytest.fixture(scope='session') +def SubscriptionDefinition(): + return examples.SUBSCRIPTION + + +@pytest.mark.unit +@pytest.fixture(scope='module') +def SimpleSchema(): + return Node(SIMPLE_SCHEMA) # noqa + + +@pytest.mark.unit +@pytest.fixture(scope='module') +def AutoGenSchema(): + return Node(AUTOGEN_SCHEMA) # noqa + + +@pytest.mark.unit +@pytest.fixture(scope='module') +def ComplexSchema(): + return Node(ANNOTATED_SCHEMA) # noqa + + +SAMPLE_FIELD_LOOKUP = { + 'operational_status': { + 'id': 'static_lookup', + 'params': { + 'lookupEntries': [ + { + 'value': 'Operational', + 'key': 'operational' + }, + { + 'value': 'Non Operational', + 'key': 'non_operational' + }, + { + 'value': 'Unknown', + 'key': 'unknown' + } + ], + 'unknownKeyValue': 'Other' + } + } +} + +SAMPLE_DOC = { + 'start': '2018-08-14T13:50:04.064000+01:00', + 'end': '2018-08-14T13:52:51.024000+01:00', + 'today': '2018-08-14T00:00:00', + 'deviceid': '355662090560127', + 'phonenumber': None, + 'note_start': None, + 'acknowledge_intro': 'OK', + 
'residents_module': { + 'supervisor_name': 'patricia_gauji', + 'enumerator': 'idris_muazu', + 'cluster': 'constitution_independence', + 'respondent_name': 'Rukayyah', + 'gender': 'female', + 'age': 41, + 'connected_discos_supplied': 'no', + 'use_alternative_power': 'yes', + 'largest_source_noise': 'generator musica trade traffic', + 'wish_to_relocate': 'yes', + 'why_would_you_choose_to_relocat': 'noise power_supply quality_of_light safety traffic', + 'common_safety_concerns_in_the_market': 'vandalism robbery_personal', + 'comments': 'None' + }, + 'pic': '1534251107462.jpg', + 'geo': { + 'latitude': 10.513823509216309, + 'longitude': 7.4525299072265625, + 'altitude': 598.2000122070312, + 'accuracy': 4.699999809265137 + }, + 'meta': { + 'instanceID': 'uuid:601e659e-7393-450b-a59d-f80706e7f55d' + }, + 'id': '2856a498-1705-4d2f-b083-ba4199619705', + '_id': 'residence_questionnaire', + '_version': '1' +} + +SAMPLE_DOC2 = { + 'Encounter_Date_Time': None, + 'Location': { + 'accuracy': 26.0, + 'altitude': 486.0, + 'latitude': 9.070648346096277, + 'longitude': 7.413686318323016 + }, + 'Patient_Age': 29, + 'Patient_Name': 'Jane Smith ', + 'QR_Code': '0626b3a2-401c-4012-8b81-1f5b14df8c7b', + 'Test_Name': 'TEST_1', + '_id': 'rapidtest_start', + '_version': '0', + 'end': '2019-01-28T09:05:51.154000+01:00', + 'id': '75dc93fa-647a-4c53-bc5a-18aa5394fd40', + 'meta': { + 'instanceID': 'uuid:adda71c0-2099-4123-87d1-c210838e0565' + }, + 'start': '2019-01-28T09:05:18.680000+01:00' +} + +TYPE_INSTRUCTIONS = { + '_meta': { + 'aet_subscribed_topics': [ + 'Residence_Questionnaire_1_3' + ], + 'aet_geopoint': 'geo_point' + } +} + +AUTOGEN_SCHEMA = { + 'type': 'record', + 'fields': [ + { + 'name': 'start', + 'type': 'string' + }, + { + 'name': 'end', + 'type': 'string' + }, + { + 'name': 'today', + 'type': 'string' + }, + { + 'name': 'deviceid', + 'type': 'string' + }, + { + 'name': 'phonenumber', + 'type': 'None' + }, + { + 'name': 'note_start', + 'type': 'None' + }, + { + 'name': 
'acknowledge_intro', + 'type': 'string' + }, + { + 'name': 'residents_module', + 'type': { + 'type': 'record', + 'fields': [ + { + 'name': 'supervisor_name', + 'type': 'string' + }, + { + 'name': 'enumerator', + 'type': 'string' + }, + { + 'name': 'cluster', + 'type': 'string' + }, + { + 'name': 'respondent_name', + 'type': 'string' + }, + { + 'name': 'gender', + 'type': 'string' + }, + { + 'name': 'age', + 'type': 'int' + }, + { + 'name': 'connected_discos_supplied', + 'type': 'string' + }, + { + 'name': 'use_alternative_power', + 'type': 'string' + }, + { + 'name': 'largest_source_noise', + 'type': 'string' + }, + { + 'name': 'wish_to_relocate', + 'type': 'string' + }, + { + 'name': 'why_would_you_choose_to_relocat', + 'type': 'string' + }, + { + 'name': 'common_safety_concerns_in_the_market', + 'type': 'string' + }, + { + 'name': 'comments', + 'type': 'string' + } + ], + 'name': 'Auto_1' + } + }, + { + 'name': 'pic', + 'type': 'string' + }, + { + 'name': 'geo', + 'type': { + 'type': 'record', + 'fields': [ + { + 'name': 'latitude', + 'type': 'float' + }, + { + 'name': 'longitude', + 'type': 'float' + }, + { + 'name': 'altitude', + 'type': 'float' + }, + { + 'name': 'accuracy', + 'type': 'float' + } + ], + 'name': 'Auto_2' + } + }, + { + 'name': 'meta', + 'type': { + 'type': 'record', + 'fields': [ + { + 'name': 'instanceID', + 'type': 'string' + } + ], + 'name': 'Auto_3' + } + }, + { + 'name': 'id', + 'type': 'string' + }, + { + 'name': 'es_reserved__id', + 'type': 'string' + }, + { + 'name': 'es_reserved__version', + 'type': 'string' + } + ], + 'name': 'Auto_0' +} + + +SIMPLE_SCHEMA = { + 'name': 'rapidtest', + 'doc': 'Rapid Test - Start (id: rapidtest_start, version: 2019012807)', + 'fields': [ + { + 'default': 'rapidtest_start', + 'doc': 'xForm ID', + 'name': '_id', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'default': '2019012807', + 'doc': 'xForm version', + 'name': '_version', + 'namespace': 
'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'name': 'start', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'name': 'end', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'doc': 'Test Name', + 'name': 'Test_Name', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'doc': 'Scan QR Code', + 'name': 'QR_Code', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'doc': 'Patient Name', + 'name': 'Patient_Name', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'doc': 'Patient Age', + 'name': 'Patient_Age', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'int' + ] + }, + { + 'doc': 'Location', + 'name': 'Location', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + { + 'doc': 'Location', + 'fields': [ + { + 'doc': 'latitude', + 'name': 'latitude', + 'namespace': 'Rapidtest_Start_2019012807.Location', + 'type': [ + 'None', + 'float' + ] + }, + { + 'doc': 'longitude', + 'name': 'longitude', + 'namespace': 'Rapidtest_Start_2019012807.Location', + 'type': [ + 'None', + 'float' + ] + }, + { + 'doc': 'altitude', + 'name': 'altitude', + 'namespace': 'Rapidtest_Start_2019012807.Location', + 'type': [ + 'None', + 'float' + ] + }, + { + 'doc': 'accuracy', + 'name': 'accuracy', + 'namespace': 'Rapidtest_Start_2019012807.Location', + 'type': [ + 'None', + 'float' + ] + } + ], + 'name': 'Location', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': 'record' + } + ] + }, + { + 'name': 'Encounter_Date_Time', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'name': 'meta', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + { + 'fields': [ + { + 'name': 'instanceID', + 'namespace': 'Rapidtest_Start_2019012807.meta', + 'type': [ + 'None', 
+ 'string' + ] + } + ], + 'name': 'meta', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': 'record' + } + ] + }, + { + 'doc': 'UUID', + 'name': 'id', + 'type': 'string' + } + ] +} + + +ANNOTATED_SCHEMA = { + 'doc': 'MySurvey (title: HS OSM Gather Test id: gth_hs_test, version: 2)', + 'name': 'MySurvey', + 'type': 'record', + 'fields': [ + { + 'doc': 'xForm ID', + 'name': '_id', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey' + }, + { + 'doc': 'xForm version', + 'name': '_version', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_default_visualization': 'undefined' + }, + { + 'doc': 'Surveyor', + 'name': '_surveyor', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey' + }, + { + 'doc': 'Submitted at', + 'name': '_submitted_at', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'dateTime' + }, + { + 'name': '_start', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'dateTime' + }, + { + 'name': 'timestamp', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'dateTime' + }, + { + 'name': 'username', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'name': 'source', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'name': 'osm_id', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Name of Facility', + 'name': 'name', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Address', + 'name': 'addr_full', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Phone Number', + 'name': 'contact_number', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + 
'@aether_extended_type': 'string' + }, + { + 'doc': 'Facility Operator Name', + 'name': 'operator', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Operator Type', + 'name': 'operator_type', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_default_visualization': 'pie', + '@aether_lookup': [ + { + 'label': 'Public', + 'value': 'public' + }, + { + 'label': 'Private', + 'value': 'private' + }, + { + 'label': 'Community', + 'value': 'community' + }, + { + 'label': 'Religious', + 'value': 'religious' + }, + { + 'label': 'Government', + 'value': 'government' + }, + { + 'label': 'NGO', + 'value': 'ngo' + }, + { + 'label': 'Combination', + 'value': 'combination' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'Facility Location', + 'name': 'geometry', + 'type': [ + 'null', + { + 'doc': 'Facility Location', + 'name': 'geometry', + 'type': 'record', + 'fields': [ + { + 'doc': 'latitude', + 'name': 'latitude', + 'type': [ + 'null', + 'float' + ], + 'namespace': 'MySurvey.geometry' + }, + { + 'doc': 'longitude', + 'name': 'longitude', + 'type': [ + 'null', + 'float' + ], + 'namespace': 'MySurvey.geometry' + }, + { + 'doc': 'altitude', + 'name': 'altitude', + 'type': [ + 'null', + 'float' + ], + 'namespace': 'MySurvey.geometry' + }, + { + 'doc': 'accuracy', + 'name': 'accuracy', + 'type': [ + 'null', + 'float' + ], + 'namespace': 'MySurvey.geometry' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'geopoint' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'geopoint' + }, + { + 'doc': 'Operational Status', + 'name': 'operational_status', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_default_visualization': 'pie', + '@aether_lookup': [ + { + 'label': 'Operational', + 'value': 'operational' + }, + { + 'label': 'Non Operational', + 'value': 'non_operational' + }, + { + 'label': 'Unknown', + 'value': 'unknown' 
+ } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'When is the facility open?', + 'name': '_opening_hours_type', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Pick the days of the week open and enter hours for each day', + 'value': 'oh_select' + }, + { + 'label': 'Only open on weekdays with the same hours every day.', + 'value': 'oh_weekday' + }, + { + 'label': '24/7 - All day, every day', + 'value': 'oh_24_7' + }, + { + 'label': 'Type in OSM String by hand (Advanced Option)', + 'value': 'oh_advanced' + }, + { + 'label': 'I do not know the operating hours', + 'value': 'oh_unknown' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'Which days is this facility open?', + 'name': '_open_days', + 'type': [ + 'null', + { + 'type': 'array', + 'items': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Monday', + 'value': 'Mo' + }, + { + 'label': 'Tuesday', + 'value': 'Tu' + }, + { + 'label': 'Wednesday', + 'value': 'We' + }, + { + 'label': 'Thursday', + 'value': 'Th' + }, + { + 'label': 'Friday', + 'value': 'Fr' + }, + { + 'label': 'Saturday', + 'value': 'Sa' + }, + { + 'label': 'Sunday', + 'value': 'Su' + }, + { + 'label': 'Public Holidays', + 'value': 'PH' + } + ], + '@aether_extended_type': 'select' + }, + { + 'doc': 'Open hours by day of the week', + 'name': '_dow_group', + 'type': [ + 'null', + { + 'doc': 'Open hours by day of the week', + 'name': '_dow_group', + 'type': 'record', + 'fields': [ + { + 'doc': 'Enter open hours for each day:', + 'name': '_hours_note', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Monday open hours', + 'name': '_mon_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Tuesday open hours', + 'name': '_tue_hours', + 'type': [ + 'null', + 'string' + ], + 
'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Wednesday open hours', + 'name': '_wed_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Thursday open hours', + 'name': '_thu_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Friday open hours', + 'name': '_fri_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Saturday open hours', + 'name': '_sat_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Sunday open hours', + 'name': '_sun_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Public Holiday open hours', + 'name': '_ph_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + }, + { + 'name': '_select_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey._dow_group', + '@aether_extended_type': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'group' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'group' + }, + { + 'doc': 'Enter weekday hours', + 'name': '_weekday_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'OSM:opening_hours', + 'name': '_advanced_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'name': 'opening_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Verify the open hours are correct or go back and fix:', + 'name': 
'_disp_hours', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'Facility Category', + 'name': 'amenity', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Clinic', + 'value': 'clinic' + }, + { + 'label': 'Doctors', + 'value': 'doctors' + }, + { + 'label': 'Hospital', + 'value': 'hospital' + }, + { + 'label': 'Dentist', + 'value': 'dentist' + }, + { + 'label': 'Pharmacy', + 'value': 'pharmacy' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'Available Services', + 'name': 'healthcare', + 'type': [ + 'null', + { + 'type': 'array', + 'items': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Doctor', + 'value': 'doctor' + }, + { + 'label': 'Pharmacy', + 'value': 'pharmacy' + }, + { + 'label': 'Hospital', + 'value': 'hospital' + }, + { + 'label': 'Clinic', + 'value': 'clinic' + }, + { + 'label': 'Dentist', + 'value': 'dentist' + }, + { + 'label': 'Physiotherapist', + 'value': 'physiotherapist' + }, + { + 'label': 'Alternative', + 'value': 'alternative' + }, + { + 'label': 'Laboratory', + 'value': 'laboratory' + }, + { + 'label': 'Optometrist', + 'value': 'optometrist' + }, + { + 'label': 'Rehabilitation', + 'value': 'rehabilitation' + }, + { + 'label': 'Blood donation', + 'value': 'blood_donation' + }, + { + 'label': 'Birthing center', + 'value': 'birthing_center' + } + ], + '@aether_extended_type': 'select' + }, + { + 'doc': 'Specialities', + 'name': 'speciality', + 'type': [ + 'null', + { + 'type': 'array', + 'items': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'xx', + 'value': 'xx' + } + ], + '@aether_extended_type': 'select' + }, + { + 'doc': 'Speciality medical equipment available', + 'name': 'health_amenity_type', + 'type': [ + 'null', + { + 'type': 'array', + 'items': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 
'Ultrasound', + 'value': 'ultrasound' + }, + { + 'label': 'MRI', + 'value': 'mri' + }, + { + 'label': 'X-Ray', + 'value': 'x_ray' + }, + { + 'label': 'Dialysis', + 'value': 'dialysis' + }, + { + 'label': 'Operating Theater', + 'value': 'operating_theater' + }, + { + 'label': 'Laboratory', + 'value': 'laboratory' + }, + { + 'label': 'Imaging Equipment', + 'value': 'imaging_equipment' + }, + { + 'label': 'Intensive Care Unit', + 'value': 'intensive_care_unit' + }, + { + 'label': 'Emergency Department', + 'value': 'emergency_department' + } + ], + '@aether_extended_type': 'select' + }, + { + 'doc': 'Does this facility provide Emergency Services?', + 'name': 'emergency', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Yes', + 'value': 'yes' + }, + { + 'label': 'No', + 'value': 'no' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'Does the pharmacy dispense prescription medication?', + 'name': 'dispensing', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Yes', + 'value': 'yes' + }, + { + 'label': 'No', + 'value': 'no' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'Number of Beds', + 'name': 'beds', + 'type': [ + 'null', + 'int' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'int', + '@aether_masking': 'private' + }, + { + 'doc': 'Number of Doctors', + 'name': 'staff_doctors', + 'type': [ + 'null', + 'int' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'int', + '@aether_masking': 'private' + }, + { + 'doc': 'Number of Nurses', + 'name': 'staff_nurses', + 'type': [ + 'null', + 'int' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'int', + '@aether_masking': 'private' + }, + { + 'doc': 'Types of insurance accepted?', + 'name': 'insurance', + 'type': [ + 'null', + { + 'type': 'array', + 'items': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Public', + 'value': 
'public' + }, + { + 'label': 'Private', + 'value': 'private' + }, + { + 'label': 'None', + 'value': 'no' + }, + { + 'label': 'Unknown', + 'value': 'unknown' + } + ], + '@aether_extended_type': 'select', + '@aether_masking': 'public' + }, + { + 'doc': 'Is this facility wheelchair accessible?', + 'name': 'wheelchair', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Yes', + 'value': 'yes' + }, + { + 'label': 'No', + 'value': 'no' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'What is the source of water for this facility?', + 'name': 'water_source', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Well', + 'value': 'well' + }, + { + 'label': 'Water works', + 'value': 'water_works' + }, + { + 'label': 'Manual pump', + 'value': 'manual_pump' + }, + { + 'label': 'Powered pump', + 'value': 'powered_pump' + }, + { + 'label': 'Groundwater', + 'value': 'groundwater' + }, + { + 'label': 'Rain', + 'value': 'rain' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'What is the source of power for this facility?', + 'name': 'electricity', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_lookup': [ + { + 'label': 'Power grid', + 'value': 'grid' + }, + { + 'label': 'Generator', + 'value': 'generator' + }, + { + 'label': 'Solar', + 'value': 'solar' + }, + { + 'label': 'Other Power', + 'value': 'other' + }, + { + 'label': 'No Power', + 'value': 'none' + } + ], + '@aether_extended_type': 'select1' + }, + { + 'doc': 'URL for this location (if available)', + 'name': 'url', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'In which health are is the facility located?', + 'name': 'is_in_health_area', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'doc': 'In which health zone is the facility 
located?', + 'name': 'is_in_health_zone', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'string' + }, + { + 'name': 'meta', + 'type': [ + 'null', + { + 'name': 'meta', + 'type': 'record', + 'fields': [ + { + 'name': 'instanceID', + 'type': [ + 'null', + 'string' + ], + 'namespace': 'MySurvey.meta', + '@aether_extended_type': 'string' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'group' + } + ], + 'namespace': 'MySurvey', + '@aether_extended_type': 'group' + }, + { + 'doc': 'UUID', + 'name': 'id', + 'type': 'string' + } + ], + 'namespace': 'org.ehealthafrica.aether.odk.xforms.Mysurvey' +} + +TEST_SIMPLE_SCHEMA = { + 'name': 'simpletest', + 'fields': [ + { + 'default': 'rapidtest_start', + 'doc': 'xForm ID', + 'name': '_id', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'default': '2019012807', + 'doc': 'xForm version', + 'name': '_version', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + } + ] +} + +TEST_LIST_SIMPLE_SCHEMAS = [ + { + 'name': 'simpletest-1', + 'aetherBaseSchema': True, + 'fields': [ + { + 'default': 'rapidtest_start', + 'doc': 'xForm ID', + 'name': '_id', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + }, + { + 'default': '2019012807', + 'doc': 'xForm version', + 'name': '_version', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'string' + ] + } + ] + }, + { + 'name': 'simpletest-2', + 'aetherBaseSchema': True, + 'fields': [ + { + 'default': '2019012807', + 'doc': 'xForm _extra', + 'name': '_extra', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'int' + ] + } + ] + }, + { + 'name': 'simpletest-3', + 'fields': [ + { + 'default': '2019012807', + 'doc': 'xForm _extra', + 'name': '_new', + 'namespace': 'Rapidtest_Start_2019012807', + 'type': [ + 'None', + 'bool' + ] + } + ] + } +] \ No newline at end of file diff --git 
a/example/.gitignore b/consumer/tests/ckan/.gitignore similarity index 100% rename from example/.gitignore rename to consumer/tests/ckan/.gitignore diff --git a/example/ckan/Dockerfile b/consumer/tests/ckan/Dockerfile similarity index 88% rename from example/ckan/Dockerfile rename to consumer/tests/ckan/Dockerfile index 9a87e5d..425a109 100644 --- a/example/ckan/Dockerfile +++ b/consumer/tests/ckan/Dockerfile @@ -1,5 +1,9 @@ +# ---------------------------------------------------- +# https://github.com/ckan/ckan/blob/master/Dockerfile +# ---------------------------------------------------- + # See CKAN docs on installation from Docker Compose on usage -FROM debian:jessie +FROM debian:stretch MAINTAINER Open Knowledge # Install required system packages @@ -39,7 +43,7 @@ RUN useradd -r -u 900 -m -c "ckan account" -d $CKAN_HOME -s /bin/false ckan # Setup virtual environment for CKAN RUN mkdir -p $CKAN_VENV $CKAN_CONFIG $CKAN_STORAGE_PATH && \ virtualenv $CKAN_VENV && \ - ln -s $CKAN_VENV/bin/pip /usr/local/bin/ckan-pip &&\ + ln -s $CKAN_VENV/bin/pip /usr/local/bin/ckan-pip && \ ln -s $CKAN_VENV/bin/paster /usr/local/bin/ckan-paster # Setup CKAN diff --git a/consumer/tests/ckan/ckan-entrypoint.sh b/consumer/tests/ckan/ckan-entrypoint.sh new file mode 100755 index 0000000..9a31078 --- /dev/null +++ b/consumer/tests/ckan/ckan-entrypoint.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# URL for the primary database, in the format expected by sqlalchemy (required +# unless linked to a container called 'db') +: ${CKAN_SQLALCHEMY_URL:=} +# URL for solr (required unless linked to a container called 'solr') +: ${CKAN_SOLR_URL:=} +# URL for redis (required unless linked to a container called 'redis') +: ${CKAN_REDIS_URL:=} +# URL for datapusher (required unless linked to a container called 'datapusher') +: ${CKAN_DATAPUSHER_URL:=} + +CONFIG="${CKAN_CONFIG}/production.ini" + +abort () { + echo "$@" >&2 + exit 1 +} + +set_environment () { + export CKAN_SITE_ID=${CKAN_SITE_ID} + export CKAN_SITE_URL=${CKAN_SITE_URL} + export CKAN_SQLALCHEMY_URL=${CKAN_SQLALCHEMY_URL} + export CKAN_SOLR_URL=${CKAN_SOLR_URL} + export CKAN_REDIS_URL=${CKAN_REDIS_URL} + export CKAN_STORAGE_PATH=/var/lib/ckan + export CKAN_DATAPUSHER_URL=${CKAN_DATAPUSHER_URL} + export CKAN_DATASTORE_WRITE_URL=${CKAN_DATASTORE_WRITE_URL} + export CKAN_DATASTORE_READ_URL=${CKAN_DATASTORE_READ_URL} + export CKAN_SMTP_SERVER=${CKAN_SMTP_SERVER} + export CKAN_SMTP_STARTTLS=${CKAN_SMTP_STARTTLS} + export CKAN_SMTP_USER=${CKAN_SMTP_USER} + export CKAN_SMTP_PASSWORD=${CKAN_SMTP_PASSWORD} + export CKAN_SMTP_MAIL_FROM=${CKAN_SMTP_MAIL_FROM} + export CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB} +} + +write_config () { + ckan-paster make-config --no-interactive ckan "$CONFIG" +} + +# If we don't already have a config file, bootstrap +if [ ! 
-e "$CONFIG" ]; then + write_config +fi + +# Get or create CKAN_SQLALCHEMY_URL +if [ -z "$CKAN_SQLALCHEMY_URL" ]; then + abort "ERROR: no CKAN_SQLALCHEMY_URL specified in docker-compose.yml" +fi + +if [ -z "$CKAN_SOLR_URL" ]; then + abort "ERROR: no CKAN_SOLR_URL specified in docker-compose.yml" +fi + +if [ -z "$CKAN_REDIS_URL" ]; then + abort "ERROR: no CKAN_REDIS_URL specified in docker-compose.yml" +fi + +if [ -z "$CKAN_DATAPUSHER_URL" ]; then + abort "ERROR: no CKAN_DATAPUSHER_URL specified in docker-compose.yml" +fi + +set_environment +ckan-paster --plugin=ckan datastore set-permissions -c "${CKAN_CONFIG}/production.ini" | sudo -u postgres psql --set ON_ERROR_STOP=1 +ckan-paster --plugin=ckan db init -c "${CKAN_CONFIG}/production.ini" +exec "$@" diff --git a/example/ckan/ckan/production.ini b/consumer/tests/ckan/ckan/production.ini similarity index 98% rename from example/ckan/ckan/production.ini rename to consumer/tests/ckan/ckan/production.ini index 68a15f1..30fdd16 100644 --- a/example/ckan/ckan/production.ini +++ b/consumer/tests/ckan/ckan/production.ini @@ -99,8 +99,8 @@ ckan.site_id = default # Note: Add ``datastore`` to enable the CKAN DataStore # Add ``datapusher`` to enable DataPusher -# Add ``resource_proxy`` to enable resorce proxying and get around the -# same origin policy +# Add ``resource_proxy`` to enable resorce proxying and get around the +# same origin policy ckan.plugins = stats text_view image_view recline_view datastore # Define which views should be created by default diff --git a/example/ckan/ckan/who.ini b/consumer/tests/ckan/ckan/who.ini similarity index 100% rename from example/ckan/ckan/who.ini rename to consumer/tests/ckan/ckan/who.ini diff --git a/example/ckan/my_init.d/50_configure b/consumer/tests/ckan/my_init.d/50_configure similarity index 100% rename from example/ckan/my_init.d/50_configure rename to consumer/tests/ckan/my_init.d/50_configure diff --git a/example/ckan/my_init.d/70_initdb 
b/consumer/tests/ckan/my_init.d/70_initdb similarity index 100% rename from example/ckan/my_init.d/70_initdb rename to consumer/tests/ckan/my_init.d/70_initdb diff --git a/example/ckan/postgresql/Dockerfile b/consumer/tests/ckan/postgresql/Dockerfile similarity index 100% rename from example/ckan/postgresql/Dockerfile rename to consumer/tests/ckan/postgresql/Dockerfile diff --git a/example/ckan/postgresql/docker-entrypoint-initdb.d/00_create_datastore.sh b/consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/00_create_datastore.sh similarity index 100% rename from example/ckan/postgresql/docker-entrypoint-initdb.d/00_create_datastore.sh rename to consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/00_create_datastore.sh diff --git a/example/ckan/postgresql/docker-entrypoint-initdb.d/10_datastore-setup.sql b/consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/10_datastore-setup.sql similarity index 100% rename from example/ckan/postgresql/docker-entrypoint-initdb.d/10_datastore-setup.sql rename to consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/10_datastore-setup.sql diff --git a/example/ckan/postgresql/docker-entrypoint-initdb.d/20_postgis_permissions.sql b/consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/20_postgis_permissions.sql similarity index 100% rename from example/ckan/postgresql/docker-entrypoint-initdb.d/20_postgis_permissions.sql rename to consumer/tests/ckan/postgresql/docker-entrypoint-initdb.d/20_postgis_permissions.sql diff --git a/example/scripts/setup_ckan.sh b/consumer/tests/ckan/setup.sh similarity index 53% rename from example/scripts/setup_ckan.sh rename to consumer/tests/ckan/setup.sh index 6d050ed..9df0b4f 100755 --- a/example/scripts/setup_ckan.sh +++ b/consumer/tests/ckan/setup.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2019 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this 
work for additional information # regarding copyright ownership. @@ -20,25 +20,36 @@ # set -Eeuo pipefail -pushd ckan +source scripts/lib.sh || \ + ( echo -e "\033[91mRun this script from root folder\033[0m" && \ + exit 1 ) +source .env -{ - docker network create aether_internal -} || { # catch - echo "aether_internal is ready." -} +create_docker_assets + +docker network create ckan_bootstrap_net || true + +pushd ckan { # try - docker-compose -f docker-compose.yml build + docker-compose build --pull --force-rm } || { # catch echo 'not ready...' } -docker-compose -f docker-compose.yml up -d -until docker exec -it ckan /usr/local/bin/ckan-paster --plugin=ckan sysadmin -c /etc/ckan/production.ini add admin | tee ../creds.txt && echo "done" +docker-compose up -d + +retries=1 +until docker exec -it ckan_ckan_1 /usr/local/bin/ckan-paster --plugin=ckan sysadmin -c /etc/ckan/production.ini add admin | tee creds.txt && echo "done" do - echo "waiting for ckan container to be ready..." + echo "waiting for ckan container to be ready... 
$retries" sleep 5 + + ((retries++)) + if [[ $retries -gt 30 ]]; then + echo "It was not possible to start CKAN" + exit 1 + fi done popd diff --git a/example/ckan/solr/Dockerfile b/consumer/tests/ckan/solr/Dockerfile similarity index 100% rename from example/ckan/solr/Dockerfile rename to consumer/tests/ckan/solr/Dockerfile diff --git a/consumer/tests/ckan/solr/schema.xml b/consumer/tests/ckan/solr/schema.xml new file mode 100644 index 0000000..a42c649 --- /dev/null +++ b/consumer/tests/ckan/solr/schema.xml @@ -0,0 +1,188 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + index_id + text + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/consumer/tests/ckan/solr/solrconfig.xml b/consumer/tests/ckan/solr/solrconfig.xml new file mode 100644 index 0000000..fad3704 --- /dev/null +++ b/consumer/tests/ckan/solr/solrconfig.xml @@ -0,0 +1,343 @@ + + + + + + 6.0.0 + + + + + + + + + + + ${solr.data.dir:} + + + + + + + ${solr.lock.type:native} + + + + + + + + ${solr.ulog.dir:} + ${solr.ulog.numVersionBuckets:65536} + + + + ${solr.autoCommit.maxTime:15000} + false + + + + ${solr.autoSoftCommit.maxTime:-1} + + + + + + + 1024 + + + + + true + 20 + 200 + + + + + + + + + + false + 2 + + + + + + + + + + + + + + explicit + 10 + + + + + + + + explicit + json + true + + + + + + + + explicit + + + + + + + + _text_ + + + + + + + + add-unknown-fields-to-the-schema + + + + + + + + true + ignored_ + _text_ + + + + + + + + + + + explicit + true + + + + + + + text_general + + + default + _text_ + solr.DirectSolrSpellChecker + internal + 0.5 + 2 + 1 + 5 + 4 + 0.01 + + + + + + + + default + on + true + 10 + 5 + 5 + true + true + 10 + 5 + + + + spellcheck + + + + + + + + + + true + + + tvComponent + + + + + + + + + + true + false + + + + terms + + + + + + 
+ string + elevate.xml + + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + 70 + 0.5 + [-\w ,/\n\"']{20,200} + + + + + + ]]> + ]]> + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + + 10 + .,!? + + + + + + WORD + en + US + + + + + + + + + + + + + + + + [^\w-\.] + _ + + + + + + + yyyy-MM-dd'T'HH:mm:ss.SSSZ + yyyy-MM-dd'T'HH:mm:ss,SSSZ + yyyy-MM-dd'T'HH:mm:ss.SSS + yyyy-MM-dd'T'HH:mm:ss,SSS + yyyy-MM-dd'T'HH:mm:ssZ + yyyy-MM-dd'T'HH:mm:ss + yyyy-MM-dd'T'HH:mmZ + yyyy-MM-dd'T'HH:mm + yyyy-MM-dd HH:mm:ss.SSSZ + yyyy-MM-dd HH:mm:ss,SSSZ + yyyy-MM-dd HH:mm:ss.SSS + yyyy-MM-dd HH:mm:ss,SSS + yyyy-MM-dd HH:mm:ssZ + yyyy-MM-dd HH:mm:ss + yyyy-MM-dd HH:mmZ + yyyy-MM-dd HH:mm + yyyy-MM-dd + + + + + + + text/plain; charset=UTF-8 + + + + ${velocity.template.base.dir:} + ${velocity.solr.resource.loader.enabled:true} + ${velocity.params.resource.loader.enabled:false} + + + + 5 + + + diff --git a/example/scripts/run_ckan.sh b/consumer/tests/ckan/start.sh similarity index 89% rename from example/scripts/run_ckan.sh rename to consumer/tests/ckan/start.sh index 22f2203..d63c9d9 100755 --- a/example/scripts/run_ckan.sh +++ b/consumer/tests/ckan/start.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2019 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. 
@@ -21,5 +21,5 @@ set -Eeuo pipefail pushd ckan -docker-compose up +docker-compose up -d popd diff --git a/example/scripts/rebuild_ckan.sh b/consumer/tests/ckan/stop.sh similarity index 86% rename from example/scripts/rebuild_ckan.sh rename to consumer/tests/ckan/stop.sh index a687541..fdccacc 100755 --- a/example/scripts/rebuild_ckan.sh +++ b/consumer/tests/ckan/stop.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2019 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. @@ -21,7 +21,6 @@ set -Eeuo pipefail pushd ckan - -docker-compose -f docker-compose.yml build - +docker-compose kill +docker-compose down popd diff --git a/example/ckan/svc/ckan/run b/consumer/tests/ckan/svc/ckan/run similarity index 100% rename from example/ckan/svc/ckan/run rename to consumer/tests/ckan/svc/ckan/run diff --git a/example/ckan/svc/nginx/run b/consumer/tests/ckan/svc/nginx/run similarity index 100% rename from example/ckan/svc/nginx/run rename to consumer/tests/ckan/svc/nginx/run diff --git a/example/ckan/svc/postfix/run b/consumer/tests/ckan/svc/postfix/run similarity index 100% rename from example/ckan/svc/postfix/run rename to consumer/tests/ckan/svc/postfix/run diff --git a/example/scripts/run_aether.sh b/consumer/tests/ckan/wipe.sh similarity index 79% rename from example/scripts/run_aether.sh rename to consumer/tests/ckan/wipe.sh index 94af931..1ea4c84 100755 --- a/example/scripts/run_aether.sh +++ b/consumer/tests/ckan/wipe.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2019 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. 
@@ -18,8 +18,11 @@ # specific language governing permissions and limitations # under the License. # -set -Eeuo pipefail +set -Euo pipefail -pushd aether-bootstrap/ -./scripts/run.sh & +pushd ckan +docker-compose kill +docker-compose down -v popd + +docker network rm ckan_bootstrap_net || true diff --git a/consumer/tests/conf/consumer.json b/consumer/tests/conf/consumer.json new file mode 100644 index 0000000..ea3b884 --- /dev/null +++ b/consumer/tests/conf/consumer.json @@ -0,0 +1,5 @@ +{ + "url": "ckan:5000", + "name": "CKAN_CONSUMER", + "log_level": "debug" +} diff --git a/consumer/tests/conf/kafka.json b/consumer/tests/conf/kafka.json new file mode 100644 index 0000000..6912228 --- /dev/null +++ b/consumer/tests/conf/kafka.json @@ -0,0 +1,12 @@ +{ + "auto.offset.reset" : "earliest", + "group.id": "dev.temporary", + "enable.auto.commit": false, + "auto.commit.interval.ms": 2500, + "aether_emit_flag_required" : false, + "aether_masking_schema_levels" : ["false", "true"], + "aether_masking_schema_emit_level": "false", + "heartbeat.interval.ms": 2500, + "session.timeout.ms": 18000, + "request.timeout.ms": 20000 +} diff --git a/consumer/tests/test_job_consumer.py b/consumer/tests/test_job_consumer.py new file mode 100644 index 0000000..abb1ee4 --- /dev/null +++ b/consumer/tests/test_job_consumer.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + + +import pytest +import requests +import json +from time import sleep + +from . import * # noqa +from . import ( # noqa # for the linter + CKANConsumer, + RequestClientT1, + RequestClientT2, + URL +) + +from aet.logger import get_logger +from app.fixtures import examples + +LOG = get_logger('TEST') + + +''' + API Tests +''' + + +@pytest.mark.unit +def test__consumer_add_delete_respect_tenants(CKANConsumer, RequestClientT1, RequestClientT2): + res = RequestClientT1.post(f'{URL}/ckan/add', json=examples.CKAN_INSTANCE) + assert(res.json() is True) + res = RequestClientT1.get(f'{URL}/ckan/list') + assert(res.json() != []) + res = RequestClientT2.get(f'{URL}/ckan/list') + assert(res.json() == []) + res = RequestClientT1.delete(f'{URL}/ckan/delete?id=ckan-id') + assert(res.json() is True) + res = RequestClientT1.get(f'{URL}/ckan/list') + assert(res.json() == []) + + +@pytest.mark.parametrize('example,endpoint', [ + (examples.CKAN_INSTANCE, 'ckan'), + (examples.JOB, 'job') +]) +@pytest.mark.unit +def test__api_validate(CKANConsumer, RequestClientT1, example, endpoint): + res = RequestClientT1.post(f'{URL}/{endpoint}/validate', json=example) + assert(res.json().get('valid') is True), str(res.text) + + +@pytest.mark.parametrize('example,endpoint', [ + (examples.CKAN_INSTANCE, 'ckan'), + (examples.JOB, 'job') +]) +@pytest.mark.unit +def test__api_validate_pretty(CKANConsumer, RequestClientT1, example, endpoint): + res = RequestClientT1.post(f'{URL}/{endpoint}/validate_pretty', json=example) + assert(res.json().get('valid') is True), str(res.text) + + +@pytest.mark.parametrize('endpoint', [ + ('ckan'), + ('job') +]) +@pytest.mark.unit +def test__api_describe_assets(CKANConsumer, RequestClientT1, endpoint): + res = RequestClientT1.get(f'{URL}/{endpoint}/describe') + assert(res.json() is not None), str(res.text) + + +@pytest.mark.parametrize('endpoint', [ + ('ckan'), + 
('job') +]) +@pytest.mark.unit +def test__api_get_schema(CKANConsumer, RequestClientT1, endpoint): + res = RequestClientT1.get(f'{URL}/{endpoint}/get_schema') + assert(res.json() is not None), str(res.text) + + +@pytest.mark.unit +def test__api_resource_instance(CKANConsumer, RequestClientT1): + doc_id = examples.CKAN_INSTANCE.get("id") + res = RequestClientT1.post(f'{URL}/ckan/add', json=examples.CKAN_INSTANCE) + assert(res.json() is True) + res = RequestClientT1.get(f'{URL}/ckan/list') + assert(doc_id in res.json()) + res = RequestClientT1.get(f'{URL}/ckan/test_connection?id={doc_id}') + assert(res.json().get('success') is True) + + res = RequestClientT1.delete(f'{URL}/ckan/delete?id={examples.CKAN_INSTANCE.get("id")}') + assert(res.json() is True) + + +@pytest.mark.integration +def test__api_resource_ckan(CKANConsumer, RequestClientT1): + doc_id = examples.CKAN_INSTANCE.get('id') + res = RequestClientT1.post(f'{URL}/ckan/add', json=examples.CKAN_INSTANCE) + assert(res.json() is True) + res = RequestClientT1.get(f'{URL}/ckan/list') + assert(doc_id in res.json()) + res = RequestClientT1.get(f'{URL}/ckan/test_connection?id={doc_id}') + res.raise_for_status() + assert(res.json().get('success') is True) + + +@pytest.mark.integration +def test__api_job_and_resource_create(CKANConsumer, RequestClientT1): + doc_id = examples.JOB.get('id') + res = RequestClientT1.post(f'{URL}/ckan/add', json=examples.CKAN_INSTANCE) + assert(res.json() is True) + res = RequestClientT1.post(f'{URL}/subscription/add', json=examples.SUBSCRIPTION) + assert(res.json() is True) + + res = RequestClientT1.post(f'{URL}/job/add', json=examples.JOB) + assert(res.json() is True) + + sleep(.25) # take a few MS for the job to be started + + +@pytest.mark.integration +def test__api_job_and_resource_public_endpoints(CKANConsumer, RequestClientT1): + doc_id = examples.JOB.get('id') + res = RequestClientT1.get(f'{URL}/job/list_subscribed_topics?id={doc_id}') + res.raise_for_status() + topics = res.json() + 
LOG.debug(topics) + assert(TEST_TOPIC not in topics) + sleep(60) + res = RequestClientT1.get(f'{URL}/job/get_logs?id={doc_id}') + res.raise_for_status() + logs = res.json() + assert(len(logs) > 0) + + +@pytest.mark.integration +def test__api_job_and_resource_delete(CKANConsumer, RequestClientT1): + doc_id = examples.JOB.get('id') + res = RequestClientT1.delete(f'{URL}/ckan/delete?id={examples.CKAN_INSTANCE.get("id")}') + assert(res.json() is True) + res = RequestClientT1.post(f'{URL}/subscription/delete?id={examples.SUBSCRIPTION.get("id")}') + assert(res.json() is True) + + res = RequestClientT1.post(f'{URL}/job/delete?id={doc_id}', json=examples.JOB) + assert(res.json() is True) diff --git a/consumer/tests/test_unit.py b/consumer/tests/test_unit.py new file mode 100644 index 0000000..b68ad9a --- /dev/null +++ b/consumer/tests/test_unit.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import pytest + +from . 
import ( + KAFKA_CONFIG, + KAFKA_ADMIN_CONFIG, + CONSUMER_CONFIG, + CKANConsumer, + AUTOGEN_SCHEMA, + TEST_LIST_SIMPLE_SCHEMAS, + TEST_SIMPLE_SCHEMA, + ) # noqa # get all test assets from tests/__init__.py + +from app.fixtures import examples +from app.utils import ( + extract_fields_from_schema, + prepare_fields_for_resource, + is_field_primitive_type, + avroToPostgresPrimitiveTypes, +) +from app.artifacts import ( + CKANInstance +) + +# Test Suite contains both unit and integration tests +# Unit tests can be run on their own from the root directory +# enter the bash environment for the version of python you want to test +# for example for python 3 +# `docker-compose run consumer-sdk-test bash` +# then start the unit tests with +# `pytest -m unit` +# to run integration tests / all tests run the test_all.sh script from the /tests directory. + +@pytest.mark.unit +def test__get_config_alias(): + ckan_servers = KAFKA_CONFIG.get('bootstrap.servers') + assert(ckan_servers is not None) + args = KAFKA_CONFIG.copy() + bootstrap = 'bootstrap.servers'.upper() + assert(bootstrap in args) + assert(args.get(bootstrap) == os.environ.get('KAFKA_URL')) + assert(args.get('KAFKA_URL') is None) + + +@pytest.mark.unit +def test__utils(): + fields, definition_names = extract_fields_from_schema(TEST_SIMPLE_SCHEMA) + assert(len(fields) == 2) + assert(len(definition_names) == 0) + resource_types = prepare_fields_for_resource(fields, definition_names) + for _type in resource_types: + assert(_type['type'] not in avroToPostgresPrimitiveTypes) + + fields, definition_names = extract_fields_from_schema(TEST_LIST_SIMPLE_SCHEMAS) + assert(len(fields) == 3) + assert(len(definition_names) == 1) + assert('int' in fields[2]['type']) + resource_types = prepare_fields_for_resource(fields, definition_names) + for _type in resource_types: + assert(_type['type'] not in avroToPostgresPrimitiveTypes) + + fields, definition_names = extract_fields_from_schema(AUTOGEN_SCHEMA) + resource_types = 
prepare_fields_for_resource(fields, definition_names) + for _type in resource_types: + assert(_type['type'] not in avroToPostgresPrimitiveTypes) diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index 97b6ca3..0000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -nose==1.3.7 -pycodestyle==2.3.1 -mock==2.0.0 -pook==0.2.5 diff --git a/docker-compose-base.yml b/docker-compose-base.yml new file mode 100644 index 0000000..9958b2b --- /dev/null +++ b/docker-compose-base.yml @@ -0,0 +1,12 @@ +version: "2.1" +services: + + ckan-consumer-base: + build: consumer + image: aether-ckan-consumer + stdin_open: true + volumes: + - ./consumer:/code + environment: + PYTHONUNBUFFERED: 0 + command: start diff --git a/docker-compose-test.yml b/docker-compose-test.yml new file mode 100644 index 0000000..a0360b9 --- /dev/null +++ b/docker-compose-test.yml @@ -0,0 +1,85 @@ +version: "2.1" +services: + consumer-test: + extends: + file: docker-compose-base.yml + service: ckan-consumer-base + environment: + TENANCY_HEADER: x-oauth-realm + + CONSUMER_NAME: 'CKAN-TEST' + + EXPOSE_PORT: 9013 + LOG_LEVEL: "DEBUG" + CKAN_CONSUMER_CONFIG_PATH: "/code/tests/conf/consumer.json" + CKAN_CONSUMER_KAFKA_CONFIG_PATH: "/code/tests/conf/kafka.json" + CONNECT_RETRY_WAIT: 1 + STARTUP_CONNECTION_RETRY: 3 + + KAFKA_URL: kafka-test:29092 + SECURITY.PROTOCOL: SASL_SSL + SASL.MECHANISM: PLAIN + SASL.USERNAME: root + SASL.PASSWORD: 09049a938686107c15ed6748010e2f1b + + REDIS_DB: 2 + REDIS_HOST: localhost + REDIS_PORT: 6380 + REDIS_PASSWORD: redispassword + + ckan: + image: ehealthafrica/ckan:2.8.0-rc + environment: + # Defaults work with linked containers, change to use own Postgres, SolR, Redis or Datapusher + - DATASTORE_READONLY_PASSWORD=datastore + - DS_RO_PASS=datastore + - CKAN_SITE_URL=http://ckan:5000 + - CKAN_PORT=5000 + - CKAN_MAX_UPLOAD_SIZE_MB=20000 + - CKAN_SQLALCHEMY_URL=postgresql://ckan:ckan@ckanpg/ckan + - 
CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@ckanpg/datastore + - CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@ckanpg/datastore + - CKAN_SOLR_URL=http://solr:8983/solr/ckan + - CKAN_REDIS_URL=redis://user:redispassword@redis:6379/2 + - CKAN_DATAPUSHER_URL=http://datapusher:8800 + - POSTGRES_PASSWORD=db_password + volumes: + - ./consumer/tests/ckan/ckan:/etc/ckan + depends_on: + - ckanpg + - solr + - redis + ports: + - 5000:5000 + restart: on-failure + + datapusher: + image: clementmouchet/datapusher + ports: + - 8800:8800 + + ckanpg: + build: + context: ./consumer/tests/ckan + dockerfile: postgresql/Dockerfile + args: + - DS_RO_PASS=datastore + - POSTGRES_PASSWORD=ckan + environment: + - DS_RO_PASS=datastore + - POSTGRES_PASSWORD=ckan + + solr: + build: + context: ./consumer/tests/ckan + dockerfile: solr/Dockerfile + + redis: + image: redis:alpine + command: > + redis-server + --requirepass redispassword + --notify-keyspace-events KEA + --appendonly yes + --auto-aof-rewrite-percentage 100 + --auto-aof-rewrite-min-size 64mb diff --git a/docker-compose.test.yml b/docker-compose.test.yml deleted file mode 100644 index fbe6d48..0000000 --- a/docker-compose.test.yml +++ /dev/null @@ -1,8 +0,0 @@ -version: "3" -services: - test: - build: - context: . - dockerfile: Dockerfile.test - volumes: - - ./db/:/srv/app/db/ diff --git a/docker-compose.yml b/docker-compose.yml index 9aae5b9..f288451 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,25 +1,39 @@ -version: "3" +version: "2.1" networks: aether: external: - name: aether_internal - ckan-net: - external: - name: ckan_ckan_net + name: aether_bootstrap_net services: ckan-consumer: - build: . - image: aether-ckan-consumer - env_file: - - .env - # Temporary set to enable access to local Kafka from outside the container. 
+ extends: + file: docker-compose-base.yml + service: ckan-consumer-base + environment: + CKAN_CONSUMER_CONFIG_PATH: "/code/conf/consumer/consumer.json" + CKAN_CONSUMER_KAFKA_CONFIG_PATH: "/code/conf/consumer/kafka.json" + LOG_LEVEL: "DEBUG" + EXPOSE_PORT: 9009 + CONSUMER_NAME: 'CKAN-CONSUMER' + + REDIS_DB: 2 + REDIS_HOST: redis + REDIS_PORT: 6379 + REDIS_PASSWORD: redispassword + networks: + - aether + ports: + - 9009:9009 + + redis: + image: redis:alpine + command: > + redis-server + --requirepass redispassword + --notify-keyspace-events KEA + --appendonly yes + --auto-aof-rewrite-percentage 100 + --auto-aof-rewrite-min-size 64mb networks: - aether - - ckan-net - environment: - - CONSUMER_CONF_DIR=/srv/app - volumes: - - ./config:/srv/app/config - - ./db/:/srv/app/db/ diff --git a/example/aether-bootstrap b/example/aether-bootstrap deleted file mode 160000 index 1c706af..0000000 --- a/example/aether-bootstrap +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1c706afa2199bab52be963e4962fda133db94685 diff --git a/example/assets/aether-walkthrough-microcensus.xls b/example/assets/aether-walkthrough-microcensus.xls deleted file mode 100644 index b51203ce37b9bb58110bf1e0be36fe2da8a28a1b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 23040 zcmeHv2UrwW*YNDJz)}z z>wai^n_wKh2_xc@%#1Km!?WNX6LnY<1RHJ;{v?yhXe2?v?Z3x=kp@13tQ@%wjpbLg zrzzZXA(=rkhhzcC5>hir%^_Jq0v8}`Ahm#G3&{>rOGx&RT0v?JsSTvIklI0N52*vB zj*uK6@gO-ua)Q(ek~1V1NUo6FAh|>84582m{gt4;CV=UltF)9$maq3JApfB|K_+InT-`ied83C9_7bD_;yO>BL5-2&+)QdOi zz6HT%At|JZnrX^I7}LfKrc1JjzQ8$y$ReUah6zo1T%!!|SJuC&{E+}Dr(#SDC$fQm z1|gtyKM1UlOz8wMQbGv#nHn550e7fB^2IjXRQ{;1OL8)3Gi+C=Kcu0E0qX7l-*$(Z z{r~e|f%T0bz+>&0Z5Ly7q%ct|l8BO}y#A9C$#kB-uWtY^P9#o~q>`i*3eepFmFYbk zao)ki>+Tg_QCXiqeS#Ki-VD7^^FYk4*=t_Di%zxPwwOzJxb1H61>8nR*nTlT7!vkT zp^&f-8V?Eks9BJ(pDKie{Zc6;^m%LORIu;Ceo0lY=;UN!BFP&mA;o~leC(%G`H_M| zVWw0hNu@Oad7+T0<{#iT< z21@EJ14w`NK>Pjm*-}j}@K5RXQ-%u(MS^At_rj>L1Q374R11gbJ+^`&Yfgb^>+%nw>BmN5Hl 
z3A@jfU=dx3!)Oe>Tv))xa0xc_X`hqS#$X9g7K;cp$c6LS64${m4AVf3^)@I6mgxG~ z51Ebs)w7Qh+5ehXmx;DExVp!E#H8CuVKW8x<$Cb*_25hN;8*IwZ`Fg} zt_QDAo@IKcQ{UgQx4O&6k!3 zmPQ!z?4ao2yoQovYvN(rPvdcYUS6IDs}`-b@Qk>G>-D|YKm=Q@ej?~QpCwWgVc0t8XiXqu*gEKyO(0^}I@mOuK*X?hFy1tQh+*sC z%e*EKF>D=NW&JD>{{X|*!PkjRAY#}$xK7msB8IJli{(uqV%R#orV%l09ml2-F>D@9VhSA^7M?Lg+ zJyW+Ob0FfaB!X(ARHv>sv=Jf=2UDM>5Rru#CgR%^BFwPPRJb1yp%t#PI@${NhX+&& z$3eToLfS4C0GTSk4++W9t#Db*3N07=&-p#mfL%c-l^19=Mnxl&<*}4PX}eN44I-s+ zdR$5Ns8o%}B!=$>0uf{I6x!`%vKpm&6s^CaqC!T!)}tgp#*@inl_jf8P3oK`8ys$jar8WN5HEJit-5 zeyv#nO03oxDPi%Xz(HW8gw>8iJ(QAQM1X*$sh%b57VstyW}5!KE<7(@SYjfXDfKJj zMiNEXC&>>Mr=zSH;SW<07lH@zn@5Df``<)ZL&g0+*z$~^DOnPx95T7#_Dh*D-cywz zWG*x5YQTMc(2-)u|bhBy8WTV5=d)hxAy2}(;nFb8um0EunBB}r}+mIY|&$z4oo&WJk8kv z8+A|f4|u(6mhRegWU|rWY3>HtsC$}!Ky`JsZZ-}~Haa}b(*PTFPxB9uH6-h1!(+11 z;c31G*r$Yi6#)4Cd9qwZ<`0b9-$=wf3D@3deBsO&ex82x4k*cGj> zuOx_m;*&H{2%oqkNZwak!UC0&D0mc;s3U`@x)%||lUzUDM4eSc!GD-UT^K~weTJSq zQN21-QWq6b@D?V~&J3dJ9s*T9PUx1@RYeqhgGtnjK~&uzV3+R7I^9IwR7AlOm_+>; zMAfYi+u?SE>mtg9{pK(P>mx`i;&#`xJgXh-kL?3fvm|BLXpEyTm|mkE@gC>ka*JET zdsdk8^%bNiW~U3JLQ!TBccvymw!O>(HXh@a65M*14tJ`pzNXah3yI=5SHS?M1QY`k z*nf|iIM`-^8q^`K24toIEn!>1ds~)XSjKMGRQds^BF9y627^MMO*yn)bV6f|V(PG&Wwi%3^VImwZ5`nw$ zHOaEt!glO%AS+@x49?*d(t-F2>6*if0a))DND7i*{~QlS?2M`#S;Ln&13@D6K$Iv2 zq@+jclvLoh{YFfn$)Q7-f@E|!TEcEl_^1oPryp$pv`}5A=kAii)thpr|h{K`5Ibkw<{~oit@-5x_DET8fzZ1OiDB zAc7eT=D>-gL{hSdo1pm!+Z>t~J|YDfaIFGoL=4M;cPZAsSM|AsSl3 zwjhECSVWG_mQn_u$Y9zMNJmpu);6(voW`)GVPZt6B%vXNBs8Rugoe=5g3;4P2&L&z zdp}J^EtXDE}4Q15*#31kU*vbVKMntwopta zQMc)2jv!MCQAv=MMGC}%%tR8xL85A~0S`)~0DYy7GEGkkn3CWh1aH~mQUy|8nlLkoCraii3L|fPIs_al zflLu*#*0a)nN*XAIK5kDR<~}jSUw1glVXx55oM5AsbsMzgXW(q%9fC+qVyzQmY9@4 zg+f42$wD$c$tztbkwQ#^8jvZZ*ikME#TqJ;LZU=e9Yl$V*;%0B5C;G3K!GSA$XWBj z6woj!)_xY)LXNy5(6uYfgQkj)&mqMU7!}6H!)sx;oUF7|;eh1WoTSX8elcXWq<>=A zfbMSa5Q-d+t^jh&4F_};CX4q2U^ulCAJmV4GRI#%2LWYHfO<{<(3RT;!~%s0lR#xc zK{`OgR1oY$$pm3l`LG2gN|foPLZpMAWC$mbNpOwCD-f333!HE=DS`87QUVPyQv|0d 
z5$4nSgn*k&W(d+rxEVm*fcqm1mq@rkNx8P{;ZCQjz&RE5Ll2U>V0=oJ1Xc}{Ahs+J zTlg?+3p@daU=#~2s~)aDV+#W#1;87nJ#fv2 z>sz?i!Zix)7!5Cp&cQX2YYzzs$PX|Nc17^NjBD0xnOaV>g=}Kz5F-xm^+U8H@$jbA zF$nrZdnj)U*D-xX&@3~h@gu-UlCj>CGSas%J{wi#Yu&$b;D`MCUV}?3Lb)yvzdl`X zdi}KHKd*4Muf9^TY2^FHhzle7Z06agdo~0#uIAqrA8|0h6g^W%R_;kEJl{TP4s z{KwZWk4`^)qoAT-THv^vs7jgL_%|UBY%kW#7&8Mjg5P3l>Z|PXraz8@o&v!>&^8HZ zi|G;{Z#>Uh^y`EwU(31AW*^BvpBrdvq=Ow2Ap4 z?yQZ^8n^WNv~$6`+e`o8*_UiJsk+lQtJ;5@v`HzR^Q0Ya-37c-En}bJtBQYzRs`+~YrW_F_>Yb=@GkH5sB?Vgd1KrG=Lx+QgvUL&*x30_|3@CK z=ImhI!dJGkJ~|U3!6_d7)t#EybQSzi?8l>P7qknvKjw39M(W?*kG73*$R{^MM?Y?J zZB#|yf^NeGja~hxcg)C=AAIMGU-PzNS7EaQ9PwF4&vpZOuEihxOjbI-4}ZMv&)A9I zji^X5*_~9}rt;j_zmLQnoARt~YV^n;QOJTV6XzV@c|QEBu3gLDs;ZaP4y<_a*CMwK z!_GF&>Q`+Uv+P#Eu~R3G_{}X<1LwnnpZoV5n%-iY=={H;8oVm*->eiv| zcm0Z)RmR&fuH>UT-=OqKznS8%= z(RUNl2c4xynYKLT)Guy-t$5w*n_07bdQ9&xdv||K(Aft(+x6l_ z+Z}^0%vkZIt1o}Y!|w0yIIro-J+bzD?VB{>yF>d0k}ItP#kpp zeDsHWQS8C|wZez>Yv&c+lbVh-Yxm|;^4{)AzdTrHd$W0d%9DvzXXcOn%xrzX+%j^&cRfDDtqBbg zE?RH2&{Fu5#qxQp9Dh1(J3MMx#ogbGcN}F+e*gEImBk@PCck^NGOqmQ;SX_vKgN>P z9o8oa&U}1v<2TvI+EzE8g-m~cx>z=F)6G*Bwz8*dX5==WEq^xL%ELS2#oKKTH$T3v zx>hcGd#y3P$DWDZd-&b9ZIO8U#_2z9cCxQxclhwa>{#hZ=RPNkLTw)8Z0>mGXZFqb zPJf3pi7E;ni(S)b^CKVsK` z*S+1wS=nqFo zwlZ}7O%2pen4JaJh{0EO+Po4O&byL4K-ZN-xc|+vl@P4Bl`KK)Q&HeSo6pt6% zV_I}N8vQEw`9a|;#}VCAy5xp#+T{AY_sq9ecYYEsHDya%S00IbRK*>=cgPWoiHCO{ zxPNTHW4|lK(Ozv^T`A_}`*<16zp-?3d4}t}FKq2t0b%FH@4w(RZ`4ztej~=#C3tMQ ze*Na2+3nW)Rs6kbxwAw1^nF)XKRn-|SeLT^*YeKe+q#30ZG%e=@yR z!urC+%5Ox`jneEJbze^G_DPl-w_D^B-{}9oN5P|Ur{;FNd$71VDz3>*^OW828=tt(<-&+ij6rS^V@9ufE zxxH7}^t)R-*oS&Gnt!%^vS&qRA^WRSU{=?FPQ|^v^QnTpc!`xGQoQN+T zM{?{|XL{_MTD&Q?c+OSho*9dMPt>}u^V-m^r%k2RZOhZhOQK4eB{=yty3gky{bKr4ladGR*B=;}R%rcc&%LXKv7gfK z9e=oQ$%|%#{>UN<>4W?!=J zW(!nkW_J5_!pMt0RBHO8X7i_GwbvyC~(4$kC)YR|B(7lDm!B(z7}KA_f=&47Ox%UHML_ha$edMohA-mZ~0xH zLoI&E>HhVc)a_G?x@H`=pX|DO2P^vKH%kg+hg;<2PxpFU++|1BDDywV$}X87dNVA@ z@=={&)DFujds_`1m1GxJ_4?p+PF!p0sD!|%Q-sm$2_xFB?xeboF0e$uY-Ul)ji 
zp0rF^Jj6ZH)p?k^x8HJcbGN}uO3!^W;$7a(;0=pn?>TMwKJ4d1=LR-r=dOLRk~Qro z=OYQ$Q|pK}%Z8T7c7F_RJ3qz8!G7$gm@rAhY-9KJr)q9>TDdiXb7suJgC~DeR-MK6 zf{iBn#^F#*?a(nQA38*n0CQlfdfFHCsWwbyKFA#CsUBF?F~Pv)E9rw0Y6-}gmV%sVBx(Dobf|`F7aW@iwltn zLqp;zOc(0xL7l&bGmFThreQQq7g!dDl_%Cr%z-HyZ0~{JUmDs{elsASdSVu0;b)n+ zL;!VM3~^JtcZd7gk7D+BoP9T>Gu-bCzdglnXMIQ{d?aeb!y3bEmU>R>m^z^ zbRuRsl!=K$g)nibXC@9ki-|+mWa3a~OdKpXXyH&->g7ICzDvMnU_5Auh2LB%AvQp$ zB|6UtAk+^HaR5Ti(2y}es1q780SL8$5K`j956cEzpgWu74=;0~|Iq@m~Ln^Qm zOZ0?ge!8R)NX4h5r}I!U)C@u>6QA-U8p7vrA0Q3kbEq2{QhyGm=ffg1orkX`dcsP) z0>bCe%kVj4kVeH1>&eI0pj00I)Im>JVpl+HfchvQ4nSdY2z9`RwojMDmrgwaRzS!P zmN6-srW*WETPl9IHr*5c=#pH}OHX+7tANldxJ-jmqlfXK3sFG$9ImI+dC0G&5>l@p zdSOrM*TAqO+OsFoPhQfT;@4jZsXvGI;8Qh7P>z< zKTv2Mue$mJjQ?G%G~+#O42%$pcyFX5-ZOC+ZJ0Qu!Nh^T z4y)u;NjhHoDj_V18#nMdGm56a96~GcVLJt#hq~v(h64qJG;wD`Jw&Rlp!%O-E06Xc z6^)=2L4;Cg`lL4+(b;HnJpPB#2>l^#5J9|x^zZ1D(kOM(6^$^Xi>GEZLj6&1P?XV# zts@$#&2 - exit 1 -} - -set_environment () { - export CKAN_SITE_ID=${CKAN_SITE_ID} - export CKAN_SITE_URL=${CKAN_SITE_URL} - export CKAN_SQLALCHEMY_URL=${CKAN_SQLALCHEMY_URL} - export CKAN_SOLR_URL=${CKAN_SOLR_URL} - export CKAN_REDIS_URL=${CKAN_REDIS_URL} - export CKAN_STORAGE_PATH=/var/lib/ckan - export CKAN_DATAPUSHER_URL=${CKAN_DATAPUSHER_URL} - export CKAN_DATASTORE_WRITE_URL=${CKAN_DATASTORE_WRITE_URL} - export CKAN_DATASTORE_READ_URL=${CKAN_DATASTORE_READ_URL} - export CKAN_SMTP_SERVER=${CKAN_SMTP_SERVER} - export CKAN_SMTP_STARTTLS=${CKAN_SMTP_STARTTLS} - export CKAN_SMTP_USER=${CKAN_SMTP_USER} - export CKAN_SMTP_PASSWORD=${CKAN_SMTP_PASSWORD} - export CKAN_SMTP_MAIL_FROM=${CKAN_SMTP_MAIL_FROM} - export CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB} -} - -write_config () { - ckan-paster make-config --no-interactive ckan "$CONFIG" -} - -# If we don't already have a config file, bootstrap -if [ ! 
-e "$CONFIG" ]; then - write_config -fi - -# Get or create CKAN_SQLALCHEMY_URL -if [ -z "$CKAN_SQLALCHEMY_URL" ]; then - abort "ERROR: no CKAN_SQLALCHEMY_URL specified in docker-compose.yml" -fi - -if [ -z "$CKAN_SOLR_URL" ]; then - abort "ERROR: no CKAN_SOLR_URL specified in docker-compose.yml" -fi - -if [ -z "$CKAN_REDIS_URL" ]; then - abort "ERROR: no CKAN_REDIS_URL specified in docker-compose.yml" -fi - -if [ -z "$CKAN_DATAPUSHER_URL" ]; then - abort "ERROR: no CKAN_DATAPUSHER_URL specified in docker-compose.yml" -fi - -set_environment -ckan-paster --plugin=ckan datastore set-permissions -c "${CKAN_CONFIG}/production.ini" | sudo -u postgres psql --set ON_ERROR_STOP=1 -ckan-paster --plugin=ckan db init -c "${CKAN_CONFIG}/production.ini" -exec "$@" diff --git a/example/ckan/docker-compose.yml b/example/ckan/docker-compose.yml deleted file mode 100644 index 5b77a5c..0000000 --- a/example/ckan/docker-compose.yml +++ /dev/null @@ -1,86 +0,0 @@ -# docker-compose build && docker-compose up -d -# If "docker-compose logs ckan" shows DB not ready, run "docker-compose restart ckan" a few times. -version: "3" - -# TEST - -services: - ckan: - container_name: ckan - image: ckan/ckan:latest - # build: - # context: . 
- # args: - # - CKAN_SITE_URL=${CKAN_SITE_URL} - links: - - ckanpg - - solr - - redis - ports: - - "0.0.0.0:5000:5000" - environment: - # Defaults work with linked containers, change to use own Postgres, SolR, Redis or Datapusher - - DATASTORE_READONLY_PASSWORD=datastore - - DS_RO_PASS=datastore - - POSTGRES_PASSWORD=ckan - - CKAN_SITE_URL=http://localhost:5000 - - CKAN_PORT=5000 - - CKAN_MAX_UPLOAD_SIZE_MB=20000 - - CKAN_SQLALCHEMY_URL=postgresql://ckan:ckan@ckanpg/ckan - - CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@ckanpg/datastore - - CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@ckanpg/datastore - - CKAN_SOLR_URL=http://solr:8983/solr/ckan - - CKAN_REDIS_URL=redis://redis:6379/1 - - CKAN_DATAPUSHER_URL=http://datapusher:8800 - #- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB} - #- POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - #- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD} - volumes: - - ./ckan:/etc/ckan - networks: - - aether - - ckan_net - restart: on-failure - - datapusher: - container_name: datapusher - image: clementmouchet/datapusher - ports: - - "8800:8800" - networks: - - ckan_net - - ckanpg: - container_name: ckanpg - build: - context: . - dockerfile: postgresql/Dockerfile - args: - - DS_RO_PASS=datastore - - POSTGRES_PASSWORD=ckan - environment: - - DS_RO_PASS=datastore - - POSTGRES_PASSWORD=ckan - networks: - - ckan_net - - solr: - container_name: solr - build: - context: . 
- dockerfile: solr/Dockerfile - networks: - - ckan_net - - redis: - container_name: redis - image: redis:latest - networks: - - ckan_net - -networks: - aether: - external: - name: aether_internal - ckan_net: - driver: bridge diff --git a/example/ckan/solr/schema.xml b/example/ckan/solr/schema.xml deleted file mode 100644 index 8e5018a..0000000 --- a/example/ckan/solr/schema.xml +++ /dev/null @@ -1,188 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -index_id -text - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/example/ckan/solr/solrconfig.xml b/example/ckan/solr/solrconfig.xml deleted file mode 100644 index 9ac620c..0000000 --- a/example/ckan/solr/solrconfig.xml +++ /dev/null @@ -1,343 +0,0 @@ - - - - - - 6.0.0 - - - - - - - - - - - ${solr.data.dir:} - - - - - - - ${solr.lock.type:native} - - - - - - - - ${solr.ulog.dir:} - ${solr.ulog.numVersionBuckets:65536} - - - - ${solr.autoCommit.maxTime:15000} - false - - - - ${solr.autoSoftCommit.maxTime:-1} - - - - - - - 1024 - - - - - true - 20 - 200 - - - - - - - - - - false - 2 - - - - - - - - - - - - - - explicit - 10 - - - - - - - - explicit - json - true - - - - - - - - explicit - - - - - - - - _text_ - - - - - - - - add-unknown-fields-to-the-schema - - - - - - - - true - ignored_ - _text_ - - - - - - - - - - - explicit - true - - - - - - - text_general - - - default - _text_ - solr.DirectSolrSpellChecker - internal - 0.5 - 2 - 1 - 5 - 4 - 0.01 - - - - - - - - default - on - true - 10 - 5 - 5 - true - true - 10 - 5 - - - - spellcheck - - - - - - - - - - true - - - tvComponent - - - - - - - - - - true - false - - - - terms - - - - - - - string - elevate.xml - - - - - - - explicit - - - elevator - - - - - - - - - - - 100 - - - - - - 70 - 0.5 - [-\w ,/\n\"']{20,200} - - 
- - - - ]]> - ]]> - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - - 10 - .,!? - - - - - - WORD - en - US - - - - - - - - - - - - - - - - [^\w-\.] - _ - - - - - - - yyyy-MM-dd'T'HH:mm:ss.SSSZ - yyyy-MM-dd'T'HH:mm:ss,SSSZ - yyyy-MM-dd'T'HH:mm:ss.SSS - yyyy-MM-dd'T'HH:mm:ss,SSS - yyyy-MM-dd'T'HH:mm:ssZ - yyyy-MM-dd'T'HH:mm:ss - yyyy-MM-dd'T'HH:mmZ - yyyy-MM-dd'T'HH:mm - yyyy-MM-dd HH:mm:ss.SSSZ - yyyy-MM-dd HH:mm:ss,SSSZ - yyyy-MM-dd HH:mm:ss.SSS - yyyy-MM-dd HH:mm:ss,SSS - yyyy-MM-dd HH:mm:ssZ - yyyy-MM-dd HH:mm:ss - yyyy-MM-dd HH:mmZ - yyyy-MM-dd HH:mm - yyyy-MM-dd - - - - - - - text/plain; charset=UTF-8 - - - - ${velocity.template.base.dir:} - ${velocity.solr.resource.loader.enabled:true} - ${velocity.params.resource.loader.enabled:false} - - - - 5 - - - diff --git a/example/gather/docker-compose-base.yml b/example/gather/docker-compose-base.yml deleted file mode 100644 index fbc9765..0000000 --- a/example/gather/docker-compose-base.yml +++ /dev/null @@ -1,47 +0,0 @@ -version: "2.1" - -services: - - # --------------------------------- - # Gather container - # --------------------------------- - - gather-base: - image: ehealthafrica/gather:3.0.0-rc - stdin_open: true - tty: true - environment: - CAS_SERVER_URL: https://ums-dev.ehealthafrica.org - CSRF_COOKIE_DOMAIN: gather.local - HOSTNAME: gather.local - DEBUG: "true" - - DJANGO_SECRET_KEY: "T7cyDESETLHDz43xf3Spx9NN6UdbDkpeREfy6fwATcckcwXUUvnqfHcwyNLHrXAZ" - STATIC_ROOT: /code/gather/static - - ADMIN_PASSWORD: adminadmin - - AETHER_KERNEL_TOKEN: a2d6bc20ad16ec8e715f2f42f54eb00cbbea2d24 - AETHER_KERNEL_URL: http://kernel:8000 - AETHER_KERNEL_URL_TEST: http://kernel-test:9000 - - AETHER_MODULES: "kernel,odk," - - AETHER_ODK_TOKEN: a2d6bc20ad16ec8e715f2f42f54eb00cbbea2d24 - AETHER_ODK_URL: http://odk:8443 - AETHER_ODK_URL_TEST: http://odk-test:9443 - - RDS_DB_NAME: gather - RDS_HOSTNAME: db - RDS_PASSWORD: "" - RDS_PORT: 5432 - RDS_USERNAME: postgres - - CSV_HEADER_RULES: 
remove-prefix;payload.,remove-prefix;None.,replace;.;:; - CSV_HEADER_RULES_SEP: ; - CSV_MAX_ROWS_SIZE: 1000 - - WEB_SERVER_PORT: 8005 - ports: - - "8005:8005" - command: start_dev diff --git a/example/gather/docker-compose.yml b/example/gather/docker-compose.yml deleted file mode 100644 index 25abbad..0000000 --- a/example/gather/docker-compose.yml +++ /dev/null @@ -1,21 +0,0 @@ -version: "2.1" - -networks: - aether: - external: - name: aether_internal - -services: - -# --------------------------------- - # Gather container - # --------------------------------- - - gather: - extends: - file: docker-compose-base.yml - service: gather-base - networks: - aether: - aliases: - - gather.local diff --git a/example/scripts/stop_aether.sh b/example/scripts/stop_aether.sh deleted file mode 100755 index 51746aa..0000000 --- a/example/scripts/stop_aether.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org -# -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -set -Eeuo pipefail - -pushd aether-bootstrap -scripts/kill_all.sh -popd diff --git a/example/scripts/stop_ckan.sh b/example/scripts/stop_ckan.sh deleted file mode 100755 index 4fbf3a7..0000000 --- a/example/scripts/stop_ckan.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org -# -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -set -Eeuo pipefail - -cd ckan -docker-compose -f docker-compose.yml kill diff --git a/example/scripts/wipe_aether.sh b/example/scripts/wipe_aether.sh deleted file mode 100755 index f3ddb46..0000000 --- a/example/scripts/wipe_aether.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org -# -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# -set -Eeuo pipefail - -pushd aether-bootstrap -docker-compose -f docker-compose.yml down -docker-compose -f docker-compose-connect.yml down -sudo rm -r ./.persistent_data -popd -pushd gather -docker-compose -f docker-compose.yml down -popd diff --git a/example/scripts/wipe_ckan.sh b/example/scripts/wipe_ckan.sh deleted file mode 100755 index 05efd18..0000000 --- a/example/scripts/wipe_ckan.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org -# -# See the NOTICE file distributed with this work for additional information -# regarding copyright ownership. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -set -Euo pipefail - -pushd ckan -docker-compose -f docker-compose.yml kill -docker-compose -f docker-compose.yml down -popd -pushd ckan-consumer -docker-compose -f docker-compose.yml kill -docker-compose -f docker-compose.yml down -pushd db -sudo rm consumer.db -popd diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8f51aa4..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -aet.consumer==0.9.1 -jsonschema==2.6.0 -requests==2.18.4 -kafka-python==1.4.2 -avro==1.8.2 -sqlalchemy==1.2.0 -ckanapi==4.1 diff --git a/scripts/build_local.sh b/scripts/build_local.sh new file mode 100755 index 0000000..c13da7a --- /dev/null +++ b/scripts/build_local.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +set -Eeuo pipefail + +# Build docker images +IMAGE_REPO='ehealthafrica' +COMPOSE_PATH='docker-compose.yml' +VERSION='local' + +APP_NAME="ckan-consumer" +AETHER_APP="aether-${APP_NAME}" +echo "$AETHER_APP" +echo "version: $VERSION" +echo "Building Docker image ${IMAGE_REPO}/${AETHER_APP}:${VERSION}" +docker-compose -f $COMPOSE_PATH build --build-arg VERSION=$VERSION $APP_NAME + +docker tag ${AETHER_APP} "${IMAGE_REPO}/${AETHER_APP}:${VERSION}" diff --git a/scripts/release.sh b/scripts/release.sh index 3f745e2..58f761b 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. @@ -31,8 +31,7 @@ set -Eeuo pipefail IMAGE_REPO='ehealthafrica' CORE_APPS=( ckan-consumer ) CORE_COMPOSE='docker-compose.yml' -VERSION=`cat VERSION` -TRAVIS_COMMIT=`git rev-parse HEAD` +VERSION=$TRAVIS_TAG release_app () { APP_NAME=$1 @@ -40,7 +39,6 @@ release_app () { AETHER_APP="aether-${1}" echo "$AETHER_APP" echo "version: $VERSION" - echo "TRAVIS_COMMIT: $TRAVIS_COMMIT" echo "Building Docker image ${IMAGE_REPO}/${AETHER_APP}:${VERSION}" docker-compose -f $COMPOSE_PATH build --build-arg VERSION=$VERSION $APP_NAME @@ -50,11 +48,6 @@ release_app () { } -if [ -z "$TRAVIS_TAG" ]; -then - VERSION=${VERSION}-rc -fi - for APP in "${CORE_APPS[@]}" do release_app $APP $CORE_COMPOSE diff --git a/scripts/run_integration_tests.sh b/scripts/run_integration_tests.sh new file mode 100755 index 0000000..d96a1e7 --- /dev/null +++ b/scripts/run_integration_tests.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +set -Eeuo pipefail + +docker-compose -f ./docker-compose-test.yml run --rm consumer-test test_integration +docker-compose -f docker-compose-test.yml down \ No newline at end of file diff --git a/scripts/run_travis.sh b/scripts/run_travis.sh new file mode 100755 index 0000000..142e331 --- /dev/null +++ b/scripts/run_travis.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org +# +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +set -Eeuo pipefail + +docker-compose -f ./docker-compose-test.yml up -d ckan +scripts/run_unit_tests.sh +scripts/run_integration_tests.sh +docker-compose -f ./docker-compose-test.yml down diff --git a/example/scripts/run_consumer.sh b/scripts/run_unit_tests.sh similarity index 82% rename from example/scripts/run_consumer.sh rename to scripts/run_unit_tests.sh index 6f9c312..a597bf7 100755 --- a/example/scripts/run_consumer.sh +++ b/scripts/run_unit_tests.sh @@ -1,6 +1,6 @@ -#!/usr/bin/env bash +#!/bin/bash # -# Copyright (C) 2018 by eHealth Africa : http://www.eHealthAfrica.org +# Copyright (C) 2020 by eHealth Africa : http://www.eHealthAfrica.org # # See the NOTICE file distributed with this work for additional information # regarding copyright ownership. @@ -20,5 +20,5 @@ # set -Eeuo pipefail -docker-compose -f ckan-consumer/docker-compose.yml up --build +docker-compose -f docker-compose-test.yml run --rm consumer-test test_unit diff --git a/tests/fixtures/config.json b/tests/fixtures/config.json deleted file mode 100644 index 1ca2914..0000000 --- a/tests/fixtures/config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "foo": 1 -} diff --git a/tests/fixtures/config.schema b/tests/fixtures/config.schema deleted file mode 100644 index 41a519a..0000000 --- a/tests/fixtures/config.schema +++ /dev/null @@ -1,6 +0,0 @@ -{ - "type": "object", - "properties": { - "foo": {"type": "number"} - } -} diff --git a/tests/fixtures/config_malformed.json b/tests/fixtures/config_malformed.json deleted file mode 100644 index a169842..0000000 --- a/tests/fixtures/config_malformed.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "foo: 1 -} diff --git a/tests/fixtures/config_not_valid.json b/tests/fixtures/config_not_valid.json deleted file mode 100644 index e63d37b..0000000 --- a/tests/fixtures/config_not_valid.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "foo": "bar" -} diff --git a/tests/test_config.py b/tests/test_config.py deleted file mode 100644 index 59f3efd..0000000 --- a/tests/test_config.py 
+++ /dev/null @@ -1,71 +0,0 @@ -import unittest -import os - -from consumer import config as Config - - -class TestConfig(unittest.TestCase): - def test_read_file(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - file_name = 'config.json' - - contents = Config.read_file(dir_path, file_name) - - assert len(contents) > 0 - - def test_read_file_non_existing(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - file_name = 'non_existing_file.json' - - with self.assertRaises(SystemExit) as context: - Config.read_file(dir_path, file_name) - - def test_parse_json_from_file(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - file_name = 'config.json' - - contents = Config.parse_json_from_file( - dir_path, - file_name - ) - - assert type(contents) == dict - assert contents.get('foo') == 1 - - def test_parse_json_from_file_malformed(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - file_name = 'config_malformed.json' - - with self.assertRaises(SystemExit) as context: - Config.parse_json_from_file(dir_path, file_name) - - def test_validate_config(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - config_file = 'config.json' - schema_file = 'config.schema' - - config = Config.validate_config( - dir_path, - config_file, - schema_file - ) - - def test_validate_config_not_valid(self): - dir_path = os.path.join(os.getcwd(), 'tests', 'fixtures') - config_file = 'config_not_valid.json' - schema_file = 'config.schema' - - with self.assertRaises(SystemExit) as context: - Config.validate_config( - dir_path, - config_file, - schema_file - ) - - def test_get_config(self): - self.test_validate_config() - - config = Config.get_config() - - assert type(config) is dict - assert config.get('foo') == 1 diff --git a/tests/test_core/test_dataset_manager.py b/tests/test_core/test_dataset_manager.py deleted file mode 100644 index 1b3bb77..0000000 --- a/tests/test_core/test_dataset_manager.py +++ /dev/null @@ 
-1,62 +0,0 @@ -import unittest -import os - -import pook -import sqlalchemy - -from consumer.core.dataset_manager import DatasetManager -from consumer import db -from consumer.config import parse_json_from_file, validate_config - - -class TestDatasetManager(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(TestDatasetManager, self).__init__(*args, **kwargs) - - url = 'sqlite://' - db.init(url) - - config = { - 'ckan_url': 'http://ckan-server1.com', - 'server_name': 'Test server', - 'dataset': { - 'metadata': { - 'title': 'Some title', - 'name': 'Some name', - 'notes': 'Some description', - }, - 'resources': [ - { - 'metadata': { - 'title': 'Sensor data', - 'description': 'Sensor data from wind turbines', - 'name': 'sensor-data' - }, - 'topics': [ - { - 'name': 'wind-turbines', - 'number_of_consumers': 1 - } - ] - } - ] - } - } - - self.dataset_manager = DatasetManager(None, config) - - def test_update_metadata_from_defaults(self): - dir_path = os.getcwd() - file_name = os.path.join('config', 'dataset_metadata.json') - default_metadata = parse_json_from_file(dir_path, file_name) - updated_metadata = self.dataset_manager.update_metadata_from_defaults( - { - 'author': 'Overwritten author', - 'maintainer': 'Overwritten author' - } - ) - - assert default_metadata.get('author') != updated_metadata.get('author') - assert default_metadata.get('maintainer') != \ - updated_metadata.get('maintainer') diff --git a/tests/test_core/test_process_manager.py b/tests/test_core/test_process_manager.py deleted file mode 100644 index 6147518..0000000 --- a/tests/test_core/test_process_manager.py +++ /dev/null @@ -1,85 +0,0 @@ -import unittest -import os -import signal - -from mock import Mock -import pook - -from consumer.core.process_manager import ProcessManager -from consumer.core.server_manager import ServerManager -from consumer import db - - -class TestProcessManager(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(TestProcessManager, 
self).__init__(*args, **kwargs) - - self.process_manager = ProcessManager() - - url = 'sqlite://' - db.init(url) - - def test_run(self): - self.process_manager.listen_stop_signal = Mock() - self.process_manager.spawn_server_managers = Mock() - self.process_manager.run() - - assert self.process_manager.listen_stop_signal.called is True - assert self.process_manager.spawn_server_managers.called is True - - @pook.activate - def test_spawn_server_managers(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(200) \ - .json({'success': True}) - - pook.get('http://ckan-server2.com/api/action/status_show') \ - .reply(200) \ - .json({'success': True}) - - config = { - 'ckan_servers': [ - {'url': 'http://ckan-server1.com', 'datasets': []}, - {'url': 'http://ckan-server2.com', 'datasets': []} - ] - } - - self.process_manager.spawn_server_managers(config) - - assert len(self.process_manager.server_managers) == \ - len(config.get('ckan_servers')) - assert type(self.process_manager.server_managers[0]) is ServerManager - assert type(self.process_manager.server_managers[1]) is ServerManager - - @pook.activate - def test_spawn_server_managers_with_404(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(200) \ - .json({'success': True}) - - pook.get('http://ckan-server2.com/api/action/status_show') \ - .reply(404) \ - .json({'success': True}) - - config = { - 'ckan_servers': [ - {'url': 'http://ckan-server1.com', 'datasets': []}, - {'url': 'http://ckan-server2.com', 'datasets': []} - ] - } - - with self.assertRaises(SystemExit): - self.process_manager.spawn_server_managers(config) - - def test_on_stop_handler(self): - assert(True) - ''' - self.process_manager.spawn_server_managers = Mock() - self.process_manager.on_stop_handler = Mock() - self.process_manager.run() - - os.kill(os.getpid(), signal.SIGTERM) - - assert self.process_manager.on_stop_handler.called is True - ''' diff --git a/tests/test_core/test_resource_manager.py 
b/tests/test_core/test_resource_manager.py deleted file mode 100644 index 8468c6b..0000000 --- a/tests/test_core/test_resource_manager.py +++ /dev/null @@ -1,123 +0,0 @@ -import unittest - -from consumer import db -from consumer.core.resource_manager import ResourceManager - - -class TestResourceManager(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(TestResourceManager, self).__init__(*args, **kwargs) - - url = 'sqlite://' - db.init(url) - - config = { - 'ckan_url': 'http://ckan-server1.com', - 'server_name': 'Test server', - 'resource': { - 'metadata': { - 'title': 'Sensor data', - 'description': 'Sensor data from wind turbines', - 'name': 'sensor-data' - }, - 'topics': [ - { - 'name': 'wind-turbines', - 'number_of_consumers': 1 - } - ] - }, - 'dataset': { - 'metadata': { - 'title': 'Some title', - 'name': 'Some name', - 'notes': 'Some description', - }, - 'resources': [ - { - 'metadata': { - 'title': 'Sensor data', - 'description': 'Sensor data from wind turbines', - 'name': 'sensor-data' - }, - 'topics': [ - { - 'name': 'wind-turbines', - 'number_of_consumers': 1 - } - ] - } - ] - } - } - - self.resource_manager = ResourceManager(None, config) - - def test_get_schema_changes_add_field(self): - schema = [ - {'id': 'name', 'type': 'text'}, - {'id': 'age', 'type': 'int4'}, - ] - - fields = [ - {'id': 'name', 'type': 'text'}, - {'id': 'age', 'type': 'int4'}, - {'id': 'gender', 'type': 'text'}, - ] - - schema_changes = self.resource_manager.get_schema_changes( - schema, fields - ) - - assert len(schema_changes) == 1 - assert schema_changes[0] == {'id': 'gender', 'type': 'text'} - - def test_get_schema_changes_remove_field(self): - schema = [ - {'id': 'name', 'type': 'text'}, - {'id': 'age', 'type': 'int4'}, - ] - - fields = [ - {'id': 'name', 'type': 'text'}, - ] - - schema_changes = self.resource_manager.get_schema_changes( - schema, fields - ) - - assert len(schema_changes) == 0 - - def test_get_schema_changes_same_fields(self): - schema = [ - 
{'id': 'name', 'type': 'text'}, - {'id': 'age', 'type': 'int4'}, - ] - - fields = [ - {'id': 'name', 'type': 'text'}, - {'id': 'age', 'type': 'int4'}, - ] - - schema_changes = self.resource_manager.get_schema_changes( - schema, fields - ) - - assert len(schema_changes) == 0 - - def test_convert_item_to_array(self): - self.resource_manager.schema = [ - {'id': 'name', 'type': 'text'}, - {'id': 'scores', 'type': '_int4'}, - ] - - records = [ - {'name': 'Aleksandar', 'scores': [10, 11]}, - {'name': 'Ana', 'scores': 8}, - ] - - records = self.resource_manager.convert_item_to_array(records) - - assert len(records) == 2 - assert records[1].get('scores') == [8] diff --git a/tests/test_core/test_server_manager.py b/tests/test_core/test_server_manager.py deleted file mode 100644 index 16dbd8a..0000000 --- a/tests/test_core/test_server_manager.py +++ /dev/null @@ -1,117 +0,0 @@ -import unittest - -import pook - -from consumer.core.server_manager import ServerManager -from consumer.core.dataset_manager import DatasetManager - - -class TestServerManager(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(TestServerManager, self).__init__(*args, **kwargs) - - config = { - 'url': 'http://ckan-server1.com', - 'datasets': [] - } - - self.server_manager = ServerManager(None, config) - - @pook.activate - def test_check_server_availability(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(200) \ - .json({'success': True}) - - server_config = { - 'url': 'http://ckan-server1.com', - 'datasets': [] - } - - server_available = self.server_manager.check_server_availability( - server_config - ) - - assert server_available is True - - @pook.activate - def test_check_server_availability_with_404(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(404) \ - .json({'success': True}) - - server_config = { - 'url': 'http://ckan-server1.com', - 'datasets': [] - } - - server_available = 
self.server_manager.check_server_availability( - server_config - ) - - assert server_available is False - - @pook.activate - def test_check_server_availability_with_no_success(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(404) \ - .json({'success': False}) - - server_config = { - 'url': 'http://ckan-server1.com', - 'datasets': [] - } - - server_available = self.server_manager.check_server_availability( - server_config - ) - - assert server_available is False - - @pook.activate - def test_check_server_availability_with_no_json(self): - pook.get('http://ckan-server1.com/api/action/status_show') \ - .reply(404) \ - .body('i am a body') - - server_config = { - 'url': 'http://ckan-server1.com', - 'datasets': [] - } - - server_available = self.server_manager.check_server_availability( - server_config - ) - - assert server_available is False - - @pook.activate - def test_spawn_dataset_managers(self): - data = { - 'error': { - 'message': 'Not found', - '__type': 'Not Found Error' - } - } - - pook.post('http://ckan-server1.com/api/action/package_show') \ - .reply(200) \ - .json({'success': True, 'result': {}}) - - config = { - 'url': 'http://ckan-server1.com', - 'datasets': [{ - 'metadata': { - 'title': 'Some title', - 'name': 'Some name', - 'notes': 'Some description', - }, - 'resources': [], - }] - } - - self.server_manager.spawn_dataset_managers(config) - - assert len(self.server_manager.dataset_managers) == 1 - assert type(self.server_manager.dataset_managers[0]) is DatasetManager diff --git a/tests/test_core/test_topic_manager.py b/tests/test_core/test_topic_manager.py deleted file mode 100644 index 9a1ef53..0000000 --- a/tests/test_core/test_topic_manager.py +++ /dev/null @@ -1,247 +0,0 @@ -import unittest - -from consumer import db -from consumer.core.topic_manager import TopicManager - - -class TestTopicManager(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super(TestTopicManager, self).__init__(*args, **kwargs) - - 
url = 'sqlite://' - db.init(url) - - config = { - 'topic': { - 'name': 'test', - 'number_of_consumers': 1 - }, - 'server_name': 'Test server', - 'dataset_name': 'test', - 'resource_name': 'test', - } - - self.topic_manager = TopicManager(None, config) - - def test_extract_fields_from_schema(self): - schema = [ - { - "fields": [ - { - "jsonldPredicate": { - "_type": "@id", - "_id": "http://demo.eha.org/GeoLocation" - }, - "type": [ - "string", - { - "items": "string", - "type": "array" - } - ], - "name": "GeoLocation-id" - } - ], - "type": "record", - "name": "http://demo.eha.org/centralPoint" - }, - { - "fields": [ - { - "jsonldPredicate": { - "_type": "@id", - "_id": "http://demo.eha.org/GeoLocation" - }, - "type": [ - "string", - { - "items": "string", - "type": "array" - } - ], - "name": "GeoLocation-id" - } - ], - "type": "record", - "name": "http://demo.eha.org/location" - }, - { - "fields": [ - { - "jsonldPredicate": { - "_type": "@id", - "_id": "http://demo.eha.org/GeoLocation" - }, - "type": [ - "string", - { - "items": "string", - "type": "array" - } - ], - "name": "GeoLocation-id" - } - ], - "type": "record", - "name": "http://demo.eha.org/perimeter" - }, - { - "extends": "http://demo.eha.org/BaseModel", - "type": "record", - "name": "http://demo.eha.org/Place", - "aetherBaseSchema": True, - "fields": [ - { - "jsonldPredicate": "@id", - "type": "string", - "name": "id", - "inherited_from": "http://demo.eha.org/BaseModel" - }, - { - "jsonldPredicate": "http://demo.eha.org/centralPoint", - "type": [ - "null", - "http://demo.eha.org/centralPoint" - ], - "name": "centralPoint" - }, - { - "doc": "A description of the thing.", - "jsonldPredicate": "http://demo.eha.org/description", - "type": [ - "null", - "string", - { - "items": "string", - "type": "array" - } - ], - "name": "description" - }, - { - "doc": "A Name", - "jsonldPredicate": "http://demo.eha.org/name", - "type": [ - "null", - "string", - { - "items": "string", - "type": "array" - } - ], - "name": 
"name" - }, - { - "jsonldPredicate": "http://demo.eha.org/perimeter", - "type": [ - "null", - "http://demo.eha.org/perimeter" - ], - "name": "perimeter" - }, - { - "jsonldPredicate": "http://demo.eha.org/location", - "type": [ - "null", - "http://demo.eha.org/location" - ], - "name": "location" - } - ] - } - ] - - fields = self.topic_manager.extract_fields_from_schema(schema) - - assert len(fields) == 6 - assert fields[0] == {'type': 'string', 'name': 'id'} - assert fields[1] == { - 'type': ['null', 'http://demo.eha.org/centralPoint'], - 'name': 'centralPoint' - } - assert fields[2] == { - 'type': ['null', 'string', {'items': 'string', 'type': 'array'}], - 'name': 'description' - } - assert fields[3] == { - 'type': ['null', 'string', {'items': 'string', 'type': 'array'}], - 'name': 'name' - } - assert fields[4] == { - 'type': ['null', 'http://demo.eha.org/perimeter'], - 'name': 'perimeter' - } - assert fields[5] == { - 'type': ['null', 'http://demo.eha.org/location'], - 'name': 'location' - } - - def test_prepare_fields_for_resource(self): - fields = [ - {'type': 'string', 'name': 'string_field'}, - {'type': 'int', 'name': 'int_field'}, - {'type': 'boolean', 'name': 'boolean_field'}, - {'type': 'long', 'name': 'long_field'}, - {'type': 'float', 'name': 'float_field'}, - {'type': 'double', 'name': 'double_field'}, - {'type': 'bytes', 'name': 'bytes_field'}, - {'type': {'type': 'record'}, 'name': 'record_field'}, - {'type': {'type': 'map'}, 'name': 'map_field'}, - {'type': ['null', 'string', 'int'], 'name': 'union_field'}, - { - 'type': {'type': 'array', 'items': 'string'}, - 'name': 'array_field' - } - ] - - resource_fields = self.topic_manager.prepare_fields_for_resource( - fields - ) - - assert resource_fields[0] == {'type': 'text', 'id': 'string_field'} - assert resource_fields[1] == {'type': 'int4', 'id': 'int_field'} - assert resource_fields[2] == {'type': 'bool', 'id': 'boolean_field'} - assert resource_fields[3] == {'type': 'int8', 'id': 'long_field'} - 
assert resource_fields[4] == {'type': 'float4', 'id': 'float_field'} - assert resource_fields[5] == {'type': 'float8', 'id': 'double_field'} - assert resource_fields[6] == {'type': 'bytea', 'id': 'bytes_field'} - assert resource_fields[7] == {'type': 'json', 'id': 'record_field'} - assert resource_fields[8] == {'type': 'json', 'id': 'map_field'} - assert resource_fields[9] == {'type': 'text', 'id': 'union_field'} - assert resource_fields[10] == {'type': '_text', 'id': 'array_field'} - - def test_is_field_primitive_type(self): - assert self.topic_manager.is_field_primitive_type({ - 'type': 'string' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'boolean' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'int' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'long' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'float' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'double' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'bytes' - }) is True - assert self.topic_manager.is_field_primitive_type({ - 'type': 'record' - }) is False - assert self.topic_manager.is_field_primitive_type({ - 'type': 'map' - }) is False - assert self.topic_manager.is_field_primitive_type({ - 'type': 'array' - }) is False - assert self.topic_manager.is_field_primitive_type({ - 'type': ['null', 'int'] - }) is False diff --git a/tests/test_db.py b/tests/test_db.py deleted file mode 100644 index d036639..0000000 --- a/tests/test_db.py +++ /dev/null @@ -1,117 +0,0 @@ -import unittest -import uuid - -import sqlalchemy - -from consumer import db - - -def gen_uuid(): - return str(uuid.uuid4()) - - -class TestDB(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(TestDB, self).__init__(*args, **kwargs) - self.db_url = 'sqlite://' - - @classmethod - def tearDownClass(cls): - engine = db.get_engine() - sql = 
sqlalchemy.text('DROP TABLE IF EXISTS resource;') - engine.execute(sql) - sql = sqlalchemy.text('DROP TABLE IF EXISTS ckan_server;') - engine.execute(sql) - - def test_init(self): - db.init(self.db_url) - - engine = db.get_engine() - created_tables = engine.table_names() - expected_tables = ['ckan_server', 'resource'] - - assert len(created_tables) == len(expected_tables) - assert set(created_tables).issuperset(set(expected_tables)) is True - - def test_get_session(self): - db.init(self.db_url) - - session = db.get_session() - - assert isinstance(session, sqlalchemy.orm.session.Session) is True - - def test_get_engine(self): - db.init(self.db_url) - - engine = db.get_engine() - - assert isinstance(engine, sqlalchemy.engine.Engine) is True - - def test_make_uuid(self): - uuid = db.make_uuid() - - assert len(uuid) == 36 - - def test_create_resource(self): - db.init(self.db_url) - - data = { - 'resource_name': gen_uuid(), - 'dataset_name': gen_uuid(), - 'ckan_server_id': gen_uuid(), - 'resource_id': gen_uuid() - } - - resource = db.Resource.create(**data) - - assert resource.resource_name == data.get('resource_name') - assert resource.dataset_name == data.get('dataset_name') - assert resource.ckan_server_id == data.get('ckan_server_id') - assert resource.resource_id == data.get('resource_id') - - def test_get_resource(self): - db.init(self.db_url) - - data = { - 'resource_name': gen_uuid(), - 'dataset_name': gen_uuid(), - 'ckan_server_id': gen_uuid(), - 'resource_id': gen_uuid() - } - - db.Resource.create(**data) - - resource = db.Resource.get( - resource_name=data.get('resource_name'), - ckan_server_id=data.get('ckan_server_id'), - dataset_name=data.get('dataset_name') - ) - - assert resource.resource_name == data.get('resource_name') - assert resource.dataset_name == data.get('dataset_name') - assert resource.ckan_server_id == data.get('ckan_server_id') - assert resource.resource_id == data.get('resource_id') - - def test_create_ckan_server(self): - 
db.init(self.db_url) - - data = { - 'ckan_server_url': gen_uuid() - } - - ckan_server = db.CkanServer.create(**data) - - assert ckan_server.ckan_server_url == data.get('ckan_server_url') - - def test_get_ckan_server(self): - db.init(self.db_url) - - data = { - 'ckan_server_url': gen_uuid() - } - - db.CkanServer.create(**data) - - ckan_server = db.CkanServer.get_by_url(**data) - - assert ckan_server.ckan_server_url == data.get('ckan_server_url') From c364b47b708a1939d36375654b3d19ce7acf9cb5 Mon Sep 17 00:00:00 2001 From: Obdulia Losantos Date: Tue, 15 Sep 2020 12:19:43 +0200 Subject: [PATCH 2/2] feat: use generic config path env var names (#4) --- consumer/app/config.py | 10 ++++++++-- docker-compose-base.yml | 2 +- docker-compose-test.yml | 6 +++--- docker-compose.yml | 6 +++--- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/consumer/app/config.py b/consumer/app/config.py index 00e2462..e25053c 100644 --- a/consumer/app/config.py +++ b/consumer/app/config.py @@ -33,8 +33,14 @@ def load_config(): - CONSUMER_CONFIG_PATH = os.environ.get('CKAN_CONSUMER_CONFIG_PATH', None) - KAFKA_CONFIG_PATH = os.environ.get('CKAN_CONSUMER_KAFKA_CONFIG_PATH', None) + # Use generic config path environment variable names + # but also keep old ones for legacy deployments + CONSUMER_CONFIG_PATH = os.environ.get( + 'CONSUMER_CONFIG_PATH', + os.environ.get('CKAN_CONSUMER_CONFIG_PATH', None)) + KAFKA_CONFIG_PATH = os.environ.get( + 'CONSUMER_KAFKA_CONFIG_PATH', + os.environ.get('CKAN_CONSUMER_KAFKA_CONFIG_PATH', None)) global consumer_config consumer_config = Settings(file_path=CONSUMER_CONFIG_PATH) global kafka_config diff --git a/docker-compose-base.yml b/docker-compose-base.yml index 9958b2b..2da1d60 100644 --- a/docker-compose-base.yml +++ b/docker-compose-base.yml @@ -1,4 +1,4 @@ -version: "2.1" +version: "2.4" services: ckan-consumer-base: diff --git a/docker-compose-test.yml b/docker-compose-test.yml index a0360b9..8ac712c 100644 --- a/docker-compose-test.yml +++ 
b/docker-compose-test.yml @@ -1,4 +1,4 @@ -version: "2.1" +version: "2.4" services: consumer-test: extends: @@ -11,8 +11,8 @@ services: EXPOSE_PORT: 9013 LOG_LEVEL: "DEBUG" - CKAN_CONSUMER_CONFIG_PATH: "/code/tests/conf/consumer.json" - CKAN_CONSUMER_KAFKA_CONFIG_PATH: "/code/tests/conf/kafka.json" + CONSUMER_CONFIG_PATH: "/code/tests/conf/consumer.json" + CONSUMER_KAFKA_CONFIG_PATH: "/code/tests/conf/kafka.json" CONNECT_RETRY_WAIT: 1 STARTUP_CONNECTION_RETRY: 3 diff --git a/docker-compose.yml b/docker-compose.yml index f288451..ef9577e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: "2.1" +version: "2.4" networks: aether: @@ -11,8 +11,8 @@ services: file: docker-compose-base.yml service: ckan-consumer-base environment: - CKAN_CONSUMER_CONFIG_PATH: "/code/conf/consumer/consumer.json" - CKAN_CONSUMER_KAFKA_CONFIG_PATH: "/code/conf/consumer/kafka.json" + CONSUMER_CONFIG_PATH: "/code/conf/consumer/consumer.json" + CONSUMER_KAFKA_CONFIG_PATH: "/code/conf/consumer/kafka.json" LOG_LEVEL: "DEBUG" EXPOSE_PORT: 9009 CONSUMER_NAME: 'CKAN-CONSUMER'