From ec005a2222b755db4a0c05216ea5f3ae7bf7b936 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Thu, 12 Sep 2024 13:29:30 +0100 Subject: [PATCH 01/16] Add github action for testing notebooks --- .github/workflows/actions.yml | 50 ++++++++++++++++++++++++++++ tests/test-changed-notebooks.sh | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 .github/workflows/actions.yml create mode 100644 tests/test-changed-notebooks.sh diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml new file mode 100644 index 00000000..3728208c --- /dev/null +++ b/.github/workflows/actions.yml @@ -0,0 +1,50 @@ +name: Run tests + +on: + pull_request: + +jobs: + run_notebooks: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4.1.7 + + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v45 + # To compare changes between the current commit and the last pushed remote commit set `since_last_remote_commit: true`. e.g + # with: + # since_last_remote_commit: true + with: + files: | + **.ipynb + + - name: Start docker and test container + if: steps.changed-files.outputs.any_changed == 'true' + env: + ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + chmod +x ./tests/test-changed-notebooks.sh + ./tests/test-changed-notebooks.sh + shell: bash + + - name: Test all changed files + if: steps.changed-files.outputs.any_changed == 'true' + env: + ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + for file in ${ALL_CHANGED_FILES}; do + echo "$file" + docker exec jupyter papermill ../$file /dev/null --execution-timeout=1200 --log-level ERROR + done + + - name: Stop docker and test container + if: steps.changed-files.outputs.any_changed == 'true' + env: + ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} + run: | + docker compose --profile all-services down -v + shell: bash + + + diff --git a/tests/test-changed-notebooks.sh b/tests/test-changed-notebooks.sh new file mode 100644 index 00000000..2765ec0c --- /dev/null +++ b/tests/test-changed-notebooks.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# +# This script run notebook execution tests. +# Usage: +# test_notebooks.sh +# Parameters: +# (optional): path to notebook(s) to test +# +# The script will: +# - use docker-compose-local.yaml config to build jupyter-img and bring up the full stack, +# - Wait for services to report status +# - Run all notebooks found recursively within the specified , +# if no path is specified, it will use the current path and any notebooks in any subfolders +# + + + +retry() { + local action="$1" # action to run + local retries="${2:-10}" # max retries + local sleep_seconds="${3:-2}" # wait between tries + + exit_code=999 + + while [[ "$exit_code" -ne "0" && "$retries" -gt 0 ]]; do + #run action and consume output, no need to show it + echo " trying...[${action[@]}]" + output=`${action[@]}` + local exit_code=$? + echo " output: [${output}]" + echo " exit code:${exit_code}" + retries=$(($retries - 1)) + if [[ $exit_code -ne 0 ]]; then + sleep $sleep_seconds + fi + done +} + + + + +docker compose --profile all-services up -d +docker exec jupyter pip install papermill + +if [ $# -ge 1 ]; + then TEST_PATH=$1 +else + TEST_PATH=../notebooks/ +fi + +# check that druid is running +echo "Waiting for Druid readiness..." +retry 'curl http://localhost:8081/status' 50 2 +retry 'curl http://localhost:8082/status' 50 2 +retry 'curl http://localhost:8083/status' 50 2 +retry 'curl http://localhost:8091/status' 50 2 +retry 'curl http://localhost:8888/status' 50 2 +#echo "Waiting for Data Generator readiness..." +retry 'curl http://localhost:9999/jobs' 50 2 From 5d8a35d05234c9c272b795a7b798dd0b196f7aff Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 3 Sep 2024 14:36:45 +0100 Subject: [PATCH 02/16] Added spell checking, basic linting, and checks for broken links in markdown --- .pre-commit-config.yaml | 17 +++++++++++++++++ ignore-spelling-words.txt | 8 ++++++++ requirements.txt | 4 ++++ 3 files changed, 29 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 ignore-spelling-words.txt create mode 100644 requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..428e67f8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-json +- repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + name: codespell + description: Checks for common misspellings in text files. + entry: codespell --ignore-words=ignore-spelling-words.txt + language: python + types: [text] diff --git a/ignore-spelling-words.txt b/ignore-spelling-words.txt new file mode 100644 index 00000000..3104459b --- /dev/null +++ b/ignore-spelling-words.txt @@ -0,0 +1,8 @@ +EGE +MKE +MOT +SAV +AGS + +Rouge +Nome diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..c16e82f6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +pre-commit +black +flake +codespell From 7a0d6fb52e56aef19f8f8ef897ee65200f8b5fee Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 3 Sep 2024 14:40:06 +0100 Subject: [PATCH 03/16] Added pre-commit instructions to README.md --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 016b4f3e..af85b1f9 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ It contains: Suggestions or comments? Call into the [discussions](https://github.com/implydata/learn-druid/discussions). Found a problem or want to request a notebook? Raise an [issue](https://github.com/implydata/learn-druid/issues). Want to contribute? Raise a [PR](https://github.com/implydata/learn-druid/pulls). [Contributions](contributing.md) to this community resource are welcome! Contribute your own notebook on a topic that's not listed here, and check out the [issue](https://github.com/implydata/learn-druid/issues) list, where you'll find bugs and enhancement requests. - + Come meet your friendly Apache Druid [community](https://druid.apache.org/community) if you have any questions about the functionality you see here. ## Pre-requisites @@ -48,7 +48,7 @@ To use the "Learn Druid" Docker Compose, you need: To get started quickly: 1. Clone the repository: - + ```bash git clone https://github.com/implydata/learn-druid ``` @@ -130,6 +130,15 @@ The Learn Druid environment includes the following services: [**Apache Druid**](https://druid.apache.org/): The currently released version of Apache Druid by default. +## Contributing + +Please install and run the [pre-commit](https://pre-commit.com/) before raising PRs. + +```bash +pip install pre-commit +pre-commit install +``` + --- **This repository is not affiliated with, endorsed by, or otherwise associated with the Apache Software Foundation (ASF) or any of its projects. Apache, Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of ASF in the USA and other countries.** From 39b10e0d152251c467f2075e301a269378281f74 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Thu, 12 Sep 2024 11:51:58 +0100 Subject: [PATCH 04/16] Moved contributing note to correct location --- README.md | 9 --------- contributing.md | 15 +++++++++++---- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index af85b1f9..e3a3e84b 100644 --- a/README.md +++ b/README.md @@ -130,15 +130,6 @@ The Learn Druid environment includes the following services: [**Apache Druid**](https://druid.apache.org/): The currently released version of Apache Druid by default. -## Contributing - -Please install and run the [pre-commit](https://pre-commit.com/) before raising PRs. - -```bash -pip install pre-commit -pre-commit install -``` - --- **This repository is not affiliated with, endorsed by, or otherwise associated with the Apache Software Foundation (ASF) or any of its projects. Apache, Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of ASF in the USA and other countries.** diff --git a/contributing.md b/contributing.md index cd85378b..2c1e8014 100644 --- a/contributing.md +++ b/contributing.md @@ -11,14 +11,21 @@ Here are some general guidelines on making a notebook. The [contributing](https://github.com/implydata/learn-druid/tree/main/notebooks/99-contributing) folder contains a notebook template as a starting point. You'll find boilerplate elements including: * Setting the connection to Druid, Kafka, and the data generator. -* Starter elements for ingesting from example datas sets or the data generator. +* Starter elements for ingesting from example data sets or the data generator. * Clean-up elements, like dropping tables, stopping streaming ingestion, and halting data generator jobs. -* Re-usable code elements that other contributors have found useful. +* Reusable code elements that other contributors have found useful. And don't forget that the template itself is open to contribution! ### Raise a PR +Please install and run the [pre-commit](https://pre-commit.com/) before raising PRs. + +```bash +pip install pre-commit +pre-commit install +``` + When you have a notebook and you're ready for feedback, it's a good idea to raise a draft PR first. Feel free to use the comments section to ask for initial feedback, or drop into the docs channel in the official Apache Druid Slack channel. And when it's ready to go, finalize your PR. Add reviewers, get formal feedback, make any necessary changes, etc. in the usual way. @@ -39,12 +46,12 @@ Use the same route to run a locally-built Docker image by using the appropriate ### Run automated tests on notebooks -Make sure that docker compose is down and all volumes have been deleted, then start tests with: +Make sure that docker compose is down and all volumes have been deleted, then start tests with: ```shell cd tests ./test-notebooks.sh - ``` + ``` To test single notebook: From b7c94050da586bd713983676aa6152f1322572e9 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Thu, 19 Sep 2024 10:43:43 +0100 Subject: [PATCH 05/16] Added papermill test as a seperate option to nbmkae --- .github/workflows/actions.yml | 4 ++-- .gitignore | 1 + ...otebooks.sh => launch-test-environment.sh} | 0 tests/test-notebooks-papermill.sh | 22 +++++++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) rename tests/{test-changed-notebooks.sh => launch-test-environment.sh} (100%) create mode 100644 tests/test-notebooks-papermill.sh diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 3728208c..9fec19be 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -24,8 +24,8 @@ jobs: env: ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} run: | - chmod +x ./tests/test-changed-notebooks.sh - ./tests/test-changed-notebooks.sh + chmod +x ./tests/launch-test-environment.sh + ./tests/launch-test-environment.sh shell: bash - name: Test all changed files diff --git a/.gitignore b/.gitignore index 78e29ab7..854c24d8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .ipynb_checkpoints .ipynb_checkpoints/* .DS_Store +output.ipynb diff --git a/tests/test-changed-notebooks.sh b/tests/launch-test-environment.sh similarity index 100% rename from tests/test-changed-notebooks.sh rename to tests/launch-test-environment.sh diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh new file mode 100644 index 00000000..f9cd7e5e --- /dev/null +++ b/tests/test-notebooks-papermill.sh @@ -0,0 +1,22 @@ +bash launch-test-environment.sh + +skiplList=("notebooks/03-query/11-joins.ipynb" +"notebooks/03-query/08-functions-strings.ipynb" +"notebooks/03-query/07-functions-datetime.ipynb" +"notebooks/03-query/19-groupby-earliest.ipynb" +"notebooks/02-ingestion/13-native-transforms.ipynb" +"notebooks/02-ingestion/02-batch-ingestion.ipynb" +"notebooks/02-ingestion/12-spatial-dimensions.ipynb" +"notebooks/99-contributing/notebook-template.ipynb") + +ALL_CHANGED_FILES=$(find notebooks -type f -name '*.ipynb' | awk '!/\/\./') +for file in ${ALL_CHANGED_FILES}; do + echo "$file" + if [[ ! " ${skiplList[*]} " =~ [[:space:]]${file}[[:space:]] ]]; then + docker exec jupyter papermill ../$file output.ipynb --log-level ERROR + else + echo "Skipped" + fi +done + +docker compose --profile all-services down -v \ No newline at end of file From 6b1a0ae7d5da27d6e9462add0a3c5ce9604c1bc9 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 3 Sep 2024 14:40:06 +0100 Subject: [PATCH 06/16] Added pre-commit instructions to README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index e3a3e84b..af85b1f9 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,15 @@ The Learn Druid environment includes the following services: [**Apache Druid**](https://druid.apache.org/): The currently released version of Apache Druid by default. +## Contributing + +Please install and run the [pre-commit](https://pre-commit.com/) before raising PRs. + +```bash +pip install pre-commit +pre-commit install +``` + --- **This repository is not affiliated with, endorsed by, or otherwise associated with the Apache Software Foundation (ASF) or any of its projects. Apache, Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of ASF in the USA and other countries.** From 89119c0391c52645ae48b2b3c91d51148e27b970 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Thu, 12 Sep 2024 11:51:58 +0100 Subject: [PATCH 07/16] Moved contributing note to correct location --- README.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/README.md b/README.md index af85b1f9..e3a3e84b 100644 --- a/README.md +++ b/README.md @@ -130,15 +130,6 @@ The Learn Druid environment includes the following services: [**Apache Druid**](https://druid.apache.org/): The currently released version of Apache Druid by default. -## Contributing - -Please install and run the [pre-commit](https://pre-commit.com/) before raising PRs. - -```bash -pip install pre-commit -pre-commit install -``` - --- **This repository is not affiliated with, endorsed by, or otherwise associated with the Apache Software Foundation (ASF) or any of its projects. Apache, Apache Druid, Druid, and the Druid logo are either registered trademarks or trademarks of ASF in the USA and other countries.** From f470ec7459fd68dcb3c5bd0adfc1a725147d8b9d Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Mon, 14 Oct 2024 16:30:38 +0100 Subject: [PATCH 08/16] remove 03-08 from skip list add delay --- notebooks/03-query/08-functions-strings.ipynb | 205 ++++++++++++++++-- tests/test-notebooks-papermill.sh | 9 +- 2 files changed, 196 insertions(+), 18 deletions(-) diff --git a/notebooks/03-query/08-functions-strings.ipynb b/notebooks/03-query/08-functions-strings.ipynb index 798c8714..6a3690c0 100644 --- a/notebooks/03-query/08-functions-strings.ipynb +++ b/notebooks/03-query/08-functions-strings.ipynb @@ -58,10 +58,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c1ec783b-df3f-4168-9be2-cdc6ad3e33c2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Opening a connection to http://router:8888.\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'30.0.0'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import druidapi\n", "import os\n", @@ -95,10 +156,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "f52a94fb-d2e4-403f-ab10-84d3af7bf2c8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:19<00:00, 5.20it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
PositionNameType
1__timeTIMESTAMP
2referrerVARCHAR
3event_typeVARCHAR
4event_subtypeVARCHAR
5cityVARCHAR
6osVARCHAR
7continentVARCHAR
8countryVARCHAR
9XXXXXcountryXXXXXVARCHAR
10browserVARCHAR
11sessionVARCHAR
12session_lengthBIGINT
13screenVARCHAR
14loaded_imageVARCHAR
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql='''\n", "REPLACE INTO \"example-koalas-strings\" OVERWRITE ALL\n", @@ -144,14 +241,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "dc4c2524-0eba-4bc6-84ed-da3a25aa5fbe", "metadata": {}, "outputs": [], "source": [ "import matplotlib\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd" + "import pandas as pd\n", + "import time" ] }, { @@ -171,10 +269,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "b9abf5a4-e9a3-408e-ae40-05fb85057849", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "
events
8669
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql='''\n", "SELECT\n", @@ -201,10 +315,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "42d09b48-c2f5-465c-aed1-774ca818bc7e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "
google_referred_sessionsnot_google_referred_sessions
10311068
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql='''\n", "SELECT\n", @@ -229,10 +359,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "bbe2366a-f9d1-455c-bc91-4cf960da0a29", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
timesuspicious_errorssecure_suspicious_errors
2019-08-25T12:00:00.000Z00
2019-08-25T13:00:00.000Z00
2019-08-25T14:00:00.000Z20
2019-08-25T15:00:00.000Z00
2019-08-25T16:00:00.000Z00
2019-08-25T17:00:00.000Z32
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql='''\n", "SELECT\n", @@ -268,10 +419,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "f2234b60-8b02-460d-bddd-d9076f369a4f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
CITYosyrtnuoc
BRANTFORDwindowsadanaC
BULIMBAwindowsailartsuA
ENGLEWOODchrome ossetatS detinU
GALLATINwindowssetatS detinU
INDIANAPOLISandroidsetatS detinU
MANHUACUwindowslizarB
PFLUGERVILLEwindowssetatS detinU
RICHARDSONwindowssetatS detinU
SANTIAGO DE CALIwindowsaibmoloC
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql='''\n", "SELECT DISTINCT\n", @@ -549,6 +724,8 @@ "metadata": {}, "outputs": [], "source": [ + "time.sleep(100) # Give previous cell some time to complete\n", + "\n", "sql='''\n", "SELECT\n", " TRIM(LEADING 'X' FROM \"XXXXXcountryXXXXX\") AS \"leadingTrim\",\n", diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh index f9cd7e5e..7454f9a6 100644 --- a/tests/test-notebooks-papermill.sh +++ b/tests/test-notebooks-papermill.sh @@ -1,7 +1,8 @@ -bash launch-test-environment.sh +#!/bin/bash + +bash ./launch-test-environment.sh skiplList=("notebooks/03-query/11-joins.ipynb" -"notebooks/03-query/08-functions-strings.ipynb" "notebooks/03-query/07-functions-datetime.ipynb" "notebooks/03-query/19-groupby-earliest.ipynb" "notebooks/02-ingestion/13-native-transforms.ipynb" @@ -16,7 +17,7 @@ for file in ${ALL_CHANGED_FILES}; do docker exec jupyter papermill ../$file output.ipynb --log-level ERROR else echo "Skipped" - fi + fi done -docker compose --profile all-services down -v \ No newline at end of file +docker compose --profile all-services down -v From 3776edbca7ec009e8ecfb4f7a8d89565828ec334 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Mon, 14 Oct 2024 16:37:50 +0100 Subject: [PATCH 09/16] remove 02-02 from skip list add --- .../02-ingestion/02-batch-ingestion.ipynb | 4 +- notebooks/03-query/08-functions-strings.ipynb | 202 ++---------------- 2 files changed, 16 insertions(+), 190 deletions(-) diff --git a/notebooks/02-ingestion/02-batch-ingestion.ipynb b/notebooks/02-ingestion/02-batch-ingestion.ipynb index 7f1b7b2c..b5f10bc7 100644 --- a/notebooks/02-ingestion/02-batch-ingestion.ipynb +++ b/notebooks/02-ingestion/02-batch-ingestion.ipynb @@ -588,7 +588,7 @@ "metadata": {}, "outputs": [], "source": [ - "druid.datasources.drop(table_name, True)" + "druid.datasources.drop(\"example-clickstream-transforms\", True)" ] }, { @@ -626,7 +626,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/notebooks/03-query/08-functions-strings.ipynb b/notebooks/03-query/08-functions-strings.ipynb index 6a3690c0..56c7bfa0 100644 --- a/notebooks/03-query/08-functions-strings.ipynb +++ b/notebooks/03-query/08-functions-strings.ipynb @@ -58,71 +58,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "c1ec783b-df3f-4168-9be2-cdc6ad3e33c2", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Opening a connection to http://router:8888.\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'30.0.0'" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import druidapi\n", "import os\n", @@ -156,46 +95,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f52a94fb-d2e4-403f-ab10-84d3af7bf2c8", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:19<00:00, 5.20it/s]\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
PositionNameType
1__timeTIMESTAMP
2referrerVARCHAR
3event_typeVARCHAR
4event_subtypeVARCHAR
5cityVARCHAR
6osVARCHAR
7continentVARCHAR
8countryVARCHAR
9XXXXXcountryXXXXXVARCHAR
10browserVARCHAR
11sessionVARCHAR
12session_lengthBIGINT
13screenVARCHAR
14loaded_imageVARCHAR
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sql='''\n", "REPLACE INTO \"example-koalas-strings\" OVERWRITE ALL\n", @@ -241,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "dc4c2524-0eba-4bc6-84ed-da3a25aa5fbe", "metadata": {}, "outputs": [], @@ -269,26 +172,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "b9abf5a4-e9a3-408e-ae40-05fb85057849", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "
events
8669
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sql='''\n", "SELECT\n", @@ -315,26 +202,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "42d09b48-c2f5-465c-aed1-774ca818bc7e", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "
google_referred_sessionsnot_google_referred_sessions
10311068
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sql='''\n", "SELECT\n", @@ -359,31 +230,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "bbe2366a-f9d1-455c-bc91-4cf960da0a29", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
timesuspicious_errorssecure_suspicious_errors
2019-08-25T12:00:00.000Z00
2019-08-25T13:00:00.000Z00
2019-08-25T14:00:00.000Z20
2019-08-25T15:00:00.000Z00
2019-08-25T16:00:00.000Z00
2019-08-25T17:00:00.000Z32
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sql='''\n", "SELECT\n", @@ -419,34 +269,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "f2234b60-8b02-460d-bddd-d9076f369a4f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
CITYosyrtnuoc
BRANTFORDwindowsadanaC
BULIMBAwindowsailartsuA
ENGLEWOODchrome ossetatS detinU
GALLATINwindowssetatS detinU
INDIANAPOLISandroidsetatS detinU
MANHUACUwindowslizarB
PFLUGERVILLEwindowssetatS detinU
RICHARDSONwindowssetatS detinU
SANTIAGO DE CALIwindowsaibmoloC
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sql='''\n", "SELECT DISTINCT\n", @@ -921,7 +747,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.7" } }, "nbformat": 4, From 887140cbc045a851d6a618f514fa84346424574c Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Mon, 14 Oct 2024 17:03:22 +0100 Subject: [PATCH 10/16] removed 02-13 from skip list added delay --- notebooks/02-ingestion/13-native-transforms.ipynb | 3 +++ tests/test-notebooks-papermill.sh | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/notebooks/02-ingestion/13-native-transforms.ipynb b/notebooks/02-ingestion/13-native-transforms.ipynb index 61e2570d..5cf2fd6d 100644 --- a/notebooks/02-ingestion/13-native-transforms.ipynb +++ b/notebooks/02-ingestion/13-native-transforms.ipynb @@ -767,6 +767,9 @@ "metadata": {}, "outputs": [], "source": [ + "import time\n", + "time.sleep(100) # Give previous cell some time to complete\n", + "\n", "time_now = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')\n", "\n", "sql=f'''\n", diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh index 7454f9a6..ad966383 100644 --- a/tests/test-notebooks-papermill.sh +++ b/tests/test-notebooks-papermill.sh @@ -5,8 +5,6 @@ bash ./launch-test-environment.sh skiplList=("notebooks/03-query/11-joins.ipynb" "notebooks/03-query/07-functions-datetime.ipynb" "notebooks/03-query/19-groupby-earliest.ipynb" -"notebooks/02-ingestion/13-native-transforms.ipynb" -"notebooks/02-ingestion/02-batch-ingestion.ipynb" "notebooks/02-ingestion/12-spatial-dimensions.ipynb" "notebooks/99-contributing/notebook-template.ipynb") From 85f11fc0d29909329bfd0f86de434e3e137cab0e Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 15 Oct 2024 13:21:57 +0100 Subject: [PATCH 11/16] remove 03-07 from skip list added delay --- notebooks/03-query/07-functions-datetime.ipynb | 7 +++++-- tests/test-notebooks-papermill.sh | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/notebooks/03-query/07-functions-datetime.ipynb b/notebooks/03-query/07-functions-datetime.ipynb index ff42b82b..4874cb37 100644 --- a/notebooks/03-query/07-functions-datetime.ipynb +++ b/notebooks/03-query/07-functions-datetime.ipynb @@ -148,7 +148,8 @@ "source": [ "import matplotlib\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd" + "import pandas as pd\n", + "import time" ] }, { @@ -436,6 +437,8 @@ "metadata": {}, "outputs": [], "source": [ + "time.sleep(100) # Give previous cell some time to complete\n", + "\n", "sql='''\n", "SELECT\n", " __time AS \"start\",\n", @@ -446,7 +449,7 @@ "LIMIT 10\n", "'''\n", "\n", - "display.sql(sql)" + "display.sql(sql) " ] }, { diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh index ad966383..9ad0f628 100644 --- a/tests/test-notebooks-papermill.sh +++ b/tests/test-notebooks-papermill.sh @@ -2,8 +2,7 @@ bash ./launch-test-environment.sh -skiplList=("notebooks/03-query/11-joins.ipynb" -"notebooks/03-query/07-functions-datetime.ipynb" +skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? "notebooks/03-query/19-groupby-earliest.ipynb" "notebooks/02-ingestion/12-spatial-dimensions.ipynb" "notebooks/99-contributing/notebook-template.ipynb") From 2b8280b5b669020a3fc160b2fec00b0877f7e038 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 15 Oct 2024 14:01:06 +0100 Subject: [PATCH 12/16] remove 02-12 from skip list fix bad import and add delay --- notebooks/02-ingestion/12-spatial-dimensions.ipynb | 7 +++++-- tests/test-notebooks-papermill.sh | 3 +-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/notebooks/02-ingestion/12-spatial-dimensions.ipynb b/notebooks/02-ingestion/12-spatial-dimensions.ipynb index 5af7c91b..e69dc441 100644 --- a/notebooks/02-ingestion/12-spatial-dimensions.ipynb +++ b/notebooks/02-ingestion/12-spatial-dimensions.ipynb @@ -73,6 +73,7 @@ "source": [ "import druidapi\n", "import os\n", + "import json\n", "\n", "if 'DRUID_HOST' not in os.environ.keys():\n", " druid_host=f\"http://localhost:8888\"\n", @@ -235,8 +236,6 @@ "metadata": {}, "outputs": [], "source": [ - "import json\n", - "\n", "spatial_index_spec = {\n", " \"type\": \"index_parallel\",\n", " \"spec\": {\n", @@ -365,6 +364,10 @@ "metadata": {}, "outputs": [], "source": [ + "import time\n", + "\n", + "time.sleep(100) # Give previous cell some time to complete\n", + "\n", "rectangular_filter_query = {\n", " \"queryType\": \"topN\",\n", " \"dataSource\": {\n", diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh index 9ad0f628..2bc3e693 100644 --- a/tests/test-notebooks-papermill.sh +++ b/tests/test-notebooks-papermill.sh @@ -3,8 +3,7 @@ bash ./launch-test-environment.sh skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? -"notebooks/03-query/19-groupby-earliest.ipynb" -"notebooks/02-ingestion/12-spatial-dimensions.ipynb" +"notebooks/03-query/19-groupby-earliest.ipynb" #Queries for session length column which has already been filtered out at ingestion? "notebooks/99-contributing/notebook-template.ipynb") ALL_CHANGED_FILES=$(find notebooks -type f -name '*.ipynb' | awk '!/\/\./') From 9131c07c7d718a7d4007265d010737a7a3014ac3 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 15 Oct 2024 16:59:29 +0100 Subject: [PATCH 13/16] Fixed undropped table --- notebooks/02-ingestion/02-batch-ingestion.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/02-ingestion/02-batch-ingestion.ipynb b/notebooks/02-ingestion/02-batch-ingestion.ipynb index b5f10bc7..d8e28716 100644 --- a/notebooks/02-ingestion/02-batch-ingestion.ipynb +++ b/notebooks/02-ingestion/02-batch-ingestion.ipynb @@ -588,7 +588,7 @@ "metadata": {}, "outputs": [], "source": [ - "druid.datasources.drop(\"example-clickstream-transforms\", True)" + "druid.datasources.drop(\"example-kttm-transform-batch\", True)" ] }, { From 1cefeb422929dc3075f353b1440af82e3437cf40 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 15 Oct 2024 17:02:11 +0100 Subject: [PATCH 14/16] remove 03-19 from skip list fix missing column --- notebooks/03-query/19-groupby-earliest.ipynb | 3 ++- tests/test-notebooks-papermill.sh | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/03-query/19-groupby-earliest.ipynb b/notebooks/03-query/19-groupby-earliest.ipynb index 5de89516..57d94ee7 100644 --- a/notebooks/03-query/19-groupby-earliest.ipynb +++ b/notebooks/03-query/19-groupby-earliest.ipynb @@ -141,9 +141,10 @@ " \"country\",\n", " \"loaded_image\",\n", " \"os\",\n", + " \"session_length\",\n", " LATEST_BY(\"session_length\",TIME_PARSE(\"timestamp\")) \"latest_session_length\"\n", "FROM \"ext\"\n", - "GROUP BY 1,2,3,4,5,6,7\n", + "GROUP BY 1,2,3,4,5,6,7,8\n", "PARTITIONED BY DAY\n", "'''\n", "\n", diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh index 2bc3e693..c0787207 100644 --- a/tests/test-notebooks-papermill.sh +++ b/tests/test-notebooks-papermill.sh @@ -3,8 +3,7 @@ bash ./launch-test-environment.sh skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? -"notebooks/03-query/19-groupby-earliest.ipynb" #Queries for session length column which has already been filtered out at ingestion? -"notebooks/99-contributing/notebook-template.ipynb") +"notebooks/99-contributing/notebook-template.ipynb") #Contains invalid sql ALL_CHANGED_FILES=$(find notebooks -type f -name '*.ipynb' | awk '!/\/\./') for file in ${ALL_CHANGED_FILES}; do From a538d9ba50bdc6f5ce3f3692f0f1f9d8d68c6655 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Tue, 15 Oct 2024 17:27:52 +0100 Subject: [PATCH 15/16] Add skip list to action --- .github/workflows/actions.yml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 9fec19be..99cc41d6 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v4.1.7 - + - name: Get changed files id: changed-files uses: tj-actions/changed-files@v45 @@ -18,7 +18,7 @@ jobs: with: files: | **.ipynb - + - name: Start docker and test container if: steps.changed-files.outputs.any_changed == 'true' env: @@ -33,11 +33,18 @@ jobs: env: ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} run: | + skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? + "notebooks/99-contributing/notebook-template.ipynb") #Contains invalid sql + for file in ${ALL_CHANGED_FILES}; do echo "$file" - docker exec jupyter papermill ../$file /dev/null --execution-timeout=1200 --log-level ERROR + if [[ ! " ${skiplList[*]} " =~ [[:space:]]${file}[[:space:]] ]]; then + docker exec jupyter papermill ../$file /dev/null --log-level ERROR + else + echo "Skipped" + fi done - + - name: Stop docker and test container if: steps.changed-files.outputs.any_changed == 'true' env: @@ -45,6 +52,3 @@ jobs: run: | docker compose --profile all-services down -v shell: bash - - - From 39646447431de2540dcbf5e8a2ad79dbd50e438c Mon Sep 17 00:00:00 2001 From: Peter Marshall Date: Thu, 31 Oct 2024 09:27:40 +0000 Subject: [PATCH 16/16] Revert "Add github action for testing notebooks" --- .github/workflows/actions.yml | 54 ----------------- .gitignore | 1 - .../02-ingestion/02-batch-ingestion.ipynb | 4 +- .../02-ingestion/12-spatial-dimensions.ipynb | 7 +-- .../02-ingestion/13-native-transforms.ipynb | 3 - .../03-query/07-functions-datetime.ipynb | 7 +-- notebooks/03-query/08-functions-strings.ipynb | 7 +-- notebooks/03-query/19-groupby-earliest.ipynb | 3 +- tests/launch-test-environment.sh | 59 ------------------- tests/test-notebooks-papermill.sh | 18 ------ 10 files changed, 9 insertions(+), 154 deletions(-) delete mode 100644 .github/workflows/actions.yml delete mode 100644 tests/launch-test-environment.sh delete mode 100644 tests/test-notebooks-papermill.sh diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml deleted file mode 100644 index 99cc41d6..00000000 --- a/.github/workflows/actions.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Run tests - -on: - pull_request: - -jobs: - run_notebooks: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v4.1.7 - - - name: Get changed files - id: changed-files - uses: tj-actions/changed-files@v45 - # To compare changes between the current commit and the last pushed remote commit set `since_last_remote_commit: true`. e.g - # with: - # since_last_remote_commit: true - with: - files: | - **.ipynb - - - name: Start docker and test container - if: steps.changed-files.outputs.any_changed == 'true' - env: - ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} - run: | - chmod +x ./tests/launch-test-environment.sh - ./tests/launch-test-environment.sh - shell: bash - - - name: Test all changed files - if: steps.changed-files.outputs.any_changed == 'true' - env: - ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} - run: | - skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? - "notebooks/99-contributing/notebook-template.ipynb") #Contains invalid sql - - for file in ${ALL_CHANGED_FILES}; do - echo "$file" - if [[ ! " ${skiplList[*]} " =~ [[:space:]]${file}[[:space:]] ]]; then - docker exec jupyter papermill ../$file /dev/null --log-level ERROR - else - echo "Skipped" - fi - done - - - name: Stop docker and test container - if: steps.changed-files.outputs.any_changed == 'true' - env: - ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} - run: | - docker compose --profile all-services down -v - shell: bash diff --git a/.gitignore b/.gitignore index 854c24d8..78e29ab7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,3 @@ .ipynb_checkpoints .ipynb_checkpoints/* .DS_Store -output.ipynb diff --git a/notebooks/02-ingestion/02-batch-ingestion.ipynb b/notebooks/02-ingestion/02-batch-ingestion.ipynb index d8e28716..7f1b7b2c 100644 --- a/notebooks/02-ingestion/02-batch-ingestion.ipynb +++ b/notebooks/02-ingestion/02-batch-ingestion.ipynb @@ -588,7 +588,7 @@ "metadata": {}, "outputs": [], "source": [ - "druid.datasources.drop(\"example-kttm-transform-batch\", True)" + "druid.datasources.drop(table_name, True)" ] }, { @@ -626,7 +626,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/notebooks/02-ingestion/12-spatial-dimensions.ipynb b/notebooks/02-ingestion/12-spatial-dimensions.ipynb index e69dc441..5af7c91b 100644 --- a/notebooks/02-ingestion/12-spatial-dimensions.ipynb +++ b/notebooks/02-ingestion/12-spatial-dimensions.ipynb @@ -73,7 +73,6 @@ "source": [ "import druidapi\n", "import os\n", - "import json\n", "\n", "if 'DRUID_HOST' not in os.environ.keys():\n", " druid_host=f\"http://localhost:8888\"\n", @@ -236,6 +235,8 @@ "metadata": {}, "outputs": [], "source": [ + "import json\n", + "\n", "spatial_index_spec = {\n", " \"type\": \"index_parallel\",\n", " \"spec\": {\n", @@ -364,10 +365,6 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", - "\n", - "time.sleep(100) # Give previous cell some time to complete\n", - "\n", "rectangular_filter_query = {\n", " \"queryType\": \"topN\",\n", " \"dataSource\": {\n", diff --git a/notebooks/02-ingestion/13-native-transforms.ipynb b/notebooks/02-ingestion/13-native-transforms.ipynb index 5cf2fd6d..61e2570d 100644 --- a/notebooks/02-ingestion/13-native-transforms.ipynb +++ b/notebooks/02-ingestion/13-native-transforms.ipynb @@ -767,9 +767,6 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", - "time.sleep(100) # Give previous cell some time to complete\n", - "\n", "time_now = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')\n", "\n", "sql=f'''\n", diff --git a/notebooks/03-query/07-functions-datetime.ipynb b/notebooks/03-query/07-functions-datetime.ipynb index 4874cb37..ff42b82b 100644 --- a/notebooks/03-query/07-functions-datetime.ipynb +++ b/notebooks/03-query/07-functions-datetime.ipynb @@ -148,8 +148,7 @@ "source": [ "import matplotlib\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import time" + "import pandas as pd" ] }, { @@ -437,8 +436,6 @@ "metadata": {}, "outputs": [], "source": [ - "time.sleep(100) # Give previous cell some time to complete\n", - "\n", "sql='''\n", "SELECT\n", " __time AS \"start\",\n", @@ -449,7 +446,7 @@ "LIMIT 10\n", "'''\n", "\n", - "display.sql(sql) " + "display.sql(sql)" ] }, { diff --git a/notebooks/03-query/08-functions-strings.ipynb b/notebooks/03-query/08-functions-strings.ipynb index 56c7bfa0..798c8714 100644 --- a/notebooks/03-query/08-functions-strings.ipynb +++ b/notebooks/03-query/08-functions-strings.ipynb @@ -151,8 +151,7 @@ "source": [ "import matplotlib\n", "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import time" + "import pandas as pd" ] }, { @@ -550,8 +549,6 @@ "metadata": {}, "outputs": [], "source": [ - "time.sleep(100) # Give previous cell some time to complete\n", - "\n", "sql='''\n", "SELECT\n", " TRIM(LEADING 'X' FROM \"XXXXXcountryXXXXX\") AS \"leadingTrim\",\n", @@ -747,7 +744,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/notebooks/03-query/19-groupby-earliest.ipynb b/notebooks/03-query/19-groupby-earliest.ipynb index 57d94ee7..5de89516 100644 --- a/notebooks/03-query/19-groupby-earliest.ipynb +++ b/notebooks/03-query/19-groupby-earliest.ipynb @@ -141,10 +141,9 @@ " \"country\",\n", " \"loaded_image\",\n", " \"os\",\n", - " \"session_length\",\n", " LATEST_BY(\"session_length\",TIME_PARSE(\"timestamp\")) \"latest_session_length\"\n", "FROM \"ext\"\n", - "GROUP BY 1,2,3,4,5,6,7,8\n", + "GROUP BY 1,2,3,4,5,6,7\n", "PARTITIONED BY DAY\n", "'''\n", "\n", diff --git a/tests/launch-test-environment.sh b/tests/launch-test-environment.sh deleted file mode 100644 index 2765ec0c..00000000 --- a/tests/launch-test-environment.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -# -# This script run notebook execution tests. -# Usage: -# test_notebooks.sh -# Parameters: -# (optional): path to notebook(s) to test -# -# The script will: -# - use docker-compose-local.yaml config to build jupyter-img and bring up the full stack, -# - Wait for services to report status -# - Run all notebooks found recursively within the specified , -# if no path is specified, it will use the current path and any notebooks in any subfolders -# - - - -retry() { - local action="$1" # action to run - local retries="${2:-10}" # max retries - local sleep_seconds="${3:-2}" # wait between tries - - exit_code=999 - - while [[ "$exit_code" -ne "0" && "$retries" -gt 0 ]]; do - #run action and consume output, no need to show it - echo " trying...[${action[@]}]" - output=`${action[@]}` - local exit_code=$? - echo " output: [${output}]" - echo " exit code:${exit_code}" - retries=$(($retries - 1)) - if [[ $exit_code -ne 0 ]]; then - sleep $sleep_seconds - fi - done -} - - - - -docker compose --profile all-services up -d -docker exec jupyter pip install papermill - -if [ $# -ge 1 ]; - then TEST_PATH=$1 -else - TEST_PATH=../notebooks/ -fi - -# check that druid is running -echo "Waiting for Druid readiness..." -retry 'curl http://localhost:8081/status' 50 2 -retry 'curl http://localhost:8082/status' 50 2 -retry 'curl http://localhost:8083/status' 50 2 -retry 'curl http://localhost:8091/status' 50 2 -retry 'curl http://localhost:8888/status' 50 2 -#echo "Waiting for Data Generator readiness..." -retry 'curl http://localhost:9999/jobs' 50 2 diff --git a/tests/test-notebooks-papermill.sh b/tests/test-notebooks-papermill.sh deleted file mode 100644 index c0787207..00000000 --- a/tests/test-notebooks-papermill.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -bash ./launch-test-environment.sh - -skiplList=("notebooks/03-query/11-joins.ipynb" #Hangs forever on users data generation, even when run manually, just broken? -"notebooks/99-contributing/notebook-template.ipynb") #Contains invalid sql - -ALL_CHANGED_FILES=$(find notebooks -type f -name '*.ipynb' | awk '!/\/\./') -for file in ${ALL_CHANGED_FILES}; do - echo "$file" - if [[ ! " ${skiplList[*]} " =~ [[:space:]]${file}[[:space:]] ]]; then - docker exec jupyter papermill ../$file output.ipynb --log-level ERROR - else - echo "Skipped" - fi -done - -docker compose --profile all-services down -v