From fae6373a78d6ee79a0314f9e474e09f1bcc25e26 Mon Sep 17 00:00:00 2001 From: Hugh Evans Date: Mon, 14 Oct 2024 16:37:50 +0100 Subject: [PATCH] remove 02-02 from skip list add --- .../02-ingestion/02-batch-ingestion.ipynb | 266 ++++++++++++++++-- 1 file changed, 238 insertions(+), 28 deletions(-) diff --git a/notebooks/02-ingestion/02-batch-ingestion.ipynb b/notebooks/02-ingestion/02-batch-ingestion.ipynb index 7f1b7b2..61e6652 100644 --- a/notebooks/02-ingestion/02-batch-ingestion.ipynb +++ b/notebooks/02-ingestion/02-batch-ingestion.ipynb @@ -56,10 +56,64 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c1ec783b-df3f-4168-9be2-cdc6ad3e33c2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'30.0.0'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import druidapi\n", "import os\n", @@ -97,10 +151,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "fd47fcff-9055-4058-852f-6f5c61d07965", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:09<00:00, 10.84it/s]\n" + ] + } + ], "source": [ "table_name = \"example-wikipedia-batch\"\n", "\n", @@ -135,10 +197,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "d2584e65-c952-47f1-a885-2e5d3a4fef2b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
channelnum_events
#en.wikipedia6650
#sh.wikipedia3969
#sv.wikipedia1867
#ceb.wikipedia1808
#de.wikipedia1357
#fr.wikipedia1328
#ru.wikipedia996
#it.wikipedia916
#es.wikipedia708
#ja.wikipedia472
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql = f'''\n", "SELECT channel, count(*) num_events\n", @@ -162,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "4963c87b-76e6-42af-a8ca-54d7a66f09a5", "metadata": {}, "outputs": [], @@ -192,10 +279,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "dd91d16b-1880-4a4c-8e8a-650075e26015", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:08<00:00, 12.23it/s]\n" + ] + } + ], "source": [ "table_name = \"example-wikipedia-bigbatch\"\n", "\n", @@ -235,10 +330,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f278c0d2-9fd5-4a7b-bb65-a6db7cf98971", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
channelnum_events
#en.wikipedia19950
#sh.wikipedia11907
#sv.wikipedia5601
#ceb.wikipedia5424
#de.wikipedia4071
#fr.wikipedia3984
#ru.wikipedia2988
#it.wikipedia2748
#es.wikipedia2124
#ja.wikipedia1416
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql = f'''\n", "SELECT channel, count(*) num_events\n", @@ -262,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "dc6b287c-be0e-48fa-ae39-e111ae9dd965", "metadata": {}, "outputs": [], @@ -290,10 +410,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "55313e69-e63e-47d9-917c-2da87926ab73", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:10<00:00, 9.68it/s]\n" + ] + } + ], "source": [ "table_name = \"example-wikipedia-4-batch\"\n", "\n", @@ -341,10 +469,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "fb5b3ade-63e8-4d5f-909b-4e6e83f0115d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "
startendsizenum_rows
2016-06-27T00:00:00.000Z2016-06-28T00:00:00.000Z892847473299
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql=f'''\n", "SELECT\n", @@ -370,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "3af97ce0-6105-474f-b487-7b9756bad676", "metadata": {}, "outputs": [], @@ -404,10 +548,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "d3b1fcaa-1e78-475b-b2ef-131aa88ead51", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:07<00:00, 13.98it/s] \n" + ] + } + ], "source": [ "table_name = \"example-wikipedia-only-human\"\n", "\n", @@ -446,10 +598,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "5c825d39-0bbd-4cd3-ae8a-760146d9fff7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
isRobotchannelnum_events
false#en.wikipedia6114
false#de.wikipedia1171
false#fr.wikipedia1148
false#ru.wikipedia930
false#es.wikipedia658
false#it.wikipedia494
false#ja.wikipedia467
false#zh.wikipedia382
false#pt.wikipedia348
false#nl.wikipedia299
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql=f'''\n", "SELECT isRobot, channel, count(*) num_events\n", @@ -472,7 +649,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "eb16c069-2258-49a1-93b4-df0b03b255d8", "metadata": {}, "outputs": [], @@ -500,10 +677,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "5dbddfb3-a482-477a-94a7-18a5539590ce", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading data, status:[SUCCESS]: 100%|██████████| 100.0/100.0 [00:18<00:00, 5.45it/s]\n" + ] + } + ], "source": [ "table_name = \"example-kttm-transform-batch\"\n", "\n", @@ -556,10 +741,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "40e787fe-65f5-4110-bc69-ce583285ba41", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
time_hourcitysession_count
20Chicago70
19Houston69
23Greenbrier63
23Chicago62
17Houston61
20Brooklyn61
17Omaha55
2Saint-Sauveur54
17Miami51
17Chicago48
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "sql = f'''\n", "SELECT EXTRACT( HOUR FROM \"__time\") time_hour, city, count(distinct \"session\") session_count\n", @@ -583,12 +793,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "205ee22a-407f-4826-9499-afafe2d00ecc", "metadata": {}, "outputs": [], "source": [ - "druid.datasources.drop(table_name, True)" + "druid.datasources.drop(\"example-clickstream-transforms\", True)" ] }, {