Adding River

JBris · Feb 8, 2024 · 52c2c3d · 52c2c3d
1 parent 57f0244
commit 52c2c3d
Show file tree

Hide file tree

Showing 13 changed files with 2,935 additions and 0 deletions.
diff --git a/river/Bike-sharing-forecasting.ipynb b/river/Bike-sharing-forecasting.ipynb
@@ -0,0 +1,289 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c9d42a09",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading https://maxhalford.github.io/files/datasets/toulouse_bikes.zip (1.12 MB)\n",
+      "Uncompressing into /home/jbris/river_data/Bikes\n",
+      "{'clouds': 75,\n",
+      " 'description': 'light rain',\n",
+      " 'humidity': 81,\n",
+      " 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n",
+      " 'pressure': 1017.0,\n",
+      " 'station': 'metro-canal-du-midi',\n",
+      " 'temperature': 6.54,\n",
+      " 'wind': 9.3}\n",
+      "Number of available bikes: 1\n",
+      "[20,000] MAE: 4.912763\n",
+      "[40,000] MAE: 5.333578\n",
+      "[60,000] MAE: 5.330969\n",
+      "[80,000] MAE: 5.392334\n",
+      "[100,000] MAE: 5.423078\n",
+      "[120,000] MAE: 5.541239\n",
+      "[140,000] MAE: 5.613038\n",
+      "[160,000] MAE: 5.622441\n",
+      "[180,000] MAE: 5.567836\n",
+      "[182,470] MAE: 5.563905\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MAE: 5.563905"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pprint import pprint\n",
+    "\n",
+    "from river import compose\n",
+    "from river import datasets\n",
+    "from river import evaluate\n",
+    "from river import linear_model\n",
+    "from river import metrics\n",
+    "from river import optim\n",
+    "from river import preprocessing\n",
+    "\n",
+    "dataset = datasets.Bikes()\n",
+    "\n",
+    "# Peek at the first (features, target) pair to see what a sample looks like.\n",
+    "x, y = next(iter(dataset))\n",
+    "pprint(x)\n",
+    "print(f'Number of available bikes: {y}')\n",
+    "\n",
+    "# Baseline pipeline: keep the five weather features, scale them with\n",
+    "# StandardScaler, then fit a linear regression with SGD (learning rate 0.001).\n",
+    "model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
+    "model |= preprocessing.StandardScaler()\n",
+    "model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n",
+    "\n",
+    "metric = metrics.MAE()\n",
+    "\n",
+    "# Score the model over the whole dataset, reporting the MAE every 20,000 samples.\n",
+    "evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "93b94267",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[20,000] MAE: 3.720766\n",
+      "[40,000] MAE: 3.829739\n",
+      "[60,000] MAE: 3.844905\n",
+      "[80,000] MAE: 3.910137\n",
+      "[100,000] MAE: 3.888553\n",
+      "[120,000] MAE: 3.923644\n",
+      "[140,000] MAE: 3.980882\n",
+      "[160,000] MAE: 3.949972\n",
+      "[180,000] MAE: 3.934489\n",
+      "[182,470] MAE: 3.933442\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MAE: 3.933442"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from river import feature_extraction\n",
+    "from river import stats\n",
+    "\n",
+    "def get_hour(x):\n",
+    "    \"\"\"Return a copy of `x` with an extra 'hour' key set to x['moment'].hour.\n",
+    "\n",
+    "    The input dict is left untouched, so running the pipeline does not\n",
+    "    modify the sample that was passed in as a side effect.\n",
+    "    \"\"\"\n",
+    "    return {**x, 'hour': x['moment'].hour}\n",
+    "\n",
+    "# Weather features, united with the mean of the target grouped by\n",
+    "# (station, hour), then scaled and fed to an SGD linear regression.\n",
+    "model = (\n",
+    "    (\n",
+    "        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
+    "        + (get_hour | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()))\n",
+    "    )\n",
+    "    | preprocessing.StandardScaler()\n",
+    "    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n",
+    ")\n",
+    "\n",
+    "metric = metrics.MAE()\n",
+    "\n",
+    "# Score the model over the whole dataset, reporting the MAE every 20,000 samples.\n",
+    "evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "aa7a091c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0. Input\n",
+      "--------\n",
+      "clouds: 75 (int)\n",
+      "description: light rain (str)\n",
+      "humidity: 81 (int)\n",
+      "moment: 2016-04-01 00:00:07 (datetime)\n",
+      "pressure: 1,017.00000 (float)\n",
+      "station: metro-canal-du-midi (str)\n",
+      "temperature: 6.54000 (float)\n",
+      "wind: 9.30000 (float)\n",
+      "\n",
+      "1. Transformer union\n",
+      "--------------------\n",
+      "    1.0 Select\n",
+      "    ----------\n",
+      "    clouds: 75 (int)\n",
+      "    humidity: 81 (int)\n",
+      "    pressure: 1,017.00000 (float)\n",
+      "    temperature: 6.54000 (float)\n",
+      "    wind: 9.30000 (float)\n",
+      "\n",
+      "    1.1 get_hour | y_mean_by_station_and_hour\n",
+      "    -----------------------------------------\n",
+      "    y_mean_by_station_and_hour: 4.43243 (float)\n",
+      "\n",
+      "clouds: 75 (int)\n",
+      "humidity: 81 (int)\n",
+      "pressure: 1,017.00000 (float)\n",
+      "temperature: 6.54000 (float)\n",
+      "wind: 9.30000 (float)\n",
+      "y_mean_by_station_and_hour: 4.43243 (float)\n",
+      "\n",
+      "2. StandardScaler\n",
+      "-----------------\n",
+      "clouds: 0.47566 (float)\n",
+      "humidity: 0.42247 (float)\n",
+      "pressure: 1.05314 (float)\n",
+      "temperature: -1.22098 (float)\n",
+      "wind: 2.21104 (float)\n",
+      "y_mean_by_station_and_hour: -0.59098 (float)\n",
+      "\n",
+      "3. LinearRegression\n",
+      "-------------------\n",
+      "Name                         Value      Weight     Contribution  \n",
+      "                 Intercept    1.00000    6.58252        6.58252  \n",
+      "                  pressure    1.05314    3.78529        3.98646  \n",
+      "                  humidity    0.42247    1.44921        0.61225  \n",
+      "y_mean_by_station_and_hour   -0.59098    0.54167       -0.32011  \n",
+      "                    clouds    0.47566   -1.92255       -0.91448  \n",
+      "                      wind    2.21104   -0.77720       -1.71843  \n",
+      "               temperature   -1.22098    2.47030       -3.01619  \n",
+      "\n",
+      "Prediction: 5.21201\n"
+     ]
+    }
+   ],
+   "source": [
+    "import itertools\n",
+    "\n",
+    "# Same feature pipeline, this time with LinearRegression's default settings.\n",
+    "model = (\n",
+    "    (\n",
+    "        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
+    "        + (get_hour | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()))\n",
+    "    )\n",
+    "    | preprocessing.StandardScaler()\n",
+    "    | linear_model.LinearRegression()\n",
+    ")\n",
+    "\n",
+    "# Train on the first 10,000 samples, predicting each one before learning from it.\n",
+    "warmup = itertools.islice(dataset, 10000)\n",
+    "for x, y in warmup:\n",
+    "    y_pred = model.predict_one(x)\n",
+    "    model.learn_one(x, y)\n",
+    "\n",
+    "# Print the step-by-step trace of the first sample through the pipeline.\n",
+    "x, y = next(iter(dataset))\n",
+    "print(model.debug_one(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "a06bc18b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[20,000] MAE: 20.198137\n",
+      "[40,000] MAE: 12.199763\n",
+      "[60,000] MAE: 9.468279\n",
+      "[80,000] MAE: 8.126625\n",
+      "[100,000] MAE: 7.273133\n",
+      "[120,000] MAE: 6.735469\n",
+      "[140,000] MAE: 6.376704\n",
+      "[160,000] MAE: 6.06156\n",
+      "[180,000] MAE: 5.806744\n",
+      "[182,470] MAE: 5.780772\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MAE: 5.780772"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import datetime as dt\n",
+    "\n",
+    "# Score a clone of the pipeline, passing the 'moment' field and a\n",
+    "# 30-minute delay to progressive_val_score.\n",
+    "thirty_minutes = dt.timedelta(minutes=30)\n",
+    "\n",
+    "evaluate.progressive_val_score(\n",
+    "    dataset,\n",
+    "    model.clone(),\n",
+    "    metrics.MAE(),\n",
+    "    moment='moment',\n",
+    "    delay=thirty_minutes,\n",
+    "    print_every=20_000,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9bbc8b4e",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}