Skip to content

Commit

Permalink
Adding River
Browse files Browse the repository at this point in the history
  • Loading branch information
James Bristow committed Feb 8, 2024
1 parent 57f0244 commit 52c2c3d
Show file tree
Hide file tree
Showing 13 changed files with 2,935 additions and 0 deletions.
289 changes: 289 additions & 0 deletions river/Bike-sharing-forecasting.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "c9d42a09",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading https://maxhalford.github.io/files/datasets/toulouse_bikes.zip (1.12 MB)\n",
"Uncompressing into /home/jbris/river_data/Bikes\n",
"{'clouds': 75,\n",
" 'description': 'light rain',\n",
" 'humidity': 81,\n",
" 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),\n",
" 'pressure': 1017.0,\n",
" 'station': 'metro-canal-du-midi',\n",
" 'temperature': 6.54,\n",
" 'wind': 9.3}\n",
"Number of available bikes: 1\n",
"[20,000] MAE: 4.912763\n",
"[40,000] MAE: 5.333578\n",
"[60,000] MAE: 5.330969\n",
"[80,000] MAE: 5.392334\n",
"[100,000] MAE: 5.423078\n",
"[120,000] MAE: 5.541239\n",
"[140,000] MAE: 5.613038\n",
"[160,000] MAE: 5.622441\n",
"[180,000] MAE: 5.567836\n",
"[182,470] MAE: 5.563905\n"
]
},
{
"data": {
"text/plain": [
"MAE: 5.563905"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pprint import pprint\n",
"\n",
"from river import compose\n",
"from river import datasets\n",
"from river import evaluate\n",
"from river import linear_model\n",
"from river import metrics\n",
"from river import optim\n",
"from river import preprocessing\n",
"\n",
"# Bike-sharing stream shipped with River; the cell output shows it being\n",
"# downloaded and uncompressed the first time it is used.\n",
"dataset = datasets.Bikes()\n",
"\n",
"# Peek at the first (features, target) pair to see what a sample looks like.\n",
"x, y = next(iter(dataset))\n",
"pprint(x)\n",
"print(f'Number of available bikes: {y}')\n",
"\n",
"# Baseline: keep the numeric weather features, standardise them, then fit a\n",
"# linear regression with SGD at a learning rate of 0.001.\n",
"model = compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
"model |= preprocessing.StandardScaler()\n",
"model |= linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n",
"\n",
"metric = metrics.MAE()\n",
"\n",
"# Left as the last expression so the final MAE is shown as the cell result.\n",
"evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "93b94267",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[20,000] MAE: 3.720766\n",
"[40,000] MAE: 3.829739\n",
"[60,000] MAE: 3.844905\n",
"[80,000] MAE: 3.910137\n",
"[100,000] MAE: 3.888553\n",
"[120,000] MAE: 3.923644\n",
"[140,000] MAE: 3.980882\n",
"[160,000] MAE: 3.949972\n",
"[180,000] MAE: 3.934489\n",
"[182,470] MAE: 3.933442\n"
]
},
{
"data": {
"text/plain": [
"MAE: 3.933442"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from river import feature_extraction\n",
"from river import stats\n",
"\n",
"def get_hour(x):\n",
" x['hour'] = x['moment'].hour\n",
" return x\n",
"\n",
"# One branch keeps the weather features, the other adds the mean of the target\n",
"# grouped by (station, hour); the union is standardised and passed to a linear\n",
"# regression trained with SGD at a learning rate of 0.001.\n",
"model = (\n",
"    (\n",
"        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
"        + (get_hour | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()))\n",
"    )\n",
"    | preprocessing.StandardScaler()\n",
"    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))\n",
")\n",
"\n",
"metric = metrics.MAE()\n",
"\n",
"# Left as the last expression so the final MAE is shown as the cell result.\n",
"evaluate.progressive_val_score(dataset, model, metric, print_every=20_000)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "aa7a091c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0. Input\n",
"--------\n",
"clouds: 75 (int)\n",
"description: light rain (str)\n",
"humidity: 81 (int)\n",
"moment: 2016-04-01 00:00:07 (datetime)\n",
"pressure: 1,017.00000 (float)\n",
"station: metro-canal-du-midi (str)\n",
"temperature: 6.54000 (float)\n",
"wind: 9.30000 (float)\n",
"\n",
"1. Transformer union\n",
"--------------------\n",
" 1.0 Select\n",
" ----------\n",
" clouds: 75 (int)\n",
" humidity: 81 (int)\n",
" pressure: 1,017.00000 (float)\n",
" temperature: 6.54000 (float)\n",
" wind: 9.30000 (float)\n",
"\n",
" 1.1 get_hour | y_mean_by_station_and_hour\n",
" -----------------------------------------\n",
" y_mean_by_station_and_hour: 4.43243 (float)\n",
"\n",
"clouds: 75 (int)\n",
"humidity: 81 (int)\n",
"pressure: 1,017.00000 (float)\n",
"temperature: 6.54000 (float)\n",
"wind: 9.30000 (float)\n",
"y_mean_by_station_and_hour: 4.43243 (float)\n",
"\n",
"2. StandardScaler\n",
"-----------------\n",
"clouds: 0.47566 (float)\n",
"humidity: 0.42247 (float)\n",
"pressure: 1.05314 (float)\n",
"temperature: -1.22098 (float)\n",
"wind: 2.21104 (float)\n",
"y_mean_by_station_and_hour: -0.59098 (float)\n",
"\n",
"3. LinearRegression\n",
"-------------------\n",
"Name Value Weight Contribution \n",
" Intercept 1.00000 6.58252 6.58252 \n",
" pressure 1.05314 3.78529 3.98646 \n",
" humidity 0.42247 1.44921 0.61225 \n",
"y_mean_by_station_and_hour -0.59098 0.54167 -0.32011 \n",
" clouds 0.47566 -1.92255 -0.91448 \n",
" wind 2.21104 -0.77720 -1.71843 \n",
" temperature -1.22098 2.47030 -3.01619 \n",
"\n",
"Prediction: 5.21201\n"
]
}
],
"source": [
"import itertools\n",
"\n",
"# Weather features plus the per-(station, hour) target mean, standardised, then\n",
"# a linear regression that keeps its default optimizer.\n",
"model = (\n",
"    (\n",
"        compose.Select('clouds', 'humidity', 'pressure', 'temperature', 'wind')\n",
"        + (get_hour | feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean()))\n",
"    )\n",
"    | preprocessing.StandardScaler()\n",
"    | linear_model.LinearRegression()\n",
")\n",
"\n",
"# Train on the first 10,000 samples, predicting before learning on each one.\n",
"for x, y in itertools.islice(dataset, 10_000):\n",
"    y_pred = model.predict_one(x)\n",
"    model.learn_one(x, y)\n",
"\n",
"# Print a step-by-step trace of the pipeline for the first sample of the stream.\n",
"x, y = next(iter(dataset))\n",
"print(model.debug_one(x))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a06bc18b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[20,000] MAE: 20.198137\n",
"[40,000] MAE: 12.199763\n",
"[60,000] MAE: 9.468279\n",
"[80,000] MAE: 8.126625\n",
"[100,000] MAE: 7.273133\n",
"[120,000] MAE: 6.735469\n",
"[140,000] MAE: 6.376704\n",
"[160,000] MAE: 6.06156\n",
"[180,000] MAE: 5.806744\n",
"[182,470] MAE: 5.780772\n"
]
},
{
"data": {
"text/plain": [
"MAE: 5.780772"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import datetime as dt\n",
"\n",
"# Score a clone of the pipeline (not `model` itself) with progressive validation,\n",
"# using each sample's 'moment' field as the clock and a 30-minute delay.\n",
"# NOTE(review): per River's docs the delay is how long the target is withheld\n",
"# after the features are seen - confirm against the installed version.\n",
"evaluate.progressive_val_score(\n",
"    dataset,\n",
"    model.clone(),\n",
"    metrics.MAE(),\n",
"    moment='moment',\n",
"    delay=dt.timedelta(minutes=30),\n",
"    print_every=20_000,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bbc8b4e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 52c2c3d

Please sign in to comment.