From 3eafaf7356853de689b98760c4e6448bf9f8d639 Mon Sep 17 00:00:00 2001 From: KarsVeldkamp <105375796+KarsVeldkamp@users.noreply.github.com> Date: Tue, 14 Jan 2025 07:33:09 +0100 Subject: [PATCH] Tutorial ppg (#106) * Start heart rate estimation including tfd * stash updates * update based on merge conflicts * update hr main function * update minor stuff matlab files * change tfd parameters to dict * First steps to refactoring to numpy * Finalizing heart rate pipeline plus renaming preprocessing to acc instead of imu * setting execution count to null * fix output types * fixing pytype due to old configs * fix min_hr_samples pytype error * remove irrelevant config * resolved comments PR reviewer * remove data type since it causes pytype error * clear execution count * import heart_rate_estimation * merge updates * update ppg_analysis * replace df to numpy arrays * fix notebook * adding accelerometer feature * fix type error list vs float * implement accelerometer feature * update matlab file based on refactoring * fixing types, docstrings and readibility * adding documentation acc feature * add documentation of autocorrelation * Made requested changes * fix config * adding aggregation heart rate * update aggregates * remove config * add start tutorial * aggregation implementation with dictionary * add lines for consistency * moved windoweddataextractor to segmenting * resolve merge conflict * update loading + preprocessing with seperate functionality * update tutorial notebook and corresponding notebook * Update test to io function * update tutorial and notebook with simplified loading of the data * Reset execution counts in notebook cells * update execution_counts and output * Updates from PR feedback * fix type error * made final feedback --------- Co-authored-by: Erikpostt Co-authored-by: Erik Post <57133568+Erikpostt@users.noreply.github.com> --- docs/notebooks/ppg/ppg_analysis.ipynb | 58 +- docs/tutorials/heart_rate_analysis.ipynb | 1278 +++++++++++++++++ .../heart_rate/heart_rate_analysis.py | 2 +- src/paradigma/preprocessing.py | 104 +- tests/test_ppg_analysis.py | 4 +- 5 files changed, 1397 insertions(+), 49 deletions(-) create mode 100644 docs/tutorials/heart_rate_analysis.ipynb diff --git a/docs/notebooks/ppg/ppg_analysis.ipynb b/docs/notebooks/ppg/ppg_analysis.ipynb index b01ea488..a96ed896 100644 --- a/docs/notebooks/ppg/ppg_analysis.ipynb +++ b/docs/notebooks/ppg/ppg_analysis.ipynb @@ -13,10 +13,14 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", + "import importlib.resources\n", + "import pickle\n", + "from pathlib import Path\n", + "\n", "from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig\n", - "from paradigma.preprocessing import scan_and_sync_segments, preprocess_ppg_data\n", - "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate, aggregate_heart_rate" + "from paradigma.preprocessing import preprocess_ppg_data\n", + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate, aggregate_heart_rate\n", + "from paradigma.util import load_tsdf_dataframe" ] }, { @@ -33,18 +37,42 @@ "branch = 'heart_rate'\n", "sensor = 'ppg'\n", "\n", - "path_to_data = '../../../tests/data'\n", - "path_to_classifier = os.path.join(path_to_data, '0.classification', sensor)\n", - 
"path_to_sensor_data = os.path.join(path_to_data, '1.prepared_data')\n", - "path_to_preprocessed_data = os.path.join(path_to_data, '2.preprocessed_data', sensor)\n", - "path_to_quality_features = os.path.join(path_to_data, '3.extracted_features', branch)\n", - "path_to_signal_quality = os.path.join(path_to_data, '4.predictions', branch)\n", - "path_to_hr_estimate = os.path.join(path_to_data, '5.quantification', branch)\n", - "path_to_hr_aggregation = os.path.join(path_to_data, '6.aggregation', branch)\n", + "ppg_prefix = 'PPG'\n", + "imu_prefix = 'IMU'\n", + "\n", + "ppg_classifier_package_filename = 'ppg_quality_classifier.pkl'\n", + "\n", + "path_to_data = Path('../../../tests/data')\n", + "path_to_assets = Path('../../../src/paradigma/assets')\n", + "\n", + "path_to_classifier = path_to_data / '0.Classification' / sensor\n", + "path_to_prepared_data = path_to_data / '1.prepared_data'\n", + "path_to_preprocessed_data = path_to_data / '2.preprocessed_data' / sensor\n", + "path_to_extracted_features = path_to_data / '3.extracted_features' / branch\n", + "path_to_predictions = path_to_data / '4.predictions' / branch\n", + "path_to_quantification = path_to_data / '5.quantification' / branch\n", + "path_to_aggregation = path_to_data / '6.aggregation' / branch\n", "\n", "aggregation_filename = 'heart_rate_aggregates.json'" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / ppg_prefix, prefix = ppg_prefix)\n", + "df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / imu_prefix, prefix = imu_prefix)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -61,8 +89,7 @@ "ppg_config = PPGConfig()\n", "imu_config = IMUConfig()\n", "\n", - "metadatas_ppg, metadatas_imu = scan_and_sync_segments(os.path.join(path_to_sensor_data, sensor), os.path.join(path_to_sensor_data, 'imu'))\n", - "df_ppg_proc, df_acc_proc = preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0], path_to_preprocessed_data, ppg_config, imu_config)" + "df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, ppg_config, imu_config)" ] }, { @@ -96,8 +123,11 @@ "metadata": {}, "outputs": [], "source": [ + "full_path_to_classifier_package = path_to_assets / ppg_classifier_package_filename\n", + "with importlib.resources.files('paradigma.assets').joinpath(ppg_classifier_package_filename).open('rb') as f:\n", + " clf_package = pickle.load(f)\n", "config = SignalQualityClassificationConfig()\n", - "df_sqa = signal_quality_classification(df_features, config, path_to_classifier)" + "df_sqa = signal_quality_classification(df_features, config, clf_package)" ] }, { diff --git a/docs/tutorials/heart_rate_analysis.ipynb b/docs/tutorials/heart_rate_analysis.ipynb new file mode 100644 index 00000000..1abd5a09 --- /dev/null +++ b/docs/tutorials/heart_rate_analysis.ipynb @@ -0,0 +1,1278 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Heart rate analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial shows how to extract heart rate estimates using photoplethysmography (PPG) data and accelerometer data. The pipeline consists of a stepwise approach to determine signal quality, assessing both PPG morphology and accounting for periodic artifacts using the accelerometer. 
Based on the signal quality, we extract high-quality segments and estimate the heart rate for every 2 s using the smoothed pseudo Wigner-Ville Distribution. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This pipeline requires accelerometer and PPG data to run. In this example we loaded data from a participant of the Personalized Parkinson Project. We load the corresponding dataframes using the `load_tsdf_dataframe function`. The channel `green` represents the values obtained with PPG using green light." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timegreen
00.00000649511
10.00996648214
20.01992646786
30.02988645334
40.03984644317
.........
64770644.54720553884
64771644.55716554088
64772644.56712554240
64773644.57708555134
64774644.58704557205
\n", + "

64775 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " time green\n", + "0 0.00000 649511\n", + "1 0.00996 648214\n", + "2 0.01992 646786\n", + "3 0.02988 645334\n", + "4 0.03984 644317\n", + "... ... ...\n", + "64770 644.54720 553884\n", + "64771 644.55716 554088\n", + "64772 644.56712 554240\n", + "64773 644.57708 555134\n", + "64774 644.58704 557205\n", + "\n", + "[64775 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeaccelerometer_xaccelerometer_yaccelerometer_zgyroscope_xgyroscope_ygyroscope_z
00.000000.5507180.574163-0.273684-115.67073232.012195-26.097561
10.010040.5358850.623445-0.254545-110.60975734.634146-24.695122
20.020080.5043060.651675-0.251675-103.23170836.768293-22.926829
30.030120.4885170.686603-0.265550-96.28048838.719512-21.158537
40.040160.4942580.725359-0.278469-92.56097641.280488-20.304878
........................
72942730.744680.234928-0.516268-0.8028710.975610-2.2560982.256098
72943730.754720.245455-0.514354-0.8066990.304878-1.7073171.768293
72944730.764760.243541-0.511005-0.8071770.304878-1.5853661.890244
72945730.774800.240191-0.514354-0.8081340.000000-1.2804881.585366
72946730.784840.243541-0.511005-0.808134-0.060976-1.0365851.219512
\n", + "

72947 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " time accelerometer_x accelerometer_y accelerometer_z \\\n", + "0 0.00000 0.550718 0.574163 -0.273684 \n", + "1 0.01004 0.535885 0.623445 -0.254545 \n", + "2 0.02008 0.504306 0.651675 -0.251675 \n", + "3 0.03012 0.488517 0.686603 -0.265550 \n", + "4 0.04016 0.494258 0.725359 -0.278469 \n", + "... ... ... ... ... \n", + "72942 730.74468 0.234928 -0.516268 -0.802871 \n", + "72943 730.75472 0.245455 -0.514354 -0.806699 \n", + "72944 730.76476 0.243541 -0.511005 -0.807177 \n", + "72945 730.77480 0.240191 -0.514354 -0.808134 \n", + "72946 730.78484 0.243541 -0.511005 -0.808134 \n", + "\n", + " gyroscope_x gyroscope_y gyroscope_z \n", + "0 -115.670732 32.012195 -26.097561 \n", + "1 -110.609757 34.634146 -24.695122 \n", + "2 -103.231708 36.768293 -22.926829 \n", + "3 -96.280488 38.719512 -21.158537 \n", + "4 -92.560976 41.280488 -20.304878 \n", + "... ... ... ... \n", + "72942 0.975610 -2.256098 2.256098 \n", + "72943 0.304878 -1.707317 1.768293 \n", + "72944 0.304878 -1.585366 1.890244 \n", + "72945 0.000000 -1.280488 1.585366 \n", + "72946 -0.060976 -1.036585 1.219512 \n", + "\n", + "[72947 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pathlib import Path\n", + "from paradigma.util import load_tsdf_dataframe\n", + "\n", + "path_to_data = Path('../../tests/data')\n", + "path_to_prepared_data = path_to_data / '1.prepared_data'\n", + "\n", + "ppg_prefix = 'PPG'\n", + "imu_prefix = 'IMU'\n", + "\n", + "df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / ppg_prefix, prefix = ppg_prefix)\n", + "df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / imu_prefix, prefix = imu_prefix)\n", + "\n", + "display(df_ppg, df_imu)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Preprocess data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first step after loading the data is preprocessing. This begins by isolating segments containing both PPG and IMU data, discarding portions where one modality (e.g., PPG) extends beyond the other, such as when the PPG recording is longer than the accelerometer data. After this step, the preprocess_ppg_data function resamples the PPG and accelerometer data to uniformly distributed timestamps, addressing the fixed but non-uniform sampling rates of the sensors. After this, a bandpass Butterworth filter (4th-order, bandpass frequencies: 0.4--3.5 Hz) is applied to the PPG signal, while a high-pass Butterworth filter (4th-order, cut-off frequency: 0.2 Hz) is applied to the accelerometer data.\n", + "\n", + "Note: the printed shapes are (rows, columns) with each row corresponding to a single data point and each column representing a data column (e.g.time). The number of rows of the overlapping segments of PPG and accelerometer are not exactly the same due to sampling differences (other sensors and possibly other sampling frequencies). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original data shapes:\n", + "- PPG data: (64775, 2)\n", + "- Accelerometer data: (72947, 4)\n", + "Overlapping data shapes:\n", + "- PPG data: (64775, 2)\n", + "- Accelerometer data: (64361, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timegreen
00.000000-1434.856838
10.033333-3461.717789
20.066667-5172.913796
30.100000-6343.203160
40.133333-6875.904054
.........
19333644.433333-2403.547097
19334644.466667-7434.311944
19335644.500000-8214.847078
19336644.533333-5194.393329
19337644.56666770.147000
\n", + "

19338 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " time green\n", + "0 0.000000 -1434.856838\n", + "1 0.033333 -3461.717789\n", + "2 0.066667 -5172.913796\n", + "3 0.100000 -6343.203160\n", + "4 0.133333 -6875.904054\n", + "... ... ...\n", + "19333 644.433333 -2403.547097\n", + "19334 644.466667 -7434.311944\n", + "19335 644.500000 -8214.847078\n", + "19336 644.533333 -5194.393329\n", + "19337 644.566667 70.147000\n", + "\n", + "[19338 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeaccelerometer_xgrav_accelerometer_xaccelerometer_ygrav_accelerometer_yaccelerometer_zgrav_accelerometer_z
00.000.0530780.4976390.0100400.564123-0.273154-0.000530
10.010.0383370.4976660.0588020.564510-0.2568990.002305
20.020.0068240.4976980.0865590.564887-0.2567390.005122
30.03-0.0091560.4977330.1208550.565254-0.2732800.007919
40.04-0.0037700.4977720.1593160.565610-0.2890070.010696
........................
64454644.540.1041850.8793770.1016270.200866-0.029342-0.239111
64455644.550.1204030.8793770.0904130.2008650.051113-0.239111
64456644.560.1012890.8793770.1286840.2008650.037951-0.239111
64457644.570.0731390.8793770.1373990.2008650.030834-0.239111
64458644.580.0045780.8793770.1028620.2008650.025627-0.239111
\n", + "

64459 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " time accelerometer_x grav_accelerometer_x accelerometer_y \\\n", + "0 0.00 0.053078 0.497639 0.010040 \n", + "1 0.01 0.038337 0.497666 0.058802 \n", + "2 0.02 0.006824 0.497698 0.086559 \n", + "3 0.03 -0.009156 0.497733 0.120855 \n", + "4 0.04 -0.003770 0.497772 0.159316 \n", + "... ... ... ... ... \n", + "64454 644.54 0.104185 0.879377 0.101627 \n", + "64455 644.55 0.120403 0.879377 0.090413 \n", + "64456 644.56 0.101289 0.879377 0.128684 \n", + "64457 644.57 0.073139 0.879377 0.137399 \n", + "64458 644.58 0.004578 0.879377 0.102862 \n", + "\n", + " grav_accelerometer_y accelerometer_z grav_accelerometer_z \n", + "0 0.564123 -0.273154 -0.000530 \n", + "1 0.564510 -0.256899 0.002305 \n", + "2 0.564887 -0.256739 0.005122 \n", + "3 0.565254 -0.273280 0.007919 \n", + "4 0.565610 -0.289007 0.010696 \n", + "... ... ... ... \n", + "64454 0.200866 -0.029342 -0.239111 \n", + "64455 0.200865 0.051113 -0.239111 \n", + "64456 0.200865 0.037951 -0.239111 \n", + "64457 0.200865 0.030834 -0.239111 \n", + "64458 0.200865 0.025627 -0.239111 \n", + "\n", + "[64459 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from paradigma.config import PPGConfig, IMUConfig\n", + "from paradigma.preprocessing import preprocess_ppg_data\n", + "\n", + "ppg_config = PPGConfig()\n", + "imu_config = IMUConfig()\n", + "\n", + "df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, ppg_config, imu_config)\n", + "\n", + "display(df_ppg_proc, df_acc_proc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Extract signal quality features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The preprocessed data (PPG & accelerometer) is windowed into overlapping windows of length `ppg_config.window_length_s` with a window step of `ppg_config.window_step_length_s`. From the PPG windows 10 time- and frequency domain features are extracted to assess PPG morphology and from the accelerometer windows one relative power feature is calculated to assess periodic motion artifacts. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The default window length for the signal quality feature extraction is set to 6 seconds.\n", + "The default step size for the signal quality feature extraction is set to 1 seconds.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timevarmeanmediankurtosisskewnesssignal_to_noiseauto_corrf_domrel_powerspectral_entropyacc_power_ratio
00.01.697510e+073418.6963922892.8151052.5404470.2772373.2416340.2824521.1718750.1799220.5611860.014196
11.01.635033e+073350.7691582892.8151052.6479510.4201633.2023130.2765261.1718750.2019720.5478120.015343
22.01.921248e+073698.9630993310.7735562.3336870.4819743.5218820.2109840.8203120.2649220.5090150.042583
33.01.646562e+073389.3257752946.5885262.6081870.7339133.3462360.2258230.7031250.2707070.4960510.034215
44.01.603135e+073364.3663132912.2807982.2687680.4403573.3145650.3059980.8203120.2640970.4966970.056204
.......................................
634634.02.281660e+061106.006037838.3077294.0580370.6741842.0726600.1720520.7031250.2040280.5775700.060486
635635.02.224441e+061113.194933838.3077293.6841590.0679792.1180190.2364080.7031250.2210440.5603620.053120
636636.05.103141e+061627.6189181030.1133033.9130680.8523312.0400940.2358220.5859380.1576670.5147170.058853
637637.07.428728e+062093.7360071520.6400673.1887530.3057802.4574460.3220340.4687500.1505820.4584770.061269
638638.01.549534e+072974.4625552482.7039444.042171-0.6280082.2287300.0959180.4687500.1029760.4938300.105554
\n", + "

639 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " time var mean median kurtosis skewness \\\n", + "0 0.0 1.697510e+07 3418.696392 2892.815105 2.540447 0.277237 \n", + "1 1.0 1.635033e+07 3350.769158 2892.815105 2.647951 0.420163 \n", + "2 2.0 1.921248e+07 3698.963099 3310.773556 2.333687 0.481974 \n", + "3 3.0 1.646562e+07 3389.325775 2946.588526 2.608187 0.733913 \n", + "4 4.0 1.603135e+07 3364.366313 2912.280798 2.268768 0.440357 \n", + ".. ... ... ... ... ... ... \n", + "634 634.0 2.281660e+06 1106.006037 838.307729 4.058037 0.674184 \n", + "635 635.0 2.224441e+06 1113.194933 838.307729 3.684159 0.067979 \n", + "636 636.0 5.103141e+06 1627.618918 1030.113303 3.913068 0.852331 \n", + "637 637.0 7.428728e+06 2093.736007 1520.640067 3.188753 0.305780 \n", + "638 638.0 1.549534e+07 2974.462555 2482.703944 4.042171 -0.628008 \n", + "\n", + " signal_to_noise auto_corr f_dom rel_power spectral_entropy \\\n", + "0 3.241634 0.282452 1.171875 0.179922 0.561186 \n", + "1 3.202313 0.276526 1.171875 0.201972 0.547812 \n", + "2 3.521882 0.210984 0.820312 0.264922 0.509015 \n", + "3 3.346236 0.225823 0.703125 0.270707 0.496051 \n", + "4 3.314565 0.305998 0.820312 0.264097 0.496697 \n", + ".. ... ... ... ... ... \n", + "634 2.072660 0.172052 0.703125 0.204028 0.577570 \n", + "635 2.118019 0.236408 0.703125 0.221044 0.560362 \n", + "636 2.040094 0.235822 0.585938 0.157667 0.514717 \n", + "637 2.457446 0.322034 0.468750 0.150582 0.458477 \n", + "638 2.228730 0.095918 0.468750 0.102976 0.493830 \n", + "\n", + " acc_power_ratio \n", + "0 0.014196 \n", + "1 0.015343 \n", + "2 0.042583 \n", + "3 0.034215 \n", + "4 0.056204 \n", + ".. ... \n", + "634 0.060486 \n", + "635 0.053120 \n", + "636 0.058853 \n", + "637 0.061269 \n", + "638 0.105554 \n", + "\n", + "[639 rows x 12 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features\n", + "\n", + "ppg_config = SignalQualityFeatureExtractionConfig()\n", + "acc_config = SignalQualityFeatureExtractionAccConfig()\n", + "\n", + "print(\"The default window length for the signal quality feature extraction is set to\", ppg_config.window_length_s, \"seconds.\")\n", + "print(\"The default step size for the signal quality feature extraction is set to\", ppg_config.window_step_length_s, \"seconds.\")\n", + "\n", + "df_features = extract_signal_quality_features(ppg_config, df_ppg_proc, acc_config, df_acc_proc)\n", + "\n", + "df_features\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Signal quality classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A trained logistic classifier is used to predict PPG signal quality and returns the `pred_sqa_proba`, which is the posterior probability of a PPG window to look like the typical PPG morphology (higher probability indicates toward the typical PPG morphology). The relative power feature from the accelerometer is compared to a threshold for periodic artifacts and therefore `pred_sqa_acc_label` is used to return a label indicating predicted periodic motion artifacts (label 0) or no periodic motion artifacts (label 1). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pred_sqa_probapred_sqa_acc_label
00.0060311
10.0117251
20.0680631
30.0808081
40.0745911
.........
6340.0013871
6350.0015701
6360.0003801
6370.0004061
6380.0000041
\n", + "

639 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " pred_sqa_proba pred_sqa_acc_label\n", + "0 0.006031 1\n", + "1 0.011725 1\n", + "2 0.068063 1\n", + "3 0.080808 1\n", + "4 0.074591 1\n", + ".. ... ...\n", + "634 0.001387 1\n", + "635 0.001570 1\n", + "636 0.000380 1\n", + "637 0.000406 1\n", + "638 0.000004 1\n", + "\n", + "[639 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import SignalQualityClassificationConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import signal_quality_classification\n", + "\n", + "path_to_classifier = Path('../../tests/data/0.Classification/ppg')\n", + "config = SignalQualityClassificationConfig()\n", + "\n", + "df_sqa = signal_quality_classification(df_features, config, path_to_classifier)\n", + "\n", + "df_sqa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Heart rate estimation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For heart rate estimation, we extract segments of `config.tfd_length`. We calculate the smoothed-pseudo Wigner-Ville Distribution (SPWVD) to obtain the frequency content of the PPG signal over time. We extract for every timestamp in the SPWVD the frequency with the highest power. For every non-overlapping 2 s window we average the corresponding frequencies to obtain a heart rate per window. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The default minimal window length for the heart rate extraction is set to 10 seconds.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeheart_rate
056.086.404715
158.086.640472
260.086.345776
362.084.872299
464.084.872299
566.084.194499
\n", + "
" + ], + "text/plain": [ + " time heart_rate\n", + "0 56.0 86.404715\n", + "1 58.0 86.640472\n", + "2 60.0 86.345776\n", + "3 62.0 84.872299\n", + "4 64.0 84.872299\n", + "5 66.0 84.194499" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import HeartRateExtractionConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import estimate_heart_rate\n", + "\n", + "config = HeartRateExtractionConfig()\n", + "\n", + "print(\"The default minimal window length for the heart rate extraction is set to\", config.tfd_length, \"seconds.\")\n", + "\n", + "df_hr = estimate_heart_rate(df_sqa, df_ppg_proc, config)\n", + "\n", + "df_hr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Heart rate aggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final step is to aggregate all 2 s heart rate estimates. In the current example, the mode and 99th percentile are calculated. We hypothesize that the mode gives representation of the resting heart rate while the 99th percentile indicates the maximum heart rate. In Parkinson's disease, we expect that these two measures could reflect autonomic (dys)functioning. The `nr_hr_est` in the metadata indicates based on how many 2 s windows these aggregates are determined." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'hr_aggregates': {'99p_heart_rate': 86.62868369351669,\n", + " 'mode_heart_rate': 84.8722986247544},\n", + " 'metadata': {'nr_hr_est': 6}}\n" + ] + } + ], + "source": [ + "import pprint\n", + "from paradigma.heart_rate.heart_rate_analysis import aggregate_heart_rate\n", + "hr_values = df_hr['heart_rate'].values\n", + "df_hr_agg = aggregate_heart_rate(hr_values, aggregates = ['mode', '99p'])\n", + "\n", + "pprint.pprint(df_hr_agg)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "paradigma-8ps1bg0Z-py3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/paradigma/heart_rate/heart_rate_analysis.py b/src/paradigma/heart_rate/heart_rate_analysis.py index ac9e9449..baee092c 100644 --- a/src/paradigma/heart_rate/heart_rate_analysis.py +++ b/src/paradigma/heart_rate/heart_rate_analysis.py @@ -249,7 +249,7 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, t_hr_rel[hr_pos:hr_pos + n_hr] = hr_time hr_pos += n_hr - df_hr = pd.DataFrame({"rel_time": t_hr_rel, "heart_rate": v_hr_rel}) + df_hr = pd.DataFrame({"time": t_hr_rel, "heart_rate": v_hr_rel}) return df_hr diff --git a/src/paradigma/preprocessing.py b/src/paradigma/preprocessing.py index 4513129d..690739c9 100644 --- a/src/paradigma/preprocessing.py +++ b/src/paradigma/preprocessing.py @@ -244,7 +244,22 @@ def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path write_df_data(metadata_time, metadata_values, path_to_output, f'{sensor}_meta.json', df_sensor) -def scan_and_sync_segments(input_path_ppg, input_path_imu): +def scan_and_sync_segments(input_path_ppg: str | Path, input_path_imu: str | Path) -> Tuple[List[tsdf.TSDFMetadata], 
List[tsdf.TSDFMetadata]]: + """ + Scan for available TSDF metadata files in the specified directories and synchronize the data segments based on the metadata start and end times. + + Parameters + ---------- + input_path_ppg : str + Path to the directory containing PPG data. + input_path_imu : str + Path to the directory containing IMU data. + + Returns + ------- + Tuple[List[tsdf.TSDFMetadata], List[tsdf.TSDFMetadata]] + A tuple containing lists of metadata objects for PPG and IMU data, respectively. + """ # Scan for available TSDF metadata files meta_ppg = extract_meta_from_tsdf_files(input_path_ppg) @@ -262,11 +277,10 @@ def scan_and_sync_segments(input_path_ppg, input_path_imu): return metadatas_ppg, metadatas_imu -def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TSDFMetadata, - output_path: Union[str, Path], ppg_config: PPGConfig, - imu_config: IMUConfig, store_locally:bool = True) -> Tuple[pd.DataFrame, pd.DataFrame]: +def preprocess_ppg_data(df_ppg: pd.DataFrame, df_imu: pd.DataFrame, ppg_config: PPGConfig, + imu_config: IMUConfig) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments. + Preprocess PPG and IMU (accelerometer only) data by resampling, filtering, and aligning the data segments. Parameters ---------- @@ -289,24 +303,15 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS Preprocessed PPG and IMU data as DataFrames. """ - # Load PPG data - metadata_time_ppg = tsdf_meta_ppg[ppg_config.time_filename] - metadata_values_ppg = tsdf_meta_ppg[ppg_config.values_filename] - df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_values_ppg], tsdf.constants.ConcatenationType.columns) - - # Load IMU data - metadata_time_imu = tsdf_meta_imu[imu_config.time_filename] - metadata_values_imu = tsdf_meta_imu[imu_config.values_filename] - df_imu = tsdf.load_dataframe_from_binaries([metadata_time_imu, metadata_values_imu], tsdf.constants.ConcatenationType.columns) # Drop the gyroscope columns from the IMU data cols_to_drop = df_imu.filter(regex='^gyroscope_').columns df_acc = df_imu.drop(cols_to_drop, axis=1) # Extract overlapping segments - print("Shape of the original data:", df_ppg.shape, df_acc.shape) + print(f"Original data shapes:\n- PPG data: {df_ppg.shape}\n- Accelerometer data: {df_acc.shape}") df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc) - print("Shape of the overlapping segments:", df_ppg_overlapping.shape, df_acc_overlapping.shape) + print(f"Overlapping data shapes:\n- PPG data: {df_ppg_overlapping.shape}\n- Accelerometer data: {df_acc_overlapping.shape}") # Resample accelerometer data df_acc_proc = resample_data( @@ -350,25 +355,60 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS df_ppg_proc = df_ppg_proc.drop(columns=[col]) df_ppg_proc = df_ppg_proc.rename(columns={f'filt_{col}': col}) - - if store_locally: - # Store data - metadata_values_imu.channels = list(imu_config.d_channels_accelerometer.keys()) - metadata_values_imu.units = list(imu_config.d_channels_accelerometer.values()) - metadata_values_imu.file_name = 'accelerometer_values.bin' - metadata_time_imu.units = [TimeUnit.ABSOLUTE_MS] - metadata_time_imu.file_name = 'accelerometer_time.bin' - write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_acc_proc) - - metadata_values_ppg.channels = list(ppg_config.d_channels_ppg.keys()) - 
metadata_values_ppg.units = list(ppg_config.d_channels_ppg.values()) - metadata_values_ppg.file_name = 'PPG_values.bin' - metadata_time_ppg.units = [TimeUnit.ABSOLUTE_MS] - metadata_time_ppg.file_name = 'PPG_time.bin' - write_df_data(metadata_time_ppg, metadata_values_ppg, output_path, 'PPG_meta.json', df_ppg_proc) return df_ppg_proc, df_acc_proc +def preprocess_ppg_data_io(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TSDFMetadata, + output_path: Union[str, Path], ppg_config: PPGConfig, + imu_config: IMUConfig) -> None: + """ + Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments. + + Parameters + ---------- + tsdf_meta_ppg : tsdf.TSDFMetadata + Metadata for the PPG data. + tsdf_meta_imu : tsdf.TSDFMetadata + Metadata for the IMU data. + output_path : Union[str, Path] + Path to store the preprocessed data. + ppg_config : PPGConfig + Configuration object for PPG preprocessing. + imu_config : IMUConfig + Configuration object for IMU preprocessing. + + Returns + ------- + None + """ + + # Load PPG data + metadata_time_ppg = tsdf_meta_ppg[ppg_config.time_filename] + metadata_values_ppg = tsdf_meta_ppg[ppg_config.values_filename] + df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_values_ppg], tsdf.constants.ConcatenationType.columns) + + # Load IMU data + metadata_time_imu = tsdf_meta_imu[imu_config.time_filename] + metadata_values_imu = tsdf_meta_imu[imu_config.values_filename] + df_imu = tsdf.load_dataframe_from_binaries([metadata_time_imu, metadata_values_imu], tsdf.constants.ConcatenationType.columns) + + # Preprocess data + df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg=df_ppg, df_imu=df_imu, ppg_config=ppg_config, imu_config=imu_config) + + # Store data + metadata_values_imu.channels = list(imu_config.d_channels_accelerometer.keys()) + metadata_values_imu.units = list(imu_config.d_channels_accelerometer.values()) + metadata_values_imu.file_name = 'accelerometer_values.bin' + metadata_time_imu.units = [TimeUnit.ABSOLUTE_MS] + metadata_time_imu.file_name = 'accelerometer_time.bin' + write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_acc_proc) + + metadata_values_ppg.channels = list(ppg_config.d_channels_ppg.keys()) + metadata_values_ppg.units = list(ppg_config.d_channels_ppg.values()) + metadata_values_ppg.file_name = 'PPG_values.bin' + metadata_time_ppg.units = [TimeUnit.ABSOLUTE_MS] + metadata_time_ppg.file_name = 'PPG_time.bin' + write_df_data(metadata_time_ppg, metadata_values_ppg, output_path, 'PPG_meta.json', df_ppg_proc) # TODO: ideally something like this should be possible directly in the tsdf library def extract_meta_from_tsdf_files(tsdf_data_dir : str) -> List[dict]: diff --git a/tests/test_ppg_analysis.py b/tests/test_ppg_analysis.py index 4b493884..4fb6430b 100644 --- a/tests/test_ppg_analysis.py +++ b/tests/test_ppg_analysis.py @@ -2,7 +2,7 @@ from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig -from paradigma.preprocessing import preprocess_ppg_data, scan_and_sync_segments +from paradigma.preprocessing import preprocess_ppg_data_io, scan_and_sync_segments from test_notebooks import compare_data @@ -53,7 +53,7 @@ def compare_ppg_preprocessing( metadatas_ppg, metadatas_imu = scan_and_sync_segments( path_to_ppg_input, path_to_imu_input ) - preprocess_ppg_data( + preprocess_ppg_data_io( metadatas_ppg[0], metadatas_imu[0], path_to_tested_output, 
ppg_config, imu_config ) compare_data(path_to_reference_output, path_to_tested_output, binaries_pairs)
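A minimal usage sketch of the refactored preprocessing API (not part of the patch itself), mirroring the calls that appear in the updated tutorial notebook and in tests/test_ppg_analysis.py: `preprocess_ppg_data` now takes dataframes and returns preprocessed dataframes, while the previous load-and-write behaviour lives in `preprocess_ppg_data_io`. The prepared-data path and 'output_dir' below are placeholder values; all function names and argument orders are taken from the diff above.

from pathlib import Path

from paradigma.config import PPGConfig, IMUConfig
from paradigma.preprocessing import preprocess_ppg_data, preprocess_ppg_data_io, scan_and_sync_segments
from paradigma.util import load_tsdf_dataframe

path_to_prepared_data = Path('tests/data/1.prepared_data')  # placeholder path

# In-memory variant: pass PPG and IMU dataframes, get preprocessed PPG and accelerometer dataframes back.
df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / 'PPG', prefix='PPG')
df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / 'IMU', prefix='IMU')
df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, PPGConfig(), IMUConfig())

# TSDF I/O variant: scan and sync segments, then load, preprocess, and write TSDF output to disk.
metadatas_ppg, metadatas_imu = scan_and_sync_segments(path_to_prepared_data / 'PPG', path_to_prepared_data / 'IMU')
preprocess_ppg_data_io(metadatas_ppg[0], metadatas_imu[0], 'output_dir', PPGConfig(), IMUConfig())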