From 3eafaf7356853de689b98760c4e6448bf9f8d639 Mon Sep 17 00:00:00 2001 From: KarsVeldkamp <105375796+KarsVeldkamp@users.noreply.github.com> Date: Tue, 14 Jan 2025 07:33:09 +0100 Subject: [PATCH] Tutorial ppg (#106) * Start heart rate estimation including tfd * stash updates * update based on merge conflicts * update hr main function * update minor stuff matlab files * change tfd parameters to dict * First steps to refactoring to numpy * Finalizing heart rate pipeline plus renaming preprocessing to acc instead of imu * setting execution count to null * fix output types * fixing pytype due to old configs * fix min_hr_samples pytype error * remove irrelevant config * resolved comments PR reviewer * remove data type since it causes pytype error * clear execution count * import heart_rate_estimation * merge updates * update ppg_analysis * replace df to numpy arrays * fix notebook * adding accelerometer feature * fix type error list vs float * implement accelerometer feature * update matlab file based on refactoring * fixing types, docstrings and readibility * adding documentation acc feature * add documentation of autocorrelation * Made requested changes * fix config * adding aggregation heart rate * update aggregates * remove config * add start tutorial * aggregation implementation with dictionary * add lines for consistency * moved windoweddataextractor to segmenting * resolve merge conflict * update loading + preprocessing with seperate functionality * update tutorial notebook and corresponding notebook * Update test to io function * update tutorial and notebook with simplified loading of the data * Reset execution counts in notebook cells * update execution_counts and output * Updates from PR feedback * fix type error * made final feedback --------- Co-authored-by: Erikpostt Co-authored-by: Erik Post <57133568+Erikpostt@users.noreply.github.com> --- docs/notebooks/ppg/ppg_analysis.ipynb | 58 +- docs/tutorials/heart_rate_analysis.ipynb | 1278 +++++++++++++++++ .../heart_rate/heart_rate_analysis.py | 2 +- src/paradigma/preprocessing.py | 104 +- tests/test_ppg_analysis.py | 4 +- 5 files changed, 1397 insertions(+), 49 deletions(-) create mode 100644 docs/tutorials/heart_rate_analysis.ipynb diff --git a/docs/notebooks/ppg/ppg_analysis.ipynb b/docs/notebooks/ppg/ppg_analysis.ipynb index b01ea488..a96ed896 100644 --- a/docs/notebooks/ppg/ppg_analysis.ipynb +++ b/docs/notebooks/ppg/ppg_analysis.ipynb @@ -13,10 +13,14 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", + "import importlib.resources\n", + "import pickle\n", + "from pathlib import Path\n", + "\n", "from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig\n", - "from paradigma.preprocessing import scan_and_sync_segments, preprocess_ppg_data\n", - "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate, aggregate_heart_rate" + "from paradigma.preprocessing import preprocess_ppg_data\n", + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate, aggregate_heart_rate\n", + "from paradigma.util import load_tsdf_dataframe" ] }, { @@ -33,18 +37,42 @@ "branch = 'heart_rate'\n", "sensor = 'ppg'\n", "\n", - "path_to_data = '../../../tests/data'\n", - "path_to_classifier = os.path.join(path_to_data, '0.classification', sensor)\n", - 
"path_to_sensor_data = os.path.join(path_to_data, '1.prepared_data')\n", - "path_to_preprocessed_data = os.path.join(path_to_data, '2.preprocessed_data', sensor)\n", - "path_to_quality_features = os.path.join(path_to_data, '3.extracted_features', branch)\n", - "path_to_signal_quality = os.path.join(path_to_data, '4.predictions', branch)\n", - "path_to_hr_estimate = os.path.join(path_to_data, '5.quantification', branch)\n", - "path_to_hr_aggregation = os.path.join(path_to_data, '6.aggregation', branch)\n", + "ppg_prefix = 'PPG'\n", + "imu_prefix = 'IMU'\n", + "\n", + "ppg_classifier_package_filename = 'ppg_quality_classifier.pkl'\n", + "\n", + "path_to_data = Path('../../../tests/data')\n", + "path_to_assets = Path('../../../src/paradigma/assets')\n", + "\n", + "path_to_classifier = path_to_data / '0.Classification' / sensor\n", + "path_to_prepared_data = path_to_data / '1.prepared_data'\n", + "path_to_preprocessed_data = path_to_data / '2.preprocessed_data' / sensor\n", + "path_to_extracted_features = path_to_data / '3.extracted_features' / branch\n", + "path_to_predictions = path_to_data / '4.predictions' / branch\n", + "path_to_quantification = path_to_data / '5.quantification' / branch\n", + "path_to_aggregation = path_to_data / '6.aggregation' / branch\n", "\n", "aggregation_filename = 'heart_rate_aggregates.json'" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / ppg_prefix, prefix = ppg_prefix)\n", + "df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / imu_prefix, prefix = imu_prefix)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -61,8 +89,7 @@ "ppg_config = PPGConfig()\n", "imu_config = IMUConfig()\n", "\n", - "metadatas_ppg, metadatas_imu = scan_and_sync_segments(os.path.join(path_to_sensor_data, sensor), os.path.join(path_to_sensor_data, 'imu'))\n", - "df_ppg_proc, df_acc_proc = preprocess_ppg_data(metadatas_ppg[0], metadatas_imu[0], path_to_preprocessed_data, ppg_config, imu_config)" + "df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, ppg_config, imu_config)" ] }, { @@ -96,8 +123,11 @@ "metadata": {}, "outputs": [], "source": [ + "full_path_to_classifier_package = path_to_assets / ppg_classifier_package_filename\n", + "with importlib.resources.files('paradigma.assets').joinpath(ppg_classifier_package_filename).open('rb') as f:\n", + " clf_package = pickle.load(f)\n", "config = SignalQualityClassificationConfig()\n", - "df_sqa = signal_quality_classification(df_features, config, path_to_classifier)" + "df_sqa = signal_quality_classification(df_features, config, clf_package)" ] }, { diff --git a/docs/tutorials/heart_rate_analysis.ipynb b/docs/tutorials/heart_rate_analysis.ipynb new file mode 100644 index 00000000..1abd5a09 --- /dev/null +++ b/docs/tutorials/heart_rate_analysis.ipynb @@ -0,0 +1,1278 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Heart rate analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This tutorial shows how to extract heart rate estimates using photoplethysmography (PPG) data and accelerometer data. The pipeline consists of a stepwise approach to determine signal quality, assessing both PPG morphology and accounting for periodic artifacts using the accelerometer. 
Based on the signal quality, we extract high-quality segments and estimate the heart rate for every 2 s using the smoothed pseudo Wigner-Ville Distribution. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This pipeline requires accelerometer and PPG data to run. In this example we loaded data from a participant of the Personalized Parkinson Project. We load the corresponding dataframes using the `load_tsdf_dataframe function`. The channel `green` represents the values obtained with PPG using green light." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timegreen
00.00000649511
10.00996648214
20.01992646786
30.02988645334
40.03984644317
.........
64770644.54720553884
64771644.55716554088
64772644.56712554240
64773644.57708555134
64774644.58704557205
\n", + "

64775 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " time green\n", + "0 0.00000 649511\n", + "1 0.00996 648214\n", + "2 0.01992 646786\n", + "3 0.02988 645334\n", + "4 0.03984 644317\n", + "... ... ...\n", + "64770 644.54720 553884\n", + "64771 644.55716 554088\n", + "64772 644.56712 554240\n", + "64773 644.57708 555134\n", + "64774 644.58704 557205\n", + "\n", + "[64775 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeaccelerometer_xaccelerometer_yaccelerometer_zgyroscope_xgyroscope_ygyroscope_z
00.000000.5507180.574163-0.273684-115.67073232.012195-26.097561
10.010040.5358850.623445-0.254545-110.60975734.634146-24.695122
20.020080.5043060.651675-0.251675-103.23170836.768293-22.926829
30.030120.4885170.686603-0.265550-96.28048838.719512-21.158537
40.040160.4942580.725359-0.278469-92.56097641.280488-20.304878
........................
72942730.744680.234928-0.516268-0.8028710.975610-2.2560982.256098
72943730.754720.245455-0.514354-0.8066990.304878-1.7073171.768293
72944730.764760.243541-0.511005-0.8071770.304878-1.5853661.890244
72945730.774800.240191-0.514354-0.8081340.000000-1.2804881.585366
72946730.784840.243541-0.511005-0.808134-0.060976-1.0365851.219512
\n", + "

72947 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " time accelerometer_x accelerometer_y accelerometer_z \\\n", + "0 0.00000 0.550718 0.574163 -0.273684 \n", + "1 0.01004 0.535885 0.623445 -0.254545 \n", + "2 0.02008 0.504306 0.651675 -0.251675 \n", + "3 0.03012 0.488517 0.686603 -0.265550 \n", + "4 0.04016 0.494258 0.725359 -0.278469 \n", + "... ... ... ... ... \n", + "72942 730.74468 0.234928 -0.516268 -0.802871 \n", + "72943 730.75472 0.245455 -0.514354 -0.806699 \n", + "72944 730.76476 0.243541 -0.511005 -0.807177 \n", + "72945 730.77480 0.240191 -0.514354 -0.808134 \n", + "72946 730.78484 0.243541 -0.511005 -0.808134 \n", + "\n", + " gyroscope_x gyroscope_y gyroscope_z \n", + "0 -115.670732 32.012195 -26.097561 \n", + "1 -110.609757 34.634146 -24.695122 \n", + "2 -103.231708 36.768293 -22.926829 \n", + "3 -96.280488 38.719512 -21.158537 \n", + "4 -92.560976 41.280488 -20.304878 \n", + "... ... ... ... \n", + "72942 0.975610 -2.256098 2.256098 \n", + "72943 0.304878 -1.707317 1.768293 \n", + "72944 0.304878 -1.585366 1.890244 \n", + "72945 0.000000 -1.280488 1.585366 \n", + "72946 -0.060976 -1.036585 1.219512 \n", + "\n", + "[72947 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pathlib import Path\n", + "from paradigma.util import load_tsdf_dataframe\n", + "\n", + "path_to_data = Path('../../tests/data')\n", + "path_to_prepared_data = path_to_data / '1.prepared_data'\n", + "\n", + "ppg_prefix = 'PPG'\n", + "imu_prefix = 'IMU'\n", + "\n", + "df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / ppg_prefix, prefix = ppg_prefix)\n", + "df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / imu_prefix, prefix = imu_prefix)\n", + "\n", + "display(df_ppg, df_imu)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Preprocess data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first step after loading the data is preprocessing. This begins by isolating segments containing both PPG and IMU data, discarding portions where one modality (e.g., PPG) extends beyond the other, such as when the PPG recording is longer than the accelerometer data. After this step, the preprocess_ppg_data function resamples the PPG and accelerometer data to uniformly distributed timestamps, addressing the fixed but non-uniform sampling rates of the sensors. After this, a bandpass Butterworth filter (4th-order, bandpass frequencies: 0.4--3.5 Hz) is applied to the PPG signal, while a high-pass Butterworth filter (4th-order, cut-off frequency: 0.2 Hz) is applied to the accelerometer data.\n", + "\n", + "Note: the printed shapes are (rows, columns) with each row corresponding to a single data point and each column representing a data column (e.g.time). The number of rows of the overlapping segments of PPG and accelerometer are not exactly the same due to sampling differences (other sensors and possibly other sampling frequencies). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original data shapes:\n", + "- PPG data: (64775, 2)\n", + "- Accelerometer data: (72947, 4)\n", + "Overlapping data shapes:\n", + "- PPG data: (64775, 2)\n", + "- Accelerometer data: (64361, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timegreen
00.000000-1434.856838
10.033333-3461.717789
20.066667-5172.913796
30.100000-6343.203160
40.133333-6875.904054
.........
19333644.433333-2403.547097
19334644.466667-7434.311944
19335644.500000-8214.847078
19336644.533333-5194.393329
19337644.56666770.147000
\n", + "

19338 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " time green\n", + "0 0.000000 -1434.856838\n", + "1 0.033333 -3461.717789\n", + "2 0.066667 -5172.913796\n", + "3 0.100000 -6343.203160\n", + "4 0.133333 -6875.904054\n", + "... ... ...\n", + "19333 644.433333 -2403.547097\n", + "19334 644.466667 -7434.311944\n", + "19335 644.500000 -8214.847078\n", + "19336 644.533333 -5194.393329\n", + "19337 644.566667 70.147000\n", + "\n", + "[19338 rows x 2 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeaccelerometer_xgrav_accelerometer_xaccelerometer_ygrav_accelerometer_yaccelerometer_zgrav_accelerometer_z
00.000.0530780.4976390.0100400.564123-0.273154-0.000530
10.010.0383370.4976660.0588020.564510-0.2568990.002305
20.020.0068240.4976980.0865590.564887-0.2567390.005122
30.03-0.0091560.4977330.1208550.565254-0.2732800.007919
40.04-0.0037700.4977720.1593160.565610-0.2890070.010696
........................
64454644.540.1041850.8793770.1016270.200866-0.029342-0.239111
64455644.550.1204030.8793770.0904130.2008650.051113-0.239111
64456644.560.1012890.8793770.1286840.2008650.037951-0.239111
64457644.570.0731390.8793770.1373990.2008650.030834-0.239111
64458644.580.0045780.8793770.1028620.2008650.025627-0.239111
\n", + "

64459 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " time accelerometer_x grav_accelerometer_x accelerometer_y \\\n", + "0 0.00 0.053078 0.497639 0.010040 \n", + "1 0.01 0.038337 0.497666 0.058802 \n", + "2 0.02 0.006824 0.497698 0.086559 \n", + "3 0.03 -0.009156 0.497733 0.120855 \n", + "4 0.04 -0.003770 0.497772 0.159316 \n", + "... ... ... ... ... \n", + "64454 644.54 0.104185 0.879377 0.101627 \n", + "64455 644.55 0.120403 0.879377 0.090413 \n", + "64456 644.56 0.101289 0.879377 0.128684 \n", + "64457 644.57 0.073139 0.879377 0.137399 \n", + "64458 644.58 0.004578 0.879377 0.102862 \n", + "\n", + " grav_accelerometer_y accelerometer_z grav_accelerometer_z \n", + "0 0.564123 -0.273154 -0.000530 \n", + "1 0.564510 -0.256899 0.002305 \n", + "2 0.564887 -0.256739 0.005122 \n", + "3 0.565254 -0.273280 0.007919 \n", + "4 0.565610 -0.289007 0.010696 \n", + "... ... ... ... \n", + "64454 0.200866 -0.029342 -0.239111 \n", + "64455 0.200865 0.051113 -0.239111 \n", + "64456 0.200865 0.037951 -0.239111 \n", + "64457 0.200865 0.030834 -0.239111 \n", + "64458 0.200865 0.025627 -0.239111 \n", + "\n", + "[64459 rows x 7 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from paradigma.config import PPGConfig, IMUConfig\n", + "from paradigma.preprocessing import preprocess_ppg_data\n", + "\n", + "ppg_config = PPGConfig()\n", + "imu_config = IMUConfig()\n", + "\n", + "df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, ppg_config, imu_config)\n", + "\n", + "display(df_ppg_proc, df_acc_proc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Extract signal quality features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The preprocessed data (PPG & accelerometer) is windowed into overlapping windows of length `ppg_config.window_length_s` with a window step of `ppg_config.window_step_length_s`. From the PPG windows 10 time- and frequency domain features are extracted to assess PPG morphology and from the accelerometer windows one relative power feature is calculated to assess periodic motion artifacts. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The default window length for the signal quality feature extraction is set to 6 seconds.\n", + "The default step size for the signal quality feature extraction is set to 1 seconds.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timevarmeanmediankurtosisskewnesssignal_to_noiseauto_corrf_domrel_powerspectral_entropyacc_power_ratio
00.01.697510e+073418.6963922892.8151052.5404470.2772373.2416340.2824521.1718750.1799220.5611860.014196
11.01.635033e+073350.7691582892.8151052.6479510.4201633.2023130.2765261.1718750.2019720.5478120.015343
22.01.921248e+073698.9630993310.7735562.3336870.4819743.5218820.2109840.8203120.2649220.5090150.042583
33.01.646562e+073389.3257752946.5885262.6081870.7339133.3462360.2258230.7031250.2707070.4960510.034215
44.01.603135e+073364.3663132912.2807982.2687680.4403573.3145650.3059980.8203120.2640970.4966970.056204
.......................................
634634.02.281660e+061106.006037838.3077294.0580370.6741842.0726600.1720520.7031250.2040280.5775700.060486
635635.02.224441e+061113.194933838.3077293.6841590.0679792.1180190.2364080.7031250.2210440.5603620.053120
636636.05.103141e+061627.6189181030.1133033.9130680.8523312.0400940.2358220.5859380.1576670.5147170.058853
637637.07.428728e+062093.7360071520.6400673.1887530.3057802.4574460.3220340.4687500.1505820.4584770.061269
638638.01.549534e+072974.4625552482.7039444.042171-0.6280082.2287300.0959180.4687500.1029760.4938300.105554
\n", + "

639 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " time var mean median kurtosis skewness \\\n", + "0 0.0 1.697510e+07 3418.696392 2892.815105 2.540447 0.277237 \n", + "1 1.0 1.635033e+07 3350.769158 2892.815105 2.647951 0.420163 \n", + "2 2.0 1.921248e+07 3698.963099 3310.773556 2.333687 0.481974 \n", + "3 3.0 1.646562e+07 3389.325775 2946.588526 2.608187 0.733913 \n", + "4 4.0 1.603135e+07 3364.366313 2912.280798 2.268768 0.440357 \n", + ".. ... ... ... ... ... ... \n", + "634 634.0 2.281660e+06 1106.006037 838.307729 4.058037 0.674184 \n", + "635 635.0 2.224441e+06 1113.194933 838.307729 3.684159 0.067979 \n", + "636 636.0 5.103141e+06 1627.618918 1030.113303 3.913068 0.852331 \n", + "637 637.0 7.428728e+06 2093.736007 1520.640067 3.188753 0.305780 \n", + "638 638.0 1.549534e+07 2974.462555 2482.703944 4.042171 -0.628008 \n", + "\n", + " signal_to_noise auto_corr f_dom rel_power spectral_entropy \\\n", + "0 3.241634 0.282452 1.171875 0.179922 0.561186 \n", + "1 3.202313 0.276526 1.171875 0.201972 0.547812 \n", + "2 3.521882 0.210984 0.820312 0.264922 0.509015 \n", + "3 3.346236 0.225823 0.703125 0.270707 0.496051 \n", + "4 3.314565 0.305998 0.820312 0.264097 0.496697 \n", + ".. ... ... ... ... ... \n", + "634 2.072660 0.172052 0.703125 0.204028 0.577570 \n", + "635 2.118019 0.236408 0.703125 0.221044 0.560362 \n", + "636 2.040094 0.235822 0.585938 0.157667 0.514717 \n", + "637 2.457446 0.322034 0.468750 0.150582 0.458477 \n", + "638 2.228730 0.095918 0.468750 0.102976 0.493830 \n", + "\n", + " acc_power_ratio \n", + "0 0.014196 \n", + "1 0.015343 \n", + "2 0.042583 \n", + "3 0.034215 \n", + "4 0.056204 \n", + ".. ... \n", + "634 0.060486 \n", + "635 0.053120 \n", + "636 0.058853 \n", + "637 0.061269 \n", + "638 0.105554 \n", + "\n", + "[639 rows x 12 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features\n", + "\n", + "ppg_config = SignalQualityFeatureExtractionConfig()\n", + "acc_config = SignalQualityFeatureExtractionAccConfig()\n", + "\n", + "print(\"The default window length for the signal quality feature extraction is set to\", ppg_config.window_length_s, \"seconds.\")\n", + "print(\"The default step size for the signal quality feature extraction is set to\", ppg_config.window_step_length_s, \"seconds.\")\n", + "\n", + "df_features = extract_signal_quality_features(ppg_config, df_ppg_proc, acc_config, df_acc_proc)\n", + "\n", + "df_features\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Signal quality classification" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A trained logistic classifier is used to predict PPG signal quality and returns the `pred_sqa_proba`, which is the posterior probability of a PPG window to look like the typical PPG morphology (higher probability indicates toward the typical PPG morphology). The relative power feature from the accelerometer is compared to a threshold for periodic artifacts and therefore `pred_sqa_acc_label` is used to return a label indicating predicted periodic motion artifacts (label 0) or no periodic motion artifacts (label 1). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pred_sqa_probapred_sqa_acc_label
00.0060311
10.0117251
20.0680631
30.0808081
40.0745911
.........
6340.0013871
6350.0015701
6360.0003801
6370.0004061
6380.0000041
\n", + "

639 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " pred_sqa_proba pred_sqa_acc_label\n", + "0 0.006031 1\n", + "1 0.011725 1\n", + "2 0.068063 1\n", + "3 0.080808 1\n", + "4 0.074591 1\n", + ".. ... ...\n", + "634 0.001387 1\n", + "635 0.001570 1\n", + "636 0.000380 1\n", + "637 0.000406 1\n", + "638 0.000004 1\n", + "\n", + "[639 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import SignalQualityClassificationConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import signal_quality_classification\n", + "\n", + "path_to_classifier = Path('../../tests/data/0.Classification/ppg')\n", + "config = SignalQualityClassificationConfig()\n", + "\n", + "df_sqa = signal_quality_classification(df_features, config, path_to_classifier)\n", + "\n", + "df_sqa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Heart rate estimation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For heart rate estimation, we extract segments of `config.tfd_length`. We calculate the smoothed-pseudo Wigner-Ville Distribution (SPWVD) to obtain the frequency content of the PPG signal over time. We extract for every timestamp in the SPWVD the frequency with the highest power. For every non-overlapping 2 s window we average the corresponding frequencies to obtain a heart rate per window. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The default minimal window length for the heart rate extraction is set to 10 seconds.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timeheart_rate
056.086.404715
158.086.640472
260.086.345776
362.084.872299
464.084.872299
566.084.194499
\n", + "
" + ], + "text/plain": [ + " time heart_rate\n", + "0 56.0 86.404715\n", + "1 58.0 86.640472\n", + "2 60.0 86.345776\n", + "3 62.0 84.872299\n", + "4 64.0 84.872299\n", + "5 66.0 84.194499" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from paradigma.config import HeartRateExtractionConfig\n", + "from paradigma.heart_rate.heart_rate_analysis import estimate_heart_rate\n", + "\n", + "config = HeartRateExtractionConfig()\n", + "\n", + "print(\"The default minimal window length for the heart rate extraction is set to\", config.tfd_length, \"seconds.\")\n", + "\n", + "df_hr = estimate_heart_rate(df_sqa, df_ppg_proc, config)\n", + "\n", + "df_hr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Heart rate aggregation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The final step is to aggregate all 2 s heart rate estimates. In the current example, the mode and 99th percentile are calculated. We hypothesize that the mode gives representation of the resting heart rate while the 99th percentile indicates the maximum heart rate. In Parkinson's disease, we expect that these two measures could reflect autonomic (dys)functioning. The `nr_hr_est` in the metadata indicates based on how many 2 s windows these aggregates are determined." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'hr_aggregates': {'99p_heart_rate': 86.62868369351669,\n", + " 'mode_heart_rate': 84.8722986247544},\n", + " 'metadata': {'nr_hr_est': 6}}\n" + ] + } + ], + "source": [ + "import pprint\n", + "from paradigma.heart_rate.heart_rate_analysis import aggregate_heart_rate\n", + "hr_values = df_hr['heart_rate'].values\n", + "df_hr_agg = aggregate_heart_rate(hr_values, aggregates = ['mode', '99p'])\n", + "\n", + "pprint.pprint(df_hr_agg)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "paradigma-8ps1bg0Z-py3.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/paradigma/heart_rate/heart_rate_analysis.py b/src/paradigma/heart_rate/heart_rate_analysis.py index ac9e9449..baee092c 100644 --- a/src/paradigma/heart_rate/heart_rate_analysis.py +++ b/src/paradigma/heart_rate/heart_rate_analysis.py @@ -249,7 +249,7 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, t_hr_rel[hr_pos:hr_pos + n_hr] = hr_time hr_pos += n_hr - df_hr = pd.DataFrame({"rel_time": t_hr_rel, "heart_rate": v_hr_rel}) + df_hr = pd.DataFrame({"time": t_hr_rel, "heart_rate": v_hr_rel}) return df_hr diff --git a/src/paradigma/preprocessing.py b/src/paradigma/preprocessing.py index 4513129d..690739c9 100644 --- a/src/paradigma/preprocessing.py +++ b/src/paradigma/preprocessing.py @@ -244,7 +244,22 @@ def preprocess_imu_data_io(path_to_input: str | Path, path_to_output: str | Path write_df_data(metadata_time, metadata_values, path_to_output, f'{sensor}_meta.json', df_sensor) -def scan_and_sync_segments(input_path_ppg, input_path_imu): +def scan_and_sync_segments(input_path_ppg: str | Path, input_path_imu: str | Path) -> Tuple[List[tsdf.TSDFMetadata], 
List[tsdf.TSDFMetadata]]: + """ + Scan for available TSDF metadata files in the specified directories and synchronize the data segments based on the metadata start and end times. + + Parameters + ---------- + input_path_ppg : str + Path to the directory containing PPG data. + input_path_imu : str + Path to the directory containing IMU data. + + Returns + ------- + Tuple[List[tsdf.TSDFMetadata], List[tsdf.TSDFMetadata]] + A tuple containing lists of metadata objects for PPG and IMU data, respectively. + """ # Scan for available TSDF metadata files meta_ppg = extract_meta_from_tsdf_files(input_path_ppg) @@ -262,11 +277,10 @@ def scan_and_sync_segments(input_path_ppg, input_path_imu): return metadatas_ppg, metadatas_imu -def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TSDFMetadata, - output_path: Union[str, Path], ppg_config: PPGConfig, - imu_config: IMUConfig, store_locally:bool = True) -> Tuple[pd.DataFrame, pd.DataFrame]: +def preprocess_ppg_data(df_ppg: pd.DataFrame, df_imu: pd.DataFrame, ppg_config: PPGConfig, + imu_config: IMUConfig) -> Tuple[pd.DataFrame, pd.DataFrame]: """ - Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments. + Preprocess PPG and IMU (accelerometer only) data by resampling, filtering, and aligning the data segments. Parameters ---------- @@ -289,24 +303,15 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS Preprocessed PPG and IMU data as DataFrames. """ - # Load PPG data - metadata_time_ppg = tsdf_meta_ppg[ppg_config.time_filename] - metadata_values_ppg = tsdf_meta_ppg[ppg_config.values_filename] - df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_values_ppg], tsdf.constants.ConcatenationType.columns) - - # Load IMU data - metadata_time_imu = tsdf_meta_imu[imu_config.time_filename] - metadata_values_imu = tsdf_meta_imu[imu_config.values_filename] - df_imu = tsdf.load_dataframe_from_binaries([metadata_time_imu, metadata_values_imu], tsdf.constants.ConcatenationType.columns) # Drop the gyroscope columns from the IMU data cols_to_drop = df_imu.filter(regex='^gyroscope_').columns df_acc = df_imu.drop(cols_to_drop, axis=1) # Extract overlapping segments - print("Shape of the original data:", df_ppg.shape, df_acc.shape) + print(f"Original data shapes:\n- PPG data: {df_ppg.shape}\n- Accelerometer data: {df_acc.shape}") df_ppg_overlapping, df_acc_overlapping = extract_overlapping_segments(df_ppg, df_acc) - print("Shape of the overlapping segments:", df_ppg_overlapping.shape, df_acc_overlapping.shape) + print(f"Overlapping data shapes:\n- PPG data: {df_ppg_overlapping.shape}\n- Accelerometer data: {df_acc_overlapping.shape}") # Resample accelerometer data df_acc_proc = resample_data( @@ -350,25 +355,60 @@ def preprocess_ppg_data(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TS df_ppg_proc = df_ppg_proc.drop(columns=[col]) df_ppg_proc = df_ppg_proc.rename(columns={f'filt_{col}': col}) - - if store_locally: - # Store data - metadata_values_imu.channels = list(imu_config.d_channels_accelerometer.keys()) - metadata_values_imu.units = list(imu_config.d_channels_accelerometer.values()) - metadata_values_imu.file_name = 'accelerometer_values.bin' - metadata_time_imu.units = [TimeUnit.ABSOLUTE_MS] - metadata_time_imu.file_name = 'accelerometer_time.bin' - write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_acc_proc) - - metadata_values_ppg.channels = list(ppg_config.d_channels_ppg.keys()) - 
metadata_values_ppg.units = list(ppg_config.d_channels_ppg.values()) - metadata_values_ppg.file_name = 'PPG_values.bin' - metadata_time_ppg.units = [TimeUnit.ABSOLUTE_MS] - metadata_time_ppg.file_name = 'PPG_time.bin' - write_df_data(metadata_time_ppg, metadata_values_ppg, output_path, 'PPG_meta.json', df_ppg_proc) return df_ppg_proc, df_acc_proc +def preprocess_ppg_data_io(tsdf_meta_ppg: tsdf.TSDFMetadata, tsdf_meta_imu: tsdf.TSDFMetadata, + output_path: Union[str, Path], ppg_config: PPGConfig, + imu_config: IMUConfig) -> None: + """ + Preprocess PPG and IMU data by resampling, filtering, and aligning the data segments. + + Parameters + ---------- + tsdf_meta_ppg : tsdf.TSDFMetadata + Metadata for the PPG data. + tsdf_meta_imu : tsdf.TSDFMetadata + Metadata for the IMU data. + output_path : Union[str, Path] + Path to store the preprocessed data. + ppg_config : PPGConfig + Configuration object for PPG preprocessing. + imu_config : IMUConfig + Configuration object for IMU preprocessing. + + Returns + ------- + None + """ + + # Load PPG data + metadata_time_ppg = tsdf_meta_ppg[ppg_config.time_filename] + metadata_values_ppg = tsdf_meta_ppg[ppg_config.values_filename] + df_ppg = tsdf.load_dataframe_from_binaries([metadata_time_ppg, metadata_values_ppg], tsdf.constants.ConcatenationType.columns) + + # Load IMU data + metadata_time_imu = tsdf_meta_imu[imu_config.time_filename] + metadata_values_imu = tsdf_meta_imu[imu_config.values_filename] + df_imu = tsdf.load_dataframe_from_binaries([metadata_time_imu, metadata_values_imu], tsdf.constants.ConcatenationType.columns) + + # Preprocess data + df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg=df_ppg, df_imu=df_imu, ppg_config=ppg_config, imu_config=imu_config) + + # Store data + metadata_values_imu.channels = list(imu_config.d_channels_accelerometer.keys()) + metadata_values_imu.units = list(imu_config.d_channels_accelerometer.values()) + metadata_values_imu.file_name = 'accelerometer_values.bin' + metadata_time_imu.units = [TimeUnit.ABSOLUTE_MS] + metadata_time_imu.file_name = 'accelerometer_time.bin' + write_df_data(metadata_time_imu, metadata_values_imu, output_path, 'accelerometer_meta.json', df_acc_proc) + + metadata_values_ppg.channels = list(ppg_config.d_channels_ppg.keys()) + metadata_values_ppg.units = list(ppg_config.d_channels_ppg.values()) + metadata_values_ppg.file_name = 'PPG_values.bin' + metadata_time_ppg.units = [TimeUnit.ABSOLUTE_MS] + metadata_time_ppg.file_name = 'PPG_time.bin' + write_df_data(metadata_time_ppg, metadata_values_ppg, output_path, 'PPG_meta.json', df_ppg_proc) # TODO: ideally something like this should be possible directly in the tsdf library def extract_meta_from_tsdf_files(tsdf_data_dir : str) -> List[dict]: diff --git a/tests/test_ppg_analysis.py b/tests/test_ppg_analysis.py index 4b493884..4fb6430b 100644 --- a/tests/test_ppg_analysis.py +++ b/tests/test_ppg_analysis.py @@ -2,7 +2,7 @@ from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig -from paradigma.preprocessing import preprocess_ppg_data, scan_and_sync_segments +from paradigma.preprocessing import preprocess_ppg_data_io, scan_and_sync_segments from test_notebooks import compare_data @@ -53,7 +53,7 @@ def compare_ppg_preprocessing( metadatas_ppg, metadatas_imu = scan_and_sync_segments( path_to_ppg_input, path_to_imu_input ) - preprocess_ppg_data( + preprocess_ppg_data_io( metadatas_ppg[0], metadatas_imu[0], path_to_tested_output, 
ppg_config, imu_config ) compare_data(path_to_reference_output, path_to_tested_output, binaries_pairs)
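A minimal usage sketch of the refactored preprocessing API (not part of the patch itself), mirroring the calls that appear in the updated tutorial notebook and in tests/test_ppg_analysis.py: `preprocess_ppg_data` now takes dataframes and returns preprocessed dataframes, while the previous load-and-write behaviour lives in `preprocess_ppg_data_io`. The prepared-data path and 'output_dir' below are placeholder values; all function names and argument orders are taken from the diff above.

from pathlib import Path

from paradigma.config import PPGConfig, IMUConfig
from paradigma.preprocessing import preprocess_ppg_data, preprocess_ppg_data_io, scan_and_sync_segments
from paradigma.util import load_tsdf_dataframe

path_to_prepared_data = Path('tests/data/1.prepared_data')  # placeholder path

# In-memory variant: pass PPG and IMU dataframes, get preprocessed PPG and accelerometer dataframes back.
df_ppg, _, _ = load_tsdf_dataframe(path_to_prepared_data / 'PPG', prefix='PPG')
df_imu, _, _ = load_tsdf_dataframe(path_to_prepared_data / 'IMU', prefix='IMU')
df_ppg_proc, df_acc_proc = preprocess_ppg_data(df_ppg, df_imu, PPGConfig(), IMUConfig())

# TSDF I/O variant: scan and sync segments, then load, preprocess, and write TSDF output to disk.
metadatas_ppg, metadatas_imu = scan_and_sync_segments(path_to_prepared_data / 'PPG', path_to_prepared_data / 'IMU')
preprocess_ppg_data_io(metadatas_ppg[0], metadatas_imu[0], 'output_dir', PPGConfig(), IMUConfig())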