diff --git a/docs/guides/config.md b/docs/guides/config.md new file mode 100644 index 00000000..1981e850 --- /dev/null +++ b/docs/guides/config.md @@ -0,0 +1,11 @@ +# Config +Throughout the ParaDigMa toolbox we use configuration objects `Config` to specify the parameters used as input to processes. All configuration classes are defined in `config.py`, and can be imported using `from paradigma.config import X`. The configuration classes frequently use static column names defined in `constants.py` to ensure robustness and consistency. + +## Config classes +The config classes are defined either for sensors (IMU, PPG) or for domains (gait, heart rate, tremor). + +### Sensor configs +There are two sensor config classes: `IMUConfig` and `PPGConfig`. + +### Domain configs +For domains, config classes can further be distinguished by the processing steps displayed [here](https://github.com/biomarkersParkinson/paradigma). \ No newline at end of file diff --git a/docs/guides/coordinate_system.md b/docs/guides/coordinate_system.md new file mode 100644 index 00000000..d015afdf --- /dev/null +++ b/docs/guides/coordinate_system.md @@ -0,0 +1,18 @@ +# Coordinate System +As a prerequisite to reliably estimating gait and arm swing measures, it is important to align the coordinate system of the IMU sensor with the coordinate system of the trained classifiers. For the tremor and heart rate pipelines, differences between coordinate systems do not affect outcomes. + +## Coordinate system used +The coordinate system of the IMU sensor used for training the classifiers can be observed below. The direction of acceleration is indicated by arrows, and the direction of gyroscope rotation can be determined using [Ampère's right-hand grip rule](https://en.wikipedia.org/wiki/Right-hand_rule#Amp%C3%A8re's_right-hand_grip_rule) applied to the accelerometer axes. + +

+ +

+ +### Accelerometer +The three accelerometer axes are set such that the x-axis is aligned with the arm pointing toward the hand, the y-axis is perpendicular to the arm pointing upward from the top of the sensor, and the z-axis points away from the arm and body. If the arrow representing a specific axis is pointing downward to the ground, parallel to and in the direction of the arrow representing gravity (1g), the acceleration of this specific axis is equal to -1g if the sensor is stationary (i.e., no acceleration due to movement). + +### Gyroscope +If the sensor is rotating in the direction of the arrow (deduced using [Ampère's right-hand grip rule](https://en.wikipedia.org/wiki/Right-hand_rule#Amp%C3%A8re's_right-hand_grip_rule) applied to the accelerometer axes), the gyroscope data will be positive. + +## Lateral differences +Wearing the watch on the left wrist or right wrist influences the relation between movements and the coordinate system. Specifically, the x-axis of the accelerometer, and the y-axis and z-axis of the gyroscope, are inverted between the two wrists. For this purpose, we have added `invert_watch_side` to the toolbox (which can be imported using `from paradigma.util import invert_watch_side`). First, ensure the coordinate system aligns with the coordinate system shown above. Do this for all participants wearing the watch on one specific side, for example the left wrist. Then, apply `invert_watch_side` to ensure individuals wearing the watch on the right wrist have the correct coordinate system accounting for differences in sensor orientation. 
\ No newline at end of file diff --git a/docs/index.md b/docs/index.md index babd3d1f..604f6618 100755 --- a/docs/index.md +++ b/docs/index.md @@ -1,30 +1,34 @@ -```{include} ../README.md +```{toctree} +:maxdepth: 2 +:caption: Tutorials +tutorials/data_preparation.ipynb ``` ```{toctree} :maxdepth: 2 -:caption: Example notebooks +:caption: Template Notebooks notebooks/gait/gait_analysis.ipynb notebooks/ppg/ppg_analysis.ipynb ``` +```{toctree} +:maxdepth: 2 +:caption: User Guides +guides/coordinate_system.md +``` + ```{toctree} :maxdepth: 2 :caption: API autoapi/index ``` -```{toctree} + ```{toctree} -:maxdepth: 2 -:caption: Development -changelog.md -contributing.md -conduct.md -``` +:maxdepth: 2 \ No newline at end of file diff --git a/docs/notebooks/ppg/ppg_analysis.ipynb b/docs/notebooks/ppg/ppg_analysis.ipynb index 3d91b8fa..b01ea488 100644 --- a/docs/notebooks/ppg/ppg_analysis.ipynb +++ b/docs/notebooks/ppg/ppg_analysis.ipynb @@ -16,7 +16,7 @@ "import os\n", "from paradigma.config import PPGConfig, IMUConfig, SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig, SignalQualityClassificationConfig, HeartRateExtractionConfig\n", "from paradigma.preprocessing import scan_and_sync_segments, preprocess_ppg_data\n", - "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features_io, extract_signal_quality_features, signal_quality_classification, estimate_heart_rate" + "from paradigma.heart_rate.heart_rate_analysis import extract_signal_quality_features, signal_quality_classification, estimate_heart_rate, aggregate_heart_rate" ] }, { @@ -37,9 +37,12 @@ "path_to_classifier = os.path.join(path_to_data, '0.classification', sensor)\n", "path_to_sensor_data = os.path.join(path_to_data, '1.prepared_data')\n", "path_to_preprocessed_data = os.path.join(path_to_data, '2.preprocessed_data', sensor)\n", - "path_to_quality_features = os.path.join(path_to_data, '3.extracted_features', sensor)\n", - "path_to_signal_quality = 
os.path.join(path_to_data, '4.predictions', sensor)\n", - "path_to_hr_estimate = os.path.join(path_to_data, '5.quantification', sensor)" + "path_to_quality_features = os.path.join(path_to_data, '3.extracted_features', branch)\n", + "path_to_signal_quality = os.path.join(path_to_data, '4.predictions', branch)\n", + "path_to_hr_estimate = os.path.join(path_to_data, '5.quantification', branch)\n", + "path_to_hr_aggregation = os.path.join(path_to_data, '6.aggregation', branch)\n", + "\n", + "aggregation_filename = 'heart_rate_aggregates.json'" ] }, { @@ -113,6 +116,23 @@ "config = HeartRateExtractionConfig()\n", "df_hr = estimate_heart_rate(df_sqa, df_ppg_proc, config)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Heart rate aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hr_values = df_hr['heart_rate'].values\n", + "df_hr_agg = aggregate_heart_rate(hr_values, aggregates = ['mode', '99p'])" + ] } ], "metadata": { diff --git a/docs/source/_static/img/directions_axes.png b/docs/source/_static/img/directions_axes.png new file mode 100644 index 00000000..f7ee19dd Binary files /dev/null and b/docs/source/_static/img/directions_axes.png differ diff --git a/docs/source/_static/img/pipeline-architecture.png b/docs/source/_static/img/pipeline-architecture.png index 30a0f299..1f09fbf7 100644 Binary files a/docs/source/_static/img/pipeline-architecture.png and b/docs/source/_static/img/pipeline-architecture.png differ diff --git a/docs/tutorials/data_preparation.ipynb b/docs/tutorials/data_preparation.ipynb index a3ac120f..561c66d6 100644 --- a/docs/tutorials/data_preparation.ipynb +++ b/docs/tutorials/data_preparation.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "source": [ "## Load data\n", - "This example uses data of the Personalized Parkinson Project, which is stored in Time Series Data Format (TSDF). 
IMU and PPG data are sampled at a different sampling frequency and hence stored separately. " + "This example uses data of the Personalized Parkinson Project, which is stored in Time Series Data Format (TSDF). IMU and PPG data are sampled at a different sampling frequency and therefore stored separately. " ] }, { @@ -54,6 +54,13 @@ "df_ppg.head(5)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The timestamps in this dataset correspond to delta milliseconds, and the data is not uniformly distributed as can be observed." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -117,7 +124,8 @@ "metadata": {}, "source": [ "#### Set sensor values to the correct units\n", - "First, TSDF stores the data efficiently using scaling factors. We should therefore convert the sensor values back to the true values. " + "TSDF stores the data efficiently using scaling factors. We should therefore convert the sensor values back to the true values. This is only relevant if you use TSDF and scaled the data for storage purposes.\n", + "\n" ] }, { @@ -171,7 +179,7 @@ "metadata": {}, "source": [ "#### Account for watch side\n", - "For the Gait & Arm Swing pipeline, it is essential to ensure correct sensor axes orientation. For more information please read [X]. If the sensors are not correctly aligned, you can use `invert_watch_side` to ensure consistency between sensors worn on the left or right wrist." + "For the Gait & Arm Swing pipeline, it is essential to ensure correct sensor axes orientation. For more information please read [Coordinate System](../guides/coordinate_system.md) and set the axes of the data accordingly." 
] }, { @@ -189,6 +197,7 @@ "df_imu[DataColumns.GYROSCOPE_Y] *= -1\n", "df_imu[DataColumns.GYROSCOPE_Z] *= -1\n", "\n", + "# Invert the axes when worn on the right-hand side\n", "df_imu = invert_watch_side(df_imu, watch_side)\n", "df_imu.head(5)" ] @@ -198,7 +207,7 @@ "metadata": {}, "source": [ "#### Change time column\n", - "ParaDigMa expects the data to be in seconds relative to the first row. The toolbox has the built-in function `transform_time_array` to help users transform their time column to the correct format." + "ParaDigMa expects the data to be in seconds relative to the first row, which should be equal to 0. The toolbox has the built-in function `transform_time_array` to help users transform their time column to the correct format." ] }, { diff --git a/src/paradigma/constants.py b/src/paradigma/constants.py index 69961d9e..d5c0ffbf 100644 --- a/src/paradigma/constants.py +++ b/src/paradigma/constants.py @@ -57,6 +57,9 @@ class DataColumns(): PRED_SQA_PROBA: str = "pred_sqa_proba" PRED_SQA_ACC_LABEL: str = "pred_sqa_acc_label" PRED_SQA: str = "pred_sqa" + + # Constants for heart rate + HEART_RATE: str = "heart_rate" @dataclass(frozen=True) class DataUnits(): diff --git a/src/paradigma/gait/gait_analysis.py b/src/paradigma/gait/gait_analysis.py index 6329e6cd..bfa75dc0 100644 --- a/src/paradigma/gait/gait_analysis.py +++ b/src/paradigma/gait/gait_analysis.py @@ -14,8 +14,8 @@ from paradigma.gait.feature_extraction import extract_temporal_domain_features, \ extract_spectral_domain_features, pca_transform_gyroscope, compute_angle, remove_moving_average_angle, \ extract_angle_extremes, compute_range_of_motion, compute_peak_angular_velocity -from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, categorize_segments -from paradigma.util import get_end_iso8601, write_df_data, read_metadata, aggregate_parameter, WindowedDataExtractor +from paradigma.segmenting import tabulate_windows, create_segments, discard_segments, 
categorize_segments, WindowedDataExtractor +from paradigma.util import get_end_iso8601, write_df_data, read_metadata, aggregate_parameter def extract_gait_features( diff --git a/src/paradigma/heart_rate/heart_rate_analysis.py b/src/paradigma/heart_rate/heart_rate_analysis.py index 4fddbccc..ac9e9449 100644 --- a/src/paradigma/heart_rate/heart_rate_analysis.py +++ b/src/paradigma/heart_rate/heart_rate_analysis.py @@ -3,17 +3,18 @@ import pandas as pd import os import numpy as np +import json +from typing import List, Optional import tsdf from paradigma.constants import DataColumns from paradigma.config import SignalQualityFeatureExtractionConfig, SignalQualityFeatureExtractionAccConfig, SignalQualityClassificationConfig, \ - HeartRateExtractionConfig, HeartRateExtractionConfig + HeartRateExtractionConfig from paradigma.heart_rate.feature_extraction import extract_temporal_domain_features, extract_spectral_domain_features, extract_accelerometer_feature from paradigma.heart_rate.heart_rate_estimation import assign_sqa_label, extract_hr_segments, extract_hr_from_segment -from paradigma.segmenting import tabulate_windows - -from paradigma.util import read_metadata, WindowedDataExtractor +from paradigma.segmenting import tabulate_windows, WindowedDataExtractor +from paradigma.util import read_metadata, aggregate_parameter def extract_signal_quality_features(config_ppg: SignalQualityFeatureExtractionConfig, df_ppg: pd.DataFrame, config_acc: SignalQualityFeatureExtractionAccConfig, df_acc: pd.DataFrame) -> pd.DataFrame: """ @@ -251,3 +252,69 @@ def estimate_heart_rate(df_sqa: pd.DataFrame, df_ppg_preprocessed: pd.DataFrame, df_hr = pd.DataFrame({"rel_time": t_hr_rel, "heart_rate": v_hr_rel}) return df_hr
+
+
+def aggregate_heart_rate(hr_values: np.ndarray, aggregates: Optional[List[str]] = None) -> dict:
+    """
+    Aggregate the heart rate estimates using the specified aggregation methods.
+
+    Parameters
+    ----------
+    hr_values : np.ndarray
+        The array containing the heart rate estimates.
+    aggregates : List[str], optional
+        The list of aggregation methods to be used for the heart rate estimates,
+        each passed to `aggregate_parameter`. The default is ['mode', '99p'].
+
+    Returns
+    -------
+    aggregated_results : dict
+        The dictionary containing the number of heart rate estimates under
+        'metadata' and the aggregated heart rate values under 'hr_aggregates'.
+    """
+    if aggregates is None:
+        aggregates = ['mode', '99p']
+
+    # Store the number of estimates as metadata next to the aggregates
+    aggregated_results = {
+        'metadata': {
+            'nr_hr_est': len(hr_values)
+        },
+        'hr_aggregates': {}
+    }
+    for aggregate in aggregates:
+        aggregated_results['hr_aggregates'][f'{aggregate}_{DataColumns.HEART_RATE}'] = aggregate_parameter(hr_values, aggregate)
+
+    return aggregated_results
+
+
+def aggregate_heart_rate_io(full_path_to_input: Union[str, Path], full_path_to_output: Union[str, Path], aggregates: Optional[List[str]] = None) -> None:
+    """
+    Load heart rate estimates from a JSON file, aggregate them, and save the aggregates to a JSON file.
+
+    Parameters
+    ----------
+    full_path_to_input : Union[str, Path]
+        The path to the JSON file containing the heart rate estimates under the key 'heart_rate'.
+    full_path_to_output : Union[str, Path]
+        The path to the JSON file where the aggregated heart rate estimates will be saved.
+    aggregates : List[str], optional
+        The list of aggregation methods to be used for the heart rate estimates. The default is ['mode', '99p'].
+    """
+    # Load the heart rate estimates
+    with open(full_path_to_input, 'r') as f:
+        hr_data = json.load(f)
+
+    # json.load returns plain Python containers (no `.values` array), so convert explicitly.
+    # The column may be a list or an {index: value} mapping (e.g. pandas' default to_json layout).
+    hr_estimates = hr_data[DataColumns.HEART_RATE]
+    if isinstance(hr_estimates, dict):
+        hr_estimates = list(hr_estimates.values())
+    hr_values = np.asarray(hr_estimates, dtype=float)  # float keeps the aggregates JSON-serialisable
+
+    # Aggregate the heart rate estimates
+    hr_aggregates = aggregate_heart_rate(hr_values, aggregates)
+
+    # Save the aggregated heart rate estimates
+    with open(full_path_to_output, 'w') as json_file:
+        json.dump(hr_aggregates, json_file, indent=4)
\ No newline at end of file
diff --git a/src/paradigma/segmenting.py b/src/paradigma/segmenting.py index 6bdda755..80982fd9 100644 --- a/src/paradigma/segmenting.py +++ b/src/paradigma/segmenting.py @@ -4,7 +4,6 @@ from typing import List from paradigma.constants import DataColumns - import numpy as np def tabulate_windows( @@ -310,3 +309,88 @@ def categorize(segment_size): # Apply categorization to the DataFrame return df[DataColumns.SEGMENT_NR].map(segment_sizes).map(categorize).astype('category') + +class WindowedDataExtractor: + """ + A utility class for extracting specific column indices and slices + from a list of windowed column names. + + Attributes + ---------- + column_indices : dict + A dictionary mapping column names to their indices. + + Methods + ------- + get_index(col) + Returns the index of a specific column. + get_slice(cols) + Returns a slice object for a range of consecutive columns. + """ + + def __init__(self, windowed_cols): + """ + Initialize the WindowedDataExtractor. + + Parameters + ---------- + windowed_cols : list of str + A list of column names in the windowed data. + + Raises + ------ + ValueError + If the list of `windowed_cols` is empty. + """ + if not windowed_cols: + raise ValueError("The list of windowed columns cannot be empty.") + self.column_indices = {col: idx for idx, col in enumerate(windowed_cols)} + + def get_index(self, col): + """ + Get the index of a specific column. + + Parameters + ---------- + col : str + The name of the column to retrieve the index for. 
+ + Returns + ------- + int + The index of the specified column. + + Raises + ------ + ValueError + If the column is not found in the `windowed_cols` list. + """ + if col not in self.column_indices: + raise ValueError(f"Column '{col}' not found in windowed_cols.") + return self.column_indices[col] + + def get_slice(self, cols): + """ + Get a slice object for a range of consecutive columns. + + Parameters + ---------- + cols : list of str + A list of consecutive column names to define the slice. + + Returns + ------- + slice + A slice object spanning the indices of the given columns. + + Raises + ------ + ValueError + If one or more columns in `cols` are not found in the `windowed_cols` list. + """ + if not all(col in self.column_indices for col in cols): + missing = [col for col in cols if col not in self.column_indices] + raise ValueError(f"The following columns are missing from windowed_cols: {missing}") + start_idx = self.column_indices[cols[0]] + end_idx = self.column_indices[cols[-1]] + 1 + return slice(start_idx, end_idx) \ No newline at end of file diff --git a/src/paradigma/tremor/tremor_analysis.py b/src/paradigma/tremor/tremor_analysis.py index ea1a2532..fbfd635e 100644 --- a/src/paradigma/tremor/tremor_analysis.py +++ b/src/paradigma/tremor/tremor_analysis.py @@ -11,8 +11,8 @@ from paradigma.constants import DataColumns from paradigma.config import TremorFeatureExtractionConfig, TremorDetectionConfig, TremorAggregationConfig from paradigma.tremor.feature_extraction import extract_spectral_domain_features -from paradigma.segmenting import tabulate_windows -from paradigma.util import get_end_iso8601, write_df_data, read_metadata, aggregate_parameter, WindowedDataExtractor +from paradigma.segmenting import tabulate_windows, WindowedDataExtractor +from paradigma.util import get_end_iso8601, write_df_data, read_metadata, aggregate_parameter def extract_tremor_features(df: pd.DataFrame, config: TremorFeatureExtractionConfig) -> pd.DataFrame: diff --git 
a/src/paradigma/util.py b/src/paradigma/util.py index 1423e9e0..0ccf1704 100644 --- a/src/paradigma/util.py +++ b/src/paradigma/util.py @@ -161,81 +161,6 @@ def load_metadata_list( return metadata_list -class WindowedDataExtractor: - """ - A utility class for extracting specific column indices and slices - from a list of windowed column names. - Attributes - ---------- - column_indices : dict - A dictionary mapping column names to their indices. - Methods - ------- - get_index(col) - Returns the index of a specific column. - get_slice(cols) - Returns a slice object for a range of consecutive columns. - """ - - def __init__(self, windowed_cols): - """ - Initialize the WindowedDataExtractor. - Parameters - ---------- - windowed_cols : list of str - A list of column names in the windowed data. - Raises - ------ - ValueError - If the list of `windowed_cols` is empty. - """ - if not windowed_cols: - raise ValueError("The list of windowed columns cannot be empty.") - self.column_indices = {col: idx for idx, col in enumerate(windowed_cols)} - - def get_index(self, col): - """ - Get the index of a specific column. - Parameters - ---------- - col : str - The name of the column to retrieve the index for. - Returns - ------- - int - The index of the specified column. - Raises - ------ - ValueError - If the column is not found in the `windowed_cols` list. - """ - if col not in self.column_indices: - raise ValueError(f"Column '{col}' not found in windowed_cols.") - return self.column_indices[col] - - def get_slice(self, cols): - """ - Get a slice object for a range of consecutive columns. - Parameters - ---------- - cols : list of str - A list of consecutive column names to define the slice. - Returns - ------- - slice - A slice object spanning the indices of the given columns. - Raises - ------ - ValueError - If one or more columns in `cols` are not found in the `windowed_cols` list. 
- """ - if not all(col in self.column_indices for col in cols): - missing = [col for col in cols if col not in self.column_indices] - raise ValueError(f"The following columns are missing from windowed_cols: {missing}") - start_idx = self.column_indices[cols[0]] - end_idx = self.column_indices[cols[-1]] + 1 - return slice(start_idx, end_idx) -# TODO: ideally something like this should be possible directly in the tsdf library def extract_meta_from_tsdf_files(tsdf_data_dir : str) -> List[dict]: """ For each given TSDF directory, transcribe TSDF metadata contents to a list of dictionaries. @@ -438,8 +363,11 @@ def aggregate_parameter(parameter: np.ndarray, aggregate: str) -> np.ndarray: """ if aggregate == 'mean': return np.mean(parameter) - if aggregate == 'median': + elif aggregate == 'median': return np.median(parameter) + elif aggregate == 'mode': + unique_values, counts = np.unique(parameter, return_counts=True) + return unique_values[np.argmax(counts)] elif aggregate == '90p': return np.percentile(parameter, 90) elif aggregate == '95p': @@ -450,88 +378,3 @@ def aggregate_parameter(parameter: np.ndarray, aggregate: str) -> np.ndarray: return np.std(parameter) else: raise ValueError(f"Invalid aggregation method: {aggregate}") - -class WindowedDataExtractor: - """ - A utility class for extracting specific column indices and slices - from a list of windowed column names. - - Attributes - ---------- - column_indices : dict - A dictionary mapping column names to their indices. - - Methods - ------- - get_index(col) - Returns the index of a specific column. - get_slice(cols) - Returns a slice object for a range of consecutive columns. - """ - - def __init__(self, windowed_cols): - """ - Initialize the WindowedDataExtractor. - - Parameters - ---------- - windowed_cols : list of str - A list of column names in the windowed data. - - Raises - ------ - ValueError - If the list of `windowed_cols` is empty. 
- """ - if not windowed_cols: - raise ValueError("The list of windowed columns cannot be empty.") - self.column_indices = {col: idx for idx, col in enumerate(windowed_cols)} - - def get_index(self, col): - """ - Get the index of a specific column. - - Parameters - ---------- - col : str - The name of the column to retrieve the index for. - - Returns - ------- - int - The index of the specified column. - - Raises - ------ - ValueError - If the column is not found in the `windowed_cols` list. - """ - if col not in self.column_indices: - raise ValueError(f"Column '{col}' not found in windowed_cols.") - return self.column_indices[col] - - def get_slice(self, cols): - """ - Get a slice object for a range of consecutive columns. - - Parameters - ---------- - cols : list of str - A list of consecutive column names to define the slice. - - Returns - ------- - slice - A slice object spanning the indices of the given columns. - - Raises - ------ - ValueError - If one or more columns in `cols` are not found in the `windowed_cols` list. - """ - if not all(col in self.column_indices for col in cols): - missing = [col for col in cols if col not in self.column_indices] - raise ValueError(f"The following columns are missing from windowed_cols: {missing}") - start_idx = self.column_indices[cols[0]] - end_idx = self.column_indices[cols[-1]] + 1 - return slice(start_idx, end_idx) \ No newline at end of file