Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up and minor API changes #25

Merged
merged 7 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ examples and more information on the functions can be found in the documentation
data, sample_freq = paat.read_gt3x('path/to/gt3x/file')

# Detect non-wear time
data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq)
data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)

# Detect sleep episodes
data.loc[:, "Sleep"] = paat.detect_sleep_weitz2022(data, sample_freq)
data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)

# Classify moderate-to-vigorous and sedentary behavior
data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)

# Merge the activity columns into one labelled column. columns indicates the
# importance of the columns, later names are more important and will be kept
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])

# Remove the other columns after merging
data = data[["X", "Y", "Z", "Activity"]]
Expand Down
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ examples and more information on the functions can be found in the documentation
data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)

# Detect sleep episodes
data.loc[:, "Sleep"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)
data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)

# Classify moderate-to-vigorous and sedentary behavior
data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)

# Merge the activity columns into one labelled column. columns indicates the
# importance of the columns, later names are more important and will be kept
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])

# Remove the other columns after merging
data = data[["X", "Y", "Z", "Activity"]]
Expand Down
6 changes: 6 additions & 0 deletions docs/source/paat.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ API Documentation
This toolbox is divided into different modules:

- Input & Output Module (:mod:`paat.io`)
- Calibration Module (:mod:`paat.calibration`)
- Preprocessing Module (:mod:`paat.preprocessing`)
- Features Module (:mod:`paat.features`)
- Wear Time Module (:mod:`paat.wear_time`)
Expand All @@ -21,6 +22,11 @@ it might be cleaner to call the functions directly from the module.
:undoc-members:
:show-inheritance:

.. automodule:: paat.calibration
:members:
:undoc-members:
:show-inheritance:

.. automodule:: paat.preprocessing
:members:
:undoc-members:
Expand Down
6 changes: 3 additions & 3 deletions docs/source/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@ examples and more information on the functions can be found in the documentation
data, sample_freq = paat.read_gt3x('path/to/gt3x/file')

# Detect non-wear time
data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq)
data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)

# Detect sleep episodes
data.loc[:, "Sleep"] = paat.detect_sleep_weitz2022(data, sample_freq)
data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)

# Classify moderate-to-vigorous and sedentary behavior
data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)

# Merge the activity columns into one labelled column. columns indicates the
# importance of the columns, later names are more important and will be kept
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])

# Remove the other columns after merging
data = data[["X", "Y", "Z", "Activity"]]
Expand Down
5 changes: 3 additions & 2 deletions paat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
# Expose API functions
from .estimates import calculate_pa_levels, create_activity_column
from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts
from .io import read_gt3x
from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022, detect_time_in_bed_weitz2024
from .io import read_gt3x, read_metadata
from .calibration import calibrate
from .sleep import detect_time_in_bed_weitz2024
from .wear_time import detect_non_wear_time_naive, detect_non_wear_time_hees2011, detect_non_wear_time_syed2021

try:
Expand Down
25 changes: 25 additions & 0 deletions paat/calibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

"""
Sensor Calibration Module
-------------------------

*paat.calibration* provides functions to calibrate the raw acceleration signal.

"""
import pandas as pd


def estimate_calibration_coefficents(data):
    """
    Placeholder for the automatic estimation of calibration coefficients.

    Parameters
    ----------
    data : object
        currently unused, the function raises before touching it

    Raises
    ------
    NotImplementedError
        always, because autocalibration is not available yet

    """
    message = "Autocalibration is not implemented yet. Please use GGIR to estimate the calibration coefficients."
    raise NotImplementedError(message)


def calibrate(acc, scale, offset):
    """
    Apply a linear calibration (``scale * value + offset``) to the acceleration axes.

    Parameters
    ----------
    acc : DataFrame
        a DataFrame holding the acceleration in the columns "X", "Y" and "Z"
    scale : float or array_like
        multiplicative factor(s); per-axis values are broadcast against the
        axes in "Y", "X", "Z" order
    offset : float or array_like
        additive term(s), broadcast in the same "Y", "X", "Z" order

    Returns
    -------
    calibrated : DataFrame
        a new float DataFrame with the columns "X", "Y", "Z" and a copy of the
        index of `acc`

    """
    # The arithmetic runs on the axes in Y, X, Z order; the result is put back
    # into X, Y, Z order on return.
    axis_order = ["Y", "X", "Z"]
    calibrated = pd.DataFrame(
        (scale * acc[axis_order].values + offset).astype(float),
        columns=axis_order,
        index=acc.index.copy(),
    )
    return calibrated[["X", "Y", "Z"]]
23 changes: 22 additions & 1 deletion paat/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
round_negative_to_zero : Boolean (optional)
If set to True, round negative values to zero
dtype : np.dtype (optional)
set the data type of the return array. Standard float 16, but can be set to better precision
set the data type of the return array. Standard float 32, but can be set to better precision


Returns
Expand Down Expand Up @@ -78,6 +78,27 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
return vector_magnitude.reshape(data.shape[0], 1)


def calculate_enmo(data, dtype=np.float32):
    """
    Calculate the Euclidean norm minus one (ENMO) from raw acceleration data.

    This function is a wrapper of `calculate_vector_magnitude` that subtracts
    one from the vector magnitude and rounds negative results to zero.

    Parameters
    ----------
    data : array_like or DataFrame
        numpy array with acceleration data, or a DataFrame with the columns
        "X", "Y" and "Z"
    dtype : np.dtype (optional)
        set the data type of the return array. Standard float 32, but can be set to better precision

    Returns
    -------
    enmo : np.array (acceleration values, 1)
        numpy array with the Euclidean norm minus one of the acceleration

    """
    if isinstance(data, pd.DataFrame):
        data = data[["Y", "X", "Z"]].values

    # Forward dtype explicitly; otherwise the caller's choice is silently
    # ignored and the default of calculate_vector_magnitude is used.
    return calculate_vector_magnitude(
        data,
        minus_one=True,
        round_negative_to_zero=True,
        dtype=dtype,
    )


def calculate_frequency_features(data, win_len=60, win_step=60, sample_rate=100, nfft=512, nfilt=40):
"""
Calculate frequency features from raw acceleration signal.
Expand Down
36 changes: 34 additions & 2 deletions paat/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def _create_time_vector(start, n_samples, hz):
return time_data.flatten()


def read_gt3x(file, rescale=True, pandas=True):
def read_gt3x(file, rescale=True, pandas=True, metadata=False):
"""
Reads a .gt3x file and returns the tri-axial acceleration values together
with the corresponding time stamps and all meta data.
Expand All @@ -475,6 +475,8 @@ def read_gt3x(file, rescale=True, pandas=True):
boolean indicating whether raw acceleration data should be rescaled to g values
pandas : boolean (optional)
boolean indicating whether the data should be returned as a pandas DataFrame
metadata : boolean (optional)
boolean indicating whether the full metadata should be returned

Returns
-------
Expand Down Expand Up @@ -515,6 +517,36 @@ def read_gt3x(file, rescale=True, pandas=True):
if pandas:
data = pd.DataFrame(values, columns=["Y", "X", "Z"], index=time)
data = data[["X", "Y", "Z"]]
return data, meta['Sample_Rate']
if metadata:
return data, meta['Sample_Rate'], meta
else:
return data, meta['Sample_Rate']
else:
return time, values, meta


def read_metadata(file):
    """
    Reads only the metadata of a .gt3x file.

    Parameters
    ----------
    file : string
        file location of the .gt3x file

    Returns
    -------
    meta : dict
        a dict containing all meta data produced by ActiGraph

    """
    with tempfile.TemporaryDirectory() as workdir:
        # Unzipping yields the locations of log.bin (raw data, not needed
        # here) and info.txt (the meta data).
        _, info_path = _unzip_gt3x_file(file=file, save_location=workdir)

        # Parse info.txt while the temporary directory still exists.
        meta = _format_meta_data(_extract_info(info_path))

    return meta
Binary file removed paat/models/SleepModel.pt
Binary file not shown.
Binary file removed paat/models/SleepModel_triaxial.pt
Binary file not shown.
154 changes: 3 additions & 151 deletions paat/sleep.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import pandas as pd
import numpy as np
from torch import nn
import torch
import tensorflow as tf
from tensorflow.keras import models

Expand All @@ -21,152 +19,6 @@

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from . import features

class _SleepModel(nn.Module):
def __init__(self, input_dim, hid_dim, output_dim, n_layers, dropout, batch_first=False):
super().__init__()

self.output_dim = output_dim
self.hid_dim = hid_dim
self.n_layers = n_layers
self.name = "LSTM"

self.rnn = nn.LSTM(input_dim, hid_dim, n_layers, dropout=dropout, batch_first=batch_first)

self.fc_out = nn.Linear(hid_dim, output_dim)

self.sigmoid = nn.Sigmoid()

self.dropout = nn.Dropout(dropout)

def forward(self, X, lens):
"""
Performs model's forward pass
"""

packed_input = nn.utils.rnn.pack_padded_sequence(X, lens.to('cpu'), batch_first=True, enforce_sorted=False)
packed_output, _ = self.rnn(packed_input)
output, lens = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)

return self.sigmoid(self.fc_out(output))


def detect_sleep_weitz2022(data, sample_freq, means=None, stds=None):
    """
    Infer time in bed from raw acceleration signal using frequency features.

    .. warning::
        This method turned out to work not as accurately as initially thought. Use with care!

    Parameters
    ----------
    data : DataFrame
        a DataFrame containing the raw acceleration data
    sample_freq : int
        the sampling frequency in which the data was recorded
    means : array_like (optional)
        a numpy array with the channel means, will be calculated for the sample
        if not specified
    stds : array_like (optional)
        a numpy array with the channel stds, will be calculated for the sample
        if not specified

    Returns
    -------
    is_sleep : np.array (n_samples,)
        a numpy array indicating whether the values of the acceleration data is
        sleep on minute resolution

    """
    # NOTE(review): sample_freq is not forwarded to calculate_frequency_features,
    # which therefore runs with its own default sample_rate - confirm this is
    # intended for recordings with a different sampling frequency.
    feature_vec = features.calculate_frequency_features(data)

    X = torch.from_numpy(feature_vec).float()

    # If means and stds are not both given, calculate them from the sample.
    # Compare against None: the truth value of a multi-element array is
    # ambiguous and would raise.
    if means is None or stds is None:
        means, stds = X.mean(axis=0), X.std(axis=0)
    else:
        means = torch.as_tensor(means, dtype=X.dtype)
        stds = torch.as_tensor(stds, dtype=X.dtype)

    # Normalize input and add the batch dimension
    X = ((X - means) / stds).unsqueeze(0)
    lengths = torch.Tensor([X.shape[1]])

    # Load model hard coded. Should later be changed to ONNX or similar
    model = _SleepModel(160, 4, 1, 1, dropout=0, batch_first=True)
    model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'SleepModel.pt')
    model.load_state_dict(torch.load(model_path))
    model.eval()

    # Predict sleep periods and repeat each prediction 60 * sample_freq times
    predictions = (model(X, lengths) >= .5).squeeze().numpy()
    predictions = np.repeat(predictions, 60 * sample_freq)

    return predictions


def detect_sleep_triaxial_weitz2022(data, sample_freq, resampled_frequency="1min", means=None, stds=None, model=None):
    """
    Infer time in bed from raw acceleration signal.

    .. warning::
        This method turned out to work not as accurately as initially thought. Use with care!

    Parameters
    ----------
    data : DataFrame
        a DataFrame containing the raw acceleration data
    sample_freq : int
        the sampling frequency in which the data was recorded
    resampled_frequency : str (optional)
        a str indicating to what frequency the data should be resampled. This depends
        on the model used to predict, defaults to 1min. If falsy, the data is
        not resampled and the predictions are returned as produced by the model.
    means : array_like (optional)
        a numpy array with the channel means, will be calculated for the sample
        if not specified
    stds : array_like (optional)
        a numpy array with the channel stds, will be calculated for the sample
        if not specified
    model : nn.Module (optional)
        a loaded pytorch custom model.

    Returns
    -------
    predicted_time_in_bed : np.array (n_samples,)
        a numpy array indicating whether the values of the acceleration data were spent in bed

    """
    if resampled_frequency:
        data = data[['X', 'Y', 'Z']].resample(resampled_frequency).mean()

    X = torch.from_numpy(data[['X', 'Y', 'Z']].values).float()

    # If means and stds are not both given, calculate the subject's mean and
    # std to normalize by. Compare against None: the truth value of a
    # multi-element array is ambiguous and would raise.
    if means is None or stds is None:
        means, stds = X.mean(axis=0), X.std(axis=0)
    else:
        means = torch.as_tensor(means, dtype=X.dtype)
        stds = torch.as_tensor(stds, dtype=X.dtype)

    # Normalize input and add the batch dimension
    X = ((X - means) / stds).unsqueeze(0)
    lengths = torch.Tensor([X.shape[1]])

    # Load model if not specified
    if model is None:
        model = _SleepModel(3, 2, 1, 1, dropout=0, batch_first=True)
        model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'SleepModel_triaxial.pt')
        model.load_state_dict(torch.load(model_path))

    # Switch to evaluation mode so layers such as dropout are inactive
    # during inference.
    model.eval()

    predictions = (model(X, lengths) >= .5).squeeze().numpy()

    # Repeat each prediction over the samples of its resampling window. This
    # is skipped when no resampling took place.
    if resampled_frequency:
        seconds = int(pd.Timedelta(resampled_frequency).total_seconds())
        predictions = np.repeat(predictions, seconds * sample_freq)

    return predictions

def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min", means=None, stds=None, model=None):
"""
Expand All @@ -187,8 +39,8 @@ def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min",
stds : array_like (optional)
a numpy array with the channel stds, will be calculated for the sample
if not specified
model : nn.Module (optional)
a loaded pytorch custom model.
model : keras.Model (optional)
a loaded keras custom model.

Returns
-------
Expand All @@ -214,7 +66,7 @@ def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min",
model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'TIB_model.h5')
model = models.load_model(model_path)

predictions = (model.predict(X[np.newaxis], verbose=0).squeeze() >= .5)
predictions = (model.predict(X[np.newaxis], verbose=0).squeeze() > .5)

seconds = pd.Timedelta(resampled_frequency).seconds
predictions = np.repeat(predictions, seconds * sample_freq)
Expand Down
Loading
Loading