Trybnetic · Trybnetic · Sep 6, 2024 · Sep 5, 2024 · Sep 5, 2024 · Sep 5, 2024
diff --git a/README.rst b/README.rst
@@ -56,17 +56,17 @@ examples and more information on the functions can be found in the documentation
     data, sample_freq = paat.read_gt3x('path/to/gt3x/file')
 
     # Detect non-wear time
-    data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq)
+    data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)
 
     # Detect sleep episodes
-    data.loc[:, "Sleep"] = paat.detect_sleep_weitz2022(data, sample_freq)
+    data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)
 
     # Classify moderate-to-vigorous and sedentary behavior
     data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)
 
     # Merge the activity columns into one labelled column. columns indicates the
     # importance of the columns, later names are more important and will be kept
-    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
+    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])
 
     # Remove the other columns after merging
     data =  data[["X", "Y", "Z", "Activity"]]

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -44,14 +44,14 @@ examples and more information on the functions can be found in the documentation
     data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)
 
     # Detect sleep episodes
-    data.loc[:, "Sleep"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)
+    data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)
 
     # Classify moderate-to-vigorous and sedentary behavior
     data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)
 
     # Merge the activity columns into one labelled column. columns indicates the
     # importance of the columns, later names are more important and will be kept
-    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
+    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])
 
     # Remove the other columns after merging
     data =  data[["X", "Y", "Z", "Activity"]]

diff --git a/docs/source/paat.rst b/docs/source/paat.rst
@@ -6,6 +6,7 @@ API Documentation
 This toolbox is divided into different modules:
 
 - Input & Output Module (:mod:`paat.io`)
+- Calibration Module (:mod:`paat.calibration`)
 - Preprocessing Module (:mod:`paat.preprocessing`)
 - Features Module (:mod:`paat.features`)
 - Wear Time Module (:mod:`paat.wear_time`)
@@ -21,6 +22,11 @@ it might be cleaner to call the functions directly from the module.
     :undoc-members:
     :show-inheritance:
 
+.. automodule:: paat.calibration
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 .. automodule:: paat.preprocessing
     :members:
     :undoc-members:

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
@@ -25,17 +25,17 @@ examples and more information on the functions can be found in the documentation
     data, sample_freq = paat.read_gt3x('path/to/gt3x/file')
 
     # Detect non-wear time
-    data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_syed2021(data, sample_freq)
+    data.loc[:, "Non Wear Time"] = paat.detect_non_wear_time_hees2011(data, sample_freq)
 
     # Detect sleep episodes
-    data.loc[:, "Sleep"] = paat.detect_sleep_weitz2022(data, sample_freq)
+    data.loc[:, "Time in Bed"] = paat.detect_time_in_bed_weitz2024(data, sample_freq)
 
     # Classify moderate-to-vigorous and sedentary behavior
     data.loc[:, ["MVPA", "SB"]] = paat.calculate_pa_levels(data, sample_freq)
 
     # Merge the activity columns into one labelled column. columns indicates the
     # importance of the columns, later names are more important and will be kept
-    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Sleep", "Non Wear Time"])
+    data.loc[:, "Activity"] = paat.create_activity_column(data, columns=["SB", "MVPA", "Time in Bed", "Non Wear Time"])
 
     # Remove the other columns after merging
     data =  data[["X", "Y", "Z", "Activity"]]

diff --git a/paat/__init__.py b/paat/__init__.py
@@ -30,8 +30,9 @@
 # Expose API functions
 from .estimates import calculate_pa_levels, create_activity_column
 from .features import calculate_actigraph_counts, calculate_vector_magnitude, calculate_brond_counts
-from .io import read_gt3x
-from .sleep import detect_sleep_weitz2022, detect_sleep_triaxial_weitz2022, detect_time_in_bed_weitz2024
+from .io import read_gt3x, read_metadata
+from .calibration import calibrate
+from .sleep import detect_time_in_bed_weitz2024
 from .wear_time import detect_non_wear_time_naive, detect_non_wear_time_hees2011, detect_non_wear_time_syed2021
 
 try:

diff --git a/paat/calibration.py b/paat/calibration.py
@@ -0,0 +1,25 @@
+
+"""
+Sensor Calibration Module
+-------------------------
+
+*paat.calibration* provides functions to calibrate the raw acceleration signal.
+
+"""
+import pandas as pd
+
+
+def estimate_calibration_coefficents(data):  # stub: `data` is never used, every call raises NotImplementedError
+    raise NotImplementedError("Autocalibration is not implemented yet. Please use GGIR to estimate the calibration coefficients.")
+
+
+def calibrate(acc, scale, offset):
+    """
+    Rescale raw acceleration linearly as ``scale * acc + offset``.
+
+    Array-like `scale`/`offset` broadcast against the columns in Y, X, Z order.
+    The result is a float DataFrame with the index of `acc` and columns X, Y, Z.
+    """
+    axis_order = ["Y", "X", "Z"]
+    calibrated = ((scale * acc[axis_order].values) + offset).astype(float)
+    return pd.DataFrame(calibrated, columns=axis_order, index=acc.index.copy())[["X", "Y", "Z"]]
diff --git a/paat/features.py b/paat/features.py
@@ -37,7 +37,7 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
     round_negative_to_zero : Boolean (optional)
         If set to True, round negative values to zero
     dtype : np.dtype (optional)
-        set the data type of the return array. Standard float 16, but can be set to better precision
+        set the data type of the return array. Standard float 32, but can be set to better precision
 
 
     Returns
@@ -78,6 +78,27 @@ def calculate_vector_magnitude(data, minus_one=False, round_negative_to_zero=Fal
     return vector_magnitude.reshape(data.shape[0], 1)
 
 
+def calculate_enmo(data, dtype=np.float32):
+    """
+    Calculate the Euclidean norm minus one (ENMO) from raw acceleration data.
+
+    Parameters
+    ----------
+    data : array_like or DataFrame
+        acceleration data; for a DataFrame the columns Y, X, Z are used
+    dtype : np.dtype (optional)
+        data type of the return array, forwarded to `calculate_vector_magnitude`
+
+    Returns
+    -------
+    enmo : np.array
+        result of `calculate_vector_magnitude` with minus_one and round_negative_to_zero set
+    """
+    if isinstance(data, pd.DataFrame):
+        data = data[["Y", "X", "Z"]].values
+    return calculate_vector_magnitude(data, minus_one=True, round_negative_to_zero=True, dtype=dtype)
+
+
 def calculate_frequency_features(data, win_len=60, win_step=60, sample_rate=100, nfft=512, nfilt=40):
     """
     Calculate frequency features from raw acceleration signal.

diff --git a/paat/io.py b/paat/io.py
@@ -462,7 +462,7 @@ def _create_time_vector(start, n_samples, hz):
     return time_data.flatten()
 
 
-def read_gt3x(file, rescale=True, pandas=True):
+def read_gt3x(file, rescale=True, pandas=True, metadata=False):
     """
     Reads a .gt3x file and returns the tri-axial acceleration values together
     with the corresponding time stamps and all meta data.
@@ -475,6 +475,8 @@ def read_gt3x(file, rescale=True, pandas=True):
         boolean indicating whether raw acceleration data should be rescaled to g values
     pandas : boolean (optional)
         boolean indicating whether the data should be returned as a pandas DataFrame
+    metadata : boolean (optional)
+        boolean indicating whether the full metadata should be returned
 
     Returns
     -------
@@ -515,6 +517,36 @@ def read_gt3x(file, rescale=True, pandas=True):
     if pandas:
         data = pd.DataFrame(values, columns=["Y", "X", "Z"], index=time)
         data = data[["X", "Y", "Z"]]
-        return data, meta['Sample_Rate']
+        if metadata:
+            return data, meta['Sample_Rate'], meta
+        else:
+            return data, meta['Sample_Rate']
     else:
         return time, values, meta
+
+
+def read_metadata(file):
+    """
+    Read only the metadata of a .gt3x file.
+
+    Parameters
+    ----------
+    file : string
+        file location of the .gt3x file
+
+    Returns
+    -------
+    meta : dict
+        the content of the archive's info.txt after `_format_meta_data`
+
+    """
+    # The archive is unpacked into a scratch directory that is removed again
+    # as soon as the info file has been parsed.
+    with tempfile.TemporaryDirectory() as scratch_dir:
+        # the first returned path (the binary log) is not needed here
+        _, info_path = _unzip_gt3x_file(file=file, save_location=scratch_dir)
+        # parse info.txt into the unformatted metadata
+        raw_meta = _extract_info(info_path)
+
+    # formatting runs on the parsed values after the scratch directory is gone
+    return _format_meta_data(raw_meta)
diff --git a/paat/models/SleepModel.pt b/paat/models/SleepModel.pt
diff --git a/paat/models/SleepModel_triaxial.pt b/paat/models/SleepModel_triaxial.pt
diff --git a/paat/sleep.py b/paat/sleep.py
@@ -10,8 +10,6 @@
 
 import pandas as pd
 import numpy as np
-from torch import nn
-import torch
 import tensorflow as tf
 from tensorflow.keras import models
 
@@ -21,152 +19,6 @@
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
-from . import features
-
-class _SleepModel(nn.Module):
-    def __init__(self, input_dim, hid_dim, output_dim, n_layers, dropout, batch_first=False):
-        super().__init__()
-
-        self.output_dim = output_dim
-        self.hid_dim = hid_dim
-        self.n_layers = n_layers
-        self.name = "LSTM"
-
-        self.rnn = nn.LSTM(input_dim, hid_dim, n_layers, dropout=dropout, batch_first=batch_first)
-
-        self.fc_out = nn.Linear(hid_dim, output_dim)
-
-        self.sigmoid = nn.Sigmoid()
-
-        self.dropout = nn.Dropout(dropout)
-
-    def forward(self, X, lens):
-        """
-        Performs model's forward pass
-        """
-
-        packed_input = nn.utils.rnn.pack_padded_sequence(X, lens.to('cpu'), batch_first=True, enforce_sorted=False)
-        packed_output, _ = self.rnn(packed_input)
-        output, lens = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
-
-        return self.sigmoid(self.fc_out(output))
-
-
-def detect_sleep_weitz2022(data, sample_freq, means=None, stds=None):
-    """
-    Infer time in bed from raw acceleration signal using frequency features.
-
-    .. warning::
-        This method turned out to work not as accurately as initially thought. Use with care!
-
-    Parameters
-    ----------
-    data : DataFrame
-        a DataFrame containg the raw acceleration data
-    sample_freq : int
-        the sampling frequency in which the data was recorded
-    means : array_like (optional)
-        a numpy array with the channel means, will be calculated for the sample
-        if not specified
-    stds : array_like (optional)
-        a numpy array with the channel stds, will be calculated for the sample
-        if not specified
-
-    Returns
-    -------
-    is_sleep : np.array (n_samples,)
-        a numpy array indicating whether the values of the acceleration data is
-        sleep on minute resolution
-
-    """
-
-    feature_vec = features.calculate_frequency_features(data)
-
-    X = torch.from_numpy(feature_vec).float()
-
-    # If no means and stds are given, calculate it
-    if not means or not stds:
-        means, stds = X.mean(axis=0), X.std(axis=0)
-
-    # Normalize input
-    X = (X - means) / stds
-
-    X = X.unsqueeze(0)
-    lengths = torch.Tensor([X.shape[1]])
-
-    # Load model hard coded. Should later be changed to ONNX or similar
-    model = _SleepModel(160, 4, 1, 1, dropout=0, batch_first=True)
-    model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'SleepModel.pt')
-    model.load_state_dict(torch.load(model_path))
-    model.eval()
-
-    # Predict sleep periods
-    predictions = (model(X, lengths) >= .5).squeeze().numpy()
-    predictions = np.repeat(predictions, 60 * sample_freq)
-
-    return predictions
-
-
-def detect_sleep_triaxial_weitz2022(data, sample_freq, resampled_frequency="1min", means=None, stds=None, model=None):
-    """
-    Infer time in bed from raw acceleration signal.
-
-    .. warning::
-        This method turned out to work not as accurately as initially thought. Use with care!
-
-    Parameters
-    ----------
-    data : DataFrame
-        a DataFrame containg the raw acceleration data
-    sample_freq : int
-        the sampling frequency in which the data was recorded
-    resampled_frequency : str (optional)
-        a str indicating to what frequency the data should be resampled. This depends
-        on the model used to predict, defaults to 1min.
-    means : array_like (optional)
-        a numpy array with the channel means, will be calculated for the sample
-        if not specified
-    stds : array_like (optional)
-        a numpy array with the channel stds, will be calculated for the sample
-        if not specified
-    model : nn.Module (optional)
-        a loaded pytorch custom model.
-
-    Returns
-    -------
-    predicted_time_in_bed : np.array (n_samples,)
-        a numpy array indicating whether the values of the acceleration data were spent in bed
-
-    """
-    if resampled_frequency:
-        data = data[['X', 'Y', 'Z']].resample(resampled_frequency).mean()
-
-    X = torch.from_numpy(data[['X', 'Y', 'Z']].values).float()
-
-    # If no means and stds are given, calculate subject's mean and std
-    # to normalize by this
-    if not means or not stds:
-        means, stds = X.mean(axis=0), X.std(axis=0)
-
-    # Normalize input
-    X = (X - means) / stds
-    lengths = torch.Tensor(X.shape[0]).float().unsqueeze(0)
-
-    X = X.unsqueeze(0)
-    lengths = torch.Tensor([X.shape[1]])
-
-    # Load model if not specified
-    if not model:
-        model = _SleepModel(3, 2, 1, 1, dropout=0, batch_first=True)
-        model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'SleepModel_triaxial.pt')
-        model.load_state_dict(torch.load(model_path))
-        model.eval()
-
-    predictions = (model(X, lengths) >= .5).squeeze().numpy()
-    seconds = pd.Timedelta(resampled_frequency).seconds
-    predictions = np.repeat(predictions, seconds * sample_freq)
-
-    return predictions
 
 def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min", means=None, stds=None, model=None):
     """
@@ -187,8 +39,8 @@ def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min",
     stds : array_like (optional)
         a numpy array with the channel stds, will be calculated for the sample
         if not specified
-    model : nn.Module (optional)
-        a loaded pytorch custom model.
+    model : keras.Model (optional)
+        a loaded keras custom model.
 
     Returns
     -------
@@ -214,7 +66,7 @@ def detect_time_in_bed_weitz2024(data, sample_freq, resampled_frequency="1min",
         model_path = os.path.join(os.path.pardir, os.path.dirname(__file__), 'models', 'TIB_model.h5')
         model = models.load_model(model_path)
 
-    predictions = (model.predict(X[np.newaxis], verbose=0).squeeze() >= .5)
+    predictions = (model.predict(X[np.newaxis], verbose=0).squeeze() > .5)
 
     seconds = pd.Timedelta(resampled_frequency).seconds
     predictions = np.repeat(predictions, seconds * sample_freq)