Skip to content

Commit

Permalink
Merge pull request #182 from pfizer-opensource/development
Browse files Browse the repository at this point in the history
Version 0.17.0
  • Loading branch information
yiorg authored Nov 20, 2024
2 parents aaf56e4 + 922a24d commit caccd24
Show file tree
Hide file tree
Showing 11 changed files with 139 additions and 90 deletions.
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
project(
'scikit-digital-health',
'c',
version: '0.16.15',
version: '0.17.0',
license: 'MIT',
meson_version: '>=1.1',
)
Expand Down
8 changes: 7 additions & 1 deletion src/skdh/activity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,13 @@ def metric_fn(accel, wlen, \*args, \*\*kwargs):
from skdh.activity import endpoints

__all__ = (
["ActivityLevelClassification", "StaudenmayerClassification", "metrics", "get_available_cutpoints", "endpoints"]
[
"ActivityLevelClassification",
"StaudenmayerClassification",
"metrics",
"get_available_cutpoints",
"endpoints",
]
+ metrics.__all__
+ endpoints.__all__
)
133 changes: 72 additions & 61 deletions src/skdh/activity/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
arange,
arcsin,
pi,
isnan
isnan,
)
from numpy.linalg import norm
from pandas import Timedelta, Timestamp
Expand Down Expand Up @@ -305,16 +305,20 @@ def _update_date_results(
# subtracting a day handles cases where the recording starts well into a day, in which case
the date should be the previous day, when the window WOULD have started had there been
# more data
window_start_dt = start_dt - Timedelta(1, unit='day') if (start_dt.hour < day_start_hour) else start_dt
window_start_dt = (
start_dt - Timedelta(1, unit="day")
if (start_dt.hour < day_start_hour)
else start_dt
)

results["Date"][day_n] = window_start_dt.strftime("%Y-%m-%d")
results["Day Start Timestamp"][day_n] = start_dt.strftime(
"%Y-%m-%d %H:%M:%S"
)
results["Day Start Timestamp"][day_n] = start_dt.strftime("%Y-%m-%d %H:%M:%S")
# round end time as well
results["Day End Timestamp"][day_n] = self.convert_timestamps(
time[day_stop_idx]
).round("min").strftime("%Y-%m-%d %H:%M:%S")
results["Day End Timestamp"][day_n] = (
self.convert_timestamps(time[day_stop_idx])
.round("min")
.strftime("%Y-%m-%d %H:%M:%S")
)
results["Weekday"][day_n] = window_start_dt.strftime("%A")
results["Day N"][day_n] = day_n + 1
results["N hours"][day_n] = around(
Expand Down Expand Up @@ -753,22 +757,30 @@ class StaudenmayerClassification(BaseProcess):
Default is True.
min_wear_time : int, optional
Minimum wear time in hours for a day to be analyzed. Default is 10 hours.
References
----------
.. [1] J. Staudenmayer, S. He, A. Hickey, J. Sasaki, and P. Freedson,
“Methods to estimate aspects of physical activity and sedentary behavior
from high-frequency wrist accelerometer measurements,” J Appl Physiol,
.. [1] J. Staudenmayer, S. He, A. Hickey, J. Sasaki, and P. Freedson,
“Methods to estimate aspects of physical activity and sedentary behavior
from high-frequency wrist accelerometer measurements,” J Appl Physiol,
vol. 119, no. 4, pp. 396–403, Aug. 2015, doi: 10.1152/japplphysiol.00026.2015.
"""
def __init__(self, arm_axis, demean=False, use_power=True, min_wear_time=10, day_window=(0, 24),):

def __init__(
self,
arm_axis,
demean=False,
use_power=True,
min_wear_time=10,
day_window=(0, 24),
):
super().__init__(
arm_axis=arm_axis,
demean=demean,
use_power=use_power,
min_wear_time=min_wear_time,
day_window=day_window
day_window=day_window,
)

self.axis = arm_axis
Expand All @@ -778,19 +790,13 @@ def __init__(self, arm_axis, demean=False, use_power=True, min_wear_time=10, day

self._return_raw = False

self.endpoints = [
'sedentary',
'nonsedentary',
'light',
'moderate',
'vigorous'
]
self.endpoints = ["sedentary", "nonsedentary", "light", "moderate", "vigorous"]

if day_window is None:
self.day_key = (-1, -1)
else:
self.day_key = tuple(day_window)

def _update_date_results(
self, results, time, day_n, day_start_idx, day_stop_idx, day_start_hour
):
Expand All @@ -801,16 +807,20 @@ def _update_date_results(
# subtracting a day handles cases where the recording starts well into a day, in which case
the date should be the previous day, when the window WOULD have started had there been
# more data
window_start_dt = start_dt - Timedelta(1, unit='day') if (start_dt.hour < day_start_hour) else start_dt
window_start_dt = (
start_dt - Timedelta(1, unit="day")
if (start_dt.hour < day_start_hour)
else start_dt
)

results["Date"][day_n] = window_start_dt.strftime("%Y-%m-%d")
results["Day Start Timestamp"][day_n] = start_dt.strftime(
"%Y-%m-%d %H:%M:%S"
)
results["Day Start Timestamp"][day_n] = start_dt.strftime("%Y-%m-%d %H:%M:%S")
# round end time as well
results["Day End Timestamp"][day_n] = self.convert_timestamps(
time[day_stop_idx]
).round("min").strftime("%Y-%m-%d %H:%M:%S")
results["Day End Timestamp"][day_n] = (
self.convert_timestamps(time[day_stop_idx])
.round("min")
.strftime("%Y-%m-%d %H:%M:%S")
)
results["Weekday"][day_n] = window_start_dt.strftime("%A")
results["Day N"][day_n] = day_n + 1
results["N hours"][day_n] = around(
Expand All @@ -821,7 +831,7 @@ def _update_date_results(
)

return start_dt

@handle_process_returns(results_to_kwargs=False)
def predict(self, *, time, accel, fs=None, wear=None, tz_name=None, **kwargs):
"""
Expand Down Expand Up @@ -898,14 +908,8 @@ def predict(self, *, time, accel, fs=None, wear=None, tz_name=None, **kwargs):
filt_name = "wear" if sleep is None else "wear-wake"
for endpt in self.endpoints:
res[f"{filt_name}_{endpt}"] = full(self.day_idx[0].size, nan, dtype="float")

raw = {
'starts': [],
'stops': [],
'sd_avm': [],
'mean_angle': [],
'p625': []
}

raw = {"starts": [], "stops": [], "sd_avm": [], "mean_angle": [], "p625": []}
# =============================================================================
# PROCESSING
# =============================================================================
Expand All @@ -922,12 +926,12 @@ def predict(self, *, time, accel, fs=None, wear=None, tz_name=None, **kwargs):
)

# save wear time
res['N wear hours'][iday] = around(
res["N wear hours"][iday] = around(
sum(dwear_stops - dwear_starts) / fs / 3600, 1
)
res['Wear Minutes'][iday] = sum(time[dwear_stops] - time[dwear_starts]) / 60
res["Wear Minutes"][iday] = sum(time[dwear_stops] - time[dwear_starts]) / 60

if res['N wear hours'][iday] < self.min_wear_time:
if res["N wear hours"][iday] < self.min_wear_time:
continue # skip day if less than minimum wear time

# if there is sleep data, add it to the intersection of indices
Expand All @@ -937,15 +941,24 @@ def predict(self, *, time, accel, fs=None, wear=None, tz_name=None, **kwargs):
(self.wear_idx[1], sleep_stops),
(True, False), # include wear time, exclude sleeping time
day_start,
day_stop
day_stop,
)

self._compute_wear_wake_activity(
res, accel, fs, iday, dwear_starts, dwear_stops, nwlen, epm, filt_name, raw
res,
accel,
fs,
iday,
dwear_starts,
dwear_stops,
nwlen,
epm,
filt_name,
raw,
)

if self._return_raw:
res.update({'raw': raw})
res.update({"raw": raw})

return res

Expand All @@ -959,7 +972,7 @@ def _compute_wear_wake_activity(
# minimum hours should have nan values
for endpt in self.endpoints:
results[f"{filt_prefix}_{endpt}"][day_n] = 0.0

# get starts and stops for wich we can actually compute values
mask = (stops - starts) > n_wlen
starts = starts[mask]
Expand All @@ -979,7 +992,7 @@ def _compute_wear_wake_activity(
use_modulus=not self.use_power,
normalize=True,
)

# windowed acceleration magnitude
acc_vm_w = get_windowed_view(avm, n_wlen, n_wlen, ensure_c_contiguity=True)

Expand All @@ -989,11 +1002,11 @@ def _compute_wear_wake_activity(
p625 = ft.compute(acc_vm_w, fs=fs, axis=-1)

# append raw
raw['starts'].append(start)
raw['stops'].append(stop)
raw['sd_avm'].append(sd_avm)
raw['mean_angle'].append(mean_angle)
raw['p625'].append(p625)
raw["starts"].append(start)
raw["stops"].append(stop)
raw["sd_avm"].append(sd_avm)
raw["mean_angle"].append(mean_angle)
raw["p625"].append(p625)

# number of available labels
n_labels = sd_avm.size - isnan(sd_avm).sum()
Expand Down Expand Up @@ -1024,12 +1037,10 @@ def _compute_wear_wake_activity(

if (mask_s.sum() + mask_ns.sum()) != n_labels:
raise ValueError("Sed/Nonsed masks do not equal input size")

# add the time to the results
results[f'{filt_prefix}_light'][day_n] += mask_l.sum() / epm
results[f'{filt_prefix}_moderate'][day_n] += mask_m.sum() / epm
results[f'{filt_prefix}_vigorous'][day_n] += mask_v.sum() / epm
results[f'{filt_prefix}_sedentary'][day_n] += mask_s.sum() / epm
results[f'{filt_prefix}_nonsedentary'][day_n] += mask_ns.sum() / epm


# add the time to the results
results[f"{filt_prefix}_light"][day_n] += mask_l.sum() / epm
results[f"{filt_prefix}_moderate"][day_n] += mask_m.sum() / epm
results[f"{filt_prefix}_vigorous"][day_n] += mask_v.sum() / epm
results[f"{filt_prefix}_sedentary"][day_n] += mask_s.sum() / epm
results[f"{filt_prefix}_nonsedentary"][day_n] += mask_ns.sum() / epm
19 changes: 17 additions & 2 deletions src/skdh/features/lib/frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,15 @@ class RangePowerSum(Feature):

__slots__ = ("pad", "low_cut", "high_cut", "demean", "use_modulus", "normalize")

def __init__(self, padlevel=2, low_cutoff=0.0, high_cutoff=5.0, demean=False, use_modulus=False, normalize=False):
def __init__(
self,
padlevel=2,
low_cutoff=0.0,
high_cutoff=5.0,
demean=False,
use_modulus=False,
normalize=False,
):
super(RangePowerSum, self).__init__(
padlevel=padlevel,
low_cutoff=low_cutoff,
Expand Down Expand Up @@ -272,7 +280,14 @@ def compute(self, signal, fs=1.0, *, axis=-1):
"""
x = super().compute(signal, fs, axis=axis)
return extensions.range_power_sum(
x, fs, self.pad, self.low_cut, self.high_cut, self.demean, self.use_modulus, self.normalize
x,
fs,
self.pad,
self.low_cut,
self.high_cut,
self.demean,
self.use_modulus,
self.normalize,
)


Expand Down
19 changes: 14 additions & 5 deletions src/skdh/gait/substeps/s1_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from numpy import (
mean,
median,
nanmedian,
argmax,
sign,
abs,
Expand All @@ -18,6 +18,9 @@
exp,
sum,
max as npmax,
searchsorted,
append,
nan,
)
from scipy.signal import detrend, butter, sosfiltfilt, find_peaks, correlate
import pywt
Expand Down Expand Up @@ -136,11 +139,17 @@ def get_ap_axis_sign(fs, accel, ap_axis):
ap_acc_f = sosfiltfilt(sos, accel[:, ap_axis])

mx, mx_meta = find_peaks(ap_acc_f, prominence=0.05)
med_prom = median(mx_meta["prominences"])
med_prom = nanmedian(mx_meta["prominences"])
mask = mx_meta["prominences"] > (0.75 * med_prom)

left_med = median(mx[mask] - mx_meta["left_bases"][mask])
right_med = median(mx_meta["right_bases"][mask] - mx[mask])
# can't use bases here from maxima since it is affected by prominence and
# will get bases past other peaks
mn, _ = find_peaks(-ap_acc_f, prominence=0.75 * med_prom)
idx = searchsorted(mn, mx[mask])
# make sure that we don't go out of bounds by adding a nan to the end
mn = append(mn, nan)
left_med = nanmedian(mx[mask] - mn[idx - 1])
right_med = nanmedian(mn[idx] - mx[mask])

sign = -1 if (left_med < right_med) else 1

Expand Down Expand Up @@ -189,7 +198,7 @@ def get_step_time(fs, accel, ap_axis):
raise ValueError(
"Not enough valid autocovariance windows to estimate step frequency."
)
step_samples = median(first_peaks)
step_samples = nanmedian(first_peaks)

return step_samples / fs

Expand Down
9 changes: 4 additions & 5 deletions src/skdh/io/axivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ReadCwa(BaseIO):
Trim keys provided in the `predict` method. Default (None) will not do any trimming.
Trimming of either start or end can be accomplished by providing None in the place
of the key you do not want to trim. If provided, the tuple should be of the form
(start_key, end_key). When provided, trim datetimes will be assumed to be in the
(start_key, end_key). When provided, trim datetimes will be assumed to be in the
same timezone as the data (ie naive if naive, or in the timezone provided).
ext_error : {"warn", "raise", "skip"}, optional
What to do if the file extension does not match the expected extension (.cwa).
Expand Down Expand Up @@ -138,16 +138,15 @@ def predict(self, *, file, tz_name=None, **kwargs):
results[self._gyro] = ascontiguousarray(imudata[:end, gyr_axes])
if mag_axes is not None: # pragma: no cover :: don't have data to test this
results[self._mag] = ascontiguousarray(imudata[:end, mag_axes])

if self.trim_keys is not None:
results = self.trim_data(
*self.trim_keys,
tz_name,
kwargs,
**results # contains the time array/argument
**results, # contains the time array/argument
)

results['fs'] = fs

results["fs"] = fs

return results
Loading

0 comments on commit caccd24

Please sign in to comment.