From c063fadf7d9d1a21a967ec6f5f3d40a1d0cbf28d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erlend=20H=C3=A5rstad=20=28IT=20SI=20SIB=29?= Date: Wed, 6 Feb 2019 09:14:50 +0100 Subject: [PATCH] Add rolling median filter processor --- camille/process/__init__.py | 3 ++ camille/process/rolling_median.py | 41 ++++++++++++++++++++++++++++ tests/process/test_rolling_median.py | 20 ++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 camille/process/rolling_median.py create mode 100644 tests/process/test_rolling_median.py diff --git a/camille/process/__init__.py b/camille/process/__init__.py index ad99c26f..4200ac42 100644 --- a/camille/process/__init__.py +++ b/camille/process/__init__.py @@ -14,6 +14,7 @@ * :func:`~camille.process.high_pass` * :func:`~camille.process.band_pass` * :func:`~camille.process.mooring_fatigue` +* :func:`~camille.process.rolling_median` """ from .atm_stb import process as atm_stb @@ -24,6 +25,7 @@ from .pass_filter import high_pass from .pass_filter import band_pass from .mooring_fatigue import process as mooring_fatigue +from .rolling_median import process as rolling_median __all__ = [ 'atm_stb', @@ -34,4 +36,5 @@ 'high_pass', 'band_pass', 'mooring_fatigue', + 'rolling_median' ] diff --git a/camille/process/rolling_median.py b/camille/process/rolling_median.py new file mode 100644 index 00000000..2ea973dc --- /dev/null +++ b/camille/process/rolling_median.py @@ -0,0 +1,41 @@ +import pandas as pd +import numpy as np + +def process(signal, wsize, tolerance=None): + """ Process rolling median + + Rolling window calculations + + Parameters + ---------- + signal : pd.Series + wsize : int or pd.Timedelta + size of the rolling window + tolerance : float, optional + threshold for outliers, default to the standard deviation of signal + + Returns + ------- + pd.Series + filtered signal + + Examples + -------- + >>> s = pd.Series(signal, index=t) + >>> processed = process.rolling_median(s, wsize=20, tolerance=2.0) + """ + + dt = signal.index[1] - signal.index[0] + if isinstance(wsize, pd.Timedelta): + if wsize < dt: + problem = "wsize is smaller than the samplerate of signal: {} < {}".format(wsize, dt) + raise ValueError(problem) + + wsize = int(np.ceil(wsize / dt)) + + if tolerance == None: tolerance = 1.0 * signal.std() + + rolling = signal.rolling(window=wsize, min_periods=1, center=True).median() + outliers = signal[(signal - rolling).abs() >= tolerance] + cleaned = signal.drop(outliers.index).iloc[wsize:-wsize] + return cleaned diff --git a/tests/process/test_rolling_median.py b/tests/process/test_rolling_median.py new file mode 100644 index 00000000..d9e72a95 --- /dev/null +++ b/tests/process/test_rolling_median.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +import numpy as np +import pandas as pd + +from camille import process + +def test_process(): + index = pd.date_range('1/1/2018', periods=100, freq='S') + + t = np.linspace(-np.pi, np.pi, num=100) + signal = np.sin(t) + signal[40] *= 5 #create an outlier + + s = pd.Series(signal, index=index) + dt = pd.Timedelta(seconds=5) + + processed = process.rolling_median(s, wsize=dt) + + expected = s.drop(s.index[40]).iloc[5:-5] + pd.testing.assert_series_equal(expected, processed)