feat: add first-time-to-predict option to FEMTO and XJTU-SY (#23)
tilman151 authored Feb 21, 2023
1 parent f277338 commit f605f51
Showing 5 changed files with 170 additions and 3 deletions.
21 changes: 21 additions & 0 deletions rul_datasets/reader/__init__.py
@@ -139,6 +139,27 @@
The effects of `percent_broken` and `percent_fail_runs` are summarized under the term
**truncation** as they effectively truncate the dataset in two dimensions.
The readers for the FEMTO and XJTU-SY datasets have two additional constructor
arguments. The `first_time_to_predict` argument lets you set an individual maximum
RUL value per run in the dataset. As both are bearing datasets, the
first-time-to-predict is defined as the time step at which the degradation of the
bearing first becomes noticeable. The RUL before this time step is assumed to be
constant. Setting `norm_rul` scales the RUL to [0, 1] per run, which is best
practice when using first-time-to-predict.
```pycon
>>> fttp = [10, 20, 30, 40, 50]
>>> fd1 = rul_datasets.reader.XjtuSyReader(
... fd=1, first_time_to_predict=fttp, norm_rul=True
... )
>>> fd1.prepare_data()
>>> features, labels = fd1.load_split("dev")
>>> labels[0][:15]
array([1. , 1. , 1. , 1. , 1. ,
1. , 1. , 1. , 1. , 1. ,
1. , 0.99115044, 0.98230088, 0.97345133, 0.96460177])
```
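For intuition, the tail of the labels above can be reproduced by hand. The sketch below is illustrative and assumes a hypothetical run length of 123 steps with a linear raw RUL of `np.arange(n, 0, -1)`, the shape asserted in the library's tests:
```pycon
>>> import numpy as np
>>> n, fttp = 123, 10  # hypothetical run length and first-time-to-predict
>>> raw_rul = np.arange(n, 0, -1)          # linear RUL: n, n-1, ..., 1
>>> max_rul = n - fttp                     # RUL at the first-time-to-predict step
>>> capped = np.minimum(raw_rul, max_rul)  # constant until fttp, then declining
>>> normed = capped / capped.max()         # norm_rul=True scales each run to [0, 1]
>>> normed[11:13]
array([0.99115044, 0.98230088])
```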
Readers can be used as is if you just want access to the dataset. If you plan to use
them with PyTorch or PyTorch Lightning, it is recommended to combine them with a
[RulDataModule][rul_datasets.core.RulDataModule]:
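A minimal sketch of that combination (hedged: it assumes the constructor signature `RulDataModule(reader, batch_size)`; see the linked module page for the exact API):
```pycon
>>> import rul_datasets
>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1)
>>> dm = rul_datasets.RulDataModule(fd1, batch_size=32)
```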
49 changes: 47 additions & 2 deletions rul_datasets/reader/femto.py
@@ -54,6 +54,16 @@ class FemtoReader(AbstractReader):
>>> features, labels = fd1.load_split("dev")
>>> features[0].shape
(2463, 2560, 2)
Set first-time-to-predict:
>>> import rul_datasets
>>> fttp = [10, 20, 30, 40, 50]
>>> fd1 = rul_datasets.reader.FemtoReader(fd=1, first_time_to_predict=fttp)
>>> fd1.prepare_data()
>>> features, labels = fd1.load_split("dev")
>>> labels[0][:15]
array([2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793.,
2793., 2793., 2792., 2791., 2790., 2789.])
"""

_FEMTO_ROOT: str = os.path.join(get_data_root(), "FEMTOBearingDataSet")
@@ -68,11 +78,19 @@ def __init__(
percent_fail_runs: Optional[Union[float, List[int]]] = None,
truncate_val: bool = False,
run_split_dist: Optional[Dict[str, List[int]]] = None,
first_time_to_predict: Optional[List[int]] = None,
norm_rul: bool = False,
) -> None:
"""
Create a new FEMTO reader for one of the sub-datasets. By default, the RUL
values are not capped. The default window size is 2560.
Use `first_time_to_predict` to set an individual RUL inflection point for
each run. It should be a list with one integer index per run. The index
is the time step after which the RUL declines; before it, the RUL is
assumed constant. The `norm_rul` argument can then be used to scale the
RUL of each run between zero and one.
For more information about using readers refer to the [reader]
[rul_datasets.reader] module page.
@@ -84,10 +102,23 @@
percent_fail_runs: The percentage or index list of available time series.
truncate_val: Truncate the validation data with `percent_broken`, too.
run_split_dist: Dictionary that assigns each run idx to each split.
first_time_to_predict: The time step for each time series before which RUL
is constant.
norm_rul: Normalize RUL between zero and one.
"""
super().__init__(
fd, window_size, max_rul, percent_broken, percent_fail_runs, truncate_val
)

if (first_time_to_predict is not None) and (max_rul is not None):
raise ValueError(
"FemtoReader cannot use 'first_time_to_predict' "
"and 'max_rul' in conjunction."
)

self.first_time_to_predict = first_time_to_predict
self.norm_rul = norm_rul

self._preparator = FemtoPreparator(self.fd, self._FEMTO_ROOT, run_split_dist)

@property
@@ -101,7 +132,7 @@ def prepare_data(self) -> None:
dataset and each custom split for the first time.
The dataset is downloaded from a custom mirror and extracted into the data
- root directory. The whole dataset is converted com CSV files to NPY files to
+ root directory. The whole dataset is converted from CSV files to NPY files to
speed up loading it from disk. Afterwards, a scaler is fit on the development
features. Previously completed steps are skipped.
"""
@@ -118,10 +149,24 @@ def load_complete_split(
features = [f[:, -self.window_size :, :] for f in features] # crop to window
features = scaling.scale_features(features, self._preparator.load_scaler())
if self.max_rul is not None:
- targets = [np.clip(t, a_min=0, a_max=self.max_rul) for t in targets]
+ targets = [np.minimum(t, self.max_rul) for t in targets]
elif self.first_time_to_predict is not None:
targets = self._clip_first_time_to_predict(targets, split)

if self.norm_rul:
targets = [t / np.max(t) for t in targets]

return features, targets

def _clip_first_time_to_predict(self, targets, split):
# Run indices in the split distribution are one-based, hence `i - 1`.
fttps = [
self.first_time_to_predict[i - 1]
for i in self._preparator.run_split_dist[split]
]
# Cap each run's RUL at its value at the first-time-to-predict step.
targets = [np.minimum(t, len(t) - fttp) for t, fttp in zip(targets, fttps)]

return targets

def default_window_size(self, fd: int) -> int:
return FemtoPreparator.DEFAULT_WINDOW_SIZE
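Because `first_time_to_predict` replaces `max_rul` as the source of the RUL cap, the two options are mutually exclusive, as the guard in `__init__` above enforces. A minimal sketch of the resulting error (illustrative; output abbreviated):
```pycon
>>> import rul_datasets
>>> rul_datasets.reader.FemtoReader(fd=1, max_rul=125, first_time_to_predict=[10] * 7)
Traceback (most recent call last):
    ...
ValueError: FemtoReader cannot use 'first_time_to_predict' and 'max_rul' in conjunction.
```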

49 changes: 48 additions & 1 deletion rul_datasets/reader/xjtu_sy.py
@@ -47,6 +47,16 @@ class XjtuSyReader(AbstractReader):
>>> features, labels = fd1.load_split("dev")
>>> features[0].shape
(52, 32768, 2)
Set first-time-to-predict:
>>> import rul_datasets
>>> fttp = [10, 20, 30, 40, 50]
>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1, first_time_to_predict=fttp)
>>> fd1.prepare_data()
>>> features, labels = fd1.load_split("dev")
>>> labels[0][:15]
array([113., 113., 113., 113., 113., 113., 113., 113., 113., 113., 113.,
112., 111., 110., 109.])
"""

_XJTU_SY_ROOT: str = os.path.join(get_data_root(), "XJTU-SY")
@@ -61,10 +71,18 @@ def __init__(
percent_fail_runs: Optional[Union[float, List[int]]] = None,
truncate_val: bool = False,
run_split_dist: Optional[Dict[str, List[int]]] = None,
first_time_to_predict: Optional[List[int]] = None,
norm_rul: bool = False,
) -> None:
"""
Create a new XJTU-SY reader for one of the sub-datasets. By default, the RUL
- values are not capped. The default window size is 2560.
+ values are not capped. The default window size is 32768.
Use `first_time_to_predict` to set an individual RUL inflection point for
each run. It should be a list with one integer index per run. The index
is the time step after which the RUL declines; before it, the RUL is
assumed constant. The `norm_rul` argument can then be used to scale the
RUL of each run between zero and one.
For more information about using readers refer to the [reader]
[rul_datasets.reader] module page.
@@ -77,10 +95,23 @@
percent_fail_runs: The percentage or index list of available time series.
truncate_val: Truncate the validation data with `percent_broken`, too.
run_split_dist: Dictionary that assigns each run idx to each split.
first_time_to_predict: The time step for each time series before which RUL
is constant.
norm_rul: Normalize RUL between zero and one.
"""
super().__init__(
fd, window_size, max_rul, percent_broken, percent_fail_runs, truncate_val
)

if (first_time_to_predict is not None) and (max_rul is not None):
raise ValueError(
"FemtoReader cannot use 'first_time_to_predict' "
"and 'max_rul' in conjunction."
)

self.first_time_to_predict = first_time_to_predict
self.norm_rul = norm_rul

self._preparator = XjtuSyPreparator(self.fd, self._XJTU_SY_ROOT, run_split_dist)

@property
@@ -108,9 +139,25 @@ def load_complete_split(
features, targets = self._preparator.load_runs(split)
features = [f[:, -self.window_size :, :] for f in features] # crop to window
features = scaling.scale_features(features, self._preparator.load_scaler())
if self.max_rul is not None:
targets = [np.minimum(t, self.max_rul) for t in targets]
elif self.first_time_to_predict is not None:
targets = self._clip_first_time_to_predict(targets, split)

if self.norm_rul:
targets = [t / np.max(t) for t in targets]

return features, targets

def _clip_first_time_to_predict(self, targets, split):
# Run indices in the split distribution are one-based, hence `i - 1`.
fttps = [
self.first_time_to_predict[i - 1]
for i in self._preparator.run_split_dist[split]
]
# Cap each run's RUL at its value at the first-time-to-predict step.
targets = [np.minimum(t, len(t) - fttp) for t, fttp in zip(targets, fttps)]

return targets

def default_window_size(self, fd: int) -> int:
return XjtuSyPreparator.DEFAULT_WINDOW_SIZE
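`norm_rul` also composes with `max_rul`: each run's capped RUL is divided by its own maximum, so every run peaks at one. The tests below verify exactly this; here is a minimal sketch of the expected behavior (illustrative):
```pycon
>>> import rul_datasets
>>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1, max_rul=50, norm_rul=True)
>>> fd1.prepare_data()
>>> _, labels = fd1.load_split("dev")
>>> max(label.max() for label in labels)  # every run is scaled to peak at 1
1.0
```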

27 changes: 27 additions & 0 deletions tests/reader/test_femto.py
@@ -69,3 +69,30 @@ def test_max_rul(self, max_rul):
npt.assert_equal(t, np.arange(len(t), 0, -1)) # is linear
else:
assert np.max(t) <= max_rul # capped at max_rul

def test_first_time_to_predict(self):
fttp = [10, 20, 30, 40, 50, 60, 70]
dataset = reader.FemtoReader(1, first_time_to_predict=fttp)
targets = (
dataset.load_split("dev")[1]
+ dataset.load_split("val")[1]
+ dataset.load_split("test")[1]
)
for target, first_time in zip(targets, fttp):
max_rul = len(target) - first_time
assert np.all(target[:first_time] == max_rul)

def test_norm_rul_with_max_rul(self):
dataset = reader.FemtoReader(1, max_rul=50, norm_rul=True)
for split in ["dev", "val", "test"]:
_, targets = dataset.load_split(split)
for target in targets:
assert np.max(target) == 1

def test_norm_rul_with_fttp(self):
fttp = [10, 20, 30, 40, 50, 60, 70]
dataset = reader.FemtoReader(1, first_time_to_predict=fttp, norm_rul=True)
for split in ["dev", "val", "test"]:
_, targets = dataset.load_split(split)
for target in targets:
assert np.max(target) == 1
27 changes: 27 additions & 0 deletions tests/reader/test_xjtu_sy.py
@@ -62,3 +62,30 @@ def test_run_split_dist(self, fd, split, exp_length):
rul_loader = reader.XjtuSyReader(fd)
features, targets = rul_loader.load_split(split)
assert len(features) == len(targets) == exp_length

def test_first_time_to_predict(self):
fttp = [10, 20, 30, 40, 50]
dataset = reader.XjtuSyReader(1, first_time_to_predict=fttp)
targets = (
dataset.load_split("dev")[1]
+ dataset.load_split("val")[1]
+ dataset.load_split("test")[1]
)
for target, first_time in zip(targets, fttp):
max_rul = len(target) - first_time
assert np.all(target[:first_time] == max_rul)

def test_norm_rul_with_max_rul(self):
dataset = reader.XjtuSyReader(1, max_rul=50, norm_rul=True)
for split in ["dev", "val", "test"]:
_, targets = dataset.load_split(split)
for target in targets:
assert np.max(target) == 1

def test_norm_rul_with_fttp(self):
fttp = [10, 20, 30, 40, 50]
dataset = reader.XjtuSyReader(1, first_time_to_predict=fttp, norm_rul=True)
for split in ["dev", "val", "test"]:
_, targets = dataset.load_split(split)
for target in targets:
assert np.max(target) == 1
