diff --git a/rul_datasets/reader/__init__.py b/rul_datasets/reader/__init__.py
index c6b9d89..3e64629 100644
--- a/rul_datasets/reader/__init__.py
+++ b/rul_datasets/reader/__init__.py
@@ -139,6 +139,27 @@
 The effects of `percent_broken` and `percent_fail_runs` are summarized under the
 term **truncation** as they effectively truncate the dataset in two dimensions.
 
+The readers for the FEMTO and XJTU-SY datasets have two additional constructor
+arguments. The `first_time_to_predict` argument lets you set an individual maximum
+RUL value per run in the dataset. As both are bearing datasets, the
+first-time-to-predict is defined as the time step at which the degradation of the
+bearing first becomes noticeable. The RUL value before this time step is assumed
+to be constant. Setting `norm_rul` scales the RUL between [0, 1] per run, which is
+best practice when using first-time-to-predict.
+
+```pycon
+>>> fttp = [10, 20, 30, 40, 50]
+>>> fd1 = rul_datasets.reader.XjtuSyReader(
+...     fd=1, first_time_to_predict=fttp, norm_rul=True
+... )
+>>> fd1.prepare_data()
+>>> features, labels = fd1.load_split("dev")
+>>> labels[0][:15]
+array([1.        , 1.        , 1.        , 1.        , 1.        ,
+       1.        , 1.        , 1.        , 1.        , 1.        ,
+       1.        , 0.99115044, 0.98230088, 0.97345133, 0.96460177])
+```
+
 Readers can be used as is if you just want access to the dataset. If you plan to
 use them with PyTorch or PyTorch Lightning, it is recommended to combine them
 with a [RulDataModule][rul_datasets.core.RulDataModule]:
diff --git a/rul_datasets/reader/femto.py b/rul_datasets/reader/femto.py
index d2494a5..ad3498c 100644
--- a/rul_datasets/reader/femto.py
+++ b/rul_datasets/reader/femto.py
@@ -54,6 +54,16 @@ class FemtoReader(AbstractReader):
     >>> features, labels = fd1.load_split("dev")
     >>> features[0].shape
     (2463, 2560, 2)
+
+    Set first-time-to-predict:
+    >>> import rul_datasets
+    >>> fttp = [10, 20, 30, 40, 50]
+    >>> fd1 = rul_datasets.reader.FemtoReader(fd=1, first_time_to_predict=fttp)
+    >>> fd1.prepare_data()
+    >>> features, labels = fd1.load_split("dev")
+    >>> labels[0][:15]
+    array([2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793., 2793.,
+           2793., 2793., 2792., 2791., 2790., 2789.])
     """
 
     _FEMTO_ROOT: str = os.path.join(get_data_root(), "FEMTOBearingDataSet")
@@ -68,11 +78,19 @@ def __init__(
         percent_fail_runs: Optional[Union[float, List[int]]] = None,
         truncate_val: bool = False,
         run_split_dist: Optional[Dict[str, List[int]]] = None,
+        first_time_to_predict: Optional[List[int]] = None,
+        norm_rul: bool = False,
     ) -> None:
         """
         Create a new FEMTO reader for one of the sub-datasets. By default, the RUL
         values are not capped. The default window size is 2560.
 
+        Use `first_time_to_predict` to set an individual RUL inflection point for
+        each run. It should be a list with an integer index for each run. The index
+        is the time step after which RUL declines. Before this time step, it stays
+        constant. The `norm_rul` argument can then be used to scale the RUL of each
+        run between zero and one.
+
         For more information about using readers refer to the [reader]
         [rul_datasets.reader] module page.
 
@@ -84,10 +102,23 @@ def __init__(
            percent_fail_runs: The percentage or index list of available time series.
            truncate_val: Truncate the validation data with `percent_broken`, too.
            run_split_dist: Dictionary that assigns each run idx to each split.
+           first_time_to_predict: The time step for each time series before which RUL
+               is constant.
+           norm_rul: Normalize RUL between zero and one.
""" super().__init__( fd, window_size, max_rul, percent_broken, percent_fail_runs, truncate_val ) + + if (first_time_to_predict is not None) and (max_rul is not None): + raise ValueError( + "FemtoReader cannot use 'first_time_to_predict' " + "and 'max_rul' in conjunction." + ) + + self.first_time_to_predict = first_time_to_predict + self.norm_rul = norm_rul + self._preparator = FemtoPreparator(self.fd, self._FEMTO_ROOT, run_split_dist) @property @@ -101,7 +132,7 @@ def prepare_data(self) -> None: dataset and each custom split for the first time. The dataset is downloaded from a custom mirror and extracted into the data - root directory. The whole dataset is converted com CSV files to NPY files to + root directory. The whole dataset is converted from CSV files to NPY files to speed up loading it from disk. Afterwards, a scaler is fit on the development features. Previously completed steps are skipped. """ @@ -118,10 +149,24 @@ def load_complete_split( features = [f[:, -self.window_size :, :] for f in features] # crop to window features = scaling.scale_features(features, self._preparator.load_scaler()) if self.max_rul is not None: - targets = [np.clip(t, a_min=0, a_max=self.max_rul) for t in targets] + targets = [np.minimum(t, self.max_rul) for t in targets] + elif self.first_time_to_predict is not None: + targets = self._clip_first_time_to_predict(targets, split) + + if self.norm_rul: + targets = [t / np.max(t) for t in targets] return features, targets + def _clip_first_time_to_predict(self, targets, split): + fttp = [ + self.first_time_to_predict[i - 1] + for i in self._preparator.run_split_dist[split] + ] + targets = [np.minimum(t, len(t) - fttp) for t, fttp in zip(targets, fttp)] + + return targets + def default_window_size(self, fd: int) -> int: return FemtoPreparator.DEFAULT_WINDOW_SIZE diff --git a/rul_datasets/reader/xjtu_sy.py b/rul_datasets/reader/xjtu_sy.py index 8594061..4465d70 100644 --- a/rul_datasets/reader/xjtu_sy.py +++ b/rul_datasets/reader/xjtu_sy.py @@ -47,6 +47,16 @@ class XjtuSyReader(AbstractReader): >>> features, labels = fd1.load_split("dev") >>> features[0].shape (52, 32768, 2) + + Set first-time-to-predict: + >>> import rul_datasets + >>> fttp = [10, 20, 30, 40, 50] + >>> fd1 = rul_datasets.reader.XjtuSyReader(fd=1, first_time_to_predict=fttp) + >>> fd1.prepare_data() + >>> features, labels = fd1.load_split("dev") + >>> labels[0][:15] + array([113., 113., 113., 113., 113., 113., 113., 113., 113., 113., 113., + 112., 111., 110., 109.]) """ _XJTU_SY_ROOT: str = os.path.join(get_data_root(), "XJTU-SY") @@ -61,10 +71,18 @@ def __init__( percent_fail_runs: Optional[Union[float, List[int]]] = None, truncate_val: bool = False, run_split_dist: Optional[Dict[str, List[int]]] = None, + first_time_to_predict: List[int] = None, + norm_rul: bool = False, ) -> None: """ Create a new XJTU-SY reader for one of the sub-datasets. By default, the RUL - values are not capped. The default window size is 2560. + values are not capped. The default window size is 32768. + + Use `first_time_to_predict` to set an individual RUL inflection point for + each run. It should be a list with an integer index for each run. The index + is the time step after which RUL declines. Before the time step it stays + constant. The `norm_rul` argument can then be used to scale the RUL of each + run between zero and one. For more information about using readers refer to the [reader] [rul_datasets.reader] module page. 
@@ -77,10 +95,23 @@ def __init__(
            percent_fail_runs: The percentage or index list of available time series.
            truncate_val: Truncate the validation data with `percent_broken`, too.
            run_split_dist: Dictionary that assigns each run idx to each split.
+           first_time_to_predict: The time step for each time series before which RUL
+               is constant.
+           norm_rul: Normalize RUL between zero and one.
        """
        super().__init__(
            fd, window_size, max_rul, percent_broken, percent_fail_runs, truncate_val
        )
+
+        if (first_time_to_predict is not None) and (max_rul is not None):
+            raise ValueError(
+                "XjtuSyReader cannot use 'first_time_to_predict' "
+                "and 'max_rul' in conjunction."
+            )
+
+        self.first_time_to_predict = first_time_to_predict
+        self.norm_rul = norm_rul
+
         self._preparator = XjtuSyPreparator(self.fd, self._XJTU_SY_ROOT, run_split_dist)
 
     @property
@@ -108,9 +139,25 @@ def load_complete_split(
         features, targets = self._preparator.load_runs(split)
         features = [f[:, -self.window_size :, :] for f in features]  # crop to window
         features = scaling.scale_features(features, self._preparator.load_scaler())
+        if self.max_rul is not None:
+            targets = [np.minimum(t, self.max_rul) for t in targets]
+        elif self.first_time_to_predict is not None:
+            targets = self._clip_first_time_to_predict(targets, split)
+
+        if self.norm_rul:
+            targets = [t / np.max(t) for t in targets]  # scale to [0, 1]
 
         return features, targets
 
+    def _clip_first_time_to_predict(self, targets, split):
+        fttps = [
+            self.first_time_to_predict[i - 1]
+            for i in self._preparator.run_split_dist[split]
+        ]
+        targets = [np.minimum(t, len(t) - fttp) for t, fttp in zip(targets, fttps)]
+
+        return targets
+
     def default_window_size(self, fd: int) -> int:
         return XjtuSyPreparator.DEFAULT_WINDOW_SIZE
diff --git a/tests/reader/test_femto.py b/tests/reader/test_femto.py
index 17d5e1f..c1fa2a3 100644
--- a/tests/reader/test_femto.py
+++ b/tests/reader/test_femto.py
@@ -69,3 +69,30 @@ def test_max_rul(self, max_rul):
             npt.assert_equal(t, np.arange(len(t), 0, -1))  # is linear
         else:
             assert np.max(t) <= max_rul  # capped at max_rul
+
+    def test_first_time_to_predict(self):
+        fttp = [10, 20, 30, 40, 50, 60, 70]
+        dataset = reader.FemtoReader(1, first_time_to_predict=fttp)
+        targets = (
+            dataset.load_split("dev")[1]
+            + dataset.load_split("val")[1]
+            + dataset.load_split("test")[1]
+        )
+        for target, first_time in zip(targets, fttp):
+            max_rul = len(target) - first_time
+            assert np.all(target[:first_time] == max_rul)
+
+    def test_norm_rul_with_max_rul(self):
+        dataset = reader.FemtoReader(1, max_rul=50, norm_rul=True)
+        for split in ["dev", "val", "test"]:
+            _, targets = dataset.load_split(split)
+            for target in targets:
+                assert np.max(target) == 1
+
+    def test_norm_rul_with_fttp(self):
+        fttp = [10, 20, 30, 40, 50, 60, 70]
+        dataset = reader.FemtoReader(1, first_time_to_predict=fttp, norm_rul=True)
+        for split in ["dev", "val", "test"]:
+            _, targets = dataset.load_split(split)
+            for target in targets:
+                assert np.max(target) == 1
diff --git a/tests/reader/test_xjtu_sy.py b/tests/reader/test_xjtu_sy.py
index cec8202..2ff070c 100644
--- a/tests/reader/test_xjtu_sy.py
+++ b/tests/reader/test_xjtu_sy.py
@@ -62,3 +62,30 @@ def test_run_split_dist(self, fd, split, exp_length):
         rul_loader = reader.XjtuSyReader(fd)
         features, targets = rul_loader.load_split(split)
         assert len(features) == len(targets) == exp_length
+
+    def test_first_time_to_predict(self):
+        fttp = [10, 20, 30, 40, 50]
+        dataset = reader.XjtuSyReader(1, first_time_to_predict=fttp)
+        targets = (
+            dataset.load_split("dev")[1]
dataset.load_split("val")[1] + + dataset.load_split("test")[1] + ) + for target, first_time in zip(targets, fttp): + max_rul = len(target) - first_time + assert np.all(target[:first_time] == max_rul) + + def test_norm_rul_with_max_rul(self): + dataset = reader.XjtuSyReader(1, max_rul=50, norm_rul=True) + for split in ["dev", "val", "test"]: + _, targets = dataset.load_split(split) + for target in targets: + assert np.max(target) == 1 + + def test_norm_rul_with_fttp(self): + fttp = [10, 20, 30, 40, 50] + dataset = reader.XjtuSyReader(1, first_time_to_predict=fttp, norm_rul=True) + for split in ["dev", "val", "test"]: + _, targets = dataset.load_split(split) + for target in targets: + assert np.max(target) == 1