From 2b417dd4ec48711564664b21256ecf863004146a Mon Sep 17 00:00:00 2001 From: ZhengyanZhu Date: Fri, 22 Mar 2024 17:17:15 +0100 Subject: [PATCH] Add docstrings --- rul_datasets/reader/ncmapss.py | 82 +++++++++++++++++----------------- tests/reader/test_ncmapps.py | 0 tests/reader/test_ncmapss.py | 15 +++---- 3 files changed, 49 insertions(+), 48 deletions(-) delete mode 100644 tests/reader/test_ncmapps.py diff --git a/rul_datasets/reader/ncmapss.py b/rul_datasets/reader/ncmapss.py index 9a24801..bf6527f 100644 --- a/rul_datasets/reader/ncmapss.py +++ b/rul_datasets/reader/ncmapss.py @@ -123,42 +123,42 @@ def __init__( truncate_degraded_only: bool = False, resolution_seconds: int = 1, padding_value: float = 0.0, - scaling_range: [float, float] = None, + scaling_range: Optional[Tuple[int, int]] = (0, 1), ) -> None: """ - Create a new reader for the New C-MAPSS dataset. The maximum RUL value is set - to 65 by default. The default channels are the four operating conditions, - the 14 physical, and 14 virtual sensors in this order. - - The default window size is, by default, the longest flight cycle in the - sub-dataset. Shorter cycles are padded on the left. The default padding value - is zero but can be overridden, e.g., as -1 to make filtering for padding easier - later on. - - The default `run_split_dist` is the same as in the original dataset, but with - the last unit of the original train split designated for validation. - - If the features are downsampled in time, the default window size is - automatically adjusted to `window_size // resolution_seconds`. Any manually - set `window_size` needs to take this into account as it is applied after - downsampling. - - For more information about using readers, refer to the [reader] - [rul_datasets.reader] module page. - - Args: - fd: The sub-dataset to use. Must be in `[1, 7]`. - max_rul: The maximum RUL value. - percent_broken: The maximum relative degradation per unit. 
- percent_fail_runs: The percentage or index list of available units. - feature_select: The indices of the features to use. - truncate_val: Truncate the validation data with `percent_broken`, too. - run_split_dist: The assignment of units to each split. - truncate_degraded_only: Only truncate the degraded part of the data - (< max RUL). - resolution_seconds: The number of consecutive seconds to average over for - downsampling. - padding_value: The value to use for padding the flight cycles. + Create a new reader for the New C-MAPSS dataset. The maximum RUL value is set + to 65 by default. The default channels are the four operating conditions, + the 14 physical, and 14 virtual sensors in this order. + + The window size is, by default, the longest flight cycle in the + sub-dataset. Shorter cycles are padded on the left. The default padding value + is zero but can be overridden, e.g., as -1 to make filtering for padding easier + later on. + + The default `run_split_dist` is the same as in the original dataset, but with + the last unit of the original train split designated for validation. + + If the features are downsampled in time, the default window size is + automatically adjusted to `window_size // resolution_seconds`. Any manually + set `window_size` needs to take this into account as it is applied after + downsampling. + + For more information about using readers, refer to the [reader] + [rul_datasets.reader] module page. + + Args: + fd: The sub-dataset to use. Must be in `[1, 7]`. + max_rul: The maximum RUL value. + percent_broken: The maximum relative degradation per unit. + percent_fail_runs: The percentage or index list of available units. + feature_select: The indices of the features to use. + truncate_val: Truncate the validation data with `percent_broken`, too. + run_split_dist: The assignment of units to each split. + truncate_degraded_only: Only truncate the degraded part of the data + (< max RUL). 
+ resolution_seconds: The number of consecutive seconds to average over for + downsampling. + padding_value: The value to use for padding the flight cycles. """ super().__init__( fd, @@ -217,13 +217,15 @@ def prepare_data(self) -> None: """ if not os.path.exists(self._NCMAPSS_ROOT): _download_ncmapss(self._NCMAPSS_ROOT) - #if not os.path.exists(self._get_scaler_path()): - features, _, _ = self._load_data("dev") - scaler = scaling.fit_scaler(features, MinMaxScaler()) - scaling.save_scaler(scaler, self._get_scaler_path()) + if not os.path.exists(self._get_scaler_path()): + features, _, _ = self._load_data("dev") + scaler = scaling.fit_scaler(features, MinMaxScaler(self.scaling_range)) + scaling.save_scaler(scaler, self._get_scaler_path()) def _get_scaler_path(self): - file_name = f"scaler_{self.fd}_{self.run_split_dist['dev']}.pkl" + file_name = ( + f"scaler_{self.fd}_{self.run_split_dist['dev']}_{self.scaling_range}.pkl" + ) file_path = os.path.join(self._NCMAPSS_ROOT, file_name) return file_path @@ -304,7 +306,7 @@ def _select_units(self, units, split): return [units[i] for i in self.run_split_dist[split]] def _window_by_cycle( - self, features: np.ndarray, targets: np.ndarray, auxiliary: np.ndarray + self, features: np.ndarray, targets: np.ndarray, auxiliary: np.ndarray ) -> Tuple[np.ndarray, np.ndarray]: cycle_end_idx = self._get_end_idx(auxiliary[:, 1]) split_features = np.split(features, cycle_end_idx[:-1]) diff --git a/tests/reader/test_ncmapps.py b/tests/reader/test_ncmapps.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/reader/test_ncmapss.py b/tests/reader/test_ncmapss.py index 499e51c..c476574 100644 --- a/tests/reader/test_ncmapss.py +++ b/tests/reader/test_ncmapss.py @@ -47,20 +47,19 @@ def test_prepare_data(should_run, mocker): mock_save_scaler.assert_not_called() - @pytest.mark.needs_data -@pytest.mark.parametrize("scaling_range", [(-1.0, 1.0), (0.0, 2.0)]) +@pytest.mark.parametrize("scaling_range", [(-1, 1), (0, 1)]) def 
test_scaling_range(scaling_range): reader = NCmapssReader(fd=1, scaling_range=scaling_range) reader.prepare_data() features, _ = reader.load_split("dev") - reader = NCmapssReader(fd=1, scaling_range=(0, 1)) - reader.prepare_data() - features_default, _ = reader.load_split("dev") - - assert not np.array_equal(features[0][:, :, 1], features_default[0][:, :, 1]) - + min_val, max_val = scaling_range + for feature in features: + flat_features = feature.flatten() + np.testing.assert_almost_equal( + flat_features, np.clip(flat_features, min_val, max_val) + ) @pytest.mark.needs_data