From 2ba0afb22784742020ada112d26228fd4f33bb38 Mon Sep 17 00:00:00 2001 From: Tilman Krokotsch Date: Thu, 23 May 2024 09:24:21 +0200 Subject: [PATCH] fix: entity splitting bug (#63) splitting entities was implemented under the assumption that entity ids are strictly monotonic. This assumption holds for DS01 but not for most other subdatasets. --- rul_datasets/reader/ncmapss.py | 5 +++-- tests/reader/test_ncmapss.py | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/rul_datasets/reader/ncmapss.py b/rul_datasets/reader/ncmapss.py index 696cf04..183779a 100644 --- a/rul_datasets/reader/ncmapss.py +++ b/rul_datasets/reader/ncmapss.py @@ -358,8 +358,9 @@ def _window_by_cycle( @staticmethod def _get_end_idx(identifiers): - _, split_idx = np.unique(identifiers, return_counts=True) - split_idx = np.cumsum(split_idx) + _, split_idx = np.unique(identifiers, return_index=True) + split_idx = np.sort(split_idx) + split_idx = np.concatenate([split_idx[1:], [len(identifiers)]]) return split_idx diff --git a/tests/reader/test_ncmapss.py b/tests/reader/test_ncmapss.py index 333ffd0..1081972 100644 --- a/tests/reader/test_ncmapss.py +++ b/tests/reader/test_ncmapss.py @@ -103,8 +103,9 @@ def test_max_rul(max_rul, prepared_ncmapss): @pytest.mark.needs_data -def test__split_by_unit(prepared_ncmapss): - reader = NCmapssReader(1) +@pytest.mark.parametrize("fd", range(1, 8)) +def test__split_by_unit(fd, prepared_ncmapss): + reader = NCmapssReader(fd) features, targets, auxiliary = reader._load_raw_data() features, targets, auxiliary = reader._split_by_unit(features, targets, auxiliary) @@ -113,6 +114,20 @@ def test__split_by_unit(prepared_ncmapss): assert np.unique(auxiliary[i][:, 0]).size == 1 # only one unit id present +@pytest.mark.needs_data +@pytest.mark.parametrize("fd", range(1, 8)) +def test__get_end_idx_for_cycles(fd, prepared_ncmapss): + reader = NCmapssReader(fd) + features, targets, auxiliary = reader._load_raw_data() + features, targets, auxiliary = reader._split_by_unit(features, targets, auxiliary) + + for aux in auxiliary: + cycle_end_idx = reader._get_end_idx(aux[:, 1]) + split_aux = np.split(aux, cycle_end_idx[:-1]) + for cycle in split_aux: + assert np.unique(cycle[:, 1]).size == 1 # only one cycle id present + + @pytest.mark.needs_data @pytest.mark.parametrize("window_size", [10, 100]) def test_padding_and_window_size(window_size, prepared_ncmapss):