diff --git a/docs/use_cases/libraries.md b/docs/use_cases/libraries.md
index 00ddc4f..47fe5f6 100644
--- a/docs/use_cases/libraries.md
+++ b/docs/use_cases/libraries.md
@@ -110,12 +110,12 @@
 import rul_datasets
 cmapss_fd1 = rul_datasets.CmapssReader(fd=1)
 cmapss_fd1.prepare_data() # (1)!
 dev_features, _ = cmapss_fd1.load_split("dev") # (2)!
-dev_data = np.concatenate([np.transpose(f.numpy(), (0, 2, 1)) for f in dev_features]) # (3)!
+dev_data = np.concatenate(dev_features) # (3)!
 km = tslearn.clustering.TimeSeriesKMeans(n_clusters=5, metric="dtw")
 km.fit(dev_data)
 ```

 1. You need to call `prepare_data` before using the reader. This downloads and pre-processes the dataset if not done already.
-2. This yields a list of tensors with the shape `[len_time_series, num_features, window_size]`.
-3. Convert the list of tensors to a single numpy array with the shape `[num_series, window_size, num_features]`.
+2. This yields a list of numpy arrays with the shape `[len_time_series, window_size, num_features]`.
+3. Concatenate to a single numpy array with the shape `[num_series, window_size, num_features]`.
diff --git a/rul_datasets/core.py b/rul_datasets/core.py
index 833e945..89779d0 100644
--- a/rul_datasets/core.py
+++ b/rul_datasets/core.py
@@ -209,8 +209,9 @@ def _setup_split(self, split: str) -> Tuple[torch.Tensor, torch.Tensor]:
         features, targets = self.reader.load_split(split)
         if features:
             features, targets = self._apply_feature_extractor_per_run(features, targets)
-            cat_features = torch.cat(features)
-            cat_targets = torch.cat(targets)
+            tensor_features, tensor_targets = utils.to_tensor(features, targets)
+            cat_features = torch.cat(tensor_features)
+            cat_targets = torch.cat(tensor_targets)
         else:
             cat_features = torch.empty(0, 0, 0)
             cat_targets = torch.empty(0)
@@ -218,8 +219,8 @@
         return cat_features, cat_targets

     def _apply_feature_extractor_per_run(
-        self, features: List[torch.Tensor], targets: List[torch.Tensor]
-    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
+        self, features: List[np.ndarray], targets: List[np.ndarray]
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         if self.feature_extractor is not None and self.window_size is not None:
             cutoff = self.window_size - 1
             features = [self._apply_feature_extractor(f) for f in features]
@@ -228,12 +229,9 @@
         return features, targets

-    def _apply_feature_extractor(self, features: torch.Tensor) -> torch.Tensor:
-        dtype = features.dtype
-        numpy_features = torch.permute(features, (0, 2, 1)).numpy()
-        extracted = self.feature_extractor(numpy_features)  # type: ignore
-        extracted = utils.extract_windows(extracted, self.window_size)  # type: ignore
-        features = utils.feature_to_tensor(extracted, dtype)
+    def _apply_feature_extractor(self, features: np.ndarray) -> np.ndarray:
+        features = self.feature_extractor(features)  # type: ignore
+        features = utils.extract_windows(features, self.window_size)  # type: ignore

         return features
@@ -323,7 +321,7 @@ def to_dataset(self, split: str) -> TensorDataset:


 class PairedRulDataset(IterableDataset):
-    """TODO."""
+    """A dataset of sample pairs drawn from the same time series."""

     def __init__(
         self,
@@ -347,8 +345,8 @@ def __init__(
             reader.check_compatibility(self.readers[0])

         self._run_domain_idx: np.ndarray
-        self._features: List[torch.Tensor]
-        self._labels: List[torch.Tensor]
+        self._features: List[np.ndarray]
+        self._labels: List[np.ndarray]

         self._prepare_datasets()
         self._max_rul = self._get_max_rul()
@@ -480,14 +478,14 @@ def _get_labeled_pair_idx(self) -> Tuple[int, int, int, int, int]:

     def _build_pair(
         self,
-        run: torch.Tensor,
+        run: np.ndarray,
         anchor_idx: int,
         query_idx: int,
         distance: int,
         domain_label: int,
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        anchors = run[anchor_idx]
-        queries = run[query_idx]
+        anchors = utils.feature_to_tensor(run[anchor_idx], torch.float)
+        queries = utils.feature_to_tensor(run[query_idx], torch.float)
         domain_tensor = torch.tensor(domain_label, dtype=torch.float)
         distances = torch.tensor(distance, dtype=torch.float) / self._max_rul
         distances = torch.clamp_max(distances, max=1)  # max distance is max_rul
diff --git a/rul_datasets/reader/__init__.py b/rul_datasets/reader/__init__.py
index 77e27d6..3e31a46 100644
--- a/rul_datasets/reader/__init__.py
+++ b/rul_datasets/reader/__init__.py
@@ -36,24 +36,24 @@
 >>> test_features, test_targets = reader.load_split("test")
 ```

-The features are a list of [tensors][torch.Tensor] where each tensor has a shape of
-`[num_windows, num_channels, window_size]`:
+The features are a list of [numpy arrays][numpy.ndarray] where each array has a shape of
+`[num_windows, window_size, num_channels]`:

 ```pycon
 >>> type(dev_features)
 <class 'list'>
 >>> dev_features[0].shape
-torch.Size([163, 14, 30])
+(163, 30, 14)
 ```

-The targets are a list of [tensors][torch.Tensor], too, where each tensor has a shape
-of `[num_windows]`:
+The targets are a list of [numpy arrays][numpy.ndarray], too, where each array has a
+shape of `[num_windows]`:

 ```pycon
 >>> type(dev_targets)
 <class 'list'>
 >>> dev_targets[0].shape
-torch.Size([163])
+(163,)
 ```

 Each reader defines a default window size for its data. This can be overridden by the
@@ -63,7 +63,7 @@
 >>> fd1 = CmapssReader(fd=1, window_size=15)
 >>> features, _ = fd1.load_split("dev")
 >>> features[0].shape
-torch.Size([163, 14, 15])
+(163, 15, 14)
 ```

 Some datasets, i.e. CMAPSS, use a piece-wise linear RUL function, where a maximum RUL
@@ -73,8 +73,8 @@
 ```pycon
 >>> fd1 = CmapssReader(fd=1, max_rul=100)
->>> targets = fd1.load_split("dev")
->>> max(torch.max(t) for t in targets)
-tensor(100.)
+>>> _, targets = fd1.load_split("dev")
+>>> max(np.max(t) for t in targets)
+100.0
 ```

 If you want to use a sub-dataset as unlabeled data, e.g. for unsupervised domain
@@ -88,9 +88,9 @@
 >>> fd1 = CmapssReader(fd=1, percent_broken=0.8)
 >>> features, targets = fd1.load_split("dev")
 >>> features[0].shape
-torch.Size([130, 14, 30])
->>> torch.min(targets[0])
-tensor(34.)
+(130, 30, 14)
+>>> np.min(targets[0])
+34.0
 ```

 You may want to apply the same `percent_broken` from your training data to your
@@ -101,8 +101,8 @@
 ```pycon
 >>> fd1 = CmapssReader(fd=1, percent_broken=0.8, truncate_val=True)
 >>> features, targets = fd1.load_split("val")
->>> torch.min(targets[0])
-tensor(44.)
+>>> np.min(targets[0])
+44.0
 ```

 Data-driven RUL estimation algorithms are often sensitive to the overall amount of
@@ -149,7 +149,17 @@
 ```

 For more information, see [core][rul_datasets.core] module page or the
-[Use Cases](/rul-datasets/) page.
+[Libraries](/rul-datasets/use_cases/libraries) page.
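+
+If a downstream model still expects PyTorch's channel-first layout, the arrays can be
+converted manually, e.g. with [feature_to_tensor][rul_datasets.utils.feature_to_tensor]:
+
+```pycon
+>>> fd1 = CmapssReader(fd=1)
+>>> features, _ = fd1.load_split("dev")
+>>> utils.feature_to_tensor(features[0], torch.float).shape
+torch.Size([163, 14, 30])
+```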
""" diff --git a/rul_datasets/reader/abstract.py b/rul_datasets/reader/abstract.py index 8488972..2bd879f 100644 --- a/rul_datasets/reader/abstract.py +++ b/rul_datasets/reader/abstract.py @@ -5,9 +5,7 @@ from typing import Optional, Union, List, Dict, Any, Iterable, Tuple import numpy as np -import torch -from rul_datasets import utils from rul_datasets.reader import truncating @@ -155,17 +153,14 @@ def load_complete_split( """ raise NotImplementedError - def load_split(self, split: str) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + def load_split(self, split: str) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Load a split as tensors and apply truncation to it. This function loads the scaled features and the targets of a split into memory. Afterwards, truncation is applied if the `split` is set to `dev`. The validation set is also truncated with `percent_broken` if `truncate_val` is - set to `True`. At last, the data is transformed into [tensors][torch.Tensor]. - While converting to them tensors, the axis of the features are transposed to - `[num_windows, num_channels, window_size]` to fit PyTorch's channel first - format. + set to `True`. Args: split: The desired split to load. @@ -182,9 +177,8 @@ def load_split(self, split: str) -> Tuple[List[torch.Tensor], List[torch.Tensor] features, targets = truncating.truncate_runs( features, targets, self.percent_broken ) - tensor_feats, tensor_targets = utils.to_tensor(features, targets) - return tensor_feats, tensor_targets + return features, targets def get_compatible( self, diff --git a/rul_datasets/reader/cmapss.py b/rul_datasets/reader/cmapss.py index 439b0a2..6161389 100644 --- a/rul_datasets/reader/cmapss.py +++ b/rul_datasets/reader/cmapss.py @@ -44,7 +44,7 @@ class CmapssReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([163, 14, 30]) + (163, 30, 14) Custom channels >>> import rul_datasets @@ -52,7 +52,7 @@ class CmapssReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([163, 3, 30]) + (163, 30, 3) """ _FMT: str = ( diff --git a/rul_datasets/reader/femto.py b/rul_datasets/reader/femto.py index a27697d..d2494a5 100644 --- a/rul_datasets/reader/femto.py +++ b/rul_datasets/reader/femto.py @@ -44,7 +44,7 @@ class FemtoReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([2803, 2, 2560]) + (2803, 2560, 2) Custom splits: >>> import rul_datasets @@ -53,7 +53,7 @@ class FemtoReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([2463, 2, 2560]) + (2463, 2560, 2) """ _FEMTO_ROOT: str = os.path.join(get_data_root(), "FEMTOBearingDataSet") diff --git a/rul_datasets/reader/xjtu_sy.py b/rul_datasets/reader/xjtu_sy.py index b7f5da4..a7212f7 100644 --- a/rul_datasets/reader/xjtu_sy.py +++ b/rul_datasets/reader/xjtu_sy.py @@ -37,7 +37,7 @@ class XjtuSyReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([123, 2, 32768]) + (123, 32768, 2) Custom splits: >>> import rul_datasets @@ -46,7 +46,7 @@ class XjtuSyReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([52, 2, 32768]) + (52, 32768, 2) """ _XJTU_SY_ROOT: str = os.path.join(get_data_root(), "XJTU-SY") diff --git a/rul_datasets/utils.py 
index 674f4b6..ad6721f 100644
--- a/rul_datasets/utils.py
+++ b/rul_datasets/utils.py
@@ -105,4 +105,7 @@ def to_tensor(


 def feature_to_tensor(features: np.ndarray, dtype: torch.dtype) -> torch.Tensor:
-    return torch.tensor(features, dtype=dtype).permute(0, 2, 1)
+    if len(features.shape) == 2:
+        return torch.tensor(features, dtype=dtype).permute(1, 0)
+    else:
+        return torch.tensor(features, dtype=dtype).permute(0, 2, 1)
diff --git a/tests/reader/test_cmapss.py b/tests/reader/test_cmapss.py
index 67be7fe..218e49b 100644
--- a/tests/reader/test_cmapss.py
+++ b/tests/reader/test_cmapss.py
@@ -1,3 +1,4 @@
+import numpy as np
 import numpy.testing as npt
 import pytest
 import torch
@@ -29,11 +30,11 @@ def _check_split(self, rul_loader, split, window_size):
                 self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     @pytest.mark.parametrize(
         ("fd", "window_size"), [(1, 30), (2, 20), (3, 30), (4, 15)]
     )
@@ -52,7 +53,7 @@ def test_feature_select(self):
         for split in ["dev", "val", "test"]:
             features, _ = dataset.load_split(split)
             for run in features:
-                assert 7 == run.shape[1]
+                assert 7 == run.shape[2]

     def test_prepare_data_not_called_for_feature_select(self):
         dataset = reader.CmapssReader(1, feature_select=[4])
@@ -64,15 +65,15 @@ def test_normalization_min_max(self, fd):
         full_dataset = reader.CmapssReader(fd)
         full_dev, full_dev_targets = full_dataset.load_split("dev")

-        npt.assert_almost_equal(max(torch.max(r).item() for r in full_dev), 1.0)
-        npt.assert_almost_equal(min(torch.min(r).item() for r in full_dev), -1.0)
+        npt.assert_almost_equal(max(np.max(r) for r in full_dev), 1.0)
+        npt.assert_almost_equal(min(np.min(r) for r in full_dev), -1.0)

         trunc_dataset = reader.CmapssReader(fd, percent_fail_runs=0.8)
         trunc_dev, _ = trunc_dataset.load_split("dev")
-        assert max(torch.max(r).item() for r in trunc_dev) <= 1.0
-        assert min(torch.min(r).item() for r in trunc_dev) >= -1.0
+        assert np.round(max(np.max(r).item() for r in trunc_dev), decimals=7) <= 1.0
+        assert np.round(min(np.min(r).item() for r in trunc_dev), decimals=7) >= -1.0

         trunc_dataset = reader.CmapssReader(fd, percent_broken=0.2)
         trunc_dev, _ = trunc_dataset.load_split("dev")
-        assert max(torch.max(r).item() for r in trunc_dev) <= 1.0
-        assert min(torch.min(r).item() for r in trunc_dev) >= -1.0
+        assert np.round(max(np.max(r).item() for r in trunc_dev), decimals=7) <= 1.0
+        assert np.round(min(np.min(r).item() for r in trunc_dev), decimals=7) >= -1.0
diff --git a/tests/reader/test_femto.py b/tests/reader/test_femto.py
index 04af2fb..17d5e1f 100644
--- a/tests/reader/test_femto.py
+++ b/tests/reader/test_femto.py
@@ -26,11 +26,11 @@ def test_run_shape_and_dtype(self, fd, window_size, split):
             self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     def test_standardization(self):
         for i in range(1, 3):
@@ -38,26 +38,26 @@
             full_train, full_train_targets = full_dataset.load_split("dev")

             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(full_train)).item(), decimal=3
+                0.0, np.mean(np.concatenate(full_train)).item(), decimal=3
             )
             npt.assert_almost_equal(
-                1.0, torch.std(torch.cat(full_train)).item(), decimal=3
+                1.0, np.std(np.concatenate(full_train)).item(), decimal=3
             )

             truncated_dataset = reader.FemtoReader(fd=i, percent_fail_runs=0.8)
             trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=2
+                0.0, np.mean(np.concatenate(trunc_train)).item(), decimal=2
             )
             npt.assert_almost_equal(
-                1.0, torch.std(torch.cat(trunc_train)).item(), decimal=1
+                1.0, np.std(np.concatenate(trunc_train)).item(), decimal=1
             )

             # percent_broken is supposed to change the std but not the mean
             truncated_dataset = reader.FemtoReader(fd=i, percent_broken=0.2)
             trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
+                0.0, np.mean(np.concatenate(trunc_train)).item(), decimal=1
             )

     @pytest.mark.parametrize("max_rul", [125, None])
@@ -65,7 +65,6 @@ def test_max_rul(self, max_rul):
         dataset = reader.FemtoReader(fd=1, max_rul=max_rul)
         _, targets = dataset.load_split("dev")
         for t in targets:
-            t = t.numpy()
             if max_rul is None:
                 npt.assert_equal(t, np.arange(len(t), 0, -1))  # is linear
             else:
diff --git a/tests/reader/test_xjtu_sy.py b/tests/reader/test_xjtu_sy.py
index d79983e..cec8202 100644
--- a/tests/reader/test_xjtu_sy.py
+++ b/tests/reader/test_xjtu_sy.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 import torch
 from numpy import testing as npt
@@ -25,23 +26,17 @@ def test_standardization(self, fd):
         full_dataset = reader.XjtuSyReader(fd)
         full_train, full_train_targets = full_dataset.load_split("dev")

-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(full_train)).item(), decimal=4
-        )
-        npt.assert_almost_equal(1.0, torch.std(torch.cat(full_train)).item(), decimal=4)
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(full_train)), decimal=4)
+        npt.assert_almost_equal(1.0, np.std(np.concatenate(full_train)), decimal=4)

         truncated_dataset = reader.XjtuSyReader(fd, percent_fail_runs=0.8)
         trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
-        )
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(trunc_train)), decimal=1)

         # percent_broken is supposed to change the std but not the mean
         truncated_dataset = reader.XjtuSyReader(fd, percent_broken=0.2)
         trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
-        )
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(trunc_train)), decimal=1)

     @pytest.mark.parametrize("window_size", [1500, 100])
     @pytest.mark.parametrize("fd", [1, 2, 3])
@@ -53,11 +48,11 @@ def test_run_shape_and_dtype(self, window_size, fd, split):
             self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     @pytest.mark.parametrize("fd", [1, 2, 3])
     @pytest.mark.parametrize(
diff --git a/tests/test_adaption.py b/tests/test_adaption.py
index 191fef5..d8ae5f4 100644
--- a/tests/test_adaption.py
+++ b/tests/test_adaption.py
@@ -2,6 +2,7 @@
 import warnings
 from unittest import mock

+import numpy as np
 import torch
 from torch.utils.data import RandomSampler, TensorDataset

@@ -154,7 +155,7 @@ class TestPretrainingDataModuleFullData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        source_mock_runs = [torch.randn(16, 14, 1)] * 3, [torch.rand(16)] * 3
+        source_mock_runs = [np.random.randn(16, 1, 14)] * 3, [np.random.rand(16)] * 3
         self.source_loader = mock.MagicMock(name="CMAPSSLoader")
         self.source_loader.fd = 3
         self.source_loader.percent_fail_runs = None
@@ -168,7 +169,7 @@ def setUp(self):
         self.source_loader.load_split.return_value = source_mock_runs
         self.source_data = core.RulDataModule(self.source_loader, batch_size=16)

-        target_mock_runs = [torch.randn(16, 14, 1)] * 2, [torch.rand(16)] * 2
+        target_mock_runs = [np.random.randn(16, 1, 14)] * 2, [np.random.rand(16)] * 2
         self.target_loader = mock.MagicMock(name="CMAPSSLoader")
         self.target_loader.fd = 1
         self.target_loader.percent_fail_runs = 0.8
@@ -266,7 +267,7 @@ class TestPretrainingDataModuleLowData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        source_mock_runs = [torch.randn(16, 14, 1)] * 3, [torch.rand(16)] * 3
+        source_mock_runs = [np.random.randn(16, 1, 14)] * 3, [np.random.rand(16)] * 3
         self.source_loader = mock.MagicMock(name="CMAPSSLoader")
         self.source_loader.fd = 3
         self.source_loader.percent_fail_runs = None
@@ -281,8 +282,8 @@ def setUp(self):
         self.source_data = core.RulDataModule(self.source_loader, batch_size=16)

         target_mock_runs = (
-            [torch.randn(3, 14, 1), torch.randn(1, 14, 1)],
-            [torch.rand(3), torch.rand(1)],
+            [np.random.randn(3, 1, 14), np.random.randn(1, 1, 14)],
+            [np.random.rand(3), np.random.rand(1)],
         )
         self.target_loader = mock.MagicMock(name="CMAPSSLoader")
         self.target_loader.fd = 1
diff --git a/tests/test_baseline.py b/tests/test_baseline.py
index 3e72310..7fb56d5 100644
--- a/tests/test_baseline.py
+++ b/tests/test_baseline.py
@@ -2,6 +2,7 @@
 import warnings
 from unittest import mock

+import numpy as np
 import torch
 import torch.utils.data

@@ -68,7 +69,7 @@ class TestPretrainingBaselineDataModuleFullData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        self.mock_runs = [torch.randn(16, 14, 1)] * 8, [torch.rand(16)] * 8
+        self.mock_runs = [np.random.randn(16, 1, 14)] * 8, [np.random.rand(16)] * 8

         self.failed_loader = mock.MagicMock(name="CMAPSSLoader")
         self.failed_loader.fd = 1
@@ -218,7 +219,7 @@ class TestPretrainingBaselineDataModuleLowData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        self.mock_runs = [torch.randn(16, 14, 1)] * 2, [torch.rand(16)] * 2
+        self.mock_runs = [np.random.randn(16, 1, 14)] * 2, [np.random.rand(16)] * 2

         self.failed_loader = mock.MagicMock(name="CMAPSSLoader")
         self.failed_loader.fd = 1
diff --git a/tests/test_core.py b/tests/test_core.py
index d337664..570b41d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -18,7 +18,7 @@ def setUp(self):
             "test": 0,
             "window_size": 30,
         }
-        self.mock_runs = [torch.zeros(1, 1, 1)], [torch.zeros(1)]
+        self.mock_runs = [np.zeros((1, 1, 1))], [np.zeros(1)]
         self.mock_loader.load_split.return_value = self.mock_runs

     def test_created_correctly(self):
@@ -62,7 +62,7 @@ def test_setup(self):
         self.mock_loader.load_split.assert_has_calls(
[mock.call("dev"), mock.call("val"), mock.call("test")] ) - mock_runs = tuple(torch.cat(r) for r in self.mock_runs) + mock_runs = tuple(torch.tensor(np.concatenate(r)) for r in self.mock_runs) self.assertDictEqual( {"dev": mock_runs, "val": mock_runs, "test": mock_runs}, dataset._data ) @@ -119,8 +119,8 @@ def test_test_dataloader(self, mock_to_dataset): def test_train_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -129,8 +129,8 @@ def test_train_batch_structure(self): def test_val_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -139,8 +139,8 @@ def test_val_batch_structure(self): def test_test_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -201,7 +201,7 @@ def test_is_mutually_exclusive(self): def test_feature_extractor(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30) + torch.arange(8)[:, None, None]], + [np.zeros((8, 30, 14)) + np.arange(8)[:, None, None]], [torch.arange(8)], ) fe = lambda x: np.mean(x, axis=1) @@ -231,12 +231,12 @@ class DummyRul(reader.AbstractReader): def __init__(self, length): self.data = { "dev": ( - [torch.zeros(length, self.window_size, 5)], - [torch.clamp_max(torch.arange(length, 0, step=-1), 125)], + [np.zeros((length, self.window_size, 5))], + [np.clip(np.arange(length, 0, step=-1), a_min=None, a_max=125)], ), "val": ( - [torch.zeros(100, self.window_size, 5)], - [torch.clamp_max(torch.arange(100, 0, step=-1), 125)], + [np.zeros((100, self.window_size, 5))], + [np.clip(np.arange(100, 0, step=-1), a_min=None, a_max=125)], ), } @@ -270,18 +270,18 @@ class DummyRulShortRuns(reader.AbstractReader): data = { "dev": ( [ - torch.ones(100, window_size, 5) - * torch.arange(1, 101).view(100, 1, 1), # normal run - torch.zeros(2, window_size, 5), # too short run - torch.ones(100, window_size, 5) - * torch.arange(1, 101).view(100, 1, 1), # normal run - torch.zeros(1, window_size, 5), # empty run + np.ones((100, window_size, 5)) + * np.arange(1, 101).reshape((100, 1, 1)), # normal run + np.zeros((2, window_size, 5)), # too short run + np.ones((100, window_size, 5)) + * np.arange(1, 101).reshape((100, 1, 1)), # normal run + np.zeros((1, window_size, 5)), # empty run ], [ - torch.clamp_max(torch.arange(100, 0, step=-1), 125), - torch.ones(2) * 500, - torch.clamp_max(torch.arange(100, 0, step=-1), 125), - torch.ones(1) * 500, + np.clip(np.arange(100, 0, step=-1), a_min=None, a_max=125), + np.ones(2) * 500, + np.clip(torch.arange(100, 0, step=-1), a_min=None, a_max=125), + np.ones(1) * 500, ], ), } @@ -375,8 +375,12 @@ def test_sampled_data(self, cmapss_short): for i, sample in enumerate(data): idx = 3 * i expected_run = data._features[fixed_idx[idx]] - expected_anchor = expected_run[fixed_idx[idx + 1]] - expected_query = expected_run[fixed_idx[idx + 2]] + expected_anchor = torch.tensor(expected_run[fixed_idx[idx + 1]]).transpose( + 1, 0 + ) + expected_query = torch.tensor(expected_run[fixed_idx[idx + 2]]).transpose( + 1, 0 + 
+            )
             expected_distance = min(125, fixed_idx[idx + 2] - fixed_idx[idx + 1]) / 125
             expected_domain_idx = 0
             assert 0 == torch.dist(expected_anchor, sample[0])
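
For reference, a minimal end-to-end sketch of the API after this migration, based on
the CMAPSS FD1 shapes shown in the docs above (variable names are illustrative, and
the dataset is assumed to be available for download):

```python
import numpy as np
import torch

import rul_datasets
from rul_datasets import utils

fd1 = rul_datasets.CmapssReader(fd=1)
fd1.prepare_data()  # downloads and pre-processes the dataset if not done already

# Readers now return lists of channels-last numpy arrays instead of torch tensors.
features, targets = fd1.load_split("dev")
assert isinstance(features[0], np.ndarray)
assert features[0].shape == (163, 30, 14)  # [num_windows, window_size, num_channels]

# The channels-first conversion for PyTorch now happens only at the torch boundary,
# e.g. through feature_to_tensor, which permutes a 3D array to
# [num_windows, num_channels, window_size].
window = utils.feature_to_tensor(features[0], torch.float)
assert window.shape == torch.Size([163, 14, 30])
```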