diff --git a/docs/use_cases/libraries.md b/docs/use_cases/libraries.md
index 00ddc4f..47fe5f6 100644
--- a/docs/use_cases/libraries.md
+++ b/docs/use_cases/libraries.md
@@ -110,12 +110,12 @@
 import rul_datasets
 cmapss_fd1 = rul_datasets.CmapssReader(fd=1)
 cmapss_fd1.prepare_data() # (1)!
 dev_features, _ = cmapss_fd1.load_split("dev") # (2)!
-dev_data = np.concatenate([np.transpose(f.numpy(), (0, 2, 1)) for f in dev_features]) # (3)!
+dev_data = np.concatenate(dev_features) # (3)!
 km = tslearn.clustering.TimeSeriesKMeans(n_clusters=5, metric="dtw")
 km.fit(dev_data)
 ```

 1. You need to call `prepare_data` before using the reader. This downloads and pre-processes the dataset if not done already.
-2. This yields a list of tensors with the shape `[len_time_series, num_features, window_size]`.
-3. Convert the list of tensors to a single numpy array with the shape `[num_series, window_size, num_features]`.
+2. This yields a list of numpy arrays with the shape `[len_time_series, window_size, num_features]`.
+3. Concatenate to a single numpy array with the shape `[num_series, window_size, num_features]`.
diff --git a/rul_datasets/core.py b/rul_datasets/core.py
index 833e945..89779d0 100644
--- a/rul_datasets/core.py
+++ b/rul_datasets/core.py
@@ -209,8 +209,9 @@ def _setup_split(self, split: str) -> Tuple[torch.Tensor, torch.Tensor]:
         features, targets = self.reader.load_split(split)
         if features:
             features, targets = self._apply_feature_extractor_per_run(features, targets)
-            cat_features = torch.cat(features)
-            cat_targets = torch.cat(targets)
+            tensor_features, tensor_targets = utils.to_tensor(features, targets)
+            cat_features = torch.cat(tensor_features)
+            cat_targets = torch.cat(tensor_targets)
         else:
             cat_features = torch.empty(0, 0, 0)
             cat_targets = torch.empty(0)
@@ -218,8 +219,8 @@
         return cat_features, cat_targets

     def _apply_feature_extractor_per_run(
-        self, features: List[torch.Tensor], targets: List[torch.Tensor]
-    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
+        self, features: List[np.ndarray], targets: List[np.ndarray]
+    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         if self.feature_extractor is not None and self.window_size is not None:
             cutoff = self.window_size - 1
             features = [self._apply_feature_extractor(f) for f in features]
@@ -228,12 +229,9 @@
         return features, targets

-    def _apply_feature_extractor(self, features: torch.Tensor) -> torch.Tensor:
-        dtype = features.dtype
-        numpy_features = torch.permute(features, (0, 2, 1)).numpy()
-        extracted = self.feature_extractor(numpy_features)  # type: ignore
-        extracted = utils.extract_windows(extracted, self.window_size)  # type: ignore
-        features = utils.feature_to_tensor(extracted, dtype)
+    def _apply_feature_extractor(self, features: np.ndarray) -> np.ndarray:
+        features = self.feature_extractor(features)  # type: ignore
+        features = utils.extract_windows(features, self.window_size)  # type: ignore

         return features
@@ -323,7 +321,7 @@ def to_dataset(self, split: str) -> TensorDataset:


 class PairedRulDataset(IterableDataset):
-    """TODO."""
+    """A dataset of sample pairs drawn from the same time series."""

     def __init__(
         self,
@@ -347,8 +345,8 @@ def __init__(
             reader.check_compatibility(self.readers[0])

         self._run_domain_idx: np.ndarray
-        self._features: List[torch.Tensor]
-        self._labels: List[torch.Tensor]
+        self._features: List[np.ndarray]
+        self._labels: List[np.ndarray]

         self._prepare_datasets()
         self._max_rul = self._get_max_rul()
@@ -480,14 +478,14 @@ def _get_labeled_pair_idx(self) -> Tuple[int, int, int, int, int]:

     def _build_pair(
         self,
-        run: torch.Tensor,
+        run: np.ndarray,
         anchor_idx: int,
         query_idx: int,
         distance: int,
         domain_label: int,
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        anchors = run[anchor_idx]
-        queries = run[query_idx]
+        anchors = utils.feature_to_tensor(run[anchor_idx], torch.float)
+        queries = utils.feature_to_tensor(run[query_idx], torch.float)
         domain_tensor = torch.tensor(domain_label, dtype=torch.float)
         distances = torch.tensor(distance, dtype=torch.float) / self._max_rul
         distances = torch.clamp_max(distances, max=1)  # max distance is max_rul
diff --git a/rul_datasets/reader/__init__.py b/rul_datasets/reader/__init__.py
index 77e27d6..3e31a46 100644
--- a/rul_datasets/reader/__init__.py
+++ b/rul_datasets/reader/__init__.py
@@ -36,24 +36,24 @@
 >>> test_features, test_targets = reader.load_split("test")
 ```

-The features are a list of [tensors][torch.Tensor] where each tensor has a shape of
-`[num_windows, num_channels, window_size]`:
+The features are a list of [numpy arrays][numpy.ndarray] where each array has a shape of
+`[num_windows, window_size, num_channels]`:

 ```pycon
 >>> type(dev_features)
 <class 'list'>
 >>> dev_features[0].shape
-torch.Size([163, 14, 30])
+(163, 30, 14)
 ```

-The targets are a list of [tensors][torch.Tensor], too, where each tensor has a shape
-of `[num_windows]`:
+The targets are a list of [numpy arrays][numpy.ndarray], too, where each array has a
+shape of `[num_windows]`:

 ```pycon
 >>> type(dev_targets)
 <class 'list'>
 >>> dev_targets[0].shape
-torch.Size([163])
+(163,)
 ```

 Each reader defines a default window size for its data. This can be overridden by the
@@ -63,7 +63,7 @@
 >>> fd1 = CmapssReader(fd=1, window_size=15)
 >>> features, _ = fd1.load_split("dev")
 >>> features[0].shape
-torch.Size([163, 14, 15])
+(163, 15, 14)
 ```

 Some datasets, i.e. CMAPSS, use a piece-wise linear RUL function, where a maximum RUL
@@ -73,8 +73,8 @@
 ```pycon
 >>> fd1 = CmapssReader(fd=1, max_rul=100)
->>> targets = fd1.load_split("dev")
->>> max(torch.max(t) for t in targets)
-tensor(100.)
+>>> _, targets = fd1.load_split("dev")
+>>> max(np.max(t) for t in targets)
+100.0
 ```

 If you want to use a sub-dataset as unlabeled data, e.g. for unsupervised domain
@@ -88,9 +88,9 @@
 >>> fd1 = CmapssReader(fd=1, percent_broken=0.8)
 >>> features, targets = fd1.load_split("dev")
 >>> features[0].shape
-torch.Size([130, 14, 30])
->>> torch.min(targets[0])
-tensor(34.)
+(130, 30, 14)
+>>> np.min(targets[0])
+34.0
 ```

 You may want to apply the same `percent_broken` from your training data to your
@@ -101,8 +101,8 @@
 ```pycon
 >>> fd1 = CmapssReader(fd=1, percent_broken=0.8, truncate_val=True)
 >>> features, targets = fd1.load_split("val")
->>> torch.min(targets[0])
-tensor(44.)
+>>> np.min(targets[0])
+44.0
 ```

 Data-driven RUL estimation algorithms are often sensitive to the overall amount of
@@ -149,7 +149,17 @@
 ```

 For more information, see [core][rul_datasets.core] module page or the
-[Use Cases](/rul-datasets/) page.
+[Libraries](/rul-datasets/use_cases/libraries) page.
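+
+If a downstream model still expects PyTorch's channel-first layout, the arrays can be
+converted manually, e.g. with [feature_to_tensor][rul_datasets.utils.feature_to_tensor]:
+
+```pycon
+>>> fd1 = CmapssReader(fd=1)
+>>> features, _ = fd1.load_split("dev")
+>>> utils.feature_to_tensor(features[0], torch.float).shape
+torch.Size([163, 14, 30])
+```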
""" diff --git a/rul_datasets/reader/abstract.py b/rul_datasets/reader/abstract.py index 8488972..2bd879f 100644 --- a/rul_datasets/reader/abstract.py +++ b/rul_datasets/reader/abstract.py @@ -5,9 +5,7 @@ from typing import Optional, Union, List, Dict, Any, Iterable, Tuple import numpy as np -import torch -from rul_datasets import utils from rul_datasets.reader import truncating @@ -155,17 +153,14 @@ def load_complete_split( """ raise NotImplementedError - def load_split(self, split: str) -> Tuple[List[torch.Tensor], List[torch.Tensor]]: + def load_split(self, split: str) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Load a split as tensors and apply truncation to it. This function loads the scaled features and the targets of a split into memory. Afterwards, truncation is applied if the `split` is set to `dev`. The validation set is also truncated with `percent_broken` if `truncate_val` is - set to `True`. At last, the data is transformed into [tensors][torch.Tensor]. - While converting to them tensors, the axis of the features are transposed to - `[num_windows, num_channels, window_size]` to fit PyTorch's channel first - format. + set to `True`. Args: split: The desired split to load. @@ -182,9 +177,8 @@ def load_split(self, split: str) -> Tuple[List[torch.Tensor], List[torch.Tensor] features, targets = truncating.truncate_runs( features, targets, self.percent_broken ) - tensor_feats, tensor_targets = utils.to_tensor(features, targets) - return tensor_feats, tensor_targets + return features, targets def get_compatible( self, diff --git a/rul_datasets/reader/cmapss.py b/rul_datasets/reader/cmapss.py index 439b0a2..6161389 100644 --- a/rul_datasets/reader/cmapss.py +++ b/rul_datasets/reader/cmapss.py @@ -44,7 +44,7 @@ class CmapssReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([163, 14, 30]) + (163, 30, 14) Custom channels >>> import rul_datasets @@ -52,7 +52,7 @@ class CmapssReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([163, 3, 30]) + (163, 30, 3) """ _FMT: str = ( diff --git a/rul_datasets/reader/femto.py b/rul_datasets/reader/femto.py index a27697d..d2494a5 100644 --- a/rul_datasets/reader/femto.py +++ b/rul_datasets/reader/femto.py @@ -44,7 +44,7 @@ class FemtoReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([2803, 2, 2560]) + (2803, 2560, 2) Custom splits: >>> import rul_datasets @@ -53,7 +53,7 @@ class FemtoReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([2463, 2, 2560]) + (2463, 2560, 2) """ _FEMTO_ROOT: str = os.path.join(get_data_root(), "FEMTOBearingDataSet") diff --git a/rul_datasets/reader/xjtu_sy.py b/rul_datasets/reader/xjtu_sy.py index b7f5da4..a7212f7 100644 --- a/rul_datasets/reader/xjtu_sy.py +++ b/rul_datasets/reader/xjtu_sy.py @@ -37,7 +37,7 @@ class XjtuSyReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([123, 2, 32768]) + (123, 32768, 2) Custom splits: >>> import rul_datasets @@ -46,7 +46,7 @@ class XjtuSyReader(AbstractReader): >>> fd1.prepare_data() >>> features, labels = fd1.load_split("dev") >>> features[0].shape - torch.Size([52, 2, 32768]) + (52, 32768, 2) """ _XJTU_SY_ROOT: str = os.path.join(get_data_root(), "XJTU-SY") diff --git a/rul_datasets/utils.py 
index 674f4b6..ad6721f 100644
--- a/rul_datasets/utils.py
+++ b/rul_datasets/utils.py
@@ -105,4 +105,7 @@ def to_tensor(


 def feature_to_tensor(features: np.ndarray, dtype: torch.dtype) -> torch.Tensor:
-    return torch.tensor(features, dtype=dtype).permute(0, 2, 1)
+    if len(features.shape) == 2:
+        return torch.tensor(features, dtype=dtype).permute(1, 0)
+    else:
+        return torch.tensor(features, dtype=dtype).permute(0, 2, 1)
diff --git a/tests/reader/test_cmapss.py b/tests/reader/test_cmapss.py
index 67be7fe..218e49b 100644
--- a/tests/reader/test_cmapss.py
+++ b/tests/reader/test_cmapss.py
@@ -1,3 +1,4 @@
+import numpy as np
 import numpy.testing as npt
 import pytest
 import torch
@@ -29,11 +30,11 @@ def _check_split(self, rul_loader, split, window_size):
                 self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     @pytest.mark.parametrize(
         ("fd", "window_size"), [(1, 30), (2, 20), (3, 30), (4, 15)]
     )
@@ -52,7 +53,7 @@ def test_feature_select(self):
         for split in ["dev", "val", "test"]:
             features, _ = dataset.load_split(split)
             for run in features:
-                assert 7 == run.shape[1]
+                assert 7 == run.shape[2]

     def test_prepare_data_not_called_for_feature_select(self):
         dataset = reader.CmapssReader(1, feature_select=[4])
@@ -64,15 +65,15 @@ def test_normalization_min_max(self, fd):
         full_dataset = reader.CmapssReader(fd)
         full_dev, full_dev_targets = full_dataset.load_split("dev")

-        npt.assert_almost_equal(max(torch.max(r).item() for r in full_dev), 1.0)
-        npt.assert_almost_equal(min(torch.min(r).item() for r in full_dev), -1.0)
+        npt.assert_almost_equal(max(np.max(r) for r in full_dev), 1.0)
+        npt.assert_almost_equal(min(np.min(r) for r in full_dev), -1.0)

         trunc_dataset = reader.CmapssReader(fd, percent_fail_runs=0.8)
         trunc_dev, _ = trunc_dataset.load_split("dev")
-        assert max(torch.max(r).item() for r in trunc_dev) <= 1.0
-        assert min(torch.min(r).item() for r in trunc_dev) >= -1.0
+        assert np.round(max(np.max(r).item() for r in trunc_dev), decimals=7) <= 1.0
+        assert np.round(min(np.min(r).item() for r in trunc_dev), decimals=7) >= -1.0

         trunc_dataset = reader.CmapssReader(fd, percent_broken=0.2)
         trunc_dev, _ = trunc_dataset.load_split("dev")
-        assert max(torch.max(r).item() for r in trunc_dev) <= 1.0
-        assert min(torch.min(r).item() for r in trunc_dev) >= -1.0
+        assert np.round(max(np.max(r).item() for r in trunc_dev), decimals=7) <= 1.0
+        assert np.round(min(np.min(r).item() for r in trunc_dev), decimals=7) >= -1.0
diff --git a/tests/reader/test_femto.py b/tests/reader/test_femto.py
index 04af2fb..17d5e1f 100644
--- a/tests/reader/test_femto.py
+++ b/tests/reader/test_femto.py
@@ -26,11 +26,11 @@ def test_run_shape_and_dtype(self, fd, window_size, split):
             self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     def test_standardization(self):
         for i in range(1, 3):
@@ -38,26 +38,26 @@
             full_train, full_train_targets = full_dataset.load_split("dev")

             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(full_train)).item(), decimal=3
+                0.0, np.mean(np.concatenate(full_train)).item(), decimal=3
             )
             npt.assert_almost_equal(
-                1.0, torch.std(torch.cat(full_train)).item(), decimal=3
+                1.0, np.std(np.concatenate(full_train)).item(), decimal=3
             )

             truncated_dataset = reader.FemtoReader(fd=i, percent_fail_runs=0.8)
             trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=2
+                0.0, np.mean(np.concatenate(trunc_train)).item(), decimal=2
             )
             npt.assert_almost_equal(
-                1.0, torch.std(torch.cat(trunc_train)).item(), decimal=1
+                1.0, np.std(np.concatenate(trunc_train)).item(), decimal=1
             )

             # percent_broken is supposed to change the std but not the mean
             truncated_dataset = reader.FemtoReader(fd=i, percent_broken=0.2)
             trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
             npt.assert_almost_equal(
-                0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
+                0.0, np.mean(np.concatenate(trunc_train)).item(), decimal=1
             )

     @pytest.mark.parametrize("max_rul", [125, None])
@@ -65,7 +65,6 @@ def test_max_rul(self, max_rul):
         dataset = reader.FemtoReader(fd=1, max_rul=max_rul)
         _, targets = dataset.load_split("dev")
         for t in targets:
-            t = t.numpy()
             if max_rul is None:
                 npt.assert_equal(t, np.arange(len(t), 0, -1))  # is linear
             else:
diff --git a/tests/reader/test_xjtu_sy.py b/tests/reader/test_xjtu_sy.py
index d79983e..cec8202 100644
--- a/tests/reader/test_xjtu_sy.py
+++ b/tests/reader/test_xjtu_sy.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 import torch
 from numpy import testing as npt
@@ -25,23 +26,17 @@ def test_standardization(self, fd):
         full_dataset = reader.XjtuSyReader(fd)
         full_train, full_train_targets = full_dataset.load_split("dev")

-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(full_train)).item(), decimal=4
-        )
-        npt.assert_almost_equal(1.0, torch.std(torch.cat(full_train)).item(), decimal=4)
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(full_train)), decimal=4)
+        npt.assert_almost_equal(1.0, np.std(np.concatenate(full_train)), decimal=4)

         truncated_dataset = reader.XjtuSyReader(fd, percent_fail_runs=0.8)
         trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
-        )
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(trunc_train)), decimal=1)

         # percent_broken is supposed to change the std but not the mean
         truncated_dataset = reader.XjtuSyReader(fd, percent_broken=0.2)
         trunc_train, trunc_train_targets = truncated_dataset.load_split("dev")
-        npt.assert_almost_equal(
-            0.0, torch.mean(torch.cat(trunc_train)).item(), decimal=1
-        )
+        npt.assert_almost_equal(0.0, np.mean(np.concatenate(trunc_train)), decimal=1)

     @pytest.mark.parametrize("window_size", [1500, 100])
     @pytest.mark.parametrize("fd", [1, 2, 3])
@@ -53,11 +48,11 @@ def test_run_shape_and_dtype(self, window_size, fd, split):
             self._assert_run_correct(run, run_target, window_size)

     def _assert_run_correct(self, run, run_target, win):
-        assert win == run.shape[2]
-        assert self.NUM_CHANNELS == run.shape[1]
+        assert win == run.shape[1]
+        assert self.NUM_CHANNELS == run.shape[2]
         assert len(run) == len(run_target)
-        assert torch.float32 == run.dtype
-        assert torch.float32 == run_target.dtype
+        assert np.float64 == run.dtype
+        assert np.float64 == run_target.dtype

     @pytest.mark.parametrize("fd", [1, 2, 3])
     @pytest.mark.parametrize(
diff --git a/tests/test_adaption.py b/tests/test_adaption.py
index 191fef5..d8ae5f4 100644
--- a/tests/test_adaption.py
+++ b/tests/test_adaption.py
@@ -2,6 +2,7 @@
 import warnings
 from unittest import mock

+import numpy as np
 import torch
 from torch.utils.data import RandomSampler, TensorDataset

@@ -154,7 +155,7 @@ class TestPretrainingDataModuleFullData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        source_mock_runs = [torch.randn(16, 14, 1)] * 3, [torch.rand(16)] * 3
+        source_mock_runs = [np.random.randn(16, 1, 14)] * 3, [np.random.rand(16)] * 3
         self.source_loader = mock.MagicMock(name="CMAPSSLoader")
         self.source_loader.fd = 3
         self.source_loader.percent_fail_runs = None
@@ -168,7 +169,7 @@ def setUp(self):
         self.source_loader.load_split.return_value = source_mock_runs
         self.source_data = core.RulDataModule(self.source_loader, batch_size=16)

-        target_mock_runs = [torch.randn(16, 14, 1)] * 2, [torch.rand(16)] * 2
+        target_mock_runs = [np.random.randn(16, 1, 14)] * 2, [np.random.rand(16)] * 2
         self.target_loader = mock.MagicMock(name="CMAPSSLoader")
         self.target_loader.fd = 1
         self.target_loader.percent_fail_runs = 0.8
@@ -266,7 +267,7 @@ class TestPretrainingDataModuleLowData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        source_mock_runs = [torch.randn(16, 14, 1)] * 3, [torch.rand(16)] * 3
+        source_mock_runs = [np.random.randn(16, 1, 14)] * 3, [np.random.rand(16)] * 3
         self.source_loader = mock.MagicMock(name="CMAPSSLoader")
         self.source_loader.fd = 3
         self.source_loader.percent_fail_runs = None
@@ -281,8 +282,8 @@ def setUp(self):
         self.source_data = core.RulDataModule(self.source_loader, batch_size=16)

         target_mock_runs = (
-            [torch.randn(3, 14, 1), torch.randn(1, 14, 1)],
-            [torch.rand(3), torch.rand(1)],
+            [np.random.randn(3, 1, 14), np.random.randn(1, 1, 14)],
+            [np.random.rand(3), np.random.rand(1)],
         )
         self.target_loader = mock.MagicMock(name="CMAPSSLoader")
         self.target_loader.fd = 1
diff --git a/tests/test_baseline.py b/tests/test_baseline.py
index 3e72310..7fb56d5 100644
--- a/tests/test_baseline.py
+++ b/tests/test_baseline.py
@@ -2,6 +2,7 @@
 import warnings
 from unittest import mock

+import numpy as np
 import torch
 import torch.utils.data

@@ -68,7 +69,7 @@ class TestPretrainingBaselineDataModuleFullData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        self.mock_runs = [torch.randn(16, 14, 1)] * 8, [torch.rand(16)] * 8
+        self.mock_runs = [np.random.randn(16, 1, 14)] * 8, [np.random.rand(16)] * 8

         self.failed_loader = mock.MagicMock(name="CMAPSSLoader")
         self.failed_loader.fd = 1
@@ -218,7 +219,7 @@ class TestPretrainingBaselineDataModuleLowData(
     PretrainingDataModuleTemplate, unittest.TestCase
 ):
     def setUp(self):
-        self.mock_runs = [torch.randn(16, 14, 1)] * 2, [torch.rand(16)] * 2
+        self.mock_runs = [np.random.randn(16, 1, 14)] * 2, [np.random.rand(16)] * 2

         self.failed_loader = mock.MagicMock(name="CMAPSSLoader")
         self.failed_loader.fd = 1
diff --git a/tests/test_core.py b/tests/test_core.py
index d337664..570b41d 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -18,7 +18,7 @@ def setUp(self):
             "test": 0,
             "window_size": 30,
         }
-        self.mock_runs = [torch.zeros(1, 1, 1)], [torch.zeros(1)]
+        self.mock_runs = [np.zeros((1, 1, 1))], [np.zeros(1)]
         self.mock_loader.load_split.return_value = self.mock_runs

     def test_created_correctly(self):
@@ -62,7 +62,7 @@ def test_setup(self):
         self.mock_loader.load_split.assert_has_calls(
[mock.call("dev"), mock.call("val"), mock.call("test")] ) - mock_runs = tuple(torch.cat(r) for r in self.mock_runs) + mock_runs = tuple(torch.tensor(np.concatenate(r)) for r in self.mock_runs) self.assertDictEqual( {"dev": mock_runs, "val": mock_runs, "test": mock_runs}, dataset._data ) @@ -119,8 +119,8 @@ def test_test_dataloader(self, mock_to_dataset): def test_train_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -129,8 +129,8 @@ def test_train_batch_structure(self): def test_val_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -139,8 +139,8 @@ def test_val_batch_structure(self): def test_test_batch_structure(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30)] * 4, - [torch.zeros(8)] * 4, + [np.zeros((8, 30, 14))] * 4, + [np.zeros(8)] * 4, ) dataset = core.RulDataModule(self.mock_loader, batch_size=16) dataset.setup() @@ -201,7 +201,7 @@ def test_is_mutually_exclusive(self): def test_feature_extractor(self): self.mock_loader.load_split.return_value = ( - [torch.zeros(8, 14, 30) + torch.arange(8)[:, None, None]], + [np.zeros((8, 30, 14)) + np.arange(8)[:, None, None]], [torch.arange(8)], ) fe = lambda x: np.mean(x, axis=1) @@ -231,12 +231,12 @@ class DummyRul(reader.AbstractReader): def __init__(self, length): self.data = { "dev": ( - [torch.zeros(length, self.window_size, 5)], - [torch.clamp_max(torch.arange(length, 0, step=-1), 125)], + [np.zeros((length, self.window_size, 5))], + [np.clip(np.arange(length, 0, step=-1), a_min=None, a_max=125)], ), "val": ( - [torch.zeros(100, self.window_size, 5)], - [torch.clamp_max(torch.arange(100, 0, step=-1), 125)], + [np.zeros((100, self.window_size, 5))], + [np.clip(np.arange(100, 0, step=-1), a_min=None, a_max=125)], ), } @@ -270,18 +270,18 @@ class DummyRulShortRuns(reader.AbstractReader): data = { "dev": ( [ - torch.ones(100, window_size, 5) - * torch.arange(1, 101).view(100, 1, 1), # normal run - torch.zeros(2, window_size, 5), # too short run - torch.ones(100, window_size, 5) - * torch.arange(1, 101).view(100, 1, 1), # normal run - torch.zeros(1, window_size, 5), # empty run + np.ones((100, window_size, 5)) + * np.arange(1, 101).reshape((100, 1, 1)), # normal run + np.zeros((2, window_size, 5)), # too short run + np.ones((100, window_size, 5)) + * np.arange(1, 101).reshape((100, 1, 1)), # normal run + np.zeros((1, window_size, 5)), # empty run ], [ - torch.clamp_max(torch.arange(100, 0, step=-1), 125), - torch.ones(2) * 500, - torch.clamp_max(torch.arange(100, 0, step=-1), 125), - torch.ones(1) * 500, + np.clip(np.arange(100, 0, step=-1), a_min=None, a_max=125), + np.ones(2) * 500, + np.clip(torch.arange(100, 0, step=-1), a_min=None, a_max=125), + np.ones(1) * 500, ], ), } @@ -375,8 +375,12 @@ def test_sampled_data(self, cmapss_short): for i, sample in enumerate(data): idx = 3 * i expected_run = data._features[fixed_idx[idx]] - expected_anchor = expected_run[fixed_idx[idx + 1]] - expected_query = expected_run[fixed_idx[idx + 2]] + expected_anchor = torch.tensor(expected_run[fixed_idx[idx + 1]]).transpose( + 1, 0 + ) + expected_query = torch.tensor(expected_run[fixed_idx[idx + 2]]).transpose( + 1, 0 + 
+            )
             expected_distance = min(125, fixed_idx[idx + 2] - fixed_idx[idx + 1]) / 125
             expected_domain_idx = 0
             assert 0 == torch.dist(expected_anchor, sample[0])
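
For reference, a minimal end-to-end sketch of the API after this migration, based on
the CMAPSS FD1 shapes shown in the docs above (variable names are illustrative, and
the dataset is assumed to be available for download):

```python
import numpy as np
import torch

import rul_datasets
from rul_datasets import utils

fd1 = rul_datasets.CmapssReader(fd=1)
fd1.prepare_data()  # downloads and pre-processes the dataset if not done already

# Readers now return lists of channels-last numpy arrays instead of torch tensors.
features, targets = fd1.load_split("dev")
assert isinstance(features[0], np.ndarray)
assert features[0].shape == (163, 30, 14)  # [num_windows, window_size, num_channels]

# The channels-first conversion for PyTorch now happens only at the torch boundary,
# e.g. through feature_to_tensor, which permutes a 3D array to
# [num_windows, num_channels, window_size].
window = utils.feature_to_tensor(features[0], torch.float)
assert window.shape == torch.Size([163, 14, 30])
```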