From 55e447d40bbc421eee6445318f56d95ad49a3e06 Mon Sep 17 00:00:00 2001
From: Sooah Lee <sooah.lee@intel.com>
Date: Fri, 13 Sep 2024 11:06:55 +0900
Subject: [PATCH] Revert "Mergeback 1.9.0 to develop" (#1605)

Reverts openvinotoolkit/datumaro#1604
---
 3rd-party.txt                                 |  17 --
 CHANGELOG.md                                  |   8 +-
 docs/source/docs/release_notes.rst            |  16 --
 requirements-core.txt                         |   3 -
 setup.py                                      |   2 +-
 .../plugins/data_formats/kitti_raw/base.py    |   4 +-
 .../data_formats/kitti_raw/exporter.py        |   8 +-
 src/datumaro/plugins/framework_converter.py   |  51 +---
 src/datumaro/version.py                       |   2 +-
 .../integration/cli/test_kitti_raw_format.py  |   8 +-
 tests/unit/test_framework_converter.py        | 244 +-----------------
 tests/unit/test_kitti_raw_format.py           |  20 +-
 12 files changed, 33 insertions(+), 350 deletions(-)

diff --git a/3rd-party.txt b/3rd-party.txt
index 85d2f2edf2..0401a59ee0 100644
--- a/3rd-party.txt
+++ b/3rd-party.txt
@@ -7518,22 +7518,5 @@ Apache-2.0
    See the License for the specific language governing permissions and
    limitations under the License.
 -------------------------------------------------------------
-portalocker
-
-BSD-3-Clause
-
-Copyright 2022 Rick van Hattem
-
-Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
-
-3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------
 
 * Other names and brands may be claimed as the property of others.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 161b2d54e9..c246c0f18c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,22 +5,16 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## \[Q3 2024 Release 1.9.0\]
+## \[unreleased\]
 ### New features
 - Add a new CLI command: datum format
   (<https://github.com/openvinotoolkit/datumaro/pull/1570>)
-- Support language dataset for DmTorchDataset
-  (<https://github.com/openvinotoolkit/datumaro/pull/1592>)
 
 ### Enhancements
 - Change _Shape to Shape and add comments for subclasses of Shape
   (<https://github.com/openvinotoolkit/datumaro/pull/1568>)
-- Fix `kitti_raw` importer and exporter for dimensions (height, width, length) in meters
-  (<https://github.com/openvinotoolkit/datumaro/pull/1596>)
 
 ### Bug fixes
-- Fix KITTI-3D importer and exporter
-  (<https://github.com/openvinotoolkit/datumaro/pull/1596>)
 
 ## Q3 2024 Release 1.8.0
 ### New features
diff --git a/docs/source/docs/release_notes.rst b/docs/source/docs/release_notes.rst
index cf9cdb0309..221e4658b0 100644
--- a/docs/source/docs/release_notes.rst
+++ b/docs/source/docs/release_notes.rst
@@ -4,22 +4,6 @@ Release Notes
 .. toctree::
    :maxdepth: 1
 
-v1.9.0 (2024 Q3)
-----------------
-
-New features
-^^^^^^^^^^^^
-- Add a new CLI command: datum format
-- Support language dataset for DmTorchDataset
-
-Enhancements
-^^^^^^^^^^^^
-- Change _Shape to Shape and add comments for subclasses of Shape
-
-Bug fixes
-^^^^^^^^^
-- Fix KITTI-3D importer and exporter
-
 v1.8.0 (2024 Q3)
 ----------------
 
diff --git a/requirements-core.txt b/requirements-core.txt
index 078171ef59..1d2ce11bf3 100644
--- a/requirements-core.txt
+++ b/requirements-core.txt
@@ -64,6 +64,3 @@ json-stream
 
 # TabularValidator
 nltk
-
-# torch converter for language
-portalocker
diff --git a/setup.py b/setup.py
index 91b1b51e8c..acc6925fdc 100644
--- a/setup.py
+++ b/setup.py
@@ -85,7 +85,7 @@ def parse_requirements(filename=CORE_REQUIREMENTS_FILE):
     extras_require={
         "tf": ["tensorflow"],
         "tfds": ["tensorflow-datasets<4.9.3"],
-        "torch": ["torch", "torchvision", "torchtext==0.16.0"],
+        "torch": ["torch", "torchvision"],
         "default": DEFAULT_REQUIREMENTS,
     },
     ext_modules=ext_modules,
diff --git a/src/datumaro/plugins/data_formats/kitti_raw/base.py b/src/datumaro/plugins/data_formats/kitti_raw/base.py
index 836ad28574..92e04cc88e 100644
--- a/src/datumaro/plugins/data_formats/kitti_raw/base.py
+++ b/src/datumaro/plugins/data_formats/kitti_raw/base.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024 Intel Corporation
+# Copyright (C) 2021-2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -182,7 +182,7 @@ def _parse_attr(cls, value):
     @classmethod
     def _parse_track(cls, track_id, track, categories):
         common_attrs = {k: cls._parse_attr(v) for k, v in track["attributes"].items()}
-        scale = [track["scale"][k] for k in ["h", "w", "l"]]
+        scale = [track["scale"][k] for k in ["w", "h", "l"]]
         label = categories[AnnotationType.label].find(track["label"])[0]
 
         kf_occluded = False
diff --git a/src/datumaro/plugins/data_formats/kitti_raw/exporter.py b/src/datumaro/plugins/data_formats/kitti_raw/exporter.py
index 3d01b1d822..8e2f250d29 100644
--- a/src/datumaro/plugins/data_formats/kitti_raw/exporter.py
+++ b/src/datumaro/plugins/data_formats/kitti_raw/exporter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024 Intel Corporation
+# Copyright (C) 2021 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -339,8 +339,8 @@ def _create_tracklets(self, subset):
                 if not track:
                     track = {
                         "objectType": label,
-                        "h": ann.scale[0],
-                        "w": ann.scale[1],
+                        "h": ann.scale[1],
+                        "w": ann.scale[0],
                         "l": ann.scale[2],
                         "first_frame": frame_id,
                         "poses": [],
@@ -348,7 +348,7 @@ def _create_tracklets(self, subset):
                     }
                     tracks[track_id] = track
                 else:
-                    if [track["h"], track["w"], track["l"]] != ann.scale:
+                    if [track["w"], track["h"], track["l"]] != ann.scale:
                         # Tracks have fixed scale in the format
                         raise DatasetExportError(
                             "Item %s: mismatching track shapes, "
diff --git a/src/datumaro/plugins/framework_converter.py b/src/datumaro/plugins/framework_converter.py
index e5a5b7f6c2..556005e1b7 100644
--- a/src/datumaro/plugins/framework_converter.py
+++ b/src/datumaro/plugins/framework_converter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -17,7 +17,6 @@
     "detection": AnnotationType.bbox,
     "instance_segmentation": AnnotationType.polygon,
     "semantic_segmentation": AnnotationType.mask,
-    "tabular": [AnnotationType.label, AnnotationType.caption],
 }
 
 
@@ -89,10 +88,7 @@ def _gen_item(self, idx: int):
                 if ann.type == TASK_ANN_TYPE[self.task]
             ]
             label = mask_tools.merge_masks((mask, label_id) for mask, label_id in masks)
-        elif self.task == "tabular":
-            label = [
-                ann.as_dict() for ann in item.annotations if ann.type in TASK_ANN_TYPE[self.task]
-            ]
+
         return image, label
 
 
@@ -107,58 +103,15 @@ def __init__(
             task: str,
             transform: Optional[Callable] = None,
             target_transform: Optional[Callable] = None,
-            target: Optional[str] = None,
-            tokenizer: Optional[tuple[Callable, Callable]] = None,
-            vocab: Optional[tuple[Callable, Callable]] = None,
         ):
             super().__init__(dataset=dataset, subset=subset, task=task)
 
             self.transform = transform
             self.target_transform = target_transform
 
-            if self.task == "tabular":
-                if not isinstance(target, dict):
-                    raise ValueError(
-                        "Target should be a dictionary with 'input' and 'output' keys."
-                    )
-                self.input_target = target.get("input")
-                self.output_target = target.get("output")
-                if not self.input_target:
-                    raise ValueError(
-                        "Please provide target column for tabular task which is used for input"
-                    )
-
-                if not (tokenizer and vocab):
-                    raise ValueError("Both tokenizer and vocab must be provided for tabular task")
-                self.tokenizer = tokenizer
-                self.vocab = vocab
-
         def __getitem__(self, idx):
             image, label = self._gen_item(idx)
 
-            if self.task == "tabular":
-                text = image()[self.input_target]
-
-                if self.output_target:
-                    src_tokenizer, tgt_tokenizer = self.tokenizer
-                    src_vocab, tgt_vocab = self.vocab
-                    src_tokens = src_tokenizer(text)
-                    src_token_ids = src_vocab(src_tokens)
-
-                    label_text = label[0]["caption"].split(f"{self.output_target}:")[-1]
-                    tgt_tokens = tgt_tokenizer(label_text)
-                    tgt_token_ids = tgt_vocab(tgt_tokens)
-
-                    return torch.tensor(src_token_ids, dtype=torch.long), torch.tensor(
-                        tgt_token_ids, dtype=torch.long
-                    )
-                else:
-                    tokens = self.tokenizer(text)
-                    token_ids = self.vocab(tokens)
-                    return torch.tensor(token_ids, dtype=torch.long), torch.tensor(
-                        label[0]["label"], dtype=torch.long
-                    )
-
             if len(image.shape) == 2:
                 image = np.expand_dims(image, axis=-1)
 
diff --git a/src/datumaro/version.py b/src/datumaro/version.py
index 0a0a43a57e..b3de187d2e 100644
--- a/src/datumaro/version.py
+++ b/src/datumaro/version.py
@@ -1 +1 @@
-__version__ = "1.9.0"
+__version__ = "1.9.0rc0"
diff --git a/tests/integration/cli/test_kitti_raw_format.py b/tests/integration/cli/test_kitti_raw_format.py
index f7810bc981..884cc02708 100644
--- a/tests/integration/cli/test_kitti_raw_format.py
+++ b/tests/integration/cli/test_kitti_raw_format.py
@@ -33,13 +33,13 @@ def test_can_convert_to_kitti_raw(self):
                         annotations=[
                             Cuboid3d(
                                 position=[1, 2, 3],
-                                scale=[-3.62, 7.95, -1.03],
+                                scale=[7.95, -3.62, -1.03],
                                 label=1,
                                 attributes={"occluded": False, "track_id": 1},
                             ),
                             Cuboid3d(
                                 position=[1, 1, 0],
-                                scale=[23.01, 8.34, -0.76],
+                                scale=[8.34, 23.01, -0.76],
                                 label=0,
                                 attributes={"occluded": False, "track_id": 2},
                             ),
@@ -65,7 +65,7 @@ def test_can_convert_to_kitti_raw(self):
                         annotations=[
                             Cuboid3d(
                                 position=[0, 1, 0],
-                                scale=[23.01, 8.34, -0.76],
+                                scale=[8.34, 23.01, -0.76],
                                 rotation=[1, 1, 3],
                                 label=0,
                                 attributes={"occluded": True, "track_id": 2},
@@ -92,7 +92,7 @@ def test_can_convert_to_kitti_raw(self):
                         annotations=[
                             Cuboid3d(
                                 position=[1, 2, 3],
-                                scale=[13.54, -9.41, 0.24],
+                                scale=[-9.41, 13.54, 0.24],
                                 label=1,
                                 attributes={"occluded": False, "track_id": 3},
                             )
diff --git a/tests/unit/test_framework_converter.py b/tests/unit/test_framework_converter.py
index 83fd9a97c5..0933884293 100644
--- a/tests/unit/test_framework_converter.py
+++ b/tests/unit/test_framework_converter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023-2024 Intel Corporation
+# Copyright (C) 2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -13,16 +13,14 @@
 from datumaro.components.annotation import (
     AnnotationType,
     Bbox,
-    Caption,
     Label,
     LabelCategories,
     Mask,
     Polygon,
-    Tabular,
 )
 from datumaro.components.dataset import Dataset
 from datumaro.components.dataset_base import DatasetItem
-from datumaro.components.media import Image, Table, TableRow
+from datumaro.components.media import Image
 from datumaro.plugins.framework_converter import (
     TASK_ANN_TYPE,
     DmTfDataset,
@@ -38,8 +36,6 @@
 
 try:
     import torch
-    from torchtext.data.utils import get_tokenizer
-    from torchtext.vocab import build_vocab_from_iterator
     from torchvision import datasets, transforms
 except ImportError:
     TORCH_AVAILABLE = False
@@ -146,89 +142,6 @@ def fxt_dataset():
     )
 
 
-@pytest.fixture
-def fxt_tabular_label_dataset():
-    table = Table.from_list(
-        [
-            {
-                "label": 1,
-                "text": "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "
-                "controversial"
-                " I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.<br /><br />I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot.",
-            }
-        ]
-    )
-    return Dataset.from_iterable(
-        [
-            DatasetItem(
-                id=0,
-                subset="train",
-                media=TableRow(table=table, index=0),
-                annotations=[Label(id=0, attributes={}, group=0, object_id=-1, label=0)],
-            )
-        ],
-        categories={
-            AnnotationType.label: LabelCategories.from_iterable(
-                [("label:1", "label"), ("label:2", "label")]
-            )
-        },
-        media_type=TableRow,
-    )
-
-
-@pytest.fixture
-def fxt_tabular_caption_dataset():
-    table = Table.from_list(
-        [
-            {
-                "source": "Zwei junge weiße Männer sind im Freien in der Nähe vieler Büsche.",
-                "target": "Two young, White males are outside near many bushes.",
-            }
-        ]
-    )
-    return Dataset.from_iterable(
-        [
-            DatasetItem(
-                id=0,
-                subset="train",
-                media=TableRow(table=table, index=0),
-                annotations=[
-                    Caption("target:Two young, White males are outside near many bushes.")
-                ],
-            )
-        ],
-        categories={},
-        media_type=TableRow,
-    )
-
-
-@pytest.fixture
-def fxt_dummy_tokenizer():
-    def dummy_tokenizer(text):
-        return text.split()
-
-    return dummy_tokenizer
-
-
-@pytest.fixture
-def data_iter():
-    return [(1, "This is a sample text"), (2, "Another sample text")]
-
-
-@pytest.fixture
-def fxt_dummy_vocab(fxt_dummy_tokenizer, data_iter):
-    vocab = build_vocab_from_iterator(
-        map(fxt_dummy_tokenizer, (text for _, text in data_iter)), specials=["<unk>"]
-    )
-    vocab.set_default_index(vocab["<unk>"])
-    return vocab
-
-
-@pytest.fixture
-def fxt_tabular_fixture(fxt_dummy_tokenizer, fxt_dummy_vocab):
-    return {"target": {"input": "text"}, "tokenizer": fxt_dummy_tokenizer, "vocab": fxt_dummy_vocab}
-
-
 @pytest.mark.new
 @mark_requirement(Requirements.DATUM_GENERAL_REQ)
 class FrameworkConverterFactoryTest(TestCase):
@@ -260,49 +173,38 @@ def test_create_converter_tf_importerror(self):
 @mark_requirement(Requirements.DATUM_GENERAL_REQ)
 class MultiframeworkConverterTest:
     @pytest.mark.parametrize(
-        "fxt_dataset_type,fxt_subset,fxt_task",
+        "fxt_subset,fxt_task",
         [
             (
-                "fxt_dataset",
                 "train",
                 "classification",
             ),
             (
-                "fxt_dataset",
                 "val",
                 "multilabel_classification",
             ),
             (
-                "fxt_dataset",
                 "train",
                 "detection",
             ),
             (
-                "fxt_dataset",
                 "val",
                 "instance_segmentation",
             ),
             (
-                "fxt_dataset",
                 "train",
                 "semantic_segmentation",
             ),
-            ("fxt_tabular_label_dataset", "train", "tabular"),
         ],
     )
-    def test_multi_framework_dataset(
-        self, fxt_dataset_type: str, fxt_subset: str, fxt_task: str, request
-    ):
-        dataset = request.getfixturevalue(fxt_dataset_type)
+    def test_multi_framework_dataset(self, fxt_dataset: Dataset, fxt_subset: str, fxt_task: str):
         dm_multi_framework_dataset = _MultiFrameworkDataset(
-            dataset=dataset, subset=fxt_subset, task=fxt_task
+            dataset=fxt_dataset, subset=fxt_subset, task=fxt_task
         )
 
         for idx in range(len(dm_multi_framework_dataset)):
             image, label = dm_multi_framework_dataset._gen_item(idx)
-            if fxt_task == "tabular":
-                image = image()
-            assert isinstance(image, (np.ndarray, dict))
+            assert isinstance(image, np.ndarray)
             if fxt_task == "classification":
                 assert isinstance(label, int)
             elif fxt_task == "multilabel_classification":
@@ -311,8 +213,6 @@ def test_multi_framework_dataset(
                 assert isinstance(label, list)
             if fxt_task == "semantic_segmentation":
                 assert isinstance(label, np.ndarray)
-            elif fxt_task == "tabular":
-                assert isinstance(label, list)
 
     @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed")
     @pytest.mark.parametrize(
@@ -361,6 +261,7 @@ def test_can_convert_torch_framework(
         fxt_subset: str,
         fxt_task: str,
         fxt_convert_kwargs: Dict[str, Any],
+        request: pytest.FixtureRequest,
     ):
         multi_framework_dataset = FrameworkConverter(fxt_dataset, subset=fxt_subset, task=fxt_task)
 
@@ -393,12 +294,7 @@ def test_can_convert_torch_framework(
                     if ann.type == TASK_ANN_TYPE[fxt_task]
                 ]
                 label = np.sum(masks, axis=0, dtype=np.uint8)
-            elif fxt_task == "tabular":
-                label = [
-                    ann.as_dict()
-                    for ann in exp_item.annotations
-                    if ann.type in TASK_ANN_TYPE[fxt_task]
-                ]
+
             if fxt_convert_kwargs.get("transform", None):
                 actual = dm_torch_item[0].permute(1, 2, 0).mul(255.0).to(torch.uint8).numpy()
                 assert np.array_equal(image, actual)
@@ -478,130 +374,6 @@ def test_can_convert_torch_framework_detection(self):
                 assert torch_ann["bbox"] == [x1, y1, x2 - x1, y2 - y1]
                 assert torch_ann["iscrowd"] == dm_ann["attributes"]["is_crowd"]
 
-    @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed")
-    def test_can_convert_torch_framework_tabular_label(self, fxt_tabular_label_dataset):
-        class IMDBDataset(Dataset):
-            def __init__(self, data_iter, vocab, transform=None):
-                self.data = list(data_iter)
-                self.vocab = vocab
-                self.transform = transform
-                self.tokenizer = get_tokenizer("basic_english")
-
-            def __len__(self):
-                return len(self.data)
-
-            def __getitem__(self, idx):
-                label, text = self.data[idx]
-                token_ids = [self.vocab[token] for token in self.tokenizer(text)]
-
-                if self.transform:
-                    token_ids = self.transform(token_ids)
-
-                return torch.tensor(token_ids, dtype=torch.long), torch.tensor(
-                    label, dtype=torch.long
-                )
-
-        # Prepare data and tokenizer
-        # First item of IMDB
-        first_item = (
-            1,
-            "I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it's not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.<br /><br />I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn't have much of a plot.",
-        )
-        tokenizer = get_tokenizer("basic_english")
-
-        # Build vocabulary
-        vocab = build_vocab_from_iterator([tokenizer(first_item[1])], specials=["<unk>"])
-        vocab.set_default_index(vocab["<unk>"])
-
-        # Create torch dataset
-        torch_dataset = IMDBDataset(iter([first_item]), vocab)
-
-        # Convert to dm_torch_dataset
-        dm_dataset = fxt_tabular_label_dataset
-        multi_framework_dataset = FrameworkConverter(dm_dataset, subset="train", task="tabular")
-        dm_torch_dataset = multi_framework_dataset.to_framework(
-            framework="torch", target={"input": "text"}, tokenizer=tokenizer, vocab=vocab
-        )
-
-        # Verify equality of items in torch_dataset and dm_torch_dataset
-        label_indices = dm_dataset.categories().get(AnnotationType.label)._indices
-        torch_item = torch_dataset[0]
-        dm_item = dm_torch_dataset[0]
-        assert torch.equal(torch_item[0], dm_item[0]), "Token IDs do not match"
-
-        # Extract and compare labels
-        torch_item_label = str(torch_item[1].item())
-        dm_item_label = list(label_indices.keys())[list(label_indices.values()).index(0)].split(
-            ":"
-        )[-1]
-        assert torch_item_label == dm_item_label, "Labels do not match"
-
-    @pytest.mark.skipif(not TORCH_AVAILABLE, reason="PyTorch is not installed")
-    def test_can_convert_torch_framework_tabular_caption(self, fxt_tabular_caption_dataset):
-        class Multi30kDataset(Dataset):
-            def __init__(self, dataset, src_tokenizer, tgt_tokenizer, src_vocab, tgt_vocab):
-                self.dataset = list(dataset)
-                self.src_tokenizer = src_tokenizer
-                self.tgt_tokenizer = tgt_tokenizer
-                self.src_vocab = src_vocab
-                self.tgt_vocab = tgt_vocab
-
-            def __len__(self):
-                return len(self.dataset)
-
-            def _data_process(self, text, tokenizer, vocab):
-                tokens = tokenizer(text)
-                token_ids = [vocab[token] for token in tokens]
-                return torch.tensor(token_ids, dtype=torch.long)
-
-            def __getitem__(self, idx):
-                src, tgt = self.dataset[idx]
-                src_tensor = self._data_process(src, self.src_tokenizer, self.src_vocab)
-                tgt_tensor = self._data_process(tgt, self.tgt_tokenizer, self.tgt_vocab)
-                return src_tensor, tgt_tensor
-
-        # Prepare data and tokenizer
-        # First item of Multi30k
-        first_item = (
-            "Zwei junge weiße Männer sind im Freien in der Nähe vieler Büsche.",
-            "Two young, White males are outside near many bushes.",
-        )
-
-        dummy_tokenizer = str.split
-
-        def build_single_vocab(item, tokenizer, specials):
-            tokens = tokenizer(item)
-            vocab = build_vocab_from_iterator([tokens], specials=specials)
-            vocab.set_default_index(vocab["<unk>"])
-            return vocab
-
-        # Build vocabularies
-        specials = ["<unk>", "<pad>", "<bos>", "<eos>"]
-        src_vocab = build_single_vocab(first_item[0], dummy_tokenizer, specials)
-        tgt_vocab = build_single_vocab(first_item[1], dummy_tokenizer, specials)
-
-        # Create torch dataset
-        torch_dataset = Multi30kDataset(
-            iter([first_item]), dummy_tokenizer, dummy_tokenizer, src_vocab, tgt_vocab
-        )
-
-        # Convert to dm_torch_dataset
-        dm_dataset = fxt_tabular_caption_dataset
-        multi_framework_dataset = FrameworkConverter(dm_dataset, subset="train", task="tabular")
-        dm_torch_dataset = multi_framework_dataset.to_framework(
-            framework="torch",
-            target={"input": "source", "output": "target"},
-            tokenizer=(dummy_tokenizer, dummy_tokenizer),
-            vocab=(src_vocab, tgt_vocab),
-        )
-
-        # Verify equality of items in torch_dataset and dm_torch_dataset
-        torch_item = torch_dataset[0]
-        dm_item = dm_torch_dataset[0]
-
-        assert torch.equal(torch_item[0], dm_item[0]), "Token IDs for de do not match"
-        assert torch.equal(torch_item[1], dm_item[1]), "Token IDs for en do not match"
-
     @pytest.mark.skipif(not TF_AVAILABLE, reason="Tensorflow is not installed")
     @pytest.mark.parametrize(
         "fxt_subset,fxt_task,fxt_convert_kwargs",
diff --git a/tests/unit/test_kitti_raw_format.py b/tests/unit/test_kitti_raw_format.py
index 498e99b20f..e8ab776b75 100644
--- a/tests/unit/test_kitti_raw_format.py
+++ b/tests/unit/test_kitti_raw_format.py
@@ -52,13 +52,13 @@ def test_can_load(self):
                     annotations=[
                         Cuboid3d(
                             position=[1, 2, 3],
-                            scale=[-3.62, 7.95, -1.03],
+                            scale=[7.95, -3.62, -1.03],
                             label=1,
                             attributes={"occluded": False, "track_id": 1},
                         ),
                         Cuboid3d(
                             position=[1, 1, 0],
-                            scale=[23.01, 8.34, -0.76],
+                            scale=[8.34, 23.01, -0.76],
                             label=0,
                             attributes={"occluded": False, "track_id": 2},
                         ),
@@ -71,7 +71,7 @@ def test_can_load(self):
                     annotations=[
                         Cuboid3d(
                             position=[0, 1, 0],
-                            scale=[23.01, 8.34, -0.76],
+                            scale=[8.34, 23.01, -0.76],
                             rotation=[1, 1, 3],
                             label=0,
                             attributes={"occluded": True, "track_id": 2},
@@ -85,7 +85,7 @@ def test_can_load(self):
                     annotations=[
                         Cuboid3d(
                             position=[1, 2, 3],
-                            scale=[13.54, -9.41, 0.24],
+                            scale=[-9.41, 13.54, 0.24],
                             label=1,
                             attributes={"occluded": False, "track_id": 3},
                         )
@@ -161,7 +161,7 @@ def test_can_save_and_load(self):
                         Cuboid3d(position=[1.4, 2.1, 1.4], label=1, attributes={"track_id": 2}),
                         Cuboid3d(
                             position=[11.4, -0.1, 4.2],
-                            scale=[1, 2, 2],
+                            scale=[2, 1, 2],
                             label=0,
                             attributes={"track_id": 3},
                         ),
@@ -172,7 +172,7 @@ def test_can_save_and_load(self):
                     annotations=[
                         Cuboid3d(
                             position=[0.4, -1, 2.24],
-                            scale=[1, 2, 2],
+                            scale=[2, 1, 2],
                             label=0,
                             attributes={"track_id": 3},
                         ),
@@ -185,7 +185,7 @@ def test_can_save_and_load(self):
                     annotations=[
                         Cuboid3d(
                             position=[0.4, -1, 3.24],
-                            scale=[1, 2, 2],
+                            scale=[2, 1, 2],
                             label=0,
                             attributes={"track_id": 3},
                         ),
@@ -244,7 +244,7 @@ def test_can_save_and_load(self):
                             ),
                             Cuboid3d(
                                 position=[11.4, -0.1, 4.2],
-                                scale=[1, 2, 2],
+                                scale=[2, 1, 2],
                                 label=0,
                                 attributes={"occluded": False, "track_id": 3},
                             ),
@@ -256,7 +256,7 @@ def test_can_save_and_load(self):
                         annotations=[
                             Cuboid3d(
                                 position=[0.4, -1, 2.24],
-                                scale=[1, 2, 2],
+                                scale=[2, 1, 2],
                                 label=0,
                                 attributes={"occluded": False, "track_id": 3},
                             ),
@@ -271,7 +271,7 @@ def test_can_save_and_load(self):
                         annotations=[
                             Cuboid3d(
                                 position=[0.4, -1, 3.24],
-                                scale=[1, 2, 2],
+                                scale=[2, 1, 2],
                                 label=0,
                                 attributes={"occluded": False, "track_id": 3},
                             ),