Skip to content

Commit

Permalink
add PathSeparatorInSubsetNameError
Browse files Browse the repository at this point in the history
  • Loading branch information
jihyeonyi committed Sep 24, 2024
1 parent d8fdcd3 commit 7da2334
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 3 deletions.
10 changes: 10 additions & 0 deletions src/datumaro/components/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,16 @@ def __str__(self):
return f"Item {self.item_id} is repeated in the source sequence."


@define(auto_exc=False)
class PathSeparatorInSubsetNameError(DatasetError):
    """Error for a subset that cannot be exported because its name has path separator(s)."""

    # Name of the offending subset; it is interpolated into the error message.
    subset: str = field()

    def __str__(self):
        message = (
            f"Failed to export the subset '{self.subset}': subset name contains path separator(s)."
        )
        return message


class DatasetQualityError(DatasetError):
    # Adds no behavior of its own; it only provides a distinct DatasetError subtype.
    pass

Expand Down
8 changes: 6 additions & 2 deletions src/datumaro/plugins/data_formats/datumaro/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from datumaro.components.crypter import NULL_CRYPTER
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.dataset_item_storage import ItemStatus
from datumaro.components.errors import PathSeparatorInSubsetNameError
from datumaro.components.exporter import ExportContextComponent, Exporter
from datumaro.components.media import Image, MediaElement, PointCloud, Video, VideoFrame
from datumaro.util import cast, dump_json_file
Expand Down Expand Up @@ -514,13 +515,16 @@ def create_writer(
default_image_ext=self._default_image_ext,
)

if os.path.sep in subset:
raise PathSeparatorInSubsetNameError(subset)

return (
_SubsetWriter(
context=self,
subset=subset,
ann_file=osp.join(
self._annotations_dir,
subset.replace(os.sep, "_") + self.PATH_CLS.ANNOTATION_EXT,
subset + self.PATH_CLS.ANNOTATION_EXT,
),
export_context=export_context,
)
Expand All @@ -530,7 +534,7 @@ def create_writer(
subset=subset,
ann_file=osp.join(
self._annotations_dir,
subset.replace(os.sep, "_") + self.PATH_CLS.ANNOTATION_EXT,
subset + self.PATH_CLS.ANNOTATION_EXT,
),
export_context=export_context,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from datumaro.components.crypter import NULL_CRYPTER, Crypter
from datumaro.components.dataset_base import DatasetItem, IDataset
from datumaro.components.errors import DatumaroError
from datumaro.components.errors import DatumaroError, PathSeparatorInSubsetNameError
from datumaro.components.exporter import ExportContext, ExportContextComponent, Exporter
from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter
from datumaro.plugins.data_formats.datumaro.exporter import _SubsetWriter as __SubsetWriter
Expand Down Expand Up @@ -309,6 +309,9 @@ def create_writer(
default_image_ext=self._default_image_ext,
)

if osp.sep in subset:
raise PathSeparatorInSubsetNameError(subset)

return _SubsetWriter(
context=self,
subset=subset,
Expand Down
184 changes: 184 additions & 0 deletions tests/unit/data_formats/datumaro/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,190 @@ def fxt_test_datumaro_format_dataset():
)


@pytest.fixture
def fxt_test_datumaro_format_dataset_with_path_separator():
    """Build a dataset in which some subset names contain the "/" character.

    The items use the subsets "my/train", "my/val" and "my/test" alongside the plain
    "train" and "test" subsets, and two items are created without a subset argument.
    The dataset carries label, mask and points categories plus an ``infos`` mapping.
    """
    label_cat = LabelCategories(attributes={"a", "b", "score"})
    for label_name in (f"cat{i}" for i in range(5)):
        label_cat.add(label_name, attributes={"x", "y"})

    mask_cat = MaskCategories(generate_colormap(len(label_cat.items)))

    # One points category per label index.
    points_cat = PointsCategories()
    for label_idx in range(len(label_cat.items)):
        points_cat.add(label_idx, ["cat1", "cat2"], joints=[[0, 1]])

    cuboid_2d_points = [
        (1, 1),
        (3, 1),
        (3, 3),
        (1, 3),
        (1.5, 1.5),
        (3.5, 1.5),
        (3.5, 3.5),
        (1.5, 3.5),
    ]

    # id and group integer value can be higher than 32bits limits (COCO instances).
    big_id = 900100087038

    items = [
        DatasetItem(
            id="100/0",
            subset="my/train",
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            annotations=[
                Caption("hello", id=1),
                Caption("world", id=2, group=5),
                Label(2, id=3, attributes={"x": 1, "y": "2"}),
                Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={"score": 1.0}),
                Bbox(5, 6, 7, 8, id=5, group=5, attributes={"a": 1.5, "b": "text"}),
                Points(
                    [1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4, attributes={"x": 1, "y": "2"}
                ),
                Mask(
                    label=3, id=5, z_order=2, image=np.ones((2, 3)), attributes={"x": 1, "y": "2"}
                ),
                Ellipse(5, 6, 7, 8, label=3, id=5, z_order=2, attributes={"x": 1, "y": "2"}),
                Cuboid2D(
                    cuboid_2d_points,
                    label=3,
                    id=5,
                    z_order=2,
                    attributes={"x": 1, "y": "2"},
                ),
            ],
        ),
        DatasetItem(
            id=21,
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            subset="train",
            annotations=[Caption("test"), Label(2), Bbox(1, 2, 3, 4, label=5, id=42, group=42)],
        ),
        DatasetItem(
            id=2,
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            subset="my/val",
            annotations=[
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
            ],
        ),
        DatasetItem(
            id="1/1",
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            subset="test",
            annotations=[
                Cuboid3d(
                    [1.0, 2.0, 3.0],
                    [2.0, 2.0, 4.0],
                    [1.0, 3.0, 4.0],
                    id=6,
                    label=0,
                    attributes={"occluded": True},
                    group=6,
                )
            ],
        ),
        DatasetItem(
            id=42,
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            subset="my/test",
            attributes={"a1": 5, "a2": "42"},
        ),
        DatasetItem(
            id=42,
            media=Image.from_numpy(data=np.ones((10, 6, 3))),
            annotations=[
                Mask(id=big_id, group=big_id, image=np.ones((2, 3), dtype=np.uint8)),
                RleMask(
                    rle=mask_tools.encode(np.ones((2, 3), dtype=np.uint8, order="F")),
                    id=big_id,
                    group=big_id,
                ),
            ],
        ),
        DatasetItem(
            id="1/b/c",
            media=Image.from_file(path="1/b/c.qq", size=(2, 4)),
        ),
    ]

    categories = {
        AnnotationType.label: label_cat,
        AnnotationType.mask: mask_cat,
        AnnotationType.points: points_cat,
    }

    infos = {
        "string": "test",
        "int": 0,
        "float": 0.0,
        "string_list": ["test0", "test1", "test2"],
        "int_list": [0, 1, 2],
        "float_list": [0.0, 0.1, 0.2],
    }

    return Dataset.from_iterable(items, categories=categories, infos=infos)


@pytest.fixture
def fxt_test_datumaro_format_video_dataset(test_dir) -> Dataset:
video_path = osp.join(test_dir, "video.avi")
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/data_formats/datumaro/test_datumaro_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from datumaro.components.dataset_base import DatasetItem
from datumaro.components.environment import Environment
from datumaro.components.errors import PathSeparatorInSubsetNameError
from datumaro.components.importer import DatasetImportError
from datumaro.components.media import Image
from datumaro.components.project import Dataset
Expand Down Expand Up @@ -155,6 +156,31 @@ def test_can_save_and_load(
stream=stream,
)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
@pytest.mark.parametrize("require_media", [True, False])
@pytest.mark.parametrize("stream", [True, False])
def test_cannot_export_dataset_with_subset_containing_path_separators(
    self,
    fxt_test_datumaro_format_dataset_with_path_separator,
    test_dir,
    fxt_import_kwargs,
    fxt_export_kwargs,
    stream,
    require_media,
    helper_tc,
):
    """Check that the save-and-load helper raises PathSeparatorInSubsetNameError.

    The dataset fixture contains subset names with "/" in them; the test passes only
    if running the export/import round trip on it raises the error.
    """
    dataset = fxt_test_datumaro_format_dataset_with_path_separator
    export_fn = partial(
        self.exporter.convert,
        save_media=True,
        stream=stream,
        **fxt_export_kwargs,
    )
    round_trip_options = {
        "compare": compare_datasets,
        "require_media": require_media,
        "importer_args": fxt_import_kwargs,
        "stream": stream,
    }

    with pytest.raises(PathSeparatorInSubsetNameError):
        self._test_save_and_load(helper_tc, dataset, export_fn, test_dir, **round_trip_options)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_export_video_only_once(
self,
Expand Down

0 comments on commit 7da2334

Please sign in to comment.