From a3227e79fd4b6553af9aa10db43e373d2d5ae39c Mon Sep 17 00:00:00 2001 From: jenhaoyang Date: Wed, 29 Dec 2021 21:01:19 +0800 Subject: [PATCH 01/15] fix broken url (#599) * fix broken url for import command --- site/content/en/docs/user-manual/how_to_use_datumaro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/en/docs/user-manual/how_to_use_datumaro.md b/site/content/en/docs/user-manual/how_to_use_datumaro.md index ffb252f410..4b077f8b4f 100644 --- a/site/content/en/docs/user-manual/how_to_use_datumaro.md +++ b/site/content/en/docs/user-manual/how_to_use_datumaro.md @@ -104,7 +104,7 @@ to use Datumaro from the command-line: - Create a Datumaro project and operate on it: - Create an empty project with [`create`](/docs/user-manual/command-reference/create) - - Import existing datasets with [`import`](/docs/user-manual/command-reference/import) + - Import existing datasets with [`import`](/docs/user-manual/command-reference/sources#source-import) - Modify the project with [`transform`](/docs/user-manual/command-reference/transform) and [`filter`](/docs/user-manual/command-reference/filter) - Create new revisions of the project with [`commit`](/docs/user-manual/command-reference/commit), navigate over From 1d111b0a8bddf3becf4ca728a4508a5e58e2f79f Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 29 Dec 2021 16:02:26 +0300 Subject: [PATCH 02/15] Fix broken links in the sources section (#601) --- site/content/en/docs/user-manual/command-reference/sources.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/content/en/docs/user-manual/command-reference/sources.md b/site/content/en/docs/user-manual/command-reference/sources.md index d37abf8e6e..5f40c596d8 100644 --- a/site/content/en/docs/user-manual/command-reference/sources.md +++ b/site/content/en/docs/user-manual/command-reference/sources.md @@ -12,7 +12,7 @@ These commands are specific for Data Sources. 
Read more about them [here](/docs/ Datasets can be added to a Datumaro project with the `import` command, which adds a dataset link into the project and downloads (or copies) the dataset. If you need to add a dataset already copied into the project, -use the [`add`](./sources#source-add) command. +use the [`add`](#source-add) command. Dataset format readers can provide some additional import options. To pass such options, use the `--` separator after the main command arguments. @@ -77,7 +77,7 @@ datum export -f tf_detection_api -- --save-images Existing datasets can be added to a Datumaro project with the `add` command. The command adds a project-local directory as a data source in the project. -Unlike the [`import`](./sources#source-import) +Unlike the [`import`](#source-import) command, it does not copy datasets and only works with local directories. The source name is defined by the directory name. From add81ddb59502362fa65fa07e5bc4d8c9f61afde Mon Sep 17 00:00:00 2001 From: Anastasia Yasakova Date: Wed, 29 Dec 2021 16:21:03 +0300 Subject: [PATCH 03/15] Fix: Cannot convert LabelMe dataset, that has no subsets (#600) * fix label_me extractor * update Changelog --- CHANGELOG.md | 2 ++ datumaro/plugins/labelme_format.py | 2 +- tests/requirements.py | 1 + tests/test_labelme_format.py | 10 ++++++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f22e5eb371..67bfad3aac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fails in multimerge when lines are not approximated and when there are no label categories () +- Cannot convert LabelMe dataset, that has no subsets + () ### Security - TBD diff --git a/datumaro/plugins/labelme_format.py b/datumaro/plugins/labelme_format.py index 821e11d75d..4cf047050b 100644 --- a/datumaro/plugins/labelme_format.py +++ b/datumaro/plugins/labelme_format.py @@ -95,7 +95,7 @@ def 
_parse(self, dataset_root): items.append(DatasetItem(id=item_id, subset=subset, image=image, annotations=annotations)) - subsets.add(subset) + subsets.add(items[-1].subset) return items, categories, subsets def _escape(s): diff --git a/tests/requirements.py b/tests/requirements.py index b9f14e2823..f3ad6332ad 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -36,6 +36,7 @@ class Requirements: DATUM_BUG_219 = "Return format is not uniform" DATUM_BUG_257 = "Dataset.filter doesn't count removed items" DATUM_BUG_259 = "Dataset.filter fails on merged datasets" + DATUM_BUG_289 = "Cannot convert LabelMe dataset, that has no subsets" DATUM_BUG_314 = "Unsuccessful remap_labels" DATUM_BUG_402 = "Troubles running 'remap_labels' on ProjectDataset" DATUM_BUG_404 = "custom importer/extractor not loading" diff --git a/tests/test_labelme_format.py b/tests/test_labelme_format.py index f966c9f28b..e496cc9303 100644 --- a/tests/test_labelme_format.py +++ b/tests/test_labelme_format.py @@ -303,3 +303,13 @@ def test_can_import(self): parsed = Dataset.import_from(DUMMY_DATASET_DIR, 'label_me') compare_datasets(self, expected=target_dataset, actual=parsed) + + @mark_requirement(Requirements.DATUM_BUG_289) + def test_can_convert(self): + source_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'label_me') + with TestDir() as test_dir: + LabelMeConverter.convert(source_dataset, test_dir, save_images=True) + parsed_dataset = Dataset.import_from(test_dir, 'label_me') + + compare_datasets(self, source_dataset, parsed_dataset, + require_images=True) From f316af443c9761721e661834054c7e0bf6311eaa Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 10 Jan 2022 15:47:43 +0300 Subject: [PATCH 04/15] Avoid PermissionError in GitPython 3.1.25 (#614) --- requirements-default.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-default.txt b/requirements-default.txt index 1557af3a62..7511cd88db 100644 --- a/requirements-default.txt +++ 
b/requirements-default.txt @@ -1,2 +1,2 @@ dvc>=2.7.0 -GitPython>=3.1.18 +GitPython>=3.1.18,!=3.1.25 # https://github.com/openvinotoolkit/datumaro/issues/612 From a76d815e0d623d08709f8328555e64d57740e2a1 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 10 Jan 2022 16:36:26 +0300 Subject: [PATCH 05/15] Remove most hacks that were added to work around issues in pycocotools (#449) pycocotools 2.0.4 includes changes to the build system that make it possible to downgrade NumPy after installing pycocotools, and therefore: * --no-binary=pycocotools is no longer necessary; * TensorFlow can be installed in the same `pip` invocation as Datumaro. The only hack that remains (that I know of) is the usage of `pycocotools-windows` on Windows, since the `pycocotools` PyPI project still doesn't provide Windows wheels (or any other wheels). --- .github/workflows/health_check.yml | 3 +-- .github/workflows/pr_checks.yml | 3 +-- CHANGELOG.md | 2 ++ requirements-core.txt | 2 +- requirements.txt | 2 +- site/content/en/docs/user-manual/installation.md | 14 -------------- 6 files changed, 6 insertions(+), 20 deletions(-) diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml index dcc172b114..d40a6138d5 100644 --- a/.github/workflows/health_check.yml +++ b/.github/workflows/health_check.yml @@ -18,8 +18,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Installing dependencies run: | - pip install tensorflow pytest pytest-cov - pip install -e .[default,tfds] + pip install -e '.[default,tf,tfds]' pytest pytest-cov - name: Code instrumentation run: | pytest -v --cov --cov-report xml:coverage.xml diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index 98645c7085..affa016a39 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -29,8 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Installing dependencies run: | - pip install tensorflow pytest - pip install -e 
.[default,tfds] + pip install -e '.[default,tf,tfds]' pytest - name: Unit testing run: | pytest -v diff --git a/CHANGELOG.md b/CHANGELOG.md index 67bfad3aac..ee5672477d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `smooth_line` from `datumaro.util.annotation_util` - the function is renamed to `approximate_line` and has updated interface () +- The `pycocotools` dependency lower bound is raised to `2.0.4`. + () ### Deprecated - TBD diff --git a/requirements-core.txt b/requirements-core.txt index 6ebd01ee79..937c88db25 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -8,7 +8,7 @@ Pillow>=6.1.0 ruamel.yaml>=0.17.0 typing_extensions>=3.7.4.3 -pycocotools>=2.0.2; platform_system != "Windows" or python_version >= '3.9' +pycocotools>=2.0.4; platform_system != "Windows" or python_version >= '3.9' pycocotools-windows; platform_system == "Windows" and python_version < '3.9' PyYAML>=5.3.1 diff --git a/requirements.txt b/requirements.txt index 399f71d534..a128a65db0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ --r requirements-core.txt --no-binary=pycocotools # https://github.com/openvinotoolkit/datumaro/issues/253 +-r requirements-core.txt -r requirements-default.txt opencv-python-headless>=4.1.0.25 diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index 707adac15c..644bb228fc 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -61,20 +61,6 @@ plugin dependencies in the [plugins](/docs/user-manual/extending) section. This option can't be covered by extras due to Python packaging system limitations. -- Although Datumaro excludes `pycocotools` of version 2.0.2 in - requirements, it works with this version perfectly fine. 
The - reason for such requirement is binary incompatibility of the `numpy` - dependency in the `TensorFlow` and `pycocotools` binary packages, - and the current workaround forces this package to be build from sources - on most platforms - (see [#253](https://github.com/openvinotoolkit/datumaro/issues/253)). - If you need to use 2.0.2, make sure it is linked with the same version - of `numpy` as `TensorFlow` by reinstalling the package: - ``` bash - pip uninstall pycocotools - pip install pycocotools --no-binary=pycocotools - ``` - - When installing directly from the repository, you can change the installation branch with `...@`. Also use `--force-reinstall` parameter in this case. It can be useful for testing of unreleased From ead337f5612f19031a965c470e292a9f2dd00775 Mon Sep 17 00:00:00 2001 From: Kirill Sizov Date: Mon, 10 Jan 2022 16:38:21 +0300 Subject: [PATCH 06/15] Update formats documentation (#548) Add docs for ImageNet, LFW, Vgg face 2, WIDERFace formats --- site/content/en/docs/formats/imagenet.md | 146 +++++++++++++++++++++ site/content/en/docs/formats/lfw.md | 124 +++++++++++++++++ site/content/en/docs/formats/vgg_face2.md | 137 +++++++++++++++++++ site/content/en/docs/formats/wider_face.md | 130 ++++++++++++++++++ 4 files changed, 537 insertions(+) create mode 100644 site/content/en/docs/formats/imagenet.md create mode 100644 site/content/en/docs/formats/lfw.md create mode 100644 site/content/en/docs/formats/vgg_face2.md create mode 100644 site/content/en/docs/formats/wider_face.md diff --git a/site/content/en/docs/formats/imagenet.md b/site/content/en/docs/formats/imagenet.md new file mode 100644 index 0000000000..b728308503 --- /dev/null +++ b/site/content/en/docs/formats/imagenet.md @@ -0,0 +1,146 @@ +--- +title: 'ImageNet' +linkTitle: 'ImageNet' +description: '' +weight: 9 +--- + +## Format specification +ImageNet is one of the most popular datasets for image classification task, +this dataset is available for downloading 
+[here](https://image-net.org/download.php) + +Supported types of annotations: +- `Label` + +Format doesn't support any attributes for annotations objects. + +The original ImageNet dataset contains about 1.2M images and information +about class name for each image. Datumaro supports two versions of ImageNet +format: `imagenet` and `imagenet_txt`. The `imagenet_txt` format assumes storing +information about the class of the image in `*.txt` files. And `imagenet` format +assumes storing information about the class of the image in the name of +directory where is this image stored. + +## Import ImageNet dataset + +A Datumaro project with a ImageNet dataset can be created +in the following way: + +``` +datum create +datum import -f imagenet +# or +datum import -f imagenet_txt +``` + +> Note: if you use `datum import` then should not be a +> subdirectory of directory with Datumaro project, see more information about +> it in the [docs](/docs/user-manual/command-reference/sources/#source-add). + +Load ImageNet dataset through the Python API: + +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from('', format='imagenet_txt') +``` + +For successful importing of ImageNet dataset the input directory with dataset +should has the following structure: + + +{{< tabpane >}} + {{< tab header="imagenet">}} +imagenet_dataset/ +├── label_0 +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── label_1 +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── ... + {{< /tab >}} + {{< tab header="imagenet_txt">}} +imagenet_txt_dataset/ +├── images # directory with images +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── synsets.txt # optional, list of labels +└── train.txt # list of pairs (image_name, label) + {{< /tab >}} +{{< /tabpane >}} + +> Note: if you don't have synsets file then Datumaro will automatically generate +> classes with a name pattern `class-`. 
+ +Datumaro has few import options for `imagenet_txt` format, to apply them +use the `--` after the main command argument. + +`imagenet_txt` import options: +- `--labels` {`file`, `generate`}: allow to specify where to get label + descriptions from (use `file` to load from the file specified + by `--labels-file`; `generate` to create generic ones) +- `--labels-file` allow to specify path to the file with label descriptions + ("synsets.txt") + +## Export ImageNet dataset + +Datumaro can convert ImageNet into any other format +[Datumaro supports](/docs/user-manual/supported_formats). +To get the expected result, convert the dataset to a format +that supports `Label` annotation objects. + +``` +# Using `convert` command +datum convert -if imagenet -i \ + -f voc -o -- --save-images + +# Using Datumaro project +datum create +datum import -f imagenet_txt -- --labels generate +datum export -f open_images -o +``` + +And also you can convert your ImageNet dataset using Python API + +```python +from datumaro.components.dataset import Dataset + +imagenet_dataset = Dataset.import_from('', format='vgg_face2', save_images=True) +``` + +> Note: some formats have extra export options. For particular format see the +> [docs](/docs/formats/) to get information about it. 
+ +## Export dataset to the ImageNet format + +If your dataset contains `Label` for images and you want to convert this +dataset into the ImagetNet format, you can use Datumaro for it: + +``` +# Using convert command +datum convert -if open_images -i \ + -f imagenet_txt -o -- --save-images --save-dataset-meta + +# Using Datumaro project +datum create +datum import -f open_images +datum export -f imagenet -o +``` + +Extra options for exporting to ImageNet formats: +- `--save-images` allow to export dataset with saving images + (by default `False`) +- `--image-ext ` allow to specify image extension + for exporting the dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) diff --git a/site/content/en/docs/formats/lfw.md b/site/content/en/docs/formats/lfw.md new file mode 100644 index 0000000000..cc76f28580 --- /dev/null +++ b/site/content/en/docs/formats/lfw.md @@ -0,0 +1,124 @@ +--- +title: 'LFW' +linkTitle: 'LFW' +description: '' +weight: 6 +--- + +## Format specification + +[LFW (Labeled Faces in the Wild Home)](http://vis-www.cs.umass.edu/lfw/) +it's dataset for face identification task, +specification for this format is available +[here](http://vis-www.cs.umass.edu/lfw/README.txt). +You can also download original LFW dataset +[here](http://vis-www.cs.umass.edu/lfw/#download). + +Original dataset contains images with people faces. +For each image contains information about person's name, as well as +information about images that matched with this person +and mismatched with this person. +Also LFW contains additional information about landmark points on the face. 
+ + +Supported annotation types: +- `Label` +- `Points` (face landmark points) + +Supported attributes: +- `negative_pairs`: list with names of mismatched persons; +- `positive_pairs`: list with names of matched persons; + + +## Import LFW dataset + +Importing LFW dataset into the Datumaro project: +``` +datum create +datum import -f lfw +``` +See more information about adding datasets to the project in the +[docs](/docs/user-manual/command-reference/sources/#source-add). + +Also you can import LFW dataset from Python API: +```python +from datumaro.components.dataset import Dataset + +lfw_dataset = Dataset.import_from('', 'lfw') +``` + +For successful importing the LFW dataset, the directory with it +should has the following structure: + +``` +/ +├── subset_1 +│ ├── annotations +│ │ ├── landmarks.txt # list with landmark points for each image +│ │ ├── pairs.txt # list of matched and mismatched pairs of person +│ │ └── people.txt # optional file with a list of persons name +│ └── images +│ ├── name0 +│ │ ├── name0_0001.jpg +│ │ ├── name0_0002.jpg +│ │ ├── ... +│ ├── name1 +│ │ ├── name1_0001.jpg +│ │ ├── name1_0002.jpg +│ │ ├── ... +├── subset_2 +│ ├── ... +├── ... +``` + +Full description of annotation `*.txt` files available +[here](http://vis-www.cs.umass.edu/lfw/README.txt). + +## Export LFW dataset + +With Datumaro you can convert LFW dataset into any other +format [Datumaro supports](/docs/user-manual/supported_formats/). +Pay attention that this format should also support `Label` and/or `Points` +annotation types. + + +There is few ways to convert LFW dataset into other format: + +``` + +# Converting to ImageNet with `convert` command: +datum convert -if lfw -i ./lfw_dataset \ + -f imagenet -o ./output_dir -- --save-images + + +# Converting to VggFace2 through the Datumaro project: +datum create +datum add -f lfw ./lfw_dataset +datum export -f vgg_face2 -o ./output_dir2 +``` + +> Note: some formats have extra export options. 
For particular format see the +> [docs](/docs/formats/) to get information about it. + +## Export dataset to the LFW format + +With Datumaro you can export dataset that has `Label` or/and `Points` +annotations to the LFW format, example: + +``` +# Converting VGG Face2 dataset into the LFW format +datum convert -if vgg_face2 -i ./voc_dataset \ + -f lfw -o ./output_dir + + +# Export dataaset to the LFW format through the Datumaro project: +datum create +datum import -f voc_classification ../vgg_dataset +datum export -f lfw -o ./output_dir -- --save-images --image-ext png +``` + +Available extra export options for LFW dataset format: +- `--save-images` allow to export dataset with saving images. + (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original) diff --git a/site/content/en/docs/formats/vgg_face2.md b/site/content/en/docs/formats/vgg_face2.md new file mode 100644 index 0000000000..877d793463 --- /dev/null +++ b/site/content/en/docs/formats/vgg_face2.md @@ -0,0 +1,137 @@ +--- +title: 'Vgg Face2 CSV' +linkTitle: 'Vgg Face2 CSV' +description: '' +weight: 24 +--- + +## Format specification + +Vgg Face 2 is a dataset for face-recognition task, +the repository with some information and sample data of Vgg Face 2 is available +[here](https://github.com/ox-vgg/vgg_face2) + +Supported types of annotations: +- `Bbox` +- `Points` +- `Label` + +Format doesn't support any attributes for annotations objects. + +## Import Vgg Face2 dataset + +A Datumaro project with a Vgg Face 2 dataset can be created +in the following way: + +``` +datum create +datum import -f vgg_face2 +``` + +> Note: if you use `datum import` then should not be a +> subdirectory of directory with Datumaro project, see more information about +> it in the [docs](/docs/user-manual/command-reference/sources/#source-add). 
+ +And you can also load Vgg Face 2 through the Python API: + +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from('', format='vgg_face2') +``` + +For successful importing of Vgg Face2 face the input directory with dataset +should has the following structure: + +``` +vgg_face2_dataset/ +├── labels.txt # labels mapping +├── bb_landmark +│ ├── loose_bb_test.csv # information about bounding boxes for test subset +│ ├── loose_bb_train.csv +│ ├── loose_bb_.csv +│ ├── loose_landmark_test.csv # landmark points information for test subset +│ ├── loose_landmark_train.csv +│ └── loose_landmark_.csv +├── test +│ ├── n000001 # directory with images for n000001 label +│ │ ├── 0001_01.jpg +│ │ ├── 0001_02.jpg +│ │ ├── ... +│ ├── n000002 # directory with images for n000002 label +│ │ ├── 0002_01.jpg +│ │ ├── 0003_01.jpg +│ │ ├── ... +│ ├── ... +├── train +│ ├── n000004 +│ │ ├── 0004_01.jpg +│ │ ├── 0004_02.jpg +│ │ ├── ... +│ ├── ... +└── + ├── ... +``` + +## Export Vgg Face2 dataset + +Datumaro can convert a Vgg Face2 dataset into any other format +[Datumaro supports](/docs/user-manual/supported_formats/). +There is few examples how to do it: + +``` +# Using `convert` command +datum convert -if vgg_face2 -i \ + -f voc -o -- --save-images + +# Using Datumaro project +datum create +datum import -f vgg_face2 +datum export -f yolo -o +``` + +> Note: to get the expected result from the conversion, the output format +> should support the same types of annotations (one or more) as Vgg Face2 +> (`Bbox`, `Points`, `Label`) + +And also you can convert your Vgg Face2 dataset using Python API + +```python +from datumaro.components.dataset import Dataset + +vgg_face2_dataset = Dataset.import_from('', format='open_images', save_images=True) +``` + +> Note: some formats have extra export options. For particular format see the +> [docs](/docs/formats/) to get information about it. 
+ +## Export dataset to the Vgg Face2 format + +If you have dataset in some format and want to convert this dataset +into the Vgg Face2, ensure that this dataset contains `Bbox` or/and `Points` +or/and `Label` and use Datumaro to perform conversion. +There is few examples: + +``` +# Using convert command +datum convert -if wider_face -i \ + -f vgg_face2 -o + +# Using Datumaro project +datum create +datum import -f wider_face +datum export -f vgg_face2 -o -- --save-images --image-ext '.png' +``` + +> Note: `vgg_face2` format supports only one `Bbox` per image + +Extra options for exporting to Vgg Face2 format: + +- `--save-images` allow to export dataset with saving images + (by default `False`) +- `--image-ext ` allow to specify image extension + for exporting the dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) diff --git a/site/content/en/docs/formats/wider_face.md b/site/content/en/docs/formats/wider_face.md new file mode 100644 index 0000000000..a8f3763b4a --- /dev/null +++ b/site/content/en/docs/formats/wider_face.md @@ -0,0 +1,130 @@ +--- +title: WIDER Face +linkTitle: WIDER Face +description: '' +weight: 19 +--- + +## Format specification + +WIDER Face dataset is a face detection benchmark dataset, +that available for download [here](http://shuoyang1213.me/WIDERFACE/#Download). + +Supported types of annotation: +- `Bbox` +- `Label` + +Supported attributes for bboxes: +- `blur`: + - 0 face without blur; + - 1 face with normal blur; + - 2 face with heavy blur. +- `expression`: + - 0 face with typical expression; + - 1 face with exaggerate expression. +- `illumination`: + - 0 image contains normal illumination; + - 1 image contains extreme illumination. +- `pose`: + - 0 pose is typical; + - 1 pose is atypical. +- `invalid`: + - 0 image is valid; + - 1 image is invalid. +- `occluded`: + - 0 face without occlusion; + - 1 face with partial occlusion; + - 2 face with heavy occlusion. 
+ + +## Import WIDER Face dataset + +Importing of WIDER Face dataset into the Datumaro project: +``` +datum create +datum import -f wider_face +``` + +Directory with WIDER Face dataset should has the following structure: +``` + +├── labels.txt # optional file with list of classes +├── wider_face_split # directory with description of bboxes for each image +│   ├── wider_face_subset1_bbx_gt.txt +│   ├── wider_face_subset2_bbx_gt.txt +│   ├── ... +├── WIDER_subset1 # instead of 'subset1' you can use any other subset name +│   └── images +│   ├── 0--label_0 # instead of 'label_' you can use any other class name +│   │   ├── 0_label_0_image_01.jpg +│   │   ├── 0_label_0_image_02.jpg +│   │   ├── ... +│   ├── 1--label_1 +│   │   ├── 1_label_1_image_01.jpg +│   │   ├── 1_label_1_image_02.jpg +│   │   ├── ... +│   ├── ... +├── WIDER_subset2 +│ └── images +│  ├── ... +├── ... +``` +Check [README](http://shuoyang1213.me/WIDERFACE/support/bbx_annotation/wider_face_split.zip) +file of the original WIDER Face dataset to get more information +about structure of `.txt` annotation files. +Also example of WIDER Face dataset available in our +[test assets](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset). + +## Export WIDER Face dataset + +With Datumaro you can convert WIDER Face dataset into any other +format [Datumaro supports](/docs/user-manual/supported_formats/). +Pay attention that this format should also support `Label` and/or `Bbox` +annotation types. + +Few ways to export WIDER Face dataset using CLI: +``` +# Using `convert` command +datum convert -if wider_face -i \ + -f voc -o -- --save-images + +# Through the Datumaro project +datum create +datum import -f wider_face +datum export -f voc -o -- -save-images +``` + +Export WIDER Face dataset using Python API: +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from(' Note: some formats have extra export options. 
For particular format see the +> [docs](/docs/formats/) to get information about it. + +## Export to WIDER Face dataset + +Using Datumaro you can convert your dataset into the WIDER Face format, +but for succseful exporting your dataset should contain `Label` and/or `Bbox`. + +Here example of exporting VOC dataset (object detection task) +into the WIDER Face format: + +``` +datum create +datum import -f voc_detection +datum export -f wider_face -o -- --save-images --image-ext='.png' +``` + +Available extra export options for WIDER Face dataset format: +- `--save-images` allow to export dataset with saving images. + (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original) From 8f62699976d9eff4f482a2feabbda6cc320fec98 Mon Sep 17 00:00:00 2001 From: jenhaoyang Date: Tue, 11 Jan 2022 15:27:59 +0800 Subject: [PATCH 07/15] add more example for add command (#608) * add more examples for add command --- .../user-manual/command-reference/sources.md | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/site/content/en/docs/user-manual/command-reference/sources.md b/site/content/en/docs/user-manual/command-reference/sources.md index 5f40c596d8..fd0905563e 100644 --- a/site/content/en/docs/user-manual/command-reference/sources.md +++ b/site/content/en/docs/user-manual/command-reference/sources.md @@ -125,6 +125,30 @@ datum add -f cvat dataset2/train.xml datum export -f yolo -- --save-images ``` +Example: add an existing dataset into a project, avoid data copying + +To add a dataset, we need to have it inside the project directory: + +```bash +proj/ +├─ .datumaro/ +├─ .dvc/ +├─ my_coco/ +│ └─ images/ +│ ├─ image1.jpg +│ └─ ... 
+│ └─ annotations/ +│ └─ coco_annotation.json +├─ .dvcignore +└─ .gitignore +``` + +``` bash +datum create -o proj/ +mv ~/my_coco/ proj/my_coco/ # move the dataset into the project directory +datum add -p proj/ -f coco proj/my_coco/ +``` + ### Remove Datasets To remove a data source from a project, use the `remove` command. From 5da6618a355c8f979d3702b9a0799dd2a9a37b9f Mon Sep 17 00:00:00 2001 From: Anastasia Yasakova Date: Tue, 11 Jan 2022 12:21:52 +0300 Subject: [PATCH 08/15] Update the list of formats in the documentation (#598) * update the list of formats in docs and readme --- README.md | 28 ++- .../en/docs/user-manual/supported_formats.md | 183 ++++++++++-------- 2 files changed, 114 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index ebc822fab1..82bbd19d4e 100644 --- a/README.md +++ b/README.md @@ -26,28 +26,22 @@ CVAT annotations ---> Publication, statistics etc. [(Back to top)](#dataset-management-framework-datumaro) -- Dataset reading, writing, conversion in any direction. 
[Supported formats](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats): - - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`) - - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) - - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) - - [WIDER Face](http://shuoyang1213.me/WIDERFACE/) (`bboxes`) - - [VGGFace2](https://github.com/ox-vgg/vgg_face2) (`landmarks`, `bboxes`) - - [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf) - - [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) - - [ImageNet](http://image-net.org/) +- Dataset reading, writing, conversion in any direction. 
- [CIFAR-10/100](https://www.cs.toronto.edu/~kriz/cifar.html) (`classification`) - - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`) - - [MNIST in CSV](https://pjreddie.com/projects/mnist-in-csv/) (`classification`) - - [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) - [Cityscapes](https://www.cityscapes-dataset.com/) - - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`) - - [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format_navi) (`point cloud`) + - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - [CVAT](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) + - [ImageNet](http://image-net.org/) + - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`) - [LabelMe](http://labelme.csail.mit.edu/Release3.0) - - [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`) - - [Market-1501](https://www.aitribune.com/dataset/2018051063) (`person re-identification`) - [LFW](http://vis-www.cs.umass.edu/lfw/) (`classification`, `person re-identification`, `landmarks`) + - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`) + - [Open Images](https://storage.googleapis.com/openimages/web/download.html) + - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`) + - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) + - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) + + Other formats and documentation for them can be found 
[here](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats). - Dataset building - Merging multiple datasets into one - Dataset filtering by a custom criteria: diff --git a/site/content/en/docs/user-manual/supported_formats.md b/site/content/en/docs/user-manual/supported_formats.md index 8311652763..0de49a0107 100644 --- a/site/content/en/docs/user-manual/supported_formats.md +++ b/site/content/en/docs/user-manual/supported_formats.md @@ -6,61 +6,45 @@ weight: 3 --- List of supported formats: -- MS COCO - (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`,`panoptic`, `stuff`) - - [Format specification](http://cocodataset.org/#format-data) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/coco_dataset) - - `labels` are our extension - like `instances` with only `category_id` - - [Format documentation](/docs/formats/coco) -- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), - `action_classification`, `person_layout`) - - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/voc_dataset) - - [Format documentation](/docs/formats/pascal_voc) -- YOLO (`bboxes`) - - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/yolo_dataset) - - [Format documentation](/docs/formats/yolo) -- TF Detection API (`bboxes`, `masks`) - - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), - [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/tf_detection_api_dataset) -- WIDER Face 
(`bboxes`) - - [Format specification](http://shuoyang1213.me/WIDERFACE/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset) -- VGGFace2 (`landmarks`, `bboxes`) - - [Format specification](https://github.com/ox-vgg/vgg_face2) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vgg_face2_dataset) -- MOT sequences - - [Format specification](https://arxiv.org/pdf/1906.04567.pdf) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mot_dataset) -- MOTS (png) - - [Format specification](https://www.vision.rwth-aachen.de/page/mots) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mots_dataset) -- ImageNet (`classification`, `detection`) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_dataset) - - [Dataset example (txt for classification)](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_txt_dataset) - - Detection format is the same as in PASCAL VOC +- ADE20k (v2017) (import-only) + - [Format specification](https://www.kaggle.com/soumikrakshit/ade20k) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset) + - [Format documentation](/docs/formats/ade20k2017) +- ADE20k (v2020) (import-only) + - [Format specification](https://groups.csail.mit.edu/vision/datasets/ADE20K/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2020_dataset) + - [Format documentation](/docs/formats/ade20k2020) +- Align CelebA (`classification`, `landmarks`) (import-only) + - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/align_celeba_dataset) + - [Format documentation](/docs/formats/align_celeba) +- CamVid (`segmentation`) 
+ - [Format specification](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/camvid_dataset) +- CelebA (`classification`, `detection`, `landmarks`) (import-only) + - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/celeba_dataset) + - [Format documentation](/docs/formats/celeba) - CIFAR-10/100 (`classification` (python version)) - [Format specification](https://www.cs.toronto.edu/~kriz/cifar.html) - [Dataset example CIFAR-10](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cifar10_dataset) - [Dataset example CIFAR-100](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cifar100_dataset) - [Format documentation](/docs/formats/cifar) -- MNIST (`classification`) - - [Format specification](http://yann.lecun.com/exdb/mnist/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_dataset) - - [Format documentation](/docs/formats/mnist) -- MNIST in CSV (`classification`) - - [Format specification](https://pjreddie.com/projects/mnist-in-csv/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_csv_dataset) - - [Format documentation](/docs/formats/mnist) -- CamVid (`segmentation`) - - [Format specification](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/camvid_dataset) - Cityscapes (`segmentation`) - [Format specification](https://www.cityscapes-dataset.com/dataset-overview/) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cityscapes_dataset) - [Format documentation](/docs/formats/cityscapes) +- CVAT (`for images`, `for video` (import-only)) + - [Format 
specification](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cvat_dataset) +- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`) + - [Format specification](https://rrc.cvc.uab.es/?ch=2) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/icdar_dataset) +- ImageNet (`classification`, `detection`) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_dataset) + - [Dataset example (txt for classification)](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_txt_dataset) + - Detection format is the same as in PASCAL VOC + - [Format documentation](/docs/formats/imagenet) - KITTI (`segmentation`, `detection`) - [Format specification](http://www.cvlibs.net/datasets/kitti/index.php) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/kitti_dataset) @@ -69,53 +53,92 @@ List of supported formats: - [Format specification](http://www.cvlibs.net/datasets/kitti/raw_data.php) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/kitti_dataset/kitti_raw) - [Format documentation](/docs/formats/kitti_raw) +- LabelMe (`labels`, `boxes`, `masks`) + - [Format specification](http://labelme.csail.mit.edu/Release3.0) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/labelme_dataset) +- LFW (`classification`, `person re-identification`, `landmarks`) + - [Format specification](http://vis-www.cs.umass.edu/lfw/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/lfw_dataset) + - [Format documentation](/docs/formats/lfw) +- Mapillary Vistas (import-only) + - [Format specification](https://www.mapillary.com/dataset/vistas) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mapillary_vistas_dataset) + - [Format documentation](/docs/formats/mapillary_vistas) +- Market-1501 (`person re-identification`) + - [Format specification](https://www.aitribune.com/dataset/2018051063) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/market1501_dataset) +- MARS (import-only) + - [Format specification](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mars_dataset) + - [Format documentation](/docs/formats/mars) +- MNIST (`classification`) + - [Format specification](http://yann.lecun.com/exdb/mnist/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_dataset) + - [Format documentation](/docs/formats/mnist) +- MNIST in CSV (`classification`) + - [Format specification](https://pjreddie.com/projects/mnist-in-csv/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_csv_dataset) + - [Format documentation](/docs/formats/mnist) +- MOT sequences + - [Format specification](https://arxiv.org/pdf/1906.04567.pdf) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mot_dataset) +- MOTS (png) + - [Format specification](https://www.vision.rwth-aachen.de/page/mots) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mots_dataset) +- MPII Human Pose Dataset (`detection`, `pose estimation`) (import-only) + - [Format specification](http://human-pose.mpi-inf.mpg.de) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_dataset) + - [Format documentation](/docs/formats/mpii) +- MPII Human Pose Dataset (JSON) (`detection`, `pose estimation`) (import-only) + - [Format specification](http://human-pose.mpi-inf.mpg.de) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_json_dataset) + - [Format documentation](/docs/formats/mpii_json) +- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`,`panoptic`, `stuff`) + - [Format specification](http://cocodataset.org/#format-data) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/coco_dataset) + - `labels` are our extension - like `instances` with only `category_id` + - [Format documentation](/docs/formats/coco) +- Open Images (`classification`, `detection`, `segmentation`) + - [Format specification](https://storage.googleapis.com/openimages/web/download.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/open_images_dataset) + - [Format documentation](/docs/formats/open_images) +- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), + `action_classification`, `person_layout`) + - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/voc_dataset) + - [Format documentation](/docs/formats/pascal_voc) - Supervisely (`pointcloud`) - [Format specification](https://docs.supervise.ly/data-organization/00_ann_format_navi) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/sly_pointcloud_dataset) - [Format documentation](/docs/formats/sly_pointcloud) -- SYNTHIA (`segmentation`) +- SYNTHIA (`segmentation`) (import-only) - [Format specification](https://synthia-dataset.net/) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/synthia_dataset) - [Format documentation](/docs/formats/synthia) -- CVAT - - [Format specification](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) - - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cvat_dataset) -- LabelMe - - [Format specification](http://labelme.csail.mit.edu/Release3.0) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/labelme_dataset) -- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`) - - [Format specification](https://rrc.cvc.uab.es/?ch=2) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/icdar_dataset) -- Market-1501 (`person re-identification`) - - [Format specification](https://www.aitribune.com/dataset/2018051063) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/market1501_dataset) -- LFW (`classification`, `person re-identification`, `landmarks`) - - [Format specification](http://vis-www.cs.umass.edu/lfw/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/lfw_dataset) -- CelebA (`classification`, `detection`, `landmarks`) - - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/celeba_dataset) - - [Format documentation](/docs/formats/celeba) -- Align CelebA (`classification`, `landmarks`) - - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/align_celeba_dataset) - - [Format documentation](/docs/formats/align_celeba) -- VoTT CSV (`detection`) +- TF Detection API (`bboxes`, `masks`) + - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), + [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/tf_detection_api_dataset) +- VGGFace2 (`landmarks`, `bboxes`) + - [Format specification](https://github.com/ox-vgg/vgg_face2) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vgg_face2_dataset) + - [Format documentation](/docs/formats/vgg_face2) +- VoTT CSV (`detection`) (import-only) - [Format specification](https://github.com/microsoft/VoTT) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vott_csv_dataset) - [Format documentation](/docs/formats/vott_csv) -- VoTT JSON (`detection`) +- VoTT JSON (`detection`) (import-only) - [Format specification](https://github.com/microsoft/VoTT) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vott_json_dataset) - [Format documentation](/docs/formats/vott_json) -- MPII Human Pose Dataset (`detection`, `pose estimation`) - - [Format specification](http://human-pose.mpi-inf.mpg.de) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_dataset) - - [Format documentation](/docs/formats/mpii) -- MPII Human Pose Dataset (JSON) (`detection`, `pose estimation`) - - [Format specification](http://human-pose.mpi-inf.mpg.de) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_json_dataset) - - [Format documentation](/docs/formats/mpii_json) +- WIDER Face (`bboxes`) + - [Format specification](http://shuoyang1213.me/WIDERFACE/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset) + - [Format documentation](/docs/formats/wider_face) +- YOLO (`bboxes`) + - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/yolo_dataset) + - [Format documentation](/docs/formats/yolo) ### 
Supported annotation types From 46bbbbf4bfaca5464c06c186880d3f17639d7caa Mon Sep 17 00:00:00 2001 From: Timur Osmanov <54434686+TOsmanov@users.noreply.github.com> Date: Tue, 11 Jan 2022 12:26:23 +0300 Subject: [PATCH 09/15] Improved documentation site. Added copy button for the code block (#611) * fix broken links * add the copy code button --- site/assets/scss/_custom.scss | 34 +++++++++++++++++++ site/config.toml | 2 +- site/content/en/docs/contributing.md | 4 ++- site/content/en/docs/design.md | 2 +- site/content/en/docs/developer_manual.md | 2 +- site/content/en/docs/formats/ade20k2017.md | 6 ++-- site/content/en/docs/formats/ade20k2020.md | 6 ++-- site/content/en/docs/formats/align_celeba.md | 6 ++-- site/content/en/docs/formats/celeba.md | 6 ++-- site/content/en/docs/formats/cifar.md | 9 +++-- site/content/en/docs/formats/cityscapes.md | 13 ++++--- site/content/en/docs/formats/coco.md | 8 +++-- site/content/en/docs/formats/icdar.md | 7 ++-- site/content/en/docs/formats/kitti.md | 14 +++++--- site/content/en/docs/formats/kitti_raw.md | 8 +++-- .../en/docs/formats/mapillary_vistas.md | 8 +++-- site/content/en/docs/formats/market1501.md | 7 ++-- site/content/en/docs/formats/mnist.md | 8 +++-- site/content/en/docs/formats/mpii.md | 4 ++- site/content/en/docs/formats/mpii_json.md | 4 ++- site/content/en/docs/formats/open_images.md | 11 +++--- site/content/en/docs/formats/pascal_voc.md | 19 ++++++----- .../content/en/docs/formats/sly_pointcloud.md | 10 ++++-- site/content/en/docs/formats/synthia.md | 6 ++-- site/content/en/docs/formats/vott_csv.md | 6 ++-- site/content/en/docs/formats/vott_json.md | 6 ++-- site/content/en/docs/formats/yolo.md | 16 +++++---- .../user-manual/command-reference/stats.md | 2 +- .../command-reference/transform.md | 3 +- site/content/en/docs/user-manual/extending.md | 12 +++++-- .../en/docs/user-manual/installation.md | 3 +- .../en/docs/user-manual/supported_formats.md | 2 +- 32 files changed, 180 insertions(+), 74 deletions(-) diff --git 
a/site/assets/scss/_custom.scss b/site/assets/scss/_custom.scss index c7244b3cfe..90592013a0 100644 --- a/site/assets/scss/_custom.scss +++ b/site/assets/scss/_custom.scss @@ -71,3 +71,37 @@ html { scroll-padding-top: 70px; /* height of sticky header */ } + +/* Code blocks */ + +div.code-toolbar .toolbar { + padding-right: 0.6em; +} + +pre[class*="language-"] { + background: #f8f9fa !important; +} + +details { + max-width: 80%; +} + +@media (max-width: 991px) { + details { + max-width: 100% !important; + } +} + +li > details { + max-width: 100%; +} + +.code-toolbar { + max-width: 80%; +} + +details > .code-toolbar, +details > summary > .code-toolbar, +.highlight > .code-toolbar { + max-width: 100%; +} diff --git a/site/config.toml b/site/config.toml index be9754a89b..26ccb47a04 100644 --- a/site/config.toml +++ b/site/config.toml @@ -139,7 +139,7 @@ algolia_docsearch = false offlineSearch = true # Enable syntax highlighting and copy buttons on code blocks with Prism -prism_syntax_highlighting = false +prism_syntax_highlighting = true # User interface configuration [params.ui] diff --git a/site/content/en/docs/contributing.md b/site/content/en/docs/contributing.md index ad9b68ed10..0e5e8070ca 100644 --- a/site/content/en/docs/contributing.md +++ b/site/content/en/docs/contributing.md @@ -131,7 +131,9 @@ To run tests use: ``` bash pytest -v -# or +``` +or +``` bash python -m pytest -v ``` diff --git a/site/content/en/docs/design.md b/site/content/en/docs/design.md index 5acef3aeb1..ccf64f6251 100644 --- a/site/content/en/docs/design.md +++ b/site/content/en/docs/design.md @@ -82,7 +82,7 @@ extending CVAT UI capabilities regarding task and project operations. It should be capable of downloading and processing data from CVAT. 
-```lang-none +``` User | v diff --git a/site/content/en/docs/developer_manual.md b/site/content/en/docs/developer_manual.md index 7dabd02b9c..6fec21a601 100644 --- a/site/content/en/docs/developer_manual.md +++ b/site/content/en/docs/developer_manual.md @@ -16,7 +16,7 @@ all of this. Basic library usage and data flow: -```lang-none +``` Extractors -> Dataset -> Converter | Filtration diff --git a/site/content/en/docs/formats/ade20k2017.md b/site/content/en/docs/formats/ade20k2017.md index 6c3fc6bc52..de26b2a3a7 100644 --- a/site/content/en/docs/formats/ade20k2017.md +++ b/site/content/en/docs/formats/ade20k2017.md @@ -83,7 +83,7 @@ image. Each line in the text file contains: Each column is separated by a `#`. See example of dataset [here](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset). -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -98,7 +98,9 @@ formats using CLI: datum create datum import -f ade20k2017 datum export -f coco -o -- --save-images -# or +``` +or +``` bash datum convert -if ade20k2017 -i \ -f coco -o -- --save-images ``` diff --git a/site/content/en/docs/formats/ade20k2020.md b/site/content/en/docs/formats/ade20k2020.md index fe79939291..21f416d4a5 100644 --- a/site/content/en/docs/formats/ade20k2020.md +++ b/site/content/en/docs/formats/ade20k2020.md @@ -102,7 +102,7 @@ See our [tests asset](https://github.com/openvinotoolkit/datumaro/tree/develop/t for example of this file, or check [ADE20K toolkit](https://github.com/CSAILVision/ADE20K) for it. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -117,7 +117,9 @@ formats using CLI: datum create datum import -f ade20k2020 datum export -f coco -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if ade20k2020 -i \ -f coco -o -- --save-images ``` diff --git a/site/content/en/docs/formats/align_celeba.md b/site/content/en/docs/formats/align_celeba.md index dca8ad5ff0..319898f4b4 100644 --- a/site/content/en/docs/formats/align_celeba.md +++ b/site/content/en/docs/formats/align_celeba.md @@ -70,7 +70,7 @@ landmarks and subsets respectively (optional). The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -85,7 +85,9 @@ formats using CLI: datum create datum import -f align_celeba datum export -f imagenet_txt -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if align_celeba -i \ -f imagenet_txt -o -- --save-images ``` diff --git a/site/content/en/docs/formats/celeba.md b/site/content/en/docs/formats/celeba.md index e975a1eedd..9830648fa6 100644 --- a/site/content/en/docs/formats/celeba.md +++ b/site/content/en/docs/formats/celeba.md @@ -72,7 +72,7 @@ attributes, bounding boxes, landmarks and subsets respectively The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -87,7 +87,9 @@ formats using CLI: datum create datum import -f celeba datum export -f imagenet_txt -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if celeba -i \ -f imagenet_txt -o -- --save-images ``` diff --git a/site/content/en/docs/formats/cifar.md b/site/content/en/docs/formats/cifar.md index b21177f016..74aa613ac2 100644 --- a/site/content/en/docs/formats/cifar.md +++ b/site/content/en/docs/formats/cifar.md @@ -102,7 +102,7 @@ CIFAR-100: 'coarse_labels': list of integers ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -117,7 +117,9 @@ formats using CLI: datum create datum import -f cifar datum export -f imagenet -o -# or +``` +or +``` bash datum convert -if cifar -i \ -f imagenet -o -- --save-images ``` @@ -139,7 +141,8 @@ There are several ways to convert a dataset to CIFAR format: # export dataset into CIFAR format from existing project datum export -p -f cifar -o \ -- --save-images - +``` +``` bash # converting to CIFAR format from other format datum convert -if imagenet -i \ -f cifar -o -- --save-images diff --git a/site/content/en/docs/formats/cityscapes.md b/site/content/en/docs/formats/cityscapes.md index e110dac60c..274a21b9e7 100644 --- a/site/content/en/docs/formats/cityscapes.md +++ b/site/content/en/docs/formats/cityscapes.md @@ -68,7 +68,7 @@ Annotated files description: is the instance ID. If a certain annotation describes multiple instances, then the pixels have the regular ID of that class -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `label_colors.txt`. 
If the `dataset_meta.json` is not represented in the dataset, then `label_colors.txt` will be imported if possible. @@ -97,7 +97,9 @@ formats using CLI: datum create datum import -f cityscapes datum export -f voc -o -# or +``` +or +``` bash datum convert -if cityscapes -i \ -f voc -o -- --save-images ``` @@ -119,6 +121,8 @@ There are several ways to convert a dataset to Cityscapes format: # export dataset into Cityscapes format from existing project datum export -p -f cityscapes -o \ -- --save-images +``` +``` bash # converting to Cityscapes format from other format datum convert -if voc -i \ -f cityscapes -o -- --save-images @@ -139,8 +143,9 @@ Extra options for exporting to Cityscapes format: # 255 0 0 person #... datum export -f cityscapes -- --label-map mycolormap.txt - -# or you can use original cityscapes colomap: +``` +or you can use original cityscapes colormap: +``` bash datum export -f cityscapes -- --label-map cityscapes ``` diff --git a/site/content/en/docs/formats/coco.md b/site/content/en/docs/formats/coco.md index 99a14bde35..04fc788ad4 100644 --- a/site/content/en/docs/formats/coco.md +++ b/site/content/en/docs/formats/coco.md @@ -132,7 +132,7 @@ task-specific formats instead of plain `coco`: `coco_captions`, `coco_person_keypoints`, `coco_stuff`. In this case all items of the dataset will be added to the `default` subset. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). You can import a dataset for one or several tasks instead of the whole dataset.
This option also allows to import annotation @@ -164,7 +164,9 @@ using CLI: datum create datum import -f coco datum export -f voc -o -# or +``` +or +``` bash datum convert -if coco -i -f voc -o ``` @@ -185,6 +187,8 @@ There are several ways to convert a dataset to COCO format: # export dataset into COCO format from existing project datum export -p -f coco -o \ -- --save-images +``` +``` bash # converting to COCO format from other format datum convert -if voc -i \ -f coco -o -- --save-images diff --git a/site/content/en/docs/formats/icdar.md b/site/content/en/docs/formats/icdar.md index ba2ed7e5f5..2be4fb4183 100644 --- a/site/content/en/docs/formats/icdar.md +++ b/site/content/en/docs/formats/icdar.md @@ -42,7 +42,7 @@ Supported attributes: There is few ways to import ICDAR dataset with Datumaro: - Through the Datumaro project -``` +``` bash datum create datum import -f icdar_text_localization datum import -f icdar_text_segmentation @@ -107,11 +107,12 @@ See more information about adding datasets to the project in the ## Export to other formats Datumaro can convert ICDAR dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). Examples: -``` +``` bash # converting ICDAR text segmentation dataset into the VOC with `convert` command datum convert -if icdar_text_segmentation -i source_dataset \ -f voc -o export_dir -- --save-images - +``` +``` bash # converting ICDAR text localization into the LabelMe through Datumaro project datum create datum import -f icdar_text_localization source_dataset diff --git a/site/content/en/docs/formats/kitti.md b/site/content/en/docs/formats/kitti.md index fde450c3d7..23227326a0 100644 --- a/site/content/en/docs/formats/kitti.md +++ b/site/content/en/docs/formats/kitti.md @@ -15,7 +15,7 @@ Supported tasks / formats: The format specification is available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip). 
- [Segmentation](http://www.cvlibs.net/datasets/kitti/eval_semseg.php?benchmark=semantics2015) - `kitti_segmentation` The format specification is available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_semantics.zip). -- Raw 3D / Velodyne Points - described [here](/formats/kitti_raw) +- Raw 3D / Velodyne Points - described [here](/docs/formats/kitti_raw) Supported annotation types: - `Bbox` (object detection) @@ -100,7 +100,7 @@ KITTI segmentation dataset directory should have the following structure: └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `label_colors.txt`. If the `dataset_meta.json` is not represented in the dataset, then `label_colors.txt` will be imported if possible. @@ -131,7 +131,9 @@ There are several ways to convert a KITTI dataset to other dataset formats: datum create datum import -f kitti datum export -f cityscapes -o -# or +``` +or +``` bash datum convert -if kitti -i -f cityscapes -o ``` @@ -152,6 +154,8 @@ There are several ways to convert a dataset to KITTI format: # export dataset into KITTI format from existing project datum export -p -f kitti -o \ -- --save-images +``` +``` bash # converting to KITTI format from other format datum convert -if cityscapes -i \ -f kitti -o -- --save-images @@ -175,7 +179,9 @@ Extra options for exporting to KITTI format: #... 
datum export -f kitti -- --label-map mycolormap.txt -# or you can use original kitti colomap: +``` +or you can use original kitti colormap: +``` bash datum export -f kitti -- --label-map kitti ``` - `--tasks TASKS` allow to specify tasks for export dataset, diff --git a/site/content/en/docs/formats/kitti_raw.md b/site/content/en/docs/formats/kitti_raw.md index 7b727b9561..1a49527a9b 100644 --- a/site/content/en/docs/formats/kitti_raw.md +++ b/site/content/en/docs/formats/kitti_raw.md @@ -71,7 +71,7 @@ provides an option to use a special index file to allow this. ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). A Datumaro project with a KITTI source can be created in the following way: @@ -100,7 +100,9 @@ There are several ways to convert a KITTI Raw dataset to other dataset formats: datum create datum import -f kitti_raw datum export -f sly_pointcloud -o -# or +``` +or +``` bash datum convert -if kitti_raw -i -f sly_pointcloud ``` @@ -121,6 +123,8 @@ There are several ways to convert a dataset to KITTI Raw format: # export dataset into KITTI Raw format from existing project datum export -p -f kitti_raw -o \ -- --save-images +``` +``` bash # converting to KITTI Raw format from other format datum convert -if sly_pointcloud -i \ -f kitti_raw -o -- --save-images --reindex diff --git a/site/content/en/docs/formats/mapillary_vistas.md b/site/content/en/docs/formats/mapillary_vistas.md index 2a1e7d448a..de637c9035 100644 --- a/site/content/en/docs/formats/mapillary_vistas.md +++ b/site/content/en/docs/formats/mapillary_vistas.md @@ -40,7 +40,9 @@ Use one of subformats (`mapillary_vistas_instances`, `mapillary_vistas_panoptic` if your dataset contains both panoptic and instance masks: ```bash datum add -f mapillary_vistas_instances ./dataset -# or +``` +or +``` bash datum add -f
mapillary_vistas_panoptic ./dataset ``` @@ -163,7 +165,7 @@ dataset {{< /tab >}} {{< /tabpane >}} -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). See examples of annotation files in -[test assets](https://github.com/openvinotoolkit/datumaro/blob/develop/tests/assets/mappilary_vistas_dataset). +[test assets](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mapillary_vistas_dataset). diff --git a/site/content/en/docs/formats/market1501.md b/site/content/en/docs/formats/market1501.md index f1c3e14c37..ce720cd6eb 100644 --- a/site/content/en/docs/formats/market1501.md +++ b/site/content/en/docs/formats/market1501.md @@ -40,7 +40,7 @@ These item attributes decodes into the image name with such convention: ## Import Market-1501 dataset Importing of Market-1501 dataset into the Datumaro project: -``` +```bash datum create datum import -f market1501 ``` @@ -81,11 +81,12 @@ market1501_dataset/ With Datumaro you can export dataset, that has `person_id` item attribute, to the Market-1501 format, example: -``` +```bash # Converting MARS dataset into the Market-1501 datum convert -if mars -i ./mars_dataset \ -f market1501 -o ./output_dir - +``` +``` bash # Export dataaset to the Market-1501 format through the Datumaro project: datum create datum add -f mars ../mars diff --git a/site/content/en/docs/formats/mnist.md b/site/content/en/docs/formats/mnist.md index 26628dfa21..ca3ac0f551 100644 --- a/site/content/en/docs/formats/mnist.md +++ b/site/content/en/docs/formats/mnist.md @@ -87,7 +87,7 @@ MNIST in CSV dataset directory should have the following structure: └── mnist_train.csv ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use 
[`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `labels.txt`. If the `dataset_meta.json` is not represented in the dataset, then `labels.txt` will be imported if possible. @@ -121,7 +121,9 @@ There are several ways to convert a MNIST dataset to other dataset formats: datum create datum import -f mnist datum export -f imagenet -o -# or +``` +or +``` bash datum convert -if mnist -i -f imagenet -o ``` @@ -145,6 +147,8 @@ There are several ways to convert a dataset to MNIST format: # export dataset into MNIST format from existing project datum export -p -f mnist -o \ -- --save-images +``` +``` bash # converting to MNIST format from other format datum convert -if imagenet -i \ -f mnist -o -- --save-images diff --git a/site/content/en/docs/formats/mpii.md b/site/content/en/docs/formats/mpii.md index 6cee546930..f9408be680 100644 --- a/site/content/en/docs/formats/mpii.md +++ b/site/content/en/docs/formats/mpii.md @@ -63,7 +63,9 @@ to other dataset formats using CLI: datum create datum import -f mpii datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if mpii -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/mpii_json.md b/site/content/en/docs/formats/mpii_json.md index 1eed4801b6..c8ca3f19f9 100644 --- a/site/content/en/docs/formats/mpii_json.md +++ b/site/content/en/docs/formats/mpii_json.md @@ -66,7 +66,9 @@ to other dataset formats using CLI: datum create datum import -f mpii_json datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if mpii_json -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/open_images.md b/site/content/en/docs/formats/open_images.md index dbca5ef610..0cf5a20b35 100644 --- a/site/content/en/docs/formats/open_images.md +++ b/site/content/en/docs/formats/open_images.md @@ -192,7 +192,7 @@ The mask images must be extracted from the ZIP archives linked above. 
To use per-subset image description files instead of `image_ids_and_rotation.csv`, place them in the `annotations` subdirectory. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ### Creating an image metadata file @@ -209,7 +209,7 @@ and record it in an image metadata file. This file must be placed at `annotations/images.meta`, and must contain one line per image, with the following structure: -``` +``` bash ``` @@ -241,7 +241,9 @@ There are several ways to convert OID to other dataset formats: datum create datum import -f open_images datum export -f cvat -o -# or +``` +or +``` bash datum convert -if open_images -i -f cvat -o ``` @@ -262,7 +264,8 @@ There are several ways to convert an existing dataset to the Open Images format: # export dataset into Open Images format from existing project datum export -p -f open_images -o \ -- --save_images - +``` +``` bash # convert a dataset in another format to the Open Images format datum convert -if imagenet -i \ -f open_images -o \ diff --git a/site/content/en/docs/formats/pascal_voc.md b/site/content/en/docs/formats/pascal_voc.md index ea4e6eab05..704b9ca31d 100644 --- a/site/content/en/docs/formats/pascal_voc.md +++ b/site/content/en/docs/formats/pascal_voc.md @@ -108,7 +108,7 @@ These directories contain `.txt` files with a list of images in a subset, the subset name is the same as the `.txt` file name. Subset names can be arbitrary. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `labelmap.txt`. If the `dataset_meta.json` is not represented in the dataset, then `labelmap.txt` will be imported if possible. 
@@ -116,7 +116,7 @@ If the `dataset_meta.json` is not represented in the dataset, then In `labelmap.txt` you can define custom color map and non-pascal labels, for example: -``` +``` txt # label_map [label : color_rgb : parts : actions] helicopter::: elephant:0:124:134:head,ear,foot: @@ -129,7 +129,7 @@ have arbitrary, but different, colors. If there are gaps in the used color indices in the annotations, they must be filled with arbitrary dummy labels. Example: -``` +``` txt car:0,128,0:: # color index 0 aeroplane:10,10,128:: # color index 1 _dummy2:2,2,2:: # filler for color index 2 @@ -168,9 +168,10 @@ There are several ways to convert a Pascal VOC dataset to other dataset formats: datum create datum import -f voc datum export -f coco -o -# or +``` +or +``` bash datum convert -if voc -i -f coco -o - ``` Or, using Python API: @@ -189,7 +190,8 @@ There are several ways to convert an existing dataset to Pascal VOC format: ``` bash # export dataset into Pascal VOC format (classification) from existing project datum export -p -f voc -o -- --tasks classification - +``` +``` bash # converting to Pascal VOC format from other format datum convert -if imagenet -i \ -f voc -o \ @@ -223,8 +225,9 @@ datum export -f voc -- --tasks detection,classification # cat:0,0,255:: # person:255,0,0:head: datum export -f voc_segmentation -- --label-map mycolormap.txt - -# or you can use original voc colomap: +``` +or you can use original voc colomap: +``` bash datum export -f voc_segmentation -- --label-map voc ``` diff --git a/site/content/en/docs/formats/sly_pointcloud.md b/site/content/en/docs/formats/sly_pointcloud.md index ed7cd25140..472daa8b16 100644 --- a/site/content/en/docs/formats/sly_pointcloud.md +++ b/site/content/en/docs/formats/sly_pointcloud.md @@ -67,7 +67,9 @@ There are two ways to import a Supervisely Point Cloud dataset: ```bash datum create datum import --format sly_pointcloud --input-path -# or +``` +or +``` bash datum create datum import -f sly_pointcloud ``` @@ 
-93,7 +95,9 @@ to other dataset formats: datum create datum import -f sly_pointcloud datum export -f kitti_raw -o -# or +``` +or +``` bash datum convert -if sly_pointcloud -i -f kitti_raw ``` @@ -114,6 +118,8 @@ There are several ways to convert a dataset to Supervisely Point Cloud format: # export dataset into Supervisely Point Cloud format from existing project datum export -p -f sly_pointcloud -o \ -- --save-images +``` +``` bash # converting to Supervisely Point Cloud format from other format datum convert -if kitti_raw -i \ -f sly_pointcloud -o -- --save-images diff --git a/site/content/en/docs/formats/synthia.md b/site/content/en/docs/formats/synthia.md index 6397cd67cf..272eaffeca 100644 --- a/site/content/en/docs/formats/synthia.md +++ b/site/content/en/docs/formats/synthia.md @@ -98,7 +98,7 @@ If it is missing, `GT/COLOR` folder will be used. The original dataset also contains depth information, but Datumaro does not currently support it. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -113,7 +113,9 @@ formats using CLI: datum create datum import -f synthia datum export -f voc -o -- --save-images -# or +``` +or +``` bash datum convert -if synthia -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/vott_csv.md b/site/content/en/docs/formats/vott_csv.md index 05743b6815..294eb3f73a 100644 --- a/site/content/en/docs/formats/vott_csv.md +++ b/site/content/en/docs/formats/vott_csv.md @@ -48,7 +48,7 @@ dataset/ └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -63,7 +63,9 @@ formats using CLI: datum create datum import -f vott_csv datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if vott_csv -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/vott_json.md b/site/content/en/docs/formats/vott_json.md index 46f5d17480..5d8d08d4f8 100644 --- a/site/content/en/docs/formats/vott_json.md +++ b/site/content/en/docs/formats/vott_json.md @@ -48,7 +48,7 @@ dataset/ └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -63,7 +63,9 @@ formats using CLI: datum create datum import -f vott_json datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if vott_json -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/yolo.md b/site/content/en/docs/formats/yolo.md index 33e50ea2f3..9a17d09562 100644 --- a/site/content/en/docs/formats/yolo.md +++ b/site/content/en/docs/formats/yolo.md @@ -64,7 +64,7 @@ YOLO dataset directory should have the following structure: - `obj.data` should have the following content, it is not necessary to have both subsets, but necessary to have one of them: -``` +``` txt classes = 5 # optional names = train = @@ -73,14 +73,14 @@ backup = backup/ # optional ``` - `obj.names` contains a list of classes. The line number for the class is the same as its index: -``` +``` txt label1 # label1 has index 0 label2 # label2 has index 1 label3 # label2 has index 2 ... ``` - Files `train.txt` and `valid.txt` should have the following structure: -``` +``` txt ... 
@@ -88,7 +88,7 @@ label3 # label2 has index 2 - Files in directories `obj_train_data/` and `obj_valid_data/` should contain information about labeled bounding boxes for images: -``` +``` txt # image1.txt: # 0 0.250000 0.400000 0.300000 0.400000 @@ -98,7 +98,7 @@ Here `x_center`, `y_center`, `width`, and `height` are relative to the image's width and height. The `x_center` and `y_center` are center of rectangle (are not top-left corner). -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -113,7 +113,9 @@ There are several ways to convert a YOLO dataset to other dataset formats: datum create datum add -f yolo datum export -f voc -o -# or +``` +or +``` bash datum convert -if yolo -i \ -f coco_instances -o ``` @@ -134,7 +136,7 @@ if the dataset supports object detection task. Example: -``` +```bash datum create datum import -f coco_instances datum export -f yolo -o -- --save-images diff --git a/site/content/en/docs/user-manual/command-reference/stats.md b/site/content/en/docs/user-manual/command-reference/stats.md index c2eb54612a..aab11eea6b 100644 --- a/site/content/en/docs/user-manual/command-reference/stats.md +++ b/site/content/en/docs/user-manual/command-reference/stats.md @@ -35,7 +35,7 @@ Sample output:
-``` +``` json { "annotations": { "labels": { diff --git a/site/content/en/docs/user-manual/command-reference/transform.md b/site/content/en/docs/user-manual/command-reference/transform.md index 727f397d3d..7da52497dd 100644 --- a/site/content/en/docs/user-manual/command-reference/transform.md +++ b/site/content/en/docs/user-manual/command-reference/transform.md @@ -211,5 +211,6 @@ datum transform -t ndr -- \ - Resize dataset images and annotations. Supports upscaling, downscaling and mixed variants. -``` +```bash datum transform -t resize -- -dw 256 -dh 256 +``` diff --git a/site/content/en/docs/user-manual/extending.md b/site/content/en/docs/user-manual/extending.md index e491312692..4328006e2f 100644 --- a/site/content/en/docs/user-manual/extending.md +++ b/site/content/en/docs/user-manual/extending.md @@ -28,11 +28,17 @@ The plugin depends on TensorFlow, which can be installed with `pip`: ``` bash pip install tensorflow -# or +``` +or +``` bash pip install tensorflow-gpu -# or +``` +or +``` bash pip install datumaro[tf] -# or +``` +or +``` bash pip install datumaro[tf-gpu] ``` diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index 644bb228fc..e6496ec2d8 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -24,7 +24,8 @@ Install: ``` bash # From PyPI: pip install datumaro[default] - +``` +``` bash # From the GitHub repository: pip install 'git+https://github.com/openvinotoolkit/datumaro[default]' ``` diff --git a/site/content/en/docs/user-manual/supported_formats.md b/site/content/en/docs/user-manual/supported_formats.md index 0de49a0107..ae16f0a687 100644 --- a/site/content/en/docs/user-manual/supported_formats.md +++ b/site/content/en/docs/user-manual/supported_formats.md @@ -160,7 +160,7 @@ in a specific format, only relevant annotations are exported. It is possible to use classes that are not original to the format. 
To do this, use `dataset_meta.json`. -``` +```json { "label_map": {"0": "background", "1": "car", "2": "person"}, "segmentation_colors": [[0, 0, 0], [255, 0, 0], [0, 0, 255]], From 86cf76387e976e7e3e1b33ced65b07f8a31aad8d Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 11 Jan 2022 15:29:02 +0300 Subject: [PATCH 10/15] Mark python 3.6 unsupported (#617) * Mark python 3.6 unsupported in setup.py * Update changelog * Drop py3.6 from CI * Update docs * Update python code --- .github/workflows/health_check.yml | 4 ++-- .github/workflows/pr_checks.yml | 2 +- CHANGELOG.md | 3 ++- datumaro/util/attrs_util.py | 8 +------- setup.py | 2 +- site/content/en/docs/contributing.md | 2 +- site/content/en/docs/getting_started.md | 2 +- site/content/en/docs/user-manual/installation.md | 2 +- 8 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml index d40a6138d5..1090b96ae9 100644 --- a/.github/workflows/health_check.yml +++ b/.github/workflows/health_check.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9'] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -24,6 +24,6 @@ jobs: pytest -v --cov --cov-report xml:coverage.xml datum -h - name: Sending coverage results - if: matrix.python-version == '3.6' + if: matrix.python-version == '3.7' run: | bash <(curl -Ls https://coverage.codacy.com/get.sh) report -r coverage.xml -t ${{ secrets.CODACY_PROJECT_TOKEN }} diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index affa016a39..8879a4c4c9 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: ['macos-10.15', 'ubuntu-20.04', 'windows-2016'] - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9'] name: build and test (${{ matrix.os }}, Python ${{ 
matrix.python-version }}) runs-on: ${{ matrix.os }} steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index ee5672477d..ec1538e630 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - TBD ### Removed -- TBD +- Official support of Python 3.6 (due to it's EOL) + () ### Fixed - Fails in multimerge when lines are not approximated and when there are no diff --git a/datumaro/util/attrs_util.py b/datumaro/util/attrs_util.py index fe40bbfc00..1b7b41e472 100644 --- a/datumaro/util/attrs_util.py +++ b/datumaro/util/attrs_util.py @@ -22,13 +22,7 @@ def validator(inst, attribute, value): value = default else: dst_type = None - if attribute.type and inspect.isclass(attribute.type) and \ - not hasattr(attribute.type, '__origin__'): - # ^^^^^^^ - # Disallow Generics in python 3.6 - # Can be dropped with 3.6 support. Generics canot be used - # in isinstance() checks. - + if attribute.type and inspect.isclass(attribute.type): dst_type = attribute.type elif conv and inspect.isclass(conv): dst_type = conv diff --git a/setup.py b/setup.py index d82a827c99..3000534844 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def parse_requirements(filename=CORE_REQUIREMENTS_FILE): "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires='>=3.6', + python_requires='>=3.7', install_requires=CORE_REQUIREMENTS, extras_require={ 'tf': ['tensorflow'], diff --git a/site/content/en/docs/contributing.md b/site/content/en/docs/contributing.md index 0e5e8070ca..e9e905be60 100644 --- a/site/content/en/docs/contributing.md +++ b/site/content/en/docs/contributing.md @@ -14,7 +14,7 @@ weight: 50 ### Prerequisites -- Python (3.6+) +- Python (3.7+) ``` bash git clone https://github.com/openvinotoolkit/datumaro diff --git a/site/content/en/docs/getting_started.md b/site/content/en/docs/getting_started.md index fdb98da7c9..ceb7094635 100644 --- 
a/site/content/en/docs/getting_started.md +++ b/site/content/en/docs/getting_started.md @@ -12,7 +12,7 @@ To read about the design concept and features of Datumaro, go to the [design sec ### Dependencies -- Python (3.6+) +- Python (3.7+) - Optional: OpenVINO, TensorFlow, PyTorch, MxNet, Caffe, Accuracy Checker Optionally, create a virtual environment: diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index e6496ec2d8..ce8b811877 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -7,7 +7,7 @@ weight: 1 ### Dependencies -- Python (3.6+) +- Python (3.7+) - Optional: OpenVINO, TensorFlow, PyTorch, MxNet, Caffe, Accuracy Checker ### Installation steps From 260ad5b2f7dbb20c73a62c78de7740d59e7db0e3 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 11 Jan 2022 16:15:37 +0300 Subject: [PATCH 11/15] Upgrade code to use Python 3.7 features (#619) * Make use of postponed evaluation of annotations Now that Python 3.7 is the minimal supported version, we don't need to manually quote types anymore. * Import NoReturn from typing rather than typing_extensions Now that the minimum version of Python is 3.7, it's guaranteed to be there. 
--- datumaro/components/annotation.py | 12 ++++---- datumaro/components/dataset.py | 30 ++++++++++--------- datumaro/components/extractor.py | 8 +++-- datumaro/components/extractor_tfds.py | 14 +++++---- datumaro/components/format_detection.py | 7 ++--- datumaro/components/media.py | 12 ++++---- datumaro/components/project.py | 26 ++++++++-------- .../sly_pointcloud_format/converter.py | 6 ++-- datumaro/util/scope.py | 8 +++-- 9 files changed, 69 insertions(+), 54 deletions(-) diff --git a/datumaro/components/annotation.py b/datumaro/components/annotation.py index 8fb74ec43f..6ef26f73aa 100644 --- a/datumaro/components/annotation.py +++ b/datumaro/components/annotation.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from enum import Enum, auto from itertools import zip_longest from typing import ( @@ -101,7 +103,7 @@ def from_iterable(cls, iterable: Iterable[Union[ Tuple[str], Tuple[str, str], Tuple[str, str, List[str]], - ]]) -> 'LabelCategories': + ]]) -> LabelCategories: """ Creates a LabelCategories from iterable. @@ -180,7 +182,7 @@ class MaskCategories(Categories): @classmethod def generate(cls, size: int = 255, include_background: bool = True) \ - -> 'MaskCategories': + -> MaskCategories: """ Generates MaskCategories with the specified size. @@ -336,7 +338,7 @@ class CompiledMask: @staticmethod def from_instance_masks(instance_masks: Iterable[Mask], instance_ids: Optional[Iterable[int]] = None, - instance_labels: Optional[Iterable[int]] = None) -> 'CompiledMask': + instance_labels: Optional[Iterable[int]] = None) -> CompiledMask: """ Joins instance masks into a single mask. Masks are sorted by z_order (ascending) prior to merging. 
@@ -655,7 +657,7 @@ class Category: def from_iterable(cls, iterable: Union[ Tuple[int, List[str]], Tuple[int, List[str], Set[Tuple[int, int]]], - ]) -> 'PointsCategories': + ]) -> PointsCategories: """ Create PointsCategories from an iterable. diff --git a/datumaro/components/dataset.py b/datumaro/components/dataset.py index 981b270518..a9c00d73a1 100644 --- a/datumaro/components/dataset.py +++ b/datumaro/components/dataset.py @@ -1,7 +1,9 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import contextmanager from copy import copy from enum import Enum, auto @@ -105,7 +107,7 @@ def __copy__(self): class DatasetItemStorageDatasetView(IDataset): class Subset(IDataset): - def __init__(self, parent: 'DatasetItemStorageDatasetView', name: str): + def __init__(self, parent: DatasetItemStorageDatasetView, name: str): super().__init__() self.parent = parent self.name = name @@ -182,7 +184,7 @@ class DatasetPatch: class DatasetPatchWrapper(DatasetItemStorageDatasetView): # The purpose of this class is to indicate that the input dataset is # a patch and autofill patch info in Converter - def __init__(self, patch: 'DatasetPatch', parent: IDataset): + def __init__(self, patch: DatasetPatch, parent: IDataset): super().__init__(patch.data, parent.categories()) self.patch = patch @@ -212,7 +214,7 @@ def as_dataset(self, parent: IDataset) -> IDataset: return __class__.DatasetPatchWrapper(self, parent) class DatasetSubset(IDataset): # non-owning view - def __init__(self, parent: 'Dataset', name: str): + def __init__(self, parent: Dataset, name: str): super().__init__() self.parent = parent self.name = name @@ -249,7 +251,7 @@ def subsets(self): def categories(self): return self.parent.categories() - def as_dataset(self) -> 'Dataset': + def as_dataset(self) -> Dataset: return Dataset.from_extractors(self, env=self.parent.env) @@ -608,7 +610,7 @@ class 
Dataset(IDataset): @classmethod def from_iterable(cls, iterable: Iterable[DatasetItem], categories: Union[CategoriesInfo, List[str], None] = None, - env: Optional[Environment] = None) -> 'Dataset': + env: Optional[Environment] = None) -> Dataset: if isinstance(categories, list): categories = { AnnotationType.label: LabelCategories.from_iterable(categories) @@ -632,7 +634,7 @@ def categories(self): @staticmethod def from_extractors(*sources: IDataset, - env: Optional[Environment] = None) -> 'Dataset': + env: Optional[Environment] = None) -> Dataset: if len(sources) == 1: source = sources[0] else: @@ -709,7 +711,7 @@ def remove(self, id: str, subset: Optional[str] = None) -> None: self._data.remove(id, subset) def filter(self, expr: str, filter_annotations: bool = False, - remove_empty: bool = False) -> 'Dataset': + remove_empty: bool = False) -> Dataset: if filter_annotations: return self.transform(XPathAnnotationsFilter, expr, remove_empty) else: @@ -717,7 +719,7 @@ def filter(self, expr: str, filter_annotations: bool = False, def update(self, source: Union[DatasetPatch, IExtractor, Iterable[DatasetItem]]) \ - -> 'Dataset': + -> Dataset: """ Updates items of the current dataset from another dataset or an iterable (the source). Items from the source overwrite matching @@ -734,7 +736,7 @@ def update(self, return self def transform(self, method: Union[str, Type[Transform]], - *args, **kwargs) -> 'Dataset': + *args, **kwargs) -> Dataset: """ Applies some function to dataset items. 
""" @@ -754,7 +756,7 @@ def transform(self, method: Union[str, Type[Transform]], return self - def run_model(self, model, batch_size=1) -> 'Dataset': + def run_model(self, model, batch_size=1) -> Dataset: from datumaro.components.launcher import Launcher, ModelTransform if isinstance(model, Launcher): return self.transform(ModelTransform, launcher=model, @@ -765,7 +767,7 @@ def run_model(self, model, batch_size=1) -> 'Dataset': raise TypeError("Unexpected 'model' argument type: %s" % \ type(model)) - def select(self, pred: Callable[[DatasetItem], bool]) -> 'Dataset': + def select(self, pred: Callable[[DatasetItem], bool]) -> Dataset: class _DatasetFilter(ItemTransform): def transform_item(self, item): if pred(item): @@ -863,12 +865,12 @@ def save(self, save_dir: Optional[str] = None, **kwargs) -> None: format=self._format, **options) @classmethod - def load(cls, path: str, **kwargs) -> 'Dataset': + def load(cls, path: str, **kwargs) -> Dataset: return cls.import_from(path, format=DEFAULT_FORMAT, **kwargs) @classmethod def import_from(cls, path: str, format: Optional[str] = None, - env: Optional[Environment] = None, **kwargs) -> 'Dataset': + env: Optional[Environment] = None, **kwargs) -> Dataset: from datumaro.components.config_model import Source if env is None: diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py index 0923eeb659..20072dc233 100644 --- a/datumaro/components/extractor.py +++ b/datumaro/components/extractor.py @@ -1,7 +1,9 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from glob import iglob from typing import Any, Callable, Dict, Iterator, List, Optional import os @@ -101,10 +103,10 @@ def __len__(self) -> int: def __bool__(self): # avoid __len__ use for truth checking return True - def subsets(self) -> Dict[str, 'IExtractor']: + def subsets(self) -> Dict[str, IExtractor]: raise NotImplementedError() - 
def get_subset(self, name) -> 'IExtractor': + def get_subset(self, name) -> IExtractor: raise NotImplementedError() def categories(self) -> CategoriesInfo: diff --git a/datumaro/components/extractor_tfds.py b/datumaro/components/extractor_tfds.py index b2efc3fb77..cd54d3112a 100644 --- a/datumaro/components/extractor_tfds.py +++ b/datumaro/components/extractor_tfds.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from typing import ( Any, Callable, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union, ) @@ -44,7 +46,7 @@ class _TfdsAdapter: metadata: TfdsDatasetMetadata def transform_categories(self, - tfds_builder: 'tfds.core.DatasetBuilder', categories: CategoriesInfo, + tfds_builder: tfds.core.DatasetBuilder, categories: CategoriesInfo, ) -> None: for t in self.category_transformers: t(tfds_builder, categories) @@ -58,7 +60,7 @@ class _SetLabelCategoriesFromClassLabelFeature: feature_path: Union[str, Tuple[str, ...]] def __call__(self, - tfds_builder: 'tfds.core.DatasetBuilder', categories: CategoriesInfo, + tfds_builder: tfds.core.DatasetBuilder, categories: CategoriesInfo, ) -> None: assert AnnotationType.label not in categories if isinstance(self.feature_path, str): @@ -186,9 +188,9 @@ def __call__(self, tfds_example: Any) -> str: } class _TfdsSplitExtractor(IExtractor): - def __init__(self, parent: '_TfdsExtractor', - tfds_split: 'tf.data.Dataset', - tfds_split_info: 'tfds.core.SplitInfo', + def __init__(self, parent: _TfdsExtractor, + tfds_split: tf.data.Dataset, + tfds_split_info: tfds.core.SplitInfo, ): self._parent = parent self._tfds_split = tfds_split diff --git a/datumaro/components/format_detection.py b/datumaro/components/format_detection.py index 1a92574482..2a735884ad 100644 --- a/datumaro/components/format_detection.py +++ b/datumaro/components/format_detection.py @@ -1,18 +1,17 @@ -# Copyright (C) 2021 Intel 
Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT from enum import IntEnum from typing import ( - Callable, Collection, Iterator, List, Optional, Sequence, TextIO, Union, + Callable, Collection, Iterator, List, NoReturn, Optional, Sequence, TextIO, + Union, ) import contextlib import fnmatch import glob import os.path as osp -from typing_extensions import NoReturn - class FormatDetectionConfidence(IntEnum): """ diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 2c72b21ec6..63d48d60a1 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from typing import Callable, Iterable, Iterator, Optional, Tuple, Union import os import os.path as osp @@ -194,7 +196,7 @@ def save(self, path): save_image(path, self.data) class VideoFrame(Image): - def __init__(self, video: 'Video', index: int): + def __init__(self, video: Video, index: int): self._video = video self._index = index @@ -209,19 +211,19 @@ def index(self) -> int: return self._index @property - def video(self) -> 'Video': + def video(self) -> Video: return self._video class _VideoFrameIterator(Iterator[VideoFrame]): """ Provides sequential access to the video frames. 
""" - _video: 'Video' + _video: Video _iterator: Iterator[VideoFrame] _pos: int _current_frame_data: Optional[np.ndarray] - def __init__(self, video: 'Video'): + def __init__(self, video: Video): self._video = video self._reset() diff --git a/datumaro/components/project.py b/datumaro/components/project.py index c4448db8f0..0d61c5b761 100644 --- a/datumaro/components/project.py +++ b/datumaro/components/project.py @@ -1,7 +1,9 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import ExitStack, suppress from enum import Enum, auto from typing import ( @@ -48,7 +50,7 @@ class ProjectSourceDataset(IDataset): - def __init__(self, path: str, tree: 'Tree', source: str, + def __init__(self, path: str, tree: Tree, source: str, readonly: bool = False): config = tree.sources[source] @@ -191,7 +193,7 @@ def __contains__(self, name: str): return name in self._data class _DataSourceBase(CrudProxy[Source]): - def __init__(self, tree: 'Tree', config_field: str): + def __init__(self, tree: Tree, config_field: str): self._tree = tree self._field = config_field @@ -209,7 +211,7 @@ def remove(self, name: str): self._data.remove(name) class ProjectSources(_DataSourceBase): - def __init__(self, tree: 'Tree'): + def __init__(self, tree: Tree): super().__init__(tree, 'sources') def __getitem__(self, name): @@ -297,13 +299,13 @@ def _get_subgraph(graph, target): """ return graph.subgraph(nx.ancestors(graph, target) | {target}) - def get_slice(self, target) -> 'Pipeline': + def get_slice(self, target) -> Pipeline: pipeline = Pipeline() pipeline._graph = self._get_subgraph(self._graph, target).copy() return pipeline class ProjectBuilder: - def __init__(self, project: 'Project', tree: 'Tree'): + def __init__(self, project: Project, tree: Tree): self._project = project self._tree = tree @@ -663,7 +665,7 @@ class ProjectBuildTargets(CrudProxy[BuildTarget]): 
MAIN_TARGET = 'project' BASE_STAGE = 'root' - def __init__(self, tree: 'Tree'): + def __init__(self, tree: Tree): self._tree = tree @property @@ -1353,9 +1355,9 @@ class Tree: # - attached to the work dir # - attached to a revision - def __init__(self, project: 'Project', + def __init__(self, project: Project, config: Union[None, Dict, Config, TreeConfig] = None, - rev: Union[None, 'Revision'] = None): + rev: Union[None, Revision] = None): assert isinstance(project, Project) assert not rev or project.is_ref(rev), rev @@ -1379,7 +1381,7 @@ def dump(self, path): os.makedirs(osp.dirname(path), exist_ok=True) self._config.dump(path) - def clone(self) -> 'Tree': + def clone(self) -> Tree: return Tree(self._project, TreeConfig(self.config), self._rev) @property @@ -1399,7 +1401,7 @@ def env(self) -> Environment: return self._project.env @property - def rev(self) -> Union[None, 'Revision']: + def rev(self) -> Union[None, Revision]: return self._rev def make_pipeline(self, target: Optional[str] = None) -> Pipeline: @@ -1622,7 +1624,7 @@ def _init_vcs(self): @classmethod @scoped - def init(cls, path) -> 'Project': + def init(cls, path) -> Project: existing_project = cls.find_project_dir(path) if existing_project: raise ProjectAlreadyExists(path) diff --git a/datumaro/plugins/sly_pointcloud_format/converter.py b/datumaro/plugins/sly_pointcloud_format/converter.py index c019da5509..d632a0c18f 100644 --- a/datumaro/plugins/sly_pointcloud_format/converter.py +++ b/datumaro/plugins/sly_pointcloud_format/converter.py @@ -1,10 +1,12 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # The format is described here: # https://docs.supervise.ly/data-organization/00_ann_format_navi +from __future__ import annotations + from datetime import datetime import json import logging as log @@ -24,7 +26,7 @@ class _SuperviselyPointCloudDumper: def __init__(self, extractor: IExtractor, - context: 
'SuperviselyPointCloudConverter'): + context: SuperviselyPointCloudConverter): self._extractor = extractor self._context = context diff --git a/datumaro/util/scope.py b/datumaro/util/scope.py index a857882ca5..69175c7ab1 100644 --- a/datumaro/util/scope.py +++ b/datumaro/util/scope.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import ExitStack, contextmanager from functools import partial, wraps from typing import Any, Callable, ContextManager, Dict, Optional, Tuple, TypeVar @@ -95,7 +97,7 @@ def disable(self): def close(self): self.__exit__(None, None, None) - def __enter__(self) -> 'Scope': + def __enter__(self) -> Scope: return self def __exit__(self, exc_type, exc_value, exc_traceback): @@ -106,7 +108,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): self._stack.pop_all() # prevent issues on repetitive calls @classmethod - def current(cls) -> 'Scope': + def current(cls) -> Scope: return cls._thread_locals.current @contextmanager From 20d3d74cfbe9b5f23d53491c05c5102b8dfc7810 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 11 Jan 2022 17:12:48 +0300 Subject: [PATCH 12/15] Fix project dataset saving when rpath is used (#613) * Fix project dataset export when source rpath is specified * Add test * Update changelog --- CHANGELOG.md | 2 ++ datumaro/components/project.py | 12 ++++++++++-- tests/requirements.py | 1 + tests/test_project.py | 22 ++++++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec1538e630..92f7fb9a6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 label categories () - Cannot convert LabelMe dataset, that has no subsets () +- Saving (overwriting) a dataset in a project when rpath is used + () ### Security - TBD diff --git 
a/datumaro/components/project.py b/datumaro/components/project.py index 0d61c5b761..7361349475 100644 --- a/datumaro/components/project.py +++ b/datumaro/components/project.py @@ -54,12 +54,20 @@ def __init__(self, path: str, tree: Tree, source: str, readonly: bool = False): config = tree.sources[source] + rpath = path if config.path: - path = osp.join(path, config.path) + rpath = osp.join(path, config.path) - self.__dict__['_dataset'] = Dataset.import_from(path, + dataset = Dataset.import_from(rpath, env=tree.env, format=config.format, **config.options) + # Using rpath won't allow to save directly with .save() when a file + # path is specified. Dataset doesn't know the root location and if + # it exists at all, but in a project, we do. + dataset.bind(path, format=dataset.format, options=dataset.options) + + self.__dict__['_dataset'] = dataset + self.__dict__['_config'] = config self.__dict__['_readonly'] = readonly self.__dict__['name'] = source diff --git a/tests/requirements.py b/tests/requirements.py index f3ad6332ad..04c95e5197 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -45,6 +45,7 @@ class Requirements: DATUM_BUG_470 = "Cannot to import Cityscapes dataset without images" DATUM_BUG_560 = "Reading MOT dataset with seqinfo produces 0-based indexing in frames" DATUM_BUG_583 = "Empty lines in VOC subset lists are not ignored" + DATUM_BUG_602 = "Patch command example error" class SkipMessages: diff --git a/tests/test_project.py b/tests/test_project.py index b6c115dd15..e9b68fd791 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1112,6 +1112,28 @@ def test_cant_redownload_unhashed(self): with self.assertRaises(MissingSourceHashError): project.working_tree.make_dataset('source1.root') + @mark_requirement(Requirements.DATUM_BUG_602) + @scoped + def test_can_save_local_source_with_relpath(self): + test_dir = scope_add(TestDir()) + source_url = osp.join(test_dir, 'source') + source_dataset = Dataset.from_iterable([ + 
DatasetItem(0, subset='a', image=np.ones((2, 3, 3)), + annotations=[ Bbox(1, 2, 3, 4, label=0) ]), + DatasetItem(1, subset='b', image=np.zeros((10, 20, 3)), + annotations=[ Bbox(1, 2, 3, 4, label=1) ]), + ], categories=['a', 'b']) + source_dataset.save(source_url, save_images=True) + + project = scope_add(Project.init(osp.join(test_dir, 'proj'))) + project.import_source('s1', url=source_url, format=DEFAULT_FORMAT, + rpath=osp.join('annotations', 'b.json')) + + read_dataset = project.working_tree.make_dataset('s1') + self.assertEqual(read_dataset.data_path, project.source_data_dir('s1')) + + read_dataset.save() + class BackwardCompatibilityTests_v0_1(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) @scoped From a7791a662a41c88cc4db085d4b3659d7016526fb Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 12 Jan 2022 20:33:26 +0300 Subject: [PATCH 13/15] Sort command lists in code and documentation alphabetically (#624) Currently, commands are listed in a seemingly arbitrary order. Having them in alphabetical order is better, because: * It's easier to find a command by name. * It's clear where to insert a new command. I removed the description text for the `explain` command, since otherwise it sticks out for no reason. We could probably add a description line for _every_ command, but having it for just one is ugly. 
--- datumaro/cli/__main__.py | 16 ++++++------ site/content/en/docs/user-manual/_index.md | 30 +++++++++++----------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py index 26f3151bdf..a40c630d83 100644 --- a/datumaro/cli/__main__.py +++ b/datumaro/cli/__main__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT @@ -56,17 +56,17 @@ def _make_subcommands_help(commands, help_line_start=0): def _get_known_contexts(): return [ + ('model', contexts.model, "Actions with models"), ('project', contexts.project, "Actions with projects"), ('source', contexts.source, "Actions with data sources"), - ('model', contexts.model, "Actions with models"), ] def _get_known_commands(): return [ ("Project modification:", None, ''), + ('add', commands.add, "Add dataset"), ('create', commands.create, "Create empty project"), ('import', commands.import_, "Import dataset"), - ('add', commands.add, "Add dataset"), ('remove', commands.remove, "Remove dataset"), ("", None, ''), @@ -78,17 +78,17 @@ def _get_known_commands(): ("", None, ''), ("Dataset operations:", None, ''), + ('convert', commands.convert, "Convert dataset between formats"), + ('diff', commands.diff, "Compare datasets"), ('download', commands.download, "Download a publicly available dataset"), + ('explain', commands.explain, "Run Explainable AI algorithm for model"), ('export', commands.export, "Export dataset in some format"), ('filter', commands.filter, "Filter dataset items"), - ('transform', commands.transform, "Modify dataset items"), + ('info', commands.info, "Print dataset info"), ('merge', commands.merge, "Merge datasets"), ('patch', commands.patch, "Update dataset from another one"), - ('convert', commands.convert, "Convert dataset between formats"), - ('diff', commands.diff, "Compare datasets"), ('stats', commands.stats, "Compute dataset statistics"), - ('info', 
commands.info, "Print dataset info"), - ('explain', commands.explain, "Run Explainable AI algorithm for model"), + ('transform', commands.transform, "Modify dataset items"), ('validate', commands.validate, "Validate dataset") ] diff --git a/site/content/en/docs/user-manual/_index.md b/site/content/en/docs/user-manual/_index.md index 4645c1e0dd..913936ebdc 100644 --- a/site/content/en/docs/user-manual/_index.md +++ b/site/content/en/docs/user-manual/_index.md @@ -20,37 +20,37 @@ weight: 3 - [Supported annotation types](./supported_formats/#annotation-types) - [Supported media formats](./media_formats) - [Command reference](./command-reference) + - [Add](./command-reference/sources/#source-add) + - [Checkout](./command-reference/checkout) + - [Commit](./command-reference/commit) - [Convert](./command-reference/convert) - [Create](./command-reference/create) + - [Diff](./command-reference/diff) - [Download](./command-reference/download) - - [Import](./command-reference/sources/#source-import) + - [Explain](./command-reference/explain) - [Export](./command-reference/export) - - [Add](./command-reference/sources/#source-add) - - [Remove](./command-reference/sources/#source-remove) - [Filter](./command-reference/filter) + - [Import](./command-reference/sources/#source-import) + - [Info](./command-reference/info) + - [Log](./command-reference/log) - [Merge](./command-reference/merge) - [Patch](./command-reference/patch) - - [Diff](./command-reference/diff) - - [Info](./command-reference/info) + - [Remove](./command-reference/sources/#source-remove) - [Stats](./command-reference/stats) - - [Validate](./command-reference/validate) - - [Transform](./command-reference/transform) - - [Commit](./command-reference/commit) - - [Checkout](./command-reference/checkout) - [Status](./command-reference/status) - - [Log](./command-reference/log) - - [Run model inference explanation (explain)](./command-reference/explain) + - [Transform](./command-reference/transform) + - 
[Validate](./command-reference/validate) - Models: - [Add](./command-reference/models/#model-add) - [Remove](./command-reference/models/#model-remove) - [Run](./command-reference/models/#model-run) + - Projects: + - [Info](./command-reference/projects/#project-info) + - [Migrate](./command-reference/projects/#project-migrate) - Sources: - - [Import](./command-reference/sources/#source-import) - [Add](./command-reference/sources/#source-add) + - [Import](./command-reference/sources/#source-import) - [Remove](./command-reference/sources/#source-remove) - - Projects: - - [Migrate](./command-reference/projects/#project-migrate) - - [Info](./command-reference/projects/#project-info) - [Extending](./extending) - [Builtin plugins](./extending/#builtin-plugins) - [Dataset Formats](./extending/#dataset-formats) From ba19f03c588a8a8ee7a8be9e63bad4079f20b631 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 13 Jan 2022 12:55:47 +0300 Subject: [PATCH 14/15] Fix resize memory use and image pixels (#622) * Fix image resizing in resize transform * Make resize lazy * Update changelog * Add image extension in Image ctor * Add tests for problems * Preserve output image extension * Optimize performance and memory in resize a bit * Update changelog * Require dot in ext * Remove ext validation * Fix imports * Fix comment * Invert size tuple components --- CHANGELOG.md | 6 ++++ datumaro/components/media.py | 46 ++++++++++++++++----------- datumaro/plugins/transforms.py | 57 ++++++++++++++++++++++------------ tests/requirements.py | 4 +++ tests/test_images.py | 13 ++++++++ tests/test_transforms.py | 26 +++++++++++++--- 6 files changed, 110 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92f7fb9a6b..af0d1a4d0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Saving (overwriting) a dataset in a project when rpath is used () +- Output image extension 
preserving in the `Resize` transform + () +- Memory overuse in the `Resize` transform + () +- Invalid image pixels produced by the `Resize` transform + () ### Security - TBD diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 63d48d60a1..d64914a5e3 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -28,7 +28,7 @@ def path(self) -> str: @property def ext(self) -> str: - """Media file extension""" + """Media file extension (with the leading dot)""" return osp.splitext(osp.basename(self.path))[1] def __eq__(self, other: object) -> bool: @@ -42,6 +42,7 @@ def __init__(self, data: Union[np.ndarray, Callable[[str], np.ndarray], None] = None, *, path: Optional[str] = None, + ext: Optional[str] = None, size: Optional[Tuple[int, int]] = None): assert size is None or len(size) == 2, size if size is not None: @@ -58,6 +59,17 @@ def __init__(self, path = osp.abspath(path).replace('\\', '/') self._path = path + assert ext is None or isinstance(ext, str), ext + if ext: + assert not path, "Can't specify both 'path' and 'ext' for image" + + if not ext.startswith('.'): + ext = '.' 
+ ext + ext = ext.lower() + else: + ext = None + self._ext = ext + if not isinstance(data, np.ndarray): assert path or callable(data), "Image can not be empty" assert data is None or callable(data) @@ -75,7 +87,7 @@ def data(self) -> np.ndarray: data = self._data if self._size is None and data is not None: - self._size = tuple(map(int, data.shape[:2])) + self._size = tuple(map(int, data.shape[:2])) return data @property @@ -99,6 +111,14 @@ def size(self) -> Optional[Tuple[int, int]]: self._size = tuple(map(int, data.shape[:2])) return self._size + @property + def ext(self) -> str: + """Media file extension""" + if self._ext is not None: + return self._ext + else: + return osp.splitext(osp.basename(self.path))[1] + def __eq__(self, other): if not isinstance(other, __class__): return False @@ -141,7 +161,12 @@ def __init__(self, if path and osp.isfile(path) or data: data = lazy_image(path, loader=data) - super().__init__(path=path, size=size, + self._bytes_data = data + + if ext is None and path is None and isinstance(data, bytes): + ext = self._guess_ext(data) + + super().__init__(path=path, ext=ext, size=size, data=lambda _: decode_image(self.get_bytes())) if data is None: # We don't expect decoder to produce images from nothing, @@ -150,15 +175,6 @@ def __init__(self, # from the path, when no data is provided. self._data = None - self._bytes_data = data - if ext: - ext = ext.lower() - if not ext.startswith('.'): - ext = '.' 
+ ext - elif path is None and isinstance(data, bytes): - ext = self._guess_ext(data) - self._ext = ext - @classmethod def _guess_ext(cls, data: bytes) -> Optional[str]: return next( @@ -172,12 +188,6 @@ def get_bytes(self): return self._bytes_data() return self._bytes_data - @property - def ext(self): - if self._ext: - return self._ext - return super().ext - def save(self, path): cur_path = osp.abspath(self.path) path = osp.abspath(path) diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py index 8acd5243ab..e5b488195a 100644 --- a/datumaro/plugins/transforms.py +++ b/datumaro/plugins/transforms.py @@ -21,9 +21,11 @@ Points, PointsCategories, Polygon, PolyLine, RleMask, ) from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.errors import DatumaroError from datumaro.components.extractor import ( DEFAULT_SUBSET_NAME, IExtractor, ItemTransform, Transform, ) +from datumaro.components.media import Image from datumaro.util import NOTSET, parse_str_enum_value, take_by from datumaro.util.annotation_util import find_group_leader, find_instances import datumaro.util.mask_tools as mask_tools @@ -763,21 +765,48 @@ def __init__(self, extractor: IExtractor, width: int, height: int) -> None: self._width = width self._height = height + @staticmethod + def _lazy_resize_image(image, new_size): + def _resize_image(_): + h, w = image.size + yscale = new_size[0] / float(h) + xscale = new_size[1] / float(w) + + # LANCZOS4 is preferable for upscaling, but it works quite slow + method = cv2.INTER_AREA if (xscale * yscale) < 1 \ + else cv2.INTER_CUBIC + + resized_image = cv2.resize(image.data / 255.0, new_size[::-1], + interpolation=method) + resized_image *= 255.0 + return resized_image + + return Image(_resize_image, ext=image.ext, size=new_size) + + @staticmethod + def _lazy_resize_mask(mask, new_size): + def _resize_image(): + # Can use only NEAREST for masks, + # because we can't have interpolated values + rescaled_mask = 
cv2.resize(mask.image.astype(np.float32), + new_size[::-1], interpolation=cv2.INTER_NEAREST) + return rescaled_mask.astype(np.uint8) + return _resize_image + def transform_item(self, item): if not item.has_image: - raise Exception("Image info is required for this transform") + raise DatumaroError("Item %s: image info is required for this " + "transform" % (item.id, )) h, w = item.image.size xscale = self._width / float(w) yscale = self._height / float(h) + new_size = (self._height, self._width) + + resized_image = None if item.image.has_data: - # LANCZOS4 is preferable for upscaling, but it works quite slow - method = cv2.INTER_AREA if (xscale * yscale) < 1 \ - else cv2.INTER_CUBIC - image = item.image.data / 255.0 - resized_image = cv2.resize(image, (self._width, self._height), - interpolation=method) + resized_image = self._lazy_resize_image(item.image, new_size) resized_annotations = [] for ann in item.annotations: @@ -798,18 +827,8 @@ def transform_item(self, item): ] )) elif isinstance(ann, Mask): - # Can use only NEAREST for masks, - # because we can't have interpolated values - rescaled_mask = cv2.resize(ann.image.astype(np.float32), - (self._width, self._height), - interpolation=cv2.INTER_NEAREST).astype(np.uint8) - - if isinstance(ann, RleMask): - rle = mask_tools.mask_to_rle(rescaled_mask) - resized_annotations.append(ann.wrap( - rle=mask_utils.frPyObjects(rle, *rle['size']))) - else: - resized_annotations.append(ann.wrap(image=rescaled_mask)) + rescaled_mask = self._lazy_resize_mask(ann, new_size) + resized_annotations.append(ann.wrap(image=rescaled_mask)) elif isinstance(ann, (Caption, Label)): resized_annotations.append(ann) else: diff --git a/tests/requirements.py b/tests/requirements.py index 04c95e5197..2b8a0e85fd 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -13,6 +13,8 @@ class DatumaroComponent: class Requirements: + # Please, maintain the ordering when adding new lines + # Exact requirements DATUM_GENERAL_REQ = "Datumaro 
general requirement" DATUM_TELEMETRY = "Datumaro telemetry requirement" @@ -46,6 +48,8 @@ class Requirements: DATUM_BUG_560 = "Reading MOT dataset with seqinfo produces 0-based indexing in frames" DATUM_BUG_583 = "Empty lines in VOC subset lists are not ignored" DATUM_BUG_602 = "Patch command example error" + DATUM_BUG_606 = "transform with resize also changed the image extension from .jpg to .png" + DATUM_BUG_618 = "ResizeTransform returns broken image pixels" class SkipMessages: diff --git a/tests/test_images.py b/tests/test_images.py index badef07d69..829ff0dcbb 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -82,8 +82,11 @@ def test_ctors(self): { 'data': image }, { 'data': image, 'path': path }, { 'data': image, 'path': path, 'size': (2, 4) }, + { 'data': image, 'ext': 'png' }, + { 'data': image, 'ext': 'png', 'size': (2, 4) }, { 'data': lambda p: image }, { 'data': lambda p: image, 'path': 'somepath' }, + { 'data': lambda p: image, 'ext': 'jpg' }, { 'path': path }, { 'path': path, 'data': load_image }, { 'path': path, 'data': load_image, 'size': (2, 4) }, @@ -95,6 +98,16 @@ def test_ctors(self): np.testing.assert_array_equal(img.data, image) self.assertEqual(img.size, tuple(image.shape[:2])) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_ctor_errors(self): + with self.subTest('no data specified'): + with self.assertRaisesRegex(Exception, "can not be empty"): + Image(ext='jpg', size=(1, 2)) + + with self.subTest('either path or ext'): + with self.assertRaisesRegex(Exception, "both 'path' and 'ext'"): + Image(path='somepath', ext='someext') + class BytesImageTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_lazy_image_shape(self): diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 47e3222e92..4961315d93 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -8,12 +8,13 @@ PointsCategories, Polygon, PolyLine, ) from datumaro.components.extractor import DatasetItem +from 
datumaro.components.media import Image from datumaro.components.project import Dataset from datumaro.util.test_utils import compare_datasets import datumaro.plugins.transforms as transforms import datumaro.util.mask_tools as mask_tools -from .requirements import Requirements, mark_requirement +from .requirements import Requirements, mark_bug, mark_requirement class TransformsTest(TestCase): @@ -568,9 +569,10 @@ def test_bboxes_values_decrement_transform(self): compare_datasets(self, dst_dataset, actual) @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @mark_bug(Requirements.DATUM_BUG_618) def test_can_resize(self): small_dataset = Dataset.from_iterable([ - DatasetItem(id=1, image=np.zeros((4, 4)), annotations=[ + DatasetItem(id=i, image=np.ones((4, 4)) * i, annotations=[ Label(1), Bbox(1, 1, 2, 2, label=2), Polygon([1, 1, 1, 2, 2, 2, 2, 1], label=1), @@ -582,11 +584,11 @@ def test_can_resize(self): [0, 1, 1, 0], [1, 1, 0, 0], ])) - ]) + ]) for i in range(3) ], categories=['a', 'b', 'c']) big_dataset = Dataset.from_iterable([ - DatasetItem(id=1, image=np.zeros((8, 8)), annotations=[ + DatasetItem(id=i, image=np.ones((8, 8)) * i, annotations=[ Label(1), Bbox(2, 2, 4, 4, label=2), Polygon([2, 2, 2, 4, 4, 4, 4, 2], label=1), @@ -602,7 +604,7 @@ def test_can_resize(self): [1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0], ])) - ]) + ]) for i in range(3) ], categories=['a', 'b', 'c']) with self.subTest('upscale'): @@ -612,3 +614,17 @@ def test_can_resize(self): with self.subTest('downscale'): actual = transforms.ResizeTransform(big_dataset, width=4, height=4) compare_datasets(self, small_dataset, actual) + + @mark_bug(Requirements.DATUM_BUG_606) + def test_can_keep_image_ext_on_resize(self): + expected = Image(np.ones((8, 4)), ext='jpg') + + dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=Image(np.ones((4, 2)), ext='jpg')) + ]) + + dataset.transform('resize', width=4, height=8) + + actual = dataset.get('1').image + self.assertEqual(actual.ext, 
expected.ext) + self.assertTrue(np.array_equal(actual.data, expected.data)) From 72f3a3828c836c4bbf20208ed9b0b3ca39831486 Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Fri, 14 Jan 2022 11:43:29 +0300 Subject: [PATCH 15/15] Depends on OpenVINO telemetry library 2022.1.0 from PyPI (#625) * Depends on OpenVINO telemetry library 2022.1.0 from PyPI * Update CHANGELOG.md * Better comment for openvino-telemetry dependency --- CHANGELOG.md | 2 ++ requirements-core.txt | 3 +++ requirements.txt | 4 ---- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af0d1a4d0a..bfb23ea4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - The `pycocotools` dependency lower bound is raised to `2.0.4`. () +- OpenVINO telemetry library 2022.1.0 from PyPI. + () ### Deprecated - TBD diff --git a/requirements-core.txt b/requirements-core.txt index 937c88db25..c667ecf448 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -13,6 +13,9 @@ pycocotools>=2.0.4; platform_system != "Windows" or python_version >= '3.9' pycocotools-windows; platform_system == "Windows" and python_version < '3.9' PyYAML>=5.3.1 +# A library to send the telemetry data from the OpenVINO toolkit components. +openvino-telemetry>=2022.1.0 + # 2.3 has an unlisted dependency on PyTorch, which we don't need tensorboardX>=1.8,!=2.3 diff --git a/requirements.txt b/requirements.txt index a128a65db0..bed04eb99c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,6 @@ opencv-python-headless>=4.1.0.25 -# Move to the core list once released on pip -# OpenVINO telemetry library -openvino-telemetry @ git+https://github.com/openvinotoolkit/telemetry.git@master#egg=openvino-telemetry - # testing pytest>=5.3.5