From a3227e79fd4b6553af9aa10db43e373d2d5ae39c Mon Sep 17 00:00:00 2001 From: jenhaoyang Date: Wed, 29 Dec 2021 21:01:19 +0800 Subject: [PATCH 01/15] fix broken url (#599) * fix broken url for import command --- site/content/en/docs/user-manual/how_to_use_datumaro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/content/en/docs/user-manual/how_to_use_datumaro.md b/site/content/en/docs/user-manual/how_to_use_datumaro.md index ffb252f410..4b077f8b4f 100644 --- a/site/content/en/docs/user-manual/how_to_use_datumaro.md +++ b/site/content/en/docs/user-manual/how_to_use_datumaro.md @@ -104,7 +104,7 @@ to use Datumaro from the command-line: - Create a Datumaro project and operate on it: - Create an empty project with [`create`](/docs/user-manual/command-reference/create) - - Import existing datasets with [`import`](/docs/user-manual/command-reference/import) + - Import existing datasets with [`import`](/docs/user-manual/command-reference/sources#source-import) - Modify the project with [`transform`](/docs/user-manual/command-reference/transform) and [`filter`](/docs/user-manual/command-reference/filter) - Create new revisions of the project with [`commit`](/docs/user-manual/command-reference/commit), navigate over From 1d111b0a8bddf3becf4ca728a4508a5e58e2f79f Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 29 Dec 2021 16:02:26 +0300 Subject: [PATCH 02/15] Fix broken links in the sources section (#601) --- site/content/en/docs/user-manual/command-reference/sources.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/content/en/docs/user-manual/command-reference/sources.md b/site/content/en/docs/user-manual/command-reference/sources.md index d37abf8e6e..5f40c596d8 100644 --- a/site/content/en/docs/user-manual/command-reference/sources.md +++ b/site/content/en/docs/user-manual/command-reference/sources.md @@ -12,7 +12,7 @@ These commands are specific for Data Sources. 
Read more about them [here](/docs/ Datasets can be added to a Datumaro project with the `import` command, which adds a dataset link into the project and downloads (or copies) the dataset. If you need to add a dataset already copied into the project, -use the [`add`](./sources#source-add) command. +use the [`add`](#source-add) command. Dataset format readers can provide some additional import options. To pass such options, use the `--` separator after the main command arguments. @@ -77,7 +77,7 @@ datum export -f tf_detection_api -- --save-images Existing datasets can be added to a Datumaro project with the `add` command. The command adds a project-local directory as a data source in the project. -Unlike the [`import`](./sources#source-import) +Unlike the [`import`](#source-import) command, it does not copy datasets and only works with local directories. The source name is defined by the directory name. From add81ddb59502362fa65fa07e5bc4d8c9f61afde Mon Sep 17 00:00:00 2001 From: Anastasia Yasakova Date: Wed, 29 Dec 2021 16:21:03 +0300 Subject: [PATCH 03/15] Fix: Cannot convert LabelMe dataset, that has no subsets (#600) * fix label_me extractor * update Changelog --- CHANGELOG.md | 2 ++ datumaro/plugins/labelme_format.py | 2 +- tests/requirements.py | 1 + tests/test_labelme_format.py | 10 ++++++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f22e5eb371..67bfad3aac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fails in multimerge when lines are not approximated and when there are no label categories () +- Cannot convert LabelMe dataset, that has no subsets + () ### Security - TBD diff --git a/datumaro/plugins/labelme_format.py b/datumaro/plugins/labelme_format.py index 821e11d75d..4cf047050b 100644 --- a/datumaro/plugins/labelme_format.py +++ b/datumaro/plugins/labelme_format.py @@ -95,7 +95,7 @@ def 
_parse(self, dataset_root): items.append(DatasetItem(id=item_id, subset=subset, image=image, annotations=annotations)) - subsets.add(subset) + subsets.add(items[-1].subset) return items, categories, subsets def _escape(s): diff --git a/tests/requirements.py b/tests/requirements.py index b9f14e2823..f3ad6332ad 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -36,6 +36,7 @@ class Requirements: DATUM_BUG_219 = "Return format is not uniform" DATUM_BUG_257 = "Dataset.filter doesn't count removed items" DATUM_BUG_259 = "Dataset.filter fails on merged datasets" + DATUM_BUG_289 = "Cannot convert LabelMe dataset, that has no subsets" DATUM_BUG_314 = "Unsuccessful remap_labels" DATUM_BUG_402 = "Troubles running 'remap_labels' on ProjectDataset" DATUM_BUG_404 = "custom importer/extractor not loading" diff --git a/tests/test_labelme_format.py b/tests/test_labelme_format.py index f966c9f28b..e496cc9303 100644 --- a/tests/test_labelme_format.py +++ b/tests/test_labelme_format.py @@ -303,3 +303,13 @@ def test_can_import(self): parsed = Dataset.import_from(DUMMY_DATASET_DIR, 'label_me') compare_datasets(self, expected=target_dataset, actual=parsed) + + @mark_requirement(Requirements.DATUM_BUG_289) + def test_can_convert(self): + source_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'label_me') + with TestDir() as test_dir: + LabelMeConverter.convert(source_dataset, test_dir, save_images=True) + parsed_dataset = Dataset.import_from(test_dir, 'label_me') + + compare_datasets(self, source_dataset, parsed_dataset, + require_images=True) From f316af443c9761721e661834054c7e0bf6311eaa Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Mon, 10 Jan 2022 15:47:43 +0300 Subject: [PATCH 04/15] Avoid PermissionError in GitPython 3.1.25 (#614) --- requirements-default.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-default.txt b/requirements-default.txt index 1557af3a62..7511cd88db 100644 --- a/requirements-default.txt +++ 
b/requirements-default.txt @@ -1,2 +1,2 @@ dvc>=2.7.0 -GitPython>=3.1.18 +GitPython>=3.1.18,!=3.1.25 # https://github.com/openvinotoolkit/datumaro/issues/612 From a76d815e0d623d08709f8328555e64d57740e2a1 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 10 Jan 2022 16:36:26 +0300 Subject: [PATCH 05/15] Remove most hacks that were added to work around issues in pycocotools (#449) pycocotools 2.0.4 includes changes to the build system that make it possible to downgrade NumPy after installing pycocotools, and therefore: * --no-binary=pycocotools is no longer necessary; * TensorFlow can be installed in the same `pip` invocation as Datumaro. The only hack that remains (that I know of) is the usage of `pycocotools-windows` on Windows, since the `pycocotools` PyPI project still doesn't provide Windows wheels (or any other wheels). --- .github/workflows/health_check.yml | 3 +-- .github/workflows/pr_checks.yml | 3 +-- CHANGELOG.md | 2 ++ requirements-core.txt | 2 +- requirements.txt | 2 +- site/content/en/docs/user-manual/installation.md | 14 -------------- 6 files changed, 6 insertions(+), 20 deletions(-) diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml index dcc172b114..d40a6138d5 100644 --- a/.github/workflows/health_check.yml +++ b/.github/workflows/health_check.yml @@ -18,8 +18,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Installing dependencies run: | - pip install tensorflow pytest pytest-cov - pip install -e .[default,tfds] + pip install -e '.[default,tf,tfds]' pytest pytest-cov - name: Code instrumentation run: | pytest -v --cov --cov-report xml:coverage.xml diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index 98645c7085..affa016a39 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -29,8 +29,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Installing dependencies run: | - pip install tensorflow pytest - pip install -e 
.[default,tfds] + pip install -e '.[default,tf,tfds]' pytest - name: Unit testing run: | pytest -v diff --git a/CHANGELOG.md b/CHANGELOG.md index 67bfad3aac..ee5672477d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `smooth_line` from `datumaro.util.annotation_util` - the function is renamed to `approximate_line` and has updated interface () +- The `pycocotools` dependency lower bound is raised to `2.0.4`. + () ### Deprecated - TBD diff --git a/requirements-core.txt b/requirements-core.txt index 6ebd01ee79..937c88db25 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -8,7 +8,7 @@ Pillow>=6.1.0 ruamel.yaml>=0.17.0 typing_extensions>=3.7.4.3 -pycocotools>=2.0.2; platform_system != "Windows" or python_version >= '3.9' +pycocotools>=2.0.4; platform_system != "Windows" or python_version >= '3.9' pycocotools-windows; platform_system == "Windows" and python_version < '3.9' PyYAML>=5.3.1 diff --git a/requirements.txt b/requirements.txt index 399f71d534..a128a65db0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ --r requirements-core.txt --no-binary=pycocotools # https://github.com/openvinotoolkit/datumaro/issues/253 +-r requirements-core.txt -r requirements-default.txt opencv-python-headless>=4.1.0.25 diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index 707adac15c..644bb228fc 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -61,20 +61,6 @@ plugin dependencies in the [plugins](/docs/user-manual/extending) section. This option can't be covered by extras due to Python packaging system limitations. -- Although Datumaro excludes `pycocotools` of version 2.0.2 in - requirements, it works with this version perfectly fine. 
The - reason for such requirement is binary incompatibility of the `numpy` - dependency in the `TensorFlow` and `pycocotools` binary packages, - and the current workaround forces this package to be build from sources - on most platforms - (see [#253](https://github.com/openvinotoolkit/datumaro/issues/253)). - If you need to use 2.0.2, make sure it is linked with the same version - of `numpy` as `TensorFlow` by reinstalling the package: - ``` bash - pip uninstall pycocotools - pip install pycocotools --no-binary=pycocotools - ``` - - When installing directly from the repository, you can change the installation branch with `...@`. Also use `--force-reinstall` parameter in this case. It can be useful for testing of unreleased From ead337f5612f19031a965c470e292a9f2dd00775 Mon Sep 17 00:00:00 2001 From: Kirill Sizov Date: Mon, 10 Jan 2022 16:38:21 +0300 Subject: [PATCH 06/15] Update formats documentation (#548) Add docs for ImageNet, LFW, Vgg face 2, WIDERFace formats --- site/content/en/docs/formats/imagenet.md | 146 +++++++++++++++++++++ site/content/en/docs/formats/lfw.md | 124 +++++++++++++++++ site/content/en/docs/formats/vgg_face2.md | 137 +++++++++++++++++++ site/content/en/docs/formats/wider_face.md | 130 ++++++++++++++++++ 4 files changed, 537 insertions(+) create mode 100644 site/content/en/docs/formats/imagenet.md create mode 100644 site/content/en/docs/formats/lfw.md create mode 100644 site/content/en/docs/formats/vgg_face2.md create mode 100644 site/content/en/docs/formats/wider_face.md diff --git a/site/content/en/docs/formats/imagenet.md b/site/content/en/docs/formats/imagenet.md new file mode 100644 index 0000000000..b728308503 --- /dev/null +++ b/site/content/en/docs/formats/imagenet.md @@ -0,0 +1,146 @@ +--- +title: 'ImageNet' +linkTitle: 'ImageNet' +description: '' +weight: 9 +--- + +## Format specification +ImageNet is one of the most popular datasets for image classification task, +this dataset is available for downloading 
+[here](https://image-net.org/download.php) + +Supported types of annotations: +- `Label` + +Format doesn't support any attributes for annotations objects. + +The original ImageNet dataset contains about 1.2M images and information +about class name for each image. Datumaro supports two versions of ImageNet +format: `imagenet` and `imagenet_txt`. The `imagenet_txt` format assumes storing +information about the class of the image in `*.txt` files. And `imagenet` format +assumes storing information about the class of the image in the name of +directory where is this image stored. + +## Import ImageNet dataset + +A Datumaro project with a ImageNet dataset can be created +in the following way: + +``` +datum create +datum import -f imagenet +# or +datum import -f imagenet_txt +``` + +> Note: if you use `datum import` then should not be a +> subdirectory of directory with Datumaro project, see more information about +> it in the [docs](/docs/user-manual/command-reference/sources/#source-add). + +Load ImageNet dataset through the Python API: + +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from('', format='imagenet_txt') +``` + +For successful importing of ImageNet dataset the input directory with dataset +should has the following structure: + + +{{< tabpane >}} + {{< tab header="imagenet">}} +imagenet_dataset/ +├── label_0 +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── label_1 +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── ... + {{< /tab >}} + {{< tab header="imagenet_txt">}} +imagenet_txt_dataset/ +├── images # directory with images +│ ├── .jpg +│ ├── .jpg +│ ├── .jpg +│ ├── ... +├── synsets.txt # optional, list of labels +└── train.txt # list of pairs (image_name, label) + {{< /tab >}} +{{< /tabpane >}} + +> Note: if you don't have synsets file then Datumaro will automatically generate +> classes with a name pattern `class-`. 
+ +Datumaro has few import options for `imagenet_txt` format, to apply them +use the `--` after the main command argument. + +`imagenet_txt` import options: +- `--labels` {`file`, `generate`}: allow to specify where to get label + descriptions from (use `file` to load from the file specified + by `--labels-file`; `generate` to create generic ones) +- `--labels-file` allow to specify path to the file with label descriptions + ("synsets.txt") + +## Export ImageNet dataset + +Datumaro can convert ImageNet into any other format +[Datumaro supports](/docs/user-manual/supported_formats). +To get the expected result, convert the dataset to a format +that supports `Label` annotation objects. + +``` +# Using `convert` command +datum convert -if imagenet -i \ + -f voc -o -- --save-images + +# Using Datumaro project +datum create +datum import -f imagenet_txt -- --labels generate +datum export -f open_images -o +``` + +And also you can convert your ImageNet dataset using Python API + +```python +from datumaro.components.dataset import Dataset + +imagenet_dataset = Dataset.import_from('', format='vgg_face2', save_images=True) +``` + +> Note: some formats have extra export options. For particular format see the +> [docs](/docs/formats/) to get information about it. 
+ +## Export dataset to the ImageNet format + +If your dataset contains `Label` for images and you want to convert this +dataset into the ImagetNet format, you can use Datumaro for it: + +``` +# Using convert command +datum convert -if open_images -i \ + -f imagenet_txt -o -- --save-images --save-dataset-meta + +# Using Datumaro project +datum create +datum import -f open_images +datum export -f imagenet -o +``` + +Extra options for exporting to ImageNet formats: +- `--save-images` allow to export dataset with saving images + (by default `False`) +- `--image-ext ` allow to specify image extension + for exporting the dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) diff --git a/site/content/en/docs/formats/lfw.md b/site/content/en/docs/formats/lfw.md new file mode 100644 index 0000000000..cc76f28580 --- /dev/null +++ b/site/content/en/docs/formats/lfw.md @@ -0,0 +1,124 @@ +--- +title: 'LFW' +linkTitle: 'LFW' +description: '' +weight: 6 +--- + +## Format specification + +[LFW (Labeled Faces in the Wild Home)](http://vis-www.cs.umass.edu/lfw/) +it's dataset for face identification task, +specification for this format is available +[here](http://vis-www.cs.umass.edu/lfw/README.txt). +You can also download original LFW dataset +[here](http://vis-www.cs.umass.edu/lfw/#download). + +Original dataset contains images with people faces. +For each image contains information about person's name, as well as +information about images that matched with this person +and mismatched with this person. +Also LFW contains additional information about landmark points on the face. 
+ + +Supported annotation types: +- `Label` +- `Points` (face landmark points) + +Supported attributes: +- `negative_pairs`: list with names of mismatched persons; +- `positive_pairs`: list with names of matched persons; + + +## Import LFW dataset + +Importing LFW dataset into the Datumaro project: +``` +datum create +datum import -f lfw +``` +See more information about adding datasets to the project in the +[docs](/docs/user-manual/command-reference/sources/#source-add). + +Also you can import LFW dataset from Python API: +```python +from datumaro.components.dataset import Dataset + +lfw_dataset = Dataset.import_from('', 'lfw') +``` + +For successful importing the LFW dataset, the directory with it +should has the following structure: + +``` +/ +├── subset_1 +│ ├── annotations +│ │ ├── landmarks.txt # list with landmark points for each image +│ │ ├── pairs.txt # list of matched and mismatched pairs of person +│ │ └── people.txt # optional file with a list of persons name +│ └── images +│ ├── name0 +│ │ ├── name0_0001.jpg +│ │ ├── name0_0002.jpg +│ │ ├── ... +│ ├── name1 +│ │ ├── name1_0001.jpg +│ │ ├── name1_0002.jpg +│ │ ├── ... +├── subset_2 +│ ├── ... +├── ... +``` + +Full description of annotation `*.txt` files available +[here](http://vis-www.cs.umass.edu/lfw/README.txt). + +## Export LFW dataset + +With Datumaro you can convert LFW dataset into any other +format [Datumaro supports](/docs/user-manual/supported_formats/). +Pay attention that this format should also support `Label` and/or `Points` +annotation types. + + +There is few ways to convert LFW dataset into other format: + +``` + +# Converting to ImageNet with `convert` command: +datum convert -if lfw -i ./lfw_dataset \ + -f imagenet -o ./output_dir -- --save-images + + +# Converting to VggFace2 through the Datumaro project: +datum create +datum add -f lfw ./lfw_dataset +datum export -f vgg_face2 -o ./output_dir2 +``` + +> Note: some formats have extra export options. 
For particular format see the +> [docs](/docs/formats/) to get information about it. + +## Export dataset to the LFW format + +With Datumaro you can export dataset that has `Label` or/and `Points` +annotations to the LFW format, example: + +``` +# Converting VGG Face2 dataset into the LFW format +datum convert -if vgg_face2 -i ./voc_dataset \ + -f lfw -o ./output_dir + + +# Export dataaset to the LFW format through the Datumaro project: +datum create +datum import -f voc_classification ../vgg_dataset +datum export -f lfw -o ./output_dir -- --save-images --image-ext png +``` + +Available extra export options for LFW dataset format: +- `--save-images` allow to export dataset with saving images. + (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original) diff --git a/site/content/en/docs/formats/vgg_face2.md b/site/content/en/docs/formats/vgg_face2.md new file mode 100644 index 0000000000..877d793463 --- /dev/null +++ b/site/content/en/docs/formats/vgg_face2.md @@ -0,0 +1,137 @@ +--- +title: 'Vgg Face2 CSV' +linkTitle: 'Vgg Face2 CSV' +description: '' +weight: 24 +--- + +## Format specification + +Vgg Face 2 is a dataset for face-recognition task, +the repository with some information and sample data of Vgg Face 2 is available +[here](https://github.com/ox-vgg/vgg_face2) + +Supported types of annotations: +- `Bbox` +- `Points` +- `Label` + +Format doesn't support any attributes for annotations objects. + +## Import Vgg Face2 dataset + +A Datumaro project with a Vgg Face 2 dataset can be created +in the following way: + +``` +datum create +datum import -f vgg_face2 +``` + +> Note: if you use `datum import` then should not be a +> subdirectory of directory with Datumaro project, see more information about +> it in the [docs](/docs/user-manual/command-reference/sources/#source-add). 
+ +And you can also load Vgg Face 2 through the Python API: + +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from('', format='vgg_face2') +``` + +For successful importing of Vgg Face2 face the input directory with dataset +should has the following structure: + +``` +vgg_face2_dataset/ +├── labels.txt # labels mapping +├── bb_landmark +│ ├── loose_bb_test.csv # information about bounding boxes for test subset +│ ├── loose_bb_train.csv +│ ├── loose_bb_.csv +│ ├── loose_landmark_test.csv # landmark points information for test subset +│ ├── loose_landmark_train.csv +│ └── loose_landmark_.csv +├── test +│ ├── n000001 # directory with images for n000001 label +│ │ ├── 0001_01.jpg +│ │ ├── 0001_02.jpg +│ │ ├── ... +│ ├── n000002 # directory with images for n000002 label +│ │ ├── 0002_01.jpg +│ │ ├── 0003_01.jpg +│ │ ├── ... +│ ├── ... +├── train +│ ├── n000004 +│ │ ├── 0004_01.jpg +│ │ ├── 0004_02.jpg +│ │ ├── ... +│ ├── ... +└── + ├── ... +``` + +## Export Vgg Face2 dataset + +Datumaro can convert a Vgg Face2 dataset into any other format +[Datumaro supports](/docs/user-manual/supported_formats/). +There is few examples how to do it: + +``` +# Using `convert` command +datum convert -if vgg_face2 -i \ + -f voc -o -- --save-images + +# Using Datumaro project +datum create +datum import -f vgg_face2 +datum export -f yolo -o +``` + +> Note: to get the expected result from the conversion, the output format +> should support the same types of annotations (one or more) as Vgg Face2 +> (`Bbox`, `Points`, `Label`) + +And also you can convert your Vgg Face2 dataset using Python API + +```python +from datumaro.components.dataset import Dataset + +vgg_face2_dataset = Dataset.import_from('', format='open_images', save_images=True) +``` + +> Note: some formats have extra export options. For particular format see the +> [docs](/docs/formats/) to get information about it. 
+ +## Export dataset to the Vgg Face2 format + +If you have dataset in some format and want to convert this dataset +into the Vgg Face2, ensure that this dataset contains `Bbox` or/and `Points` +or/and `Label` and use Datumaro to perform conversion. +There is few examples: + +``` +# Using convert command +datum convert -if wider_face -i \ + -f vgg_face2 -o + +# Using Datumaro project +datum create +datum import -f wider_face +datum export -f vgg_face2 -o -- --save-images --image-ext '.png' +``` + +> Note: `vgg_face2` format supports only one `Bbox` per image + +Extra options for exporting to Vgg Face2 format: + +- `--save-images` allow to export dataset with saving images + (by default `False`) +- `--image-ext ` allow to specify image extension + for exporting the dataset (by default `.png`) +- `--save-dataset-meta` - allow to export dataset with saving dataset meta + file (by default `False`) diff --git a/site/content/en/docs/formats/wider_face.md b/site/content/en/docs/formats/wider_face.md new file mode 100644 index 0000000000..a8f3763b4a --- /dev/null +++ b/site/content/en/docs/formats/wider_face.md @@ -0,0 +1,130 @@ +--- +title: WIDER Face +linkTitle: WIDER Face +description: '' +weight: 19 +--- + +## Format specification + +WIDER Face dataset is a face detection benchmark dataset, +that available for download [here](http://shuoyang1213.me/WIDERFACE/#Download). + +Supported types of annotation: +- `Bbox` +- `Label` + +Supported attributes for bboxes: +- `blur`: + - 0 face without blur; + - 1 face with normal blur; + - 2 face with heavy blur. +- `expression`: + - 0 face with typical expression; + - 1 face with exaggerate expression. +- `illumination`: + - 0 image contains normal illumination; + - 1 image contains extreme illumination. +- `pose`: + - 0 pose is typical; + - 1 pose is atypical. +- `invalid`: + - 0 image is valid; + - 1 image is invalid. +- `occluded`: + - 0 face without occlusion; + - 1 face with partial occlusion; + - 2 face with heavy occlusion. 
+ + +## Import WIDER Face dataset + +Importing of WIDER Face dataset into the Datumaro project: +``` +datum create +datum import -f wider_face +``` + +Directory with WIDER Face dataset should has the following structure: +``` + +├── labels.txt # optional file with list of classes +├── wider_face_split # directory with description of bboxes for each image +│   ├── wider_face_subset1_bbx_gt.txt +│   ├── wider_face_subset2_bbx_gt.txt +│   ├── ... +├── WIDER_subset1 # instead of 'subset1' you can use any other subset name +│   └── images +│   ├── 0--label_0 # instead of 'label_' you can use any other class name +│   │   ├── 0_label_0_image_01.jpg +│   │   ├── 0_label_0_image_02.jpg +│   │   ├── ... +│   ├── 1--label_1 +│   │   ├── 1_label_1_image_01.jpg +│   │   ├── 1_label_1_image_02.jpg +│   │   ├── ... +│   ├── ... +├── WIDER_subset2 +│ └── images +│  ├── ... +├── ... +``` +Check [README](http://shuoyang1213.me/WIDERFACE/support/bbx_annotation/wider_face_split.zip) +file of the original WIDER Face dataset to get more information +about structure of `.txt` annotation files. +Also example of WIDER Face dataset available in our +[test assets](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset). + +## Export WIDER Face dataset + +With Datumaro you can convert WIDER Face dataset into any other +format [Datumaro supports](/docs/user-manual/supported_formats/). +Pay attention that this format should also support `Label` and/or `Bbox` +annotation types. + +Few ways to export WIDER Face dataset using CLI: +``` +# Using `convert` command +datum convert -if wider_face -i \ + -f voc -o -- --save-images + +# Through the Datumaro project +datum create +datum import -f wider_face +datum export -f voc -o -- -save-images +``` + +Export WIDER Face dataset using Python API: +```python +from datumaro.components.dataset import Dataset + +dataset = Dataset.import_from(' Note: some formats have extra export options. 
For particular format see the +> [docs](/docs/formats/) to get information about it. + +## Export to WIDER Face dataset + +Using Datumaro you can convert your dataset into the WIDER Face format, +but for succseful exporting your dataset should contain `Label` and/or `Bbox`. + +Here example of exporting VOC dataset (object detection task) +into the WIDER Face format: + +``` +datum create +datum import -f voc_detection +datum export -f wider_face -o -- --save-images --image-ext='.png' +``` + +Available extra export options for WIDER Face dataset format: +- `--save-images` allow to export dataset with saving images. + (by default `False`) +- `--image-ext IMAGE_EXT` allow to specify image extension + for exporting dataset (by default - keep original) From 8f62699976d9eff4f482a2feabbda6cc320fec98 Mon Sep 17 00:00:00 2001 From: jenhaoyang Date: Tue, 11 Jan 2022 15:27:59 +0800 Subject: [PATCH 07/15] add more example for add command (#608) * add more examples for add command --- .../user-manual/command-reference/sources.md | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/site/content/en/docs/user-manual/command-reference/sources.md b/site/content/en/docs/user-manual/command-reference/sources.md index 5f40c596d8..fd0905563e 100644 --- a/site/content/en/docs/user-manual/command-reference/sources.md +++ b/site/content/en/docs/user-manual/command-reference/sources.md @@ -125,6 +125,30 @@ datum add -f cvat dataset2/train.xml datum export -f yolo -- --save-images ``` +Example: add an existing dataset into a project, avoid data copying + +To add a dataset, we need to have it inside the project directory: + +```bash +proj/ +├─ .datumaro/ +├─ .dvc/ +├─ my_coco/ +│ └─ images/ +│ ├─ image1.jpg +│ └─ ... 
+│ └─ annotations/ +│ └─ coco_annotation.json +├─ .dvcignore +└─ .gitignore +``` + +``` bash +datum create -o proj/ +mv ~/my_coco/ proj/my_coco/ # move the dataset into the project directory +datum add -p proj/ -f coco proj/my_coco/ +``` + ### Remove Datasets To remove a data source from a project, use the `remove` command. From 5da6618a355c8f979d3702b9a0799dd2a9a37b9f Mon Sep 17 00:00:00 2001 From: Anastasia Yasakova Date: Tue, 11 Jan 2022 12:21:52 +0300 Subject: [PATCH 08/15] Update the list of formats in the documentation (#598) * update the list of formats in docs and readme --- README.md | 28 ++- .../en/docs/user-manual/supported_formats.md | 183 ++++++++++-------- 2 files changed, 114 insertions(+), 97 deletions(-) diff --git a/README.md b/README.md index ebc822fab1..82bbd19d4e 100644 --- a/README.md +++ b/README.md @@ -26,28 +26,22 @@ CVAT annotations ---> Publication, statistics etc. [(Back to top)](#dataset-management-framework-datumaro) -- Dataset reading, writing, conversion in any direction. 
[Supported formats](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats): - - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`) - - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) - - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) - - [WIDER Face](http://shuoyang1213.me/WIDERFACE/) (`bboxes`) - - [VGGFace2](https://github.com/ox-vgg/vgg_face2) (`landmarks`, `bboxes`) - - [MOT sequences](https://arxiv.org/pdf/1906.04567.pdf) - - [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) - - [ImageNet](http://image-net.org/) +- Dataset reading, writing, conversion in any direction. 
- [CIFAR-10/100](https://www.cs.toronto.edu/~kriz/cifar.html) (`classification`) - - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`) - - [MNIST in CSV](https://pjreddie.com/projects/mnist-in-csv/) (`classification`) - - [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) - [Cityscapes](https://www.cityscapes-dataset.com/) - - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`) - - [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format_navi) (`point cloud`) + - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`) - [CVAT](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) + - [ImageNet](http://image-net.org/) + - [Kitti](http://www.cvlibs.net/datasets/kitti/index.php) (`segmentation`, `detection`, `3D raw` / `velodyne points`) - [LabelMe](http://labelme.csail.mit.edu/Release3.0) - - [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2) (`word_recognition`, `text_localization`, `text_segmentation`) - - [Market-1501](https://www.aitribune.com/dataset/2018051063) (`person re-identification`) - [LFW](http://vis-www.cs.umass.edu/lfw/) (`classification`, `person re-identification`, `landmarks`) + - [MNIST](http://yann.lecun.com/exdb/mnist/) (`classification`) + - [Open Images](https://storage.googleapis.com/openimages/web/download.html) + - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`) + - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`) + - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`) + + Other formats and documentation for them can be found 
[here](https://openvinotoolkit.github.io/datumaro/docs/user-manual/supported_formats). - Dataset building - Merging multiple datasets into one - Dataset filtering by a custom criteria: diff --git a/site/content/en/docs/user-manual/supported_formats.md b/site/content/en/docs/user-manual/supported_formats.md index 8311652763..0de49a0107 100644 --- a/site/content/en/docs/user-manual/supported_formats.md +++ b/site/content/en/docs/user-manual/supported_formats.md @@ -6,61 +6,45 @@ weight: 3 --- List of supported formats: -- MS COCO - (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`,`panoptic`, `stuff`) - - [Format specification](http://cocodataset.org/#format-data) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/coco_dataset) - - `labels` are our extension - like `instances` with only `category_id` - - [Format documentation](/docs/formats/coco) -- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), - `action_classification`, `person_layout`) - - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/voc_dataset) - - [Format documentation](/docs/formats/pascal_voc) -- YOLO (`bboxes`) - - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/yolo_dataset) - - [Format documentation](/docs/formats/yolo) -- TF Detection API (`bboxes`, `masks`) - - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), - [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/tf_detection_api_dataset) -- WIDER Face 
(`bboxes`) - - [Format specification](http://shuoyang1213.me/WIDERFACE/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset) -- VGGFace2 (`landmarks`, `bboxes`) - - [Format specification](https://github.com/ox-vgg/vgg_face2) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vgg_face2_dataset) -- MOT sequences - - [Format specification](https://arxiv.org/pdf/1906.04567.pdf) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mot_dataset) -- MOTS (png) - - [Format specification](https://www.vision.rwth-aachen.de/page/mots) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mots_dataset) -- ImageNet (`classification`, `detection`) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_dataset) - - [Dataset example (txt for classification)](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_txt_dataset) - - Detection format is the same as in PASCAL VOC +- ADE20k (v2017) (import-only) + - [Format specification](https://www.kaggle.com/soumikrakshit/ade20k) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset) + - [Format documentation](/docs/formats/ade20k2017) +- ADE20k (v2020) (import-only) + - [Format specification](https://groups.csail.mit.edu/vision/datasets/ADE20K/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2020_dataset) + - [Format documentation](/docs/formats/ade20k2020) +- Align CelebA (`classification`, `landmarks`) (import-only) + - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/align_celeba_dataset) + - [Format documentation](/docs/formats/align_celeba) +- CamVid (`segmentation`) 
+ - [Format specification](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/camvid_dataset) +- CelebA (`classification`, `detection`, `landmarks`) (import-only) + - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/celeba_dataset) + - [Format documentation](/docs/formats/celeba) - CIFAR-10/100 (`classification` (python version)) - [Format specification](https://www.cs.toronto.edu/~kriz/cifar.html) - [Dataset example CIFAR-10](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cifar10_dataset) - [Dataset example CIFAR-100](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cifar100_dataset) - [Format documentation](/docs/formats/cifar) -- MNIST (`classification`) - - [Format specification](http://yann.lecun.com/exdb/mnist/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_dataset) - - [Format documentation](/docs/formats/mnist) -- MNIST in CSV (`classification`) - - [Format specification](https://pjreddie.com/projects/mnist-in-csv/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_csv_dataset) - - [Format documentation](/docs/formats/mnist) -- CamVid (`segmentation`) - - [Format specification](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/camvid_dataset) - Cityscapes (`segmentation`) - [Format specification](https://www.cityscapes-dataset.com/dataset-overview/) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cityscapes_dataset) - [Format documentation](/docs/formats/cityscapes) +- CVAT (`for images`, `for video` (import-only)) + - [Format 
specification](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cvat_dataset) +- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`) + - [Format specification](https://rrc.cvc.uab.es/?ch=2) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/icdar_dataset) +- ImageNet (`classification`, `detection`) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_dataset) + - [Dataset example (txt for classification)](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/imagenet_txt_dataset) + - Detection format is the same as in PASCAL VOC + - [Format documentation](/docs/formats/imagenet) - KITTI (`segmentation`, `detection`) - [Format specification](http://www.cvlibs.net/datasets/kitti/index.php) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/kitti_dataset) @@ -69,53 +53,92 @@ List of supported formats: - [Format specification](http://www.cvlibs.net/datasets/kitti/raw_data.php) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/kitti_dataset/kitti_raw) - [Format documentation](/docs/formats/kitti_raw) +- LabelMe (`labels`, `boxes`, `masks`) + - [Format specification](http://labelme.csail.mit.edu/Release3.0) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/labelme_dataset) +- LFW (`classification`, `person re-identification`, `landmarks`) + - [Format specification](http://vis-www.cs.umass.edu/lfw/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/lfw_dataset) + - [Format documentation](/docs/formats/lfw) +- Mapillary Vistas (import-only) + - [Format specification](https://www.mapillary.com/dataset/vistas) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mapillary_vistas_dataset) + - [Format documentation](/docs/formats/mapillary_vistas) +- Market-1501 (`person re-identification`) + - [Format specification](https://www.aitribune.com/dataset/2018051063) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/market1501_dataset) +- MARS (import-only) + - [Format specification](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mars_dataset) + - [Format documentation](/docs/formats/mars) +- MNIST (`classification`) + - [Format specification](http://yann.lecun.com/exdb/mnist/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_dataset) + - [Format documentation](/docs/formats/mnist) +- MNIST in CSV (`classification`) + - [Format specification](https://pjreddie.com/projects/mnist-in-csv/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mnist_csv_dataset) + - [Format documentation](/docs/formats/mnist) +- MOT sequences + - [Format specification](https://arxiv.org/pdf/1906.04567.pdf) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mot_dataset) +- MOTS (png) + - [Format specification](https://www.vision.rwth-aachen.de/page/mots) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mots_dataset) +- MPII Human Pose Dataset (`detection`, `pose estimation`) (import-only) + - [Format specification](http://human-pose.mpi-inf.mpg.de) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_dataset) + - [Format documentation](/docs/formats/mpii) +- MPII Human Pose Dataset (JSON) (`detection`, `pose estimation`) (import-only) + - [Format specification](http://human-pose.mpi-inf.mpg.de) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_json_dataset) + - [Format documentation](/docs/formats/mpii_json) +- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`,`panoptic`, `stuff`) + - [Format specification](http://cocodataset.org/#format-data) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/coco_dataset) + - `labels` are our extension - like `instances` with only `category_id` + - [Format documentation](/docs/formats/coco) +- Open Images (`classification`, `detection`, `segmentation`) + - [Format specification](https://storage.googleapis.com/openimages/web/download.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/open_images_dataset) + - [Format documentation](/docs/formats/open_images) +- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), + `action_classification`, `person_layout`) + - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/voc_dataset) + - [Format documentation](/docs/formats/pascal_voc) - Supervisely (`pointcloud`) - [Format specification](https://docs.supervise.ly/data-organization/00_ann_format_navi) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/sly_pointcloud_dataset) - [Format documentation](/docs/formats/sly_pointcloud) -- SYNTHIA (`segmentation`) +- SYNTHIA (`segmentation`) (import-only) - [Format specification](https://synthia-dataset.net/) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/synthia_dataset) - [Format documentation](/docs/formats/synthia) -- CVAT - - [Format specification](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format) - - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cvat_dataset) -- LabelMe - - [Format specification](http://labelme.csail.mit.edu/Release3.0) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/labelme_dataset) -- ICDAR13/15 (`word_recognition`, `text_localization`, `text_segmentation`) - - [Format specification](https://rrc.cvc.uab.es/?ch=2) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/icdar_dataset) -- Market-1501 (`person re-identification`) - - [Format specification](https://www.aitribune.com/dataset/2018051063) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/market1501_dataset) -- LFW (`classification`, `person re-identification`, `landmarks`) - - [Format specification](http://vis-www.cs.umass.edu/lfw/) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/lfw_dataset) -- CelebA (`classification`, `detection`, `landmarks`) - - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/celeba_dataset) - - [Format documentation](/docs/formats/celeba) -- Align CelebA (`classification`, `landmarks`) - - [Format specification](https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/align_celeba_dataset) - - [Format documentation](/docs/formats/align_celeba) -- VoTT CSV (`detection`) +- TF Detection API (`bboxes`, `masks`) + - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), + [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md) + - [Dataset 
example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/tf_detection_api_dataset) +- VGGFace2 (`landmarks`, `bboxes`) + - [Format specification](https://github.com/ox-vgg/vgg_face2) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vgg_face2_dataset) + - [Format documentation](/docs/formats/vgg_face2) +- VoTT CSV (`detection`) (import-only) - [Format specification](https://github.com/microsoft/VoTT) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vott_csv_dataset) - [Format documentation](/docs/formats/vott_csv) -- VoTT JSON (`detection`) +- VoTT JSON (`detection`) (import-only) - [Format specification](https://github.com/microsoft/VoTT) - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/vott_json_dataset) - [Format documentation](/docs/formats/vott_json) -- MPII Human Pose Dataset (`detection`, `pose estimation`) - - [Format specification](http://human-pose.mpi-inf.mpg.de) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_dataset) - - [Format documentation](/docs/formats/mpii) -- MPII Human Pose Dataset (JSON) (`detection`, `pose estimation`) - - [Format specification](http://human-pose.mpi-inf.mpg.de) - - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mpii_json_dataset) - - [Format documentation](/docs/formats/mpii_json) +- WIDER Face (`bboxes`) + - [Format specification](http://shuoyang1213.me/WIDERFACE/) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/widerface_dataset) + - [Format documentation](/docs/formats/wider_face) +- YOLO (`bboxes`) + - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) + - [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/yolo_dataset) + - [Format documentation](/docs/formats/yolo) ### 
Supported annotation types From 46bbbbf4bfaca5464c06c186880d3f17639d7caa Mon Sep 17 00:00:00 2001 From: Timur Osmanov <54434686+TOsmanov@users.noreply.github.com> Date: Tue, 11 Jan 2022 12:26:23 +0300 Subject: [PATCH 09/15] Improved documentation site. Added copy button for the code block (#611) * fix broken links * add the copy code button --- site/assets/scss/_custom.scss | 34 +++++++++++++++++++ site/config.toml | 2 +- site/content/en/docs/contributing.md | 4 ++- site/content/en/docs/design.md | 2 +- site/content/en/docs/developer_manual.md | 2 +- site/content/en/docs/formats/ade20k2017.md | 6 ++-- site/content/en/docs/formats/ade20k2020.md | 6 ++-- site/content/en/docs/formats/align_celeba.md | 6 ++-- site/content/en/docs/formats/celeba.md | 6 ++-- site/content/en/docs/formats/cifar.md | 9 +++-- site/content/en/docs/formats/cityscapes.md | 13 ++++--- site/content/en/docs/formats/coco.md | 8 +++-- site/content/en/docs/formats/icdar.md | 7 ++-- site/content/en/docs/formats/kitti.md | 14 +++++--- site/content/en/docs/formats/kitti_raw.md | 8 +++-- .../en/docs/formats/mapillary_vistas.md | 8 +++-- site/content/en/docs/formats/market1501.md | 7 ++-- site/content/en/docs/formats/mnist.md | 8 +++-- site/content/en/docs/formats/mpii.md | 4 ++- site/content/en/docs/formats/mpii_json.md | 4 ++- site/content/en/docs/formats/open_images.md | 11 +++--- site/content/en/docs/formats/pascal_voc.md | 19 ++++++----- .../content/en/docs/formats/sly_pointcloud.md | 10 ++++-- site/content/en/docs/formats/synthia.md | 6 ++-- site/content/en/docs/formats/vott_csv.md | 6 ++-- site/content/en/docs/formats/vott_json.md | 6 ++-- site/content/en/docs/formats/yolo.md | 16 +++++---- .../user-manual/command-reference/stats.md | 2 +- .../command-reference/transform.md | 3 +- site/content/en/docs/user-manual/extending.md | 12 +++++-- .../en/docs/user-manual/installation.md | 3 +- .../en/docs/user-manual/supported_formats.md | 2 +- 32 files changed, 180 insertions(+), 74 deletions(-) diff --git 
a/site/assets/scss/_custom.scss b/site/assets/scss/_custom.scss index c7244b3cfe..90592013a0 100644 --- a/site/assets/scss/_custom.scss +++ b/site/assets/scss/_custom.scss @@ -71,3 +71,37 @@ html { scroll-padding-top: 70px; /* height of sticky header */ } + +/* Code blocks */ + +div.code-toolbar .toolbar { + padding-right: 0.6em; +} + +pre[class*="language-"] { + background: #f8f9fa !important; +} + +details { + max-width: 80%; +} + +@media (max-width: 991px) { + details { + max-width: 100% !important; + } +} + +li > details { + max-width: 100%; +} + +.code-toolbar { + max-width: 80%; +} + +details > .code-toolbar, +details > summary > .code-toolbar, +.highlight > .code-toolbar { + max-width: 100%; +} diff --git a/site/config.toml b/site/config.toml index be9754a89b..26ccb47a04 100644 --- a/site/config.toml +++ b/site/config.toml @@ -139,7 +139,7 @@ algolia_docsearch = false offlineSearch = true # Enable syntax highlighting and copy buttons on code blocks with Prism -prism_syntax_highlighting = false +prism_syntax_highlighting = true # User interface configuration [params.ui] diff --git a/site/content/en/docs/contributing.md b/site/content/en/docs/contributing.md index ad9b68ed10..0e5e8070ca 100644 --- a/site/content/en/docs/contributing.md +++ b/site/content/en/docs/contributing.md @@ -131,7 +131,9 @@ To run tests use: ``` bash pytest -v -# or +``` +or +``` bash python -m pytest -v ``` diff --git a/site/content/en/docs/design.md b/site/content/en/docs/design.md index 5acef3aeb1..ccf64f6251 100644 --- a/site/content/en/docs/design.md +++ b/site/content/en/docs/design.md @@ -82,7 +82,7 @@ extending CVAT UI capabilities regarding task and project operations. It should be capable of downloading and processing data from CVAT. 
-```lang-none +``` User | v diff --git a/site/content/en/docs/developer_manual.md b/site/content/en/docs/developer_manual.md index 7dabd02b9c..6fec21a601 100644 --- a/site/content/en/docs/developer_manual.md +++ b/site/content/en/docs/developer_manual.md @@ -16,7 +16,7 @@ all of this. Basic library usage and data flow: -```lang-none +``` Extractors -> Dataset -> Converter | Filtration diff --git a/site/content/en/docs/formats/ade20k2017.md b/site/content/en/docs/formats/ade20k2017.md index 6c3fc6bc52..de26b2a3a7 100644 --- a/site/content/en/docs/formats/ade20k2017.md +++ b/site/content/en/docs/formats/ade20k2017.md @@ -83,7 +83,7 @@ image. Each line in the text file contains: Each column is separated by a `#`. See example of dataset [here](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/ade20k2017_dataset). -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -98,7 +98,9 @@ formats using CLI: datum create datum import -f ade20k2017 datum export -f coco -o -- --save-images -# or +``` +or +``` bash datum convert -if ade20k2017 -i \ -f coco -o -- --save-images ``` diff --git a/site/content/en/docs/formats/ade20k2020.md b/site/content/en/docs/formats/ade20k2020.md index fe79939291..21f416d4a5 100644 --- a/site/content/en/docs/formats/ade20k2020.md +++ b/site/content/en/docs/formats/ade20k2020.md @@ -102,7 +102,7 @@ See our [tests asset](https://github.com/openvinotoolkit/datumaro/tree/develop/t for example of this file, or check [ADE20K toolkit](https://github.com/CSAILVision/ADE20K) for it. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -117,7 +117,9 @@ formats using CLI: datum create datum import -f ade20k2020 datum export -f coco -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if ade20k2020 -i \ -f coco -o -- --save-images ``` diff --git a/site/content/en/docs/formats/align_celeba.md b/site/content/en/docs/formats/align_celeba.md index dca8ad5ff0..319898f4b4 100644 --- a/site/content/en/docs/formats/align_celeba.md +++ b/site/content/en/docs/formats/align_celeba.md @@ -70,7 +70,7 @@ landmarks and subsets respectively (optional). The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -85,7 +85,9 @@ formats using CLI: datum create datum import -f align_celeba datum export -f imagenet_txt -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if align_celeba -i \ -f imagenet_txt -o -- --save-images ``` diff --git a/site/content/en/docs/formats/celeba.md b/site/content/en/docs/formats/celeba.md index e975a1eedd..9830648fa6 100644 --- a/site/content/en/docs/formats/celeba.md +++ b/site/content/en/docs/formats/celeba.md @@ -72,7 +72,7 @@ attributes, bounding boxes, landmarks and subsets respectively The original CelebA dataset stores images in a .7z archive. The archive needs to be unpacked before importing. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -87,7 +87,9 @@ formats using CLI: datum create datum import -f celeba datum export -f imagenet_txt -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if celeba -i \ -f imagenet_txt -o -- --save-images ``` diff --git a/site/content/en/docs/formats/cifar.md b/site/content/en/docs/formats/cifar.md index b21177f016..74aa613ac2 100644 --- a/site/content/en/docs/formats/cifar.md +++ b/site/content/en/docs/formats/cifar.md @@ -102,7 +102,7 @@ CIFAR-100: 'coarse_labels': list of integers ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -117,7 +117,9 @@ formats using CLI: datum create datum import -f cifar datum export -f imagenet -o -# or +``` +or +``` bash datum convert -if cifar -i \ -f imagenet -o -- --save-images ``` @@ -139,7 +141,8 @@ There are several ways to convert a dataset to CIFAR format: # export dataset into CIFAR format from existing project datum export -p -f cifar -o \ -- --save-images - +``` +``` bash # converting to CIFAR format from other format datum convert -if imagenet -i \ -f cifar -o -- --save-images diff --git a/site/content/en/docs/formats/cityscapes.md b/site/content/en/docs/formats/cityscapes.md index e110dac60c..274a21b9e7 100644 --- a/site/content/en/docs/formats/cityscapes.md +++ b/site/content/en/docs/formats/cityscapes.md @@ -68,7 +68,7 @@ Annotated files description: is the instance ID. If a certain annotation describes multiple instances, then the pixels have the regular ID of that class -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `label_colors.txt`. 
If the `dataset_meta.json` is not represented in the dataset, then `label_colors.txt` will be imported if possible. @@ -97,7 +97,9 @@ formats using CLI: datum create datum import -f cityscapes datum export -f voc -o -# or +``` +or +``` bash datum convert -if cityscapes -i \ -f voc -o -- --save-images ``` @@ -119,6 +121,8 @@ There are several ways to convert a dataset to Cityscapes format: # export dataset into Cityscapes format from existing project datum export -p -f cityscapes -o \ -- --save-images +``` +``` bash # converting to Cityscapes format from other format datum convert -if voc -i \ -f cityscapes -o -- --save-images @@ -139,8 +143,9 @@ Extra options for exporting to Cityscapes format: # 255 0 0 person #... datum export -f cityscapes -- --label-map mycolormap.txt - -# or you can use original cityscapes colomap: +``` +or you can use original cityscapes colormap: +``` bash datum export -f cityscapes -- --label-map cityscapes ``` diff --git a/site/content/en/docs/formats/coco.md b/site/content/en/docs/formats/coco.md index 99a14bde35..04fc788ad4 100644 --- a/site/content/en/docs/formats/coco.md +++ b/site/content/en/docs/formats/coco.md @@ -132,7 +132,7 @@ task-specific formats instead of plain `coco`: `coco_captions`, `coco_person_keypoints`, `coco_stuff`. In this case all items of the dataset will be added to the `default` subset. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). You can import a dataset for one or several tasks instead of the whole dataset.
This option also allows to import annotation @@ -164,7 +164,9 @@ using CLI: datum create datum import -f coco datum export -f voc -o -# or +``` +or +``` bash datum convert -if coco -i -f voc -o ``` @@ -185,6 +187,8 @@ There are several ways to convert a dataset to COCO format: # export dataset into COCO format from existing project datum export -p -f coco -o \ -- --save-images +``` +``` bash # converting to COCO format from other format datum convert -if voc -i \ -f coco -o -- --save-images diff --git a/site/content/en/docs/formats/icdar.md b/site/content/en/docs/formats/icdar.md index ba2ed7e5f5..2be4fb4183 100644 --- a/site/content/en/docs/formats/icdar.md +++ b/site/content/en/docs/formats/icdar.md @@ -42,7 +42,7 @@ Supported attributes: There is few ways to import ICDAR dataset with Datumaro: - Through the Datumaro project -``` +``` bash datum create datum import -f icdar_text_localization datum import -f icdar_text_segmentation @@ -107,11 +107,12 @@ See more information about adding datasets to the project in the ## Export to other formats Datumaro can convert ICDAR dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/). Examples: -``` +``` bash # converting ICDAR text segmentation dataset into the VOC with `convert` command datum convert -if icdar_text_segmentation -i source_dataset \ -f voc -o export_dir -- --save-images - +``` +``` bash # converting ICDAR text localization into the LabelMe through Datumaro project datum create datum import -f icdar_text_localization source_dataset diff --git a/site/content/en/docs/formats/kitti.md b/site/content/en/docs/formats/kitti.md index fde450c3d7..23227326a0 100644 --- a/site/content/en/docs/formats/kitti.md +++ b/site/content/en/docs/formats/kitti.md @@ -15,7 +15,7 @@ Supported tasks / formats: The format specification is available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip). 
- [Segmentation](http://www.cvlibs.net/datasets/kitti/eval_semseg.php?benchmark=semantics2015) - `kitti_segmentation` The format specification is available in `README.md` [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_semantics.zip). -- Raw 3D / Velodyne Points - described [here](/formats/kitti_raw) +- Raw 3D / Velodyne Points - described [here](/docs/formats/kitti_raw) Supported annotation types: - `Bbox` (object detection) @@ -100,7 +100,7 @@ KITTI segmentation dataset directory should have the following structure: └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `label_colors.txt`. If the `dataset_meta.json` is not represented in the dataset, then `label_colors.txt` will be imported if possible. @@ -131,7 +131,9 @@ There are several ways to convert a KITTI dataset to other dataset formats: datum create datum import -f kitti datum export -f cityscapes -o -# or +``` +or +``` bash datum convert -if kitti -i -f cityscapes -o ``` @@ -152,6 +154,8 @@ There are several ways to convert a dataset to KITTI format: # export dataset into KITTI format from existing project datum export -p -f kitti -o \ -- --save-images +``` +``` bash # converting to KITTI format from other format datum convert -if cityscapes -i \ -f kitti -o -- --save-images @@ -175,7 +179,9 @@ Extra options for exporting to KITTI format: #... 
datum export -f kitti -- --label-map mycolormap.txt -# or you can use original kitti colomap: +``` +or you can use original kitti colormap: +``` bash datum export -f kitti -- --label-map kitti ``` - `--tasks TASKS` allow to specify tasks for export dataset, diff --git a/site/content/en/docs/formats/kitti_raw.md b/site/content/en/docs/formats/kitti_raw.md index 7b727b9561..1a49527a9b 100644 --- a/site/content/en/docs/formats/kitti_raw.md +++ b/site/content/en/docs/formats/kitti_raw.md @@ -71,7 +71,7 @@ provides an option to use a special index file to allow this. ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). A Datumaro project with a KITTI source can be created in the following way: @@ -100,7 +100,9 @@ There are several ways to convert a KITTI Raw dataset to other dataset formats: datum create datum import -f kitti_raw datum export -f sly_pointcloud -o -# or +``` +or +``` bash datum convert -if kitti_raw -i -f sly_pointcloud ``` @@ -121,6 +123,8 @@ There are several ways to convert a dataset to KITTI Raw format: # export dataset into KITTI Raw format from existing project datum export -p -f kitti_raw -o \ -- --save-images +``` +``` bash # converting to KITTI Raw format from other format datum convert -if sly_pointcloud -i \ -f kitti_raw -o -- --save-images --reindex diff --git a/site/content/en/docs/formats/mapillary_vistas.md b/site/content/en/docs/formats/mapillary_vistas.md index 2a1e7d448a..de637c9035 100644 --- a/site/content/en/docs/formats/mapillary_vistas.md +++ b/site/content/en/docs/formats/mapillary_vistas.md @@ -40,7 +40,9 @@ Use one of subformats (`mapillary_vistas_instances`, `mapillary_vistas_panoptic` if your dataset contains both panoptic and instance masks: ```bash datum add -f mapillary_vistas_instances ./dataset -# or +``` +or +``` bash datum add -f
mapillary_vistas_panoptic ./dataset ``` @@ -163,7 +165,7 @@ dataset {{< /tab >}} {{< /tabpane >}} -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). See examples of annotation files in -[test assets](https://github.com/openvinotoolkit/datumaro/blob/develop/tests/assets/mappilary_vistas_dataset). +[test assets](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/mapillary_vistas_dataset). diff --git a/site/content/en/docs/formats/market1501.md b/site/content/en/docs/formats/market1501.md index f1c3e14c37..ce720cd6eb 100644 --- a/site/content/en/docs/formats/market1501.md +++ b/site/content/en/docs/formats/market1501.md @@ -40,7 +40,7 @@ These item attributes decodes into the image name with such convention: ## Import Market-1501 dataset Importing of Market-1501 dataset into the Datumaro project: -``` +```bash datum create datum import -f market1501 ``` @@ -81,11 +81,12 @@ market1501_dataset/ With Datumaro you can export dataset, that has `person_id` item attribute, to the Market-1501 format, example: -``` +```bash # Converting MARS dataset into the Market-1501 datum convert -if mars -i ./mars_dataset \ -f market1501 -o ./output_dir - +``` +``` bash # Export dataaset to the Market-1501 format through the Datumaro project: datum create datum add -f mars ../mars diff --git a/site/content/en/docs/formats/mnist.md b/site/content/en/docs/formats/mnist.md index 26628dfa21..ca3ac0f551 100644 --- a/site/content/en/docs/formats/mnist.md +++ b/site/content/en/docs/formats/mnist.md @@ -87,7 +87,7 @@ MNIST in CSV dataset directory should have the following structure: └── mnist_train.csv ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use 
[`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `labels.txt`. If the `dataset_meta.json` is not represented in the dataset, then `labels.txt` will be imported if possible. @@ -121,7 +121,9 @@ There are several ways to convert a MNIST dataset to other dataset formats: datum create datum import -f mnist datum export -f imagenet -o -# or +``` +or +``` bash datum convert -if mnist -i -f imagenet -o ``` @@ -145,6 +147,8 @@ There are several ways to convert a dataset to MNIST format: # export dataset into MNIST format from existing project datum export -p -f mnist -o \ -- --save-images +``` +``` bash # converting to MNIST format from other format datum convert -if imagenet -i \ -f mnist -o -- --save-images diff --git a/site/content/en/docs/formats/mpii.md b/site/content/en/docs/formats/mpii.md index 6cee546930..f9408be680 100644 --- a/site/content/en/docs/formats/mpii.md +++ b/site/content/en/docs/formats/mpii.md @@ -63,7 +63,9 @@ to other dataset formats using CLI: datum create datum import -f mpii datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if mpii -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/mpii_json.md b/site/content/en/docs/formats/mpii_json.md index 1eed4801b6..c8ca3f19f9 100644 --- a/site/content/en/docs/formats/mpii_json.md +++ b/site/content/en/docs/formats/mpii_json.md @@ -66,7 +66,9 @@ to other dataset formats using CLI: datum create datum import -f mpii_json datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if mpii_json -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/open_images.md b/site/content/en/docs/formats/open_images.md index dbca5ef610..0cf5a20b35 100644 --- a/site/content/en/docs/formats/open_images.md +++ b/site/content/en/docs/formats/open_images.md @@ -192,7 +192,7 @@ The mask images must be extracted from the ZIP archives linked above. 
To use per-subset image description files instead of `image_ids_and_rotation.csv`, place them in the `annotations` subdirectory. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ### Creating an image metadata file @@ -209,7 +209,7 @@ and record it in an image metadata file. This file must be placed at `annotations/images.meta`, and must contain one line per image, with the following structure: -``` +``` bash ``` @@ -241,7 +241,9 @@ There are several ways to convert OID to other dataset formats: datum create datum import -f open_images datum export -f cvat -o -# or +``` +or +``` bash datum convert -if open_images -i -f cvat -o ``` @@ -262,7 +264,8 @@ There are several ways to convert an existing dataset to the Open Images format: # export dataset into Open Images format from existing project datum export -p -f open_images -o \ -- --save_images - +``` +``` bash # convert a dataset in another format to the Open Images format datum convert -if imagenet -i \ -f open_images -o \ diff --git a/site/content/en/docs/formats/pascal_voc.md b/site/content/en/docs/formats/pascal_voc.md index ea4e6eab05..704b9ca31d 100644 --- a/site/content/en/docs/formats/pascal_voc.md +++ b/site/content/en/docs/formats/pascal_voc.md @@ -108,7 +108,7 @@ These directories contain `.txt` files with a list of images in a subset, the subset name is the same as the `.txt` file name. Subset names can be arbitrary. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file) +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file) and `labelmap.txt`. If the `dataset_meta.json` is not represented in the dataset, then `labelmap.txt` will be imported if possible. 
@@ -116,7 +116,7 @@ If the `dataset_meta.json` is not represented in the dataset, then In `labelmap.txt` you can define custom color map and non-pascal labels, for example: -``` +``` txt # label_map [label : color_rgb : parts : actions] helicopter::: elephant:0:124:134:head,ear,foot: @@ -129,7 +129,7 @@ have arbitrary, but different, colors. If there are gaps in the used color indices in the annotations, they must be filled with arbitrary dummy labels. Example: -``` +``` txt car:0,128,0:: # color index 0 aeroplane:10,10,128:: # color index 1 _dummy2:2,2,2:: # filler for color index 2 @@ -168,9 +168,10 @@ There are several ways to convert a Pascal VOC dataset to other dataset formats: datum create datum import -f voc datum export -f coco -o -# or +``` +or +``` bash datum convert -if voc -i -f coco -o - ``` Or, using Python API: @@ -189,7 +190,8 @@ There are several ways to convert an existing dataset to Pascal VOC format: ``` bash # export dataset into Pascal VOC format (classification) from existing project datum export -p -f voc -o -- --tasks classification - +``` +``` bash # converting to Pascal VOC format from other format datum convert -if imagenet -i \ -f voc -o \ @@ -223,8 +225,9 @@ datum export -f voc -- --tasks detection,classification # cat:0,0,255:: # person:255,0,0:head: datum export -f voc_segmentation -- --label-map mycolormap.txt - -# or you can use original voc colomap: +``` +or you can use original voc colomap: +``` bash datum export -f voc_segmentation -- --label-map voc ``` diff --git a/site/content/en/docs/formats/sly_pointcloud.md b/site/content/en/docs/formats/sly_pointcloud.md index ed7cd25140..472daa8b16 100644 --- a/site/content/en/docs/formats/sly_pointcloud.md +++ b/site/content/en/docs/formats/sly_pointcloud.md @@ -67,7 +67,9 @@ There are two ways to import a Supervisely Point Cloud dataset: ```bash datum create datum import --format sly_pointcloud --input-path -# or +``` +or +``` bash datum create datum import -f sly_pointcloud ``` @@ 
-93,7 +95,9 @@ to other dataset formats: datum create datum import -f sly_pointcloud datum export -f kitti_raw -o -# or +``` +or +``` bash datum convert -if sly_pointcloud -i -f kitti_raw ``` @@ -114,6 +118,8 @@ There are several ways to convert a dataset to Supervisely Point Cloud format: # export dataset into Supervisely Point Cloud format from existing project datum export -p -f sly_pointcloud -o \ -- --save-images +``` +``` bash # converting to Supervisely Point Cloud format from other format datum convert -if kitti_raw -i \ -f sly_pointcloud -o -- --save-images diff --git a/site/content/en/docs/formats/synthia.md b/site/content/en/docs/formats/synthia.md index 6397cd67cf..272eaffeca 100644 --- a/site/content/en/docs/formats/synthia.md +++ b/site/content/en/docs/formats/synthia.md @@ -98,7 +98,7 @@ If it is missing, `GT/COLOR` folder will be used. The original dataset also contains depth information, but Datumaro does not currently support it. -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -113,7 +113,9 @@ formats using CLI: datum create datum import -f synthia datum export -f voc -o -- --save-images -# or +``` +or +``` bash datum convert -if synthia -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/vott_csv.md b/site/content/en/docs/formats/vott_csv.md index 05743b6815..294eb3f73a 100644 --- a/site/content/en/docs/formats/vott_csv.md +++ b/site/content/en/docs/formats/vott_csv.md @@ -48,7 +48,7 @@ dataset/ └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). 
## Export to other formats @@ -63,7 +63,9 @@ formats using CLI: datum create datum import -f vott_csv datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if vott_csv -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/vott_json.md b/site/content/en/docs/formats/vott_json.md index 46f5d17480..5d8d08d4f8 100644 --- a/site/content/en/docs/formats/vott_json.md +++ b/site/content/en/docs/formats/vott_json.md @@ -48,7 +48,7 @@ dataset/ └── ... ``` -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -63,7 +63,9 @@ formats using CLI: datum create datum import -f vott_json datum export -f voc -o ./save_dir -- --save-images -# or +``` +or +``` bash datum convert -if vott_json -i \ -f voc -o -- --save-images ``` diff --git a/site/content/en/docs/formats/yolo.md b/site/content/en/docs/formats/yolo.md index 33e50ea2f3..9a17d09562 100644 --- a/site/content/en/docs/formats/yolo.md +++ b/site/content/en/docs/formats/yolo.md @@ -64,7 +64,7 @@ YOLO dataset directory should have the following structure: - `obj.data` should have the following content, it is not necessary to have both subsets, but necessary to have one of them: -``` +``` txt classes = 5 # optional names = train = @@ -73,14 +73,14 @@ backup = backup/ # optional ``` - `obj.names` contains a list of classes. The line number for the class is the same as its index: -``` +``` txt label1 # label1 has index 0 label2 # label2 has index 1 label3 # label2 has index 2 ... ``` - Files `train.txt` and `valid.txt` should have the following structure: -``` +``` txt ... 
@@ -88,7 +88,7 @@ label3 # label2 has index 2 - Files in directories `obj_train_data/` and `obj_valid_data/` should contain information about labeled bounding boxes for images: -``` +``` txt # image1.txt: # 0 0.250000 0.400000 0.300000 0.400000 @@ -98,7 +98,7 @@ Here `x_center`, `y_center`, `width`, and `height` are relative to the image's width and height. The `x_center` and `y_center` are center of rectangle (are not top-left corner). -To add custom classes, you can use [`dataset_meta.json`](/docs/user_manual/supported_formats/#dataset-meta-file). +To add custom classes, you can use [`dataset_meta.json`](/docs/user-manual/supported_formats/#dataset-meta-file). ## Export to other formats @@ -113,7 +113,9 @@ There are several ways to convert a YOLO dataset to other dataset formats: datum create datum add -f yolo datum export -f voc -o -# or +``` +or +``` bash datum convert -if yolo -i \ -f coco_instances -o ``` @@ -134,7 +136,7 @@ if the dataset supports object detection task. Example: -``` +```bash datum create datum import -f coco_instances datum export -f yolo -o -- --save-images diff --git a/site/content/en/docs/user-manual/command-reference/stats.md b/site/content/en/docs/user-manual/command-reference/stats.md index c2eb54612a..aab11eea6b 100644 --- a/site/content/en/docs/user-manual/command-reference/stats.md +++ b/site/content/en/docs/user-manual/command-reference/stats.md @@ -35,7 +35,7 @@ Sample output:
-``` +``` json { "annotations": { "labels": { diff --git a/site/content/en/docs/user-manual/command-reference/transform.md b/site/content/en/docs/user-manual/command-reference/transform.md index 727f397d3d..7da52497dd 100644 --- a/site/content/en/docs/user-manual/command-reference/transform.md +++ b/site/content/en/docs/user-manual/command-reference/transform.md @@ -211,5 +211,6 @@ datum transform -t ndr -- \ - Resize dataset images and annotations. Supports upscaling, downscaling and mixed variants. -``` +```bash datum transform -t resize -- -dw 256 -dh 256 +``` diff --git a/site/content/en/docs/user-manual/extending.md b/site/content/en/docs/user-manual/extending.md index e491312692..4328006e2f 100644 --- a/site/content/en/docs/user-manual/extending.md +++ b/site/content/en/docs/user-manual/extending.md @@ -28,11 +28,17 @@ The plugin depends on TensorFlow, which can be installed with `pip`: ``` bash pip install tensorflow -# or +``` +or +``` bash pip install tensorflow-gpu -# or +``` +or +``` bash pip install datumaro[tf] -# or +``` +or +``` bash pip install datumaro[tf-gpu] ``` diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index 644bb228fc..e6496ec2d8 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -24,7 +24,8 @@ Install: ``` bash # From PyPI: pip install datumaro[default] - +``` +``` bash # From the GitHub repository: pip install 'git+https://github.com/openvinotoolkit/datumaro[default]' ``` diff --git a/site/content/en/docs/user-manual/supported_formats.md b/site/content/en/docs/user-manual/supported_formats.md index 0de49a0107..ae16f0a687 100644 --- a/site/content/en/docs/user-manual/supported_formats.md +++ b/site/content/en/docs/user-manual/supported_formats.md @@ -160,7 +160,7 @@ in a specific format, only relevant annotations are exported. It is possible to use classes that are not original to the format. 
To do this, use `dataset_meta.json`. -``` +```json { "label_map": {"0": "background", "1": "car", "2": "person"}, "segmentation_colors": [[0, 0, 0], [255, 0, 0], [0, 0, 255]], From 86cf76387e976e7e3e1b33ced65b07f8a31aad8d Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 11 Jan 2022 15:29:02 +0300 Subject: [PATCH 10/15] Mark python 3.6 unsupported (#617) * Mark python 3.6 unsupported in setup.py * Update changelog * Drop py3.6 from CI * Update docs * Update python code --- .github/workflows/health_check.yml | 4 ++-- .github/workflows/pr_checks.yml | 2 +- CHANGELOG.md | 3 ++- datumaro/util/attrs_util.py | 8 +------- setup.py | 2 +- site/content/en/docs/contributing.md | 2 +- site/content/en/docs/getting_started.md | 2 +- site/content/en/docs/user-manual/installation.md | 2 +- 8 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/health_check.yml b/.github/workflows/health_check.yml index d40a6138d5..1090b96ae9 100644 --- a/.github/workflows/health_check.yml +++ b/.github/workflows/health_check.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9'] runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -24,6 +24,6 @@ jobs: pytest -v --cov --cov-report xml:coverage.xml datum -h - name: Sending coverage results - if: matrix.python-version == '3.6' + if: matrix.python-version == '3.7' run: | bash <(curl -Ls https://coverage.codacy.com/get.sh) report -r coverage.xml -t ${{ secrets.CODACY_PROJECT_TOKEN }} diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index affa016a39..8879a4c4c9 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -18,7 +18,7 @@ jobs: fail-fast: false matrix: os: ['macos-10.15', 'ubuntu-20.04', 'windows-2016'] - python-version: ['3.6', '3.7', '3.8', '3.9'] + python-version: ['3.7', '3.8', '3.9'] name: build and test (${{ matrix.os }}, Python ${{ 
matrix.python-version }}) runs-on: ${{ matrix.os }} steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index ee5672477d..ec1538e630 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - TBD ### Removed -- TBD +- Official support of Python 3.6 (due to it's EOL) + () ### Fixed - Fails in multimerge when lines are not approximated and when there are no diff --git a/datumaro/util/attrs_util.py b/datumaro/util/attrs_util.py index fe40bbfc00..1b7b41e472 100644 --- a/datumaro/util/attrs_util.py +++ b/datumaro/util/attrs_util.py @@ -22,13 +22,7 @@ def validator(inst, attribute, value): value = default else: dst_type = None - if attribute.type and inspect.isclass(attribute.type) and \ - not hasattr(attribute.type, '__origin__'): - # ^^^^^^^ - # Disallow Generics in python 3.6 - # Can be dropped with 3.6 support. Generics canot be used - # in isinstance() checks. - + if attribute.type and inspect.isclass(attribute.type): dst_type = attribute.type elif conv and inspect.isclass(conv): dst_type = conv diff --git a/setup.py b/setup.py index d82a827c99..3000534844 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def parse_requirements(filename=CORE_REQUIREMENTS_FILE): "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires='>=3.6', + python_requires='>=3.7', install_requires=CORE_REQUIREMENTS, extras_require={ 'tf': ['tensorflow'], diff --git a/site/content/en/docs/contributing.md b/site/content/en/docs/contributing.md index 0e5e8070ca..e9e905be60 100644 --- a/site/content/en/docs/contributing.md +++ b/site/content/en/docs/contributing.md @@ -14,7 +14,7 @@ weight: 50 ### Prerequisites -- Python (3.6+) +- Python (3.7+) ``` bash git clone https://github.com/openvinotoolkit/datumaro diff --git a/site/content/en/docs/getting_started.md b/site/content/en/docs/getting_started.md index fdb98da7c9..ceb7094635 100644 --- 
a/site/content/en/docs/getting_started.md +++ b/site/content/en/docs/getting_started.md @@ -12,7 +12,7 @@ To read about the design concept and features of Datumaro, go to the [design sec ### Dependencies -- Python (3.6+) +- Python (3.7+) - Optional: OpenVINO, TensorFlow, PyTorch, MxNet, Caffe, Accuracy Checker Optionally, create a virtual environment: diff --git a/site/content/en/docs/user-manual/installation.md b/site/content/en/docs/user-manual/installation.md index e6496ec2d8..ce8b811877 100644 --- a/site/content/en/docs/user-manual/installation.md +++ b/site/content/en/docs/user-manual/installation.md @@ -7,7 +7,7 @@ weight: 1 ### Dependencies -- Python (3.6+) +- Python (3.7+) - Optional: OpenVINO, TensorFlow, PyTorch, MxNet, Caffe, Accuracy Checker ### Installation steps From 260ad5b2f7dbb20c73a62c78de7740d59e7db0e3 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 11 Jan 2022 16:15:37 +0300 Subject: [PATCH 11/15] Upgrade code to use Python 3.7 features (#619) * Make use of postponed evaluation of annotations Now that Python 3.7 is the minimal supported version, we don't need to manually quote types anymore. * Import NoReturn from typing rather than typing_extensions Now that the minimum version of Python is 3.7, it's guaranteed to be there. 
--- datumaro/components/annotation.py | 12 ++++---- datumaro/components/dataset.py | 30 ++++++++++--------- datumaro/components/extractor.py | 8 +++-- datumaro/components/extractor_tfds.py | 14 +++++---- datumaro/components/format_detection.py | 7 ++--- datumaro/components/media.py | 12 ++++---- datumaro/components/project.py | 26 ++++++++-------- .../sly_pointcloud_format/converter.py | 6 ++-- datumaro/util/scope.py | 8 +++-- 9 files changed, 69 insertions(+), 54 deletions(-) diff --git a/datumaro/components/annotation.py b/datumaro/components/annotation.py index 8fb74ec43f..6ef26f73aa 100644 --- a/datumaro/components/annotation.py +++ b/datumaro/components/annotation.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from enum import Enum, auto from itertools import zip_longest from typing import ( @@ -101,7 +103,7 @@ def from_iterable(cls, iterable: Iterable[Union[ Tuple[str], Tuple[str, str], Tuple[str, str, List[str]], - ]]) -> 'LabelCategories': + ]]) -> LabelCategories: """ Creates a LabelCategories from iterable. @@ -180,7 +182,7 @@ class MaskCategories(Categories): @classmethod def generate(cls, size: int = 255, include_background: bool = True) \ - -> 'MaskCategories': + -> MaskCategories: """ Generates MaskCategories with the specified size. @@ -336,7 +338,7 @@ class CompiledMask: @staticmethod def from_instance_masks(instance_masks: Iterable[Mask], instance_ids: Optional[Iterable[int]] = None, - instance_labels: Optional[Iterable[int]] = None) -> 'CompiledMask': + instance_labels: Optional[Iterable[int]] = None) -> CompiledMask: """ Joins instance masks into a single mask. Masks are sorted by z_order (ascending) prior to merging. 
@@ -655,7 +657,7 @@ class Category: def from_iterable(cls, iterable: Union[ Tuple[int, List[str]], Tuple[int, List[str], Set[Tuple[int, int]]], - ]) -> 'PointsCategories': + ]) -> PointsCategories: """ Create PointsCategories from an iterable. diff --git a/datumaro/components/dataset.py b/datumaro/components/dataset.py index 981b270518..a9c00d73a1 100644 --- a/datumaro/components/dataset.py +++ b/datumaro/components/dataset.py @@ -1,7 +1,9 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import contextmanager from copy import copy from enum import Enum, auto @@ -105,7 +107,7 @@ def __copy__(self): class DatasetItemStorageDatasetView(IDataset): class Subset(IDataset): - def __init__(self, parent: 'DatasetItemStorageDatasetView', name: str): + def __init__(self, parent: DatasetItemStorageDatasetView, name: str): super().__init__() self.parent = parent self.name = name @@ -182,7 +184,7 @@ class DatasetPatch: class DatasetPatchWrapper(DatasetItemStorageDatasetView): # The purpose of this class is to indicate that the input dataset is # a patch and autofill patch info in Converter - def __init__(self, patch: 'DatasetPatch', parent: IDataset): + def __init__(self, patch: DatasetPatch, parent: IDataset): super().__init__(patch.data, parent.categories()) self.patch = patch @@ -212,7 +214,7 @@ def as_dataset(self, parent: IDataset) -> IDataset: return __class__.DatasetPatchWrapper(self, parent) class DatasetSubset(IDataset): # non-owning view - def __init__(self, parent: 'Dataset', name: str): + def __init__(self, parent: Dataset, name: str): super().__init__() self.parent = parent self.name = name @@ -249,7 +251,7 @@ def subsets(self): def categories(self): return self.parent.categories() - def as_dataset(self) -> 'Dataset': + def as_dataset(self) -> Dataset: return Dataset.from_extractors(self, env=self.parent.env) @@ -608,7 +610,7 @@ class 
Dataset(IDataset): @classmethod def from_iterable(cls, iterable: Iterable[DatasetItem], categories: Union[CategoriesInfo, List[str], None] = None, - env: Optional[Environment] = None) -> 'Dataset': + env: Optional[Environment] = None) -> Dataset: if isinstance(categories, list): categories = { AnnotationType.label: LabelCategories.from_iterable(categories) @@ -632,7 +634,7 @@ def categories(self): @staticmethod def from_extractors(*sources: IDataset, - env: Optional[Environment] = None) -> 'Dataset': + env: Optional[Environment] = None) -> Dataset: if len(sources) == 1: source = sources[0] else: @@ -709,7 +711,7 @@ def remove(self, id: str, subset: Optional[str] = None) -> None: self._data.remove(id, subset) def filter(self, expr: str, filter_annotations: bool = False, - remove_empty: bool = False) -> 'Dataset': + remove_empty: bool = False) -> Dataset: if filter_annotations: return self.transform(XPathAnnotationsFilter, expr, remove_empty) else: @@ -717,7 +719,7 @@ def filter(self, expr: str, filter_annotations: bool = False, def update(self, source: Union[DatasetPatch, IExtractor, Iterable[DatasetItem]]) \ - -> 'Dataset': + -> Dataset: """ Updates items of the current dataset from another dataset or an iterable (the source). Items from the source overwrite matching @@ -734,7 +736,7 @@ def update(self, return self def transform(self, method: Union[str, Type[Transform]], - *args, **kwargs) -> 'Dataset': + *args, **kwargs) -> Dataset: """ Applies some function to dataset items. 
""" @@ -754,7 +756,7 @@ def transform(self, method: Union[str, Type[Transform]], return self - def run_model(self, model, batch_size=1) -> 'Dataset': + def run_model(self, model, batch_size=1) -> Dataset: from datumaro.components.launcher import Launcher, ModelTransform if isinstance(model, Launcher): return self.transform(ModelTransform, launcher=model, @@ -765,7 +767,7 @@ def run_model(self, model, batch_size=1) -> 'Dataset': raise TypeError("Unexpected 'model' argument type: %s" % \ type(model)) - def select(self, pred: Callable[[DatasetItem], bool]) -> 'Dataset': + def select(self, pred: Callable[[DatasetItem], bool]) -> Dataset: class _DatasetFilter(ItemTransform): def transform_item(self, item): if pred(item): @@ -863,12 +865,12 @@ def save(self, save_dir: Optional[str] = None, **kwargs) -> None: format=self._format, **options) @classmethod - def load(cls, path: str, **kwargs) -> 'Dataset': + def load(cls, path: str, **kwargs) -> Dataset: return cls.import_from(path, format=DEFAULT_FORMAT, **kwargs) @classmethod def import_from(cls, path: str, format: Optional[str] = None, - env: Optional[Environment] = None, **kwargs) -> 'Dataset': + env: Optional[Environment] = None, **kwargs) -> Dataset: from datumaro.components.config_model import Source if env is None: diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py index 0923eeb659..20072dc233 100644 --- a/datumaro/components/extractor.py +++ b/datumaro/components/extractor.py @@ -1,7 +1,9 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from glob import iglob from typing import Any, Callable, Dict, Iterator, List, Optional import os @@ -101,10 +103,10 @@ def __len__(self) -> int: def __bool__(self): # avoid __len__ use for truth checking return True - def subsets(self) -> Dict[str, 'IExtractor']: + def subsets(self) -> Dict[str, IExtractor]: raise NotImplementedError() - 
def get_subset(self, name) -> 'IExtractor': + def get_subset(self, name) -> IExtractor: raise NotImplementedError() def categories(self) -> CategoriesInfo: diff --git a/datumaro/components/extractor_tfds.py b/datumaro/components/extractor_tfds.py index b2efc3fb77..cd54d3112a 100644 --- a/datumaro/components/extractor_tfds.py +++ b/datumaro/components/extractor_tfds.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from typing import ( Any, Callable, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union, ) @@ -44,7 +46,7 @@ class _TfdsAdapter: metadata: TfdsDatasetMetadata def transform_categories(self, - tfds_builder: 'tfds.core.DatasetBuilder', categories: CategoriesInfo, + tfds_builder: tfds.core.DatasetBuilder, categories: CategoriesInfo, ) -> None: for t in self.category_transformers: t(tfds_builder, categories) @@ -58,7 +60,7 @@ class _SetLabelCategoriesFromClassLabelFeature: feature_path: Union[str, Tuple[str, ...]] def __call__(self, - tfds_builder: 'tfds.core.DatasetBuilder', categories: CategoriesInfo, + tfds_builder: tfds.core.DatasetBuilder, categories: CategoriesInfo, ) -> None: assert AnnotationType.label not in categories if isinstance(self.feature_path, str): @@ -186,9 +188,9 @@ def __call__(self, tfds_example: Any) -> str: } class _TfdsSplitExtractor(IExtractor): - def __init__(self, parent: '_TfdsExtractor', - tfds_split: 'tf.data.Dataset', - tfds_split_info: 'tfds.core.SplitInfo', + def __init__(self, parent: _TfdsExtractor, + tfds_split: tf.data.Dataset, + tfds_split_info: tfds.core.SplitInfo, ): self._parent = parent self._tfds_split = tfds_split diff --git a/datumaro/components/format_detection.py b/datumaro/components/format_detection.py index 1a92574482..2a735884ad 100644 --- a/datumaro/components/format_detection.py +++ b/datumaro/components/format_detection.py @@ -1,18 +1,17 @@ -# Copyright (C) 2021 Intel 
Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT from enum import IntEnum from typing import ( - Callable, Collection, Iterator, List, Optional, Sequence, TextIO, Union, + Callable, Collection, Iterator, List, NoReturn, Optional, Sequence, TextIO, + Union, ) import contextlib import fnmatch import glob import os.path as osp -from typing_extensions import NoReturn - class FormatDetectionConfidence(IntEnum): """ diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 2c72b21ec6..63d48d60a1 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from typing import Callable, Iterable, Iterator, Optional, Tuple, Union import os import os.path as osp @@ -194,7 +196,7 @@ def save(self, path): save_image(path, self.data) class VideoFrame(Image): - def __init__(self, video: 'Video', index: int): + def __init__(self, video: Video, index: int): self._video = video self._index = index @@ -209,19 +211,19 @@ def index(self) -> int: return self._index @property - def video(self) -> 'Video': + def video(self) -> Video: return self._video class _VideoFrameIterator(Iterator[VideoFrame]): """ Provides sequential access to the video frames. 
""" - _video: 'Video' + _video: Video _iterator: Iterator[VideoFrame] _pos: int _current_frame_data: Optional[np.ndarray] - def __init__(self, video: 'Video'): + def __init__(self, video: Video): self._video = video self._reset() diff --git a/datumaro/components/project.py b/datumaro/components/project.py index c4448db8f0..0d61c5b761 100644 --- a/datumaro/components/project.py +++ b/datumaro/components/project.py @@ -1,7 +1,9 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import ExitStack, suppress from enum import Enum, auto from typing import ( @@ -48,7 +50,7 @@ class ProjectSourceDataset(IDataset): - def __init__(self, path: str, tree: 'Tree', source: str, + def __init__(self, path: str, tree: Tree, source: str, readonly: bool = False): config = tree.sources[source] @@ -191,7 +193,7 @@ def __contains__(self, name: str): return name in self._data class _DataSourceBase(CrudProxy[Source]): - def __init__(self, tree: 'Tree', config_field: str): + def __init__(self, tree: Tree, config_field: str): self._tree = tree self._field = config_field @@ -209,7 +211,7 @@ def remove(self, name: str): self._data.remove(name) class ProjectSources(_DataSourceBase): - def __init__(self, tree: 'Tree'): + def __init__(self, tree: Tree): super().__init__(tree, 'sources') def __getitem__(self, name): @@ -297,13 +299,13 @@ def _get_subgraph(graph, target): """ return graph.subgraph(nx.ancestors(graph, target) | {target}) - def get_slice(self, target) -> 'Pipeline': + def get_slice(self, target) -> Pipeline: pipeline = Pipeline() pipeline._graph = self._get_subgraph(self._graph, target).copy() return pipeline class ProjectBuilder: - def __init__(self, project: 'Project', tree: 'Tree'): + def __init__(self, project: Project, tree: Tree): self._project = project self._tree = tree @@ -663,7 +665,7 @@ class ProjectBuildTargets(CrudProxy[BuildTarget]): 
MAIN_TARGET = 'project' BASE_STAGE = 'root' - def __init__(self, tree: 'Tree'): + def __init__(self, tree: Tree): self._tree = tree @property @@ -1353,9 +1355,9 @@ class Tree: # - attached to the work dir # - attached to a revision - def __init__(self, project: 'Project', + def __init__(self, project: Project, config: Union[None, Dict, Config, TreeConfig] = None, - rev: Union[None, 'Revision'] = None): + rev: Union[None, Revision] = None): assert isinstance(project, Project) assert not rev or project.is_ref(rev), rev @@ -1379,7 +1381,7 @@ def dump(self, path): os.makedirs(osp.dirname(path), exist_ok=True) self._config.dump(path) - def clone(self) -> 'Tree': + def clone(self) -> Tree: return Tree(self._project, TreeConfig(self.config), self._rev) @property @@ -1399,7 +1401,7 @@ def env(self) -> Environment: return self._project.env @property - def rev(self) -> Union[None, 'Revision']: + def rev(self) -> Union[None, Revision]: return self._rev def make_pipeline(self, target: Optional[str] = None) -> Pipeline: @@ -1622,7 +1624,7 @@ def _init_vcs(self): @classmethod @scoped - def init(cls, path) -> 'Project': + def init(cls, path) -> Project: existing_project = cls.find_project_dir(path) if existing_project: raise ProjectAlreadyExists(path) diff --git a/datumaro/plugins/sly_pointcloud_format/converter.py b/datumaro/plugins/sly_pointcloud_format/converter.py index c019da5509..d632a0c18f 100644 --- a/datumaro/plugins/sly_pointcloud_format/converter.py +++ b/datumaro/plugins/sly_pointcloud_format/converter.py @@ -1,10 +1,12 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT # The format is described here: # https://docs.supervise.ly/data-organization/00_ann_format_navi +from __future__ import annotations + from datetime import datetime import json import logging as log @@ -24,7 +26,7 @@ class _SuperviselyPointCloudDumper: def __init__(self, extractor: IExtractor, - context: 
'SuperviselyPointCloudConverter'): + context: SuperviselyPointCloudConverter): self._extractor = extractor self._context = context diff --git a/datumaro/util/scope.py b/datumaro/util/scope.py index a857882ca5..69175c7ab1 100644 --- a/datumaro/util/scope.py +++ b/datumaro/util/scope.py @@ -1,7 +1,9 @@ -# Copyright (C) 2021 Intel Corporation +# Copyright (C) 2021-2022 Intel Corporation # # SPDX-License-Identifier: MIT +from __future__ import annotations + from contextlib import ExitStack, contextmanager from functools import partial, wraps from typing import Any, Callable, ContextManager, Dict, Optional, Tuple, TypeVar @@ -95,7 +97,7 @@ def disable(self): def close(self): self.__exit__(None, None, None) - def __enter__(self) -> 'Scope': + def __enter__(self) -> Scope: return self def __exit__(self, exc_type, exc_value, exc_traceback): @@ -106,7 +108,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): self._stack.pop_all() # prevent issues on repetitive calls @classmethod - def current(cls) -> 'Scope': + def current(cls) -> Scope: return cls._thread_locals.current @contextmanager From 20d3d74cfbe9b5f23d53491c05c5102b8dfc7810 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Tue, 11 Jan 2022 17:12:48 +0300 Subject: [PATCH 12/15] Fix project dataset saving when rpath is used (#613) * Fix project dataset export when source rpath is specified * Add test * Update changelog --- CHANGELOG.md | 2 ++ datumaro/components/project.py | 12 ++++++++++-- tests/requirements.py | 1 + tests/test_project.py | 22 ++++++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec1538e630..92f7fb9a6b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 label categories () - Cannot convert LabelMe dataset, that has no subsets () +- Saving (overwriting) a dataset in a project when rpath is used + () ### Security - TBD diff --git 
a/datumaro/components/project.py b/datumaro/components/project.py index 0d61c5b761..7361349475 100644 --- a/datumaro/components/project.py +++ b/datumaro/components/project.py @@ -54,12 +54,20 @@ def __init__(self, path: str, tree: Tree, source: str, readonly: bool = False): config = tree.sources[source] + rpath = path if config.path: - path = osp.join(path, config.path) + rpath = osp.join(path, config.path) - self.__dict__['_dataset'] = Dataset.import_from(path, + dataset = Dataset.import_from(rpath, env=tree.env, format=config.format, **config.options) + # Using rpath won't allow to save directly with .save() when a file + # path is specified. Dataset doesn't know the root location and if + # it exists at all, but in a project, we do. + dataset.bind(path, format=dataset.format, options=dataset.options) + + self.__dict__['_dataset'] = dataset + self.__dict__['_config'] = config self.__dict__['_readonly'] = readonly self.__dict__['name'] = source diff --git a/tests/requirements.py b/tests/requirements.py index f3ad6332ad..04c95e5197 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -45,6 +45,7 @@ class Requirements: DATUM_BUG_470 = "Cannot to import Cityscapes dataset without images" DATUM_BUG_560 = "Reading MOT dataset with seqinfo produces 0-based indexing in frames" DATUM_BUG_583 = "Empty lines in VOC subset lists are not ignored" + DATUM_BUG_602 = "Patch command example error" class SkipMessages: diff --git a/tests/test_project.py b/tests/test_project.py index b6c115dd15..e9b68fd791 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -1112,6 +1112,28 @@ def test_cant_redownload_unhashed(self): with self.assertRaises(MissingSourceHashError): project.working_tree.make_dataset('source1.root') + @mark_requirement(Requirements.DATUM_BUG_602) + @scoped + def test_can_save_local_source_with_relpath(self): + test_dir = scope_add(TestDir()) + source_url = osp.join(test_dir, 'source') + source_dataset = Dataset.from_iterable([ + 
DatasetItem(0, subset='a', image=np.ones((2, 3, 3)), + annotations=[ Bbox(1, 2, 3, 4, label=0) ]), + DatasetItem(1, subset='b', image=np.zeros((10, 20, 3)), + annotations=[ Bbox(1, 2, 3, 4, label=1) ]), + ], categories=['a', 'b']) + source_dataset.save(source_url, save_images=True) + + project = scope_add(Project.init(osp.join(test_dir, 'proj'))) + project.import_source('s1', url=source_url, format=DEFAULT_FORMAT, + rpath=osp.join('annotations', 'b.json')) + + read_dataset = project.working_tree.make_dataset('s1') + self.assertEqual(read_dataset.data_path, project.source_data_dir('s1')) + + read_dataset.save() + class BackwardCompatibilityTests_v0_1(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) @scoped From a7791a662a41c88cc4db085d4b3659d7016526fb Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 12 Jan 2022 20:33:26 +0300 Subject: [PATCH 13/15] Sort command lists in code and documentation alphabetically (#624) Currently, commands are listed in a seemingly arbitrary order. Having them in alphabetical order is better, because: * It's easier to find a command by name. * It's clear where to insert a new command. I removed the description text for the `explain` command, since otherwise it sticks out for no reason. We could probably add a description line for _every_ command, but having it for just one is ugly. 
--- datumaro/cli/__main__.py | 16 ++++++------ site/content/en/docs/user-manual/_index.md | 30 +++++++++++----------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py index 26f3151bdf..a40c630d83 100644 --- a/datumaro/cli/__main__.py +++ b/datumaro/cli/__main__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT @@ -56,17 +56,17 @@ def _make_subcommands_help(commands, help_line_start=0): def _get_known_contexts(): return [ + ('model', contexts.model, "Actions with models"), ('project', contexts.project, "Actions with projects"), ('source', contexts.source, "Actions with data sources"), - ('model', contexts.model, "Actions with models"), ] def _get_known_commands(): return [ ("Project modification:", None, ''), + ('add', commands.add, "Add dataset"), ('create', commands.create, "Create empty project"), ('import', commands.import_, "Import dataset"), - ('add', commands.add, "Add dataset"), ('remove', commands.remove, "Remove dataset"), ("", None, ''), @@ -78,17 +78,17 @@ def _get_known_commands(): ("", None, ''), ("Dataset operations:", None, ''), + ('convert', commands.convert, "Convert dataset between formats"), + ('diff', commands.diff, "Compare datasets"), ('download', commands.download, "Download a publicly available dataset"), + ('explain', commands.explain, "Run Explainable AI algorithm for model"), ('export', commands.export, "Export dataset in some format"), ('filter', commands.filter, "Filter dataset items"), - ('transform', commands.transform, "Modify dataset items"), + ('info', commands.info, "Print dataset info"), ('merge', commands.merge, "Merge datasets"), ('patch', commands.patch, "Update dataset from another one"), - ('convert', commands.convert, "Convert dataset between formats"), - ('diff', commands.diff, "Compare datasets"), ('stats', commands.stats, "Compute dataset statistics"), - ('info', 
commands.info, "Print dataset info"), - ('explain', commands.explain, "Run Explainable AI algorithm for model"), + ('transform', commands.transform, "Modify dataset items"), ('validate', commands.validate, "Validate dataset") ] diff --git a/site/content/en/docs/user-manual/_index.md b/site/content/en/docs/user-manual/_index.md index 4645c1e0dd..913936ebdc 100644 --- a/site/content/en/docs/user-manual/_index.md +++ b/site/content/en/docs/user-manual/_index.md @@ -20,37 +20,37 @@ weight: 3 - [Supported annotation types](./supported_formats/#annotation-types) - [Supported media formats](./media_formats) - [Command reference](./command-reference) + - [Add](./command-reference/sources/#source-add) + - [Checkout](./command-reference/checkout) + - [Commit](./command-reference/commit) - [Convert](./command-reference/convert) - [Create](./command-reference/create) + - [Diff](./command-reference/diff) - [Download](./command-reference/download) - - [Import](./command-reference/sources/#source-import) + - [Explain](./command-reference/explain) - [Export](./command-reference/export) - - [Add](./command-reference/sources/#source-add) - - [Remove](./command-reference/sources/#source-remove) - [Filter](./command-reference/filter) + - [Import](./command-reference/sources/#source-import) + - [Info](./command-reference/info) + - [Log](./command-reference/log) - [Merge](./command-reference/merge) - [Patch](./command-reference/patch) - - [Diff](./command-reference/diff) - - [Info](./command-reference/info) + - [Remove](./command-reference/sources/#source-remove) - [Stats](./command-reference/stats) - - [Validate](./command-reference/validate) - - [Transform](./command-reference/transform) - - [Commit](./command-reference/commit) - - [Checkout](./command-reference/checkout) - [Status](./command-reference/status) - - [Log](./command-reference/log) - - [Run model inference explanation (explain)](./command-reference/explain) + - [Transform](./command-reference/transform) + - 
[Validate](./command-reference/validate) - Models: - [Add](./command-reference/models/#model-add) - [Remove](./command-reference/models/#model-remove) - [Run](./command-reference/models/#model-run) + - Projects: + - [Info](./command-reference/projects/#project-info) + - [Migrate](./command-reference/projects/#project-migrate) - Sources: - - [Import](./command-reference/sources/#source-import) - [Add](./command-reference/sources/#source-add) + - [Import](./command-reference/sources/#source-import) - [Remove](./command-reference/sources/#source-remove) - - Projects: - - [Migrate](./command-reference/projects/#project-migrate) - - [Info](./command-reference/projects/#project-info) - [Extending](./extending) - [Builtin plugins](./extending/#builtin-plugins) - [Dataset Formats](./extending/#dataset-formats) From ba19f03c588a8a8ee7a8be9e63bad4079f20b631 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 13 Jan 2022 12:55:47 +0300 Subject: [PATCH 14/15] Fix resize memory use and image pixels (#622) * Fix image resizing in resize transform * Make resize lazy * Update changelog * Add image extension in Image ctor * Add tests for problems * Preserve output image extension * Optimize performance and memory in resize a bit * Update changelog * Require dot in ext * Remove ext validation * Fix imports * Fix comment * Invert size tuple components --- CHANGELOG.md | 6 ++++ datumaro/components/media.py | 46 ++++++++++++++++----------- datumaro/plugins/transforms.py | 57 ++++++++++++++++++++++------------ tests/requirements.py | 4 +++ tests/test_images.py | 13 ++++++++ tests/test_transforms.py | 26 +++++++++++++--- 6 files changed, 110 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92f7fb9a6b..af0d1a4d0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Saving (overwriting) a dataset in a project when rpath is used () +- Output image extension 
preserving in the `Resize` transform + () +- Memory overuse in the `Resize` transform + () +- Invalid image pixels produced by the `Resize` transform + () ### Security - TBD diff --git a/datumaro/components/media.py b/datumaro/components/media.py index 63d48d60a1..d64914a5e3 100644 --- a/datumaro/components/media.py +++ b/datumaro/components/media.py @@ -28,7 +28,7 @@ def path(self) -> str: @property def ext(self) -> str: - """Media file extension""" + """Media file extension (with the leading dot)""" return osp.splitext(osp.basename(self.path))[1] def __eq__(self, other: object) -> bool: @@ -42,6 +42,7 @@ def __init__(self, data: Union[np.ndarray, Callable[[str], np.ndarray], None] = None, *, path: Optional[str] = None, + ext: Optional[str] = None, size: Optional[Tuple[int, int]] = None): assert size is None or len(size) == 2, size if size is not None: @@ -58,6 +59,17 @@ def __init__(self, path = osp.abspath(path).replace('\\', '/') self._path = path + assert ext is None or isinstance(ext, str), ext + if ext: + assert not path, "Can't specify both 'path' and 'ext' for image" + + if not ext.startswith('.'): + ext = '.' 
+ ext + ext = ext.lower() + else: + ext = None + self._ext = ext + if not isinstance(data, np.ndarray): assert path or callable(data), "Image can not be empty" assert data is None or callable(data) @@ -75,7 +87,7 @@ def data(self) -> np.ndarray: data = self._data if self._size is None and data is not None: - self._size = tuple(map(int, data.shape[:2])) + self._size = tuple(map(int, data.shape[:2])) return data @property @@ -99,6 +111,14 @@ def size(self) -> Optional[Tuple[int, int]]: self._size = tuple(map(int, data.shape[:2])) return self._size + @property + def ext(self) -> str: + """Media file extension""" + if self._ext is not None: + return self._ext + else: + return osp.splitext(osp.basename(self.path))[1] + def __eq__(self, other): if not isinstance(other, __class__): return False @@ -141,7 +161,12 @@ def __init__(self, if path and osp.isfile(path) or data: data = lazy_image(path, loader=data) - super().__init__(path=path, size=size, + self._bytes_data = data + + if ext is None and path is None and isinstance(data, bytes): + ext = self._guess_ext(data) + + super().__init__(path=path, ext=ext, size=size, data=lambda _: decode_image(self.get_bytes())) if data is None: # We don't expect decoder to produce images from nothing, @@ -150,15 +175,6 @@ def __init__(self, # from the path, when no data is provided. self._data = None - self._bytes_data = data - if ext: - ext = ext.lower() - if not ext.startswith('.'): - ext = '.' 
+ ext - elif path is None and isinstance(data, bytes): - ext = self._guess_ext(data) - self._ext = ext - @classmethod def _guess_ext(cls, data: bytes) -> Optional[str]: return next( @@ -172,12 +188,6 @@ def get_bytes(self): return self._bytes_data() return self._bytes_data - @property - def ext(self): - if self._ext: - return self._ext - return super().ext - def save(self, path): cur_path = osp.abspath(self.path) path = osp.abspath(path) diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py index 8acd5243ab..e5b488195a 100644 --- a/datumaro/plugins/transforms.py +++ b/datumaro/plugins/transforms.py @@ -21,9 +21,11 @@ Points, PointsCategories, Polygon, PolyLine, RleMask, ) from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.errors import DatumaroError from datumaro.components.extractor import ( DEFAULT_SUBSET_NAME, IExtractor, ItemTransform, Transform, ) +from datumaro.components.media import Image from datumaro.util import NOTSET, parse_str_enum_value, take_by from datumaro.util.annotation_util import find_group_leader, find_instances import datumaro.util.mask_tools as mask_tools @@ -763,21 +765,48 @@ def __init__(self, extractor: IExtractor, width: int, height: int) -> None: self._width = width self._height = height + @staticmethod + def _lazy_resize_image(image, new_size): + def _resize_image(_): + h, w = image.size + yscale = new_size[0] / float(h) + xscale = new_size[1] / float(w) + + # LANCZOS4 is preferable for upscaling, but it works quite slow + method = cv2.INTER_AREA if (xscale * yscale) < 1 \ + else cv2.INTER_CUBIC + + resized_image = cv2.resize(image.data / 255.0, new_size[::-1], + interpolation=method) + resized_image *= 255.0 + return resized_image + + return Image(_resize_image, ext=image.ext, size=new_size) + + @staticmethod + def _lazy_resize_mask(mask, new_size): + def _resize_image(): + # Can use only NEAREST for masks, + # because we can't have interpolated values + rescaled_mask = 
cv2.resize(mask.image.astype(np.float32), + new_size[::-1], interpolation=cv2.INTER_NEAREST) + return rescaled_mask.astype(np.uint8) + return _resize_image + def transform_item(self, item): if not item.has_image: - raise Exception("Image info is required for this transform") + raise DatumaroError("Item %s: image info is required for this " + "transform" % (item.id, )) h, w = item.image.size xscale = self._width / float(w) yscale = self._height / float(h) + new_size = (self._height, self._width) + + resized_image = None if item.image.has_data: - # LANCZOS4 is preferable for upscaling, but it works quite slow - method = cv2.INTER_AREA if (xscale * yscale) < 1 \ - else cv2.INTER_CUBIC - image = item.image.data / 255.0 - resized_image = cv2.resize(image, (self._width, self._height), - interpolation=method) + resized_image = self._lazy_resize_image(item.image, new_size) resized_annotations = [] for ann in item.annotations: @@ -798,18 +827,8 @@ def transform_item(self, item): ] )) elif isinstance(ann, Mask): - # Can use only NEAREST for masks, - # because we can't have interpolated values - rescaled_mask = cv2.resize(ann.image.astype(np.float32), - (self._width, self._height), - interpolation=cv2.INTER_NEAREST).astype(np.uint8) - - if isinstance(ann, RleMask): - rle = mask_tools.mask_to_rle(rescaled_mask) - resized_annotations.append(ann.wrap( - rle=mask_utils.frPyObjects(rle, *rle['size']))) - else: - resized_annotations.append(ann.wrap(image=rescaled_mask)) + rescaled_mask = self._lazy_resize_mask(ann, new_size) + resized_annotations.append(ann.wrap(image=rescaled_mask)) elif isinstance(ann, (Caption, Label)): resized_annotations.append(ann) else: diff --git a/tests/requirements.py b/tests/requirements.py index 04c95e5197..2b8a0e85fd 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -13,6 +13,8 @@ class DatumaroComponent: class Requirements: + # Please, maintain the ordering when adding new lines + # Exact requirements DATUM_GENERAL_REQ = "Datumaro 
general requirement" DATUM_TELEMETRY = "Datumaro telemetry requirement" @@ -46,6 +48,8 @@ class Requirements: DATUM_BUG_560 = "Reading MOT dataset with seqinfo produces 0-based indexing in frames" DATUM_BUG_583 = "Empty lines in VOC subset lists are not ignored" DATUM_BUG_602 = "Patch command example error" + DATUM_BUG_606 = "transform with resize also changed the image extension from .jpg to .png" + DATUM_BUG_618 = "ResizeTransform returns broken image pixels" class SkipMessages: diff --git a/tests/test_images.py b/tests/test_images.py index badef07d69..829ff0dcbb 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -82,8 +82,11 @@ def test_ctors(self): { 'data': image }, { 'data': image, 'path': path }, { 'data': image, 'path': path, 'size': (2, 4) }, + { 'data': image, 'ext': 'png' }, + { 'data': image, 'ext': 'png', 'size': (2, 4) }, { 'data': lambda p: image }, { 'data': lambda p: image, 'path': 'somepath' }, + { 'data': lambda p: image, 'ext': 'jpg' }, { 'path': path }, { 'path': path, 'data': load_image }, { 'path': path, 'data': load_image, 'size': (2, 4) }, @@ -95,6 +98,16 @@ def test_ctors(self): np.testing.assert_array_equal(img.data, image) self.assertEqual(img.size, tuple(image.shape[:2])) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_ctor_errors(self): + with self.subTest('no data specified'): + with self.assertRaisesRegex(Exception, "can not be empty"): + Image(ext='jpg', size=(1, 2)) + + with self.subTest('either path or ext'): + with self.assertRaisesRegex(Exception, "both 'path' and 'ext'"): + Image(path='somepath', ext='someext') + class BytesImageTest(TestCase): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_lazy_image_shape(self): diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 47e3222e92..4961315d93 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -8,12 +8,13 @@ PointsCategories, Polygon, PolyLine, ) from datumaro.components.extractor import DatasetItem +from 
datumaro.components.media import Image from datumaro.components.project import Dataset from datumaro.util.test_utils import compare_datasets import datumaro.plugins.transforms as transforms import datumaro.util.mask_tools as mask_tools -from .requirements import Requirements, mark_requirement +from .requirements import Requirements, mark_bug, mark_requirement class TransformsTest(TestCase): @@ -568,9 +569,10 @@ def test_bboxes_values_decrement_transform(self): compare_datasets(self, dst_dataset, actual) @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @mark_bug(Requirements.DATUM_BUG_618) def test_can_resize(self): small_dataset = Dataset.from_iterable([ - DatasetItem(id=1, image=np.zeros((4, 4)), annotations=[ + DatasetItem(id=i, image=np.ones((4, 4)) * i, annotations=[ Label(1), Bbox(1, 1, 2, 2, label=2), Polygon([1, 1, 1, 2, 2, 2, 2, 1], label=1), @@ -582,11 +584,11 @@ def test_can_resize(self): [0, 1, 1, 0], [1, 1, 0, 0], ])) - ]) + ]) for i in range(3) ], categories=['a', 'b', 'c']) big_dataset = Dataset.from_iterable([ - DatasetItem(id=1, image=np.zeros((8, 8)), annotations=[ + DatasetItem(id=i, image=np.ones((8, 8)) * i, annotations=[ Label(1), Bbox(2, 2, 4, 4, label=2), Polygon([2, 2, 2, 4, 4, 4, 4, 2], label=1), @@ -602,7 +604,7 @@ def test_can_resize(self): [1, 1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 0, 0], ])) - ]) + ]) for i in range(3) ], categories=['a', 'b', 'c']) with self.subTest('upscale'): @@ -612,3 +614,17 @@ def test_can_resize(self): with self.subTest('downscale'): actual = transforms.ResizeTransform(big_dataset, width=4, height=4) compare_datasets(self, small_dataset, actual) + + @mark_bug(Requirements.DATUM_BUG_606) + def test_can_keep_image_ext_on_resize(self): + expected = Image(np.ones((8, 4)), ext='jpg') + + dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=Image(np.ones((4, 2)), ext='jpg')) + ]) + + dataset.transform('resize', width=4, height=8) + + actual = dataset.get('1').image + self.assertEqual(actual.ext, 
expected.ext) + self.assertTrue(np.array_equal(actual.data, expected.data)) From 72f3a3828c836c4bbf20208ed9b0b3ca39831486 Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Fri, 14 Jan 2022 11:43:29 +0300 Subject: [PATCH 15/15] Depends on OpenVINO telemetry library 2022.1.0 from PyPI (#625) * Depends on OpenVINO telemetry library 2022.1.0 from PyPI * Update CHANGELOG.md * Better comment for openvino-telemetry dependency --- CHANGELOG.md | 2 ++ requirements-core.txt | 3 +++ requirements.txt | 4 ---- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index af0d1a4d0a..bfb23ea4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - The `pycocotools` dependency lower bound is raised to `2.0.4`. () +- OpenVINO telemetry library 2022.1.0 from PyPI. + () ### Deprecated - TBD diff --git a/requirements-core.txt b/requirements-core.txt index 937c88db25..c667ecf448 100644 --- a/requirements-core.txt +++ b/requirements-core.txt @@ -13,6 +13,9 @@ pycocotools>=2.0.4; platform_system != "Windows" or python_version >= '3.9' pycocotools-windows; platform_system == "Windows" and python_version < '3.9' PyYAML>=5.3.1 +# A library to send the telemetry data from the OpenVINO toolkit components. +openvino-telemetry>=2022.1.0 + # 2.3 has an unlisted dependency on PyTorch, which we don't need tensorboardX>=1.8,!=2.3 diff --git a/requirements.txt b/requirements.txt index a128a65db0..bed04eb99c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,10 +3,6 @@ opencv-python-headless>=4.1.0.25 -# Move to the core list once released on pip -# OpenVINO telemetry library -openvino-telemetry @ git+https://github.com/openvinotoolkit/telemetry.git@master#egg=openvino-telemetry - # testing pytest>=5.3.5