openvinotoolkit · zhiltsov-max · Jan 17, 2022 · Oct 15, 2021 · Oct 16, 2021 · Oct 16, 2021
@@ -8,6 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## \[Unreleased\]
 ### Added
+- Ability to import a video as frames with the `video_frames` format and
+  to split a video into frames with the `datum util split_video` command
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- `--subset` parameter in the `image_dir` format
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- `MediaManager` API to control loaded media resources at runtime
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
 - Command to download public datasets
   (<https://github.com/openvinotoolkit/datumaro/pull/582>)
 - Extension autodetection in `ByteImage`
@@ -18,11 +25,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/585>)
 
 ### Changed
+- The `pycocotools` dependency lower bound is raised to `2.0.4`.
+  (<https://github.com/openvinotoolkit/datumaro/pull/449>)
+- Allowed direct file paths in `datum import`. Such sources are imported as if
+  the `rpath` parameter were specified; however, only the selected path
+  is copied into the project
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
 - `smooth_line` from `datumaro.util.annotation_util` - the function
   is renamed to `approximate_line` and has updated interface
   (<https://github.com/openvinotoolkit/datumaro/pull/592>)
-- The `pycocotools` dependency lower bound is raised to `2.0.4`.
-  (<https://github.com/openvinotoolkit/datumaro/pull/449>)
 - Improved `stats` performance, added new filtering parameters,
   image stats (`unique`, `repeated`) moved to the `dataset` section,
   removed `mean` and `std` from the `dataset` section
@@ -38,6 +49,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/617>)
 
 ### Fixed
+- Prohibited calling `add`, `import` and `export` commands without a project
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
+- Calling `make_dataset` on an empty project tree now properly
+  produces an error
+  (<https://github.com/openvinotoolkit/datumaro/pull/555>)
 - Fails in multimerge when lines are not approximated and when there are no
   label categories (<https://github.com/openvinotoolkit/datumaro/pull/592>)
 - Cannot convert LabelMe dataset, that has no subsets

@@ -59,6 +59,7 @@ def _get_known_contexts():
         ('model', contexts.model, "Actions with models"),
         ('project', contexts.project, "Actions with projects"),
         ('source', contexts.source, "Actions with data sources"),
+        ('util', contexts.util, "Auxillary tools and utilities"),
     ]
 
 def _get_known_commands():

@@ -2,4 +2,4 @@
 #
 # SPDX-License-Identifier: MIT
 
-from . import model, project, source
+from . import model, project, source, util
@@ -162,7 +162,7 @@ def export_command(args):
     try:
         project = scope_add(load_project(args.project_dir))
     except ProjectNotFoundError:
-        if not show_plugin_help and args.project_dir:
+        if not show_plugin_help:
             raise
 
     if project is not None:

@@ -10,7 +10,9 @@
 from datumaro.components.errors import ProjectNotFoundError
 from datumaro.util.scope import on_error_do, scope_add, scoped
 
-from ..util import MultilineFormatter, add_subparser, join_cli_args
+from ..util import (
+    MultilineFormatter, add_subparser, join_cli_args, show_video_import_warning,
+)
 from ..util.errors import CliException
 from ..util.project import generate_next_name, load_project
 
@@ -102,7 +104,7 @@ def import_command(args):
     try:
         project = scope_add(load_project(args.project_dir))
     except ProjectNotFoundError:
-        if not show_plugin_help and args.project_dir:
+        if not show_plugin_help:
             raise
 
     if project is not None:
@@ -130,6 +132,9 @@ def import_command(args):
             list(project.working_tree.sources) + os.listdir(),
             'source', sep='-', default='1')
 
+    if fmt == 'video_frames':
+        show_video_import_warning()
+
     project.import_source(name, url=args.url, format=args.format,
         options=extra_args, no_cache=True, no_hash=True, rpath=args.rpath)
     on_error_do(project.remove_source, name, ignore_errors=True,
@@ -229,7 +234,7 @@ def add_command(args):
     try:
         project = scope_add(load_project(args.project_dir))
     except ProjectNotFoundError:
-        if not show_plugin_help and args.project_dir:
+        if not show_plugin_help:
             raise
 
     if project is not None:
@@ -248,6 +253,9 @@ def add_command(args):
 
     extra_args = arg_parser.parse_cmdline(args.extra_args)
 
+    if fmt == 'video_frames':
+        show_video_import_warning()
+
     name, _ = project.add_source(args.path,
         format=args.format, options=extra_args, rpath=args.rpath)
     on_error_do(project.remove_source, name, ignore_errors=True,

@@ -0,0 +1,102 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import os
+import os.path as osp
+
+from datumaro.cli.util import MultilineFormatter, add_subparser
+from datumaro.cli.util.errors import CliException
+from datumaro.cli.util.project import generate_next_file_name
+from datumaro.components.dataset import Dataset
+
+
+def build_split_video_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Split video into frames",
+        description="""
+        Splits a video into separate frames and saves them in a directory.
+        After the splitting, the images can be added into a project
+        using the 'import' command and the 'image_dir' format.|n
+        |n
+        This command is useful for making a dataset from a video file.
+        Unlike direct video reading during model training, which can produce
+        different results if the system environment changes, this command
+        allows to split the video into frames and use them instead, making
+        the dataset reproducible and stable.|n
+        |n
+        This command provides different options like setting the frame step,
+        file name pattern, starting and finishing frame etc.|n
+        |n
+        Examples:|n
+        - Split a video into frames, use each 30-rd frame:|n
+        |s|s%(prog)s -i video.mp4 -o video.mp4-frames --step 30|n
+        - Split a video into frames, save as 'frame_xxxxxx.png' files:|n
+        |s|s%(prog)s -i video.mp4 --image-ext=.png --name-pattern='frame_%%06d'
+        """,
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-i', '--input-path', dest='src_path', required=True,
+        help="Path to the video file")
+    parser.add_argument('-o', '--output-dir', dest='dst_dir',
+        help="Directory to save output (default: a subdir in the current one)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.add_argument('-n', '--name-pattern', default='%06d',
+        help="Name pattern for the produced images (default: %(default)s)")
+    parser.add_argument('-s', '--step', type=int, default=1,
+        help="Frame step (default: %(default)s)")
+    parser.add_argument('-b', '--start-frame', type=int, default=0,
+        help="Starting frame (default: %(default)s)")
+    parser.add_argument('-e', '--end-frame', type=int, default=None,
+        help="Finishing frame (default: %(default)s)")
+    parser.add_argument('-x', '--image-ext', default='.jpg',
+        help="Output image extension (default: %(default)s)")
+    parser.set_defaults(command=split_video_command)
+
+    return parser
+
+def get_split_video_sensitive_args():
+    return {
+        split_video_command: ['src_path', 'dst_dir', 'name_pattern'],
+    }
+
+def split_video_command(args):
+    src_path = osp.abspath(args.src_path)
+
+    dst_dir = args.dst_dir
+    if dst_dir:
+        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % dst_dir)
+    else:
+        dst_dir = generate_next_file_name('%s-frames' % osp.basename(src_path))
+    dst_dir = osp.abspath(dst_dir)
+
+    log.info("Exporting frames...")
+
+    dataset = Dataset.import_from(src_path, 'video_frames',
+        name_pattern=args.name_pattern, step=args.step,
+        start_frame=args.start_frame, end_frame=args.end_frame)
+
+    dataset.export(format='image_dir', save_dir=dst_dir,
+        image_ext=args.image_ext)
+
+    log.info("Frames are exported into '%s'" % dst_dir)
+
+    return 0
+
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    subparsers = parser.add_subparsers()
+    add_subparser(subparsers, 'split_video', build_split_video_parser)
+
+    return parser
+
+def get_sensitive_args():
+    return {
+        **get_split_video_sensitive_args(),
+    }
@@ -4,6 +4,7 @@
 
 from typing import Iterable, List
 import argparse
+import logging as log
 import textwrap
 
 
@@ -69,3 +70,11 @@ def join_cli_args(args: argparse.Namespace, *names: Iterable[str]) -> List:
         joined += value
 
     return joined
+
+def show_video_import_warning():
+    log.warning("Using 'video_frames' in a project may lead "
+        "to different results across multiple runs, if the "
+        "system setup changes (library version, OS, etc.). "
+        "If you need stable results, consider splitting the video "
+        "manually using instructions at: "
+        "https://openvinotoolkit.github.io/datumaro/docs/user-manual/media_formats/")
@@ -8,6 +8,7 @@
 import os
 import os.path as osp
 import shutil
+import weakref
 
 import cv2
 import numpy as np
@@ -332,6 +333,9 @@ def __init__(self, path: str, *,
         self._frame_count = None
         self._length = None
 
+        from .media_manager import MediaManager
+        MediaManager.get_instance().push(weakref.ref(self), self)
+
     def close(self):
         self._iterator = None
 
@@ -457,3 +461,7 @@ def __eq__(self, other: object) -> bool:
             self._start_frame == other._start_frame and \
             self._step == other._step and \
             self._end_frame == other._end_frame
+
+    def __hash__(self):
+        # Required for caching
+        return hash((self._path, self._step, self._start_frame, self._end_frame))
@@ -0,0 +1,80 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# We need to release the opened resources somehow to release file handles and
+# close the program normally.
+#
+# Options:
+#
+# A. Require converter to open all the media resources.
+# - Dataset (IExtractor) just provides media access metainfo
+# - Dataset (IExtractor) must provide the list of all media resources
+# - Each resource has to provide means for loading and releasing
+# - All converters require changes and special handling for different media
+# sources.
+# - Resource management is explicit
+# - Resources are managed safely and effectively
+#
+# Problems:
+# - Too much burden on plugins. Media reporting and resource management take
+# too much effort in this solution. Extractors and Converters all need to
+# bother with this.
+#
+#
+# B. Introduce Media Resource Manager, which contains all the opened
+# media resources.
+# - No code modifications in converters
+# - All (or specific) resources are released by request
+# - The system can manage the number of opened resources to control memory load
+# (maybe, just extend Image Cache?)
+# - Resource management is implicit for the user
+#
+# Problems:
+# - The moment we need to release resources is debatable and needs
+# investigation for each operation (however, it's just about the caching,
+# so it's unlikely to make the system unstable)
+
+from collections import OrderedDict
+import sys
+
+_instance = None
+
+DEFAULT_CAPACITY = 2
+
+class MediaManager:
+    @staticmethod
+    def get_instance():
+        global _instance
+        if _instance is None:
+            _instance = MediaManager()
+        return _instance
+
+    def __init__(self, capacity=DEFAULT_CAPACITY):
+        self.capacity = int(capacity)
+        self.items = OrderedDict()
+
+    def push(self, key, media):
+        if self.capacity <= len(self.items):
+            _, v = self.items.popitem(last=True)
+            if hasattr(v, 'close') and sys.getrefcount(v) <= 2:
+                v.close()
+        self.items[key] = media
+
+    def get(self, key):
+        default = object()
+        item = self.items.get(key, default)
+        if item is default:
+            return None
+
+        self.items.move_to_end(key, last=False) # naive splay tree
+        return item
+
+    def size(self):
+        return len(self.items)
+
+    def clear(self):
+        for item in self.items.values():
+            if hasattr(item, 'close'):
+                item.close()
+        self.items.clear()