Began to implement rich progress bar for tracking the completion of f…

…fmpeg jobs
TimeTravelPenguin · Dec 24, 2023 · 69ca02e · 69ca02e
1 parent 7e14c8f
commit 69ca02e
Show file tree

Hide file tree

Showing 12 changed files with 581 additions and 240 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,36 +4,37 @@ version = "1.0.0-alpha"
 description = ""
 authors = ["TimeTravelPenguins <TimeTravelPenguin@gmail.com>"]
 readme = "README.md"
-packages = [{include = "BAET", from = "src"}]
+packages = [{ include = "BAET", from = "src" }]
 
 [tool.poetry.scripts]
 baet = "src.main:main"
 
 [tool.poetry.dev-dependencies]
-black            = "*"
-isort            = "*"
-mypy             = "*"
-pylint           = "*"
+black = "*"
+isort = "*"
+mypy = "*"
+pylint = "*"
 
 [tool.poetry.dependencies]
-python = "^3.12"
+python = "^3.8"
 ffmpeg-python = "^0.2.0"
 pydantic = "^2.5.2"
 rich = "^13.7.0"
 rich-argparse = "^1.4.0"
+more-itertools = "^10.1.0"
 
 [tool.black]
 target-version = ["py312"]
 
 [tool.isort]
-profile    = "black"
+profile = "black"
 py_version = 312
 
 [tool.mypy]
 disallow_untyped_decorators = false
-install_types  = true
+install_types = true
 python_version = "3.12"
-strict         = true
+strict = true
 
 [build-system]
 requires = ["poetry-core"]

diff --git a/requirements.txt b/requirements.txt
diff --git a/src/BAET/AppArgs.py b/src/BAET/AppArgs.py
@@ -70,7 +70,7 @@ def __rich_console__(
 
 def new_empty_argparser() -> ArgumentParser:
     def get_formatter(prog):
-        return RichHelpFormatter(prog, max_help_position=35, console=console)
+        return RichHelpFormatter(prog, max_help_position=40, console=console)
 
     # todo: use console protocol https://rich.readthedocs.io/en/stable/protocol.html#console-protocol
     description = AppDescription()
@@ -155,6 +155,7 @@ def GetArgs() -> AppArgs:
 
     output_group.add_argument(
         "--output-streams-separately",
+        "--sep",
         default=False,
         action="store_true",
         help="[TODO] When set, individual commands are given to [blue]ffmpeg[/] to export each stream. Otherwise, "
@@ -165,14 +166,15 @@ def GetArgs() -> AppArgs:
 
     output_group.add_argument(
         "--overwrite-existing",
+        "--overwrite",
         default=False,
         action="store_true",
         help="Overwrite a file if it already exists. (Default: False)",
     )
 
     output_group.add_argument(
         "--no-output-subdirs",
-        default=True,
+        default=False,
         action="store_true",
         help="Do not create subdirectories for each video's extracted audio tracks in the output directory. "
         "(Default: True)",
@@ -204,6 +206,16 @@ def GetArgs() -> AppArgs:
         "Options to help debug the application.",
     )
 
+    debug_group.add_argument(
+        "--run-synchronously",
+        "--sync",
+        default=False,
+        action="store_true",
+        help="[TODO] Run each each job in order. This should reduce the CPU workload, but may increase runtime. A "
+        "'job' is per file input, regardless of whether ffmpeg commands are merged (see: "
+        "`--output-streams-separately`). (Default: False)",
+    )
+
     debug_group.add_argument(
         "--logging",
         default=False,
@@ -227,6 +239,7 @@ def GetArgs() -> AppArgs:
 
     debug_group.add_argument(
         "--show-ffmpeg-cmd",
+        "--cmds",
         default=False,
         action="store_true",
         help="[TODO] Print to the console the generated ffmpeg command. (Default: False)",
@@ -269,6 +282,7 @@ def GetArgs() -> AppArgs:
         trim=args.trim,
         print_args=args.print_args,
         show_ffmpeg_cmd=args.show_ffmpeg_cmd,
+        run_synchronously=args.run_synchronously,
     )
 
     app_args = AppArgs.model_validate(

diff --git a/src/BAET/AudioExtractor.py b/src/BAET/AudioExtractor.py
@@ -1,7 +1,11 @@
+import contextlib
 import getpass
+import sys
 from pathlib import Path
+from subprocess import Popen
 
 import ffmpeg
+from ffmpeg.nodes import InputNode
 from rich.markdown import Markdown
 
 from BAET.AppArgs import AppArgs
@@ -16,6 +20,31 @@ def print_ffmpeg_cmd(output):
     cmd = Markdown(f"```console\n{md_user} {compiled}\n```")
 
     console.print(cmd)
+    console.print()
+
+
+@contextlib.contextmanager
+def probe_audio_streams(file: Path):
+    try:
+        info_logger.info('Probing "%s"', file)
+        probe = ffmpeg.probe(file)
+
+        audio_streams = [
+            stream
+            for stream in probe["streams"]
+            if "codec_type" in stream and stream["codec_type"] == "audio"
+        ]
+
+        if not audio_streams:
+            raise ValueError("No audio streams found")
+
+        info_logger.info("Found %d audio streams", len(audio_streams))
+        yield audio_streams
+
+    except (ffmpeg.Error, ValueError) as e:
+        info_logger.critical("%s: %s", type(e).__name__, e)
+        error_console.print_exception()
+        raise e
 
 
 class AudioExtractor:
@@ -37,77 +66,97 @@ def __init__(
         )
 
     def extract(self):
-        try:
-            audio_streams = self.probe_audio_stream()
-            info_logger.info("Found %d audio streams", len(audio_streams))
-        except ffmpeg.Error as e:
-            info_logger.critical("Found no audio streams")
-            error_console.print(e.stderr)
-            return None
-        except ValueError as e:
-            info_logger.critical("Found no audio streams")
-            error_console.print(e)
-            return None
-
-        ffmpeg_input = ffmpeg.input(str(self.file))
-
-        info_logger.info("Creating ffmpeg command to extract each stream")
-
-        outputs = []
-        for stream in audio_streams:
-            out = self.create_output(ffmpeg_input, stream)
-            outputs.append(out)
+        ffmpeg_input: InputNode = ffmpeg.input(self.file)
+        workers = []
+
+        with probe_audio_streams(self.file) as audio_streams:
+            # Check: does the indexing of audio stream relate to ffprobe index
+            # or to the index of the collected audio streams in ffmpeg_input["a:index"]?
+            for index, stream in enumerate(audio_streams):
+                # index = stream["index"]
+
+                out = self._create_workers(
+                    ffmpeg_input,
+                    index,
+                    stream["sample_rate"] or self.output_configuration.sample_rate,
+                )
+
+                workers.append(out)
 
         info_logger.info("Extracting audio to %s", self.output_dir)
 
-        if not self.output_configuration.output_streams_separately:
-            output = ffmpeg.merge_outputs(*outputs)
-            self._run(output)
+        # if not self.output_configuration.output_streams_separately:
+        #     output = ffmpeg.merge_outputs(*worker_pairs)
+        #     self._run(output)
+        #     return
+        #
+        # for output in worker_pairs:
+        #     self._run(output)
+
+        try:
+            for output in workers:
+                self._run_workers(output)
+        except Exception as e:
+            info_logger.critical("%s: %s", type(e).__name__, e)
+            error_console.print_exception()
+            raise e
+
+    def _run_workers(self, ffmpeg_output):
+        # if self.debug_options.show_ffmpeg_cmd:
+        #     print_ffmpeg_cmd(ffmpeg_write)
+
+        if self.debug_options.dry_run:
             return
 
-        for output in outputs:
-            self._run(output)
+        proc: Popen[bytes] = ffmpeg_output.run_async(pipe_stdout=True)
 
-    def _run(self, output):
-        if self.output_configuration.overwrite_existing:
-            output = ffmpeg.overwrite_output(output)
-        if self.debug_options.show_ffmpeg_cmd:
-            print(output)
-        if not self.debug_options.dry_run:
-            output.run()
+        while proc.poll() is None:
+            output = proc.stdout.readline().decode("utf-8")
+            console.print(output)
+
+        if proc.returncode != 0:
+            info_logger.critical("Return code: %s", proc.returncode)
+            sys.exit(-1)
+            # todo: hangs here :(
+
+            # progress_text = input_data.decode("utf-8")
+            # console.print(progress_text)
+
+            # proc_write.stdin.write(input_data)
 
-    def create_output(self, input_file, stream):
-        index = stream["index"]
-        audio_stream = input_file[f"a:{index - 1}"]
+            # Look for "frame=xx"
+            # if progress_text.startswith("frame="):
+            #     frame = int(progress_text.partition("=")[-1])  # Get the frame number
+            #     console.print(f"q: {frame}")
 
+        proc.wait()
+
+    def _create_workers(
+        self, ffmpeg_input: InputNode, stream_index: int, sample_rate: int
+    ):
         self.output_dir.mkdir(parents=True, exist_ok=True)
 
-        filename = (
-            f"{self.file.stem}_track{index}.{self.output_configuration.file_type}"
+        output_filename = Path(
+            f"{self.file.stem}_track{stream_index}.{self.output_configuration.file_type}"
         )
 
-        info_logger.info("Creating output file %s", str(self.output_dir / filename))
-
-        return ffmpeg.output(
-            audio_stream,
-            str(self.output_dir / filename),
-            acodec=self.output_configuration.acodec,
-            audio_bitrate=(
-                stream["sample_rate"] or self.output_configuration.fallback_sample_rate
-            ),
-            format=self.output_configuration.file_type,
+        # TODO: MOVE
+        info_logger.info(
+            "Creating output dir %s", self.output_dir / output_filename.stem
         )
 
-    def probe_audio_stream(self):
-        probe = ffmpeg.probe(self.file)
+        opt_kwargs = {
+            "acodec": self.output_configuration.acodec,
+            "audio_bitrate": sample_rate
+            or self.output_configuration.fallback_sample_rate,
+            "format": self.output_configuration.file_type,
+        }
 
-        audio_streams = [
-            stream
-            for stream in probe["streams"]
-            if "codec_type" in stream and stream["codec_type"] == "audio"
-        ]
+        output = ffmpeg_input[f"a:{stream_index}"].output(
+            str(output_filename), **opt_kwargs
+        )
 
-        if not audio_streams:
-            raise ValueError("No audio streams found")
+        if self.output_configuration.overwrite_existing:
+            output = output.overwrite_output()
 
-        return audio_streams
+        return output