Skip to content

Commit

Permalink
Began to implement rich progress bar for tracking the completion of f…
Browse files Browse the repository at this point in the history
…fmpeg jobs
  • Loading branch information
TimeTravelPenguins authored and TimeTravelPenguins committed Dec 24, 2023
1 parent 7e14c8f commit 69ca02e
Show file tree
Hide file tree
Showing 12 changed files with 581 additions and 240 deletions.
366 changes: 202 additions & 164 deletions poetry.lock

Large diffs are not rendered by default.

19 changes: 10 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,37 @@ version = "1.0.0-alpha"
description = ""
authors = ["TimeTravelPenguins <TimeTravelPenguin@gmail.com>"]
readme = "README.md"
packages = [{include = "BAET", from = "src"}]
packages = [{ include = "BAET", from = "src" }]

[tool.poetry.scripts]
baet = "src.main:main"

[tool.poetry.dev-dependencies]
black = "*"
isort = "*"
mypy = "*"
pylint = "*"
black = "*"
isort = "*"
mypy = "*"
pylint = "*"

[tool.poetry.dependencies]
python = "^3.12"
python = "^3.8"
ffmpeg-python = "^0.2.0"
pydantic = "^2.5.2"
rich = "^13.7.0"
rich-argparse = "^1.4.0"
more-itertools = "^10.1.0"

[tool.black]
target-version = ["py312"]

[tool.isort]
profile = "black"
profile = "black"
py_version = 312

[tool.mypy]
disallow_untyped_decorators = false
install_types = true
install_types = true
python_version = "3.12"
strict = true
strict = true

[build-system]
requires = ["poetry-core"]
Expand Down
4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

18 changes: 16 additions & 2 deletions src/BAET/AppArgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __rich_console__(

def new_empty_argparser() -> ArgumentParser:
def get_formatter(prog):
return RichHelpFormatter(prog, max_help_position=35, console=console)
return RichHelpFormatter(prog, max_help_position=40, console=console)

# todo: use console protocol https://rich.readthedocs.io/en/stable/protocol.html#console-protocol
description = AppDescription()
Expand Down Expand Up @@ -155,6 +155,7 @@ def GetArgs() -> AppArgs:

output_group.add_argument(
"--output-streams-separately",
"--sep",
default=False,
action="store_true",
help="[TODO] When set, individual commands are given to [blue]ffmpeg[/] to export each stream. Otherwise, "
Expand All @@ -165,14 +166,15 @@ def GetArgs() -> AppArgs:

output_group.add_argument(
"--overwrite-existing",
"--overwrite",
default=False,
action="store_true",
help="Overwrite a file if it already exists. (Default: False)",
)

output_group.add_argument(
"--no-output-subdirs",
default=True,
default=False,
action="store_true",
help="Do not create subdirectories for each video's extracted audio tracks in the output directory. "
"(Default: True)",
Expand Down Expand Up @@ -204,6 +206,16 @@ def GetArgs() -> AppArgs:
"Options to help debug the application.",
)

debug_group.add_argument(
"--run-synchronously",
"--sync",
default=False,
action="store_true",
help="[TODO] Run each each job in order. This should reduce the CPU workload, but may increase runtime. A "
"'job' is per file input, regardless of whether ffmpeg commands are merged (see: "
"`--output-streams-separately`). (Default: False)",
)

debug_group.add_argument(
"--logging",
default=False,
Expand All @@ -227,6 +239,7 @@ def GetArgs() -> AppArgs:

debug_group.add_argument(
"--show-ffmpeg-cmd",
"--cmds",
default=False,
action="store_true",
help="[TODO] Print to the console the generated ffmpeg command. (Default: False)",
Expand Down Expand Up @@ -269,6 +282,7 @@ def GetArgs() -> AppArgs:
trim=args.trim,
print_args=args.print_args,
show_ffmpeg_cmd=args.show_ffmpeg_cmd,
run_synchronously=args.run_synchronously,
)

app_args = AppArgs.model_validate(
Expand Down
163 changes: 106 additions & 57 deletions src/BAET/AudioExtractor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import contextlib
import getpass
import sys
from pathlib import Path
from subprocess import Popen

import ffmpeg
from ffmpeg.nodes import InputNode
from rich.markdown import Markdown

from BAET.AppArgs import AppArgs
Expand All @@ -16,6 +20,31 @@ def print_ffmpeg_cmd(output):
cmd = Markdown(f"```console\n{md_user} {compiled}\n```")

console.print(cmd)
console.print()


@contextlib.contextmanager
def probe_audio_streams(file: Path):
try:
info_logger.info('Probing "%s"', file)
probe = ffmpeg.probe(file)

audio_streams = [
stream
for stream in probe["streams"]
if "codec_type" in stream and stream["codec_type"] == "audio"
]

if not audio_streams:
raise ValueError("No audio streams found")

info_logger.info("Found %d audio streams", len(audio_streams))
yield audio_streams

except (ffmpeg.Error, ValueError) as e:
info_logger.critical("%s: %s", type(e).__name__, e)
error_console.print_exception()
raise e


class AudioExtractor:
Expand All @@ -37,77 +66,97 @@ def __init__(
)

def extract(self):
try:
audio_streams = self.probe_audio_stream()
info_logger.info("Found %d audio streams", len(audio_streams))
except ffmpeg.Error as e:
info_logger.critical("Found no audio streams")
error_console.print(e.stderr)
return None
except ValueError as e:
info_logger.critical("Found no audio streams")
error_console.print(e)
return None

ffmpeg_input = ffmpeg.input(str(self.file))

info_logger.info("Creating ffmpeg command to extract each stream")

outputs = []
for stream in audio_streams:
out = self.create_output(ffmpeg_input, stream)
outputs.append(out)
ffmpeg_input: InputNode = ffmpeg.input(self.file)
workers = []

with probe_audio_streams(self.file) as audio_streams:
# Check: does the indexing of audio stream relate to ffprobe index
# or to the index of the collected audio streams in ffmpeg_input["a:index"]?
for index, stream in enumerate(audio_streams):
# index = stream["index"]

out = self._create_workers(
ffmpeg_input,
index,
stream["sample_rate"] or self.output_configuration.sample_rate,
)

workers.append(out)

info_logger.info("Extracting audio to %s", self.output_dir)

if not self.output_configuration.output_streams_separately:
output = ffmpeg.merge_outputs(*outputs)
self._run(output)
# if not self.output_configuration.output_streams_separately:
# output = ffmpeg.merge_outputs(*worker_pairs)
# self._run(output)
# return
#
# for output in worker_pairs:
# self._run(output)

try:
for output in workers:
self._run_workers(output)
except Exception as e:
info_logger.critical("%s: %s", type(e).__name__, e)
error_console.print_exception()
raise e

def _run_workers(self, ffmpeg_output):
# if self.debug_options.show_ffmpeg_cmd:
# print_ffmpeg_cmd(ffmpeg_write)

if self.debug_options.dry_run:
return

for output in outputs:
self._run(output)
proc: Popen[bytes] = ffmpeg_output.run_async(pipe_stdout=True)

def _run(self, output):
if self.output_configuration.overwrite_existing:
output = ffmpeg.overwrite_output(output)
if self.debug_options.show_ffmpeg_cmd:
print(output)
if not self.debug_options.dry_run:
output.run()
while proc.poll() is None:
output = proc.stdout.readline().decode("utf-8")
console.print(output)

if proc.returncode != 0:
info_logger.critical("Return code: %s", proc.returncode)
sys.exit(-1)
# todo: hangs here :(

# progress_text = input_data.decode("utf-8")
# console.print(progress_text)

# proc_write.stdin.write(input_data)

def create_output(self, input_file, stream):
index = stream["index"]
audio_stream = input_file[f"a:{index - 1}"]
# Look for "frame=xx"
# if progress_text.startswith("frame="):
# frame = int(progress_text.partition("=")[-1]) # Get the frame number
# console.print(f"q: {frame}")

proc.wait()

def _create_workers(
self, ffmpeg_input: InputNode, stream_index: int, sample_rate: int
):
self.output_dir.mkdir(parents=True, exist_ok=True)

filename = (
f"{self.file.stem}_track{index}.{self.output_configuration.file_type}"
output_filename = Path(
f"{self.file.stem}_track{stream_index}.{self.output_configuration.file_type}"
)

info_logger.info("Creating output file %s", str(self.output_dir / filename))

return ffmpeg.output(
audio_stream,
str(self.output_dir / filename),
acodec=self.output_configuration.acodec,
audio_bitrate=(
stream["sample_rate"] or self.output_configuration.fallback_sample_rate
),
format=self.output_configuration.file_type,
# TODO: MOVE
info_logger.info(
"Creating output dir %s", self.output_dir / output_filename.stem
)

def probe_audio_stream(self):
probe = ffmpeg.probe(self.file)
opt_kwargs = {
"acodec": self.output_configuration.acodec,
"audio_bitrate": sample_rate
or self.output_configuration.fallback_sample_rate,
"format": self.output_configuration.file_type,
}

audio_streams = [
stream
for stream in probe["streams"]
if "codec_type" in stream and stream["codec_type"] == "audio"
]
output = ffmpeg_input[f"a:{stream_index}"].output(
str(output_filename), **opt_kwargs
)

if not audio_streams:
raise ValueError("No audio streams found")
if self.output_configuration.overwrite_existing:
output = output.overwrite_output()

return audio_streams
return output
Loading

0 comments on commit 69ca02e

Please sign in to comment.