Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

I've written a script that does VobSub extraction and conversion using VobSub2SRT #105

Open
guibregolin opened this issue Mar 6, 2024 · 2 comments

Comments

@guibregolin
Copy link

guibregolin commented Mar 6, 2024

I'd like to contribute it to the project. Here's the script in full, just in case:

import argparse
import os
import re
import shutil
import subprocess

def read_language_from_idx(idx_file_path):
    """Return the language code declared in a VobSub ``.idx`` file.

    The file is decoded as ISO-8859-1 and scanned for the first line that
    begins with ``id: <word>,``.

    Args:
        idx_file_path: Path of the ``.idx`` file to inspect.

    Returns:
        The word captured after ``id: `` on the first matching line, or
        ``None`` when no line matches.
    """
    with open(idx_file_path, 'r', encoding='ISO-8859-1') as idx_file:
        for line in idx_file:
            found = re.search(r'^id: (\w+),', line, re.MULTILINE)
            if found is not None:
                return found.group(1)
    return None

def extract_subtitles_with_mkvextract(video_file, output_folder):
    """Extract the subtitle tracks of *video_file* into *output_folder*.

    ``mkvmerge -i`` is used to list the tracks. Every track whose listing
    line reads ``Track ID <n>: subtitles`` is extracted with ``mkvextract``
    to ``subtitles_track<n>.idx`` inside *output_folder*.

    Args:
        video_file: Path of the container file to read.
        output_folder: Existing directory that receives the extracted files.

    Returns:
        A list of ``(idx_path, sub_path)`` tuples, one per track for which
        both files exist after extraction.

    Raises:
        subprocess.CalledProcessError: If ``mkvmerge`` or ``mkvextract``
            exits with a non-zero status.
    """
    listing = subprocess.run(
        ["mkvmerge", "-i", video_file],
        capture_output=True,
        text=True,
        check=True,
    )
    # NOTE(review): this matches subtitle tracks of any format, not only
    # VobSub; other formats presumably produce no companion .sub file and are
    # dropped by the existence check below -- confirm against mkvextract.
    subtitle_tracks = re.findall(r"Track ID (\d+): subtitles", listing.stdout)

    subtitle_files = []
    for track_id in subtitle_tracks:
        # Build both names from one base path. Deriving the .sub name with
        # str.replace('.idx', '.sub') would also rewrite a ".idx" occurring
        # earlier in the path (for example in the output folder name).
        base_path = os.path.join(output_folder, f"subtitles_track{track_id}")
        output_idx = f"{base_path}.idx"
        output_sub = f"{base_path}.sub"

        cmd_extract = ["mkvextract", video_file, "tracks", f"{track_id}:{output_idx}"]
        subprocess.run(cmd_extract, check=True)

        # Keep only tracks that yielded a complete .idx/.sub pair.
        if os.path.exists(output_idx) and os.path.exists(output_sub):
            subtitle_files.append((output_idx, output_sub))

    return subtitle_files

def filter_subtitles_by_language(subtitle_files, languages):
    """Tag each subtitle pair with its language and keep the wanted ones.

    Args:
        subtitle_files: Iterable of ``(idx_path, sub_path)`` tuples.
        languages: Container of accepted language codes, or ``None`` to
            accept every pair.

    Returns:
        A list of ``(idx_path, sub_path, lang)`` tuples, where ``lang`` is
        whatever ``read_language_from_idx`` returned for the ``.idx`` file.
    """
    accept_all = languages is None
    selected = []
    for idx_path, sub_path in subtitle_files:
        code = read_language_from_idx(idx_path)
        if not accept_all and code not in languages:
            continue
        selected.append((idx_path, sub_path, code))
    return selected

def convert_subtitles_to_srt(subtitle_files, output_folder):
    """Run ``vobsub2srt`` on each subtitle pair and collect the SRT files.

    Conversion is best-effort: a failing ``vobsub2srt`` run does not raise;
    the track is simply left out of the result when no ``.srt`` was produced.

    Args:
        subtitle_files: Iterable of ``(idx_path, sub_path, lang)`` tuples.
        output_folder: Unused here; the ``.srt`` is expected next to the
            ``.idx`` file. Kept for interface compatibility.

    Returns:
        A list of ``(srt_path, lang)`` tuples for every track whose ``.srt``
        file exists after conversion.
    """
    srt_files_with_lang = []
    for idx_file, _sub_file, lang in subtitle_files:
        base_name = os.path.splitext(idx_file)[0]
        output_srt = f"{base_name}.srt"

        # The file names are not specific to one video, so an .srt left over
        # from an earlier run could be mistaken for fresh output when this
        # conversion fails. Remove it before converting.
        try:
            os.remove(output_srt)
        except FileNotFoundError:
            pass

        completed = subprocess.run(["vobsub2srt", base_name], check=False)
        if completed.returncode != 0:
            print(f"Warning: vobsub2srt exited with status {completed.returncode} for {idx_file}")

        if os.path.exists(output_srt):
            srt_files_with_lang.append((output_srt, lang))  # Keep track of language
    return srt_files_with_lang


def mux_subtitles(video_file, subtitle_files, output_folder):
    """Mux SRT files into *video_file*, replacing it with the new container.

    ``mkvmerge`` writes ``temp_output.mkv`` into *output_folder*; on success
    that file takes the place of *video_file*.

    Args:
        video_file: Path of the container to update in place.
        subtitle_files: Sequence of ``(srt_path, lang)`` tuples. ``lang`` may
            be ``None``, in which case the track is tagged ``und``.
        output_folder: Directory used for the temporary output file.

    Returns:
        None. When *subtitle_files* is empty nothing is run and the video is
        left untouched.

    Raises:
        subprocess.CalledProcessError: If ``mkvmerge`` exits with a non-zero
            status. The partial temporary file is removed first.
    """
    # Nothing to add: avoid a pointless remux that would overwrite the
    # original file with an equivalent copy.
    if not subtitle_files:
        return

    temp_output_file = os.path.join(output_folder, "temp_output.mkv")

    cmd = ["mkvmerge", "-o", temp_output_file, video_file]
    for srt_file, lang in subtitle_files:
        # Fall back to 'und' (undetermined) when no language is known.
        lang_code = 'und' if lang is None else lang
        cmd.extend(["--language", f"0:{lang_code}", srt_file])

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError:
        # Do not leave a partial output file behind.
        if os.path.exists(temp_output_file):
            os.remove(temp_output_file)
        raise

    try:
        os.replace(temp_output_file, video_file)
    except OSError:
        # os.replace cannot cross filesystem boundaries (for example when the
        # output folder is on a different disk); shutil.move falls back to
        # copy-and-delete in that case.
        shutil.move(temp_output_file, video_file)


def process_video_files(video_files, output_folder, languages):
    """Extract, convert and re-mux subtitles for every given video file.

    For each video the subtitle tracks are extracted, filtered by language,
    converted to SRT and muxed back into the video file.

    Args:
        video_files: Iterable of video file paths.
        output_folder: Directory for intermediate files; created if missing.
        languages: Comma-separated language codes (whitespace around codes is
            ignored), or a falsy value to process every language.

    Returns:
        None.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Parse the language filter once; None means "accept every language".
    languages_set = None
    if languages:
        languages_set = {code.strip() for code in languages.split(',') if code.strip()} or None

    for video_file in video_files:
        print(f"Processing {video_file} for languages: {languages}")

        extracted_subtitle_files = extract_subtitles_with_mkvextract(video_file, output_folder)

        # Always go through the filter, even without a language restriction,
        # so each track keeps the language read from its .idx file instead of
        # being muxed back as undetermined.
        filtered_subtitle_files = filter_subtitles_by_language(extracted_subtitle_files, languages_set)

        srt_files_with_lang = convert_subtitles_to_srt(filtered_subtitle_files, output_folder)

        if not srt_files_with_lang:
            # Nothing to add, so do not rewrite the video.
            print(f"No subtitles were converted for {video_file}; the file was left unchanged.")
            continue

        mux_subtitles(video_file, srt_files_with_lang, output_folder)

        # TODO: clean up the intermediate .idx/.sub/.srt files.

        print(f"Finished processing {video_file}. The original file has been updated with new SRT subtitles.")

if __name__ == "__main__":
    # Command-line entry point: collect the video paths, the working folder
    # and the optional language filter, then hand them to the pipeline.
    cli = argparse.ArgumentParser(
        description="Convert VobSub subtitles to SRT for specified languages and mux back into the MKV file.",
    )
    cli.add_argument(
        "video_files",
        nargs='+',
        help="Paths to video files to process",
    )
    cli.add_argument(
        "--output",
        default=".",
        help="Output folder for temporary files",
    )
    cli.add_argument(
        "--languages",
        help="Comma-separated list of ISO 639-1 language codes to convert (e.g., 'en,es'). Processes all languages if omitted.",
    )
    options = cli.parse_args()

    process_video_files(options.video_files, options.output, options.languages)
@vredesbyyrd
Copy link

Thanks for this. Works well and comes in very handy for my media server import pipeline.

@milahu
Copy link

milahu commented Nov 22, 2024

Please add a header like:

#!/usr/bin/env python3

# convert vob subtitles from video files to srt subtitles
# https://github.com/ruediger/VobSub2SRT/issues/105
# SPDX-License-Identifier: MIT

By default, the input video files should be treated as read-only,
and the results should be written to SRT files.

The video files should be modified only when --write or -w is given.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants