Skip to content

Commit

Permalink
Add subtitles attribute to Video (#1151)
Browse files Browse the repository at this point in the history
* use video.subtitles instead of video.subtitle_languages

* fix test due to a bug in enzyme

* add news fragment
  • Loading branch information
getzze authored Jul 12, 2024
1 parent 96c0165 commit 19b190d
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 67 deletions.
1 change: 1 addition & 0 deletions changelog.d/1151.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
add `subtitles` attribute to Video
8 changes: 3 additions & 5 deletions subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def download(
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
video.subtitles |= set(search_external_subtitles(video.name, directory=directory).values())

if check_video(video, languages=language_set, age=age, undefined=single):
refine(
Expand All @@ -444,9 +444,7 @@ def download(
continue
for video in scanned_videos:
if not force:
video.subtitle_languages |= set(
search_external_subtitles(video.name, directory=directory).values()
)
video.subtitles |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language_set, age=age, undefined=single):
refine(
video,
Expand All @@ -470,7 +468,7 @@ def download(
errored_paths.append(p)
continue
if not force:
video.subtitle_languages |= set(search_external_subtitles(video.name, directory=directory).values())
video.subtitles |= set(search_external_subtitles(video.name, directory=directory).values())
if check_video(video, languages=language_set, age=age, undefined=single):
refine(
video,
Expand Down
41 changes: 27 additions & 14 deletions subliminal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,34 @@ def check_video(
return True


def parse_subtitle_filename(subtitle_filename: str, video_filename: str) -> Subtitle | None:
    """Build a :class:`Subtitle` from the name of an external subtitle file.

    The subtitle filename must start with the root of `video_filename` (its name
    without extension) and end, case-insensitively, with one of
    ``SUBTITLE_EXTENSIONS``; otherwise it is not considered a subtitle of this video.

    The text found between the video root and the subtitle extension is used as
    the language code: any occurrence of the video extension is removed,
    underscores are replaced by hyphens and the first character (the separator)
    is dropped. The result is parsed with ``Language.fromietf``.

    :param str subtitle_filename: filename of the candidate subtitle.
    :param str video_filename: filename of the video.
    :return: a subtitle with the detected language, ``Language('und')`` when no
        language code is present or when it cannot be parsed, or `None` if the
        filename is not a valid subtitle filename for this video.

    """
    video_root, video_ext = os.path.splitext(video_filename)

    # discard files that do not belong to the video or are not subtitle files
    if not (
        subtitle_filename.startswith(video_root)
        and subtitle_filename.lower().endswith(SUBTITLE_EXTENSIONS)
    ):
        return None

    # undefined language unless a valid code can be extracted below
    language = Language('und')

    # isolate what sits between the video root and the subtitle extension
    subtitle_ext = os.path.splitext(subtitle_filename)[1]
    infix = subtitle_filename[len(video_root) : -len(subtitle_ext)]
    language_code = infix.replace(video_ext, '').replace('_', '-')[1:]

    if language_code:
        try:
            language = Language.fromietf(language_code)
        except (ValueError, LanguageReverseError):
            # keep the undefined language, the failure is only logged
            logger.exception('Cannot parse language code %r', language_code)

    # TODO: extract the hearing_impaired or forced attribute

    return Subtitle(language, subtitle_id=subtitle_filename)


def search_external_subtitles(
path: str | os.PathLike,
*,
directory: str | os.PathLike | None = None,
) -> dict[str, Language]:
) -> dict[str, Subtitle]:
"""Search for external subtitles from a video `path` and their associated language.
Unless `directory` is provided, search will be made in the same directory as the video file.
Expand All @@ -386,25 +409,15 @@ def search_external_subtitles(
# split path
dirpath, filename = os.path.split(path)
dirpath = dirpath or '.'
fileroot, fileext = os.path.splitext(filename)

# search for subtitles
subtitles = {}
for p in os.listdir(directory or dirpath):
# keep only valid subtitle filenames
if not p.startswith(fileroot) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
subtitle = parse_subtitle_filename(p, filename)
if subtitle is None:
continue

# extract the potential language code
language = Language('und')
language_code = p[len(fileroot) : -len(os.path.splitext(p)[1])].replace(fileext, '').replace('_', '-')[1:]
if language_code:
try:
language = Language.fromietf(language_code)
except (ValueError, LanguageReverseError):
logger.exception('Cannot parse language code %r', language_code)

subtitles[p] = language
subtitles[p] = subtitle

logger.debug('Found subtitles %r', subtitles)

Expand Down
6 changes: 4 additions & 2 deletions subliminal/refiners/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from babelfish import Language # type: ignore[import-untyped]
from enzyme import MKV # type: ignore[import-untyped]

from subliminal.subtitle import EmbeddedSubtitle

if TYPE_CHECKING:
from subliminal.video import Video

Expand All @@ -24,7 +26,7 @@ def refine(video: Video, *, embedded_subtitles: bool = True, **kwargs: Any) -> V
* :attr:`~subliminal.video.Video.resolution`
* :attr:`~subliminal.video.Video.video_codec`
* :attr:`~subliminal.video.Video.audio_codec`
* :attr:`~subliminal.video.Video.subtitle_languages`
* :attr:`~subliminal.video.Video.subtitles`
:param bool embedded_subtitles: search for embedded subtitles.
Expand Down Expand Up @@ -106,7 +108,7 @@ def refine(video: Video, *, embedded_subtitles: bool = True, **kwargs: Any) -> V
else:
embedded_subtitle_languages.add(Language('und'))
logger.debug('Found embedded subtitle %r', embedded_subtitle_languages)
video.subtitle_languages |= embedded_subtitle_languages
video.subtitles |= {EmbeddedSubtitle(lang) for lang in embedded_subtitle_languages}
else:
logger.debug('MKV has no subtitle track')

Expand Down
33 changes: 26 additions & 7 deletions subliminal/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

from babelfish import Country, Language # type: ignore[import-untyped]

from subliminal.subtitle import Subtitle

logger = logging.getLogger(__name__)

#: Video extensions
Expand Down Expand Up @@ -115,10 +117,12 @@ class Video:
:param str resolution: resolution of the video stream (480p, 720p, 1080p or 1080i).
:param str video_codec: codec of the video stream.
:param str audio_codec: codec of the main audio stream.
:param float frame_rate: frame rate in frames per second.
:param float duration: duration of the video in seconds.
:param dict hashes: hashes of the video file by provider names.
:param int size: size of the video file in bytes.
:param subtitle_languages: existing subtitle languages.
:type subtitle_languages: set[:class:`~babelfish.language.Language`]
:param subtitles: existing subtitles.
:type subtitles: set[:class:`~subliminal.subtitle.Subtitle`]
:param int year: year of the video.
:param country: Country of the video.
:type country: :class:`~babelfish.country.Country`
Expand Down Expand Up @@ -148,15 +152,18 @@ class Video:
#: Codec of the main audio stream
audio_codec: str | None

#: Frame rate in frames per second
frame_rate: float | None

#: Duration of the video in seconds
duration: float | None

#: Hashes of the video file by provider names
hashes: dict[str, str]

#: Size of the video file in bytes
size: int | None

#: Existing subtitle languages
subtitle_languages: set[Language]

#: Title of the video
title: str | None

Expand All @@ -172,6 +179,9 @@ class Video:
#: TMDB id of the video
tmdb_id: int | None

#: Existing subtitles
subtitles: set[Subtitle]

def __init__(
self,
name: str,
Expand All @@ -182,9 +192,11 @@ def __init__(
streaming_service: str | None = None,
video_codec: str | None = None,
audio_codec: str | None = None,
frame_rate: float | None = None,
duration: float | None = None,
hashes: Mapping[str, str] | None = None,
size: int | None = None,
subtitle_languages: Set[Language] | None = None,
subtitles: Set[Subtitle] | None = None,
title: str | None = None,
year: int | None = None,
country: Country | None = None,
Expand All @@ -198,9 +210,11 @@ def __init__(
self.resolution = resolution
self.video_codec = video_codec
self.audio_codec = audio_codec
self.frame_rate = frame_rate
self.duration = duration
self.hashes = dict(hashes) if hashes is not None else {}
self.size = size
self.subtitle_languages = set(subtitle_languages) if subtitle_languages is not None else set()
self.subtitles = set(subtitles) if subtitles is not None else set()
self.title = title
self.year = year
self.country = country
Expand All @@ -219,6 +233,11 @@ def age(self) -> timedelta:
return timedelta()
return datetime.now(timezone.utc) - datetime.fromtimestamp(os.path.getmtime(self.name), timezone.utc)

@property
def subtitle_languages(self) -> set[Language]:
    """Languages of the subtitles already found for the video.

    The set is rebuilt on every access from the ``language`` attribute of each
    element of ``self.subtitles``.
    """
    found_languages = set()
    for subtitle in self.subtitles:
        found_languages.add(subtitle.language)
    return found_languages

@classmethod
def fromguess(cls, name: str, guess: dict[str, Any]) -> Video:
"""Create an :class:`Episode` or a :class:`Movie` with the given `name` based on the `guess`.
Expand Down
31 changes: 31 additions & 0 deletions tests/refiners/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from babelfish import Language # type: ignore[import-untyped]
from subliminal.core import scan_video
from subliminal.refiners.metadata import refine
from subliminal.video import Movie


def test_refine_video_metadata(mkv):
    """Scan the ``test5`` MKV fixture, refine it with the metadata refiner and check the result."""
    # Language('eng') is left out of the expected languages because of a bug in enzyme
    expected_languages = {
        # Language('eng'),  # bug in enzyme
        Language('spa'),
        Language('deu'),
        Language('jpn'),
        Language('und'),
        Language('ita'),
        Language('fra'),
        Language('hun'),
    }

    scanned_video = scan_video(mkv['test5'])
    refine(scanned_video, embedded_subtitles=True)

    # identity of the scanned video
    assert type(scanned_video) is Movie
    assert scanned_video.name == mkv['test5']

    # attributes the refiner leaves unset for this file
    assert scanned_video.source is None
    assert scanned_video.release_group is None
    assert scanned_video.resolution is None

    # attributes read from the container
    assert scanned_video.video_codec == 'H.264'
    assert scanned_video.audio_codec == 'AAC'
    assert scanned_video.imdb_id is None
    assert scanned_video.size == 31762747
    assert scanned_video.subtitle_languages == expected_languages
    assert scanned_video.title == 'test5'
    assert scanned_video.year is None
52 changes: 13 additions & 39 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
download_best_subtitles,
download_subtitles,
list_subtitles,
refine,
save_subtitles,
scan_archive,
scan_video,
Expand Down Expand Up @@ -115,7 +114,7 @@ def test_check_video_languages(movies):
video = movies['man_of_steel']
languages = {Language('fra'), Language('eng')}
assert check_video(video, languages=languages)
video.subtitle_languages = languages
video.subtitles = {Subtitle(lang) for lang in languages}
assert not check_video(video, languages=languages)


Expand All @@ -130,7 +129,7 @@ def test_check_video_undefined(movies):
video = movies['man_of_steel']
assert check_video(video, undefined=False)
assert check_video(video, undefined=True)
video.subtitle_languages = {Language('und')}
video.subtitles = {Subtitle(Language('und'))}
assert check_video(video, undefined=False)
assert not check_video(video, undefined=True)

Expand All @@ -153,7 +152,8 @@ def test_search_external_subtitles(episodes, tmpdir):
for path in expected_subtitles:
tmpdir.ensure(path)
subtitles = search_external_subtitles(video_path)
assert subtitles == expected_subtitles
subtitle_languages = {path: subtitle.language for path, subtitle in subtitles.items()}
assert subtitle_languages == expected_subtitles


def test_search_external_subtitles_archive(movies, tmpdir):
Expand All @@ -173,7 +173,8 @@ def test_search_external_subtitles_archive(movies, tmpdir):
for path in expected_subtitles:
tmpdir.ensure(path)
subtitles = search_external_subtitles(video_path)
assert subtitles == expected_subtitles
subtitle_languages = {path: subtitle.language for path, subtitle in subtitles.items()}
assert subtitle_languages == expected_subtitles


def test_search_external_subtitles_no_directory(movies, tmpdir, monkeypatch):
Expand All @@ -185,7 +186,8 @@ def test_search_external_subtitles_no_directory(movies, tmpdir, monkeypatch):
for path in expected_subtitles:
tmpdir.ensure(path)
subtitles = search_external_subtitles(video_name)
assert subtitles == expected_subtitles
subtitle_languages = {path: subtitle.language for path, subtitle in subtitles.items()}
assert subtitle_languages == expected_subtitles


def test_search_external_subtitles_in_directory(episodes, tmpdir):
Expand All @@ -198,7 +200,8 @@ def test_search_external_subtitles_in_directory(episodes, tmpdir):
for path in expected_subtitles:
tmpdir.ensure('subtitles', path)
subtitles = search_external_subtitles(video_name, directory=subtitles_directory)
assert subtitles == expected_subtitles
subtitle_languages = {path: subtitle.language for path, subtitle in subtitles.items()}
assert subtitle_languages == expected_subtitles


def test_scan_video_movie(movies, tmpdir, monkeypatch):
Expand Down Expand Up @@ -245,35 +248,6 @@ def test_scan_video_episode(episodes, tmpdir, monkeypatch):
assert scanned_video.tvdb_id is None


def test_refine_video_metadata(mkv):
scanned_video = scan_video(mkv['test5'])
refine(scanned_video, episode_refiners=('metadata',), movie_refiners=('metadata',))
assert type(scanned_video) is Movie
assert scanned_video.name == mkv['test5']
assert scanned_video.source is None
assert scanned_video.release_group is None
assert scanned_video.resolution is None
assert scanned_video.video_codec == 'H.264'
assert scanned_video.audio_codec == 'AAC'
assert scanned_video.imdb_id is None
assert scanned_video.hashes == {
'opensubtitlescom': '49e2530ea3bd0d18',
'opensubtitles': '49e2530ea3bd0d18',
}
assert scanned_video.size == 31762747
assert scanned_video.subtitle_languages == {
Language('spa'),
Language('deu'),
Language('jpn'),
Language('und'),
Language('ita'),
Language('fra'),
Language('hun'),
}
assert scanned_video.title == 'test5'
assert scanned_video.year is None


def test_scan_video_path_does_not_exist(movies):
with pytest.raises(ValueError) as excinfo:
scan_video(movies['man_of_steel'].name)
Expand Down Expand Up @@ -493,7 +467,7 @@ def test_list_subtitles_episode_no_hash(episodes):
def test_list_subtitles_no_language(episodes):
video = episodes['dallas_s01e03']
languages = {Language('eng')}
video.subtitle_languages = languages
video.subtitles = {Subtitle(lang) for lang in languages}

subtitles = list_subtitles({video}, languages)

Expand Down Expand Up @@ -567,7 +541,7 @@ def test_download_best_subtitles_min_score(episodes):
def test_download_best_subtitles_no_language(episodes):
video = episodes['bbt_s07e05']
languages = {Language('fra')}
video.subtitle_languages = languages
video.subtitles = {Subtitle(lang) for lang in languages}
providers = ['gestdown']

subtitles = download_best_subtitles({video}, languages, min_score=episode_scores['hash'], providers=providers)
Expand All @@ -578,7 +552,7 @@ def test_download_best_subtitles_no_language(episodes):
def test_download_best_subtitles_undefined(episodes):
video = episodes['bbt_s07e05']
languages = {Language('und')}
video.subtitle_languages = languages
video.subtitles = {Subtitle(lang) for lang in languages}
providers = ['gestdown']

subtitles = download_best_subtitles(
Expand Down

0 comments on commit 19b190d

Please sign in to comment.