Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preference or avoidance per language type (hearing impaired, foreign only) #1175

Merged
merged 8 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/1175.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add CLI options to prefer or disfavor hearing impaired (-hi/-HI) or foreign only (-fo/-FO) subtitles.
1 change: 1 addition & 0 deletions docs/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ provider = ["addic7ed", "opensubtitlescom", "opensubtitles"]
refiner = ["metadata", "hash", "omdb"]
ignore_refiner = ["tmdb"]
language = ["fr", "en", "pt-br"]
foreign_only = false
encoding = "utf-8"
min_score = 50
archives = true
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ extend-ignore-re = [
"(?Rm)^.*#\\s*spellchecker:\\s*disable-line$",
"#\\s*spellchecker:off\\s*\\n.*\\n\\s*#\\s*spellchecker:on"
]
[tool.typos.default.extend-words]
fo = "fo"
[tool.typos.default.extend-identifiers]
tha = "tha"
bre = "bre"
67 changes: 57 additions & 10 deletions subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
)
from subliminal.core import ARCHIVE_EXTENSIONS, scan_name, search_external_subtitles
from subliminal.extensions import get_default_providers, get_default_refiners
from subliminal.score import match_hearing_impaired
from subliminal.utils import merge_extend_and_ignore_unions

if TYPE_CHECKING:
Expand Down Expand Up @@ -142,6 +141,12 @@ def configure(ctx: click.Context, param: click.Parameter | None, filename: str |

# make download options
download_dict = toml_dict.setdefault('download', {})
# handle language types
for lt in ('hearing_impaired', 'foreign_only'):
# if an option was defined in the config file, make it a tuple, the expected type
if lt in download_dict and (isinstance(download_dict[lt], bool) or download_dict[lt] is None):
download_dict[lt] = (download_dict[lt],)

# remove the provider and refiner lists to select, extend and ignore
provider_lists = {
'select': download_dict.pop('provider', []),
Expand Down Expand Up @@ -411,7 +416,42 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
),
)
@click.option('-f', '--force', is_flag=True, default=False, help='Force download even if a subtitle already exist.')
@click.option('-hi', '--hearing-impaired', is_flag=True, default=False, help='Prefer hearing impaired subtitles.')
@click.option(
'-fo',
'--foreign-only',
'foreign_only',
is_flag=True,
flag_value=True,
multiple=True,
help='Prefer foreign-only subtitles.',
)
@click.option(
'-FO',
'--no-foreign-only',
'foreign_only',
is_flag=True,
flag_value=False,
multiple=True,
help='Disfavor foreign-only subtitles.',
)
@click.option(
'-hi',
'--hearing-impaired',
'hearing_impaired',
is_flag=True,
flag_value=True,
multiple=True,
help='Prefer hearing-impaired subtitles.',
)
@click.option(
'-HI',
'--no-hearing-impaired',
'hearing_impaired',
is_flag=True,
flag_value=False,
multiple=True,
help='Disfavor hearing-impaired subtitles.',
)
@click.option(
'-m',
'--min-score',
Expand All @@ -423,7 +463,7 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
'--language-type-suffix',
is_flag=True,
default=False,
help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign part subtitle.',
help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign only subtitle.',
)
@click.option(
'--language-format',
Expand Down Expand Up @@ -468,7 +508,8 @@ def download(
original_encoding: bool,
single: bool,
force: bool,
hearing_impaired: bool,
hearing_impaired: tuple[bool | None, ...],
foreign_only: tuple[bool | None, ...],
min_score: int,
language_type_suffix: bool,
language_format: str,
Expand Down Expand Up @@ -496,6 +537,14 @@ def download(
elif encoding is None:
encoding = 'utf-8'

# language_type
hearing_impaired_flag: bool | None = None
if len(hearing_impaired) > 0:
hearing_impaired_flag = hearing_impaired[-1]
foreign_only_flag: bool | None = None
if len(foreign_only) > 0:
foreign_only_flag = foreign_only[-1]

debug = obj.get('debug', False)
if debug:
verbose = 3
Expand Down Expand Up @@ -649,7 +698,8 @@ def download(
v,
language_set,
min_score=scores['hash'] * min_score // 100,
hearing_impaired=hearing_impaired,
hearing_impaired=hearing_impaired_flag,
foreign_only=foreign_only_flag,
only_one=single,
ignore_subtitles=ignore_subtitles,
)
Expand Down Expand Up @@ -701,11 +751,8 @@ def download(
else:
score_color = 'green'

# scale score from 0 to 100 taking out preferences
scaled_score = score
if match_hearing_impaired(s, hearing_impaired=hearing_impaired):
scaled_score -= scores['hearing_impaired']
scaled_score *= 100 / scores['hash']
# scale score from 0 to 100
scaled_score = score * 100 / scores['hash']

# echo some nice colored output
language_str = (
Expand Down
35 changes: 25 additions & 10 deletions subliminal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
refiner_manager,
)
from .score import compute_score as default_compute_score
from .subtitle import SUBTITLE_EXTENSIONS, Subtitle
from .subtitle import SUBTITLE_EXTENSIONS, LanguageType, Subtitle
from .utils import get_age, handle_exception
from .video import VIDEO_EXTENSIONS, Episode, Movie, Video

Expand Down Expand Up @@ -148,7 +148,7 @@ def list_subtitles_provider(self, provider: str, video: Video, languages: Set[La
try:
return self[provider].list_subtitles(video, provider_languages)
except Exception as e: # noqa: BLE001
handle_exception(e, 'Provider {provider}')
handle_exception(e, f'Provider {provider}')

return []

Expand Down Expand Up @@ -220,7 +220,8 @@ def download_best_subtitles(
languages: Set[Language],
*,
min_score: int = 0,
hearing_impaired: bool = False,
hearing_impaired: bool | None = None,
foreign_only: bool | None = None,
only_one: bool = False,
compute_score: ComputeScore | None = None,
ignore_subtitles: Sequence[str] | None = None,
Expand All @@ -234,10 +235,11 @@ def download_best_subtitles(
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent).
:param (bool | None) foreign_only: foreign only preference (yes/no/indifferent).
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
`hearing_impaired` as keyword argument and returns the score.
and returns the score.
:param ignore_subtitles: list of subtitle ids to ignore (None defaults to an empty list).
:return: downloaded subtitles.
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
Expand All @@ -249,9 +251,19 @@ def download_best_subtitles(
# ignore subtitles
subtitles = [s for s in subtitles if s.id not in ignore_subtitles]

# sort by hearing impaired and foreign only
language_type = LanguageType.from_flags(hearing_impaired=hearing_impaired, foreign_only=foreign_only)
if language_type != LanguageType.UNKNOWN:
logger.info('Sort subtitles by %s types first', language_type.value)
subtitles = sorted(
subtitles,
key=lambda s: s.language_type == language_type,
reverse=True,
)

# sort subtitles by score
scored_subtitles = sorted(
[(s, compute_score(s, video, hearing_impaired=hearing_impaired)) for s in subtitles],
[(s, compute_score(s, video)) for s in subtitles],
key=operator.itemgetter(1),
reverse=True,
)
Expand Down Expand Up @@ -411,7 +423,7 @@ def parse_subtitle_filename(subtitle_filename: str, video_filename: str) -> Subt
except (ValueError, LanguageReverseError):
logger.exception('Cannot parse language code %r', language_code)

# TODO: extract the hearing_impaired or forced attribute
# TODO: extract the hearing_impaired or foreign_only attribute

return Subtitle(language, subtitle_id=subtitle_filename)

Expand Down Expand Up @@ -775,7 +787,8 @@ def download_best_subtitles(
languages: Set[Language],
*,
min_score: int = 0,
hearing_impaired: bool = False,
hearing_impaired: bool | None = None,
foreign_only: bool | None = None,
only_one: bool = False,
compute_score: ComputeScore | None = None,
pool_class: type[ProviderPool] = ProviderPool,
Expand All @@ -790,7 +803,8 @@ def download_best_subtitles(
:param languages: languages to download.
:type languages: set of :class:`~babelfish.language.Language`
:param int min_score: minimum score for a subtitle to be downloaded.
:param bool hearing_impaired: hearing impaired preference.
:param (bool | None) hearing_impaired: hearing impaired preference (yes/no/indifferent).
:param (bool | None) foreign_only: foreign only preference (yes/no/indifferent).
:param bool only_one: download only one subtitle, not one per language.
:param compute_score: function that takes `subtitle` and `video` as positional arguments,
and returns the score.
Expand Down Expand Up @@ -825,6 +839,7 @@ def download_best_subtitles(
languages,
min_score=min_score,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
only_one=only_one,
compute_score=compute_score,
)
Expand Down Expand Up @@ -861,7 +876,7 @@ def save_subtitles(
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:param (str | None) extension: the subtitle extension, default is to match to the subtitle format.
:param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False.
:param bool language_type_suffix: add a suffix 'hi' or 'fo' if needed. Default to False.
:param str language_format: format of the language suffix. Default to 'alpha2'.
:return: the saved subtitles
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
Expand Down
12 changes: 11 additions & 1 deletion subliminal/providers/opensubtitlescom.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ def __init__(
subtitle_id: str,
*,
hearing_impaired: bool = False,
foreign_only: bool = False,
movie_kind: str | None = None,
release: str | None = None,
movie_title: str | None = None,
Expand All @@ -199,7 +200,14 @@ def __init__(
file_id: int = 0,
file_name: str = '',
) -> None:
super().__init__(language, subtitle_id, hearing_impaired=hearing_impaired, page_link=None, encoding='utf-8')
super().__init__(
language,
subtitle_id,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
page_link=None,
encoding='utf-8',
)
self.movie_kind = movie_kind
self.release = release
self.movie_title = movie_title
Expand Down Expand Up @@ -235,6 +243,7 @@ def from_response(
attributes = response.get('attributes', {})
language = Language.fromopensubtitlescom(str(attributes.get('language')))
hearing_impaired = bool(int(attributes.get('hearing_impaired')))
foreign_only = bool(int(attributes.get('foreign_parts_only')))
release = str(attributes.get('release'))
moviehash_match = bool(attributes.get('moviehash_match', False))
download_count = int(attributes.get('download_count'))
Expand Down Expand Up @@ -266,6 +275,7 @@ def from_response(
language,
subtitle_id,
hearing_impaired=hearing_impaired,
foreign_only=foreign_only,
movie_kind=movie_kind,
release=release,
movie_title=movie_title,
Expand Down
16 changes: 5 additions & 11 deletions subliminal/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
class ComputeScore(Protocol):
"""Compute the score of a subtitle matching a video."""

def __call__(self, subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None) -> int: ... # noqa: D102
def __call__(self, subtitle: Subtitle, video: Video) -> int: ... # noqa: D102


# Check if sympy is installed (for tests)
Expand Down Expand Up @@ -141,8 +141,8 @@ def match_hearing_impaired(subtitle: Subtitle, *, hearing_impaired: bool | None
)


def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool | None = None) -> int:
"""Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.
def compute_score(subtitle: Subtitle, video: Video, **kwargs: Any) -> int:
"""Compute the score of the `subtitle` against the `video`.

:func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and
applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.
Expand All @@ -151,12 +151,11 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool |
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:param (bool | None) hearing_impaired: hearing impaired preference (None if no preference).
:return: score of the subtitle.
:rtype: int

"""
logger.info('Computing score of %r for video %r with %r', subtitle, video, {'hearing_impaired': hearing_impaired})
logger.info('Computing score of %r for video %r', subtitle, video)

# get the scores dict
scores = get_scores(video)
Expand Down Expand Up @@ -193,17 +192,12 @@ def compute_score(subtitle: Subtitle, video: Video, *, hearing_impaired: bool |
logger.debug('Adding imdb_id match equivalents')
matches |= {'title', 'year', 'country'}

# handle hearing impaired
if match_hearing_impaired(subtitle, hearing_impaired=hearing_impaired):
logger.debug('Matched hearing_impaired')
matches.add('hearing_impaired')

# compute the score
score = int(sum(scores.get(match, 0) for match in matches))
logger.info('Computed score %r with final matches %r', score, matches)

# ensure score is within valid bounds
max_score = scores['hash'] + scores['hearing_impaired']
max_score = scores['hash']
if not (0 <= score <= max_score): # pragma: no cover
logger.info('Clip score between 0 and %d: %d', max_score, score)
score = int(clip(score, 0, max_score))
Expand Down
Loading
Loading