From 8e879cbd8939bd1b083d8a615da4daffba55656c Mon Sep 17 00:00:00 2001 From: getzze Date: Wed, 25 Sep 2024 23:25:01 +0100 Subject: [PATCH] Add Provider.hash_video staticmethod (#1172) * Add Provider.hash_video staticmethod, to allow creating standalone providers * update tests --- changelog.d/1172.change.rst | 1 + subliminal/providers/__init__.py | 6 ++ subliminal/providers/bsplayer.py | 36 ++++++++++ subliminal/providers/napiprojekt.py | 16 +++++ subliminal/refiners/hash.py | 105 +++------------------------- tests/providers/test_bsplayer.py | 9 +++ tests/providers/test_napiprojekt.py | 4 ++ tests/refiners/test_hash.py | 20 +----- 8 files changed, 81 insertions(+), 116 deletions(-) create mode 100644 changelog.d/1172.change.rst diff --git a/changelog.d/1172.change.rst b/changelog.d/1172.change.rst new file mode 100644 index 00000000..a1e2ce70 --- /dev/null +++ b/changelog.d/1172.change.rst @@ -0,0 +1 @@ +Add Provider.hash_video staticmethod, to allow creating standalone providers. diff --git a/subliminal/providers/__init__.py b/subliminal/providers/__init__.py index dd0adf7e..eb4f8929 100644 --- a/subliminal/providers/__init__.py +++ b/subliminal/providers/__init__.py @@ -24,6 +24,7 @@ from subliminal.video import Episode, Movie, Video if TYPE_CHECKING: + import os from collections.abc import Sequence, Set from http.client import HTTPSConnection from types import TracebackType @@ -144,6 +145,11 @@ class Provider(Generic[S]): #: User Agent to use user_agent: str = f'Subliminal/{__short_version__}' + @staticmethod + def hash_video(video_path: str | os.PathLike) -> str | None: + """Hash the video to be used by the provider.""" + return None + def __enter__(self) -> Self: self.initialize() return self diff --git a/subliminal/providers/bsplayer.py b/subliminal/providers/bsplayer.py index 18deb3b2..9ddce92d 100644 --- a/subliminal/providers/bsplayer.py +++ b/subliminal/providers/bsplayer.py @@ -3,8 +3,10 @@ from __future__ import annotations import logging +import os import re import secrets +import struct import zlib from time import sleep from typing import TYPE_CHECKING, ClassVar, cast, overload @@ -194,6 +196,40 @@ def __init__(self, search_url: str | None = None, timeout: int = 10) -> None: self.session = Session() self.search_url = search_url or get_sub_domain() + @staticmethod + def hash_video(video_path: str | os.PathLike) -> str | None: + """Compute a hash using BSPlayer's algorithm. + + :param str video_path: path of the video. + :return: the hash. + :rtype: str. + """ + little_endian_long_long = ' Element: """Request data from search url. diff --git a/subliminal/providers/napiprojekt.py b/subliminal/providers/napiprojekt.py index 3fb7adbb..4adc759f 100644 --- a/subliminal/providers/napiprojekt.py +++ b/subliminal/providers/napiprojekt.py @@ -2,6 +2,7 @@ from __future__ import annotations +import hashlib import io import logging from gzip import BadGzipFile, GzipFile @@ -16,6 +17,7 @@ from . import Provider if TYPE_CHECKING: + import os from collections.abc import Set from subliminal.video import Video @@ -96,6 +98,20 @@ def __init__(self, *, timeout: int = 10) -> None: self.timeout = timeout self.session = None + @staticmethod + def hash_video(video_path: str | os.PathLike) -> str | None: + """Compute a hash using NapiProjekt's algorithm. + + :param str video_path: path of the video. + :return: the hash. + :rtype: str + + """ + readsize = 1024 * 1024 * 10 + with open(video_path, 'rb') as f: + data = f.read(readsize) + return hashlib.md5(data).hexdigest() # noqa: S324 + def initialize(self) -> None: """Initialize the provider.""" self.session = Session() diff --git a/subliminal/refiners/hash.py b/subliminal/refiners/hash.py index b14826cb..da2f73e0 100644 --- a/subliminal/refiners/hash.py +++ b/subliminal/refiners/hash.py @@ -2,7 +2,6 @@ from __future__ import annotations -import hashlib import logging import os import struct @@ -24,40 +23,6 @@ logger = logging.getLogger(__name__) -def hash_bsplayer(video_path: str | os.PathLike) -> str | None: - """Compute a hash using BSPlayer's algorithm. - - :param str video_path: path of the video. - :return: the hash. - :rtype: str. - """ - little_endian_long_long = ' str | None: """Compute a hash using OpenSubtitles' algorithm. @@ -87,69 +52,11 @@ def hash_opensubtitles(video_path: str | os.PathLike) -> str | None: return f'{filehash:016x}' -def hash_thesubdb(video_path: str | os.PathLike) -> str | None: # pragma: no cover - """Compute a hash using TheSubDB's algorithm. - - :param str video_path: path of the video. - :return: the hash. - :rtype: str - - """ - readsize = 64 * 1024 - if os.path.getsize(video_path) < readsize: - return None - with open(video_path, 'rb') as f: - data = f.read(readsize) - f.seek(-readsize, os.SEEK_END) - data += f.read(readsize) - - return hashlib.md5(data).hexdigest() # noqa: S324 - - -def hash_napiprojekt(video_path: str | os.PathLike) -> str | None: - """Compute a hash using NapiProjekt's algorithm. - - :param str video_path: path of the video. - :return: the hash. - :rtype: str - - """ - readsize = 1024 * 1024 * 10 - with open(video_path, 'rb') as f: - data = f.read(readsize) - return hashlib.md5(data).hexdigest() # noqa: S324 - - -def hash_shooter(video_path: str | os.PathLike) -> str | None: # pragma: no cover - """Compute a hash using Shooter's algorithm. - - :param string video_path: path of the video - :return: the hash - :rtype: string - - """ - filesize = os.path.getsize(video_path) - readsize = 4096 - if os.path.getsize(video_path) < readsize * 2: - return None - offsets = (readsize, filesize // 3 * 2, filesize // 3, filesize - readsize * 2) - filehash = [] - with open(video_path, 'rb') as f: - for offset in offsets: - f.seek(offset) - filehash.append(hashlib.md5(f.read(readsize)).hexdigest()) # noqa: S324 - return ';'.join(filehash) - - hash_functions: dict[str, HashFunc] = { - 'bsplayer': hash_bsplayer, - 'napiprojekt': hash_napiprojekt, 'opensubtitles': hash_opensubtitles, 'opensubtitlesvip': hash_opensubtitles, 'opensubtitlescom': hash_opensubtitles, 'opensubtitlescomvip': hash_opensubtitles, - 'shooter': hash_shooter, - 'thesubdb': hash_thesubdb, } @@ -174,16 +81,20 @@ def refine( logger.debug('Computing hashes for %r', video.name) for name in providers or default_providers: provider = cast(Provider, provider_manager[name].plugin) - if name not in hash_functions: - continue - if not provider.check_types(video): continue if languages is not None and not provider.check_languages(languages): continue - h = hash_functions[name](video.name) + # Try provider static method + h = provider.hash_video(video.name) + + # Try generic hashes + if h is None and name in hash_functions: + h = hash_functions[name](video.name) + + # Add hash if h is not None: video.hashes[name] = h diff --git a/tests/providers/test_bsplayer.py b/tests/providers/test_bsplayer.py index 3b044884..961dbf98 100644 --- a/tests/providers/test_bsplayer.py +++ b/tests/providers/test_bsplayer.py @@ -16,6 +16,15 @@ SEARCH_URL = 'http://s1.api.bsplayer-subtitles.com/v1.php' +def test_hash_bsplayer(mkv): + assert BSPlayerProvider.hash_video(mkv['test1']) == '40b44a7096b71ec3' + + +def test_hash_bsplayer_too_small(tmpdir): + path = tmpdir.ensure('test_too_small.mkv') + assert BSPlayerProvider.hash_video(str(path)) is None + + def test_get_matches_movie_hash(episodes): subtitle = BSPlayerSubtitle( subtitle_id='16442520', diff --git a/tests/providers/test_napiprojekt.py b/tests/providers/test_napiprojekt.py index c12bfea0..4d65fe33 100644 --- a/tests/providers/test_napiprojekt.py +++ b/tests/providers/test_napiprojekt.py @@ -13,6 +13,10 @@ ) +def test_hash_napiprojekt(mkv): + assert NapiProjektProvider.hash_video(mkv['test1']) == '9884a2b66dcb2965d0f45ce84e37b60c' + + def test_get_matches(movies): subtitle = NapiProjektSubtitle(Language('pol'), '6303e7ee6a835e9fcede9fb2fb00cb36') matches = subtitle.get_matches(movies['man_of_steel']) diff --git a/tests/refiners/test_hash.py b/tests/refiners/test_hash.py index 047ab437..62f0e1e6 100644 --- a/tests/refiners/test_hash.py +++ b/tests/refiners/test_hash.py @@ -1,13 +1,4 @@ -from subliminal.refiners.hash import hash_bsplayer, hash_opensubtitles, hash_thesubdb - - -def test_hash_bsplayer(mkv): - assert hash_bsplayer(mkv['test1']) == '40b44a7096b71ec3' - - -def test_hash_bsplayer_too_small(tmpdir): - path = tmpdir.ensure('test_too_small.mkv') - assert hash_bsplayer(str(path)) is None +from subliminal.refiners.hash import hash_opensubtitles def test_hash_opensubtitles(mkv): @@ -17,12 +8,3 @@ def test_hash_opensubtitles(mkv): def test_hash_opensubtitles_too_small(tmpdir): path = tmpdir.ensure('test_too_small.mkv') assert hash_opensubtitles(str(path)) is None - - -def test_hash_thesubdb(mkv): - assert hash_thesubdb(mkv['test1']) == '054e667e93e254f8fa9f9e8e6d4e73ff' - - -def test_hash_thesubdb_too_small(tmpdir): - path = tmpdir.ensure('test_too_small.mkv') - assert hash_thesubdb(str(path)) is None