Skip to content

Commit

Permalink
Add Provider.hash_video staticmethod (#1172)
Browse files: browse the repository at this point in the history
* Add Provider.hash_video staticmethod, to allow creating standalone providers

* update tests
  • Loading branch information
getzze authored Sep 25, 2024
1 parent 85028e0 commit 8e879cb
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 116 deletions.
1 change: 1 addition & 0 deletions changelog.d/1172.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add Provider.hash_video staticmethod, to allow creating standalone providers.
6 changes: 6 additions & 0 deletions subliminal/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from subliminal.video import Episode, Movie, Video

if TYPE_CHECKING:
import os
from collections.abc import Sequence, Set
from http.client import HTTPSConnection
from types import TracebackType
Expand Down Expand Up @@ -144,6 +145,11 @@ class Provider(Generic[S]):
#: User Agent to use
user_agent: str = f'Subliminal/{__short_version__}'

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Hash the video to be used by the provider."""
return None

def __enter__(self) -> Self:
self.initialize()
return self
Expand Down
36 changes: 36 additions & 0 deletions subliminal/providers/bsplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from __future__ import annotations

import logging
import os
import re
import secrets
import struct
import zlib
from time import sleep
from typing import TYPE_CHECKING, ClassVar, cast, overload
Expand Down Expand Up @@ -194,6 +196,40 @@ def __init__(self, search_url: str | None = None, timeout: int = 10) -> None:
self.session = Session()
self.search_url = search_url or get_sub_domain()

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using BSPlayer's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str.
"""
little_endian_long_long = '<q' # little-endian long long
byte_size = struct.calcsize(little_endian_long_long)

with open(video_path, 'rb') as f:
file_size = os.path.getsize(video_path)
file_hash = file_size

if file_size < 65536 * 2:
return None

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number

f.seek(max(0, file_size - 65536), 0)

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF

return f'{file_hash:016x}'

def _api_request(self, func_name: str = 'logIn', params: str = '', tries: int = 5) -> Element:
"""Request data from search url.
Expand Down
16 changes: 16 additions & 0 deletions subliminal/providers/napiprojekt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import hashlib
import io
import logging
from gzip import BadGzipFile, GzipFile
Expand All @@ -16,6 +17,7 @@
from . import Provider

if TYPE_CHECKING:
import os
from collections.abc import Set

from subliminal.video import Video
Expand Down Expand Up @@ -96,6 +98,20 @@ def __init__(self, *, timeout: int = 10) -> None:
self.timeout = timeout
self.session = None

@staticmethod
def hash_video(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using NapiProjekt's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
readsize = 1024 * 1024 * 10
with open(video_path, 'rb') as f:
data = f.read(readsize)
return hashlib.md5(data).hexdigest() # noqa: S324

def initialize(self) -> None:
"""Initialize the provider."""
self.session = Session()
Expand Down
105 changes: 8 additions & 97 deletions subliminal/refiners/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import hashlib
import logging
import os
import struct
Expand All @@ -24,40 +23,6 @@
logger = logging.getLogger(__name__)


def hash_bsplayer(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using BSPlayer's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str.
"""
little_endian_long_long = '<q' # little-endian long long
byte_size = struct.calcsize(little_endian_long_long)

with open(video_path, 'rb') as f:
file_size = os.path.getsize(video_path)
file_hash = file_size

if file_size < 65536 * 2:
return None

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF # to remain as 64bit number

f.seek(max(0, file_size - 65536), 0)

for _ in range(65536 // byte_size):
buff = f.read(byte_size)
(l_value,) = struct.unpack(little_endian_long_long, buff)
file_hash += l_value
file_hash &= 0xFFFFFFFFFFFFFFFF

return f'{file_hash:016x}'


def hash_opensubtitles(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using OpenSubtitles' algorithm.
Expand Down Expand Up @@ -87,69 +52,11 @@ def hash_opensubtitles(video_path: str | os.PathLike) -> str | None:
return f'{filehash:016x}'


def hash_thesubdb(video_path: str | os.PathLike) -> str | None: # pragma: no cover
"""Compute a hash using TheSubDB's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
readsize = 64 * 1024
if os.path.getsize(video_path) < readsize:
return None
with open(video_path, 'rb') as f:
data = f.read(readsize)
f.seek(-readsize, os.SEEK_END)
data += f.read(readsize)

return hashlib.md5(data).hexdigest() # noqa: S324


def hash_napiprojekt(video_path: str | os.PathLike) -> str | None:
"""Compute a hash using NapiProjekt's algorithm.
:param str video_path: path of the video.
:return: the hash.
:rtype: str
"""
readsize = 1024 * 1024 * 10
with open(video_path, 'rb') as f:
data = f.read(readsize)
return hashlib.md5(data).hexdigest() # noqa: S324


def hash_shooter(video_path: str | os.PathLike) -> str | None: # pragma: no cover
"""Compute a hash using Shooter's algorithm.
:param string video_path: path of the video
:return: the hash
:rtype: string
"""
filesize = os.path.getsize(video_path)
readsize = 4096
if os.path.getsize(video_path) < readsize * 2:
return None
offsets = (readsize, filesize // 3 * 2, filesize // 3, filesize - readsize * 2)
filehash = []
with open(video_path, 'rb') as f:
for offset in offsets:
f.seek(offset)
filehash.append(hashlib.md5(f.read(readsize)).hexdigest()) # noqa: S324
return ';'.join(filehash)


# Generic hash functions, keyed by provider name.
# The four OpenSubtitles provider variants all share the same hash function.
hash_functions: dict[str, HashFunc] = {
'bsplayer': hash_bsplayer,
'napiprojekt': hash_napiprojekt,
'opensubtitles': hash_opensubtitles,
'opensubtitlesvip': hash_opensubtitles,
'opensubtitlescom': hash_opensubtitles,
'opensubtitlescomvip': hash_opensubtitles,
'shooter': hash_shooter,
'thesubdb': hash_thesubdb,
}


Expand All @@ -174,16 +81,20 @@ def refine(
logger.debug('Computing hashes for %r', video.name)
for name in providers or default_providers:
provider = cast(Provider, provider_manager[name].plugin)
if name not in hash_functions:
continue

if not provider.check_types(video):
continue

if languages is not None and not provider.check_languages(languages):
continue

h = hash_functions[name](video.name)
# Try provider static method
h = provider.hash_video(video.name)

# Try generic hashes
if h is None and name in hash_functions:
h = hash_functions[name](video.name)

# Add hash
if h is not None:
video.hashes[name] = h

Expand Down
9 changes: 9 additions & 0 deletions tests/providers/test_bsplayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@
SEARCH_URL = 'http://s1.api.bsplayer-subtitles.com/v1.php'


def test_hash_bsplayer(mkv):
    """Check the BSPlayer provider hash of the ``test1`` sample video."""
    expected = '40b44a7096b71ec3'
    assert BSPlayerProvider.hash_video(mkv['test1']) == expected


def test_hash_bsplayer_too_small(tmpdir):
    """Check that the BSPlayer provider returns no hash for a too-small file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = BSPlayerProvider.hash_video(str(tiny_video))
    assert computed is None


def test_get_matches_movie_hash(episodes):
subtitle = BSPlayerSubtitle(
subtitle_id='16442520',
Expand Down
4 changes: 4 additions & 0 deletions tests/providers/test_napiprojekt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
)


def test_hash_napiprojekt(mkv):
    """Check the NapiProjekt provider hash of the ``test1`` sample video."""
    expected = '9884a2b66dcb2965d0f45ce84e37b60c'
    assert NapiProjektProvider.hash_video(mkv['test1']) == expected


def test_get_matches(movies):
subtitle = NapiProjektSubtitle(Language('pol'), '6303e7ee6a835e9fcede9fb2fb00cb36')
matches = subtitle.get_matches(movies['man_of_steel'])
Expand Down
20 changes: 1 addition & 19 deletions tests/refiners/test_hash.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
from subliminal.refiners.hash import hash_bsplayer, hash_opensubtitles, hash_thesubdb


def test_hash_bsplayer(mkv):
    """Check the BSPlayer hash of the ``test1`` sample video."""
    expected = '40b44a7096b71ec3'
    assert hash_bsplayer(mkv['test1']) == expected


def test_hash_bsplayer_too_small(tmpdir):
    """Check that no BSPlayer hash is returned for a too-small file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_bsplayer(str(tiny_video))
    assert computed is None
from subliminal.refiners.hash import hash_opensubtitles


def test_hash_opensubtitles(mkv):
Expand All @@ -17,12 +8,3 @@ def test_hash_opensubtitles(mkv):
def test_hash_opensubtitles_too_small(tmpdir):
    """Check that no OpenSubtitles hash is returned for a too-small file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_opensubtitles(str(tiny_video))
    assert computed is None


def test_hash_thesubdb(mkv):
    """Check the TheSubDB hash of the ``test1`` sample video."""
    expected = '054e667e93e254f8fa9f9e8e6d4e73ff'
    assert hash_thesubdb(mkv['test1']) == expected


def test_hash_thesubdb_too_small(tmpdir):
    """Check that no TheSubDB hash is returned for a too-small file."""
    tiny_video = tmpdir.ensure('test_too_small.mkv')
    computed = hash_thesubdb(str(tiny_video))
    assert computed is None

0 comments on commit 8e879cb

Please sign in to comment.