Merge pull request #5 from sirstudly/torbox_scraper
Adds Torbox support as torrent search/scraper
funkypenguin authored Oct 6, 2024
2 parents a87f9bb + b0a1404 commit 02bcde8
Showing 2 changed files with 183 additions and 1 deletion.
3 changes: 2 additions & 1 deletion scraper/services/__init__.py
@@ -8,10 +8,11 @@
 from scraper.services import nyaa
 from scraper.services import torrentio
 from scraper.services import zilean
+from scraper.services import torbox

 #define subclass method
 def __subclasses__():
-    return [rarbg,x1337,jackett,prowlarr,orionoid,nyaa,torrentio,zilean]
+    return [rarbg,x1337,jackett,prowlarr,orionoid,nyaa,torrentio,zilean,torbox]

 active = ['torrentio']
 overwrite = []
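
For context, this registration is what makes the new module visible to the rest of the scraper framework: each service module exposes a name and a scrape(query, altquery) function, and __subclasses__() returns the list of available service modules. Below is a minimal dispatch sketch, assuming that calling convention; the real dispatch loop is not part of this diff, and scrape_all is a hypothetical name used only for illustration.

from scraper import services

def scrape_all(query, altquery="(.*)"):
    # Hypothetical dispatch loop; each service module checks the active list
    # itself (torbox.py below returns [] when 'torbox' is not active).
    results = []
    for module in services.__subclasses__():
        results += module.scrape(query, altquery)
    return results
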
181 changes: 181 additions & 0 deletions scraper/services/torbox.py
@@ -0,0 +1,181 @@
from ui.ui_print import *
import releases
from functools import lru_cache
import time
import urllib.parse

name = "torbox"
timeout_sec = 30
default_cache_timeout = 120 # TTL cache in seconds when querying torbox
session = requests.Session()


def setup(cls, new=False):
    from settings import settings_list
    from scraper.services import active
    settings = []
    for category, allsettings in settings_list:
        for setting in allsettings:
            if setting.cls == cls:
                settings += [setting]
    if settings == []:
        if cls.name not in active:
            active += [cls.name]
    back = False
    if not new:
        while not back:
            print("0) Back")
            indices = []
            for index, setting in enumerate(settings):
                print(str(index + 1) + ') ' + setting.name)
                indices += [str(index + 1)]
            print()
            if settings == []:
                print("Nothing to edit!")
                print()
                time.sleep(3)
                return
            choice = input("Choose an action: ")
            if choice in indices:
                settings[int(choice) - 1].input()
                if not cls.name in active:
                    active += [cls.name]
                back = True
            elif choice == '0':
                back = True
            else:
                print()


def scrape(query, altquery):
    from scraper.services import active
    ui_print("[torbox] searching for " + query + " accepting titles that regex match " + altquery, ui_settings.debug)
    if 'torbox' not in active:
        return []

    matches_regex = altquery
    if altquery == "(.*)":
        matches_regex = query

    # we need the imdb id when searching torrents on torbox
    imdb_ids = (imdb_lookup(query) if not regex.search(r'tt[0-9]+', matches_regex, regex.I)
                else ["imdb:" + regex.search(r'tt[0-9]+', matches_regex, regex.I).group()])

    # store the search request so we can get better results in subsequent iterations (if necessary)
    torbox_request(store_search, query)

    # flatten and remove duplicates
    return remove_duplicates(flatten(scrape_releases(imdb_id, matches_regex, altquery) for imdb_id in imdb_ids))


def scrape_releases(imdb_id, matches_regex, altquery):
    opts = ['metadata=false']
    # narrow the search to a specific season/episode when the query looks episodic
    if regex.search(r'(S[0-9]|complete|S\?[0-9])', matches_regex, regex.I):
        s = (regex.search(r'(?<=S)([0-9]+)', matches_regex, regex.I).group()
             if regex.search(r'(?<=S)([0-9]+)', matches_regex, regex.I) else None)
        e = (regex.search(r'(?<=E)([0-9]+)', matches_regex, regex.I).group()
             if regex.search(r'(?<=E)([0-9]+)', matches_regex, regex.I) else None)
        if s is not None and int(s) != 0:
            opts.append('season=' + str(int(s)))
        if e is not None and int(e) != 0:
            opts.append('episode=' + str(int(e)))

    json_response = torbox_request(search_query, "https://search-api.torbox.app/torrents/" + imdb_id + '?' + '&'.join(opts), get_ttl_hash())
    if not json_response or not hasattr(json_response, "torrents"):
        ui_print('[torbox] No torrents found.', ui_settings.debug)
        return []

    ui_print('[torbox] ' + str(len(json_response.torrents)) + ' results found.', ui_settings.debug)
    scraped_releases = []
    for result in json_response.torrents[:]:
        if regex.match(r'(' + altquery + ')', result.raw_title, regex.I):
            links = [result.magnet]
            seeders = result.last_known_seeders
            source = '[torbox: ' + result.tracker + ']' if result.tracker else '[torbox]'
            ui_print('[torbox] found release ' + result.raw_title, ui_settings.debug)
            # size appears to be reported in bytes; convert to GB for releases.release
            scraped_releases += [releases.release(
                source, 'torrent', result.raw_title, [], float(result.size) / 1000000000, links, seeders)]
        else:
            ui_print('[torbox] skipping ' + result.raw_title + ' because it does not match deviation ' + altquery, ui_settings.debug)

    return scraped_releases


# Calls func(param) and returns the parsed JSON response if successful, or [] if not.
# When searching films, multiple calls may occur with the same IMDB id.
# Temporarily cache the result so we don't make multiple identical calls within a short space of time.
@lru_cache()
def torbox_request(func, param, ttl_hash=None):
    del ttl_hash  # to emphasize we don't use it and to shut pylint up
    try:
        response = func(param)

        if response.status_code != 200:
            ui_print('[torbox] error ' + str(response.status_code) + ': failed response from torbox. ' + response.content.decode("utf-8"))
            return []

    except requests.exceptions.Timeout:
        ui_print('[torbox] error: torbox request timed out.')
        return []
    except Exception as e:
        ui_print('[torbox] error: ' + str(e))
        return []

    try:
        # parse the JSON into attribute-style objects, so fields can be read as e.g. json_response.data.torrents
        json_response = json.loads(response.content, object_hook=lambda d: SimpleNamespace(**d))
    except Exception as e:
        ui_print('[torbox] error: unable to parse response: ' + response.content.decode("utf-8") + " " + str(e))
        return []

    if not json_response.success:
        ui_print('[torbox] error: response failed: ' + response.content.decode("utf-8"))
        return []

    if hasattr(json_response, 'message') and json_response.message:
        ui_print('[torbox] response: ' + json_response.message, ui_settings.debug)
    if hasattr(json_response, 'detail') and json_response.detail:
        ui_print('[torbox] response: ' + json_response.detail, ui_settings.debug)

    return json_response.data


def search_query(url):
    ui_print("[torbox] search URL: " + url + " ...", ui_settings.debug)
    response = session.get(url, timeout=timeout_sec)
    ui_print("done", ui_settings.debug)
    return response


# search metadata by title and return a list of ids (e.g. [imdb:tt1234567, imdb:tt7654321])
def imdb_lookup(query):
    return [row.id for row in torbox_request(search_query, "https://search-api.torbox.app/search/" + query, get_ttl_hash())]


def store_search(query):
    # refresh active torrents based on any search criteria for future requests
    url = "https://api.torbox.app/v1/api/torrents/storesearch?query=" + urllib.parse.quote(query)
    ui_print("[torbox] storing search: " + url + " ...", ui_settings.debug)
    response = session.put(url, timeout=timeout_sec)
    ui_print("done", ui_settings.debug)
    return response


def flatten(matrix):
    return [item for row in matrix for item in row]


# this is used for a TTL cache when querying torbox.
# see https://stackoverflow.com/a/55900800
def get_ttl_hash(seconds=default_cache_timeout):
    # Return the same value within `seconds` time period
    return round(time.time() / seconds)


def remove_duplicates(items):
    seen_items = set()
    new_list = []
    for release in items:
        if release.hash not in seen_items:
            seen_items.add(release.hash)
            new_list.append(release)
    return new_list
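
The caching in torbox_request() combines functools.lru_cache with the get_ttl_hash() trick referenced above: lru_cache keys on every argument, so passing a value that only changes once per default_cache_timeout seconds gives the cache an effective time-to-live. A standalone sketch of the pattern follows, with a hypothetical expensive_lookup standing in for the real HTTP request.

import time
from functools import lru_cache

@lru_cache()
def expensive_lookup(param, ttl_hash=None):
    del ttl_hash  # present only so the cache key changes when the time window rolls over
    print("performing real lookup for", param)
    return param.upper()

def get_ttl_hash(seconds=120):
    # same value for every call inside the same window of `seconds` duration
    return round(time.time() / seconds)

expensive_lookup("tt0111161", get_ttl_hash())  # performs the real lookup
expensive_lookup("tt0111161", get_ttl_hash())  # served from the cache while the window lasts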
