diff --git a/AUTHORS.rst b/AUTHORS.rst index c500be3..09cdc09 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -13,3 +13,4 @@ Contributors * @kevinlekiller - Compressed Header Support * @bettse - Various bugfixes * @guillp - Group management scripts and bugfixes +* @ukharley - Logging modifications diff --git a/README.md b/README.md index 52cf980..80e5611 100644 --- a/README.md +++ b/README.md @@ -347,7 +347,7 @@ To build the webui from source, first modify the config to include your indexer > cd webui/app/scripts > vim config.js - > [add host url] + > [add host url and port] Then initiate the build: diff --git a/api.py b/api.py index 739452d..6206cbb 100644 --- a/api.py +++ b/api.py @@ -1,9 +1,10 @@ import json - +import argparse import regex import bottle from bottle import request, response import xmltodict +import traceback from pynab import log import pynab.api @@ -33,6 +34,11 @@ def serve_static(path): return bottle.static_file(path, root='./webui/dist/fonts/') +@app.get('/bower_components/:path#.+#') +def serve_static(path): + return bottle.static_file(path, root='./webui/dist/bower_components/') + + @app.get('/api') def api(): log.debug('Handling request for {0}.'.format(request.fullpath)) @@ -57,13 +63,13 @@ def api(): @app.get('/') @app.get('/index.html') def index(): - if config.site['webui']: + if config.api.get('webui'): # disabled by default ? not really useful for a single user install raise bottle.static_file('index.html', root='./webui/dist') @app.get('/favicon.ico') def index(): - if config.site['webui']: + if config.api.get('webui'): raise bottle.static_file('favicon.ico', root='./webui/dist') @@ -108,5 +114,34 @@ def get_link(route=''): return url +def daemonize(pidfile): + try: + import traceback + from daemonize import Daemonize + daemon = Daemonize(app='pynab', pid=pidfile, action=main) + daemon.start() + except SystemExit: + raise + except: + log.critical(traceback.format_exc()) + + +def main(): + bottle.run(app=app, host=config.api.get('api_host', '0.0.0.0'), port=config.api.get('api_port', 8080)) + + if __name__ == '__main__': - bottle.run(app=app, host=config.site.get('api_host', '0.0.0.0'), port=config.site.get('api_port', 8080)) + argparser = argparse.ArgumentParser(description="Pynab main indexer script") + argparser.add_argument('-d', '--daemonize', action='store_true', help='run as a daemon') + argparser.add_argument('-p', '--pid-file', help='pid file (when -d)') + + args = argparser.parse_args() + + if args.daemonize: + pidfile = args.pid_file or config.api.get('pid_file') + if not pidfile: + log.error("A pid file is required to run as a daemon, please supply one either in the config file '{}' or as argument".format(config.__file__)) + else: + daemonize(pidfile) + else: + main() diff --git a/config.sample.py b/config.sample.py index 5707eb6..1ef50c0 100644 --- a/config.sample.py +++ b/config.sample.py @@ -1,7 +1,7 @@ import logging -site = { - # general site settings +api = { + # api settings # --------------------- # title: shows on the rss feed, can be whatever @@ -19,11 +19,8 @@ # your administrator email (shows on rss feed) 'email': '', - # enable web interface - 'webui': True, - - # api settings - # ------------ + # enable web interface + 'webui': True, # result_limit: maximum search results for rss feeds # make sure there's no quotes around it @@ -41,6 +38,12 @@ # usually 8080 'api_port': 8080, + # pid_file: process file for the api, if daemonized + # make sure it's writable, leave blank for nginx + 'pid_file': '' +} + +scan = { # scanning settings # 
----------------- @@ -70,6 +73,13 @@ # set this to 3 days or so, don't set it to 0 'dead_binary_age': 3, + # pid_file: process file for the scanner, if daemonized + # make sure it's writable, leave blank for nginx + 'pid_file': '' + +} + +postprocess = { # release processing settings # --------------------------- @@ -86,9 +96,6 @@ # 100% completion resulted in about 11,000 unmatched releases after 4 weeks over 6 groups # lowering that to 99% built an extra 3,500 releases - # postprocessing settings - # ----------------------- - # postprocess_wait: time to sleep between postprocess.py loops # setting this to 0 may be horrible to online APIs, but if you've got a good # local db it should be fine @@ -131,20 +138,7 @@ # so if we can't find a match for some movie, wait 7 days before trying that movie again # there's really no benefit to setting this low - anywhere from a week to several months is fine 'fetch_blacklist_duration': 7, - - # logging settings - # ---------------- - # logging_file: a filepath or None to go to stdout - 'logging_file': None, - - # logging.x where DEBUG, INFO, WARNING, ERROR, etc - # generally, debug if something goes wrong, info for normal usage - 'logging_level': logging.DEBUG, - - # max_log_size: maximum size of logfiles before they get rotated - # number, in bytes (this is 50mb) - 'max_log_size': 50*1024*1024, - + # regex update settings # --------------------- @@ -158,7 +152,22 @@ # generally leave alone 'blacklist_url': 'https://raw.github.com/kevinlekiller/Newznab-Blacklist/master/New/blacklists.txt', +} +log = { + # logging settings + # ---------------- + # logging_file: a filepath or None to go to stdout + 'logging_file': None, + + # logging.x where DEBUG, INFO, WARNING, ERROR, etc + # generally, debug if something goes wrong, info for normal usage + 'logging_level': logging.DEBUG, + + # max_log_size: maximum size of logfiles before they get rotated + # number, in bytes (this is 50mb) + 'max_log_size': 50*1024*1024, + } # mongodb config diff --git a/install.py b/install.py index 64aa2b5..db62d9a 100644 --- a/install.py +++ b/install.py @@ -83,7 +83,7 @@ print('Problem inserting data into MongoDB.') sys.exit(0) - if config.site['regex_url']: + if config.postprocess.get('regex_url'): print('Updating regex...') pynab.util.update_regex() else: @@ -91,7 +91,7 @@ print('If you don\'t have one, buy a Newznab+ license or find your own regexes.') print('You won\'t be able to build releases without appropriate regexes.') - if config.site['blacklist_url']: + if config.postprocess.get('blacklist_url'): print('Updating binary blacklist...') pynab.util.update_blacklist() else: diff --git a/postprocess.py b/postprocess.py index 74c4268..4b7ddcb 100644 --- a/postprocess.py +++ b/postprocess.py @@ -44,35 +44,47 @@ def process_imdb(): # print MP log as well multiprocessing.log_to_stderr().setLevel(logging.DEBUG) - # take care of REQ releases first - for release in db.releases.find({'search_name': {'$regex': 'req', '$options': '-i'}}): - pynab.releases.strip_req(release) - # start with a quick post-process - log.info('Starting with a quick post-process to clear out the cruft that\'s available locally...') + log.info('starting with a quick post-process to clear out the cruft that\'s available locally...') scripts.quick_postprocess.local_postprocess() while True: + # take care of REQ releases first + for release in db.releases.find({'search_name': {'$regex': 'req', '$options': '-i'}}): + pynab.releases.strip_req(release) + + # delete passworded releases first so we don't bother 
processing them + if config.postprocess.get('delete_passworded', True): + if config.postprocess.get('delete_potentially_passworded', True): + query = {'passworded': {'$in': [True, 'potentially']}} + else: + query = {'passworded': True} + db.releases.remove(query) + + # delete any nzbs that don't have an associated release + # and delete any releases that don't have an nzb + + + # grab and append tvrage data to tv releases tvrage_p = None - if config.site['process_tvrage']: + if config.postprocess.get('process_tvrage', True): tvrage_p = multiprocessing.Process(target=process_tvrage) tvrage_p.start() imdb_p = None - if config.site['process_imdb']: + if config.postprocess.get('process_imdb', True): imdb_p = multiprocessing.Process(target=process_imdb) imdb_p.start() # grab and append nfo data to all releases nfo_p = None - if config.site['process_nfos']: + if config.postprocess.get('process_nfos', True): nfo_p = multiprocessing.Process(target=process_nfos) nfo_p.start() # check for passwords, file count and size rar_p = None - if config.site['process_rars']: + if config.postprocess.get('process_rars', True): rar_p = multiprocessing.Process(target=process_rars) rar_p.start() @@ -92,11 +104,12 @@ def process_imdb(): scripts.rename_bad_releases.rename_bad_releases(8010) scripts.rename_bad_releases.rename_bad_releases(7020) - if config.site['delete_bad_releases']: + if config.postprocess.get('delete_bad_releases', False): pass #log.info('Deleting bad releases...') # not confident in this yet # wait for the configured amount of time between cycles - log.info('Sleeping for {:d} seconds...'.format(config.site['postprocess_wait'])) - time.sleep(config.site['postprocess_wait']) \ No newline at end of file + postprocess_wait = config.postprocess.get('postprocess_wait', 1) + log.info('sleeping for {:d} seconds...'.format(postprocess_wait)) + time.sleep(postprocess_wait) \ No newline at end of file diff --git a/pynab/__init__.py b/pynab/__init__.py index e4f7687..aa67281 100644 --- a/pynab/__init__.py +++ b/pynab/__init__.py @@ -3,18 +3,58 @@ __author__ = 'James Meneghello' __email__ = 'murodese@gmail.com' -__version__ = '1.1.0' +__version__ = '1.2.0' import logging import config import logging.handlers +import os +import colorlog +import inspect +import sys log = logging.getLogger(__name__) -log.setLevel(config.site['logging_level']) +log.setLevel(config.log.get('logging_level', logging.DEBUG)) -if config.site['logging_file']: - handler = logging.handlers.RotatingFileHandler(config.site['logging_file'], maxBytes=config.site['max_log_size'], backupCount=5, encoding='utf-8') - handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logging_file = config.log.get('logging_file') +log_descriptor = None + +formatter = colorlog.ColoredFormatter( + "%(log_color)s%(asctime)s - %(levelname)s - %(reset)s %(blue)s%(message)s", + datefmt=None, + reset=True, + log_colors={ + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'red', + } +) + +if logging_file: + frame = inspect.currentframe() + info=inspect.getouterframes(frame) + c=0 + for n in info: + if n[4] and c > 1: # c > 1 skips this module itself + if n[3] == '<module>': # from my testing (on Windows), the first module found is the calling module + break + c += 1 + if c >= len(info): + sys.exit(1) + name, _ = os.path.splitext(os.path.basename(inspect.stack()[c][1].rstrip(os.sep))) + file, ext = os.path.splitext(config.log.get('logging_file')) + logging_file = ''.join([file, '_', name, ext]) + + handler 
= logging.handlers.RotatingFileHandler(logging_file, maxBytes=config.log.get('max_log_size', 50*1024*1024), backupCount=5, encoding='utf-8') + handler.setFormatter(formatter) log.addHandler(handler) + log_descriptor = handler.stream.fileno() else: - logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') + handler = logging.StreamHandler() + handler.setFormatter(formatter) + log.addHandler(handler) + +# set up root_dir for use with templates etc +root_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..') diff --git a/pynab/api.py b/pynab/api.py index 3d96866..4cc6e8d 100644 --- a/pynab/api.py +++ b/pynab/api.py @@ -9,7 +9,7 @@ from bottle import request, response from pynab.db import db, fs -from pynab import log +from pynab import log, root_dir import config @@ -162,8 +162,7 @@ def details(dataset=None): try: tmpl = Template( - filename=os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', - 'templates/api/result.mako')) + filename=os.path.join(root_dir, 'templates/api/result.mako')) return tmpl.render(**dataset) except: log.error('Failed to deliver page: {0}'.format(exceptions.text_error_template().render())) @@ -177,11 +176,11 @@ def details(dataset=None): def caps(dataset=None): - dataset['app_version'] = config.site['version'] - dataset['api_version'] = config.site['api_version'] - dataset['email'] = config.site['email'] or '' - dataset['result_limit'] = config.site['result_limit'] or 20 - dataset['result_default'] = config.site['result_default'] or 20 + dataset['app_version'] = config.api.get('version', '1.0.0') + dataset['api_version'] = config.api.get('api_version', '0.2.3') + dataset['email'] = config.api.get('email', '') + dataset['result_limit'] = config.api.get('result_limit', 20) + dataset['result_default'] = config.api.get('result_default', 20) categories = {} for category in db.categories.find(): @@ -194,7 +193,7 @@ def caps(dataset=None): try: tmpl = Template( - filename=os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'templates/api/caps.mako')) + filename=os.path.join(root_dir, 'templates/api/caps.mako')) return tmpl.render(**dataset) except: log.error('Failed to deliver page: {0}'.format(exceptions.text_error_template().render())) @@ -214,10 +213,10 @@ def search(dataset=None, params=None): # set limit to request or default # this will also match limit == 0, which would be infinite limit = request.query.limit or None - if limit and int(limit) <= int(config.site['result_limit']): + if limit and int(limit) <= int(config.api.get('result_limit', 100)): limit = int(limit) else: - limit = int(config.site['result_default']) + limit = int(config.api.get('result_default', 20)) # offset is only available for rss searches and won't work with text offset = request.query.offset or None @@ -303,11 +302,9 @@ def search(dataset=None, params=None): dataset['search'] = True dataset['api_key'] = request.query.apikey - pprint.pprint(results) - try: tmpl = Template( - filename=os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'templates/api/result.mako')) + filename=os.path.join(root_dir, 'templates/api/result.mako')) return tmpl.render(**dataset) except: log.error('Failed to deliver page: {0}'.format(exceptions.text_error_template().render())) diff --git a/pynab/binaries.py b/pynab/binaries.py index f856e75..c4ce246 100644 --- a/pynab/binaries.py +++ b/pynab/binaries.py @@ -36,7 +36,6 @@ def save(binary): -- Note: Much quicker. Hooray! 
""" - log.debug('Saving to binary: ' + binary['name']) existing_binary = db.binaries.find_one({'name': binary['name']}) try: @@ -61,7 +60,7 @@ def save(binary): 'parts': binary['parts'] }) except: - log.error('Binary was too large to fit in DB!') + log.error('binary: binary was too large to fit in DB!') def save_and_clear(binaries=None, parts=None): @@ -72,12 +71,10 @@ def save_and_clear(binaries=None, parts=None): Turns out MySQL kinda sucks at deleting lots of shit. If we need more speed, move the parts away and drop the temporary table instead.""" - log.info('Saving discovered binaries...') for binary in binaries.values(): save(binary) if parts: - log.info('Removing parts that were either packaged or terrible...') db.parts.remove({'_id': {'$in': parts}}) @@ -86,14 +83,12 @@ def process(): based on regex in DB. Copies parts/segments across to the binary document. Keeps a list of parts that were processed for deletion.""" - log.info('Starting to process parts and build binaries...') - start = time.clock() + + start = time.time() binaries = {} orphan_binaries = [] processed_parts = [] - chunk_count = 1 - approx_chunks = db.parts.count() / CHUNK_SIZE # new optimisation: if we only have parts from a couple of groups, # we don't want to process the regex for every single one. @@ -119,14 +114,12 @@ def process(): try: result = regex.search(r, part['subject'], regex_flags) except: - log.error('Broken regex detected. _id: {:d}, removing...'.format(reg['_id'])) + log.error('binary: broken regex detected. _id: {:d}, removing...'.format(reg['_id'])) db.regexes.remove({'_id': reg['_id']}) continue match = result.groupdict() if result else None if match: - log.debug('Matched part {} to {}.'.format(part['subject'], reg['regex'])) - # remove whitespace in dict values try: match = {k: v.strip() for k, v in match.items()} @@ -195,10 +188,6 @@ def process(): # save and delete stuff in chunks if len(processed_parts) >= CHUNK_SIZE: - log.info('Processing chunk {0:d} of approx {1:.1f} with {2:d} parts...' 
- .format(chunk_count, approx_chunks, CHUNK_SIZE) - ) - chunk_count += 1 save_and_clear(binaries, processed_parts) processed_parts = [] binaries = {} @@ -206,8 +195,11 @@ def process(): # clear off whatever's left save_and_clear(binaries, processed_parts) - end = time.clock() - log.info('Time elapsed: {:.2f}s'.format(end - start)) + end = time.time() + + log.info('binary: processed {} parts in {:.2f}s' + .format(db.parts.count(), end - start) + ) def parse_xref(xref): diff --git a/pynab/categories.py b/pynab/categories.py index d24aadb..50a4700 100644 --- a/pynab/categories.py +++ b/pynab/categories.py @@ -1,6 +1,7 @@ import regex import collections from pynab import log +from pynab.db import db # category codes # these are stored in the db, as well @@ -81,7 +82,7 @@ """ group_regex = { regex.compile('alt\.binaries\.0day', regex.I): [ - CAT_PARENT_PC, CAT_PC_0DAY + CAT_PARENT_BOOK, CAT_PARENT_PC, CAT_PC_0DAY ], regex.compile('alt\.binaries\.ath', regex.I): [ CAT_PARENT_XXX, CAT_PARENT_GAME, CAT_PARENT_PC, CAT_PARENT_TV, CAT_PARENT_MOVIE, CAT_PARENT_MUSIC, @@ -90,19 +91,19 @@ regex.compile('alt\.binaries\.b4e', regex.I): [ CAT_PARENT_PC, CAT_PARENT_BOOK ], - regex.compile('alt\.binaries\..*?audiobook.*?', regex.I): [ + regex.compile('alt\.binaries\..*?audiobook', regex.I): [ CAT_MUSIC_AUDIOBOOK ], regex.compile('lossless|flac', regex.I): [ CAT_MUSIC_LOSSLESS ], - regex.compile('alt\.binaries\.sounds.*?|alt\.binaries\.mp3.*?|alt\.binaries.*?\.mp3', regex.I): [ + regex.compile('alt\.binaries\.sounds|alt\.binaries\.mp3|alt\.binaries\.mp3', regex.I): [ CAT_PARENT_MUSIC, CAT_MISC_OTHER ], regex.compile('alt\.binaries\.console.ps3', regex.I): [ CAT_PARENT_GAME, CAT_GAME_PS3 ], - regex.compile('alt\.binaries\.games\.xbox*', regex.I): [ + regex.compile('alt\.binaries\.games\.xbox', regex.I): [ CAT_PARENT_GAME, CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_MOVIE ], regex.compile('alt\.binaries\.games$', regex.I): [ @@ -111,34 +112,34 @@ regex.compile('alt\.binaries\.games\.wii', regex.I): [ CAT_PARENT_GAME ], - regex.compile('alt\.binaries\.dvd.*?', regex.I): [ + regex.compile('alt\.binaries\.dvd', regex.I): [ CAT_PARENT_BOOK, CAT_PARENT_PC, CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_MOVIE ], - regex.compile('alt\.binaries\.hdtv*|alt\.binaries\.x264|alt\.binaries\.tv$', regex.I): [ + regex.compile('alt\.binaries\.hdtv|alt\.binaries\.x264|alt\.binaries\.tv$', regex.I): [ CAT_PARENT_MUSIC, CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_MOVIE ], regex.compile('alt\.binaries\.nospam\.cheerleaders', regex.I): [ CAT_PARENT_MUSIC, CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_PC, CAT_PARENT_MOVIE ], - regex.compile('alt\.binaries\.classic\.tv.*?', regex.I): [ + regex.compile('alt\.binaries\.classic\.tv', regex.I): [ CAT_PARENT_TV, CAT_TV_OTHER ], - regex.compile('alt\.binaries\.multimedia', regex.I): [ + regex.compile('alt\.binaries\.multimedia$', regex.I): [ CAT_PARENT_MOVIE, CAT_PARENT_TV ], - regex.compile('alt\.binaries\.multimedia\.anime(\.highspeed)?', regex.I): [ + regex.compile('alt\.binaries\.multimedia\.anime', regex.I): [ CAT_TV_ANIME ], regex.compile('alt\.binaries\.anime', regex.I): [ CAT_TV_ANIME ], - regex.compile('alt\.binaries\.e(-|)book*?', regex.I): [ + regex.compile('alt\.binaries\.e(-|)book', regex.I): [ CAT_PARENT_BOOK, CAT_BOOK_EBOOK ], - regex.compile('alt\.binaries\.comics.*?', regex.I): [ + regex.compile('alt\.binaries\.comics', regex.I): [ CAT_BOOK_COMICS ], - regex.compile('alt\.binaries\.cores.*?', regex.I): [ + regex.compile('alt\.binaries\.cores', regex.I): [ CAT_PARENT_BOOK, 
CAT_PARENT_XXX, CAT_PARENT_GAME, CAT_PARENT_PC, CAT_PARENT_MUSIC, CAT_PARENT_TV, CAT_PARENT_MOVIE, CAT_MISC_OTHER ], @@ -175,7 +176,7 @@ CAT_TV_OTHER ], regex.compile('alt\.binaries\.documentaries', regex.I): [ - CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_MOVIE, CAT_MISC_OTHER + CAT_TV_DOCU ], regex.compile('alt\.binaries\.drummers', regex.I): [ CAT_PARENT_BOOK, CAT_PARENT_XXX, CAT_PARENT_TV, CAT_PARENT_MOVIE @@ -197,7 +198,7 @@ CAT_PARENT_BOOK, CAT_PARENT_XXX, CAT_PARENT_PC, CAT_PARENT_MUSIC, CAT_PARENT_GAME, CAT_PARENT_TV, CAT_PARENT_MOVIE, CAT_MISC_OTHER ], - regex.compile('alt\.binaries\.mma|alt\.binaries\.multimedia\.sports.*?', regex.I): [ + regex.compile('alt\.binaries\.mma|alt\.binaries\.multimedia\.sports', regex.I): [ CAT_TV_SPORT ], regex.compile('alt\.binaries\.b4e$', regex.I): [ @@ -240,7 +241,10 @@ regex.compile('dk\.binaer\.musik', regex.I): [ CAT_PARENT_MUSIC, CAT_MISC_OTHER ], - regex.compile('alt\.binaries\.(teevee|multimedia|tv|tvseries).*?', regex.I): [ + regex.compile('alt\.binaries\.(teevee|tv|tvseries)', regex.I): [ + CAT_PARENT_TV, CAT_PARENT_MOVIE, CAT_PARENT_XXX, CAT_MISC_OTHER + ], + regex.compile('alt\.binaries\.multimedia$', regex.I): [ CAT_PARENT_XXX, CAT_PARENT_GAME, CAT_PARENT_MUSIC, CAT_PARENT_TV, CAT_PARENT_PC, CAT_PARENT_MOVIE, CAT_MISC_OTHER ], @@ -273,17 +277,20 @@ '( S\d{1,2} |\.S\d{2}\.|\.S\d{2}|s\d{1,2}e\d{1,2}|(\.| |\b|\-)EP\d{1,2}\.|\.E\d{1,2}\.|special.*?HDTV|HDTV.*?special|PDTV|\.\d{3}\.DVDrip|History( |\.|\-)Channel|trollhd|trollsd|HDTV.*?BTL|C4TV|WEB DL|web\.dl|WWE|season \d{1,2}|(?!collectors).*?series|\.TV\.|\.dtv\.|UFC|TNA|staffel|episode|special\.\d{4})', regex.I), [ CAT_TV_FOREIGN, CAT_TV_SPORT, CAT_TV_DOCU, CAT_TV_HD, CAT_TV_SD, CAT_TV_OTHER - ]), + ]), (regex.compile('seizoen', regex.I), [ CAT_TV_FOREIGN - ]) + ]), + (regex.compile('\[([0-9A-F]{8})\]$', regex.I), [ + CAT_TV_ANIME + ]), + (regex.compile('(SD|HD|PD)TV', regex.I), [ + CAT_TV_HD, CAT_TV_SD + ]), ]), CAT_PARENT_MOVIE: collections.OrderedDict([ - (regex.compile('', regex.I), [ - CAT_MOVIE_FOREIGN, CAT_MOVIE_SD, CAT_MOVIE_3D, CAT_MOVIE_HD, CAT_MOVIE_BLURAY - ]), - (regex.compile('xvid', regex.I), [ - CAT_MOVIE_OTHER + (regex.compile('[-._ ]AVC|[-._ ]|(B|H)(D|R)RIP|Bluray|BD[-._ ]?(25|50)?|BR|Camrip|[-._ ]\d{4}[-._ ].+(720p|1080p|Cam)|DIVX|[-._ ]DVD[-._ ]|DVD-?(5|9|R|Rip)|Untouched|VHSRip|XVID|[-._ ](DTS|TVrip)[-._ ]', regex.I), [ + CAT_MOVIE_FOREIGN, CAT_MOVIE_SD, CAT_MOVIE_3D, CAT_MOVIE_BLURAY, CAT_MOVIE_HD, CAT_MOVIE_OTHER ]) ]), CAT_PARENT_PC: collections.OrderedDict([ @@ -294,7 +301,7 @@ ]), CAT_PARENT_XXX: collections.OrderedDict([ (regex.compile( - '(\.JAV\.| JAV |\.Jav\.|Girls.*?Gone.*?Wild|\-MotTto|-Nukleotide|XXX|PORNOLATiON|SWE6RUS|swe6|SWE6|NYMPHO|DETOXATiON|DivXfacTory|TESORO|STARLETS|xxx|XxX|PORNORIP|PornoRip)', + '(XXX|Porn|PORNOLATiON|SWE6RUS|masturbation|masturebate|lesbian|Imageset|Squirt|Transsexual|a\.b\.erotica|pictures\.erotica\.anime|cumming|ClubSeventeen|Errotica|Erotica|EroticaX|nymph|sexontv|My_Stepfather_Made_Me|slut|\bwhore\b)', regex.I), [ CAT_XXX_DVD, CAT_XXX_IMAGESET, CAT_XXX_PACK, CAT_XXX_WMV, CAT_XXX_X264, CAT_XXX_XVID, CAT_XXX_OTHER ]), @@ -397,13 +404,17 @@ regex.I), regex.compile( '(?!.*?S\d{2}.*?)(?!.*?EP?\d{2}.*?)((\b|_)(Science.Channel|National.geographi|History.Chanel|Colossal|Discovery.travel|Planet.Science|Animal.Planet|Discovery.Sci|Regents|Discovery.World|Discovery.truth|Discovery.body|Dispatches|Biography|The.Investigator|Private.Life|Footballs.Greatest|Most.Terrifying)(\b|_))', - regex.I) + regex.I), + regex.compile('Documentary', 
regex.I), ], CAT_TV_HD: [ regex.compile('1080|720', regex.I) ], CAT_TV_SD: [ - regex.compile('(SDTV|HDTV|XVID|DIVX|PDTV|WEBDL|DVDR|DVD-RIP|WEB-DL|x264|dvd)', regex.I) + regex.compile('(SDTV|HDTV|XVID|DIVX|PDTV|WEBDL|WEBRIP|DVDR|DVD-RIP|WEB-DL|x264|dvd)', regex.I) + ], + CAT_TV_ANIME: [ + regex.compile('[-._ ]Anime[-._ ]|^\(\[AST\]\s|\[(HorribleSubs|a4e|A-Destiny|AFFTW|Ahodomo|Anxious-He|Ayako-Fansubs|Broken|Chihiro|CoalGirls|CoalGuys|CMS|Commie|CTTS|Darksouls-Subs|Delicio.us|Doki|Doutei|Doremi Fansubs|Elysium|EveTaku|FFF|FFFpeeps|GG|GotWoot?|GotSpeed?|GX_ST|Hadena|Hatsuyuki|KiraKira|Hiryuu|HorribleSubs|Hybrid-Subs|IB|Kira-Fansub|KiteSeekers|m.3.3.w|Mazui|Muteki|Oyatsu|PocketMonsters|Ryuumaru|sage|Saitei|Sayonara-Group|Seto-Otaku|Shimeji|Shikakku|SHiN-gx|Static-Subs|SubDESU (Hentai)|SubSmith|Underwater|UTW|Warui-chan|Whine-Subs|WhyNot Subs|Yibis|Zenyaku|Zorori-Project)\]|\[[0-9A-Z]{8}\]$', regex.I) ], CAT_MOVIE_FOREIGN: [ regex.compile( @@ -421,7 +432,8 @@ { regex.compile('(divx|xvid|(\.| )r5(\.| ))', regex.I): True, regex.compile('(720|1080)', regex.I): False, - } + }, + regex.compile('[\.\-\ ]BeyondHD', regex.I) ], CAT_MOVIE_3D: [ { @@ -430,7 +442,7 @@ } ], CAT_MOVIE_HD: [ - regex.compile('x264|wmvhd|web\-dl|XvidHD|BRRIP|HDRIP|HDDVD|bddvd|BDRIP|webscr', regex.I) + regex.compile('x264|AVC|VC\-?1|wmvhd|web\-dl|XvidHD|BRRIP|HDRIP|HDDVD|bddvd|BDRIP|webscr|720p|1080p', regex.I) ], CAT_MOVIE_BLURAY: [ regex.compile('bluray|bd?25|bd?50|blu-ray|VC1|VC\-1|AVC|BDREMUX', regex.I) @@ -540,26 +552,40 @@ } +def get_category_name(id): + category = db.categories.find_one({'_id': id}) + parent_category = db.categories.find_one({'_id': category['parent_id']}) + + return '{} > {}'.format(parent_category['name'], category['name']) + + def determine_category(name, group_name=''): """Categorise release based on release name and group name.""" - log.debug('Attempting to determine category for {0}...'.format(name)) + + category = '' if is_hashed(name): - log.debug('Release is hashed!') - return CAT_MISC_OTHER + category = CAT_MISC_OTHER + else: + if group_name: + category = check_group_category(name, group_name) - category = check_group_category(name, group_name) - if category: - return category + if not category: + for parent_category in parent_category_regex.keys(): + category = check_parent_category(name, parent_category) + if category: + break - for parent_category in parent_category_regex.keys(): - category = check_parent_category(name, parent_category) - if category: - log.debug('Category found as: {:d}'.format(category)) - return category + if not category: + category = CAT_MISC_OTHER - # if all else fails - return CAT_MISC_OTHER + log.info('category: ({}) [{}]: {} ({})'.format( + group_name, + name, + get_category_name(category), + category + )) + return category def is_hashed(name): @@ -572,34 +598,27 @@ def check_group_category(name, group_name): take appropriate action - match against categories as dictated in the dicts above.""" for regex, actions in group_regex.items(): - if regex.search(group_name): - log.debug('Matched group regex {0}...'.format(regex.pattern)) + if regex.match(group_name): for action in actions: if action in parent_category_regex.keys(): category = check_parent_category(name, action) if category: - log.debug('Found category: {:d}!'.format(category)) return category elif action in category_regex.keys(): - log.debug('Reached end of list with a single cat {:d}...'.format(action)) return action def check_parent_category(name, parent_category): """Check the release against a single 
parent category, which will call appropriate sub-category checks.""" - log.debug('Checking parent category: {:d}'.format(parent_category)) for test, actions in parent_category_regex[parent_category].items(): - log.debug('Checking parent test (this might be blank): {0}'.format(test.pattern)) - - if test.search(name): + if test.search(name) is not None: for category in actions: if category in category_regex: if check_single_category(name, category): return category else: - log.debug('Category has no regex tests, assigning: {:d}'.format(category)) return category return False @@ -607,7 +626,8 @@ def check_parent_category(name, parent_category): def check_single_category(name, category): """Check release against a single category.""" - log.debug('Checking single category {0}...'.format(category)) + + log.info('checking {}'.format(category)) for regex in category_regex[category]: if isinstance(regex, collections.Mapping): @@ -615,9 +635,9 @@ def check_single_category(name, category): return True elif isinstance(regex, tuple): (r, ret) = regex - if r.search(name): + if r.search(name) is not None: return ret else: - if regex.search(name): + if regex.search(name) is not None: return True - return False \ No newline at end of file + return False diff --git a/pynab/groups.py b/pynab/groups.py index 1277204..f656adb 100644 --- a/pynab/groups.py +++ b/pynab/groups.py @@ -4,11 +4,11 @@ from pynab import parts import config -MESSAGE_LIMIT = config.site['message_scan_limit'] +MESSAGE_LIMIT = config.scan.get('message_scan_limit', 20000) def backfill(group_name, date=None): - log.info('{}: Backfilling group...'.format(group_name)) + log.info('group: {}: backfilling group'.format(group_name)) server = Server() _, count, first, last, _ = server.group(group_name) @@ -16,33 +16,26 @@ def backfill(group_name, date=None): if date: target_article = server.day_to_post(group_name, server.days_old(date)) else: - target_article = server.day_to_post(group_name, config.site['backfill_days']) + target_article = server.day_to_post(group_name, config.scan.get('backfill_days', 10)) group = db.groups.find_one({'name': group_name}) if group: # if the group hasn't been updated before, quit if not group['first']: - log.error('{}: Need to run a normal update prior to backfilling group.'.format(group_name)) + log.error('group: {}: run a normal update prior to backfilling'.format(group_name)) if server.connection: server.connection.quit() return False - log.info('{0}: Server has {1:d} - {2:d} or ~{3:d} days.' - .format(group_name, first, last, server.days_old(server.post_date(group_name, first))) - ) - # if the first article we have is lower than the target if target_article >= group['first']: - log.info('{}: Nothing to do, we already have the target post.'.format(group_name)) + log.info('group: {}: Nothing to do, we already have the target post.'.format(group_name)) if server.connection: server.connection.quit() return True # or if the target is below the server's first if target_article < first: - log.warning( - '{}: Backfill target is older than the server\'s retention. 
Setting target to the first possible article.'.format( - group_name)) target_article = first total = group['first'] - target_article @@ -67,12 +60,12 @@ def backfill(group_name, date=None): }) retries = 0 else: - log.error('{}: Failed while saving parts.'.format(group_name)) + log.error('group: {}: failed while saving parts'.format(group_name)) if server.connection: server.connection.quit() return False else: - log.error('Problem updating group - trying again...') + log.error('group: {}: problem updating group - trying again'.format(group_name)) retries += 1 # keep trying the same block 3 times, then skip if retries <= 3: @@ -88,14 +81,14 @@ def backfill(group_name, date=None): if target_article > start: start = target_article else: - log.error('{}: Group doesn\'t exist in db.'.format(group_name)) + log.error('group: {}: group doesn\'t exist in db.'.format(group_name)) if server.connection: server.connection.quit() return False def update(group_name): - log.info('{}: Updating group...'.format(group_name)) + log.info('group: {}: updating group'.format(group_name)) server = Server() _, count, first, last, _ = server.group(group_name) @@ -109,7 +102,7 @@ def update(group_name): # if our last article is newer than the server's, something's wrong if last < group['last']: - log.error('{}: Server\'s last article {:d} is lower than the local {:d}'.format(group_name, last, + log.error('group: {}: last article {:d} on server is older than the local {:d}'.format(group_name, last, group['last'])) if server.connection: try: @@ -119,9 +112,9 @@ def update(group_name): return False else: # otherwise, start from x days old - start = server.day_to_post(group_name, config.site['new_group_scan_days']) + start = server.day_to_post(group_name, config.scan.get('new_group_scan_days', 5)) if not start: - log.error('{}: Couldn\'t determine a start point for group.'.format(group_name)) + log.error('group: {}: couldn\'t determine a start point for group'.format(group_name)) if server.connection: try: server.connection.quit() @@ -150,24 +143,22 @@ def update(group_name): if start_date and end_date: total_date = end_date - start_date - log.debug('{}: Start: {:d} ({}) End: {:d} ({}) Total: {:d} ({} days, {} hours, {} minutes)' - .format( - group_name, start, start_date, - end, end_date, - total, total_date.days, total_date.seconds // 3600, (total_date.seconds // 60) % 60 - ) - ) + log.info('group: {}: pulling {} - {} ({}d, {}h, {}m)'.format( + group_name, + start, end, + total_date.days, + total_date.seconds // 3600, + (total_date.seconds // 60) % 60 + )) else: - log.debug('{}: Group is semi-broken - not all debug output is available. Start: {}, End: {}, Total: {}' - .format(group_name, start, end, total) - ) + log.info('group: {}: pulling {} - {}'.format(group_name, start, end)) if total > 0: if not group['last']: - log.info('{}: Starting new group with {:d} days and {:d} new parts.' 
- .format(group_name, config.site['new_group_scan_days'], total)) + log.info('group: {}: starting new group with {:d} days and {:d} new parts' + .format(group_name, config.scan.get('new_group_scan_days', 5), total)) else: - log.info('{}: Group has {:d} new parts.'.format(group_name, total)) + log.info('group: {}: group has {:d} new parts.'.format(group_name, total)) retries = 0 # until we're finished, loop @@ -192,7 +183,7 @@ def update(group_name): }) retries = 0 else: - log.error('{}: Failed while saving parts.'.format(group_name)) + log.error('group: {}: failed while saving parts'.format(group_name)) if server.connection: try: server.connection.quit() @@ -209,14 +200,13 @@ def update(group_name): return True else: start = end + 1 - log.info('{}: {:d} messages to go for this group.'.format(group_name, last - end)) else: - log.info('{}: No new records for group.'.format(group_name)) + log.info('group: {}: no new messages'.format(group_name)) if server.connection: server.connection.quit() return True else: - log.error('{}: No such group exists in the db.'.format(group_name)) + log.error('group: {}: no group in db'.format(group_name)) if server.connection: server.connection.quit() return False \ No newline at end of file diff --git a/pynab/imdb.py b/pynab/imdb.py index f0bd365..1b22286 100644 --- a/pynab/imdb.py +++ b/pynab/imdb.py @@ -16,13 +16,12 @@ def process_release(release, online=True): - log.info('Processing Movie information for movie {}.'.format(release['search_name'])) name, year = parse_movie(release['search_name']) if name and year: - log.debug('Parsed as {} {}'.format(name, year)) + method = 'local' imdb = db.imdb.find_one({'name': clean_name(name), 'year': year}) if not imdb and online: - log.info('Movie not found in local IMDB DB, searching online...') + method = 'online' movie = search(clean_name(name), year) if movie and movie['Type'] == 'movie': db.imdb.update( @@ -38,14 +37,21 @@ def process_release(release, online=True): imdb = db.imdb.find_one({'_id': movie['imdbID']}) if imdb: - log.info('IMDB match found, appending IMDB ID to release.') + log.info('[{}] - [{}] - imdb added: {}'.format( + release['_id'], + release['search_name'], + method + )) db.releases.update({'_id': release['_id']}, { '$set': { 'imdb': imdb } }) elif not imdb and online: - log.warning('Could not find IMDB data to associate with release {}.'.format(release['search_name'])) + log.warning('[{}] - [{}] - imdb not found: online'.format( + release['_id'], + release['search_name'] + )) db.releases.update({'_id': release['_id']}, { '$set': { 'imdb': { @@ -54,9 +60,15 @@ def process_release(release, online=True): } }) else: - log.warning('Could not find local IMDB data to associate with release {}.'.format(release['search_name'])) + log.warning('[{}] - [{}] - imdb not found: local'.format( + release['_id'], + release['search_name'] + )) else: - log.warning('Could not parse name for movie data: {}.'.format(release['search_name'])) + log.error('[{}] - [{}] - imdb not found: no suitable regex for movie name'.format( + release['_id'], + release['search_name'] + )) db.releases.update({'_id': release['_id']}, { '$set': { 'imdb': { @@ -68,9 +80,7 @@ def process_release(release, online=True): def process(limit=100, online=True): """Process movies without imdb data and append said data.""" - log.info('Processing movies to add IMDB data...') - - expiry = datetime.datetime.now(pytz.utc) - datetime.timedelta(config.site['fetch_blacklist_duration']) + expiry = datetime.datetime.now(pytz.utc) - 
datetime.timedelta(config.postprocess.get('fetch_blacklist_duration', 7)) query = { 'imdb._id': {'$exists': False}, @@ -91,7 +101,6 @@ def process(limit=100, online=True): def search(name, year): """Search OMDB for a movie and return the IMDB ID.""" - log.info('Searching for movie: {}'.format(name)) # if we managed to parse the year from the name # include it, since it'll narrow results @@ -104,7 +113,7 @@ def search(name, year): try: data = r.json() except: - log.debug('There was a problem accessing the API page.') + log.critical('There was a problem accessing the IMDB API page.') return None if 'Search' in data: @@ -112,12 +121,10 @@ def search(name, year): # doublecheck, but the api should've searched properly ratio = difflib.SequenceMatcher(None, clean_name(name), clean_name(movie['Title'])).ratio() if ratio > 0.8 and year == movie['Year'] and movie['Type'] == 'movie': - log.info('OMDB movie match found: {}'.format(movie['Title'])) return movie def get_details(id): - log.info('Retrieving movie details for {}...'.format(id)) r = requests.get(OMDB_DETAIL_URL + id) data = r.json() diff --git a/pynab/nfos.py b/pynab/nfos.py index e00342e..66d4a5b 100644 --- a/pynab/nfos.py +++ b/pynab/nfos.py @@ -37,7 +37,6 @@ def get(nfo_id): def process(limit=5, category=0): """Process releases for NFO parts and download them.""" - log.info('Checking for NFO segments...') with Server() as server: query = {'nfo': None} @@ -45,7 +44,6 @@ def process(limit=5, category=0): query['category._id'] = int(category) for release in db.releases.find(query).limit(limit).sort('posted', pymongo.DESCENDING).batch_size(50): - log.debug('Checking for NFO in {}...'.format(release['search_name'])) nzb = pynab.nzbs.get_nzb_dict(release['nzb']) if nzb: @@ -76,13 +74,23 @@ def process(limit=5, category=0): 'nfo': nfo_file } }) - log.info('Grabbed and saved NFO for: {}'.format(release['name'])) + + log.info('nfo: [{}] - [{}] - nfo added'.format( + release['_id'], + release['search_name'] + )) break else: - log.debug('Error retrieving NFO.') + log.warning('nfo: [{}] - [{}] - nfo unavailable'.format( + release['_id'], + release['search_name'] + )) continue else: - log.debug('No NFOs found in this release.') + log.warning('nfo: [{}] - [{}] - no nfo in release'.format( + release['_id'], + release['search_name'] + )) db.releases.update({'_id': release['_id']}, { '$set': { 'nfo': False diff --git a/pynab/nzbs.py b/pynab/nzbs.py index c6048de..a88be9c 100644 --- a/pynab/nzbs.py +++ b/pynab/nzbs.py @@ -13,7 +13,7 @@ from mako import exceptions from pynab.db import fs, db -from pynab import log +from pynab import log, root_dir import pynab nfo_regex = '[ "\(\[].*?\.(nfo|ofn)[ "\)\]]' @@ -70,7 +70,6 @@ def get_nzb_dict(nzb_id): def create(gid, name, binary): """Create the NZB, store it in GridFS and return the ID to be linked to the release.""" - log.debug('Creating NZB {0}.nzb.gz and storing it to GridFS...'.format(gid)) if binary['category_id']: category = db.categories.find_one({'id': binary['category_id']}) else: @@ -78,10 +77,10 @@ def create(gid, name, binary): xml = '' try: - tpl = Template(filename='templates/nzb.mako') + tpl = Template(filename=os.path.join(root_dir, 'templates/nzb.mako')) xml = tpl.render(version=pynab.__version__, name=name, category=category, binary=binary) except: - log.error('Failed to create NZB: {0}'.format(exceptions.text_error_template().render())) + log.error('nzb: failed to create NZB: {0}'.format(exceptions.text_error_template().render())) return None data = gzip.compress(xml.encode('utf-8')) @@ 
-113,11 +112,11 @@ def import_nzb(filepath, quick=True): if 'group' in elem.tag and 'groups' not in elem.tag: release['group_name'] = elem.text except: - log.error('Error parsing NZB files: file appears to be corrupt.') + log.error('nzb: error parsing NZB files: file appears to be corrupt.') return False if 'name' not in release: - log.error('Failed to import nzb: {0}'.format(filepath)) + log.error('nzb: failed to import nzb: {0}'.format(filepath)) return False # check that it doesn't exist first @@ -154,7 +153,7 @@ def import_nzb(filepath, quick=True): if 'group_name' in release: group = db.groups.find_one({'name': release['group_name']}, {'name': 1}) if not group: - log.error('Could not add release - group {0} doesn\'t exist.'.format(release['group_name'])) + log.error('nzb: could not add release - group {0} doesn\'t exist.'.format(release['group_name'])) return False release['group'] = group del release['group_name'] @@ -168,12 +167,12 @@ def import_nzb(filepath, quick=True): try: db.releases.insert(release) except: - log.error('Problem saving release: {0}'.format(release)) + log.error('nzb: problem saving release: {0}'.format(release)) return False f.close() return True else: - log.error('Release already exists: {0}'.format(release['name'])) + log.error('nzb: release already exists: {0}'.format(release['name'])) return False diff --git a/pynab/parts.py b/pynab/parts.py index 8e491d9..9459fda 100644 --- a/pynab/parts.py +++ b/pynab/parts.py @@ -42,7 +42,6 @@ def save(part): def save_all(parts): """Save a set of parts to the DB, in a batch if possible.""" - log.info('Saving collected segments and parts...') # if possible, do a quick batch insert # rarely possible! @@ -57,7 +56,7 @@ def save_all(parts): save(part) return True except pymongo.errors.PyMongoError as e: - log.error('Could not write parts to db: {0}'.format(e)) + log.error('parts: could not write to db: {0}'.format(e)) return False diff --git a/pynab/rars.py b/pynab/rars.py index afb4a90..48d4c98 100644 --- a/pynab/rars.py +++ b/pynab/rars.py @@ -59,6 +59,9 @@ def attempt_parse(file): elif match.match('(?!UTC)([a-z0-9]+[a-z0-9\.\_\- \'\)\(]+(\d{4}|HDTV).*?\-[a-z0-9]+)', gen_s, regex.I): name = match.match_obj.group(1) + if not name: + name = file + return name @@ -100,7 +103,6 @@ def get_rar_info(server, group_name, messages): try: files = check_rar(t.name) except lib.rar.BadRarFile: - log.debug('Deleting temp files...') os.remove(t.name) return None @@ -120,48 +122,48 @@ def get_rar_info(server, group_name, messages): 'files.names': [r.filename for r in files] } - # make a tempdir to extract rar to - tmp_dir = tempfile.mkdtemp() - log.debug('Creating temp directory: {}...'.format(tmp_dir)) - exe = [ - '"{}"'.format(config.site['unrar_path']), - 'e', '-ai', '-ep', '-r', '-kb', - '-c-', '-id', '-p-', '-y', '-inul', - '"{}"'.format(t.name), - '"{}"'.format(tmp_dir) - ] - - try: - subprocess.check_call(' '.join(exe), stderr=subprocess.STDOUT, shell=True) - except subprocess.CalledProcessError as cpe: - log.debug('Archive had issues while extracting: {}: {} {}'.format(cpe.cmd, cpe.returncode, cpe.output)) - log.debug('Not to worry, it\'s probably a multi-volume rar (most are).') - log.debug(info) - - inner_passwords = [] - for file in files: - fpath = os.path.join(tmp_dir, file.filename) + unrar_path = config.postprocess.get('unrar_path', '/usr/bin/unrar') + if not (unrar_path and os.path.isfile(unrar_path) and os.access(unrar_path, os.X_OK)): + log.error('rar: skipping archive decompression because unrar_path is not set or 
incorrect') + log.error('rar: if the rar is not password protected, but contains an inner archive that is, we will not know') + else: + # make a tempdir to extract rar to + tmp_dir = tempfile.mkdtemp() + exe = [ + '"{}"'.format(unrar_path), + 'e', '-ai', '-ep', '-r', '-kb', + '-c-', '-id', '-p-', '-y', '-inul', + '"{}"'.format(t.name), + '"{}"'.format(tmp_dir) + ] + try: - inner_files = check_rar(fpath) - except lib.rar.BadRarFile: - log.debug('Inner file {} wasn\'t a RAR archive.'.format(file.filename)) - continue - - if inner_files: - inner_passwords += [r.is_encrypted for r in inner_files] - else: - passworded = True - break - - if not passworded: - passworded = any(inner_passwords) + subprocess.check_call(' '.join(exe), stderr=subprocess.STDOUT, shell=True) + except subprocess.CalledProcessError as cpe: + log.debug('rar: issue while extracting rar: {}: {} {}'.format(cpe.cmd, cpe.returncode, cpe.output)) + + inner_passwords = [] + for file in files: + fpath = os.path.join(tmp_dir, file.filename) + try: + inner_files = check_rar(fpath) + except lib.rar.BadRarFile: + continue + + if inner_files: + inner_passwords += [r.is_encrypted for r in inner_files] + else: + passworded = True + break + + if not passworded: + passworded = any(inner_passwords) - log.debug('Deleting temp files...') - os.remove(t.name) - shutil.rmtree(tmp_dir) + os.remove(t.name) + shutil.rmtree(tmp_dir) else: - log.debug('Archive was encrypted or passworded.') passworded = True + os.remove(t.name) info['passworded'] = passworded @@ -173,8 +175,12 @@ def check_release_files(server, group_name, nzb): for rar in nzb['rars']: messages = [] + if not rar['segments']: + continue + if not isinstance(rar['segments']['segment'], list): rar['segments']['segment'] = [rar['segments']['segment'], ] + for s in rar['segments']['segment']: messages.append(s['#text']) break @@ -204,22 +210,22 @@ def check_release_files(server, group_name, nzb): def process(limit=20, category=0): - """Processes release rarfiles to check for passwords and filecounts. 
Optionally - deletes passworded releases.""" - log.info('Checking for passworded releases and deleting them if appropriate...') + """Processes release rarfiles to check for passwords and filecounts.""" with Server() as server: query = {'passworded': None} if category: query['category._id'] = int(category) for release in db.releases.find(query).limit(limit).sort('posted', pymongo.DESCENDING).batch_size(50): - log.debug('Processing rar part for {}...'.format(release['name'])) nzb = pynab.nzbs.get_nzb_dict(release['nzb']) if nzb and 'rars' in nzb: info = check_release_files(server, release['group']['name'], nzb) if info: - log.info('Adding file data to release: {}'.format(release['name'])) + log.info('[{}] - [{}] - file info: added'.format( + release['_id'], + release['search_name'] + )) db.releases.update({'_id': release['_id']}, { '$set': { 'files.count': info['files.count'], @@ -231,7 +237,10 @@ def process(limit=20, category=0): continue - log.debug('No RARs in release, blacklisting...') + log.warning('rar: [{}] - [{}] - file info: no rars in release'.format( + release['_id'], + release['search_name'] + )) db.releases.update({'_id': release['_id']}, { '$set': { 'files.count': 0, @@ -240,11 +249,3 @@ def process(limit=20, category=0): 'passworded': 'unknown' } }) - - if config.site['delete_passworded']: - log.info('Deleting passworded releases...') - if config.site['delete_potentially_passworded']: - query = {'passworded': {'$in': [True, 'potentially']}} - else: - query = {'passworded': True} - db.releases.remove(query) \ No newline at end of file diff --git a/pynab/releases.py b/pynab/releases.py index 6913e6c..d0b5eb2 100644 --- a/pynab/releases.py +++ b/pynab/releases.py @@ -30,7 +30,6 @@ def strip_req(release): if result: result_dict = result.groupdict() if 'name' in result_dict and 'reqid' in result_dict: - log.info('Found request {}, storing req_id and renaming...'.format(result_dict['name'])) db.releases.update({'_id': release['_id']}, { '$set': { 'search_name': result_dict['name'], @@ -42,31 +41,24 @@ def strip_req(release): def names_from_nfos(release): """Attempt to grab a release name from its NFO.""" - log.debug('Parsing NFO for release details in: {}'.format(release['search_name'])) nfo = pynab.nfos.get(release['nfo']).decode('ascii', 'ignore') if nfo: return pynab.nfos.attempt_parse(nfo) else: - log.debug('NFO not available for release: {}'.format(release['search_name'])) return [] def names_from_files(release): """Attempt to grab a release name from filenames inside the release.""" - log.debug('Parsing files for release details in: {}'.format(release['search_name'])) if release['files']['names']: potential_names = [] for file in release['files']['names']: - log.debug('Checking file name: {}'.format(file)) - name = pynab.rars.attempt_parse(file) - if name: potential_names.append(name) return potential_names else: - log.debug('File list was empty for release: {}'.format(release['search_name'])) return [] @@ -84,9 +76,6 @@ def discover_name(release): old_category = release['category']['_id'] calculated_old_category = pynab.categories.determine_category(release['search_name']) - log.debug('Release Name: {}'.format(release['search_name'])) - log.debug('Old Category: {:d} Recalculated Old Category: {:d}'.format(old_category, calculated_old_category)) - for name in potential_names: new_category = pynab.categories.determine_category(name) @@ -100,12 +89,21 @@ def discover_name(release): # ignore this name, since it's apparently gibberish continue else: - if (math.floor(new_category 
/ 1000) * 1000) == (math.floor(old_category / 1000) * 1000): + if (math.floor(new_category / 1000) * 1000) == (math.floor(old_category / 1000) * 1000)\ + or (math.floor(old_category / 1000) * 1000) == pynab.categories.CAT_PARENT_MISC: # if they're the same parent, use the new category + # or, if the old category was misc>other, fix it search_name = name category_id = new_category - log.debug('Found new name for {}: {} with category {:d}'.format(release['search_name'], search_name, category_id)) + log.info('release: [{}] - [{}] - rename: {} ({} -> {} -> {})'.format( + release['_id'], + release['search_name'], + search_name, + old_category, + calculated_old_category, + category_id + )) return search_name, category_id else: @@ -113,9 +111,16 @@ def discover_name(release): continue else: # the old name was apparently fine + log.info('release: [{}] - [{}] - old name was fine'.format( + release['_id'], + release['search_name'] + )) return True, False - log.debug('No potential names found for release.') + log.info('release: [{}] - [{}] - no good name candidates'.format( + release['_id'], + release['search_name'] + )) return None, None @@ -133,8 +138,11 @@ def process(): for 100% completion and will create NZBs/releases for each complete release. Will also categorise releases, and delete old binaries.""" - log.info('Processing complete binaries and generating releases...') - start = time.clock() + + binary_count = 0 + added_count = 0 + + start = time.time() # mapreduce isn't really supposed to be run in real-time # then again, processing releases isn't a real-time op @@ -161,7 +169,7 @@ def process(): complete = false } var completion = available_segments / parseFloat(total_segments) * 100.0; - if (complete || completion >= """ + str(config.site['min_completion']) + """) + if (complete || completion >= """ + str(config.postprocess.get('min_completion', 99)) + """) emit(this._id, completion) } @@ -173,6 +181,7 @@ def process(): # returns a list of _ids, so we need to get each binary for result in db.binaries.inline_map_reduce(mapper, reducer): if result['value']: + binary_count += 1 binary = db.binaries.find_one({'_id': result['_id']}) # check to make sure we have over the configured minimum files @@ -201,10 +210,10 @@ def process(): part['subject'], regex.I): zip_count += 1 - log.debug('Binary {} has {} rars and {} rar_parts.'.format(binary['name'], len(rars), rar_count)) - - if rar_count + zip_count < config.site['min_archives']: - log.debug('Binary does not have the minimum required archives.') + if rar_count + zip_count < config.postprocess.get('min_archives', 1): + log.info('release: [{}] - removed (less than minimum archives)'.format( + binary['name'] + )) db.binaries.remove({'_id': binary['_id']}) continue @@ -231,7 +240,13 @@ def process(): # create the nzb, store it in GridFS and link it here nzb, nzb_size = pynab.nzbs.create(gid, clean_name, binary) if nzb: - log.debug('Adding release: {0}'.format(clean_name)) + added_count += 1 + + log.debug('release: [{}]: added release ({} rars, {} rarparts)'.format( + binary['name'], + len(rars), + rar_count + )) db.releases.update( { @@ -275,5 +290,9 @@ def process(): # delete processed binaries db.binaries.remove({'_id': binary['_id']}) - end = time.clock() - log.info('Time elapsed: {:.2f}s'.format(end - start)) + end = time.time() + log.info('release: added {} out of {} binaries in {:.2f}s'.format( + added_count, + binary_count, + end - start + )) diff --git a/pynab/server.py b/pynab/server.py index 00d1afe..eba18c4 100644 --- a/pynab/server.py +++ 
b/pynab/server.py @@ -35,7 +35,7 @@ def group(self, group_name): try: response, count, first, last, name = self.connection.group(group_name) except nntplib.NNTPError: - log.error('Problem sending group command to server.') + log.error('server: Problem sending group command to server.') return False return response, count, first, last, name @@ -43,8 +43,6 @@ def group(self, group_name): def connect(self, compression=True): """Creates a connection to a news server.""" if not self.connection: - log.info('Attempting to connect to news server...') - news_config = config.news.copy() # i do this because i'm lazy @@ -56,27 +54,20 @@ def connect(self, compression=True): else: self.connection = nntplib.NNTP(compression=compression, **news_config) except Exception as e: - log.error('Could not connect to news server: {}'.format(e)) + log.error('server: Could not connect to news server: {}'.format(e)) return False - log.info('Connected!') - return True - else: - return True + return True def get(self, group_name, messages=None): """Get a set of messages from the server for the specified group.""" - log.info('{}: Getting {:d} messages...'.format(group_name, len(messages))) + data = '' if messages: try: _, total, first, last, _ = self.connection.group(group_name) - log.debug('{}: Total articles in group: {:d}'.format(group_name, total)) for message in messages: article = '<{}>'.format(message) - - log.debug('{}: Getting article: {}'.format(group_name, article)) - response, (number, message_id, lines) = self.connection.body(article) res = pynab.yenc.yenc_decode(lines) if res: @@ -84,20 +75,17 @@ def get(self, group_name, messages=None): else: return None except nntplib.NNTPError as nntpe: - log.error('{}: Problem retrieving messages from server: {}.'.format(group_name, nntpe)) + log.error('server: [{}]: Problem retrieving messages: {}.'.format(group_name, nntpe)) return None return data else: - log.error('{}: No messages were specified.'.format(group_name)) return None def scan(self, group_name, first, last): """Scan a group for segments and return a list.""" - log.info('{}: Collecting parts {:d} to {:d}...'.format(group_name, first, last)) - - start = time.clock() + start = time.time() try: # grab the headers we're after self.connection.group(group_name) @@ -134,9 +122,9 @@ def scan(self, group_name, first, last): if int(segment_number) > 0 and int(total_segments) > 0: # strip the segment number off the subject so # we can match binary parts together - subject = overview['subject'].replace( + subject = nntplib.decode_header(overview['subject'].replace( '(' + str(segment_number) + '/' + str(total_segments) + ')', '' - ).strip() + ).strip()).encode('utf-8', 'replace').decode('latin-1') # this is spammy as shit, for obvious reasons #pynab.log.debug('Binary part found: ' + subject) @@ -157,9 +145,9 @@ def scan(self, group_name, first, last): # some subjects/posters have odd encoding, which will break pymongo # so we make sure it doesn't message = { - 'subject': nntplib.decode_header(subject).encode('utf-8', 'surrogateescape').decode('latin-1'), + 'subject': subject, 'posted': dateutil.parser.parse(overview['date']), - 'posted_by': nntplib.decode_header(overview['from']).encode('utf-8', 'surrogateescape').decode( + 'posted_by': nntplib.decode_header(overview['from']).encode('utf-8', 'replace').decode( 'latin-1'), 'group_name': group_name, 'xref': overview['xref'], @@ -181,23 +169,26 @@ def scan(self, group_name, first, last): for k in blacklist: del messages[k] - log.info( - '{}: Received {:d} articles of 
{:d}, forming {:d} parts with {:d} ignored and {:d} blacklisted.' - .format(group_name, len(received), last - first + 1, total_parts, ignored, blacklisted_parts) - ) - # TODO: implement re-checking of missed messages, or maybe not # most parts that get ko'd these days aren't coming back anyway messages_missed = list(set(range(first, last)) - set(received)) - end = time.clock() - log.info('Time elapsed: {:.2f}s'.format(end - start)) + end = time.time() + + log.info('server: [{}]: retrieved {} - {} in {:.2f}s [{} recv, {} pts, {} ign, {} blk]'.format( + group_name, + first, last, + end - start, + len(received), + total_parts, + ignored, + blacklisted_parts + )) return messages def post_date(self, group_name, article): """Retrieves the date of the specified post.""" - log.debug('{}: Retrieving date of article {:d}'.format(group_name, article)) i = 0 while i < 10: @@ -214,8 +205,6 @@ def post_date(self, group_name, article): try: art_num, overview = articles[0] except IndexError: - log.warning('{}: Server was missing article {:d}.'.format(group_name, article)) - # if the server is missing an article, it's usually part of a large group # so skip along quickishly, the datefinder will autocorrect itself anyway article += int(article * 0.0001) @@ -230,7 +219,6 @@ def post_date(self, group_name, article): def day_to_post(self, group_name, days): """Converts a datetime to approximate article number for the specified group.""" - log.debug('{}: Finding post {:d} days old...'.format(group_name, days)) _, count, first, last, _ = self.connection.group(group_name) target_date = datetime.datetime.now(pytz.utc) - datetime.timedelta(days) @@ -240,34 +228,21 @@ def day_to_post(self, group_name, days): if first_date and last_date: if target_date < first_date: - log.warning( - '{}: First available article is newer than target date, starting from first available.'.format( - group_name)) return first elif target_date > last_date: - log.warning( - '{}: Target date is more recent than newest article. Try a longer backfill.'.format(group_name)) return False - log.debug('{}: Searching for post where goal: {}, first: {}, last: {}' - .format(group_name, target_date, first_date, last_date) - ) upper = last lower = first interval = math.floor((upper - lower) * 0.5) next_date = last_date - log.debug('{}: Start: {:d} End: {:d} Interval: {:d}'.format(group_name, lower, upper, interval)) - while self.days_old(next_date) < days: skip = 1 temp_date = self.post_date(group_name, upper - interval) if temp_date: while temp_date > target_date: upper = upper - interval - (skip - 1) - log.debug('{}: New upperbound: {:d} is {:d} days old.' 
-                                  .format(group_name, upper, self.days_old(temp_date))
-                        )
                         skip *= 2
                         temp_date = self.post_date(group_name, upper - interval)
@@ -275,20 +250,18 @@
                 if interval <= 0:
                     break
                 skip = 1
-                log.debug('{}: Set interval to {:d} articles.'.format(group_name, interval))
 
                 next_date = self.post_date(group_name, upper - 1)
                 if next_date:
                     while not next_date:
                         upper = upper - skip
                         skip *= 2
-                        log.debug('{}: Article was lost, getting next: {:d}'.format(group_name, upper))
                         next_date = self.post_date(group_name, upper - 1)
 
-            log.debug('{}: Article is {:d} which is {:d} days old.'.format(group_name, upper, self.days_old(next_date)))
+            log.debug('server: {}: article {:d} is {:d} days old.'.format(group_name, upper, self.days_old(next_date)))
             return upper
         else:
-            log.error('{}: Could not get group information.'.format(group_name))
+            log.error('server: {}: could not get group information.'.format(group_name))
             return False
 
     @staticmethod
diff --git a/pynab/tvrage.py b/pynab/tvrage.py
index bba95b3..19df46e 100644
--- a/pynab/tvrage.py
+++ b/pynab/tvrage.py
@@ -32,9 +32,8 @@ def process(limit=100, online=True):
     """Processes [limit] releases to add TVRage information."""
-    log.info('Processing TV episodes to add TVRage data...')
 
-    expiry = datetime.datetime.now(pytz.utc) - datetime.timedelta(config.site['fetch_blacklist_duration'])
+    expiry = datetime.datetime.now(pytz.utc) - datetime.timedelta(config.postprocess.get('fetch_blacklist_duration', 7))
 
     query = {
         'tvrage._id': {'$exists': False},
@@ -50,8 +49,9 @@
         ]
     })
 
-    for release in db.releases.find(query).limit(limit).sort('posted', pymongo.DESCENDING).batch_size(50):
-        log.info('Processing TV/Rage information for show {}.'.format(release['search_name']))
+    for release in db.releases.find(query).limit(limit).sort('posted', pymongo.DESCENDING).batch_size(25):
+        method = ''
+
         show = parse_show(release['search_name'])
         if show:
             db.releases.update({'_id': release['_id']}, {
@@ -64,10 +64,12 @@
             if not rage and 'and' in show['clean_name']:
                 rage = db.tvrage.find_one({'name': show['clean_name'].replace(' and ', ' & ')})
 
-            if not rage and online:
-                log.info('Show not found in local TvRage DB, searching online...')
+            if rage:
+                method = 'local'
+            elif not rage and online:
                 rage_data = search(show)
                 if rage_data:
+                    method = 'online'
                     db.tvrage.update(
                         {'_id': int(rage_data['showid'])},
                         {
@@ -83,14 +85,24 @@
                     time.sleep(1)
 
             if rage:
-                log.info('TVRage match found, appending TVRage ID to release.')
+                log.info('tvrage: [{}] - [{}] - tvrage added: {}'.format(
+                    release['_id'],
+                    release['search_name'],
+                    method
+                ))
+
                 db.releases.update({'_id': release['_id']}, {
                     '$set': {
                         'tvrage': rage
                     }
                 })
             elif not rage and online:
-                log.warning('Could not find TVRage data to associate with release {}.'.format(release['search_name']))
+                log.warning('tvrage: [{}] - [{}] - tvrage failed: {}'.format(
+                    release['_id'],
+                    release['search_name'],
+                    'no show found (online)'
+                ))
+
                 db.releases.update({'_id': release['_id']}, {
                     '$set': {
                         'tvrage': {
@@ -99,9 +111,17 @@
                     }
                 })
             else:
-                log.warning('Could not find local TVRage data to associate with release {}.'.format(release['search_name']))
+                log.warning('tvrage: [{}] - [{}] - tvrage failed: {}'.format(
+                    release['_id'],
+                    release['search_name'],
+                    'no show found (local)'
+                ))
         else:
-            log.warning('Could not parse name for TV data: {}.'.format(release['search_name']))
+            log.error('tvrage: [{}] - [{}] - tvrage failed: {}'.format(
+                release['_id'],
+                release['search_name'],
+                'no suitable regex for show name'
+            ))
             db.releases.update({'_id': release['_id']}, {
                 '$set': {
                     'tvrage': {
@@ -115,8 +135,8 @@ def search(show):
     """Search TVRage's online API for show data."""
     try:
         r = requests.get(TVRAGE_FULL_SEARCH_URL, params={'show': show['clean_name']})
-    except:
-        log.error('Problem retrieving TVRage XML. The API is probably down.')
+    except Exception as e:
+        log.error(e)
         return None
 
     content = r.content
@@ -138,7 +158,7 @@ def search_lxml(show, content):
     try:
         tree = etree.fromstring(content)
     except:
-        log.error('Problem parsing XML with lxml')
+        log.critical('Problem parsing XML with lxml')
         return None
 
     matches = defaultdict(list)
@@ -147,7 +167,6 @@
         for name in extract_names(xml_show):
             ratio = int(difflib.SequenceMatcher(None, show['clean_name'], clean_name(name)).ratio() * 100)
             if ratio == 100:
-                log.debug('Found 100% xml_match: {}'.format(name))
                 return xmltodict.parse(etree.tostring(xml_show))['show']
             matches[ratio].append(xml_show)
@@ -155,17 +174,12 @@
     for ratio, xml_matches in sorted(matches.items(), reverse=True):
         for xml_match in xml_matches:
             if ratio >= 80:
-                log.debug('Found {:d}% xml_match: {}'.format(ratio, XPATH_NAME(xml_match)[0]))
                 return xmltodict.parse(etree.tostring(xml_match))['show']
             elif 80 > ratio > 60:
                 if 'country' in show and show['country'] and XPATH_COUNTRY(xml_match):
-                    if str.lower(show['country']) == str.lower(XPATH_COUNTRY(xml_match)):
-                        log.debug('Found {:d}% xml_match: {}'.format(ratio, XPATH_NAME(xml_match)[0]))
+                    if str.lower(show['country']) == str.lower(XPATH_COUNTRY(xml_match)[0]):
                         return xmltodict.parse(etree.tostring(xml_match))['show']
 
-    ratio, highests = sorted(matches.items(), reverse=True)[0]
-    log.warning('No TVRage match found for {}, highest match was {}%.'.format(show['clean_name'], ratio))
-
 
 def clean_name(name):
     """Cleans a show name for searching (against tvrage)."""
@@ -174,6 +188,7 @@
     name = regex.sub('[._\-]', ' ', name)
     name = regex.sub('[\':!"#*’,()?]', '', name)
     name = regex.sub('\s{2,}', ' ', name)
+    name = regex.sub('\[.*?\]', '', name)
 
     replace_chars = {
         '$': 's',
@@ -184,6 +199,9 @@
     for k, v in replace_chars.items():
         name = name.replace(k, v)
 
+    pattern = regex.compile(r'\b(hdtv|dvd|divx|xvid|mpeg2|x264|aac|flac|bd|dvdrip|10 bit|264|720p|1080p\d+x\d+)\b', regex.I)
+    name = pattern.sub('', name)
+
     return name.lower()
@@ -297,8 +315,6 @@
             'season': int(match.match_obj.group(2)),
             'episode': 'all'
         }
-    else:
-        log.error('No regex match.')
 
     if 'name' in show and show['name']:
         # check for country code or name (Biggest Loser Australia etc)
@@ -335,7 +351,6 @@
         return show
 
-    log.error('Could not determine show info from search_name: {}'.format(search_name))
     return False
diff --git a/pynab/util.py b/pynab/util.py
index 395c25c..7238701 100644
--- a/pynab/util.py
+++ b/pynab/util.py
@@ -20,9 +20,9 @@ def match(self, *args, **kwds):
 
 def update_blacklist():
     """Check for Blacklist update and load them into Mongo."""
-    if 'blacklist_url' in config.site:
-        log.info('Starting blacklist update...')
-        response = requests.get(config.site['blacklist_url'])
+    blacklist_url = config.postprocess.get('blacklist_url')
+    if blacklist_url:
+        response = requests.get(blacklist_url)
         lines = response.text.splitlines()
 
         for line in lines:
@@ -53,9 +53,9 @@
 def update_regex():
     """Check for NN+ regex update and load them into Mongo."""
-    if 'regex_url' in config.site:
-        log.info('Starting regex update...')
-        response = requests.get(config.site['regex_url'])
+    regex_url = config.postprocess.get('regex_url')
+    if regex_url:
+        response = requests.get(regex_url)
         lines = response.text.splitlines()
 
         # get the revision by itself
diff --git a/requirements.txt b/requirements.txt
index 9084ca7..028a959 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,6 @@ pynzb
 requests
 roman
 regex
-lxml
\ No newline at end of file
+lxml
+daemonize
+colorlog
diff --git a/scripts/ensure_indexes.py b/scripts/ensure_indexes.py
index c1c74cc..3127cf0 100644
--- a/scripts/ensure_indexes.py
+++ b/scripts/ensure_indexes.py
@@ -21,8 +21,10 @@ def create_indexes():
     db.categories.ensure_index('parent_id', pymongo.ASCENDING)
 
     # regexes
-    db.regexes.ensure_index('group_name', pymongo.ASCENDING)
-    db.regexes.ensure_index('category_id', pymongo.ASCENDING)
+    db.regexes.ensure_index([
+        ('ordinal', pymongo.ASCENDING),
+        ('group_name', pymongo.ASCENDING)
+    ], background=True)
 
     # groups
     db.groups.ensure_index('name', pymongo.ASCENDING)
@@ -46,6 +48,11 @@
     # imdb
     db.imdb.ensure_index('_id', pymongo.ASCENDING)
     db.imdb.ensure_index('name', pymongo.ASCENDING)
+    db.imdb.ensure_index([
+        ('name', pymongo.ASCENDING),
+        ('year', pymongo.ASCENDING)
+    ], background=True)
+
 
     # binaries
     db.binaries.ensure_index('name', pymongo.ASCENDING, background=True)
@@ -60,6 +67,7 @@
     db.releases.ensure_index('id', pymongo.ASCENDING, background=True)
     db.releases.ensure_index('name', pymongo.ASCENDING, background=True)
     db.releases.ensure_index('category._id', pymongo.ASCENDING, background=True)
+    db.releases.ensure_index('category', pymongo.ASCENDING, background=True)
     db.releases.ensure_index('rage._id', pymongo.ASCENDING, background=True)
     db.releases.ensure_index('imdb._id', pymongo.ASCENDING, background=True)
     db.releases.ensure_index('tvdb._id', pymongo.ASCENDING, background=True)
@@ -89,8 +97,10 @@
         ('tvrage._id', pymongo.ASCENDING),
         ('category._id', pymongo.ASCENDING)
     ], background=True)
-    db.releases.ensure_index('passworded', pymongo.ASCENDING, background=True)
-    #TODO: add sparse indexes related to postproc
+    db.releases.ensure_index([
+        ('passworded', pymongo.ASCENDING),
+        ('posted', pymongo.DESCENDING),
+    ], background=True)
 
 
 if __name__ == '__main__':
diff --git a/scripts/process_min_archives.py b/scripts/process_min_archives.py
index 01f6692..fb35a31 100644
--- a/scripts/process_min_archives.py
+++ b/scripts/process_min_archives.py
@@ -13,7 +13,7 @@ def process_minarchives():
     for release in db.releases.find():
         data = pynab.nzbs.get_nzb_dict(release['nzb'])
 
-        if data['rar_count'] + data['zip_count'] < config.site['min_archives']:
+        if data['rar_count'] + data['zip_count'] < config.postprocess.get('min_archives', 1):
             print('DELETING: Release {} has {} rars and {} zips.'.format(release['search_name'], data['rar_count'], data['zip_count']))
             db.releases.remove({'_id': release['_id']})
diff --git a/scripts/process_uncategorised.py b/scripts/process_uncategorised.py
index 819077a..c340341 100644
--- a/scripts/process_uncategorised.py
+++ b/scripts/process_uncategorised.py
@@ -30,9 +30,8 @@ def fix_uncategorised():
            log.error('Release had no group! Think about deleting releases without groups.')
            continue
 
-        category_id = pynab.categories.determine_category(release['name'], release['group']['name'])
+        category_id = pynab.categories.determine_category(release['search_name'], release['group']['name'])
         if category_id:
-            log.info('Found category: {:d}'.format(category_id))
             category = db.categories.find_one({'_id': category_id})
             # if this isn't a parent category, add those details as well
             if 'parent_id' in category:
diff --git a/scripts/rename_bad_releases.py b/scripts/rename_bad_releases.py
index c2de4cf..eb6d418 100644
--- a/scripts/rename_bad_releases.py
+++ b/scripts/rename_bad_releases.py
@@ -10,8 +10,10 @@
 def rename_bad_releases(category):
-    for release in db.releases.find({'category._id': int(category), '$or': [{'nfo': {'$nin': [None, False]}}, {'files.count': {'$exists': True}}]}):
-        log.debug('Finding name for {}...'.format(release['search_name']))
+    count = 0
+    s_count = 0
+    for release in db.releases.find({'category._id': int(category), 'unwanted': {'$ne': True}, '$or': [{'nfo': {'$nin': [None, False]}}, {'files.count': {'$exists': True}}]}):
+        count += 1
 
         name, category_id = pynab.releases.discover_name(release)
         if name and not category_id:
@@ -19,10 +21,7 @@
             pass
         elif name and category_id:
             # we found a new name!
-            log.info('Renaming {} ({:d}) to {} ({:d})...'.format(
-                release['search_name'], release['category']['_id'],
-                name, category_id
-            ))
+            s_count += 1
 
             category = db.categories.find_one({'_id': category_id})
             category['parent'] = db.categories.find_one({'_id': category['parent_id']})
@@ -38,7 +37,7 @@
         else:
             # bad release!
-            log.debug('Noting unwanted release {} ({:d})...'.format(
+            log.info('Noting unwanted release {} ({:d})...'.format(
                 release['search_name'], release['category']['_id'],
             ))
@@ -50,6 +49,9 @@
             }
         )
 
+    log.info('rename: successfully renamed {} of {} releases'.format(s_count, count))
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='''
     Rename Bad Releases
diff --git a/start.py b/start.py
index c235214..6f0c5c0 100644
--- a/start.py
+++ b/start.py
@@ -1,10 +1,12 @@
+import argparse
 import multiprocessing
 import time
 import logging
 import pytz
 import datetime
+import traceback
 
-from pynab import log
+from pynab import log, log_descriptor
 from pynab.db import db
 
 import pynab.groups
@@ -41,8 +43,25 @@ def process_imdb(limit):
     pynab.imdb.process(limit)
 
 
-if __name__ == '__main__':
-    log.info('Starting update...')
+def daemonize(pidfile):
+    try:
+        import traceback
+        from daemonize import Daemonize
+
+        fds = []
+        if log_descriptor:
+            fds = [log_descriptor]
+
+        daemon = Daemonize(app='pynab', pid=pidfile, action=main, keep_fds=fds)
+        daemon.start()
+    except SystemExit:
+        raise
+    except:
+        log.critical(traceback.format_exc())
+
+
+def main():
+    log.info('starting update...')
 
     # print MP log as well
     multiprocessing.log_to_stderr().setLevel(logging.DEBUG)
@@ -52,7 +71,7 @@ def process_imdb(limit):
         if active_groups:
             # if maxtasksperchild is more than 1, everything breaks
             # they're long processes usually, so no problem having one task per child
-            pool = multiprocessing.Pool(processes=config.site['update_threads'], maxtasksperchild=1)
+            pool = multiprocessing.Pool(processes=config.scan.get('update_threads', 4), maxtasksperchild=1)
             result = pool.map_async(update, active_groups)
             try:
                 result.get()
@@ -71,12 +90,30 @@ def process_imdb(limit):
             pynab.releases.process()
 
             # clean up dead binaries
-            dead_time = pytz.utc.localize(datetime.datetime.now()) - datetime.timedelta(days=config.site['dead_binary_age'])
+            dead_time = pytz.utc.localize(datetime.datetime.now()) - datetime.timedelta(days=config.scan.get('dead_binary_age', 3))
             db.binaries.remove({'posted': {'$lte': dead_time}})
 
             # wait for the configured amount of time between cycles
-            log.info('Sleeping for {:d} seconds...'.format(config.site['update_wait']))
-            time.sleep(config.site['update_wait'])
+            update_wait = config.scan.get('update_wait', 300)
+            log.info('sleeping for {:d} seconds...'.format(update_wait))
+            time.sleep(update_wait)
+        else:
+            log.info('no groups active, cancelling start.py...')
+            break
+
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(description="Pynab main indexer script")
+    argparser.add_argument('-d', '--daemonize', action='store_true', help='run as a daemon')
+    argparser.add_argument('-p', '--pid-file', help='pid file (when -d)')
+
+    args = argparser.parse_args()
+
+    if args.daemonize:
+        pidfile = args.pid_file or config.scan.get('pid_file')
+        if not pidfile:
+            log.error("A pid file is required to run as a daemon, please supply one either in the config file '{}' or as argument".format(config.__file__))
         else:
-            log.info('No groups active, cancelling start.py...')
-            break
\ No newline at end of file
+            daemonize(pidfile)
+    else:
+        main()
diff --git a/templates/api/result.mako b/templates/api/result.mako
index 60b3a4d..78ba68b 100644
--- a/templates/api/result.mako
+++ b/templates/api/result.mako
@@ -5,8 +5,8 @@
     %>
 
-        ${config.site['title']}
-        ${config.site['description']}
+        ${config.api.get('title', 'pynab')}
+        ${config.api.get('description', '')}
         ${get_link('')}
 
         % if search:
diff --git a/webui/app/scripts/config.js b/webui/app/scripts/config.js
index 8cac5f6..a37a1c4 100644
--- a/webui/app/scripts/config.js
+++ b/webui/app/scripts/config.js
@@ -1,5 +1,5 @@
 angular.module('pynabWebuiApp').constant('PYNAB_CONFIG', {
-    // example: 'http://someindexer.org/'
+    // example: 'http://someindexer.org:8080/'
     // don't forget the trailing slash
     // if your install is in a subdirectory, include that
     hostUrl: ''
diff --git a/webui/app/scripts/controllers/search.js b/webui/app/scripts/controllers/search.js
index 0e33698..aa1a1bc 100644
--- a/webui/app/scripts/controllers/search.js
+++ b/webui/app/scripts/controllers/search.js
@@ -59,6 +59,10 @@ angular.module('pynabWebuiApp')
                 } else {
                     var results = response.data.rss.channel.item;
 
+                    if (!(results instanceof Array)) {
+                        results = [results];
+                    }
+
                     $scope.searchResults = [];
                     angular.forEach(results, function(obj) {
                         obj.pubDate = moment(obj.pubDate, "ddd, DD MMM YYYY HH:mm:ss ZZ").toDate();
diff --git a/webui/app/views/search.html b/webui/app/views/search.html
index 3141846..5c844c1 100644
--- a/webui/app/views/search.html
+++ b/webui/app/views/search.html
@@ -43,7 +43,7 @@
 Category
 Download NZB
-
+
 {{item.size | bytes}}
@@ -57,4 +57,4 @@
 
 No results found.
 
-
\ No newline at end of file
+
diff --git a/webui/package.json b/webui/package.json
index 90fc078..1c19dc3 100644
--- a/webui/package.json
+++ b/webui/package.json
@@ -34,7 +34,7 @@
     "karma-chrome-launcher": "~0.1.2",
     "karma-firefox-launcher": "~0.1.3",
     "karma-html2js-preprocessor": "~0.1.0",
-    "karma-jasmine": "~0.2.0",
+    "karma-jasmine": "~0.1.0",
     "requirejs": "~2.1.10",
     "karma-requirejs": "~0.2.1",
     "karma-coffee-preprocessor": "~0.1.3",