class BbSubmission(Submission):
    # NOTE(review): only the e-book members introduced by this change are
    # reproduced here; the remaining BbSubmission members are only partly
    # visible in this view and are intentionally left out of this excerpt.
    default_fields = ("form_title", "tags", "cover")

    # MIME type -> e-book format label.
    ebook_types = {'application/epub+zip': 'EPUB',
                   'application/x-mobipocket-ebook': 'MOBI',
                   'application/pdf': 'PDF',
                   'text/html': 'HTML',
                   'text/plain': 'TXT',
                   'image/vnd.djvu': 'DJVU',
                   'application/vnd.ms-htmlhelp': 'CHM',
                   'application/x-cbr': 'CBR',
                   'application/x-cbz': 'CBZ',
                   'application/x-cb7': 'CB7',
                   'application/x-mobi8-ebook': 'AZW3'}

    # File extension -> MIME type, registered with ``mimetypes`` so that
    # ``mimetypes.guess_type`` recognises e-book files.
    _ebook_extensions = {
        '.epub': 'application/epub+zip',
        '.mobi': 'application/x-mobipocket-ebook',
        '.pdf': 'application/pdf',
        '.html': 'text/html',
        '.txt': 'text/plain',
        '.djvu': 'image/vnd.djvu',
        '.chm': 'application/vnd.ms-htmlhelp',
        '.cbr': 'application/x-cbr',
        '.cbz': 'application/x-cbz',
        '.cb7': 'application/x-cb7',
        '.azw3': 'application/x-mobi8-ebook',
    }

    def add_ebook_mime_types(self):
        """Register the e-book file extensions with :mod:`mimetypes`.

        Uses the public ``mimetypes.add_type`` API rather than writing to
        ``mimetypes.types_map`` directly, so the module's forward and
        inverse tables stay consistent. Calling this repeatedly is
        harmless: each call re-registers the same pairs.
        """
        for extension, mime in self._ebook_extensions.items():
            mimetypes.add_type(mime, extension)

    def subcategorise_ebook(self, mime):
        """Return the e-book format label for *mime*.

        Returns ``None`` when *mime* is not a known e-book MIME type
        (including when *mime* itself is ``None``). Callers only rely on
        the truthiness of the result or use the label as-is.
        """
        # dict.get never raises KeyError, so no exception handling is needed.
        return self.ebook_types.get(mime)
class BookSubmission(BbSubmission):
    """E-book submission whose fields are filled from a Goodreads lookup."""

    _cat_id = 'book'
    _form_type = 'E-Books'

    def _desc(self):
        """Return the summary description with ``<...>`` markup removed."""
        raw_description = self['summary']['description']
        return re.sub('<[^<]+?>', '', raw_description)

    def _render_scene(self):
        """Always ``False`` for books."""
        return False

    @form_field('book_retail', 'checkbox')
    def _render_retail(self):
        """Ask the user whether this is a retail release (default: no)."""
        answer = input('Is this a retail release? [y/N] ')
        return answer.lower() == 'y'

    @form_field('book_language')
    def _render_language(self):
        """Return the language stored in the summary."""
        summary = self['summary']
        return summary['language']

    @form_field('book_publisher')
    def _render_publisher(self):
        """Return the publisher stored in the summary."""
        summary = self['summary']
        return summary['publisher']

    @form_field('book_author')
    def _render_author(self):
        """Return the name of the first author listed in the summary."""
        first_author = self['summary']['authors'][0]
        return first_author['name']

    @form_field('book_format')
    def _render_format(self):
        """Return the format label guessed from the MIME type of the path."""
        guessed = mimetypes.guess_type(self['path'])
        return self.subcategorise_ebook(guessed[0])

    def _render_summary(self):
        """Look the book up on Goodreads using the submission path."""
        return goodreads.Goodreads().search(self['path'])

    @form_field('book_year')
    def _render_year(self):
        """Return the publication year, prompting when no summary exists."""
        if 'summary' in self.fields:
            return self['summary']['publication_year']

        # No summary available: keep asking until an integer is entered.
        while True:
            answer = input('Please enter year: ')
            try:
                return int(answer)
            except ValueError:
                continue

    @form_field('book_isbn')
    def _render_isbn(self):
        """Return the summary's ISBN ('' if absent); None without summary."""
        if 'summary' not in self.fields:
            return None
        return self['summary'].get('isbn', '')

    @form_field('title')
    def _render_form_title(self):
        """Return the summary's title ('' if absent); None without summary."""
        if 'summary' not in self.fields:
            return None
        return self['summary'].get('title', '')

    @form_field('tags')
    def _render_tags(self):
        """Build the comma-separated tag string (at most 200 characters).

        Tags come from author names, Google Books categories and
        Goodreads shelves, in that order; trailing tags are dropped until
        the joined string fits the limit.
        """
        categories = find_categories(self['summary']['isbn'])
        author_names = [author['name'] for author in self['summary']['authors']]
        shelf_names = [shelf['name'] for shelf in self['summary']['shelves']]

        raw_tags = author_names + list(categories) + shelf_names
        tags = uniq([format_tag(raw) for raw in raw_tags])

        def joined(items):
            return ",".join(format_tag(item) for item in items)

        # Maximum tags length is 200 characters
        while len(joined(tags)) > 200:
            tags.pop()
        return joined(tags)

    def _render_section_information(self):
        """Render the "Information" section as BBCode."""
        book = self['summary']
        isbn = book['isbn']

        link_targets = [
            ('Goodreads', book['url']),
            ('Amazon', 'http://amzn.com/{}'.format(isbn)),
            ('LibraryThing',
             'http://www.librarything.com/isbn/{}/'.format(isbn)),
            ('Google Books',
             'http://books.google.com/books?vid=ISBN{}'.format(isbn)),
        ]
        rendered_links = ", ".join(
            bb.link(label, url) for label, url in link_targets)

        template = dedent("""\
            [b]Title[/b]: {title} ({links})
            [b]ISBN[/b]: {isbn}
            [b]Publisher[/b]: {publisher}
            [b]Publication Year[/b]: {publication_year}
            [b]Rating[/b]: {rating} [size=1]({ratings_count} ratings)[/size]
            [b]Author(s)[/b]: {authors}""")

        title = book['title']
        publisher = book['publisher']
        publication_year = book['publication_year']
        rating = bb.format_rating(float(book['average_rating']), max=5)
        ratings_count = book['ratings_count']
        author_links = " | ".join(
            bb.link(author['name'], author['link'])
            for author in book['authors'])

        return template.format(
            links=rendered_links,
            title=title,
            isbn=isbn,
            publisher=publisher,
            publication_year=publication_year,
            rating=rating,
            ratings_count=ratings_count,
            authors=author_links,
        )

    def _render_section_description(self):
        """Render the "Description" section body."""
        return self._desc()

    @form_field('desc')
    def _render_description(self):
        """Join the description and information sections, then the footer."""
        titled_sections = (
            ("Description", self['section_description']),
            ("Information", self['section_information']),
        )
        rendered = [bb.section(heading, text)
                    for heading, text in titled_sections]
        return "\n".join(rendered) + bb.release

    @finalize
    @form_field('image')
    def _render_cover(self):
        """Pick a cover URL: Open Library, Google Books or Goodreads."""
        use_openlibrary = config.get('Books', 'use_openlibrary').lower()
        if use_openlibrary == "true":
            return format_cover_url('isbn', self['summary']['isbn'], 'L')

        # Goodreads usually won't give you a cover image as they don't have
        # the right to distribute them
        if 'nophoto' in self['summary']['image_url']:
            return find_cover(self['summary']['isbn'])
        return self['summary']['image_url']

    def _finalize_cover(self):
        """Upload the chosen cover to the image host."""
        cover = self['cover']
        return imagehosting.upload(cover)
COMMAND = "ebook-meta"


class EbookMetaException(Exception):
    """Raised when Calibre's ``ebook-meta`` tool cannot be started."""


def get_version():
    """Return the text printed by ``ebook-meta --version``.

    Raises:
        EbookMetaException: if the ``ebook-meta`` executable cannot be
            launched (for example because Calibre is not installed).
    """
    try:
        ebook_meta = subprocess.Popen(
            [COMMAND, '--version'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as err:
        raise EbookMetaException(
            "Could not find {}, please ensure it is installed (via Calibre)."
            .format(COMMAND)) from err
    return ebook_meta.communicate()[0].decode('utf8')


def parse_metadata(output):
    """Parse the ``Key : value`` rows printed by ``ebook-meta``.

    Each row is split at the *first* ``': '`` only, so values that contain
    ``': '`` themselves (e.g. a title with a subtitle) are kept intact
    instead of being dropped. Rows without a separator are ignored.

    Args:
        output: decoded text output of ``ebook-meta``.

    Returns:
        dict mapping the field name (padding spaces and dots stripped) to
        its stripped value.
    """
    result = {}
    for row in output.split('\n'):
        key, separator, value = row.partition(': ')
        if separator:
            result[key.strip(' .')] = value.strip()
    return result


def read_metadata(path):
    """Read the metadata of the e-book at *path* using ``ebook-meta``.

    Args:
        path: filesystem path of the e-book.

    Returns:
        dict of metadata fields as produced by :func:`parse_metadata`.

    Raises:
        EbookMetaException: if ``ebook-meta`` is not available.
        subprocess.CalledProcessError: if ``ebook-meta`` exits non-zero.
    """
    version = get_version()
    log.debug('Found ebook-meta version: %s' % version)
    log.info("Trying to read eBook metadata...")

    # Pass an argument list (no shell) so quotes, spaces or shell
    # metacharacters in the path cannot break or hijack the command.
    output = subprocess.check_output([COMMAND, path])
    return parse_metadata(output.decode('utf8'))
# Shelf names containing any of these words describe reading status or
# ownership rather than a genre, so they are not turned into tags.
EXCLUDED_WORDS = ['read', 'favorites', 'book',
                  'own', 'series', 'novel', 'kindle', 'shelf',
                  'library', 'buy', 'abandoned',
                  'audible', 'audio', 'finish', 'wish']


def _as_list(value):
    """Return *value* as a list, wrapping a lone mapping.

    NOTE(review): the API responses appear to hold a bare mapping where a
    collection has exactly one element (the author handling relied on
    this) -- confirm against the client library.
    """
    if value is None:
        return []
    if isinstance(value, dict):  # also matches OrderedDict
        return [value]
    return list(value)


def _extract_author(auth):
    """Reduce an author record to its ``name`` and ``link``."""
    return {
        'name': auth['name'],
        'link': auth['link']
    }


def _extract_authors(authors):
    """Return a list of ``{'name', 'link'}`` dicts for one or many authors."""
    return [_extract_author(auth) for auth in _as_list(authors['author'])]


def _read_language():
    """Ask the user for the book's language."""
    return input('Please specify the book\'s Language: ')


def _extract_language(alpha_3):
    """Translate a language code into a language name.

    Tries the code as ISO 639 alpha-3 first, then its first two characters
    as alpha-2, and finally falls back to asking the user.
    """
    if not alpha_3:
        return _read_language()
    try:
        return pycountry.languages.get(alpha_3=alpha_3).name
    except (AttributeError, LookupError):
        try:
            return pycountry.languages.get(alpha_2=alpha_3[:2]).name
        except (AttributeError, LookupError):
            # I give up
            return _read_language()


def _exclude_well_known(s):
    """Return True when the shelf name contains none of EXCLUDED_WORDS."""
    return not any(w in s['@name'] for w in EXCLUDED_WORDS)


def _shelf_sort_key(s):
    """Sort key: the shelf's count as an integer."""
    return int(s['@count'])


def _extract_shelf(shelf):
    """Reduce a shelf record to its ``name`` and ``count``."""
    return {'name': shelf['@name'], 'count': shelf['@count']}


def _extract_shelves(shelves, take):
    """Return the usable shelves among the *take* most popular ones.

    The shelves are ranked by count, cut to the top *take*, and only then
    filtered, so fewer than *take* entries may be returned.
    """
    # source for tags e.g. sci-fi
    ranked = sorted(_as_list(shelves), key=_shelf_sort_key, reverse=True)
    return [_extract_shelf(shelf)
            for shelf in ranked[:take]
            if _exclude_well_known(shelf)]


def _isbn_from_identifiers(identifiers):
    """Return the ISBN from an ``ebook-meta`` identifiers string.

    *identifiers* looks like ``"isbn:9780441172696, google:abc"``. Only an
    entry whose scheme is ``isbn`` is used; '' is returned when none exists.
    """
    for identifier in identifiers.split(','):
        scheme, _, value = identifier.strip().partition(':')
        if scheme.lower() == 'isbn' and value.strip():
            return value.strip()
    return ''


def _process_book(books):
    """Build the book summary dict from ``(key, value)`` response pairs."""
    keys_wanted = ['id', 'title', 'isbn', 'isbn13', 'description',
                   'language_code', 'publication_year', 'publisher',
                   'image_url', 'url', 'authors', 'average_rating',
                   'work', 'popular_shelves']
    book = {k: v for k, v in books if k in keys_wanted}
    book['authors'] = _extract_authors(book['authors'])
    book['ratings_count'] = int(book['work']['ratings_count']['#text'])
    book['language'] = _extract_language(book['language_code'])
    book['shelves'] = _extract_shelves(book['popular_shelves']['shelf'], 10)
    return book


class Goodreads(object):
    """Thin wrapper around the Goodreads API client."""

    def __init__(self, interactive=True):
        # ``interactive`` is accepted for interface compatibility; it is
        # not consulted anywhere in this class.
        self.goodreads = gr.Client(
            developer_key=config.get('Goodreads', 'api_key'))

    def show_by_isbn(self, isbn):
        """Fetch and process the book identified by *isbn*."""
        return _process_book(self.goodreads.Book.show_by_isbn(
            isbn).items())

    def search(self, path):
        """Find the Goodreads record for the e-book file at *path*.

        The file's embedded metadata is read first. An ISBN identifier is
        looked up directly; otherwise the title is searched interactively.

        Returns:
            the processed book dict, or ``None`` when the file carries
            neither an ISBN nor a title.
        """
        metadata = read_metadata(path)
        isbn = _isbn_from_identifiers(metadata.get('Identifiers', ''))
        title = metadata.get('Title', '')

        if isbn:
            log.debug("Searching Goodreads by ISBN {} for '{}'",
                      isbn, title or isbn)
            return self.show_by_isbn(isbn)
        if not title:
            return None
        return self._search_by_title(title)

    def _search_by_title(self, search_term):
        """Search by title and let the user pick a result.

        At the prompt the user may enter a result number (empty input
        selects 0), ``isbn:<ISBN>`` for a direct lookup, or any other text,
        which is used as a new search term.
        """
        while True:
            log.debug(
                "Searching Goodreads by Title only for '{}'", search_term)
            book_results = self.goodreads.search_book(search_term)
            works = _as_list(book_results['results']['work'])

            print("Results:")
            for i, work in enumerate(works):
                print('{}: {} by {} ({})'
                      .format(i, work['best_book']['title'],
                              work['best_book']['author']['name'],
                              work['original_publication_year']
                              .get('#text', '')))

            new_term = None
            while new_term is None:
                choice = input('Select number or enter an alternate'
                               ' search term'
                               ' (or an ISBN with isbn: prefix):'
                               ' [0-{}, 0 default] '
                               .format(len(works) - 1)).strip()
                if choice.lower().startswith('isbn:'):
                    return self.show_by_isbn(choice[len('isbn:'):].strip())

                if not choice:
                    index = 0
                else:
                    try:
                        index = int(choice)
                    except ValueError:
                        # Free text: run a fresh search with it.
                        new_term = choice
                        continue

                if 0 <= index < len(works):
                    book_id = works[index]['best_book']['id'].get(
                        '#text', '')
                    log.debug("Selected Goodreads item {}", book_id)
                    log.debug("Searching Goodreads by ID {}", book_id)
                    return _process_book(self.goodreads.Book.show(
                        book_id).items())
                # Out-of-range number: ask again.

            search_term = new_term
API_URL = 'https://www.googleapis.com/books/v1/'

# Volume-search payloads already fetched, keyed by ISBN.
cache = {}


def _get_or_set(**kwargs):
    """Store or look up a cached payload.

    Keyword Args:
        key: cache key (the ISBN).
        value: when truthy, stored under *key* and returned.

    Returns:
        the stored/looked-up payload, or ``None`` when *value* is falsy and
        *key* is not cached.
    """
    value = kwargs.get('value', None)
    key = kwargs.get('key', None)
    if value:
        cache[key] = value
        return value
    return cache.get(key)


def _fetch_volumes(isbn, purpose):
    """Return the volume-search payload for *isbn*, using the cache.

    Args:
        isbn: ISBN to query.
        purpose: short description used in the debug log message.

    Returns:
        the decoded JSON payload, or ``None`` when the HTTP status is not
        200.
    """
    cached = _get_or_set(key=isbn)
    if cached:
        return cached

    path = 'volumes?q=isbn:{}'.format(isbn)
    resp = requests.get(API_URL + path)
    log.debug('Fetching {} from {}'.format(purpose, resp.url))
    if resp.status_code != 200:
        return None
    content = resp.json()
    _get_or_set(key=isbn, value=content)
    return content


def _first_volume_info(book):
    """Return ``volumeInfo`` of the first item, or {} when there is none.

    A payload without an ``items`` list is treated as "no volume found"
    instead of raising.
    """
    items = (book or {}).get('items') or []
    if not items:
        return {}
    return items[0].get('volumeInfo') or {}


def _extract_categories(book):
    """Return the first volume's category list, or '' when unavailable."""
    return _first_volume_info(book).get('categories') or ''


def _extract_cover(book):
    """Return the first volume's thumbnail URL, or '' when unavailable."""
    image_links = _first_volume_info(book).get('imageLinks') or {}
    return image_links.get('thumbnail') or ''


def find_cover(isbn):
    """Return a cover thumbnail URL for *isbn* from Google Books.

    Returns '' when the request fails or the volume has no thumbnail.
    """
    content = _fetch_volumes(isbn, 'alt cover art')
    if content is None:
        log.warn('Couldn\'t find cover art for ISBN {}'.format(isbn))
        return ''
    return _extract_cover(content)


def find_categories(isbn):
    """Return the Google Books categories for *isbn*.

    Returns a list of category names, or '' when the request fails or the
    volume lists no categories (both are safe to iterate).
    """
    content = _fetch_volumes(isbn, 'categories')
    if content is None:
        log.warn('Couldn\'t find categories for ISBN {}'.format(isbn))
        return ''
    return _extract_categories(content)
API_URL = 'http://covers.openlibrary.org/b/{}/{}-{}.jpg'


def format_cover_url(key, value, size):
    """Build an Open Library cover URL.

    Args:
        key: kind of identifier; one of ISBN, OCLC, LCCN, OLID and ID
            (case-insensitive).
        value: the identifier value for the chosen key.
        size: S, M or L for a small, medium or large image.

    Returns:
        the cover URL as a string.
    """
    url_parts = (key, value, size)
    return API_URL.format(*url_parts)