Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Book Submissions #114

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions pythonbits/bb.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from . import imdb
from . import musicbrainz as mb
from . import imagehosting
from . import goodreads
from .googlebooks import find_cover, find_categories
from .ffmpeg import FFMpeg
from . import templating as bb
from .submission import (Submission, form_field, finalize, cat_map,
Expand Down Expand Up @@ -150,6 +152,7 @@ def copy(source, target):
'movie': ['hard', 'sym', 'copy', 'move'],
'tv': ['hard', 'sym', 'copy', 'move'],
'music': ['copy', 'move'],
'book': ['copy', 'move'],
}

method_map = {'hard': os.link,
Expand Down Expand Up @@ -930,6 +933,160 @@ def _render_form_description(self):
return self['description']


class BookSubmission(BbSubmission):
    """Submission for e-books, populated from Goodreads metadata.

    Each ``_render_*`` method produces one field of the submission.
    NOTE(review): the ``form_field`` decorator presumably maps the rendered
    value onto the named upload-form input -- confirm in ``.submission``.
    """

    _cat_id = 'book'
    _form_type = 'E-Books'

    def _desc(self):
        """Return the book description with HTML-like tags removed."""
        summary = self['summary']
        return re.sub(r'<[^<]+?>', '', summary['description'])

    def _render_scene(self):
        """Book submissions are never flagged as scene releases."""
        return False

    @form_field('book_retail', 'checkbox')
    def _render_retail(self):
        """Ask the user whether this is a retail release (default: no)."""
        answer = input('Is this a retail release? [y/N] ')
        return answer.lower() == 'y'

    @form_field('book_language')
    def _render_language(self):
        """Return the language name taken from the book summary."""
        return self['summary']['language']

    @form_field('book_publisher')
    def _render_publisher(self):
        """Return the publisher taken from the book summary."""
        return self['summary']['publisher']

    @form_field('book_author')
    def _render_author(self):
        """Return the name of the first listed author."""
        return self['summary']['authors'][0]['name']

    @form_field('book_format')
    def _render_format(self):
        """Return the upper-cased file extension as the book format.

        Raises:
            KeyError: if the extension is not one of the supported formats.
        """
        supported = ('EPUB', 'MOBI', 'PDF', 'HTML', 'TXT', 'DJVU',
                     'CHM', 'CBR', 'CBZ', 'CB7', 'AZW3')

        _, ext = os.path.splitext(self['path'])
        book_format = ext.replace('.', '').upper()
        if book_format not in supported:
            # Same exception type the former dict lookup raised.
            raise KeyError(book_format)
        return book_format

    def _render_summary(self):
        """Look the book up on Goodreads using the file at ``self['path']``."""
        return goodreads.Goodreads().search(self['path'])

    @form_field('book_year')
    def _render_year(self):
        """Return the publication year, prompting until a valid integer is
        entered when no summary field is available."""
        if 'summary' in self.fields:
            return self['summary']['publication_year']

        while True:
            year = input('Please enter year: ')
            try:
                return int(year)
            except ValueError:
                # Not a number: ask again.
                continue

    @form_field('book_isbn')
    def _render_isbn(self):
        """Return the ISBN from the summary ('' if the summary has none).

        Returns None when there is no summary field.
        """
        if 'summary' in self.fields:
            return self['summary'].get('isbn', '')
        return None

    @form_field('title')
    def _render_form_title(self):
        """Return the title from the summary ('' if the summary has none).

        Returns None when there is no summary field.
        """
        if 'summary' in self.fields:
            return self['summary'].get('title', '')
        return None

    @form_field('tags')
    def _render_tags(self):
        """Build the comma-separated tag string from authors, Google Books
        categories and Goodreads shelves, capped at 200 characters."""
        categories = find_categories(self['summary']['isbn'])
        authors = self['summary']['authors']
        shelves = self['summary']['shelves']

        tags = uniq([format_tag(a['name']) for a in authors] +
                    [format_tag(c) for c in categories] +
                    [format_tag(s['name']) for s in shelves])

        def tags_string(tags):
            return ",".join(format_tag(tag) for tag in tags)

        # Maximum tags length is 200 characters: drop tags from the end
        # (shelves were added last) until the joined string fits.
        while len(tags_string(tags)) > 200:
            del tags[-1]
        return tags_string(tags)

    def _render_section_information(self):
        """Render the BBCode "Information" section for the book."""
        def gr_author_link(gra):
            return bb.link(gra['name'], gra['link'])

        book = self['summary']
        isbn = book['isbn']
        links = [('Goodreads', book['url']),
                 ('Amazon', 'http://amzn.com/{}'
                  .format(isbn)),
                 ('LibraryThing', 'http://www.librarything.com/isbn/{}/'
                  .format(isbn)),
                 ('Google Books', 'http://books.google.com/books?vid=ISBN{}'
                  .format(isbn))]

        return dedent("""\
            [b]Title[/b]: {title} ({links})
            [b]ISBN[/b]: {isbn}
            [b]Publisher[/b]: {publisher}
            [b]Publication Year[/b]: {publication_year}
            [b]Rating[/b]: {rating} [size=1]({ratings_count} ratings)[/size]
            [b]Author(s)[/b]: {authors}""").format(
                links=", ".join(bb.link(*link) for link in links),
                title=book['title'],
                isbn=isbn,
                publisher=book['publisher'],
                publication_year=book['publication_year'],
                rating=bb.format_rating(float(book['average_rating']),
                                        max=5),
                ratings_count=book['ratings_count'],
                authors=" | ".join(gr_author_link(a) for a in book['authors'])
            )

    def _render_section_description(self):
        """Render the "Description" section (plain-text book description)."""
        return self._desc()

    @form_field('desc')
    def _render_description(self):
        """Join the Description and Information sections and append the
        release footer."""
        sections = [("Description", self['section_description']),
                    ("Information", self['section_information'])]

        description = "\n".join(bb.section(*s) for s in sections)
        description += bb.release

        return description

    @finalize
    @form_field('image')
    def _render_cover(self):
        """Return a cover image URL, falling back to Google Books when
        Goodreads only supplies its placeholder image."""
        # Goodreads usually won't give you a cover image as they don't have
        # the right to distribute them
        if 'nophoto' in self['summary']['image_url']:
            return find_cover(self['summary']['isbn'])
        return self['summary']['image_url']

    def _finalize_cover(self):
        """Upload the cover to the image host and return the result."""
        return imagehosting.upload(self['cover'])


class AudioSubmission(BbSubmission):
default_fields = ("description", "form_tags", "year", "cover",
"title", "format", "bitrate")
Expand Down
40 changes: 40 additions & 0 deletions pythonbits/calibre.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
import subprocess

from .logging import log

# Name of the Calibre command-line executable invoked to read eBook metadata.
COMMAND = "ebook-meta"


class EbookMetaException(Exception):
    """Raised when the ``ebook-meta`` executable cannot be run."""


def get_version():
    """Return the text printed by ``ebook-meta --version``.

    Raises:
        EbookMetaException: if an OSError occurs while running the command
            (typically because the executable is not installed).
    """
    argv = [COMMAND, '--version']
    try:
        process = subprocess.Popen(
            argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, _stderr = process.communicate()
        return stdout.decode('utf8')
    except OSError:
        message = (
            "Could not find {}, please ensure it is installed (via Calibre)."
            .format(COMMAND))
        raise EbookMetaException(message)


def read_metadata(path):
    """Read an eBook's metadata by running ``ebook-meta`` on it.

    Args:
        path: Filesystem path of the eBook file.

    Returns:
        dict mapping each field label printed by ``ebook-meta`` (with
        padding spaces and dots stripped) to its value as a string.

    Raises:
        EbookMetaException: if ``ebook-meta`` cannot be run at all.
        subprocess.CalledProcessError: if ``ebook-meta`` exits non-zero.
    """
    version = get_version()
    log.debug('Found ebook-meta version: %s' % version)
    log.info("Trying to read eBook metadata...")

    # Pass an argument list and no shell: a path containing quotes, spaces
    # or shell metacharacters is handed to the tool verbatim instead of
    # being re-interpreted by the shell.
    output = subprocess.check_output([COMMAND, path])

    result = {}
    for row in output.decode('utf8').split('\n'):
        # Split on the FIRST ': ' only, so values that themselves contain
        # ': ' (e.g. "Title : Dune: Messiah") are kept rather than dropped.
        key, separator, value = row.partition(': ')
        if separator:
            result[key.strip(' .')] = value.strip()
    return result
145 changes: 145 additions & 0 deletions pythonbits/goodreads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
from textwrap import dedent

import goodreads_api_client as gr
import pycountry

from .config import config
from .logging import log
from .calibre import read_metadata
from collections import OrderedDict

# Register the Goodreads API key setting under section 'Goodreads', option
# 'api_key'. NOTE(review): the dedented text is presumably the prompt shown
# to the user when the key is missing -- confirm against config.register.
config.register(
'Goodreads', 'api_key',
dedent("""\
To find your Goodreads API key, login to https://www.goodreads.com/api/keys
Enter the API Key below
API Key"""))

# Substrings identifying Goodreads shelves that describe reader bookkeeping
# (to-read, owned, audiobook, ...) rather than genres. A shelf whose name
# contains any of these is not used as a tag.
EXCLUDED_WORDS = ['read', 'favorites', 'book',
                  'own', 'series', 'novel', 'kindle', 'shelf',
                  'library', 'buy', 'abandoned',
                  'audible', 'audio', 'finish', 'wish']
Comment on lines +19 to +22
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh i think it would be a better idea to have a whitelist instead, if there are so many shitty tags

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not too bad, I think an allow-list would be too hard as there's lots and lots of genres...

This is what comes back for Dune:

<popular_shelves>
<shelf name="to-read" count="498223"/>
<shelf name="currently-reading" count="54844"/>
<shelf name="science-fiction" count="13384"/>
<shelf name="sci-fi" count="12220"/>
<shelf name="favorites" count="8088"/>
<shelf name="fiction" count="6904"/>
<shelf name="fantasy" count="4244"/>
<shelf name="classics" count="3383"/>
<shelf name="scifi" count="2661"/>
<shelf name="owned" count="2454"/>
<shelf name="sci-fi-fantasy" count="1304"/>
<shelf name="books-i-own" count="974"/>
<shelf name="sf" count="937"/>
<shelf name="classic" count="789"/>
<shelf name="series" count="729"/>
<shelf name="audiobook" count="687"/>
<shelf name="dune" count="659"/>
<shelf name="scifi-fantasy" count="531"/>
<shelf name="space-opera" count="528"/>
<shelf name="fantasy-sci-fi" count="528"/>
<shelf name="audiobooks" count="505"/>
<shelf name="favourites" count="489"/>
<shelf name="novels" count="479"/>
<shelf name="kindle" count="468"/>
<shelf name="library" count="454"/>
<shelf name="default" count="443"/>
<shelf name="owned-books" count="395"/>
<shelf name="adventure" count="377"/>
<shelf name="to-buy" count="363"/>
<shelf name="dnf" count="344"/>
<shelf name="book-club" count="334"/>
<shelf name="science-fiction-fantasy" count="333"/>
<shelf name="abandoned" count="324"/>
<shelf name="audible" count="320"/>
<shelf name="adult" count="307"/>
<shelf name="audio" count="281"/>
<shelf name="speculative-fiction" count="273"/>
<shelf name="re-read" count="271"/>
<shelf name="space" count="264"/>
<shelf name="novel" count="252"/>
<shelf name="ebook" count="243"/>
<shelf name="did-not-finish" count="243"/>
<shelf name="my-library" count="237"/>
<shelf name="tbr" count="236"/>
<shelf name="literature" count="233"/>
<shelf name="epic" count="228"/>
<shelf name="great-american-read" count="219"/>
<shelf name="my-books" count="214"/>
<shelf name="sci-fi-and-fantasy" count="210"/>
<shelf name="books" count="207"/>
<shelf name="frank-herbert" count="207"/>
<shelf name="fantasy-scifi" count="206"/>
<shelf name="all-time-favorites" count="203"/>
<shelf name="unfinished" count="203"/>
<shelf name="science" count="196"/>
<shelf name="american" count="187"/>
<shelf name="on-hold" count="180"/>
<shelf name="20th-century" count="171"/>
<shelf name="english" count="160"/>
<shelf name="read-in-2020" count="158"/>
<shelf name="sff" count="158"/>
<shelf name="hugo-award" count="154"/>
<shelf name="favorite" count="154"/>
<shelf name="hugo" count="153"/>
<shelf name="sf-fantasy" count="153"/>
<shelf name="wish-list" count="152"/>
<shelf name="adult-fiction" count="146"/>
<shelf name="ebooks" count="144"/>
<shelf name="recommended" count="141"/>
<shelf name="dystopian" count="141"/>
<shelf name="reread" count="138"/>
<shelf name="favorite-books" count="138"/>
<shelf name="bookshelf" count="134"/>
<shelf name="fantascienza" count="132"/>
<shelf name="ciencia-ficción" count="130"/>
<shelf name="dystopia" count="129"/>
<shelf name="audio-books" count="127"/>
<shelf name="maybe" count="125"/>
<shelf name="religion" count="124"/>
<shelf name="read-2020" count="123"/>
<shelf name="i-own" count="123"/>
<shelf name="to-read-fiction" count="123"/>
<shelf name="must-read" count="118"/>
<shelf name="audio-book" count="115"/>
<shelf name="the-great-american-read" count="114"/>
<shelf name="to-re-read" count="112"/>
<shelf name="ciencia-ficcion" count="110"/>
<shelf name="nebula-award" count="110"/>
<shelf name="own-it" count="107"/>
<shelf name="to-reread" count="106"/>
<shelf name="owned-tbr" count="104"/>
<shelf name="hugo-winners" count="104"/>
<shelf name="home-library" count="103"/>
<shelf name="science-fiction-and-fantasy" count="103"/>
<shelf name="nebula" count="101"/>
<shelf name="physical-tbr" count="99"/>
<shelf name="paperback" count="99"/>
<shelf name="politics" count="99"/>
<shelf name="war" count="99"/>
<shelf name="sf-masterworks" count="98"/>
</popular_shelves>



def _extract_authors(authors):
if isinstance(authors['author'], OrderedDict):
return [{
'name': authors['author']['name'],
'link': authors['author']['link']
}]
else:
return [_extract_author(auth)
for auth in authors['author']]


def _extract_author(auth):
return {
'name': auth['name'],
'link': auth['link']
}


def _extract_language(alpha_3):
    """Translate a language code into a language name.

    The code is first looked up as an ISO 639 three-letter code, then its
    first two characters are tried as a two-letter code. If neither lookup
    succeeds, or no usable code was given, the user is asked.

    Args:
        alpha_3: Language code string; may be None or empty.

    Returns:
        The language name as a string.
    """
    # Guard against a missing code: slicing None would raise TypeError
    # before the user ever got the chance to answer the prompt.
    if isinstance(alpha_3, str) and alpha_3:
        for query in ({'alpha_3': alpha_3}, {'alpha_2': alpha_3[:2]}):
            try:
                return pycountry.languages.get(**query).name
            except (AttributeError, KeyError):
                # AttributeError: lookup returned None (unknown code).
                # KeyError: NOTE(review) older pycountry releases reportedly
                # raise instead of returning None -- harmless if not.
                continue

    # I give up
    return input('Please specify the book\'s Language: ')


def _extract_shelves(shelves, take):
    """Pick tag candidates (e.g. sci-fi) from a book's popular shelves.

    The shelves are ordered by descending count, truncated to the ``take``
    most popular, and only then filtered with ``_exclude_well_known`` -- so
    fewer than ``take`` entries may be returned.

    Args:
        shelves: List of shelf mappings with ``'@name'`` and ``'@count'``;
            a single shelf mapping or None is also accepted.
        take: Number of most popular shelves to consider.

    Returns:
        list of ``{'name', 'count'}`` dicts.
    """
    if shelves is None:
        return []
    # NOTE(review): assumes a book with exactly one shelf is parsed as a
    # mapping rather than a one-element list, as already handled for
    # authors in this module -- confirm against the API client.
    if isinstance(shelves, dict):
        shelves = [shelves]

    most_popular = sorted(shelves, key=_shelf_sort_key, reverse=True)[:take]
    return [_extract_shelf(shelf)
            for shelf in most_popular
            if _exclude_well_known(shelf)]


def _exclude_well_known(s):
    """Return True if the shelf name contains none of EXCLUDED_WORDS."""
    for word in EXCLUDED_WORDS:
        if word in s['@name']:
            return False
    return True


def _shelf_sort_key(s):
return int(s['@count'])


def _extract_shelf(shelf):
return {'name': shelf['@name'], 'count': shelf['@count']}


def _process_book(books):
    """Condense a Goodreads book record into the dict used by submissions.

    Args:
        books: Iterable of ``(key, value)`` pairs from a Goodreads book
            record (callers in this module pass ``record.items()``).

    Returns:
        dict holding the wanted raw keys, with ``authors`` replaced by
        name/link dicts and ``ratings_count``, ``language`` and ``shelves``
        added.
    """
    wanted = frozenset((
        'id', 'title', 'isbn', 'isbn13', 'description',
        'language_code', 'publication_year', 'publisher',
        'image_url', 'url', 'authors', 'average_rating',
        'work', 'popular_shelves',
    ))

    book = {}
    for key, value in books:
        if key in wanted:
            book[key] = value

    book['authors'] = _extract_authors(book['authors'])
    ratings_text = book['work']['ratings_count']['#text']
    book['ratings_count'] = int(ratings_text)
    book['language'] = _extract_language(book['language_code'])
    # Only the 10 most popular shelves are considered as tag sources.
    book['shelves'] = _extract_shelves(book['popular_shelves']['shelf'], 10)
    return book


class Goodreads(object):
    """Finds a book on Goodreads from an eBook file's embedded metadata."""

    def __init__(self, interactive=True):
        """Create the API client using the configured Goodreads API key.

        Args:
            interactive: Accepted for interface compatibility; not used by
                this class.
        """
        self.goodreads = gr.Client(
            developer_key=config.get('Goodreads', 'api_key'))

    def show_by_isbn(self, isbn):
        """Fetch the book with the given ISBN and return its processed dict."""
        return _process_book(self.goodreads.Book.show_by_isbn(
            isbn).items())

    @staticmethod
    def _isbn_from_identifiers(identifiers):
        """Return the ISBN from an ``ebook-meta`` identifiers string.

        ``identifiers`` looks like ``"isbn:9780441172719, goodreads:234225"``.
        Only an entry whose scheme starts with ``isbn`` is accepted, so
        other identifiers (ASIN, Goodreads id, ...) are never mistaken for
        an ISBN. Returns '' when there is none.
        """
        for entry in identifiers.split(','):
            scheme, _, value = entry.partition(':')
            value = value.strip()
            if scheme.strip().lower().startswith('isbn') and value:
                return value
        return ''

    def _find_works(self, search_term):
        """Search Goodreads and return the matching works as a list."""
        try:
            works = self.goodreads.search_book(search_term)['results']['work']
        except (KeyError, TypeError):
            # No 'results'/'work' entry at all: treat as "nothing found".
            works = None
        if works is None:
            return []
        # NOTE(review): assumes a single hit is returned as a mapping rather
        # than a one-element list -- confirm against the API client.
        if isinstance(works, dict):
            return [works]
        return works

    @staticmethod
    def _ask_selection(count):
        """Prompt until the user gives a valid result index or some text.

        Returns an int in ``range(count)`` (0 when the answer is empty), or
        the entered non-numeric string.
        """
        while True:
            answer = input('Select number or enter an alternate'
                           ' search term'
                           ' (or an ISBN with isbn: prefix):'
                           ' [0-{}, 0 default] '
                           .format(count - 1)).strip()
            if not answer:
                answer = '0'
            try:
                index = int(answer)
            except ValueError:
                return answer
            # Reject negative numbers too: they would otherwise silently
            # index from the end of the result list.
            if 0 <= index < count:
                return index

    def search(self, path):
        """Find the Goodreads entry for the eBook at ``path``.

        The ISBN embedded in the file is used when present. Otherwise
        Goodreads is searched by title and the user picks a result, enters
        a different search term, or supplies an ISBN as ``isbn:<number>``.

        Returns:
            The processed book dict, or None when the file carries neither
            an ISBN nor a title.
        """
        metadata = read_metadata(path)
        title = metadata.get('Title', '')
        isbn = self._isbn_from_identifiers(metadata.get('Identifiers', ''))

        if isbn:
            log.debug("Searching Goodreads by ISBN {} for '{}'",
                      isbn, title)
            return self.show_by_isbn(isbn)
        if not title:
            return None

        search_term = title
        while True:
            log.debug(
                "Searching Goodreads by Title only for '{}'", search_term)
            works = self._find_works(search_term)
            print("Results:")
            if not works:
                print('No results found.')
            for i, work in enumerate(works):
                print('{}: {} by {} ({})'
                      .format(i, work['best_book']['title'],
                              work['best_book']['author']['name'],
                              work['original_publication_year']
                              .get('#text', '')))

            selection = self._ask_selection(len(works))
            if isinstance(selection, int):
                book_id = works[selection]['best_book']['id'].get('#text', '')
                log.debug("Selected Goodreads item {}", book_id)
                log.debug("Searching Goodreads by ID {}", book_id)
                return _process_book(self.goodreads.Book.show(
                    book_id).items())
            if selection.lower().startswith('isbn:'):
                return self.show_by_isbn(selection[len('isbn:'):].strip())
            # Anything else is the alternate search term the prompt offers.
            search_term = selection
Loading