-
Notifications
You must be signed in to change notification settings - Fork 0
/
htmlToPDFdirTree.py
61 lines (57 loc) · 2.08 KB
/
htmlToPDFdirTree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Adapted from bookmarks2fs: https://github.com/bookmarks-tools/bookmarks2fs/blob/master/bookmarks2fs.py
# Requires the wkhtmltopdf binary to be installed; pdfkit depends on it.
import configparser
from pathlib import Path
import base64
from urllib.parse import urlparse
import bookmarks_parser
import unicodedata
import re
import pdfkit
import os
import shutil
# Module-level parser; RawConfigParser performs no value interpolation.
config = configparser.RawConfigParser()
# Use str as the option-name transform so option names keep their case
# (the default transform lowercases them).
config.optionxform = str
# NOTE(review): `config` is not referenced anywhere else in the visible part
# of this file -- confirm whether it is still needed.
def create_bookmark(bookmark, folder_name):
    """Render one bookmark's URL to ``<folder_name>/<sanitised title>.pdf``.

    Args:
        bookmark: Mapping with a ``'url'`` entry and, optionally, a
            ``'title'`` entry. When the title is missing or empty the URL
            is used to derive the file name instead.
        folder_name: Directory the PDF is written into.

    Raises:
        OSError: Re-raised from ``pdfkit.from_url`` unless the error text
            contains ``'Done'``, which is treated as a successful render.
    """
    # Fall back to the URL so untitled bookmarks do not all collide on ".pdf".
    raw_title = bookmark.get('title') or bookmark['url']
    # Collapse every run of non-alphanumeric characters into one underscore
    # so the title is safe to use as a file name.
    title = re.sub(r'[^0-9a-zA-Z]+', '_', raw_title)

    # Drop the query string that follows a ".css"/".js" path. The previous
    # pattern used PHP-style "#...#" delimiters and a "$1" backreference,
    # which never matched under Python's re module.
    url = re.sub(r'(\.css|\.js)\?[^"]+', r'\1', bookmark['url'])

    # pdfkit maps each key to "--<key> <value>" on the wkhtmltopdf command line.
    options = {
        'load-error-handling': 'ignore',
        'load-media-error-handling': 'ignore',
    }
    output_path = '{}/{}'.format(folder_name, title + '.pdf')

    try:
        pdfkit.from_url(url, output_path, options=options)
    except OSError as e:
        # An error whose text contains 'Done' is ignored; anything else
        # propagates with its original traceback.
        if 'Done' not in str(e):
            raise
def title2path(child, prev=None):
    """Mirror a bookmark tree on disk, rendering each bookmark to a PDF.

    Every entry with a non-empty ``'children'`` list becomes a directory
    nested under its parent's directory; every entry whose ``'type'`` is
    ``'bookmark'`` is handed to ``create_bookmark``. Entries with no
    children and any other type are skipped.

    Args:
        child: Iterable of bookmark/folder mappings at the current level.
        prev: Mapping for the parent folder whose ``'title'`` holds the
            parent directory path, or ``None`` at the top level.

    The input mappings are not modified, so the same tree can be processed
    more than once.
    """
    parent_dir = prev['title'] if prev else None
    for bookmark in child:
        children = bookmark.get('children')
        if children:
            name = bookmark['title']
            folder = '{}/{}'.format(parent_dir, name) if parent_dir else name
            # parents/exist_ok: tolerate reruns and missing intermediates.
            Path(folder).mkdir(parents=True, exist_ok=True)
            # Pass the accumulated path down via a copy instead of rewriting
            # the caller's title in place.
            title2path(children, {**bookmark, 'title': folder})
        elif bookmark.get('type') == 'bookmark':
            # Top-level bookmarks have no parent folder; use the current
            # directory rather than failing on prev being None.
            create_bookmark(bookmark, parent_dir if parent_dir else '.')
if __name__ == '__main__':
    import sys

    # Bookmarks export to process: the first command-line argument when
    # given, otherwise "test.html" in the working directory.
    export_file = sys.argv[1] if len(sys.argv) > 1 else "test.html"
    bookmarks = bookmarks_parser.parse(export_file)

    # Make sure the "icons" directory exists in the working directory.
    p = Path('icons')
    if not p.exists():
        p.mkdir()

    # Walk the parsed tree, creating folders and rendering PDFs.
    title2path(bookmarks)