-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.py
85 lines (68 loc) · 2.5 KB
/
convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import base64
import json
import logging
import time
from io import BytesIO
from typing import List
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
# Configure root logging at import time with a WARNING threshold
# (basicConfig is a no-op if the root logger already has handlers).
logging.basicConfig(level=logging.WARNING)
class PdfGenerator:
    """Render web pages to PDF with headless Chrome driven through Selenium.

    Each URL passed to the constructor is loaded in a headless Chrome session
    and printed with the DevTools ``Page.printToPDF`` command.  Call
    :meth:`main` to run the conversion; it returns one in-memory PDF per URL,
    in the same order as the input list.
    """

    # Active WebDriver session; assigned by main() before any page is loaded.
    driver = None

    # Seconds to pause after navigation so the page can finish loading before
    # it is printed.  Override on the class or an instance if pages need longer.
    page_load_delay = 1

    # https://chromedevtools.github.io/devtools-protocol/tot/Page#method-printToPDF
    # (paperWidth / paperHeight are in inches per the protocol reference above.)
    print_options = {
        'landscape': False,
        'displayHeaderFooter': False,
        'printBackground': True,
        'preferCSSPageSize': True,
        'paperWidth': 16,
        'paperHeight': 16,
    }

    def __init__(self, urls: List[str]):
        """Store the list of page URLs to convert.

        Args:
            urls: URLs to render, one PDF is produced per entry.
        """
        self.urls = urls

    def _get_pdf_from_url(self, url, *args, **kwargs):
        """Load *url* in the active driver and return the page as PDF bytes.

        Extra positional and keyword arguments are accepted for backward
        compatibility and are ignored.

        Args:
            url: Address of the page to print.

        Returns:
            The decoded PDF document as ``bytes``.
        """
        self.driver.get(url)
        time.sleep(self.page_load_delay)  # allow the page to load
        # Hand DevTools a copy so the shared class-level dict is never mutated.
        print_options = self.print_options.copy()
        result = self._send_devtools(self.driver, "Page.printToPDF", print_options)
        # The command result carries the document base64-encoded under 'data'.
        return base64.b64decode(result['data'])

    @staticmethod
    def _send_devtools(driver, cmd, params):
        """Send a DevTools protocol command through the driver.

        Works only with Chromium-based drivers.  The driver's public
        ``execute_cdp_cmd`` method is used when it exists; otherwise the
        command is posted directly to chromedriver's
        ``send_command_and_get_result`` endpoint through the command
        executor's private request helper.

        Args:
            driver: WebDriver instance with an open session.
            cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
            params: Dict of parameters for the command.

        Returns:
            The command's result value as returned by the driver.
        """
        execute_cdp = getattr(driver, 'execute_cdp_cmd', None)
        if callable(execute_cdp):
            return execute_cdp(cmd, params)

        # Legacy path: relies on private attributes of the command executor.
        resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
        url = driver.command_executor._url + resource
        body = json.dumps({'cmd': cmd, 'params': params})
        response = driver.command_executor._request('POST', url, body)
        return response.get('value')

    def _generate_pdfs(self):
        """Convert every configured URL and return the PDFs as ``BytesIO`` objects.

        Each buffer is positioned at offset 0, so ``read()``, ``getvalue()``
        and ``getbuffer()`` all yield the full document.
        """
        return [BytesIO(self._get_pdf_from_url(url)) for url in self.urls]

    def main(self) -> List[BytesIO]:
        """Start headless Chrome, convert all URLs, and shut the browser down.

        Returns:
            One ``BytesIO`` per URL, in input order.

        Raises:
            Whatever the driver raises on start-up, navigation, or printing;
            a start-up failure propagates unchanged.
        """
        webdriver_options = ChromeOptions()
        webdriver_options.add_argument('--headless')
        webdriver_options.add_argument('--disable-gpu')

        # Create the driver outside the try block: if start-up fails there is
        # nothing to clean up, and the original error must not be masked.
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=webdriver_options,
        )
        try:
            return self._generate_pdfs()
        finally:
            # quit() ends the whole session; close() would only close the
            # current window.
            self.driver.quit()
"""
# Simple use case:
pdf_file = PdfGenerator(['https://www.gresb.com/nl-en/']).main()
with open('new_pdf.pdf', "wb") as outfile:
    outfile.write(pdf_file[0].getbuffer())
"""