-
Notifications
You must be signed in to change notification settings - Fork 0
/
ws_client.py
executable file
·66 lines (48 loc) · 1.96 KB
/
ws_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim: set fileencoding=utf8 :
from urllib.request import urlretrieve
from urllib.request import urlopen
import bs4
class Client(object):
    """Scrape Packt's free-learning offer page and save the advertised book.

    ``run`` downloads the page, extracts the title, cover image, description
    and key points, stores the cover as ``book_cover.jpg`` and writes the
    textual information through ``print_book``.
    """

    # The description and key-points nodes carry no id or class, so they can
    # only be located by their position among the children of the element
    # that contains the title.
    DESCRIPTION_INDEX = 7
    POINTS_INDEX = 9

    def print_book(self, title, description, points, web_url, output="book_info.txt"):
        """Write the book summary to *output*, replacing any previous content.

        The file receives the stripped title, the stripped description, one
        ``'\\t* '``-prefixed line per line of *points* (omitted entirely when
        *points* is blank) and finally *web_url*.

        Args:
            title: Book title; surrounding whitespace is removed.
            description: Book description; surrounding whitespace is removed.
            points: Newline-separated key points of the book.
            web_url: Address of the page the data was taken from.
            output: Path of the text file to (over)write.
        """
        # Normalize every field before touching the file so that a bad
        # argument cannot leave a truncated, half-written file behind.
        title = title.strip()
        description = description.strip()
        points = points.strip()

        # A single handle, closed deterministically, instead of one leaked
        # handle per printed line. UTF-8 keeps non-ASCII titles writable
        # regardless of the platform's default encoding.
        with open(output, "w", encoding="utf-8") as out:
            print(title, file=out)
            print(description, file=out)
            if points:
                for point in points.split('\n'):
                    print('\t* ' + point, file=out)
            print(web_url, file=out)

    def get_web(self, url):
        """Fetch *url* and return the response body as bytes."""
        # The context manager closes the response even when read() fails.
        with urlopen(url) as response:
            return response.read()

    def run(self):
        """Scrape the offer page and store the cover image and book details.

        Writes ``book_cover.jpg`` (only when a cover image is found) and
        ``book_info.txt`` relative to the current working directory.

        Raises:
            AttributeError: If the page contains no ``dotd-title`` div, since
                the title lookup then yields ``None``.
        """
        web_url = "https://www.packtpub.com/packt/offers/free-learning/"

        # Download the web page and parse it.
        html = self.get_web(web_url)
        soup = bs4.BeautifulSoup(html, "lxml")

        # Search the title.
        book_title_html = soup.find("div", "dotd-title")
        title = book_title_html.text

        # Search the book cover and download it if there is one. The 'src'
        # value is prefixed with the scheme and has its spaces escaped.
        book_image_html = soup.find("img", "bookimage imagecache imagecache-dotd_main_image")
        if book_image_html:
            cover_url = 'http:' + book_image_html['src'].replace(" ", "%20")
            urlretrieve(cover_url, "book_cover.jpg")

        # Default to empty strings so a page with fewer children than
        # expected yields an incomplete summary instead of an
        # UnboundLocalError.
        description = ""
        points = ""
        for i, sibling in enumerate(book_title_html.parent):
            if i == self.DESCRIPTION_INDEX:
                description = sibling.text
            elif i == self.POINTS_INDEX:
                points = sibling.text

        # Print results.
        self.print_book(title, description, points, web_url)
if __name__ == '__main__':
    # Script entry point: build a client and perform one scrape.
    Client().run()