-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbc3_testor.py
80 lines (51 loc) · 1.95 KB
/
bc3_testor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 2 19:34:04 2018
@author: zouco
This file is my test script for BasicCrawler.
It also serves as example material for learning how to use BasicCrawler.
"""
from bc3 import BasicCrawler
from bc3 import BasicCrawlerGroup
import time
def general_test():
    """Smoke-test a two-crawler group built with 'auto' proxies and headers.

    Prints the length of the main crawler's ``proxies_list_``, then for each
    of the two worker crawlers the length of its ``proxies_list_`` and its
    ``proxies_`` attribute, and finally the result of ``find('a')`` on the
    soup each worker returns for the test page.
    """
    target_url = 'http://zoucongyu.strikingly.com'
    group = BasicCrawlerGroup(proxies='auto', headers='auto', num_crawler=2)

    print(len(group.main_crawler.proxies_list_))

    # Report the proxy state of both workers before any request is made.
    for index in (0, 1):
        worker = group.crawlers[index]
        print(len(worker.proxies_list_))
        print(worker.proxies_)

    # Fetch the same page through each worker in turn.
    for index in (0, 1):
        page = group.crawlers[index].get_soup(target_url)
        print(page.find('a'))
def bcg_comparison_test():
    """Time a 4-crawler group against a single crawler on 40 identical requests.

    Both measurements include construction of the crawler object(s). The
    function prints, in order: ``find('a')`` on the last soup returned by the
    group, the group's elapsed seconds, the single crawler's elapsed seconds,
    and the group's elapsed seconds again so the two timings sit side by side.
    """
    test_page = 'https://news.baidu.com'
    num_requests = 40

    # perf_counter() is monotonic and high-resolution, so the interval cannot
    # be distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    bcg = BasicCrawlerGroup(num_crawler=4, proxies='auto', safetime=(2, 2))
    # A fresh list is built for the group in case run() consumes its argument.
    soups = bcg.run([test_page] * num_requests)
    print(soups[-1].find('a'))
    group_elapsed = time.perf_counter() - start
    print(group_elapsed)

    start = time.perf_counter()
    bc = BasicCrawler(proxies='auto', safetime=(2, 2))
    soups = [bc.get_soup(test_page) for _ in range(num_requests)]
    single_elapsed = time.perf_counter() - start
    print(single_elapsed)
    # Repeat the group timing directly under the single-crawler timing.
    print(group_elapsed)
def test_bcg_save_htmls():
    """Run the 'save html' task of a 4-crawler group on 12 copies of one URL."""
    target_url = 'https://news.baidu.com'
    group = BasicCrawlerGroup(num_crawler=4, proxies='auto')
    url_batch = [target_url] * 12
    group.run(url_batch, task='save html')
if __name__ == '__main__':
    # Alternative experiments, kept as usage examples (uncomment to try):
    #   crawler = BasicCrawler()
    #   soup = crawler.get_soup('https://www.bbc.com/news')
    #   print(soup.find('a'))
    #   bcg_comparison_test()
    #   crawler.save_htmls(['https://news.baidu.com' for _ in range(10)])

    # Default run: the 'save html' task on 12 copies of one URL, using a
    # 4-crawler group with proxies=None.
    target_url = 'https://news.baidu.com'
    group = BasicCrawlerGroup(num_crawler=4, proxies=None, safetime=(2, 2))
    group.run([target_url] * 12, task='save html')