-
Notifications
You must be signed in to change notification settings - Fork 0
/
pensador.py
60 lines (49 loc) · 1.78 KB
/
pensador.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import json
import requests
from bs4 import BeautifulSoup
def scrape_quotes(name='frases_bonitas', target=200, max_quotes=366, timeout=10):
    """Scrape quotes from pensador.com and save them to ``<name>.json``.

    Pages ``https://www.pensador.com/<name>/<page>/`` are fetched in order,
    starting at page 1. Every element with class ``frase fr`` on a page is
    stored together with the text of the ``author-name`` element found
    inside the next ``autor`` element.

    Scraping stops when any of the following happens:

    * at least ``target`` quotes have been collected (the page in progress
      is still consumed to its end, so the total may exceed ``target``);
    * ``max_quotes`` quotes have been collected (hard cap);
    * a page yields no quotes at all (otherwise the loop would never end);
    * a request fails or the expected author markup is missing.

    Whatever was collected up to that point is always written to disk.

    The JSON file maps the 1-based position of each quote (as a string) to
    a one-element list holding ``{'quote': ..., 'author': ...}``.

    Args:
        name: Category slug used both in the URL and as the output file
            name (without extension).
        target: Keep requesting pages until this many quotes are stored.
        max_quotes: Never store more than this many quotes.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. The result is the ``<name>.json`` file and console output.
    """
    quotes = {}
    current_page = 1

    while len(quotes) < target and len(quotes) < max_quotes:
        print(len(quotes))  # progress: quotes collected so far
        url = f'https://www.pensador.com/{name}/{current_page}/'
        found_on_page = 0
        try:
            # A timeout keeps a stalled connection from hanging the script,
            # and raise_for_status() keeps error pages from being parsed
            # as if they were real content.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

            for element in soup.find_all(class_='frase fr'):
                if len(quotes) >= max_quotes:
                    # Hard cap reached: stop collecting, but fall through
                    # to the file write instead of discarding the results.
                    break
                quote = element.get_text().strip()
                # find_next()/find() return None when the markup is absent;
                # the resulting AttributeError is handled below.
                author = (
                    element.find_next(class_='autor')
                    .find(class_='author-name')
                    .get_text()
                    .strip()
                )
                # Keys are unique running numbers, so each one maps to a
                # single-entry list (the shape the output always had).
                quotes[str(len(quotes) + 1)] = [
                    {'quote': quote, 'author': author},
                ]
                found_on_page += 1
        except (requests.RequestException, AttributeError) as e:
            print(f'Ocorreu um erro ao fazer o scraping da página {current_page}')
            print(e)
            break

        if found_on_page == 0:
            # An empty page means there is nothing more to fetch; without
            # this check the loop would request pages forever.
            break
        current_page += 1

    with open(f'{name}.json', 'w', encoding='utf-8') as file:
        json.dump(quotes, file, indent=2, ensure_ascii=False)
    print(f'Arquivo {name}.json criado com sucesso!')
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger network requests or file writes.
if __name__ == '__main__':
    scrape_quotes()