-
Notifications
You must be signed in to change notification settings - Fork 0
/
decretosv2.py
78 lines (70 loc) · 2.65 KB
/
decretosv2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import json
import pymongo
import re
import requests
import urllib3
from bs4 import BeautifulSoup
from datetime import datetime
from html.parser import HTMLParser
from pymongo import MongoClient
# The requests.get calls in this file pass verify=False, which makes urllib3
# emit an InsecureRequestWarning per request; silence that warning category.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def connection(uri="mongodb://localhost:27017/", db_name="decretospy"):
    """Open a MongoDB client and return a handle to one of its databases.

    Args:
        uri: MongoDB connection string. Defaults to the local server.
        db_name: Name of the database to select. Defaults to ``"decretospy"``.

    Returns:
        The database object obtained by indexing the client with *db_name*.
    """
    client = MongoClient(uri)
    return client[db_name]
def _parse_decree_row(tr):
    """Build the decree dict for one ``<tr>`` of the listing table."""
    cells = tr.find_all('td')
    # The id is made of every digit found in the href of the last cell's link.
    digits = ''.join(ch for ch in cells[-1].a.get('href') if ch.isdigit())
    date_text = cells[0].text
    return {
        "decreeId": int(digits),
        "nro": int(tr.find('th').text),
        # strip() so whitespace around the date does not make strptime fail.
        "fecha": datetime.strptime(date_text.strip(), '%d/%m/%Y'),
        "descripcion": date_text + ": " + cells[1].text.title().strip(),
        "link": cells[2].a.get('href'),
        "tweet": False,
        'fecha_alta': datetime.now(),
    }


def obtener_decretos(url):
    """Scrape one page of the decree listing and return its rows as dicts.

    The page is requested with a 10 second timeout, a browser-like user
    agent and TLS verification disabled. Every ``<tr>`` inside the first
    ``<tbody>`` is converted into a dict with the keys ``decreeId``, ``nro``,
    ``fecha``, ``descripcion``, ``link``, ``tweet`` and ``fecha_alta``.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        A list of decree dicts. This is always a list: if the request or the
        parsing fails, the error is printed and the rows parsed before the
        failure (possibly none) are returned, so callers can safely
        ``extend`` with the result.
    """
    rows = []
    try:
        response = requests.get(
            url,
            timeout=10,
            headers={'user-agent': 'Mozilla/5.0'},
            verify=False,
        )
        soup = BeautifulSoup(response.text, "html.parser")
        # soup.find returns None when the page has no <tbody>; the resulting
        # AttributeError is reported by the generic handler below.
        tbody = soup.find('tbody')
        for tr in tbody.find_all('tr'):
            rows.append(_parse_decree_row(tr))
    except requests.ConnectionError:
        print("error al conectar")
    except Exception as e:
        print('error: %s' % e)
    return rows
def _siguiente_url(url):
    """Return the href of the ``rel="next"`` link in the first ``<ul>`` of *url*, or None."""
    response = requests.get(
        url,
        timeout=10,
        headers={'user-agent': 'Mozilla/5.0'},
        verify=False,
    )
    soup = BeautifulSoup(response.text, "html.parser")
    pager = soup.find("ul")
    if pager is None:
        return None
    link = pager.find("a", {'rel': 'next'})
    return link.get('href') if link is not None else None


def decretos(minimo=50, max_intentos=3):
    """Collect decrees from consecutive listing pages.

    Starting at the first page of the decree viewer, each page is scraped
    with ``obtener_decretos`` and the ``rel="next"`` pagination link is
    followed until at least *minimo* decrees have been gathered. Whole pages
    are appended, so the result may hold more than *minimo* items.

    The loop also stops, returning what was gathered so far, when a page
    yields no rows after *max_intentos* attempts, when the next-page link
    cannot be fetched after *max_intentos* attempts, or when the page has no
    next-page link (last page).

    Args:
        minimo: Minimum number of decrees to collect before stopping.
        max_intentos: How many times each request is attempted before giving up.

    Returns:
        A list of decree dicts as produced by ``obtener_decretos``.
    """
    collected = []
    url = "https://www.presidencia.gov.py/url-sistema-visor-decretos/index.php/decretos/"
    while len(collected) < minimo:
        # Scrape the current page, retrying a bounded number of times.
        rows = None
        for _ in range(max_intentos):
            rows = obtener_decretos(url)
            if rows:
                break
        if not rows:
            # Covers both None and an empty list: nothing usable on this page.
            break
        collected.extend(rows)
        if len(collected) >= minimo:
            # Enough rows already; skip the extra pagination request.
            break

        # Find the next page. A page without a next link ends the crawl;
        # only request errors are retried.
        next_url = None
        for _ in range(max_intentos):
            try:
                next_url = _siguiente_url(url)
                break
            except requests.ConnectionError:
                print("error al conectar")
            except Exception as e:
                print(e)
        if not next_url:
            break
        url = next_url
    return collected
def write_output():
    """Scrape the decrees once and store them in the ``decretos`` collection.

    The scraped list is printed, sorted by ``fecha`` and inserted one document
    at a time. A unique index on ``decreeId`` is ensured before inserting, so
    a decree that is already stored raises ``DuplicateKeyError``. In that case
    the stored document is updated only if its ``descripcion`` or ``link``
    differs from the scraped one; the update also sets ``fecha_modificacion``
    to the current time and resets ``tweet`` to ``False``.

    Any other database error propagates to the caller.
    """
    db = connection()
    # Scrape a single time and reuse the result for both printing and storing.
    scraped = decretos()
    print(scraped)

    # The unique index must exist before the first insert for duplicates to
    # be rejected; create_index does nothing if it is already there.
    db.decretos.create_index("decreeId", unique=True)

    for decreto in sorted(scraped, key=lambda d: d['fecha']):
        try:
            db.decretos.insert_one(decreto)
        except pymongo.errors.DuplicateKeyError:
            # Already stored: check whether an identical copy exists.
            identical = db.decretos.count_documents({
                'decreeId': decreto['decreeId'],
                'descripcion': decreto['descripcion'],
                'link': decreto['link'],
            })
            if identical == 0:
                db.decretos.update_one(
                    {'decreeId': decreto['decreeId']},
                    {'$set': {
                        'descripcion': decreto['descripcion'],
                        'link': decreto['link'],
                        'fecha_modificacion': datetime.now(),
                        'tweet': False,
                    }},
                )