-
Notifications
You must be signed in to change notification settings - Fork 0
/
deer-incidente.py
105 lines (73 loc) · 3.3 KB
/
deer-incidente.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
frecvență?
for each regiune
- [x] întreruperi accidentale: `incidente.aspx`
- [ ] întreruperi programate: `intreruperi<zonă>.aspx`
- [ ] today
- [ ] next 15 days
- [ ] archive
"""
# Zone code -> counties served by that DEER regional endpoint.
# Codes match the site's `zona` query parameter; presumably
# MN = Muntenia Nord, TS = Transilvania Sud, TN = Transilvania Nord — confirm.
targets= {
    "MN": ["Prahova", "Brăila", "Buzău", "Dâmbovița", "Galați", "Vrancea"],
    "TS": ["Brașov", "Alba", "Covasna", "Harghita", "Mureș", "Sibiu"],
    "TN": ["Cluj", "Bihor", "Bistrița-Năsăud", "Maramureș", "Satu Mare", "Sălaj"]
}
# Expected columns of the scraped outage table, plus our own 'zona' tag column.
cols_incidente = ['NUMAR LUCRARE', 'JUDEȚ', 'ADRESA', 'DATA ÎNCEPERE', 'DATA FINALIZARE', 'zona']
# Output path prefix; '.csv' / '.json' are appended when saving.
file_root_incidente = 'data/distributie-energie/deer-incidente'
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Request headers sent with every scrape. Only the User-Agent is active;
# the rest were captured from a real browser session and kept (commented out)
# in case the site starts rejecting bare requests.
headers = {
    # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
    # 'Accept-Language': 'en-GB,en;q=0.7',
    # 'Cache-Control': 'max-age=0',
    # 'Connection': 'keep-alive',
    # 'Content-Type': 'application/x-www-form-urlencoded',
    # 'Cookie': 'cookiesession1=56CBB03A3N3S4Z0IZ3SMSQDGTLJM1261',
    # 'Origin': 'https://intreruperi.edmn.ro',
    # 'Referer': 'https://intreruperi.edmn.ro/intreruperiMN.aspx',
    # 'Sec-Fetch-Dest': 'document',
    # 'Sec-Fetch-Mode': 'navigate',
    # 'Sec-Fetch-Site': 'same-origin',
    # 'Sec-Fetch-User': '?1',
    # 'Sec-GPC': '1',
    # 'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
    # 'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Brave";v="116"',
    # 'sec-ch-ua-mobile': '?0',
    # 'sec-ch-ua-platform': '"macOS"'
}
def deer_incidente(zona):
    """Fetch the unplanned-outage table for one DEER zone.

    Parameters
    ----------
    zona : str
        Zone code used in the site's query string (e.g. "MN", "TS", "TN").

    Returns
    -------
    pandas.DataFrame
        One row per outage with columns `cols_incidente`. On any failure
        (HTTP error, missing table) an *empty* DataFrame with the same
        columns is returned, so callers can concat unconditionally —
        the original returned None on failure, which made pd.concat raise.
    """
    url = "https://intreruperi.edmn.ro/incidente.aspx?zona=" + zona
    # NOTE(review): verify=False disables TLS certificate validation —
    # presumably the site's certificate chain is broken; confirm before removing.
    response = requests.get(url, headers=headers, verify=False)
    if response.status_code != 200:
        print(f" E72 Failed to retrieve data from {url}")
        return pd.DataFrame(columns=cols_incidente)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', {'id': 'tabNeprogramate'})
    if table is None:
        # 200 response but the expected table is absent (site layout change?).
        print(f" E73 No 'tabNeprogramate' table found at {url}")
        return pd.DataFrame(columns=cols_incidente)
    data = []
    for row in table.find_all('tr'):
        cells = [cell.text.strip() for cell in row.find_all('td')]
        cells.append(zona)  # tag every row with its zone code
        data.append(cells)
    # First <tr> is the header row — drop it (matches original behavior).
    data = data[1:]
    return pd.DataFrame(data, columns=cols_incidente)
# Collect unplanned outages from every zone into one frame and persist it.
incidente = pd.DataFrame(columns=cols_incidente)
for zona in targets:  # county lists unused here; only the zone code matters
    incidente = pd.concat([incidente, deer_incidente(zona)], ignore_index=True)

# TODO: also scrape planned outages ("intreruperi") per county —
# endpoint `intreruperi<zona>.aspx`, not implemented yet.

incidente.to_csv(file_root_incidente + '.csv', encoding='utf-8', index=False)
incidente.to_json(file_root_incidente + '.json', orient='records', lines=True)
print('saved ' + str(len(incidente)) + ' incidents')