forked from chinese-soup/zradlo
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrybarna.py
executable file
·99 lines (82 loc) · 2.97 KB
/
rybarna.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
# coding=utf-8
import requests, sys, re
from datetime import datetime
from bs4 import BeautifulSoup
def get_url():
    """Return the address of the page the daily menu is scraped from."""
    menu_page = "http://rybarna.net/denni-menu/"
    return menu_page
def get_name():
    """Return the display name of the restaurant this module scrapes."""
    restaurant = "Rybárna"
    return restaurant
def get_file():
    """Download the daily-menu page and return the HTTP response.

    The response encoding is forced to UTF-8 before it is returned, so that
    reading ``.text`` decodes the body as UTF-8 regardless of what the
    server declares.

    Returns:
        The ``requests`` response object for ``get_url()``.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the server does not answer within the timeout.
    """
    # Without an explicit timeout requests waits indefinitely on a server
    # that accepts the connection but never answers.
    kantyna = requests.get(get_url(), timeout=10)
    kantyna.encoding = 'UTF-8'
    return kantyna
def prepare_bs(kantyna):
    """Parse a successful HTTP response into a BeautifulSoup tree.

    Returns the parsed document when ``kantyna`` is a response with status
    code 200; for ``None`` or any other status code the string ``"Error"``
    is returned instead.
    """
    # Guard clause: anything but a 200 response yields the sentinel string.
    if kantyna is None or kantyna.status_code != 200:
        return "Error"
    return BeautifulSoup(kantyna.text, 'html.parser')
# A menu line found in a <p>: dish text, whitespace, then a price made of
# digits and "/" characters, optionally followed by whitespace, parentheses
# or "L" up to the end of the text.
_MENU_P_RE = re.compile(
    r"([\w\d\sěščřžýáíéúůóÓĚŠČŘŽÝÁÍÉÚŮöäëÄÖËťŤ\"\(\)\,\-\{\}0-9\.]+)[\s]+([0-9\/]+)[\s\(\)L]*$"
)
# A menu line found in an <li>: same shape, but the price must be 2-3 digits.
_MENU_LI_RE = re.compile(
    r"([\w\d\sěščřžýáíéúůóÓĚŠČŘŽÝÁÍÉÚŮöäëÄÖËťŤ\"\(\)\,\-\{\}0-9\.]+)[\s]+([0-9]{2,3})[\s\(\)L]*$"
)


def _extract_items(nodes, pattern):
    """Return ``[dish, price]`` pairs for every node whose text matches *pattern*."""
    found = []
    for node in nodes:
        match = pattern.match(node.text)
        if match is not None:
            found.append([match.group(1).strip(), match.group(2).strip() + " Kč"])
    return found


def return_menu(soup) -> list:
    """Collect the menu items from the ``entry-content`` div of the page.

    Args:
        soup: parsed document exposing ``find``/``find_all`` (as returned by
            ``prepare_bs`` on success).

    Returns:
        A list of ``[dish, price]`` lists, where the price is the matched
        number with ``" Kč"`` appended. Items found in ``<p>`` elements come
        first, followed by items found in the ``<li>`` elements of the first
        ``<ol>``. A missing ``<ol>`` simply contributes no items.

    Raises:
        AttributeError: if the page has no ``entry-content`` div.
    """
    content = soup.find("div", {"class": "entry-content"})

    # Per the original author's note, the soup is for some reason listed
    # outside the rest of the menu, so paragraphs and list items are scanned
    # in two separate passes.
    items = _extract_items(content.find_all("p"), _MENU_P_RE)

    ordered_list = content.find("ol")
    if ordered_list is not None:
        items.extend(_extract_items(ordered_list.find_all("li"), _MENU_LI_RE))
    return items
# Heading of the form "Denní menu: <day>. <month name>"; group 1 captures the
# day number, the dot and the following word.
_MENU_DATE_RE = re.compile(
    r"Denní menu\s?:\s+([0-9]{1,2}\.\s?[A-Za-zěščřžýáíéúůóÓĚŠČŘŽÝÁÍÉÚŮöäëÄÖËťŤ]+)"
)


def return_date(soup) -> str:
    """Extract the menu date from the ``entry-content`` div of the page.

    Args:
        soup: parsed document exposing ``find``/``find_all`` (as returned by
            ``prepare_bs`` on success).

    Returns:
        The date portion of the first ``<p>`` whose stripped text starts with
        the "Denní menu:" heading, or ``"???"`` when no paragraph matches.

    Raises:
        AttributeError: if the page has no ``entry-content`` div. This is
            left to propagate on purpose; ``result()`` catches it to report
            that no menu was found.
    """
    content = soup.find("div", {"class": "entry-content"})
    for paragraph in content.find_all("p"):
        match = _MENU_DATE_RE.match(paragraph.text.strip())
        if match:
            return match.group(1)
    return "???"
def result():
    """Fetch and parse the daily menu, never raising to the caller.

    Returns a 5-tuple ``(name, url, date, menu_list, locality)``. If any step
    of fetching or parsing raises, the exception is printed and the tuple
    carries ``"Menu nenalezeno"`` as the date and an empty menu list instead.
    """
    lokalita = "brumlovka"
    nazev = get_name()
    url = get_url()
    try:
        parsed = prepare_bs(get_file())
        date = return_date(parsed)
        menu_list = return_menu(parsed)
    except Exception as e:
        # Top-level boundary: report the problem and fall back to an empty menu.
        print(e)
        return (nazev, url, "Menu nenalezeno", [], lokalita)
    return (nazev, url, date, menu_list, lokalita)
if __name__ == "__main__":
    # Manual smoke run: download the page once, then print the parsed date
    # followed by the parsed menu items.
    bs = prepare_bs(get_file())
    print(return_date(bs), return_menu(bs))