-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSample_City.py
48 lines (39 loc) · 1.99 KB
/
Sample_City.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from bs4 import BeautifulSoup
from park_api.util import convert_date
from park_api.geodata import GeoData
# Load the geodata for this city. It is read from <city>.geojson if that file
# exists in the same directory as this module.
# There is no need to remove this line if there is no geodata (yet);
# everything will still work.
geodata = GeoData(__file__)
# This function is called by the scraper and given the data of the page specified as source in geojson above.
# It's supposed to return a dictionary containing everything the current spec expects. Tests will fail if it doesn't ;)
# States the spec allows for a lot; anything else is reported as "nodata".
_ALLOWED_STATES = ("open", "closed", "nodata")

# CSS classes of the table cells every parking-lot row must provide.
_LOT_CELL_CLASSES = ("lot_name", "lot_free", "lot_total", "lot_state")


def _parse_count(text):
    """Return the integer contained in a cell's text, or None if it is not a number."""
    try:
        return int(text.strip())
    except ValueError:
        return None


def _normalize_state(text):
    """Map a raw state string onto one of open, closed or nodata."""
    state = text.strip().lower()
    return state if state in _ALLOWED_STATES else "nodata"


def parse_html(html):
    """Parse the city's parking page into the dictionary the spec expects.

    Args:
        html: Markup of the source page, as handed over by the scraper.

    Returns:
        A dict with two keys: "last_updated", the page's timestamp passed
        through convert_date, and "lots", a list holding one dict per table
        row that describes a parking lot. "free" and "total" are integers and
        "state" is always one of open, closed or nodata.

    Raises:
        IndexError: if the page contains no ``p#last_updated`` element.
    """
    # BeautifulSoup is a great and easy way to parse the html and find the
    # bits and pieces we're looking for.
    soup = BeautifulSoup(html, "html.parser")

    # last_updated is the date when the data on the page was last updated,
    # it should be listed on most pages.
    last_updated = soup.select("p#last_updated")[0].text

    data = {
        # convert_date is a utility function you can use to turn this date
        # into the correct string format.
        "last_updated": convert_date(last_updated.strip(), "%d.%m.%Y %H:%M Uhr"),
        "lots": [],
    }

    for tr in soup.find_all("tr"):
        cells = [tr.find("td", {"class": css_class}) for css_class in _LOT_CELL_CLASSES]
        # Header or spacer rows do not carry the lot cells; skip them instead
        # of failing on a missing element.
        if any(cell is None for cell in cells):
            continue
        name_cell, free_cell, total_cell, state_cell = cells

        lot_name = name_cell.text.strip()
        lot_free = _parse_count(free_cell.text)
        lot_total = _parse_count(total_cell.text)

        # The state may only be open, closed or nodata; other values listed
        # on the page are mapped into these three possibilities.
        state = _normalize_state(state_cell.text)

        # Unreadable counts are reported as "nodata" rather than as numbers
        # that look valid.
        if lot_free is None or lot_total is None:
            lot_free = lot_free or 0
            lot_total = lot_total or 0
            state = "nodata"

        lot = geodata.lot(lot_name)
        data["lots"].append({
            "name": lot.name,
            "free": lot_free,
            "total": lot_total,
            "address": lot.address,
            "coords": lot.coords,
            "state": state,
            "lot_type": lot.type,
            "id": lot.id,
            "forecast": False,
        })

    return data