Skip to content

Commit

Permalink
wirral_gov_uk: fix for crash introduced due to extra text for Christmas schedule (#1535)
Browse files Browse the repository at this point in the history

* wirral_gov_uk: Check the date format of each scraped line and discard any extraneous text or mismatching rows.
Fixes a crash caused by extra text describing Christmas schedule changes.

* minor reformatting

---------

Co-authored-by: Mike Stirling <opensource@mikestirling.co.uk>
Co-authored-by: 5ila5 <5ila5@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 19, 2023
1 parent a2127ed commit fb6a2a3
Showing 1 changed file with 16 additions and 11 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import requests
import re
from datetime import datetime

import requests
from bs4 import BeautifulSoup
from datetime import datetime
from waste_collection_schedule import Collection # type: ignore[attr-defined]

TITLE = "Wirral Council"
Expand All @@ -23,6 +24,7 @@
"Garden waste (brown bin)",
"Paper and packaging (grey bin)",
}
DATE_REGEX = "^([0-9]{1,2} [A-Za-z]+ [0-9]{4})"


class Source:
Expand All @@ -32,7 +34,7 @@ def __init__(self, street, suburb):

def fetch(self):
s = requests.Session()
#Loop through waste types
# Loop through waste types
entries = []
for waste in WASTES:
r = s.get(
Expand All @@ -43,13 +45,16 @@ def fetch(self):
dates = soup.findAll("li")
if len(dates) != 0:
for item in dates:
entries.append(
Collection(
date=datetime.strptime(item.text, "%d %B %Y").date(),
t=waste,
icon=ICON_MAP.get(waste.upper()),
match = re.match(DATE_REGEX, item.text)
if match:
entries.append(
Collection(
date=datetime.strptime(
match.group(1), "%d %B %Y"
).date(),
t=waste,
icon=ICON_MAP.get(waste.upper()),
)
)
)


return entries

0 comments on commit fb6a2a3

Please sign in to comment.