diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index c5488f86f6..e22eb904fc 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -244,6 +244,13 @@ "wiki_name": "Breckland Council", "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." }, + "BrentCouncil": { + "house_number": "25", + "postcode": "HA3 0QU", + "url": "https://recyclingservices.brent.gov.uk/waste", + "wiki_name": "Brent Council", + "wiki_note": "Pass the house number and postcode in their respective parameters." + }, "BrightonandHoveCityCouncil": { "house_number": "44 Carden Avenue, Brighton, BN1 8NE", "postcode": "BN1 8NE", @@ -440,7 +447,7 @@ "uprn": "100110734613", "url": "https://www.copeland.gov.uk", "wiki_name": "Copeland Borough Council", - "wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN." + "wiki_note": "*****This has now been replaced by Cumberland Council****" }, "CornwallCouncil": { "skip_get_url": true, @@ -486,6 +493,12 @@ "wiki_name": "Croydon Council", "wiki_note": "Pass the house number and postcode in their respective parameters." }, + "CumberlandCouncil": { + "uprn": "100110734613", + "url": "https://waste.cumberland.gov.uk", + "wiki_name": "Cumberland Borough Council", + "wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN." + }, "CumberlandAllerdaleCouncil": { "house_number": "2", "postcode": "CA13 0DE", @@ -508,6 +521,12 @@ "wiki_name": "Dartford Borough Council", "wiki_note": "Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN." }, + "DenbighshireCouncil": { + "url": "https://www.denbighshire.gov.uk/", + "uprn": "200004299351", + "wiki_name": "Denbighshire Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." 
+ }, "DerbyCityCouncil": { "url": "https://www.derby.gov.uk", "uprn": "10010684240", @@ -550,6 +569,12 @@ "wiki_name": "Dudley Council", "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." }, + "DundeeCityCouncil": { + "url": "https://www.dundeecity.gov.uk/", + "uprn": "9059043390", + "wiki_name": "Dundee City Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." + }, "DurhamCouncil": { "skip_get_url": true, "uprn": "200003218818", @@ -805,6 +830,12 @@ "wiki_name": "Guildford Council", "wiki_note": "If the bin day is 'today' then the collectionDate will only show today's date if before 7 AM; else the date will be in 'previousCollectionDate'. To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search)." }, + "GwyneddCouncil": { + "url": "https://diogel.gwynedd.llyw.cymru", + "uprn": "10070350463", + "wiki_name": "Gwynedd Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." + }, "HackneyCouncil": { "house_number": "101", "postcode": "N16 9AS", @@ -936,13 +967,11 @@ "wiki_note": "Follow the instructions [here](https://waste-services.kingston.gov.uk/waste) until the \"Your bin days\" page, then copy the URL and replace the URL in the command." }, "KirkleesCouncil": { - "house_number": "24", - "postcode": "HD7 5DX", + "uprn": "83002937", "skip_get_url": true, "url": "https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins", - "web_driver": "http://selenium:4444", "wiki_name": "Kirklees Council", - "wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver." + "wiki_note": "Provide your UPRN. Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search)." 
}, "KnowsleyMBCouncil": { "house_number": "22", @@ -1325,7 +1354,7 @@ "house_number": "22", "postcode": "NE46 1UQ", "skip_get_url": true, - "url": "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx", + "url": "https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx", "web_driver": "http://selenium:4444", "wiki_name": "Northumberland Council", "wiki_note": "Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver." @@ -1344,6 +1373,12 @@ "house_number": "Newdigate Road", "wiki_note": "Pass the name of the street ONLY in the house number parameter, wrapped in double quotes. Street name must match exactly as it appears on the council's website." }, + "OadbyAndWigstonBoroughCouncil": { + "url": "https://my.oadby-wigston.gov.uk", + "uprn": "10010149102", + "wiki_name": "Oadby & Wigston Borough Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." + }, "OldhamCouncil": { "url": "https://portal.oldham.gov.uk/bincollectiondates/details?uprn=422000033556", "wiki_name": "Oldham Council", @@ -1985,6 +2020,12 @@ "wiki_name": "West Berkshire Council", "wiki_note": "Provide your house number in the `house_number` parameter and postcode in the `postcode` parameter." }, + "WestDunbartonshireCouncil": { + "url": "https://www.west-dunbarton.gov.uk/", + "uprn": "129001383", + "wiki_name": "West Dunbartonshire Council", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN." 
+ }, "WestLancashireBoroughCouncil": { "url": "https://www.westlancs.gov.uk", "uprn": "10012343339", diff --git a/uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py new file mode 100644 index 0000000000..7252eeae75 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py @@ -0,0 +1,115 @@ +from time import sleep + +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + data = {"bins": []} + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + check_postcode(user_postcode) + check_paon(user_paon) + + URI = "https://recyclingservices.brent.gov.uk/waste" + + payload = {"postcode": user_postcode} + + s = requests.Session() + + # Make the POST request + response = s.post(URI, data=payload) + + # Make a BS4 object + soup = BeautifulSoup(response.content, features="html.parser") + + address_list = soup.find_all("option") + + current_year = datetime.now().year + next_year = current_year + 1 + + for address in address_list: + if user_paon in (address.text): + address_id = address.get("value") + URI = f"https://recyclingservices.brent.gov.uk/waste/{address_id}" + + counter = 0 + r = s.get(URI) + while "Loading your bin days..." 
in r.text: + counter = counter + 1 + if counter == 20: + return data + sleep(2) + r = s.get(URI) + + r.raise_for_status() + + soup = BeautifulSoup(r.content, features="html.parser") + + wastecollections = soup.find("div", {"class": "waste__collections"}) + + # Find all waste service sections + waste_services = wastecollections.find_all( + "h3", class_="govuk-heading-m waste-service-name" + ) + + for service in waste_services: + # Get the collection type (e.g., Rubbish, Recycling) + collection_type = (service.get_text(strip=True)).split("\n")[0] + + # Find the sibling container holding details + service_details = service.find_next( + "dl", class_="govuk-summary-list" + ) + + if service_details: + + # Extract next collection date + next_collection_row = service_details.find( + "dt", string="Next collection" + ) + next_collection = ( + next_collection_row.find_next_sibling("dd").get_text( + strip=True + ) + if next_collection_row + else "Unknown" + ) + + # Parse dates into standard dd/mm/yyyy format + next_collection_date = datetime.strptime( + remove_ordinal_indicator_from_date_string(next_collection), + "%A, %d %B", + ) + + if (datetime.now().month == 12) and ( + next_collection_date.month == 1 + ): + next_collection_date = next_collection_date.replace( + year=next_year + ) + else: + next_collection_date = next_collection_date.replace( + year=current_year + ) + + dict_data = { + "type": collection_type.strip(), + "collectionDate": next_collection_date.strftime( + date_format + ), + } + data["bins"].append(dict_data) + + return data diff --git a/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py index 2170c16f5f..7af3148fe3 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CornwallCouncil.py @@ -1,7 +1,8 @@ from bs4 import BeautifulSoup +from dateutil.relativedelta import relativedelta + from 
uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -from dateutil.relativedelta import relativedelta # import the wonderful Beautiful Soup and the URL grabber @@ -52,9 +53,13 @@ def parse_data(self, page: str, **kwargs) -> dict: for item in soup.find_all("div", class_="collection text-center service"): bin_type = item.contents[1].text + " bin" - collection_date = datetime.strptime(item.contents[5].text, "%d %b").replace( - year=curr_date.year - ) + try: + collection_date = datetime.strptime( + item.contents[5].text, "%d %b" + ).replace(year=curr_date.year) + except: + continue + if curr_date.month == 12 and collection_date.month == 1: collection_date = collection_date + relativedelta(years=1) collections.append((bin_type, collection_date)) diff --git a/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py new file mode 100644 index 0000000000..90bdc6c2b8 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/CumberlandCouncil.py @@ -0,0 +1,96 @@ +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A" + + s = requests.Session() + + # Make the GET request + response = s.get(URI) + + # Make a BS4 object + soup = BeautifulSoup(response.content, features="html.parser") + + # print(soup) + + token = (soup.find("input", {"name": "__RequestVerificationToken"})).get( + "value" + ) + + formguid = (soup.find("input", {"name": "FormGuid"})).get("value") + + # print(token) + # print(formguid) + + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Origin": "https://waste.cumberland.gov.uk", + "Referer": "https://waste.cumberland.gov.uk/renderform?t=25&k=E43CEB1FB59F859833EF2D52B16F3F4EBE1CAB6A", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 OPR/98.0.0.0", + "X-Requested-With": "XMLHttpRequest", + } + + payload = { + "__RequestVerificationToken": token, + "FormGuid": formguid, + "ObjectTemplateID": "25", + "Trigger": "submit", + "CurrentSectionID": "33", + "TriggerCtl": "", + "FF265": f"U{user_uprn}", + "FF265lbltxt": "Please select your address", + } + + # print(payload) + + response = s.post( + "https://waste.cumberland.gov.uk/renderform/Form", + headers=headers, + data=payload, + ) + + soup = BeautifulSoup(response.content, features="html.parser") + for row in soup.find_all("div", class_="resirow"): + # Extract the type of collection (e.g., Recycling, Refuse) + collection_type_div = row.find("div", class_="col") + collection_type = ( + collection_type_div.get("class")[1] + if collection_type_div + else "Unknown" + ) + + # Extract the collection date + date_div = row.find("div", style="width:360px;") + collection_date = date_div.text.strip() if date_div else "Unknown" + + dict_data = { + "type": collection_type, + 
"collectionDate": datetime.strptime( + collection_date, "%A %d %B %Y" + ).strftime(date_format), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/DenbighshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/DenbighshireCouncil.py new file mode 100644 index 0000000000..67aa38c4d2 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/DenbighshireCouncil.py @@ -0,0 +1,66 @@ +import time + +import requests + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = "https://refusecalendarapi.denbighshire.gov.uk/Csrf/token" + + token = requests.get(URI) + + token_data = token.json() + + URI = f"https://refusecalendarapi.denbighshire.gov.uk/Calendar/{user_uprn}" + + headers = { + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "content-type": "application/json", + "dnt": "1", + "host": "refusecalendarapi.denbighshire.gov.uk", + "referer": "https://refusecalendarapi.denbighshire.gov.uk/", + "x-csrf-token": token_data["token"], + } + + # Make the GET request + response = requests.get(URI, headers=headers) + + # Parse the JSON response + json_data = response.json() + + bin_types = { + "refuseDate": "Refuse", + "recyclingDate": "Recycling", + "gardenDate": "Garden Waste", + "ahpDate": "AHP (Assisted Household Pickup)", + "tradeDate": "Trade Waste", + "tradeRefuseDate": "Trade Refuse", + "tradeRecyclingDate": "Trade Recycling", + } + + bindata["bins"] = [ + {"type": label, "collectionDate": json_data[key]} + for key, label in bin_types.items() + if json_data.get(key) + ] + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/DundeeCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/DundeeCityCouncil.py new file mode 100644 index 0000000000..cffd2e6ec8 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/DundeeCityCouncil.py @@ -0,0 +1,44 @@ +import time + +import requests + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to 
implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = f"https://www.dundee-mybins.co.uk/get_calendar.php?rn={user_uprn}" + + # Make the GET request + response = requests.get(URI) + + # Parse the JSON response + bin_collection = response.json() + + for item in bin_collection: + dict_data = { + "type": item["title"], + "collectionDate": datetime.strptime(item["start"], "%Y-%m-%d").strftime( + date_format + ), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/GwyneddCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GwyneddCouncil.py new file mode 100644 index 0000000000..be83519f9e --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/GwyneddCouncil.py @@ -0,0 +1,54 @@ +import requests +from bs4 import BeautifulSoup, Tag + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = f"https://diogel.gwynedd.llyw.cymru/Daearyddol/en/LleDwinByw/Index/{user_uprn}" + + # Make the GET request + response = requests.get(URI) + + soup = BeautifulSoup(response.text, "html.parser") + collections_headline = soup.find("h6", text="Next collection dates:") + if not isinstance(collections_headline, Tag): + raise Exception("Could not find collections") + collections = collections_headline.find_next("ul").find_all("li") + + for collection in collections: + if not isinstance(collection, Tag): + continue + for p in collection.find_all("p"): + p.extract() + + bin_type, date_str = collection.text.strip().split(":")[:2] + bin_type, date_str = bin_type.strip(), date_str.strip() + + dict_data = { + "type": bin_type, + "collectionDate": datetime.strptime(date_str, "%A %d/%m/%Y").strftime( + date_format + ), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), date_format) + ) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py b/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py index 238c978f6c..757b23689a 100644 --- a/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/KirkleesCouncil.py @@ -1,36 +1,12 @@ import time -from datetime import datetime -from typing import Optional -from bs4 import BeautifulSoup -from selenium.common import TimeoutException -from selenium.webdriver.common.by import By -from selenium.webdriver.common.keys import Keys -from selenium.webdriver.remote.webdriver import WebDriver -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.wait import WebDriverWait -from webdriver_manager.drivers.chrome import ChromeDriver +import requests -from selenium import 
webdriver - -from uk_bin_collection.uk_bin_collection.common import create_webdriver -from uk_bin_collection.uk_bin_collection.common import date_format +from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -def wait_for_element(driver, element_type, element: str, timeout: int = 5): - element_present = EC.presence_of_element_located((element_type, element)) - wait_for_element_conditions(driver, element_present, timeout=timeout) - - -def wait_for_element_conditions(driver, conditions, timeout: int = 5): - try: - WebDriverWait(driver, timeout).until(conditions) - except TimeoutException: - print("Timed out waiting for page to load") - raise - - +# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): """ Concrete classes have to implement all abstract operations of the @@ -38,106 +14,63 @@ class CouncilClass(AbstractGetBinDataClass): implementation. """ - def __init__(self): - self._driver: Optional[WebDriver] = None - - def parse_data(self, *args, **kwargs) -> dict: - try: - return self._parse_data(*args, **kwargs) - finally: - if self._driver: - self._driver.quit() - - def _parse_data(self, page: str, **kwargs) -> dict: - """ - Process: - - - Use a house number and postcode that is known to be domestic and resolves to a - single unique address. 
When the address search form is submitted with - those details, a session is created - - - Now a session exists, navigate to the calendar URL, specifying the UPRN - - - Extract info from the 'alt' attribute of the images on that page - """ - data = {"bins": []} - collections = [] - - user_paon = kwargs["paon"] - user_postcode = kwargs["postcode"] - - self._driver = driver = webdriver.Chrome() - # self._driver = driver = create_webdriver( - # web_driver=kwargs["web_driver"], - # headless=kwargs.get("headless", True), - # session_name=__name__, - # ) - driver.implicitly_wait(1) - - driver.get( - "https://my.kirklees.gov.uk/service/Bins_and_recycling___Manage_your_bins" - ) - - time.sleep(5) - - # Switch to iframe - iframe = driver.find_element(By.CSS_SELECTOR, "#fillform-frame-1") - driver.switch_to.frame(iframe) - - wait_for_element( - driver, By.ID, "mandatory_Postcode", timeout=10 - ) - - postcode_input = driver.find_element( - By.ID, "Postcode" - ) - postcode_input.send_keys(user_postcode) - - wait_for_element(driver, By.ID, "List") - time.sleep(2) - - WebDriverWait(driver, 10).until( - EC.element_to_be_clickable( - ( - By.XPATH, - "//select[@name='List']//option[contains(., '" - + user_paon - + "')]", - ) - ) - ).click() - - time.sleep(10) - - # For whatever reason, the page sometimes automatically goes to the next step - next_button = driver.find_element(By.XPATH, '/html/body/div/div/section/form/div/nav/div[2]/button') - if next_button.is_displayed(): - next_button.click() - - - time.sleep(5) - - soup = BeautifulSoup(self._driver.page_source, features="html.parser") - soup.prettify() - - radio_button_text = soup.find_all("label", {"class": "radio-label"}) - for label in radio_button_text: - parsed_text = label.text.split("x ") - row = parsed_text[1].lower().split("collection date: ") - bin_type = row[0].split("(")[0].strip() - date_text = row[1].strip().replace(")", "") - if date_text == "today": - bin_date = datetime.now() - else: - bin_date = 
datetime.strptime(date_text, "%A %d %B %Y") - collections.append((bin_type, bin_date)) - - ordered_data = sorted(collections, key=lambda x: x[1]) - for item in ordered_data: - dict_data = { - "type": item[0].replace("standard ", "").capitalize(), - "collectionDate": item[1].strftime(date_format), - } - data["bins"].append(dict_data) - - return data + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + SESSION_URL = "https://my.kirklees.gov.uk/authapi/isauthenticated?uri=https%253A%252F%252Fmy.kirklees.gov.uk%252Fservice%252FBins_and_recycling___Manage_your_bins&hostname=my.kirklees.gov.uk&withCredentials=true" + + API_URL = "https://my.kirklees.gov.uk/apibroker/runLookup" + + data = { + "formValues": {"Search": {"validatedUPRN": {"value": user_uprn}}}, + } + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "User-Agent": "Mozilla/5.0", + "X-Requested-With": "XMLHttpRequest", + "Referer": "https://my.kirklees.gov.uk/fillform/?iframe_id=fillform-frame-1&db_id=", + } + s = requests.session() + r = s.get(SESSION_URL) + r.raise_for_status() + session_data = r.json() + sid = session_data["auth-session"] + params = { + "id": "65e08e60b299d", + "repeat_against": "", + "noRetry": "false", + "getOnlyTokens": "undefined", + "log_id": "", + "app_name": "AF-Renderer::Self", + # unix_timestamp + "_": str(int(time.time() * 1000)), + "sid": sid, + } + + r = s.post(API_URL, json=data, headers=headers, params=params) + r.raise_for_status() + + data = r.json() + rows_data = data["integration"]["transformed"]["rows_data"] + if not isinstance(rows_data, dict): + raise ValueError("Invalid data returned from API") + + for bin_id, bin_info in rows_data.items(): + label = bin_info.get("label", "Unknown") + next_collection_date = bin_info.get("NextCollectionDate", "Unknown") + # Convert the date string into a readable format + try: + formatted_date = datetime.strptime( + 
next_collection_date, "%Y-%m-%dT%H:%M:%S" + ).strftime(date_format) + except ValueError: + formatted_date = "Unknown" + + dict_data = {"type": label, "collectionDate": formatted_date} + bindata["bins"].append(dict_data) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py index 8537e1ac32..b232051cb8 100644 --- a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py +++ b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughSutton.py @@ -61,7 +61,7 @@ def parse_data(self, page: str, **kwargs) -> dict: "%A, %d %B", ) - if next_collection.month == 1: + if (datetime.now().month == 12) and (next_collection.month == 1): next_collection = next_collection.replace(year=next_year) else: next_collection = next_collection.replace(year=current_year) diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py index 17b3eaa8ff..3b6ac63dcc 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthumberlandCouncil.py @@ -27,7 +27,7 @@ def extract_styles(self, style_str: str) -> dict: def parse_data(self, page: str, **kwargs) -> dict: driver = None try: - page = "https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx" + page = "https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx" data = {"bins": []} @@ -74,7 +74,7 @@ def parse_data(self, page: str, **kwargs) -> dict: "span", id="p_lt_ctl04_pageplaceholder_p_lt_ctl02_WasteCollectionCalendars_spanRouteSummary", ) - .string.replace("Routes found: ", "") + .text.replace("Routes found: ", "") .split(","), ) ) @@ -82,6 +82,8 @@ def parse_data(self, page: str, **kwargs) -> dict: # Get the background colour for each of them... 
bins_by_colours = dict() for bin in bins_collected: + if "(but no dates found)" in bin: + continue style_str = soup.find("span", string=bin)["style"] bin_colour = self.extract_styles(style_str)["background-color"].upper() bins_by_colours[bin_colour] = bin diff --git a/uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py new file mode 100644 index 0000000000..a3ef9d2986 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/OadbyAndWigstonBoroughCouncil.py @@ -0,0 +1,65 @@ +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = f"https://my.oadby-wigston.gov.uk/location?put=ow{user_uprn}&rememberme=0&redirect=%2F" + + # Make the GET request + response = requests.get(URI) + + soup = BeautifulSoup(response.text, features="html.parser") + soup.prettify() + + # Find the collection list + collection_list = soup.find("ul", class_="refuse") + + current_year = datetime.now().year + next_year = current_year + 1 + + # Loop through each collection item + for li in collection_list.find_all("li"): + date_text = li.find("strong", class_="date").text.strip() + bin_type = li.find("a").text # Link text is used as the bin type + + # Parse the date + if date_text == "Today": + collection_date = datetime.now() + else: + try: + collection_date = datetime.strptime(date_text, "%A %d %b") + except ValueError: + continue + + if (datetime.now().month == 12) and (collection_date.month == 1): + collection_date = collection_date.replace(year=next_year) + else: + collection_date = collection_date.replace(year=current_year) + + dict_data = { + "type": bin_type, + "collectionDate": collection_date.strftime(date_format), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/uk_bin_collection/uk_bin_collection/councils/WalthamForest.py b/uk_bin_collection/uk_bin_collection/councils/WalthamForest.py index d7566e2e1c..f2afd4e6e7 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WalthamForest.py +++ b/uk_bin_collection/uk_bin_collection/councils/WalthamForest.py @@ -3,10 +3,10 @@ from bs4 import BeautifulSoup from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import Select from selenium.webdriver.support.wait import 
WebDriverWait -from selenium.webdriver.common.keys import Keys from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -73,7 +73,7 @@ def parse_data(self, page: str, **kwargs) -> dict: find_ac_button.send_keys(Keys.RETURN) h4_element = wait.until( EC.presence_of_element_located( - (By.XPATH, "//h4[contains(text(), 'Your Collections')]") + (By.XPATH, "//h4[contains(text(), 'Next Collections')]") ) ) diff --git a/uk_bin_collection/uk_bin_collection/councils/WestDunbartonshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestDunbartonshireCouncil.py new file mode 100644 index 0000000000..fef03704ee --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/WestDunbartonshireCouncil.py @@ -0,0 +1,66 @@ +import requests +from bs4 import BeautifulSoup + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def parse_data(self, page: str, **kwargs) -> dict: + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + bindata = {"bins": []} + + URI = f"https://www.west-dunbarton.gov.uk/recycling-and-waste/bin-collection-day/?uprn={user_uprn}" + + # Make the GET request + response = requests.get(URI) + + soup = BeautifulSoup(response.content, "html.parser") + + # For each next-date class get the text within the date-string class + schedule_details = soup.findAll("div", {"class": "round-info"}) + + for item in schedule_details: + schedule_date = item.find("span", {"class": "date-string"}).text.strip() + schedule_type = item.find("div", {"class": "round-name"}).text.strip() + # Format is 22 March 2023 - convert to date + collection_date = datetime.strptime(schedule_date, "%d %B %Y").date() + + # If the type contains "Blue bin or bag" or "Blue" then set the type to "BLUE" + if "bag" in schedule_type.lower() or "blue" in schedule_type.lower(): + dict_data = { + "type": "Blue", + "collectionDate": collection_date.strftime(date_format), + } + bindata["bins"].append(dict_data) + + # If the type contains "caddy" or "brown" then set the type to "BROWN" + if "caddy" in schedule_type.lower() or "brown" in schedule_type.lower(): + dict_data = { + "type": "Brown", + "collectionDate": collection_date.strftime(date_format), + } + bindata["bins"].append(dict_data) + + # If the type contains "Non-Recyclable" then set the type to "BLACK", compare in lowercase + if "non-recyclable" in schedule_type.lower(): + dict_data = { + "type": "Black", + "collectionDate": collection_date.strftime(date_format), + } + bindata["bins"].append(dict_data) + + bindata["bins"].sort( + key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y") + ) + + return bindata diff --git a/wiki/Councils.md b/wiki/Councils.md index 279b65d555..494a9a08d7 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -41,6 +41,7 @@ This document is still a work in progress, don't worry if your council isn't 
lis - [Bradford MDC](#bradford-mdc) - [Braintree District Council](#braintree-district-council) - [Breckland Council](#breckland-council) +- [Brent Council](#brent-council) - [Brighton and Hove City Council](#brighton-and-hove-city-council) - [Bristol City Council](#bristol-city-council) - [Bromley Borough Council](#bromley-borough-council) @@ -72,15 +73,18 @@ This document is still a work in progress, don't worry if your council isn't lis - [Cotswold District Council](#cotswold-district-council) - [Crawley Borough Council](#crawley-borough-council) - [Croydon Council](#croydon-council) +- [Cumberland Borough Council](#cumberland-borough-council) - [Cumberland Council - Allerdale District](#cumberland-council---allerdale-district) - [Dacorum Borough Council](#dacorum-borough-council) - [Dartford Borough Council](#dartford-borough-council) +- [Denbighshire Council](#denbighshire-council) - [Derby City Council](#derby-city-council) - [Derbyshire Dales District Council](#derbyshire-dales-district-council) - [Doncaster Council](#doncaster-council) - [Dorset Council](#dorset-council) - [Dover District Council](#dover-district-council) - [Dudley Council](#dudley-council) +- [Dundee City Council](#dundee-city-council) - [Durham Council](#durham-council) - [Ealing Council](#ealing-council) - [East Ayrshire Council](#east-ayrshire-council) @@ -112,11 +116,13 @@ This document is still a work in progress, don't worry if your council isn't lis - [Gloucester City Council](#gloucester-city-council) - [Gravesham Borough Council](#gravesham-borough-council) - [Guildford Council](#guildford-council) +- [Gwynedd Council](#gwynedd-council) - [Hackney Council](#hackney-council) - [Halton Borough Council](#halton-borough-council) - [Harborough District Council](#harborough-district-council) - [Haringey Council](#haringey-council) - [Harrogate Borough Council](#harrogate-borough-council) +- [Hart District Council](#hart-district-council) - [Hartlepool Borough 
Council](#hartlepool-borough-council) - [Hertsmere Borough Council](#hertsmere-borough-council) - [Highland Council](#highland-council) @@ -183,6 +189,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Northumberland Council](#northumberland-council) - [Nottingham City Council](#nottingham-city-council) - [Nuneaton and Bedworth Borough Council](#nuneaton-and-bedworth-borough-council) +- [Oadby & Wigston Borough Council](#oadby--wigston-borough-council) - [Oldham Council](#oldham-council) - [Oxford City Council](#oxford-city-council) - [Perth and Kinross Council](#perth-and-kinross-council) @@ -265,6 +272,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Wealden District Council](#wealden-district-council) - [Welhat Council](#welhat-council) - [West Berkshire Council](#west-berkshire-council) +- [West Dunbartonshire Council](#west-dunbartonshire-council) - [West Lancashire Borough Council](#west-lancashire-borough-council) - [West Lindsey District Council](#west-lindsey-district-council) - [West Lothian Council](#west-lothian-council) @@ -668,6 +676,18 @@ Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/searc --- +### Brent Council +```commandline +python collect_data.py BrentCouncil https://recyclingservices.brent.gov.uk/waste -p "XXXX XXX" -n XX +``` +Additional parameters: +- `-p` - postcode +- `-n` - house number + +Note: Pass the house number and postcode in their respective parameters. + +--- + ### Brighton and Hove City Council ```commandline python collect_data.py BrightonandHoveCityCouncil https://cityclean.brighton-hove.gov.uk/link/collections -s -u XXXXXXXX -p "XXXX XXX" -n XX -w http://HOST:PORT/ @@ -983,7 +1003,7 @@ python collect_data.py CopelandBoroughCouncil https://www.copeland.gov.uk -u XXX Additional parameters: - `-u` - UPRN -Note: Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN. 
+Note: *****This has now been replaced by Cumberland Council**** --- @@ -1048,6 +1068,17 @@ Note: Pass the house number and postcode in their respective parameters. --- +### Cumberland Borough Council +```commandline +python collect_data.py CumberlandCouncil https://waste.cumberland.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN. + +--- + ### Cumberland Council - Allerdale District ```commandline python collect_data.py CumberlandAllerdaleCouncil https://www.allerdale.gov.uk -p "XXXX XXX" -n XX @@ -1085,6 +1116,17 @@ Note: Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your U --- +### Denbighshire Council +```commandline +python collect_data.py DenbighshireCouncil https://www.denbighshire.gov.uk/ -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### Derby City Council ```commandline python collect_data.py DerbyCityCouncil https://www.derby.gov.uk -u XXXXXXXX @@ -1154,6 +1196,17 @@ Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/searc --- +### Dundee City Council +```commandline +python collect_data.py DundeeCityCouncil https://www.dundeecity.gov.uk/ -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. 
+ +--- + ### Durham Council ```commandline python collect_data.py DurhamCouncil https://www.durham.gov.uk/bincollections?uprn= -s -u XXXXXXXX @@ -1530,6 +1583,17 @@ Note: If the bin day is 'today' then the collectionDate will only show today's d --- +### Gwynedd Council +```commandline +python collect_data.py GwyneddCouncil https://diogel.gwynedd.llyw.cymru -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### Hackney Council ```commandline python collect_data.py HackneyCouncil https://www.hackney.gov.uk -p "XXXX XXX" -n XX @@ -1591,6 +1655,18 @@ Note: Pass the UPRN, which can be found at [this site](https://secure.harrogate. --- +### Hart District Council +```commandline +python collect_data.py HartDistrictCouncil https://www.hart.gov.uk/ -s -u XXXXXXXX +``` +Additional parameters: +- `-s` - skip get URL +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### Hartlepool Borough Council ```commandline python collect_data.py HartlepoolBoroughCouncil https://www.hartlepool.gov.uk -u XXXXXXXX @@ -1721,15 +1797,13 @@ Note: Follow the instructions [here](https://waste-services.kingston.gov.uk/wast ### Kirklees Council ```commandline -python collect_data.py KirkleesCouncil https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ +python collect_data.py KirkleesCouncil https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins -s -u XXXXXXXX ``` Additional parameters: - `-s` - skip get URL -- `-p` - postcode -- `-n` - house number -- `-w` - remote Selenium web driver URL (required for Home Assistant) +- `-u` - UPRN -Note: Pass the house number and postcode in their respective parameters. This parser requires a Selenium webdriver. +Note: Provide your UPRN. 
Find your UPRN using [FindMyAddress](https://www.findmyaddress.co.uk/search). --- @@ -2345,7 +2419,7 @@ Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/searc ### Northumberland Council ```commandline -python collect_data.py NorthumberlandCouncil https://www.northumberland.gov.uk/Waste/Bins/Bin-Calendars.aspx -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ +python collect_data.py NorthumberlandCouncil https://www.northumberland.gov.uk/Waste/Household-waste/Household-bin-collections/Bin-Calendars.aspx -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ ``` Additional parameters: - `-s` - skip get URL @@ -2381,6 +2455,17 @@ Note: Pass the name of the street ONLY in the house number parameter, wrapped in --- +### Oadby & Wigston Borough Council +```commandline +python collect_data.py OadbyAndWigstonBoroughCouncil https://my.oadby-wigston.gov.uk -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### Oldham Council ```commandline python collect_data.py OldhamCouncil https://portal.oldham.gov.uk/bincollectiondates/details?uprn=422000033556 @@ -3358,6 +3443,17 @@ Note: Provide your house number in the `house_number` parameter and postcode in --- +### West Dunbartonshire Council +```commandline +python collect_data.py WestDunbartonshireCouncil https://www.west-dunbarton.gov.uk/ -u XXXXXXXX +``` +Additional parameters: +- `-u` - UPRN + +Note: You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. + +--- + ### West Lancashire Borough Council ```commandline python collect_data.py WestLancashireBoroughCouncil https://www.westlancs.gov.uk -u XXXXXXXX -p "XXXX XXX"