diff --git a/uk_bin_collection/tests/council_schemas/NeathPortTalbotCouncil.schema b/uk_bin_collection/tests/council_schemas/NeathPortTalbotCouncil.schema new file mode 100644 index 0000000000..19b178c7d4 --- /dev/null +++ b/uk_bin_collection/tests/council_schemas/NeathPortTalbotCouncil.schema @@ -0,0 +1,47 @@ +{ + "$schema": "http://json-schema.org/draft-06/schema#", + "$ref": "#/definitions/Welcome3", + "definitions": { + "Welcome3": { + "type": "object", + "additionalProperties": false, + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/definitions/Bin" + } + } + }, + "required": [ + "bins" + ], + "title": "Welcome3" + }, + "Bin": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { + "type": "string" + }, + "collectionDate": { + "$ref": "#/definitions/CollectionDate" + } + }, + "required": [ + "collectionDate", + "type" + ], + "title": "Bin" + }, + "CollectionDate": { + "type": "string", + "enum": [ + "23/10/2023", + "30/10/2023" + ], + "title": "CollectionDate" + } + } +} diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index 30e1573b2b..4846a109b5 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -52,6 +52,7 @@ Feature: Test each council output matches expected results in /outputs | MertonCouncil | | MidSussexDistrictCouncil | | MiltonKeynesCityCouncil | + | NeathPortTalbotCouncil | | NewarkAndSherwoodDC | | NewcastleCityCouncil | | NorthEastLincs | diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 694b0c3dde..a14b872be5 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -284,6 +284,13 @@ "wiki_name": "Milton Keynes City Council", "wiki_note": "Pass the name of the estate with the UPRN parameter, wrapped in double quotes" }, 
+ "NeathPortTalbotCouncil": { + "SKIP_GET_URL": "SKIP_GET_URL", + "postcode": "SA13 3BA", + "uprn": "10023947752", + "url": "https://www.npt.gov.uk", + "wiki_name": "Neath Port Talbot Council" + }, "NewarkAndSherwoodDC": { "url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529", "wiki_name": "Newark and Sherwood District Council", diff --git a/uk_bin_collection/tests/outputs/NeathPortTalbotCouncil.json b/uk_bin_collection/tests/outputs/NeathPortTalbotCouncil.json new file mode 100644 index 0000000000..9c22f531cf --- /dev/null +++ b/uk_bin_collection/tests/outputs/NeathPortTalbotCouncil.json @@ -0,0 +1,60 @@ +{ + "bins": [ + { + "type": "Plastic / Tins / Cans (White bag)", + "collectionDate": "23/10/2023" + }, + { + "type": "Cardboard (White bag)", + "collectionDate": "23/10/2023" + }, + { + "type": "Paper (Black box with lid)", + "collectionDate": "23/10/2023" + }, + { + "type": "Glass (Black box without lid)", + "collectionDate": "23/10/2023" + }, + { + "type": "Food waste (Green bin)", + "collectionDate": "23/10/2023" + }, + { + "type": "Batteries (Battery bag)", + "collectionDate": "23/10/2023" + }, + { + "type": "Household rubbish (Wheelie bin / black bags)", + "collectionDate": "30/10/2023" + }, + { + "type": "Garden waste (Green bag)", + "collectionDate": "30/10/2023" + }, + { + "type": "Plastic / Tins / Cans (White bag)", + "collectionDate": "30/10/2023" + }, + { + "type": "Cardboard (White bag)", + "collectionDate": "30/10/2023" + }, + { + "type": "Paper (Black box with lid)", + "collectionDate": "30/10/2023" + }, + { + "type": "Glass (Black box without lid)", + "collectionDate": "30/10/2023" + }, + { + "type": "Food waste (Green bin)", + "collectionDate": "30/10/2023" + }, + { + "type": "Batteries (Battery bag)", + "collectionDate": "30/10/2023" + } + ] +} \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/NeathPortTalbotCouncil.py 
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import \
    AbstractGetBinDataClass

# Page hosting the council's "find your bin day" lookup form.
_LOOKUP_URL = "https://www.npt.gov.uk/2195"

# Every ASP.NET control on the lookup form shares this generated ID prefix.
_ID_PREFIX = "ContentPlaceHolderDefault_ctl13_nptLLPG2_25_"

# Seconds to wait for each page element before Selenium raises TimeoutException.
_WAIT_SECONDS = 10


def _resolve_collection_date(heading_text, today):
    """
    Turn a year-less heading such as "Monday, 23 October" into a datetime.

    The site does not print the year, so the heading is tried against the
    previous, current and next calendar year and the result closest to
    ``today`` is returned. This keeps a January collection viewed in
    December (or the reverse) from landing roughly a year off.

    :param heading_text: heading text in "<weekday>, <day> <month>" form
    :param today: datetime used as the reference point for choosing the year
    :return: datetime (midnight) of the collection
    :raises ValueError: if the heading cannot be parsed for any candidate year
    """
    # str.split() also splits on non-breaking spaces, which the page may emit.
    day_and_month = " ".join(heading_text.split())

    candidates = []
    for year in (today.year - 1, today.year, today.year + 1):
        try:
            candidates.append(
                datetime.strptime(f"{day_and_month} {year}", "%A, %d %B %Y")
            )
        except ValueError:
            # Either the text is malformed (all three attempts fail) or the
            # day does not exist in this year, e.g. 29 February.
            continue

    if not candidates:
        raise ValueError(
            f"Unrecognised collection date heading: {heading_text!r}"
        )
    return min(candidates, key=lambda candidate: abs((candidate - today).days))


class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Neath Port Talbot Council.

    The council site has no direct URL per property, so the lookup form is
    driven with headless Chrome: the postcode is searched, the address whose
    option value equals the UPRN is selected, and the resulting page is parsed.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up the collections for a property and return them sorted by date.

        :param page: unused; the page is fetched here via Selenium
        :param kwargs: must contain ``uprn`` and ``postcode``
        :return: ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}`` with
            dates formatted using ``date_format`` and ordered chronologically
        :raises ValueError: if the results section or a date heading cannot
            be interpreted
        """
        user_uprn = kwargs.get("uprn")
        user_postcode = kwargs.get("postcode")
        check_uprn(user_uprn)
        check_postcode(user_postcode)

        html = self._load_results_html(user_postcode, user_uprn)
        return self._parse_collections(html, datetime.now())

    @staticmethod
    def _load_results_html(user_postcode, user_uprn):
        """Drive the lookup form in headless Chrome and return the results page HTML."""
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-gpu")
        options.add_argument("--disable-dev-shm-usage")
        options.add_experimental_option("excludeSwitches", ["enable-logging"])

        driver = webdriver.Chrome(options=options)
        try:
            wait = WebDriverWait(driver, _WAIT_SECONDS)
            driver.get(_LOOKUP_URL)

            # Accept cookies banner
            wait.until(
                EC.element_to_be_clickable((By.ID, "ccc-notify-accept"))
            ).click()

            # Populate postcode field
            postcode_input = wait.until(
                EC.presence_of_element_located(
                    (By.ID, f"{_ID_PREFIX}addresslookup_txtTmpPostcode")
                )
            )
            postcode_input.send_keys(user_postcode)

            # Click search button
            wait.until(
                EC.element_to_be_clickable(
                    (By.ID, f"{_ID_PREFIX}addresslookup_btnFindAddress")
                )
            ).click()

            # Short pause retained from the original flow; presumably it gives
            # the address list time to fill in after the search.
            time.sleep(1)

            # Wait for the 'Select address' dropdown, then pick the option
            # whose value matches the UPRN
            dropdown = wait.until(
                EC.presence_of_element_located(
                    (By.ID, f"{_ID_PREFIX}addresslookup_ddlAddressLookup")
                )
            )
            Select(dropdown).select_by_value(str(user_uprn))

            # Remove back to top button if exists
            driver.execute_script("""
                if (document.contains(document.querySelector(".backtotop"))) {
                    document.querySelector(".backtotop").remove();
                }
            """)

            # Submit the form to get the collection dates
            wait.until(
                EC.element_to_be_clickable((By.ID, f"{_ID_PREFIX}btnDisplay"))
            ).click()

            # Do not read the page until the results container exists;
            # otherwise the HTML may still be the pre-submit form.
            wait.until(
                EC.presence_of_element_located(
                    (By.ID, f"{_ID_PREFIX}divPropertyDetails")
                )
            )
            return driver.page_source
        finally:
            # Always release the browser process, including on failure.
            driver.quit()

    @staticmethod
    def _parse_collections(html, today):
        """Extract the collections from the results HTML, sorted by date."""
        soup = BeautifulSoup(html, features="html.parser")

        # Get the property details
        property_details = soup.find(
            "div", {"id": f"{_ID_PREFIX}divPropertyDetails"}
        )
        if property_details is None:
            raise ValueError(
                "Collection details section not found on the results page"
            )

        collections = []
        for heading in property_details.find_all("h2"):
            heading_text = heading.get_text(strip=True)
            # The "Bank Holidays" heading is not a collection day.
            if heading_text == "Bank Holidays":
                continue

            bin_date = _resolve_collection_date(heading_text, today)
            cards_wrapper = heading.find_next_sibling("div")
            for card in cards_wrapper.find_all("div", {"class": "card"}):
                # Link text is the waste type, span text is its container.
                waste_name = card.find("a").get_text(strip=True)
                container = card.find("span").get_text(strip=True)
                collections.append((bin_date, f"{waste_name} ({container})"))

        # Stable sort on the datetime itself, so page order is kept within a day.
        collections.sort(key=lambda entry: entry[0])

        return {
            "bins": [
                {
                    "type": bin_type,
                    "collectionDate": bin_date.strftime(date_format),
                }
                for bin_date, bin_type in collections
            ]
        }