Skip to content

Commit

Permalink
Merge pull request #948 from m26dvd/master
Browse files Browse the repository at this point in the history
feat: Council Pack 11
  • Loading branch information
robbrad authored Nov 2, 2024
2 parents e64822d + a881649 commit 9aaebc4
Show file tree
Hide file tree
Showing 5 changed files with 339 additions and 49 deletions.
27 changes: 23 additions & 4 deletions uk_bin_collection/tests/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@
"wiki_name": "Bradford MDC",
"wiki_note": "To get the UPRN, you will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search). Post code isn't parsed by this script, but you can pass it in double quotes."
},
"BrecklandCouncil": {
"url": "https://www.breckland.gov.uk",
"wiki_command_url_override": "https://www.breckland.gov.uk",
"uprn": "100091495479",
"wiki_name": "Breckland Council",
"wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN."
},
"BrightonandHoveCityCouncil": {
"house_number": "44 Carden Avenue, Brighton, BN1 8NE",
"postcode": "BN1 8NE",
Expand Down Expand Up @@ -314,6 +321,15 @@
"wiki_name": "Cornwall Council",
"wiki_note": "Use https://uprn.uk/ to find your UPRN."
},
"CotswoldDistrictCouncil": {
"house_number": "19",
"postcode": "GL56 0GB",
"skip_get_url": true,
"url": "https://community.cotswold.gov.uk/s/waste-collection-enquiry",
"web_driver": "http://selenium:4444",
"wiki_name": "Cotswold District Council",
"wiki_note": "Pass the house number (or property name) in the house number parameter and the postcode in the postcode parameter."
},
"CoventryCityCouncil": {
"url": "https://www.coventry.gov.uk/directory-record/56384/abberton-way-",
"wiki_command_url_override": "https://www.coventry.gov.uk/directory_record/XXXXXX/XXXXXX",
Expand Down Expand Up @@ -837,8 +853,8 @@
"wiki_name": "New Forest Council"
},
"NewarkAndSherwoodDC": {
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX",
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529&nc=1",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1",
"wiki_name": "Newark and Sherwood District Council",
"wiki_note": "Replace XXXXXXXX with UPRN."
},
Expand Down Expand Up @@ -1200,10 +1216,13 @@
"wiki_name": "St Albans City and District Council"
},
"StHelensBC": {
"house_number": "15",
"postcode": "L34 2GA",
"skip_get_url": true,
"uprn": "39081672",
"url": "https://www.sthelens.gov.uk/",
"wiki_name": "St Helens Borough Council"
"web_driver": "http://selenium:4444",
"wiki_name": "St Helens Borough Council",
"wiki_note": "Pass the house name/number in the house number parameter, wrapped in double quotes"
},
"StaffordBoroughCouncil": {
"uprn": "100032203010",
Expand Down
55 changes: 55 additions & 0 deletions uk_bin_collection/uk_bin_collection/councils/BrecklandCouncil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import time

import requests

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Breckland Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Fetch the next bin collections for a property from Breckland's
        JSON-RPC endpoint.

        Args:
            page: Unused; the data is requested directly from the API.
            **kwargs: Must contain ``uprn``, the property reference that is
                sent to the API.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``type`` and a
            ``collectionDate`` formatted as ``%d/%m/%Y``, sorted by date.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            KeyError: If the response body has no ``result`` member.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)
        bindata = {"bins": []}

        URI = "https://www.breckland.gov.uk/apiserver/ajaxlibrary"

        data = {
            # NOTE(review): fixed JSON-RPC request id; looks like a captured
            # millisecond timestamp - confirm the server does not validate it.
            "id": "1730410741649",
            "jsonrpc": "2.0",
            "method": "Breckland.Whitespace.JointWasteAPI.GetBinCollectionsByUprn",
            "params": {"uprn": user_uprn, "environment": "live"},
        }

        # The JSON-RPC call is a POST. A timeout keeps a stalled server from
        # hanging the caller forever, and raise_for_status surfaces HTTP
        # failures instead of a confusing JSON/KeyError further down.
        response = requests.post(URI, json=data, timeout=30)
        response.raise_for_status()

        # Parse the JSON response
        bin_collection = response.json()

        # Loop through each collection in bin_collection
        for collection in bin_collection["result"]:
            bin_type = collection.get("collectiontype")
            collection_date = collection.get("nextcollection")

            # A record without a date cannot be parsed; skip it rather than
            # letting strptime fail on None and lose every other collection.
            if not collection_date:
                continue

            bindata["bins"].append(
                {
                    "type": bin_type,
                    "collectionDate": datetime.strptime(
                        collection_date,
                        "%d/%m/%Y %H:%M:%S",
                    ).strftime("%d/%m/%Y"),
                }
            )

        bindata["bins"].sort(
            key=lambda x: datetime.strptime(x.get("collectionDate"), "%d/%m/%Y")
        )

        return bindata
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import time
from datetime import datetime

from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber


class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for Cotswold District Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    @staticmethod
    def _resolve_collection_date(label, today):
        """
        Turn the collection-day text from the results table into a datetime.

        The page shows either ``Today``, ``Tomorrow`` or a year-less date such
        as ``Mon, 04 November``. Year-less dates are resolved to the first of
        the current or next year in which they do not fall before ``today``,
        so a January date seen in December rolls over to next year.

        Args:
            label: Raw text of the date cell.
            today: The reference "now" as a datetime.

        Returns:
            The resolved collection date as a datetime.

        Raises:
            ValueError: If the text cannot be resolved to an upcoming date.
        """
        text = label.strip()
        if text == "Today":
            return today
        if text == "Tomorrow":
            return today + timedelta(days=1)

        # Drop anything that is not a letter, digit, comma or whitespace
        # before parsing.
        cleaned = re.sub(r"[^a-zA-Z0-9,\s]", "", text).strip()

        # Parse with an explicit year: a year-less strptime defaults to 1900,
        # which rejects 29 February and carries no rollover information.
        for year in (today.year, today.year + 1):
            try:
                candidate = datetime.strptime(f"{cleaned} {year}", "%a, %d %B %Y")
            except ValueError:
                continue
            if candidate.date() >= today.date():
                return candidate

        raise ValueError(f"Unable to parse collection date: {label!r}")

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up bin collections through the council's enquiry form.

        Args:
            page: Ignored; the enquiry URL is fixed inside this method.
            **kwargs: Reads ``paon`` (house number or name), ``postcode``,
                ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``type`` and a
            ``collectionDate`` formatted as ``%d/%m/%Y``.

        Raises:
            Exception: Any failure is printed and re-raised; the webdriver is
                always closed first.
        """
        driver = None
        try:
            page = "https://community.cotswold.gov.uk/s/waste-collection-enquiry"

            data = {"bins": []}

            house_number = kwargs.get("paon")
            postcode = kwargs.get("postcode")
            full_address = f"{house_number}, {postcode}"
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            # Typing the house number (or property name) and postcode into the
            # box should find the property.
            wait = WebDriverWait(driver, 60)
            address_entry_field = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, '//*[@id="combobox-input-19"]')
                )
            )
            address_entry_field.send_keys(str(full_address))

            # Delete and retype the last character once the field is
            # clickable - presumably to make the page refresh its address
            # suggestions (TODO confirm).
            address_entry_field = wait.until(
                EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-19"]'))
            )
            address_entry_field.click()
            address_entry_field.send_keys(Keys.BACKSPACE)
            address_entry_field.send_keys(str(full_address[-1]))

            # Click the suggestion list to select the found address
            first_found_address = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//*[@id="dropdown-element-19"]/ul')
                )
            )
            first_found_address.click()

            next_btn = wait.until(
                EC.element_to_be_clickable((By.XPATH, "//lightning-button/button"))
            )
            next_btn.click()

            # Block until the results table (with its 'Container' heading)
            # is present before grabbing the page source.
            wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//span[contains(text(), 'Container')]")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            today = datetime.now()
            for row in soup.find_all("tr", class_="slds-hint-parent"):
                columns = row.find_all("td")
                header = row.find("th")
                # Rows without a heading cell or any data cells carry no
                # collection information.
                if not columns or header is None:
                    continue

                container_type = header.text.strip()
                collection_date = self._resolve_collection_date(
                    columns[0].get_text(), today
                )

                # Add the bin type and collection date to the 'data' dictionary
                data["bins"].append(
                    {
                        "type": container_type,
                        "collectionDate": collection_date.strftime("%d/%m/%Y"),
                    }
                )
        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the failure propagates to the caller
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
149 changes: 107 additions & 42 deletions uk_bin_collection/uk_bin_collection/councils/StHelensBC.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
Expand All @@ -8,50 +12,111 @@
class CouncilClass(AbstractGetBinDataClass):
    """
    Bin collection scraper for St Helens Borough Council.

    Concrete classes have to implement all abstract operations of the
    base class. They can also override some operations with a default
    implementation.
    """

    @staticmethod
    def _xpath_literal(value):
        """
        Quote ``value`` as an XPath 1.0 string literal.

        XPath has no escape character, so a value containing both quote kinds
        is assembled with ``concat()``. This keeps house names such as
        ``St John's`` from breaking the address lookup expression.
        """
        text = str(value)
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        pieces = [f"'{piece}'" for piece in text.split("'")]
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Look up bin collections through the council's collection-dates form.

        Args:
            page: Unused; the form URL is fixed inside this method.
            **kwargs: Reads ``paon`` (house name/number), ``postcode``,
                ``web_driver`` and ``headless``.

        Returns:
            ``{"bins": [...]}`` where each entry has a ``type`` and a
            ``collectionDate`` formatted as ``%d/%m/%Y``.

        Raises:
            Exception: Any failure is printed and re-raised; the webdriver is
                always closed first.
        """
        driver = None
        try:
            data = {"bins": []}
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_paon(user_paon)
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(
                "https://www.sthelens.gov.uk/article/3473/Check-your-collection-dates"
            )

            # NOTE: clicking the cookie banner's accept button
            # (id "ccc-notify-accept") was disabled in the original code and
            # is intentionally not performed here.

            # Wait for the postcode field to appear then populate it
            inputElement_postcode = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_POSTCODE")
                )
            )
            inputElement_postcode.send_keys(user_postcode)

            # Click search button
            findAddress = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_FINDADDRESS_NEXT")
                )
            )
            findAddress.click()

            # Open the address chooser
            WebDriverWait(driver, timeout=30).until(
                EC.element_to_be_clickable(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS_chosen")
                )
            ).click()

            # Pick the entry whose text starts with the supplied house
            # name/number. The value is quoted via _xpath_literal so an
            # apostrophe in it cannot break the expression.
            # NOTE(review): starts-with also matches longer numbers (e.g. "15"
            # matches "150 ..."); the first clickable match is used.
            address_xpath = (
                "//ul[@id='RESIDENTCOLLECTIONDATES_PAGE1_ADDRESS-chosen-search-results']"
                f"/li[starts-with(text(), {self._xpath_literal(user_paon)})]"
            )
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, address_xpath))
            ).click()

            WebDriverWait(driver, timeout=30).until(
                EC.element_to_be_clickable(
                    (By.ID, "RESIDENTCOLLECTIONDATES_PAGE1_ADDRESSNEXT_NEXT")
                )
            ).click()

            # Wait for the collections table to appear
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.ID, "RESIDENTCOLLECTIONDATES__FIELDS_OUTER")
                )
            )

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # Header rows (containing a 'th') name the month for the data
            # rows that follow them.
            current_month = ""
            for row in soup.find_all("tr"):
                month_header = row.find("th")
                if month_header:
                    current_month = month_header.get_text(strip=True)
                    continue

                # Data rows: first cell is the day, fourth cell the waste types
                columns = row.find_all("td")
                if len(columns) < 4:
                    continue

                day = columns[0].get_text(strip=True)
                # Parse once per row; every waste type on the row shares it
                collection_date = datetime.strptime(
                    day + " " + current_month,
                    "%d %B %Y",
                ).strftime("%d/%m/%Y")

                waste_types = columns[3].get_text(strip=True)
                for waste_type in waste_types.split(" & "):
                    data["bins"].append(
                        {
                            "type": waste_type,
                            "collectionDate": collection_date,
                        }
                    )

        except Exception as e:
            # Here you can log the exception if needed
            print(f"An error occurred: {e}")
            # Re-raise so the failure propagates to the caller
            raise
        finally:
            # This block ensures that the driver is closed regardless of an exception
            if driver:
                driver.quit()
        return data
Loading

0 comments on commit 9aaebc4

Please sign in to comment.