# city_scrapers/spiders/wicks_wampo_tac.py
from datetime import datetime, time
from unicodedata import normalize

from city_scrapers_core.constants import BOARD
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider
from dateutil.parser import parse


class WicksWampoTacSpider(CityScrapersSpider):
    """Scrape meetings listed on the WAMPO Technical Advisory Committee page.

    The page lists meeting dates (without a year or a time) grouped under
    year headings, so the year is read from the heading and the start time
    and location are hardcoded on the class.
    """

    name = "wicks_wampo_tac"
    agency = (
        "Wichita Area Metropolitan Planning Organization - Technical Advisory Committee"
    )
    timezone = "America/Chicago"
    start_urls = ["https://www.wampo.org/technical-advisory-committee"]
    # The listing carries no per-meeting time; every meeting gets this one.
    meeting_time = time(10, 0)
    location = {
        "name": "Wichita Area Metropolitan Planning Organization",
        "address": "271 W 3rd St N, Wichita, KS 67202",
    }

    def parse(self, response):
        """
        Parse HTML from agency page. Note that certain key details are absent
        like title and meeting time, so we hardcode them. However, page offers
        good collection of links to meeting agendas and minutes.

        Yields one ``Meeting`` per list entry whose date could be parsed;
        entries without a parseable date are logged and skipped.
        """
        columns = response.css(
            'section.wixui-column-strip div[data-testid="columns"] div[data-testid="richTextElement"]'  # noqa
        )
        for column in columns:
            # A column without a heading has no year to attach to its dates.
            year = column.css("h2 span::text").extract()
            if not year:
                continue
            # Only the first two text fragments are joined -- presumably the
            # year can be split across two spans; confirm against the page.
            year_parsed = "".join(year[:2]).strip()
            # Each list entry is one meeting.
            for item in column.css("ul > li > p"):
                start = self._parse_start(item, year_parsed)
                if not start:
                    self.logger.info("No start time found – skipping")
                    continue
                # NOTE(review): title and classification read like they were
                # carried over from the Transportation Policy Body spider;
                # the accompanying test pins these values, so confirm before
                # changing either.
                meeting = Meeting(
                    title="WAMPO Transportation Policy Body Meeting",
                    description="",
                    classification=BOARD,
                    start=start,
                    end=None,
                    all_day=False,
                    time_notes="",
                    location=self.location,
                    links=self._parse_links(item),
                    source=response.url,
                )
                meeting["status"] = self._get_status(meeting)
                meeting["id"] = self._get_id(meeting)

                yield meeting

    def _parse_start(self, item, parsed_year):
        """Parse start datetime as a naive datetime object.

        ``item`` is the selector for one list entry and ``parsed_year`` is the
        year text taken from the column heading. Returns ``None`` when the
        entry has no bold date text or the text cannot be parsed as a date.
        """
        date_str = item.css("span[style*='font-weight:bold']::text").extract_first()
        if not date_str:
            return None
        try:
            # The entry holds only month/day; append the heading's year so
            # the string reads like "01/01/2020".
            parsed_date = parse(f"{date_str}/{parsed_year}", fuzzy=True)
        except (ValueError, OverflowError):
            # dateutil raises OverflowError (not only ValueError) for numbers
            # too large to be a date; neither should abort the crawl.
            self.logger.info("Failed to parse date: %s", date_str)
            return None
        return datetime.combine(parsed_date.date(), self.meeting_time)

    def _parse_links(self, item):
        """Parse links to meeting agendas and minutes. HTML is
        very messy. Display text is often split across multiple
        span tags. In some cases, multiple a tags make up the same
        link (Eg. "Re", "cording").

        Returns a list of ``{"title": ..., "href": ...}`` dicts, one per
        distinct href, in order of first appearance. Anchors without an
        href are skipped.
        """
        # href -> accumulated title; dicts keep insertion order, so the
        # output order matches the order links first appear in the entry.
        titles_by_href = {}
        for link in item.css("a"):
            url = link.attrib.get("href")
            if not url:
                # Nothing to link to; indexing attrib["href"] would raise.
                continue
            # Get text from all child spans.
            title = "".join(link.css("::text").extract()).strip()
            # Decompose compatibility characters, then drop anything that is
            # still non-ASCII.
            clean_title = (
                normalize("NFKD", title).encode("ascii", "ignore").decode("utf-8")
            )
            # Anchors sharing an href are fragments of one link: concatenate.
            titles_by_href[url] = titles_by_href.get(url, "") + clean_title
        return [
            {"title": title, "href": href} for href, title in titles_by_href.items()
        ]
top of page
Technical Advisory Committee: Text

Technical Advisory Committee (TAC)

Technical Advisory Committee: Files
Technical Advisory Committee: Files

Meets every 4th Monday of the month at 10:00 AM

The Technical Advisory Committee (TAC) reviews technical information about transportation studies and issues as well as provides the TPB with their professional recommendations on documents, plans, and activities. They also help with the adoption of transportation documents such as the biennial Transportation Improvement Program.

AGENDAS

2024

+ + + +

2023

+ +
bottom of page
# tests/test_wicks_wampo_tac.py
from datetime import datetime
from os.path import dirname, join

import pytest
from city_scrapers_core.constants import BOARD, PASSED
from city_scrapers_core.utils import file_response
from freezegun import freeze_time

from city_scrapers.spiders.wicks_wampo_tac import WicksWampoTacSpider

test_response = file_response(
    join(dirname(__file__), "files", "wicks_wampo_tac.html"),
    url="https://www.wampo.org/technical-advisory-committee",
)
spider = WicksWampoTacSpider()

# Freeze the clock while parsing so the status computed for each meeting is
# stable. The context manager restores the real clock even if parsing raises.
with freeze_time(datetime(2024, 3, 19, 11, 52)):
    parsed_items = list(spider.parse(test_response))

# Field-level assertions below check the first meeting yielded by the spider.
parsed_item = parsed_items[0]


def test_title():
    assert parsed_item["title"] == "WAMPO Transportation Policy Body Meeting"


def test_description():
    assert parsed_item["description"] == ""


def test_start():
    assert parsed_item["start"] == datetime(2024, 1, 19, 10, 0)


def test_end():
    assert parsed_item["end"] is None


def test_time_notes():
    assert parsed_item["time_notes"] == ""


def test_id():
    assert (
        parsed_item["id"]
        == "wicks_wampo_tac/202401191000/x/wampo_transportation_policy_body_meeting"
    )


def test_status():
    assert parsed_item["status"] == PASSED


def test_location():
    assert parsed_item["location"] == {
        "name": "Wichita Area Metropolitan Planning Organization",
        "address": "271 W 3rd St N, Wichita, KS 67202",
    }


def test_source():
    assert parsed_item["source"] == "https://www.wampo.org/technical-advisory-committee"


def test_links():
    assert parsed_item["links"] == [
        {
            "title": "Agenda Packet",
            "href": "https://www.wampo.org/_files/ugd/bbf89d_02a78f0fb9c24886998245c53019d994.pdf",  # noqa
        },
        {
            "title": "Minutes",
            "href": "https://www.wampo.org/_files/ugd/bbf89d_1471b553e5e54ffdb3fa8cdd0821f829.pdf",  # noqa
        },
        {"title": "Recording", "href": "https://www.youtube.com/watch?v=ATbfnexxAhg"},
        {
            "title": "Safer Speeds Presentation Slides",
            "href": "https://www.wampo.org/_files/ugd/bbf89d_96c56756159b4989953c130cbe6ab49c.pdf",  # noqa
        },
    ]


def test_classification():
    assert parsed_item["classification"] == BOARD


@pytest.mark.parametrize("item", parsed_items)
def test_all_day(item):
    assert item["all_day"] is False