From 0db30596e513f02cb4e555bcea9fa07a8dedede0 Mon Sep 17 00:00:00 2001 From: Daniel Simmons-Ritchie <37225902+SimmonsRitchie@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:29:24 -0500 Subject: [PATCH] Fix: oma_planning_exam_engineers Mixin appears to be broken. Spider has been separated into its own file. --- city_scrapers/spiders/oma_examining.py | 10 - .../spiders/oma_planning_exam_engineers.py | 92 +++ tests/files/oma_planning_exam_engineers.html | 543 ++++++++++++++++++ tests/test_oma_planning_exam_engineers.py | 82 +++ 4 files changed, 717 insertions(+), 10 deletions(-) create mode 100644 city_scrapers/spiders/oma_planning_exam_engineers.py create mode 100644 tests/files/oma_planning_exam_engineers.html create mode 100644 tests/test_oma_planning_exam_engineers.py diff --git a/city_scrapers/spiders/oma_examining.py b/city_scrapers/spiders/oma_examining.py index 1a7ce92..06ef963 100644 --- a/city_scrapers/spiders/oma_examining.py +++ b/city_scrapers/spiders/oma_examining.py @@ -56,16 +56,6 @@ def parse(self, response): yield meeting -class OmahaPlanningExaminersEngineers(OmahaExaminingBoardMixin, CityScrapersSpider): - name = "oma_planning_exam_engineers" - agency = "Omaha Planning Department: Board of Examiners (For Engineers)" - start_urls = [ - "https://planning.cityofomaha.org/boards/board-of-examiners-for-engineers" # noqa - ] - time = "12pm" - address = "Room 1210 - 12th Floor; Omaha-Douglas Civic Center, 1819 Farnam Street" - - class OmahaPlanningExaminersPipefitters(OmahaExaminingBoardMixin, CityScrapersSpider): name = "oma_planning_exam_pipefitters" agency = "Omaha Planning Department: Board of Examiners (For Engineers)" diff --git a/city_scrapers/spiders/oma_planning_exam_engineers.py b/city_scrapers/spiders/oma_planning_exam_engineers.py new file mode 100644 index 0000000..1aea002 --- /dev/null +++ b/city_scrapers/spiders/oma_planning_exam_engineers.py @@ -0,0 +1,92 @@ +import re +from urllib.parse import urljoin + +from 
city_scrapers_core.constants import NOT_CLASSIFIED +from city_scrapers_core.items import Meeting +from city_scrapers_core.spiders import CityScrapersSpider +from dateutil import parser + + +class OmahaPlanningExaminersEngineersSpider(CityScrapersSpider): + name = "oma_planning_exam_engineers" + agency = "Omaha Planning Department: The Board of Engineer Examiners" + start_urls = [ + "https://planning.cityofomaha.org/boards/board-of-examiners-for-engineers" # noqa + ] + timezone = "America/Chicago" + base_url = "https://planning.cityofomaha.org" + start_time = "12 p.m." + location = { + "name": "", + "address": "Room 1210 - 12th Floor; Omaha-Douglas Civic Center, 1819 Farnam Street", # noqa + } + + def parse(self, response): + table = response.css("table.tabclr") + + # skip the first two rows, which are headers + col_headers = table.css("tr")[1] + for row in table.css("tr")[2:]: + start = self._parse_start(row) + if not start: + # If we can't parse the start time, skip this row + continue + meeting = Meeting( + title="Board of Engineer Examiners meeting", + description="", + classification=NOT_CLASSIFIED, + start=start, + end=None, + all_day=False, + time_notes="", + location=self.location, + links=self._parse_links(row, col_headers), + source=response.url, + ) + meeting["status"] = self._get_status(meeting) + meeting["id"] = self._get_id(meeting) + yield meeting + + def _parse_start(self, row): + """ + Parse the start time from the second column of the row. + Date is in format "Month Day, Year" and time is always 12 p.m. + Date might be in a link, so we need to check for that. 
+ """ + second_col = row.css("td:nth-child(2)") + date_str = second_col.css("::text").extract_first() + + if not date_str: + date_str = second_col.css("a::text").extract_first() + if not date_str: + return + # use regex to capture only the date string in format "Month Day, Year" + clean_date = re.search(r"([A-Z][a-z]+ \d{1,2}, \d{4})", date_str) + if not clean_date: + return + clean_date_str = clean_date.group(0) + full_start_str = f"{clean_date_str} {self.start_time}" + return parser.parse(full_start_str) + + def _parse_links(self, row, col_headers): + """ + Third and fourth columns contain links to meeting minutes and agendas. + """ + links = [] + for col_num in [3, 4]: + col = row.css(f"td:nth-child({col_num})") + if col.css("a"): + header_selector = f"td:nth-child({col_num}) strong::text" + title_els = col_headers.css(header_selector) + # loop over all title_els and join + title = " ".join(title_els.extract()) + clean_title = re.sub(r"\s+", " ", title) + relative_url = col.css("a::attr(href)").extract_first() + abs_url = urljoin(self.base_url, relative_url) + links.append( + { + "title": clean_title, + "href": abs_url, + } + ) + return links diff --git a/tests/files/oma_planning_exam_engineers.html b/tests/files/oma_planning_exam_engineers.html new file mode 100644 index 0000000..05417ed --- /dev/null +++ b/tests/files/oma_planning_exam_engineers.html @@ -0,0 +1,543 @@ + + + + + + + + + + + + + + Board of Examiners (For Engineers) - City of Omaha Planning Department + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + + +
+
+ +
+ + + +
+ + + +
+ +
+ + + + + +
+ + +
Text Size   - | +
+ +
+ +
+ + + +
+ + + +
+ +
+ + + + + +
+

The Board of Engineer Examiners shall meet at least once each calendar month for the examination of applicants or their examination papers; however, such a meeting shall not be held unless there are two or more applications on file. The president of the board shall have the power to call special meetings of the board when deemed necessary. The secretary of the board shall be required to keep minutes of all meetings. The board shall not hold more than two sessions per month.

+

Click here for Hoisting and Portable Applications

+

Click here for Stationary Engineer Applications

+

Click here for Tank Installer Applications

+

Board Members:

+
    +
  • Shawn Dugan
  • +
  • Reginald Gillispie
  • +
  • Zachary Beister ( Chief Mechanical Inspector - Non-Voting)
  • +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

Stationary Engineers 2024 Meeting Schedule
12:00 p.m.* - Room 1210 - 12th Floor
Omaha-Douglas Civic Center, 1819 Farnam Street

+
+

Filing Deadline
(4:00 p.m. the Monday two weeks prior)

+
+

Meeting Dates
(Second Tuesday of each month at Noon.)

+
+

Disposition Agenda

+
Minutes +

 Testing Dates 
(9:00 a.m. on the first Thursday and Friday of each month)

+
December 19, 2023*January 9, 2024 
(cancelled)
  January 4-5, 2024
December 19, 2023 January 23, 2024January 23, 2024January 23, 2024February 1-2, 2024 
January 29, 2024February 13, 2024February 13, 2024February 13, 2024February 1-2, 2024
February 26, 2024March 12, 2024March 12, 2024March 12, 2024March 7-8, 2024
March 25, 2024April 9, 2024April 9, 2024April 9, 2024 April 4-5, 2024
April 29, 2024May 14, 2024May 14, 2024 May 14, 2024May 2-3, 2024
May 28, 2024June 11, 2024 June 11, 2024 June 6-7, 2024
July 1, 2024July 30, 2024  July 11-12, 2024
July 29, 2024August 13, 2024  August 1-2, 2024
August 26, 2024September 10, 2024  September 5-6 2024
September 23, 2024October 8, 2024  October 3-4, 2024
October 28, 2024November 12, 2024  November 7-8, 2024
November 25, 2024December 10, 2025  December 5-6, 2024
December 23, 2024*January 14, 2025  January 2-3, 2025
+

* Time subject to change as needed due to the number of exams to be graded

+

** Changed due to holiday

+

2023 Archives
2022 Archives
2021 Archives
2020 Archives
2019 Archives
2018 Archives
2017 Archives

+
+ + +
+
+ +
+ +
+ + + +
+ +
+ + + + + + + diff --git a/tests/test_oma_planning_exam_engineers.py b/tests/test_oma_planning_exam_engineers.py new file mode 100644 index 0000000..7bc5382 --- /dev/null +++ b/tests/test_oma_planning_exam_engineers.py @@ -0,0 +1,82 @@ +from datetime import datetime +from os.path import dirname, join + +import pytest +from city_scrapers_core.constants import NOT_CLASSIFIED, PASSED +from city_scrapers_core.utils import file_response +from freezegun import freeze_time + +from city_scrapers.spiders.oma_planning_exam_engineers import ( + OmahaPlanningExaminersEngineersSpider, +) + +test_response = file_response( + join(dirname(__file__), "files", "oma_planning_exam_engineers.html"), + url="https://planning.cityofomaha.org/boards/board-of-examiners-for-engineers", +) +spider = OmahaPlanningExaminersEngineersSpider() + +freezer = freeze_time(datetime(2024, 7, 30, 11, 26)) +freezer.start() + +parsed_items = [item for item in spider.parse(test_response)] +parsed_item = parsed_items[0] +freezer.stop() + + +def test_title(): + assert parsed_item["title"] == "Board of Engineer Examiners meeting" + + +def test_description(): + assert parsed_item["description"] == "" + + +def test_start(): + assert parsed_item["start"] == datetime(2024, 1, 9, 12, 0) + + +def test_end(): + assert parsed_item["end"] is None + + +def test_time_notes(): + assert parsed_item["time_notes"] == "" + + +def test_id(): + assert ( + parsed_item["id"] + == "oma_planning_exam_engineers/202401091200/x/board_of_engineer_examiners_meeting" # noqa + ) + + +def test_status(): + assert parsed_item["status"] == PASSED + + +def test_location(): + assert parsed_item["location"] == { + "name": "", + "address": "Room 1210 - 12th Floor; Omaha-Douglas Civic Center, 1819 Farnam Street", # noqa + } + + +def test_source(): + assert ( + parsed_item["source"] + == "https://planning.cityofomaha.org/boards/board-of-examiners-for-engineers" + ) + + +def test_links(): + assert parsed_item["links"] == [] + + +def 
test_classification(): + assert parsed_item["classification"] == NOT_CLASSIFIED + + +@pytest.mark.parametrize("item", parsed_items) +def test_all_day(item): + assert item["all_day"] is False