Build spider: wicks_wampo_ec

City-Bureau · Mar 15, 2024 · 965af63 · 965af63
1 parent a3f9911
commit 965af63
Show file tree

Hide file tree

Showing 4 changed files with 1,684 additions and 0 deletions.
diff --git a/city_scrapers/spiders/wicks_wampo_ec.py b/city_scrapers/spiders/wicks_wampo_ec.py
@@ -0,0 +1,73 @@
+import re
+from datetime import datetime, time
+
+from city_scrapers_core.constants import COMMITTEE
+from city_scrapers_core.items import Meeting
+from city_scrapers_core.spiders import CityScrapersSpider
+
+
+class WampoEcSpider(CityScrapersSpider):
+    name = "wicks_wampo_ec"
+    agency = "Wichita Area Metropolitan Planning Organization – Executive Committee"
+    timezone = "America/Chicago"
+    start_urls = ["https://www.wampo.org/executive-committee"]
+    start_time = time(11, 0)
+    location = {
+        "name": "Wichita Area Metropolitan Planning Organization",
+        "address": "271 W. 3rd St. N., Suite 101, Wichita, KS 67202",
+    }
+
+    def parse(self, response):
+        """
+        Parse the page and extract the meeting information.
+        """
+        for tabpanel in response.css("div[role='tabpanel'] ul"):
+            for item in tabpanel.css("li p"):
+                start = self._parse_start(item)
+                if start is None:
+                    self.logger.warn("Skipping row with no date")
+                    continue
+                meeting = Meeting(
+                    title="Executive Committee Meeting",
+                    description="",
+                    classification=COMMITTEE,
+                    start=start,
+                    end=None,
+                    all_day=False,
+                    time_notes="",
+                    location=self.location,
+                    links=self._parse_links(item),
+                    source=response.url,
+                )
+                meeting["status"] = self._get_status(meeting)
+                meeting["id"] = self._get_id(meeting)
+                yield meeting
+
+    def _parse_start(self, item):
+        """Extracts all the text from the row and parses the first
+        column that looks formatted like MM/DD/YYYY."""
+        item.css("span::text").extract()
+        for text in item.css("span::text").extract():
+            clean_text = text.strip()
+            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", clean_text):
+                try:
+                    start_date = datetime.strptime(clean_text, "%m/%d/%Y")
+                    start_datetime = datetime.combine(start_date, self.start_time)
+                    return start_datetime
+                except ValueError:
+                    self.logger.info(f"Could not parse date from {clean_text}")
+                    return None
+        self.logger.info("Could not find date in row")
+        return None
+
+    def _parse_links(self, item):
+        """Parse all links in the row."""
+        links = []
+        for link in item.css("a"):
+            links.append(
+                {
+                    "href": link.attrib["href"],
+                    "title": link.css("::text").extract_first(),
+                }
+            )
+        return links