Build spider: wicks_wampo_ec
SimmonsRitchie committed Mar 15, 2024
1 parent a3f9911 commit 965af63
Showing 4 changed files with 1,684 additions and 0 deletions.
73 changes: 73 additions & 0 deletions city_scrapers/spiders/wicks_wampo_ec.py
@@ -0,0 +1,73 @@
import re
from datetime import datetime, time

from city_scrapers_core.constants import COMMITTEE
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider


class WampoEcSpider(CityScrapersSpider):
    name = "wicks_wampo_ec"
    agency = "Wichita Area Metropolitan Planning Organization – Executive Committee"
    timezone = "America/Chicago"
    start_urls = ["https://www.wampo.org/executive-committee"]
    # Fixed start time applied to every meeting (11:00 a.m. local time).
    start_time = time(11, 0)
    location = {
        "name": "Wichita Area Metropolitan Planning Organization",
        "address": "271 W. 3rd St. N., Suite 101, Wichita, KS 67202",
    }

    def parse(self, response):
        """Parse the page and extract meeting information from each tab panel."""
        for tabpanel in response.css("div[role='tabpanel'] ul"):
            for item in tabpanel.css("li p"):
                start = self._parse_start(item)
                if start is None:
                    self.logger.warning("Skipping row with no date")
                    continue
                meeting = Meeting(
                    title="Executive Committee Meeting",
                    description="",
                    classification=COMMITTEE,
                    start=start,
                    end=None,
                    all_day=False,
                    time_notes="",
                    location=self.location,
                    links=self._parse_links(item),
                    source=response.url,
                )
                meeting["status"] = self._get_status(meeting)
                meeting["id"] = self._get_id(meeting)
                yield meeting

    def _parse_start(self, item):
        """Extract the text spans from the row and parse the first one
        formatted like MM/DD/YYYY, combining it with the default start time."""
        for text in item.css("span::text").extract():
            clean_text = text.strip()
            if re.match(r"\d{1,2}/\d{1,2}/\d{4}", clean_text):
                try:
                    start_date = datetime.strptime(clean_text, "%m/%d/%Y")
                    return datetime.combine(start_date, self.start_time)
                except ValueError:
                    self.logger.info(f"Could not parse date from {clean_text}")
                    return None
        self.logger.info("Could not find date in row")
        return None

    def _parse_links(self, item):
        """Parse all links in the row."""
        links = []
        for link in item.css("a"):
            links.append(
                {
                    "href": link.attrib["href"],
                    "title": link.css("::text").extract_first(),
                }
            )
        return links
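
For quick sanity checking outside of Scrapy, the date handling in `_parse_start` can be exercised on its own. The sketch below is not part of the commit; the `parse_start` helper name and the sample strings are illustrative. It mirrors the same logic: match an MM/DD/YYYY token and combine it with the spider's fixed 11:00 a.m. start time.

# Standalone sketch of the _parse_start logic; parse_start and the sample
# inputs below are illustrative, not part of the committed spider.
import re
from datetime import datetime, time

START_TIME = time(11, 0)  # mirrors WampoEcSpider.start_time


def parse_start(text):
    """Return a datetime for text shaped like MM/DD/YYYY, else None."""
    clean_text = text.strip()
    if re.match(r"\d{1,2}/\d{1,2}/\d{4}", clean_text):
        try:
            start_date = datetime.strptime(clean_text, "%m/%d/%Y")
            return datetime.combine(start_date, START_TIME)
        except ValueError:
            return None
    return None


print(parse_start("3/12/2024"))  # 2024-03-12 11:00:00
print(parse_start("TBD"))        # None

Running the full spider locally (e.g. `scrapy crawl wicks_wampo_ec -o output.json` from the project root) should produce Meeting items with these start values, assuming the page markup still matches the selectors above.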