Skip to content

Commit

Permalink
Build spider: bisnd_mc
Browse files Browse the repository at this point in the history
  • Loading branch information
SimmonsRitchie committed Mar 12, 2024
1 parent 86e3f9b commit 4e1f47b
Show file tree
Hide file tree
Showing 5 changed files with 2,413 additions and 0 deletions.
144 changes: 144 additions & 0 deletions city_scrapers/mixins/mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from datetime import datetime

from city_scrapers_core.constants import (
BOARD,
CITY_COUNCIL,
COMMISSION,
COMMITTEE,
NOT_CLASSIFIED,
)
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider
from dateutil.relativedelta import relativedelta
from scrapy import Request


class MCMixinMeta(type):
    """
    Metaclass that verifies each child class declares the static
    variables the mixin depends on, failing fast at class-creation
    time rather than at scrape time.
    """

    def __init__(cls, name, bases, dct):
        # Attributes every concrete spider must declare directly.
        missing = [
            attr
            for attr in ("agency", "name", "category_id")
            if attr not in dct
        ]
        if missing:
            raise NotImplementedError(
                f"{name} must define the following static variable(s): {', '.join(missing)}."  # noqa
            )
        super().__init__(name, bases, dct)


class MCMixin(CityScrapersSpider, metaclass=MCMixinMeta):
    """
    Spider mixin for City of Mandan in Mandan, ND. This mixin
    is intended to be used as a base class for spiders that scrape meeting
    data from the city's CivicClerk API. Subclasses must define `name`,
    `agency`, and `category_id` (enforced by MCMixinMeta).
    """

    base_url = "https://mandannd.api.civicclerk.com"
    timezone = "America/Chicago"
    # Placeholders so this mixin itself satisfies MCMixinMeta; concrete
    # child classes must override all three.
    name = None
    agency = None
    category_id = None

    def start_requests(self):
        """
        Yield a single request for this spider's category, covering
        events from one month in the past to six months in the future.
        """
        # Calculate dates for one month prior and six months ahead
        today = datetime.today()
        one_month_prior = today - relativedelta(months=1)
        six_months_ahead = today + relativedelta(months=6)

        # Format dates like "2024-03-12T14:00:00Z" — minutes and seconds
        # are zeroed so the URL stays stable within the hour
        meeting_date_from = one_month_prior.strftime("%Y-%m-%dT%H:00:00Z")
        meeting_date_to = six_months_ahead.strftime("%Y-%m-%dT%H:00:00Z")

        # Build the OData-style URL: filter by category and date window,
        # ordered by start time
        url = f"{self.base_url}/v1/Events?$filter=categoryId+in+({self.category_id})+and+startDateTime+ge+{meeting_date_from}+and+startDateTime+le+{meeting_date_to}&$orderby=startDateTime"  # noqa

        yield Request(url, callback=self.parse)

    def parse(self, response):
        """
        Parse a list of meetings from the JSON response. Yields one
        Meeting item per entry in the API's "value" array.
        """
        items = response.json()
        if not items or "value" not in items:
            self.logger.warning("No meetings found")
            return
        for item in items["value"]:
            meeting = Meeting(
                title=item["eventName"],
                description=item["eventDescription"],
                classification=self._parse_classification(item["categoryName"]),
                start=self._parse_start(item["startDateTime"]),
                end=None,
                all_day=False,
                time_notes="",
                location=self._parse_location(item["eventLocation"]),
                links=self._parse_links(item),
                source=response.url,
            )
            meeting["status"] = self._get_status(meeting)
            meeting["id"] = self._get_id(meeting)
            yield meeting

    def _parse_classification(self, name):
        """
        Parse or generate classification from allowed options based on
        keywords in the category name.
        """
        if "city council" in name.lower():
            return CITY_COUNCIL
        if "board" in name.lower():
            return BOARD
        if "commission" in name.lower():
            return COMMISSION
        if "committee" in name.lower():
            return COMMITTEE
        return NOT_CLASSIFIED

    def _parse_start(self, start):
        """
        Parse the start date and time from the API's ISO-like string
        (e.g. "2024-03-12T17:30:00Z").
        """
        return datetime.strptime(start, "%Y-%m-%dT%H:%M:%SZ")

    def _parse_location(self, location):
        """
        Parse or generate location from the API's eventLocation object.
        Returns a dict with "name" and "address" keys.
        """
        if not location:
            return {
                "name": "",
                "address": "",
            }
        # build location from address1, address2, city, state, zipCode;
        # .get() guards against keys missing from the API payload
        address = location.get("address1") or ""
        address_fields = ["address2", "city", "state", "zipCode"]
        for field in address_fields:
            value = location.get(field)
            if value:
                address += f", {value.strip()}"
        return {
            "name": "",
            "address": address,
        }

    def _parse_links(self, item):
        """
        Parse published files into links.
        """
        links = []
        # publishedFiles may be absent or empty for some events
        for file in item.get("publishedFiles") or []:
            links.append(
                {
                    "title": file["name"],
                    "href": f"https://mandannd.api.civicclerk.com/v1/Meetings/GetMeetingFileStream(fileId={file['fileId']},plainText=false)",  # noqa
                }
            )
        return links
139 changes: 139 additions & 0 deletions city_scrapers/spiders/bisnd_mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from city_scrapers_core.spiders import CityScrapersSpider

from city_scrapers.mixins.mc import MCMixin

# Configuration for each dynamically-generated spider. Each entry supplies
# the static attributes required by MCMixinMeta ("agency", "name",
# "category_id" — the CivicClerk category filter) plus the "class_name"
# under which the generated class is registered in this module's globals.
spider_configs = [
    {
        "agency": "City of Mandan - General",
        "category_id": 24,
        "name": "bisnd_mc_g",
        "class_name": "BisndMCGSpider",
    },
    {
        "agency": "City of Mandan - City Commission",
        "category_id": 26,
        "name": "bisnd_mc_cc",
        "class_name": "BisndMCCCSpider",
    },
    {
        "agency": "City of Mandan - Airport Authority",
        "category_id": 27,
        "name": "bisnd_mc_aa",
        "class_name": "BisndMCAASpider",
    },
    {
        "agency": "City of Mandan - Architectural Review Commission",
        "category_id": 28,
        "name": "bisnd_mc_arc",
        "class_name": "BisndMCARCSpider",
    },
    {
        "agency": "City of Mandan - Civil Service Commission",
        "category_id": 29,
        "name": "bisnd_mc_csc",
        "class_name": "BisndMCCSCSpider",
    },
    {
        "agency": "City of Mandan - Code Enforcement Appeals Board",
        "category_id": 30,
        "name": "bisnd_mc_ceab",
        "class_name": "BisndMCCEABSpider",
    },
    {
        "agency": "City of Mandan - Community Beautification Committee",
        "category_id": 31,
        "name": "bisnd_mc_cbc",
        "class_name": "BisndMCCBCSpider",
    },
    {
        "agency": "City of Mandan - Board of Equalization",
        "category_id": 32,
        "name": "bisnd_mc_boe",
        "class_name": "BisndMCBOESpider",
    },
    {
        "agency": "City of Mandan - Growth Fund Committee",
        "category_id": 33,
        "name": "bisnd_mc_gfc",
        "class_name": "BisndMCGFCSpider",
    },
    {
        "agency": "City of Mandan - Library Board of Trustees",
        "category_id": 34,
        "name": "bisnd_mc_lbot",
        "class_name": "BisndMCLBOTSpider",
    },
    {
        "agency": "City of Mandan - Parking Authority",
        "category_id": 35,
        "name": "bisnd_mc_pa",
        "class_name": "BisndMCPASpider",
    },
    {
        "agency": "City of Mandan - Planning and Zoning Commission",
        "category_id": 36,
        "name": "bisnd_mc_pazc",
        "class_name": "BisndMCPAZCSpider",
    },
    {
        "agency": "City of Mandan - Remediation Trust",
        "category_id": 37,
        "name": "bisnd_mc_rt",
        "class_name": "BisndMCRTSpider",
    },
    {
        "agency": "City of Mandan - Renaissance Zone Committee",
        "category_id": 38,
        "name": "bisnd_mc_rzc",
        "class_name": "BisndMCRZCSpider",
    },
    {
        "agency": "City of Mandan - Special Assessment Committee",
        "category_id": 39,
        "name": "bisnd_mc_sac",
        "class_name": "BisndMCSACSpider",
    },
    {
        "agency": "City of Mandan - Tree Board",
        "category_id": 40,
        "name": "bisnd_mc_tb",
        "class_name": "BisndMCTBSpider",
    },
    {
        "agency": "City of Mandan - Visitors Committee",
        "category_id": 41,
        "name": "bisnd_mc_vc",
        "class_name": "BisndMCVCSpider",
    },
    {
        "agency": "City of Mandan - Weed Board",
        "category_id": 42,
        "name": "bisnd_mc_wb",
        "class_name": "BisndMCWBSpider",
    },
]


def create_spiders():
    """
    Dynamically create spider classes using the spider_configs list
    and then register them in the global namespace. This approach
    is the equivalent of declaring each spider class in the same
    file but it is a little more concise.
    """
    for config in spider_configs:
        # Work on a copy: popping from the original dict would mutate
        # spider_configs, making any second call raise KeyError on
        # "class_name".
        attrs = dict(config)
        class_name = attrs.pop("class_name")
        # We make sure that the class_name is not already in the global namespace
        # Because some scrapy CLI commands like `scrapy list` will inadvertently
        # declare the spider class more than once otherwise
        if class_name not in globals():
            spider_class = type(
                class_name,
                (MCMixin, CityScrapersSpider),  # Base classes
                attrs,  # Attributes including name, agency, category_id
            )
            # Register the class in the global namespace using its class_name
            globals()[class_name] = spider_class


create_spiders()
Loading

0 comments on commit 4e1f47b

Please sign in to comment.