Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🏗️ Build mixin + spiders: bisnd_mc #8

Merged
merged 5 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,5 @@ src/
logs/*.log
travis/*.json

# output files: local gitignore added to city_scrapers/local_outputs/
# output files
*.csv
144 changes: 144 additions & 0 deletions city_scrapers/mixins/mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from datetime import datetime

from city_scrapers_core.constants import (
BOARD,
CITY_COUNCIL,
COMMISSION,
COMMITTEE,
NOT_CLASSIFIED,
)
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider
from dateutil.relativedelta import relativedelta
from scrapy import Request


class MCMixinMeta(type):
    """
    Metaclass that enforces the implementation of required static
    variables in child classes.

    Raises:
        NotImplementedError: when a class created with this metaclass
            fails to declare ``agency``, ``name``, or ``category_id``
            directly in its own body.
    """

    def __init__(cls, name, bases, dct):
        required = ("agency", "name", "category_id")
        # Only attributes declared directly on the class body count;
        # inherited attributes do not satisfy the requirement.
        missing = [attr for attr in required if attr not in dct]
        if missing:
            raise NotImplementedError(
                f"{name} must define the following static variable(s): {', '.join(missing)}."  # noqa
            )
        super().__init__(name, bases, dct)


class MCMixin(CityScrapersSpider, metaclass=MCMixinMeta):
    """
    Spider mixin for City of Mandan in Mandan, ND. This mixin
    is intended to be used as a base class for spiders that scrape meeting
    data from the city's website (CivicClerk API).

    Subclasses must declare ``name``, ``agency``, and ``category_id``
    (enforced by ``MCMixinMeta``).
    """

    base_url = "https://mandannd.api.civicclerk.com"
    timezone = "America/Chicago"
    name = None  # spider slug, set by subclass
    agency = None  # human-readable agency name, set by subclass
    category_id = None  # CivicClerk event category id(s), set by subclass

    def start_requests(self):
        """
        Construct and yield a request to the API endpoint, covering
        events from one month in the past to six months in the future.
        """
        # Calculate dates for one month prior and six months ahead
        today = datetime.today()
        one_month_prior = today - relativedelta(months=1)
        half_year_ahead = today + relativedelta(months=6)

        # Format dates like "2024-03-01T14:00:00Z" — minutes and seconds
        # are zeroed out but the current hour is kept
        meeting_date_from = one_month_prior.strftime("%Y-%m-%dT%H:00:00Z")
        meeting_date_to = half_year_ahead.strftime("%Y-%m-%dT%H:00:00Z")

        # build the URL
        url = f"{self.base_url}/v1/Events?$filter=categoryId+in+({self.category_id})+and+startDateTime+ge+{meeting_date_from}+and+startDateTime+le+{meeting_date_to}&$orderby=startDateTime"  # noqa

        yield Request(url, callback=self.parse)

    def parse(self, response):
        """
        Parse a list of meetings from the JSON response and yield
        Meeting items.
        """
        items = response.json()
        # "not items" already covers the empty-dict case, so no
        # separate length check is needed
        if not items or "value" not in items:
            self.logger.warning("No meetings found")
            return
        for item in items["value"]:
            meeting = Meeting(
                title=item["eventName"],
                description=item["eventDescription"],
                classification=self._parse_classification(item["categoryName"]),
                start=self._parse_start(item["startDateTime"]),
                end=None,
                all_day=False,
                time_notes="",
                location=self._parse_location(item["eventLocation"]),
                links=self._parse_links(item),
                source=response.url,
            )
            meeting["status"] = self._get_status(meeting)
            meeting["id"] = self._get_id(meeting)
            yield meeting

    def _parse_classification(self, name):
        """
        Parse or generate classification from allowed options.
        Matching is case-insensitive; the first match wins.
        """
        lowered = name.lower()
        if "city council" in lowered:
            return CITY_COUNCIL
        if "board" in lowered:
            return BOARD
        if "commission" in lowered:
            return COMMISSION
        if "committee" in lowered:
            return COMMITTEE
        return NOT_CLASSIFIED

    def _parse_start(self, start):
        """
        Parse the start date and time from the API's
        "YYYY-MM-DDTHH:MM:SSZ" string into a naive datetime.
        """
        return datetime.strptime(start, "%Y-%m-%dT%H:%M:%SZ")

    def _parse_location(self, location):
        """
        Parse or generate location. Returns a dict with "name" and
        "address" keys; fields missing from the API payload are
        skipped rather than raising KeyError.
        """
        if not location:
            return {
                "name": "",
                "address": "",
            }
        # build location from address1, address2, city, state, zipCode,
        # using .get() so partially-populated payloads don't crash
        address = location.get("address1") or ""
        address_fields = ["address2", "city", "state", "zipCode"]
        for field in address_fields:
            value = location.get(field)
            if value:
                address += f", {value.strip()}"
        return {
            "name": "",
            "address": address,
        }

    def _parse_links(self, item):
        """
        Parse published files into links pointing at the CivicClerk
        file-stream endpoint.
        """
        links = []
        if "publishedFiles" in item and len(item["publishedFiles"]) > 0:
            for file in item["publishedFiles"]:
                links.append(
                    {
                        "title": file["name"],
                        "href": f"{self.base_url}/v1/Meetings/GetMeetingFileStream(fileId={file['fileId']},plainText=false)",  # noqa
                    }
                )
        return links
139 changes: 139 additions & 0 deletions city_scrapers/spiders/bisnd_mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from city_scrapers_core.spiders import CityScrapersSpider

from city_scrapers.mixins.mc import MCMixin

# (agency, category_id, name, class_name) rows for each City of Mandan
# body; expanded below into the dict format consumed by create_spiders()
_SPIDER_ROWS = [
    ("City of Mandan - General", 24, "bisnd_mc_g", "BisndMCGSpider"),
    ("City of Mandan - City Commission", 26, "bisnd_mc_cc", "BisndMCCCSpider"),
    ("City of Mandan - Airport Authority", 27, "bisnd_mc_aa", "BisndMCAASpider"),
    (
        "City of Mandan - Architectural Review Commission",
        28,
        "bisnd_mc_arc",
        "BisndMCARCSpider",
    ),
    (
        "City of Mandan - Civil Service Commission",
        29,
        "bisnd_mc_csc",
        "BisndMCCSCSpider",
    ),
    (
        "City of Mandan - Code Enforcement Appeals Board",
        30,
        "bisnd_mc_ceab",
        "BisndMCCEABSpider",
    ),
    (
        "City of Mandan - Community Beautification Committee",
        31,
        "bisnd_mc_cbc",
        "BisndMCCBCSpider",
    ),
    (
        "City of Mandan - Board of Equalization",
        32,
        "bisnd_mc_boe",
        "BisndMCBOESpider",
    ),
    (
        "City of Mandan - Growth Fund Committee",
        33,
        "bisnd_mc_gfc",
        "BisndMCGFCSpider",
    ),
    (
        "City of Mandan - Library Board of Trustees",
        34,
        "bisnd_mc_lbot",
        "BisndMCLBOTSpider",
    ),
    ("City of Mandan - Parking Authority", 35, "bisnd_mc_pa", "BisndMCPASpider"),
    (
        "City of Mandan - Planning and Zoning Commission",
        36,
        "bisnd_mc_pazc",
        "BisndMCPAZCSpider",
    ),
    ("City of Mandan - Remediation Trust", 37, "bisnd_mc_rt", "BisndMCRTSpider"),
    (
        "City of Mandan - Renaissance Zone Committee",
        38,
        "bisnd_mc_rzc",
        "BisndMCRZCSpider",
    ),
    (
        "City of Mandan - Special Assessment Committee",
        39,
        "bisnd_mc_sac",
        "BisndMCSACSpider",
    ),
    ("City of Mandan - Tree Board", 40, "bisnd_mc_tb", "BisndMCTBSpider"),
    ("City of Mandan - Visitors Committee", 41, "bisnd_mc_vc", "BisndMCVCSpider"),
    ("City of Mandan - Weed Board", 42, "bisnd_mc_wb", "BisndMCWBSpider"),
]

# One config dict per spider, keyed exactly as create_spiders() expects
spider_configs = [
    {
        "agency": agency,
        "category_id": category_id,
        "name": name,
        "class_name": class_name,
    }
    for agency, category_id, name, class_name in _SPIDER_ROWS
]


def create_spiders():
    """
    Dynamically create spider classes using the spider_configs list
    and then register them in the global namespace. This approach
    is the equivalent of declaring each spider class in the same
    file but it is a little more concise.

    Safe to call more than once: the shared spider_configs dicts are
    not mutated (the original implementation popped "class_name" out
    of each config, so a second call raised KeyError) and classes
    already registered are skipped.
    """
    for config in spider_configs:
        # Read rather than pop so spider_configs is left intact
        class_name = config["class_name"]
        # We make sure that the class_name is not already in the global namespace
        # Because some scrapy CLI commands like `scrapy list` will inadvertently
        # declare the spider class more than once otherwise
        if class_name not in globals():
            # Copy the config minus "class_name"; the remaining keys
            # (name, agency, category_id) become class attributes
            attrs = {k: v for k, v in config.items() if k != "class_name"}
            spider_class = type(
                class_name,
                (MCMixin, CityScrapersSpider),  # Base classes
                attrs,
            )
            # Register the class in the global namespace using its class_name
            globals()[class_name] = spider_class


create_spiders()
Loading
Loading