generated from City-Bureau/city-scrapers-template
-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
86e3f9b
commit 4e1f47b
Showing
5 changed files
with
2,413 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
from datetime import datetime | ||
|
||
from city_scrapers_core.constants import ( | ||
BOARD, | ||
CITY_COUNCIL, | ||
COMMISSION, | ||
COMMITTEE, | ||
NOT_CLASSIFIED, | ||
) | ||
from city_scrapers_core.items import Meeting | ||
from city_scrapers_core.spiders import CityScrapersSpider | ||
from dateutil.relativedelta import relativedelta | ||
from scrapy import Request | ||
|
||
|
||
class MCMixinMeta(type):
    """Metaclass that rejects child classes missing required static variables.

    Any class built with this metaclass must declare ``agency``, ``name``,
    and ``category_id`` in its own body; otherwise class creation fails
    immediately with NotImplementedError, surfacing misconfigured spiders
    at import time rather than at crawl time.
    """

    def __init__(cls, name, bases, dct):
        # Only attributes declared directly on the class body (dct) count;
        # inherited values are deliberately not accepted.
        missing = [
            attr
            for attr in ("agency", "name", "category_id")
            if attr not in dct
        ]
        if missing:
            raise NotImplementedError(
                f"{name} must define the following static variable(s): "
                f"{', '.join(missing)}."
            )
        super().__init__(name, bases, dct)
|
||
|
||
class MCMixin(CityScrapersSpider, metaclass=MCMixinMeta):
    """
    Spider mixin for City of Mandan in Mandan, ND. This mixin
    is intended to be used as a base class for spiders that scrape meeting
    data from the city's CivicClerk API. Subclasses must define ``agency``,
    ``name``, and ``category_id`` (enforced by MCMixinMeta).
    """

    base_url = "https://mandannd.api.civicclerk.com"
    timezone = "America/Chicago"
    name = None
    agency = None
    category_id = None

    def start_requests(self):
        """
        Build a single CivicClerk API request covering meetings from one
        month in the past through six months in the future, filtered by
        this spider's category_id.
        """
        today = datetime.today()
        window_start = today - relativedelta(months=1)
        window_end = today + relativedelta(months=6)

        # API expects ISO-8601 UTC-style timestamps, e.g. "2024-03-01T00:00:00Z".
        # Minutes/seconds are pinned to :00:00 to keep the window boundaries stable.
        meeting_date_from = window_start.strftime("%Y-%m-%dT%H:00:00Z")
        meeting_date_to = window_end.strftime("%Y-%m-%dT%H:00:00Z")

        # OData-style filter on category and start date, ordered chronologically
        url = f"{self.base_url}/v1/Events?$filter=categoryId+in+({self.category_id})+and+startDateTime+ge+{meeting_date_from}+and+startDateTime+le+{meeting_date_to}&$orderby=startDateTime"  # noqa

        yield Request(url, callback=self.parse)

    def parse(self, response):
        """
        Parse a list of meetings from the JSON response.

        Yields a Meeting item per entry in the response's "value" array;
        logs a warning and stops if the payload is empty or malformed.
        """
        items = response.json()
        if not items or "value" not in items:
            self.logger.warning("No meetings found")
            return
        for item in items["value"]:
            meeting = Meeting(
                title=item["eventName"],
                description=item["eventDescription"],
                classification=self._parse_classification(item["categoryName"]),
                start=self._parse_start(item["startDateTime"]),
                end=None,
                all_day=False,
                time_notes="",
                location=self._parse_location(item["eventLocation"]),
                links=self._parse_links(item),
                source=response.url,
            )
            meeting["status"] = self._get_status(meeting)
            meeting["id"] = self._get_id(meeting)
            yield meeting

    def _parse_classification(self, name):
        """
        Map a CivicClerk category name onto one of the allowed
        classification constants, falling back to NOT_CLASSIFIED.
        """
        lowered = name.lower()
        # "city council" must be checked before the broader keywords
        if "city council" in lowered:
            return CITY_COUNCIL
        if "board" in lowered:
            return BOARD
        if "commission" in lowered:
            return COMMISSION
        if "committee" in lowered:
            return COMMITTEE
        return NOT_CLASSIFIED

    def _parse_start(self, start):
        """
        Parse the start date and time from the API's
        "YYYY-MM-DDTHH:MM:SSZ" string into a naive datetime.
        """
        return datetime.strptime(start, "%Y-%m-%dT%H:%M:%SZ")

    def _parse_location(self, location):
        """
        Build a location dict from the API's eventLocation object.

        Joins address1 with any of address2/city/state/zipCode that are
        present and non-empty. Missing keys are tolerated via .get() so a
        sparse payload cannot raise KeyError.
        """
        if not location:
            return {
                "name": "",
                "address": "",
            }
        address = location.get("address1") or ""
        for field in ("address2", "city", "state", "zipCode"):
            part = location.get(field)
            if part:
                address += f", {part.strip()}"
        return {
            "name": "",
            "address": address,
        }

    def _parse_links(self, item):
        """
        Parse published files into a list of {"title", "href"} links
        pointing at the CivicClerk file-stream endpoint.
        """
        links = []
        if "publishedFiles" in item and len(item["publishedFiles"]) > 0:
            for file in item["publishedFiles"]:
                links.append(
                    {
                        "title": file["name"],
                        "href": f"https://mandannd.api.civicclerk.com/v1/Meetings/GetMeetingFileStream(fileId={file['fileId']},plainText=false)",  # noqa
                    }
                )
        return links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
from city_scrapers_core.spiders import CityScrapersSpider | ||
|
||
from city_scrapers.mixins.mc import MCMixin | ||
|
||
# One entry per City of Mandan body scraped from the CivicClerk API.
# Each dict supplies the spider's `agency`, `name`, and `category_id`
# attributes plus the `class_name` under which it is registered.
spider_configs = [
    {
        "name": "bisnd_mc_g",
        "class_name": "BisndMCGSpider",
        "agency": "City of Mandan - General",
        "category_id": 24,
    },
    {
        "name": "bisnd_mc_cc",
        "class_name": "BisndMCCCSpider",
        "agency": "City of Mandan - City Commission",
        "category_id": 26,
    },
    {
        "name": "bisnd_mc_aa",
        "class_name": "BisndMCAASpider",
        "agency": "City of Mandan - Airport Authority",
        "category_id": 27,
    },
    {
        "name": "bisnd_mc_arc",
        "class_name": "BisndMCARCSpider",
        "agency": "City of Mandan - Architectural Review Commission",
        "category_id": 28,
    },
    {
        "name": "bisnd_mc_csc",
        "class_name": "BisndMCCSCSpider",
        "agency": "City of Mandan - Civil Service Commission",
        "category_id": 29,
    },
    {
        "name": "bisnd_mc_ceab",
        "class_name": "BisndMCCEABSpider",
        "agency": "City of Mandan - Code Enforcement Appeals Board",
        "category_id": 30,
    },
    {
        "name": "bisnd_mc_cbc",
        "class_name": "BisndMCCBCSpider",
        "agency": "City of Mandan - Community Beautification Committee",
        "category_id": 31,
    },
    {
        "name": "bisnd_mc_boe",
        "class_name": "BisndMCBOESpider",
        "agency": "City of Mandan - Board of Equalization",
        "category_id": 32,
    },
    {
        "name": "bisnd_mc_gfc",
        "class_name": "BisndMCGFCSpider",
        "agency": "City of Mandan - Growth Fund Committee",
        "category_id": 33,
    },
    {
        "name": "bisnd_mc_lbot",
        "class_name": "BisndMCLBOTSpider",
        "agency": "City of Mandan - Library Board of Trustees",
        "category_id": 34,
    },
    {
        "name": "bisnd_mc_pa",
        "class_name": "BisndMCPASpider",
        "agency": "City of Mandan - Parking Authority",
        "category_id": 35,
    },
    {
        "name": "bisnd_mc_pazc",
        "class_name": "BisndMCPAZCSpider",
        "agency": "City of Mandan - Planning and Zoning Commission",
        "category_id": 36,
    },
    {
        "name": "bisnd_mc_rt",
        "class_name": "BisndMCRTSpider",
        "agency": "City of Mandan - Remediation Trust",
        "category_id": 37,
    },
    {
        "name": "bisnd_mc_rzc",
        "class_name": "BisndMCRZCSpider",
        "agency": "City of Mandan - Renaissance Zone Committee",
        "category_id": 38,
    },
    {
        "name": "bisnd_mc_sac",
        "class_name": "BisndMCSACSpider",
        "agency": "City of Mandan - Special Assessment Committee",
        "category_id": 39,
    },
    {
        "name": "bisnd_mc_tb",
        "class_name": "BisndMCTBSpider",
        "agency": "City of Mandan - Tree Board",
        "category_id": 40,
    },
    {
        "name": "bisnd_mc_vc",
        "class_name": "BisndMCVCSpider",
        "agency": "City of Mandan - Visitors Committee",
        "category_id": 41,
    },
    {
        "name": "bisnd_mc_wb",
        "class_name": "BisndMCWBSpider",
        "agency": "City of Mandan - Weed Board",
        "category_id": 42,
    },
]
|
||
|
||
def create_spiders():
    """
    Dynamically create spider classes from the spider_configs list and
    register them in the module's global namespace. This is equivalent to
    declaring each spider class by hand, just more concise.

    Safe to call more than once: configs are read, never mutated (the
    original `config.pop("class_name")` emptied the shared dicts, so a
    second invocation raised KeyError), and already-registered class
    names are skipped so scrapy CLI commands like `scrapy list` that
    re-import the module don't redeclare the spiders.
    """
    for config in spider_configs:
        class_name = config["class_name"]
        # Skip names already registered to avoid duplicate declarations
        # when the module is imported/executed more than once.
        if class_name not in globals():
            # Copy everything except class_name into the class attributes:
            # name, agency, category_id (required by MCMixinMeta).
            attrs = {k: v for k, v in config.items() if k != "class_name"}
            spider_class = type(
                class_name,
                (MCMixin, CityScrapersSpider),  # Base classes
                attrs,
            )
            # Register the class in the global namespace under its class_name
            globals()[class_name] = spider_class


create_spiders()
Oops, something went wrong.