-
Notifications
You must be signed in to change notification settings - Fork 0
/
SFPlanning-BS.py
29 lines (24 loc) · 900 Bytes
/
SFPlanning-BS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Crawl the archived SF Planning meeting pages in two hops: the index page
# links to "date" pages, each of those links to nested pages, and the HTML of
# every nested page is saved under OUTPUT_DIR.
BASE_URL = 'http://default.sfplanning.org/meetingarchive/planning_dept/sf-planning.org/'
INDEX_URL = BASE_URL + 'index.aspx-page=1000.html'
CONTENT_DIV_ID = 'ctl00_content_Screen'
OUTPUT_DIR = 'sfplanning'
REQUEST_TIMEOUT = 30  # seconds; without one a stalled server hangs the crawl
CRAWL_DELAY = 1  # seconds to pause between requests


def _fetch_html(url):
    """Return the decoded response body for *url*.

    Raises:
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status (4xx/5xx), so error pages are never mistaken
            for archive content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def _content_links(html):
    """Return absolute URLs for every anchor inside the content div.

    Anchors without an ``href`` are ignored, and a page that lacks the
    content div yields an empty list instead of raising.
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('div', {'id': CONTENT_DIV_ID})
    if container is None:
        return []
    # urljoin matches plain concatenation for simple relative hrefs and also
    # resolves absolute or root-relative hrefs correctly.
    return [
        urljoin(BASE_URL, anchor['href'])
        for anchor in container.find_all('a', href=True)
    ]


def _output_name(url):
    """Return the local file name for *url*.

    The name is the text after the last ``=`` (``...-page=1000.html`` gives
    ``1000.html``) with any directory components removed, so the file always
    lands directly inside OUTPUT_DIR. The result may be empty.
    """
    tail = url.split('=')[-1]
    return os.path.basename(tail.replace('\\', '/'))


def main():
    """Collect the nested links from every date page and save their HTML."""
    # The index page is the root of the crawl; if it cannot be fetched there
    # is nothing to do, so any request error is allowed to propagate.
    date_links = _content_links(_fetch_html(INDEX_URL))

    # collect nested links
    all_links = []
    for date_url in date_links:
        try:
            page_html = _fetch_html(date_url)
        except requests.RequestException as err:
            # One bad page should not abort the whole crawl.
            print(f'skipping {date_url}: {err}')
        else:
            all_links.extend(_content_links(page_html))
        time.sleep(CRAWL_DELAY)

    # save HTML response for all links
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # dict.fromkeys drops duplicate URLs while keeping first-seen order, so
    # no page is requested more than once.
    for url in dict.fromkeys(all_links):
        name = _output_name(url)
        if name in ('', '.', '..'):
            print(f'skipping {url}: cannot derive a file name')
            continue
        try:
            html = _fetch_html(url)
        except requests.RequestException as err:
            print(f'skipping {url}: {err}')
        else:
            print(name)
            # Explicit encoding: the platform default may not be able to
            # represent every character in the page.
            with open(os.path.join(OUTPUT_DIR, name), 'w', encoding='utf-8') as f:
                f.write(html)
        time.sleep(CRAWL_DELAY)


if __name__ == '__main__':
    main()