-
Notifications
You must be signed in to change notification settings - Fork 0
/
student_tix_scanner.py
172 lines (122 loc) · 5.38 KB
/
student_tix_scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
Quick and dirty tool for scraping NYC classical music student tickets websites
and sending notifications when a new show is available!
Currently supported:
- The Metropolitan Opera (https://www.metopera.org/season/tickets/student-tickets/)
- Carnegie Hall (https://www.carnegiehall.org/Events/Discount-Programs/Student-Tickets)
Currently not working:
- New York Philharmonic - they seem to have removed rush tickets from their website since COVID (https://nyphil.org/rush - also check out Free Fridays that opens on Mondays at noon: https://nyphil.org/concerts-tickets/explore/discounts-and-group-sales/free-fridays)
Jason Manley, jmanley AT rockefeller DOT edu
"""
from bs4 import BeautifulSoup as bs
import urllib.request
from urllib.request import build_opener, HTTPCookieProcessor
import smtplib
from email.message import EmailMessage
import csv
import sys
import time
### UTILITY FUNCTIONS ###
def check_new_and_notify(shows, file, To):
# read old list
old_shows = []
try:
with open(file, mode='r') as csv_file:
reader = csv.reader(csv_file, delimiter=',')
for row in reader:
old_shows.append(row)
except:
old_shows = []
with open(file, mode='a') as csv_file:
writer = csv.writer(csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
for show in shows:
if show not in old_shows:
send_notification(show, From=file[:-4], To=To)
writer.writerow(show)
# UPDATE: note this will now keep a ROLLING log of all shows since the start of scraping
# sometimes shows are removed & re-added (particularly by NY Phil) which had resulted in redundant notifications
def send_notification(text, To, From='Me'):
import smtplib
from email.message import EmailMessage # easy enough to send a text: e.g. 1234567890@txt.att.net
message = EmailMessage()
message.set_charset('utf-8')
message.set_content(' '.join(text))
server = smtplib.SMTP('localhost') # You can modify this to send from an SMTP server of your choice
text = ' '.join(text)
from urllib.parse import unquote
server.send_message(message, From, To)
server.quit()
### SITE-SPECIFIC FUNCTIONS ###
def parse_metopera(To):
opener = build_opener(HTTPCookieProcessor())
response = opener.open('https://www.metopera.org/season/tickets/student-tickets/')
html = response.read()
soup = bs(html, "html.parser")
divs = soup.findAll("li", {"class": "calendar-list-day"})
def parse_metopera_listing(tag):
# apologies for this ugly HTML hack to get the data out
date = tag.contents[1].contents[0].replace(',','')
time = tag.contents[3].contents[1].contents[1].contents[0].replace(',','')
opera = tag.contents[3].contents[3].contents[1].contents[1].contents[0].replace(',','')
return [date, time, opera]
shows = []
for tag in divs:
shows.append(parse_metopera_listing(tag))
check_new_and_notify(shows, 'MetOpera.csv', To)
def parse_carnegie(To):
opener = build_opener(HTTPCookieProcessor())
response = opener.open('https://www.carnegiehall.org/Events/Discount-Programs/Student-Insider/Student-Discounts')
html = response.read()
soup = bs(html, "html.parser")
divs = soup.findAll("div", {"class": "ch-events-list-item"})
def parse_carnegie_listing(tag):
# apologies again
date = tag.contents[1].contents[1].contents[0][:3] + ' ' + tag.contents[1].contents[3].contents[0]
show = tag.contents[3].contents[1].contents[3].contents[3].contents[1].contents[1].contents[1].contents[0]
subtitle = tag.contents[3].contents[1].contents[3].contents[3].contents[1].contents[3].contents[0].replace('\r','').replace('\n','').replace(' ','')
if len(subtitle)>1:
show += ' (' + subtitle + ')'
return [date.replace(',',''), show.replace(',','')]
shows = []
for tag in divs:
shows.append(parse_carnegie_listing(tag))
check_new_and_notify(shows, 'Carnegie.csv', To)
def parse_nyphil(To):
opener = build_opener(HTTPCookieProcessor())
response = opener.open('https://nyphil.org/rush')
html = response.read()
soup = bs(html, "html.parser")
divs = soup.findAll("div", {"id": "main"})
shows = divs[0].findAll("div", {"id": "content"})[0].contents[7].contents[8:-6]
results = []
for i in range(len(shows)):
try:
if len(shows[i].contents)>0:
show = shows[i].contents[0]
i += 1
date = shows[i].split('—')[1][1:]
results.append([date.replace(',',''),show.replace(',','')])
except:
pass
check_new_and_notify(results, 'NyPhil.csv', To)
### COMMAND LINE SCRIPT ###
if __name__ == "__main__":
args = sys.argv[1:]
To = args[0]
while True:
if '-met' in args:
try:
parse_metopera(To)
except:
print('met error')
if '-carnegie' in args:
try:
parse_carnegie(To)
except:
print('carnegie error')
if '-nyphil' in args:
try:
parse_nyphil(To)
except:
print('nyphil error')
time.sleep(300) # sorry for hard coding ;)