-
Notifications
You must be signed in to change notification settings - Fork 2
/
devpost.py
78 lines (62 loc) · 2.15 KB
/
devpost.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Root address of the Devpost site this script reads from.
baseUrl = 'http://hackumass-ii.devpost.com'
# Prefix of the paginated submissions listing; the page number is appended
# to it when a page is requested.
# NOTE(review): the double slash before "submissions" is kept exactly as
# written in the original -- confirm whether it is intentional.
subsUrl = ''.join((baseUrl, '//submissions?page='))
def _fetchSoup(url):
    """Download ``url`` and return it parsed with the 'html.parser' backend.

    The HTTP response is used as a context manager so the connection is
    closed as soon as parsing has consumed it, instead of staying open for
    the remainder of the crawl.
    """
    with urlopen(url) as response:
        return BeautifulSoup(response, 'html.parser')


def _textOf(tag):
    """Return the stripped text of ``tag``, or '' when ``tag`` is None."""
    return tag.get_text().strip() if tag is not None else ''


def main():
    """Crawl the submissions listing and write one CSV row per submission.

    Listing pages are requested as ``subsUrl + <page number>`` starting at
    page 1. The crawl stops at the first page that contains no submission
    links. For every link found, the linked page is fetched and its title,
    subtitle, image list and built-with list are collected; the accumulated
    rows are passed to ``writeToCSV`` once the crawl is finished.
    """
    fieldsList = []
    count = 1
    while True:
        subsObj = _fetchSoup(subsUrl + str(count))
        submissions = subsObj.findAll('a', {'class':'block-wrapper-link fade link-to-software'})
        if not submissions:
            # An empty listing page marks the end of the pagination.
            break

        for submission in submissions:
            subUrl = submission.attrs['href']
            subObj = _fetchSoup(subUrl)

            title = getTitle(subObj)
            if title is None:
                # Without a title element there is nothing to anchor the
                # subtitle lookup on; skip this page rather than abort the
                # whole crawl and lose the rows gathered so far.
                print('No Title Found: ' + str(subUrl))
                continue

            subtitle = getSubtitle(subObj, title)
            images = getImages(subObj)
            builtWith = getBuiltWith(subObj)
            # A missing subtitle is recorded as an empty string.
            fieldsList.append([_textOf(title), _textOf(subtitle), images, builtWith])

        count += 1

    writeToCSV(fieldsList)
def getTitle(subObj):
    """Look up the ``<h1 id="app-title">`` element of a parsed page.

    Returns whatever ``subObj.find`` returns for that query, unmodified.
    """
    return subObj.find('h1', {'id': 'app-title'})
def getSubtitle(subObj, title):
    """Look up a ``<p>`` element starting from the parent of ``title``.

    ``subObj`` is accepted for signature compatibility but is not used;
    the search is performed on ``title.parent`` only. The result of that
    ``find('p')`` call is returned unmodified.
    """
    container = title.parent
    return container.find('p')
def getImages(subObj):
    """Collect the ``src`` of every image listed in the page's gallery.

    The gallery is the ``<div id="gallery">`` element; each of its ``<li>``
    entries is expected to contain an ``<img>`` with a ``src`` attribute.

    Returns a list of ``src`` values in document order. The list is empty
    when the page has no gallery. Entries without a usable image are
    skipped with a printed notice.

    Only the expected lookup failures are handled. Any other exception
    (including KeyboardInterrupt) propagates to the caller instead of being
    swallowed by a bare ``except``.
    """
    try:
        entries = subObj.find('div', {'id': 'gallery'}).findAll('li')
    except AttributeError:
        # find() yielded None: the page has no gallery element.
        print('No Gallery Found')
        return []

    imgList = []
    for entry in entries:
        try:
            imgList.append(entry.find('img')['src'])
        except (AttributeError, TypeError, KeyError):
            # TypeError: no <img> inside the entry (None is not subscriptable).
            # KeyError: the <img> carries no src attribute.
            print('Non-Image Link Found')
    return imgList
def getBuiltWith(subObj):
    """Collect the text of every tool tag in the page's built-with section.

    The section is the ``<div id="built-with">`` element and the tags are
    its ``<span class="cp-tag">`` descendants.

    Returns a list of stripped tag texts in document order. The list is
    empty (and a notice is printed) when the page has no built-with
    section.

    Only the missing-section case is handled. Any other exception
    propagates to the caller instead of being swallowed by a bare
    ``except``.
    """
    try:
        tools = subObj.find('div', {'id': 'built-with'}).findAll('span', {'class': 'cp-tag'})
    except AttributeError:
        # find() yielded None: the page has no built-with section.
        print('No Tools Found')
        return []
    return [tool.get_text().strip() for tool in tools]
def writeToCSV(fieldsList):
    """Write the collected rows to ``data/data.csv``.

    ``fieldsList`` is an iterable of rows; the first four items of each row
    are written, in order, under the header
    ``Title, Subtitle, Images, Built With``. Any existing file is
    overwritten.

    Raises IndexError if a row has fewer than four items.
    """
    # Imported locally so this function brings its own dependency with it.
    import os

    csvPath = 'data/data.csv'
    # Create the output directory up front so a missing folder does not
    # throw away the rows that were just collected.
    os.makedirs(os.path.dirname(csvPath), exist_ok=True)

    # newline='' lets the csv module control line endings (otherwise blank
    # rows appear on Windows). An explicit UTF-8 encoding keeps non-ASCII
    # titles writable regardless of the system locale.
    with open(csvPath, 'w', newline='', encoding='utf-8') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerow(('Title', 'Subtitle', 'Images', 'Built With'))
        for row in fieldsList:
            writer.writerow((row[0], row[1], row[2], row[3]))
# Script entry point: start the crawl. NOTE: this call is unconditional, so
# it also runs whenever this module is imported.
main()