-
Notifications
You must be signed in to change notification settings - Fork 0
/
appreciation_scrape.py
45 lines (35 loc) · 1.45 KB
/
appreciation_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import requests
from bs4 import BeautifulSoup
from adLinks import *
from projectScrape import *
import re
import pandas as pd
import numpy as np
from readFile import *
# Given a Behance user path, return a DataFrame pairing that user's id with the URL of each project they appreciated (None if scraping fails).
def appreacted_list(userURL):
    """Build a table of the projects a Behance user has appreciated.

    The URL ``https://www.behance.net/<userURL>/appreciated`` is handed to
    ``all_links_of_divs`` (imported helper; presumably returns the ``href``
    strings of the matching ``<a>`` cover links -- confirm against its
    definition). The query string is stripped from each returned link.

    Args:
        userURL: Profile path of the user. Only the segment after the last
            ``/`` is stored as the user id.

    Returns:
        A ``pandas.DataFrame`` with the columns ``user_id`` and
        ``appreciation_project_url`` (one row per link), or ``None`` when
        anything goes wrong while collecting the links.
    """
    try:
        appreciated_links = all_links_of_divs(
            'https://www.behance.net/' + userURL + "/appreciated",
            'a', 'class', 'AppreciationCover-coverLink-x1o',
        )
        # Drop tracking/query parameters so each project has one canonical URL.
        project_urls = [link.split('?')[0] for link in appreciated_links]
        user_id = userURL.split('/')[-1]
        return pd.DataFrame({
            'user_id': [user_id] * len(project_urls),
            'appreciation_project_url': project_urls,
        })
    except Exception:
        # Best-effort scrape: callers treat None as "no data for this user".
        # Catching Exception (not a bare except) keeps Ctrl-C and SystemExit
        # able to stop a long-running crawl.
        return None
# Example: user_list = read_list('card_user_list'); index = user_list.index("['sayedgolamrabbi8960']")  (entries after `index` are processed)
def generate_appreciation_table(user_list, index):
    """Append appreciation rows for every entry after ``index`` to the CSV.

    Each entry of ``user_list`` is a string holding a Python list literal of
    user ids (e.g. ``"['sayedgolamrabbi8960']"``). For every user id the
    result of ``appreacted_list`` is appended, without a header, to
    ``card_appreciation.csv`` in the current working directory.

    If the CSV has not been initiated yet, create it once beforehand with::

        tbl = pd.DataFrame(columns = ['user_id', 'appreciation_project_url'])
        tbl.to_csv('card_appreciation.csv', index = False)

    Args:
        user_list: Sequence of stringified user-id lists.
        index: Position of the last entry that was already processed; work
            resumes at ``index + 1``.

    Returns:
        None. Progress is printed: the position of each entry, then either
        the user id on success or ``'error', <user id>`` on failure.
    """
    remaining = user_list[index + 1:]
    # The slice is always a tail of user_list, so this is the true position
    # of its first element (also correct for negative ``index`` values).
    first_position = len(user_list) - len(remaining)
    for position, entry in enumerate(remaining, start=first_position):
        # Printed so an interrupted run can be resumed from this position.
        print(position)
        # NOTE(review): eval() executes arbitrary code. If user_list can come
        # from an untrusted file, switch to ast.literal_eval.
        user_ids = eval(entry)
        for user_id in user_ids:
            result_tbl = appreacted_list(user_id)
            if result_tbl is None:
                # Scrape failed for this user; report it and move on.
                print('error', user_id)
                continue
            try:
                result_tbl.to_csv('card_appreciation.csv', mode='a', index=False, header=False)
            except Exception:
                print('error', user_id)
            else:
                print(user_id)