-
Notifications
You must be signed in to change notification settings - Fork 68
/
requester.py
112 lines (96 loc) · 4.45 KB
/
requester.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import re
import requests
import time
from utils import _init_request_vars
def _get_headers(pageurl):
    '''
    Request the mobile variant of a page and build request headers from it.

    The leading ``www.`` host label of ``pageurl`` (with or without an
    ``http://`` / ``https://`` scheme) is replaced by ``m.``, that URL is
    fetched once, and every cookie found on the response is folded into a
    single ``cookie`` header.

    Args:
        pageurl: URL of the page or group to visit.

    Returns:
        dict with the keys ``accept``, ``accept-language`` and ``cookie``.
        ``cookie`` is ``name=value`` pairs joined by ``'; '``; it is an empty
        string when the response carries no cookies.

    Raises:
        Whatever ``requests.get`` raises is propagated; there is no retry and
        no timeout on this request.
    '''
    # Rewrite only the host prefix. A plain re.sub('www', 'm', ...) would also
    # mangle any "www" that happens to appear later in the path or page name.
    pageurl = re.sub(r'^((?:https?://)?)www\.', r'\1m.', pageurl, count=1)
    resp = requests.get(pageurl)
    cookies = resp.cookies.get_dict()
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-language': 'en',
        'cookie': '; '.join(
            '{}={}'.format(name, value) for name, value in cookies.items()),
    }
    return headers
def _get_homepage(pageurl, headers):
    '''
    Request the homepage of a page or group, retrying when the request fails.

    One trailing slash is stripped from ``pageurl`` and the URL is requested
    with a 3 second timeout. Every failed attempt is followed by a 5 second
    pause; once more than 20 attempts have failed a placeholder is returned
    instead of a real response.

    Args:
        pageurl: URL of the page or group.
        headers: Header dict passed through to ``requests.get``.

    Returns:
        The ``requests`` response on success. After giving up, a placeholder
        *class* (not an instance) whose only attribute is
        ``text = 'Sorry, something went wrong.'``. The placeholder has no
        ``url`` attribute, so callers that read ``.url`` must check for it.
    '''
    pageurl = re.sub('/$', '', pageurl)
    timeout_cnt = 0
    while True:
        try:
            return requests.get(pageurl, headers=headers, timeout=3)
        except Exception:
            # ``Exception`` instead of a bare ``except`` keeps the best-effort
            # retry for request errors while letting KeyboardInterrupt and
            # SystemExit break out of the loop.
            time.sleep(5)
            timeout_cnt += 1
            if timeout_cnt > 20:
                class homepage_response():
                    text = 'Sorry, something went wrong.'
                return homepage_response
def _get_pageabout(homepage_response, entryPoint, headers):
    '''
    Fetch the "about" page that belongs to an already requested homepage.

    The target URL is ``homepage_response.url`` with one trailing slash
    removed and ``/about`` appended. ``entryPoint`` is accepted but not used
    by this function.

    Returns:
        The response object returned by ``requests.get``.
    '''
    base_url = re.sub('/$', '', homepage_response.url)
    about_url = base_url + '/about'
    return requests.get(about_url, headers=headers)
def _get_pagetransparency(homepage_response, entryPoint, headers):
'''
Send a request to get the transparency page response
'''
pageurl = re.sub('/$', '', homepage_response.url)
if entryPoint in ['ProfilePlusCometLoggedOutRouteRoot.entrypoint']:
transparency_response = requests.get(
pageurl + '/about_profile_transparency', headers=headers)
return transparency_response
def _get_posts(headers, identifier, entryPoint, docid, cursor):
    '''
    Request the next batch of posts for a fanspage or group.

    For the ``'nojs'`` entry point a GET is sent to the
    ``pages_reaction_units/more/`` endpoint; every other entry point goes
    through a POST to the GraphQL endpoint using ``docid``.

    NOTE: the nested cursor / variables payloads are serialised with
    ``str()`` (Python dict repr, single-quoted), and the ``'nojs'`` request is
    sent without ``headers``.

    Returns:
        The response object from ``requests.get`` / ``requests.post``.
    '''
    if entryPoint != 'nojs':
        # e.g. 'CometSinglePageHomeRoot.entrypoint',
        # 'ProfilePlusCometLoggedOutRouteRoot.entrypoint',
        # 'CometGroupDiscussionRoot.entrypoint'
        variables = {'cursor': cursor,
                     'id': identifier,
                     'count': 3}
        form = {'variables': str(variables),
                'doc_id': docid}
        return requests.post(url='https://www.facebook.com/api/graphql/',
                             data=form,
                             headers=headers)

    cursor_state = {"timeline_cursor": cursor,
                    "timeline_section_cursor": '{}',
                    "has_next_page": 'true'}
    query = {'page_id': identifier,
             'cursor': str(cursor_state),
             'surface': 'www_pages_posts',
             'unit_count': 10,
             '__a': '1'}
    return requests.get(url='https://www.facebook.com/pages_reaction_units/more/',
                        params=query)
if __name__ == '__main__':
    # Manual smoke test: fetch one batch of posts for a single page and print
    # what the parser extracted from it.
    from paser import _parse_entryPoint, _parse_identifier, _parse_docid, _parse_composite_graphql

    # Other targets that can be swapped in here:
    #   https://www.facebook.com/ec.ltn.tw/
    #   https://www.facebook.com/Gooaye
    #   https://www.facebook.com/groups/pythontw
    pageurl = 'https://www.facebook.com/hatendhu'

    headers = _get_headers(pageurl)
    homepage_response = _get_homepage(pageurl=pageurl, headers=headers)

    entryPoint = _parse_entryPoint(homepage_response)
    identifier = _parse_identifier(entryPoint, homepage_response)
    docid = _parse_docid(entryPoint, homepage_response)

    # Start from an empty cursor, i.e. the first batch of posts.
    # NOTE(review): an extra argument-less _init_request_vars() call whose
    # results were never used is not repeated here; this assumes the helper
    # has no side effects - confirm against utils.
    df, cursor, max_date, break_times = _init_request_vars(cursor='')
    resp = _get_posts(headers=headers, identifier=identifier,
                      entryPoint=entryPoint, docid=docid, cursor=cursor)
    ndf, max_date, cursor = _parse_composite_graphql(resp)

    # Bare expression statements show nothing when run as a script, so print.
    print(ndf)
    print(max_date)
    print(cursor)