# scrape-tweets.py - collect tweets from the Twitter Streaming API using tweepy.
# Load necessary modules and packages
import tweepy
import authenticate
import scrapeoptions
from tweepy import Stream
from tweepy.streaming import StreamListener
# Set up OAuth with the credentials held in the `authenticate` module
# (a Twitter Application must be created first - check read.me), attach the
# access token, and build the tweepy API client from the handler.
auth = tweepy.OAuthHandler(
    authenticate.api_key,
    authenticate.api_Secret,
)
auth.set_access_token(
    authenticate.access_token,
    authenticate.access_token_secret,
)
api = tweepy.API(auth)
# Subclass StreamListener (for more details, see the tweepy documentation and repo: https://github.com/tweepy/tweepy).
# The on_data() method is overridden in order to obtain the raw data from the Twitter Streaming API.
# More about the Tweet object: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object
class MyListener(StreamListener):
    """Stream listener that appends raw tweets to a file until a limit is reached.

    Retweets and quoted tweets are skipped and do not count toward the limit.
    Every accepted payload is appended unchanged to the file named by
    ``scrapeoptions.nameoutputfile``.
    """

    def __init__(self, limit=10):
        """Initialise the listener.

        Args:
            limit: Number of accepted tweets after which ``on_data`` returns
                False (defaults to 10).
        """
        super().__init__()
        self.counter = 0
        self.limit = limit

    def on_data(self, data):
        """Handle one raw payload received from the stream.

        Args:
            data: Raw payload from the stream; it is written to the output
                file as-is.

        Returns:
            None when the payload is skipped (retweet or quoted tweet),
            True while fewer than ``limit`` tweets have been accepted, and
            False once the limit is reached (signals a disconnect).
        """
        # ! Retweets and quoted tweets are EXCLUDED from the sample.
        # NOTE: this is a substring test on the raw payload, not a lookup of
        # a parsed JSON key.
        if 'retweeted_status' in data or 'quoted_status' in data:
            return None

        # The tweet is counted before the write, so a failed write still
        # counts toward the limit.
        self.counter += 1

        # Append each new tweet to the output file.
        try:
            with open(scrapeoptions.nameoutputfile, 'a') as f:
                f.write(data)
        except Exception as err:
            # Best effort: report the failure and keep streaming. Catching
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate, so the script can still be stopped.
            print("Error: on_data.", err)

        # Disconnect once the configured limit has been reached.
        return self.counter < self.limit

    def on_error(self, status):
        """Print the error status and disconnect when rate limited.

        Args:
            status: Status code reported by the stream.

        Returns:
            False when ``status`` is 420 (rate limited by Twitter), which
            signals a disconnect; None for any other status.
        """
        print(status)
        # Disconnect if we are rate limited by Twitter.
        if status == 420:
            return False
        return None
# Create the listener with the configured tweet limit, open the stream with
# the authenticated handler, and filter by the configured keywords/languages.
stream_listener = MyListener(limit=scrapeoptions.setlimit)  # MyListener's own default is 10 tweets
twitter_stream = Stream(auth, stream_listener)
twitter_stream.filter(
    track=scrapeoptions.keywords,
    languages=scrapeoptions.tweetlang,
)
print("Your tweets are ready.")