-
Notifications
You must be signed in to change notification settings - Fork 0
/
twaiter.py
executable file
·53 lines (42 loc) · 1.88 KB
/
twaiter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# based on http://badhessian.org/2012/10/collecting-real-time-twitter-data-with-the-streaming-api/
# with modifications by http://github.com/marciw
# requires Tweepy https://github.com/tweepy/tweepy
from tweepy import StreamListener
import json, time, sys
class TWaiter(StreamListener):
    """Stream listener that logs tweets and deletion notices to text files.

    Tweets are written one per line as ``id: <id_str>, text: <text>`` to a
    file named ``<label>.<Mon><day>-<HHMM>.txt``; the ids of deleted tweets
    are appended to ``deleted_tweets.txt``.  See Tweepy for more info on the
    listener callbacks.
    """

    def __init__(self, api=None, label='default_collection', max_tweets=500):
        """Open the output files and reset the tweet counter.

        api        -- API object to keep on the listener; a default
                      ``tweepy.API()`` is created when omitted.
        label      -- prefix of the tweet output file name.
        max_tweets -- number of tweets to collect before exiting; ``None``
                      disables the limit.  Defaults to 500 (for testing);
                      increase this number to get bigger data!
        """
        if not api:
            # The original referenced API without importing it anywhere,
            # which raised NameError whenever no api was supplied.
            from tweepy import API
            api = API()
        self.api = api
        self.counter = 0
        self.label = label
        self.max_tweets = max_tweets
        stamp = time.strftime('%b%d-%H%M')
        self.output = open(label + '.' + stamp + '.txt', 'w')
        self.deleted = open('deleted_tweets.txt', 'a')

    def on_data(self, data):
        """Dispatch one raw stream message (a JSON string).

        The presence of 'in_reply_to_status' indicates a "normal" tweet;
        the presence of 'delete' indicates a tweet that was deleted after
        posting.  Any other message is ignored.  Returns False only when
        on_delete() returns False, otherwise None.
        """
        # NOTE(review): these are substring tests on the raw payload, so the
        # order matters: a tweet whose text mentions "delete" is still
        # handled as a tweet because the first test wins.
        if 'in_reply_to_status' in data:
            self.on_status(data)
        elif 'delete' in data:
            notice = json.loads(data)['delete']['status']
            if self.on_delete(notice['id'], notice['user_id']) is False:
                return False

    def on_status(self, status):
        """Write the id and text of one tweet (raw JSON string) to the output.

        Once ``max_tweets`` tweets have been written, both files are closed
        and the process exits via sys.exit().
        """
        # for now we want only the text of the tweet and the id.
        tweet = json.loads(status)  # parse once instead of once per field
        # json.dumps keeps the values escaped on a single line; [1:-1]
        # strips the surrounding double quotes it adds.
        text = str(json.dumps(tweet['text']))
        tweet_id = str(json.dumps(tweet['id_str']))
        self.output.write(
            "id: " + tweet_id[1:-1] + ", text: " + text[1:-1] + "\n")
        self.counter += 1
        if self.max_tweets is not None and self.counter >= self.max_tweets:
            self._close_files()
            print("Finished collecting tweets.")
            # should exit more gracefully.
            sys.exit()
        return

    def on_delete(self, status_id, user_id):
        """Append the id of a deleted tweet to the deletions file."""
        self.deleted.write(str(status_id) + "\n")
        return

    def on_error(self, status_code):
        """Report the error status code on stderr and return False."""
        sys.stderr.write('Error: ' + str(status_code) + "\n")
        return False

    def _close_files(self):
        """Close both output files so buffered lines reach the disk."""
        # The original closed only self.output and left self.deleted open.
        self.output.close()
        self.deleted.close()