-
Notifications
You must be signed in to change notification settings - Fork 0
/
pytter.py
100 lines (79 loc) · 3.28 KB
/
pytter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# -*- coding:utf-8 -*-
import tweepy
import re
import html
from markovGenerator import MarkovGenerator
class Twitter:
    """Small wrapper around a tweepy client for reading and posting tweets.

    The instance keeps the OAuth credentials, the tweepy ``auth``/``api``
    objects and, in ``tweets``, the last list returned by ``get_statuses()``.
    """

    # Config
    # TODO: Make a config file
    # Regex source for ONE leading mention, optionally written ".@user".
    # A username is 1 to 15 letters, digits or underscores; whitespace is
    # deliberately not part of it, otherwise the words following the mention
    # would be swallowed together with it.
    # https://support.twitter.com/articles/20065832#error
    pattern_username = r"(\.?@[a-zA-Z0-9_]{1,15})"

    # We can't get more tweets than this with /statuses/home_timeline
    # See: https://dev.twitter.com/rest/reference/get/statuses/home_timeline
    MAX_TIMELINE_TWEETS = 800
    # Number of tweets requested per call to home_timeline
    TIMELINE_PAGE_SIZE = 200

    def __init__(self, consumer_key="", consumer_secret="",
                 access_token="", access_secret=""):
        """Store the credentials and build the tweepy API client.

        Args:
            consumer_key: OAuth consumer key of the application.
            consumer_secret: OAuth consumer secret of the application.
            access_token: OAuth access token of the user.
            access_secret: OAuth access token secret of the user.
        """
        # User credentials
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        self.access_token = access_token
        self.access_secret = access_secret

        # Tweepy connection
        self.auth = tweepy.OAuthHandler(self.consumer_key, self.consumer_secret)
        self.auth.set_access_token(self.access_token, self.access_secret)
        self.api = tweepy.API(self.auth)

        # Contains the last bunch of tweets retrieved with self.get_statuses()
        self.tweets = []

    def get_remaining_requests(self):
        """Get the number of remaining requests before we are limited.

        Returns:
            The "remaining" counter reported by the rate limit status for
            /statuses/home_timeline.
        """
        limits = self.api.rate_limit_status()
        return limits["resources"]["statuses"]["/statuses/home_timeline"]["remaining"]

    def get_statuses(self):
        """Return the most recent cleaned statuses from the user's timeline.

        The timeline is fetched page by page until MAX_TIMELINE_TWEETS texts
        have been collected, the rate limit is exhausted, or a page gives no
        further ``max_id`` to continue from. Every text goes through
        ``clean_tweet`` and is kept only if ``is_nice_tweet`` accepts it.

        Returns:
            The list of kept tweet texts, also stored in ``self.tweets``.
        """
        result = []
        # Upper id bound of the next page. We can't retrieve all the statuses
        # in one bunch; None means "start from the newest tweets".
        max_id = None

        while (len(result) < self.MAX_TIMELINE_TWEETS
               and self.get_remaining_requests() > 0):
            page_args = {"count": self.TIMELINE_PAGE_SIZE, "tweet_mode": "extended"}
            if max_id is not None:
                page_args["max_id"] = max_id
            statuses = self.api.home_timeline(**page_args)

            for status in statuses:
                # If this is a retweet, the fetched tweet may be truncated,
                # so prefer the text of the original status when it exists.
                try:
                    text = status.retweeted_status.full_text
                except AttributeError:
                    text = status.full_text

                text = self.clean_tweet(text)
                if self.is_nice_tweet(text):
                    result.append(text)

            # No continuation id: stop right away instead of spending one
            # more rate-limit request just to find out on the next pass.
            max_id = statuses.max_id
            if max_id is None:
                break

        self.tweets = result
        return result

    def is_nice_tweet(self, tweet):
        """Check if we should add the current tweet to our database.

        Args:
            tweet: The (already cleaned) tweet text.

        Returns:
            False for an empty tweet or a tweet made of a single link,
            True otherwise.
        """
        # An empty tweet is not a nice tweet
        if tweet == "":
            return False
        # We won't add a tweet that only contains one link
        if tweet.startswith(("http", "https", "www")) and len(tweet.split()) == 1:
            return False
        # Validated
        return True

    def clean_tweet(self, tweet):
        """Decode HTML entities and drop the mentions that open the tweet.

        Args:
            tweet: The raw tweet text.

        Returns:
            The text with HTML entities unescaped and every leading "@user" /
            ".@user" removed. A leading "@" that is not followed by a valid
            username is left as it is.
        """
        # Turn HTML entities (e.g. "&amp;") back into plain characters
        tweet = html.unescape(tweet)

        # Remove the @ usernames at the start of the tweet, one at a time
        while tweet.startswith(("@", ".@")):
            # Anchor at the start: an unanchored search could remove a later
            # mention instead, or change nothing and never leave this loop.
            mention = re.match(self.pattern_username, tweet)
            if mention is None:
                break
            tweet = tweet[mention.end():].strip()

        return tweet

    def post_tweet(self, tweet):
        """Post ``tweet`` as a new status through the tweepy API client.

        Args:
            tweet: The text of the status to publish.
        """
        self.api.update_status(tweet)
# This module is only meant to be imported; the program is started from main.py.
if __name__ == '__main__':
    print("Please execute main.py")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # site module and is not guaranteed to exist (e.g. with "python -S").
    raise SystemExit