-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBot_REST.py
230 lines (177 loc) · 8.91 KB
/
Bot_REST.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import Setup
import tweepy
import time
import Reddit
import HPlus_Pedia
import NYT
from hplusbot_database import db_get_last_tweet_id, db_store_last_tweet_id
from twitterbot_utilities import to_json, word_list, retrieve_last_seen_id
from apscheduler.schedulers.background import BackgroundScheduler
# Path of the text file holding a previously seen tweet ID. In this file it is only read by the legacy
# ratio_of_likes() helper (through retrieve_last_seen_id).
LAST_ID_FILE = 'data/Last_Tweet_ID.txt'
# Screen name of the bot account; retweet() asks the API for the users this account follows.
BOT_USERNAME = "HPlusBot"
# Minimum favorite count a tweet must have before retweet() considers it.
LIKES_TO_RETWEET = 7
def wiki_post_tweet(api):
    """
    Post a status whose text is whatever HPlus_Pedia.random_page() returns.

    :param api: Twitter API object exposing update_status().
    """
    status_text = HPlus_Pedia.random_page()
    api.update_status(status=status_text)
def nyt_post_tweet(api):
    """
    Post a status whose text is whatever NYT.scrapper() returns.

    :param api: Twitter API object exposing update_status().
    """
    status_text = NYT.scrapper()
    api.update_status(status=status_text)
def retweet(api):
    """
    Retweet the popular, on-topic tweets of the users that the bot follows.

    For every user followed by BOT_USERNAME, the timeline is read starting after the tweet ID returned by
    db_get_last_tweet_id(). A tweet is retweeted when it is not itself a retweet, has at least LIKES_TO_RETWEET
    likes, and contains at least one word found in Setup.HASHTAGS or Setup.KEYWORDS. At the end, the highest ID
    among the tweets that met those criteria is passed to db_store_last_tweet_id() to serve as the starting point
    of the next call.

    :param api: Tweepy API object (the one returned by Setup.setup_twitter()).
    """
    last_retweet_id = db_get_last_tweet_id()
    most_recent_status_id = last_retweet_id
    users_followed = api.friends_ids(screen_name=BOT_USERNAME)  # List of IDs of users that the bot follows

    for user in users_followed:
        tweets_list = tweepy.Cursor(api.user_timeline, id=user, tweet_mode='extended',
                                    since_id=last_retweet_id).items()
        for tweet in tweets_list:
            parsed_tweet = to_json(tweet)
            tweet_text = word_list(parsed_tweet["full_text"])

            # Skip statuses that are retweets (first word is "rt") and statuses that yield no words at all;
            # the emptiness check also keeps the [0] lookup from raising IndexError.
            if not tweet_text or tweet_text[0] == "rt":
                continue
            # Skip statuses that do not have the required number of likes yet.
            if parsed_tweet["favorite_count"] < LIKES_TO_RETWEET:
                continue
            # At least one word must match the configured hashtags or keywords.
            if not (any(elem in Setup.HASHTAGS for elem in tweet_text) or
                    any(elem in Setup.KEYWORDS for elem in tweet_text)):
                continue

            tweet_id = parsed_tweet["id"]
            try:
                api.retweet(id=tweet_id)
            except tweepy.TweepError as err:
                # Typically the status was already retweeted; report the real reason instead of guessing.
                print(f"Could not retweet {tweet_id}: {err}")
            else:
                print(f'Retweet: {tweet_id}')
            # If there are a lot of statuses to go through, sleeping between attempts helps to stay under
            # the API rate limit.
            time.sleep(5)

            if tweet_id > most_recent_status_id:
                most_recent_status_id = tweet_id

    print("Retweeting done!")
    db_store_last_tweet_id(most_recent_status_id)
if __name__ == "__main__":
    api = Setup.setup_twitter()

    # The scheduler shares the process with the Reddit stream started below, so it is a background
    # scheduler that runs alongside the rest of the script.
    scheduler = BackgroundScheduler()

    # (job function, hours between two runs), registered in this order.
    periodic_jobs = (
        (retweet, 3),
        (wiki_post_tweet, 5),
        (nyt_post_tweet, 12),
    )
    for job, every_hours in periodic_jobs:
        scheduler.add_job(job, 'interval', args=[api], hours=every_hours)
    scheduler.start()

    Reddit.start_stream(api)
#
#
# WARNING!
# OLD METHODS AHEAD!!
# KEPT ONLY FOR REFERENCE, NOT CURRENTLY IN USE
#
#
def ratio_of_likes(api):
    """
    Print like statistics that help to choose the minimum number of likes required for a retweet.

    The function iterates through the tweets of the users that the bot follows, starting after the tweet ID read
    from LAST_ID_FILE, and only considers tweets that are not retweets and that contain at least one word found
    in Setup.HASHTAGS or Setup.KEYWORDS.
    Two dictionaries store the data: one for the number of tweets that have 0-49, 50-499, and 500 or more likes;
    and another for the total of likes in each of those ranges. After each user a progress line is printed, and
    at the end the function prints, for every non-empty range, the number of tweets, the total of likes, and the
    ratio (total of likes / number of tweets in that range).

    :param api: Tweepy API object.
    """
    # (dictionary key, label printed in the report), from the highest range to the lowest.
    ranges = (
        ("500<<", "500>> = "),
        ("50-499", "50-499 = "),
        ("0-49", "0-49 = "),
    )
    total = {key: 0 for key, _ in ranges}
    quantity = {key: 0 for key, _ in ranges}
    total_tweets = 0

    last_retweet_id = retrieve_last_seen_id(LAST_ID_FILE)
    users_followed = api.friends_ids(screen_name=BOT_USERNAME)  # List of IDs of users that the bot follows
    for user in users_followed:
        # Fall back to the numeric ID in the progress line when the user has no tweets to take a name from.
        screen_name = user
        tweets_list = tweepy.Cursor(api.user_timeline, id=user, tweet_mode='extended',
                                    since_id=last_retweet_id).items()
        for tweet in tweets_list:
            parsed = to_json(tweet)
            total_tweets += 1
            screen_name = parsed["user"]["screen_name"]
            tweet_text = word_list(parsed["full_text"])

            # Skip retweets (first word is "rt") and statuses that yield no words at all.
            if not tweet_text or tweet_text[0] == "rt":
                continue
            if not (any(elem in Setup.HASHTAGS for elem in tweet_text) or
                    any(elem in Setup.KEYWORDS for elem in tweet_text)):
                continue

            likes = parsed["favorite_count"]
            if likes >= 500:
                key = "500<<"
            elif likes >= 50:
                key = "50-499"
            else:
                key = "0-49"
            total[key] += likes
            quantity[key] += 1
        print("Done ", total_tweets, screen_name)

    print(total_tweets)
    for key, label in ranges:
        # Empty ranges are skipped, which also avoids dividing by zero.
        if quantity[key] > 0:
            print(label, quantity[key])
            print("Total = ", total[key])
            print("Ratio = ", total[key] / quantity[key], "\n")
def check_recent_tweets(api, username):
    """
    Tell whether a user has recently tweeted (or retweeted) about the subjects of our interest.

    The statuses returned by api.user_timeline(username) are inspected, and each one that contains at least one
    word found in Setup.KEYWORDS or Setup.HASHTAGS counts as a match. The user is eligible to be followed when
    at least 2 statuses match.

    :param api: Tweepy API object.
    :param username: screen name of the user to inspect.
    :return: True when 2 or more of the returned statuses match, False otherwise.
    """
    def is_on_topic(status):
        # The raw status object is messy, so it is converted with to_json() before the text is read
        # and split into a list of words.
        words = word_list(to_json(status)["text"])
        return any(word in Setup.KEYWORDS for word in words) or \
            any(word in Setup.HASHTAGS for word in words)

    recent_statuses = api.user_timeline(username)
    on_topic_count = sum(1 for status in recent_statuses if is_on_topic(status))
    return on_topic_count >= 2
def search_for_users(api):
    """
    Search Twitter for users that could be worth following and print them.

    The function goes through the first 200 results of a user search for the query "transhumanism".
    The criteria used is: the user must have more than 1000 followers and the bot account must not follow the
    user already. If that criteria is met, check_recent_tweets() is called for the user.
    The info of the users that pass the check (their screen_name, number of followers, and profile description)
    is stored in a dictionary, and the dictionaries are collected in a list. The result printed to the terminal
    is a filtered list of users that could possibly be of interest (they still have to be checked manually on
    the webpage).

    :param api: Tweepy API object.
    """
    user_list = tweepy.Cursor(api.search_users, q='transhumanism').items(200)
    users_data = []  # Here we are going to store the users information

    for user in user_list:
        parsed = to_json(user)
        try:
            if parsed["followers_count"] > 1000 and not parsed["following"]:
                if check_recent_tweets(api, parsed["screen_name"]):
                    users_data.append({
                        "name": parsed["screen_name"],
                        "followers": parsed["followers_count"],
                        "description": parsed["description"]
                    })
                    print("Data fetched.")
        except Exception as err:
            # An error can occur if the user has protected tweets. The search stays best-effort (the user is
            # skipped), but the reason is shown and KeyboardInterrupt/SystemExit are no longer swallowed.
            print("Failed to run the command on ", parsed["screen_name"], "skipping...\n")
            print("Reason: ", err)

    print("Done\n")
    for user_info in users_data:
        print(user_info, "\n")