-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbirdwatching.py
150 lines (119 loc) · 3.2 KB
/
birdwatching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import tweepy
import importlib
import os
import time
import threading
import sys
import re
from secrets import consumer_key, consumer_secret, access_token, access_token_secret
# Registry of every Fetcher thread created so far. Workers are appended here
# when they are started, and the main wait loop polls this list to decide
# when all of them have finished.
threads = []
# Tweet Fetcher
class Fetcher(threading.Thread):
    """Thread that runs ``fetch`` on a shared list of account names."""

    def __init__(self, accounts):
        """Create the worker.

        Args:
            accounts: List of account names shared with the other workers.
                It is stored by reference, not copied.
        """
        super().__init__()
        self.accounts = accounts

    def run(self):
        """Thread entry point: hand the shared account list to ``fetch``."""
        fetch(self.accounts)
def get_all_tweets(screen_name, client=None):
    """Download as much of a user's timeline as the API will return.

    The timeline is requested in pages of 200 tweets. After each page the
    next request is capped with ``max_id`` set to one below the id of the
    oldest tweet collected so far, and paging stops at the first empty page.

    Args:
        screen_name: Account name to download, without the leading ``@``.
        client: Object exposing ``user_timeline(screen_name=..., count=...,
            max_id=...)`` and returning a list of tweets that can be indexed
            with ``['id']``. Defaults to the module-level ``api``.

    Returns:
        list: All tweets collected, in the order the API returned them.
        Empty when the first page is empty.
    """
    if client is None:
        client = api

    page = client.user_timeline(screen_name=screen_name, count=200)
    alltweets = list(page)

    # An empty first page skips the loop entirely, so ``alltweets[-1]`` is
    # only evaluated once at least one tweet has been collected.
    while page:
        oldest = alltweets[-1]['id'] - 1
        page = client.user_timeline(
            screen_name=screen_name, count=200, max_id=oldest
        )
        alltweets.extend(page)
        print("%s tweets downloaded so far for %s..." % ((len(alltweets)), "@" + screen_name))
    return alltweets
def get_accounts():
    """Read the target account names from ``accounts.txt``.

    Each line of the file names one account. Surrounding whitespace is
    stripped; blank lines and lines starting with ``#`` are skipped. For
    every account a ``results/<account>`` directory is created if it does
    not exist yet.

    Returns:
        list: Account names in file order.

    Raises:
        OSError: If ``accounts.txt`` cannot be read or a result directory
            cannot be created.
    """
    acc = []
    with open('accounts.txt') as f:
        for line in f:
            account = line.strip()
            # Whitespace-only lines and comments are not accounts.
            if not account or account.startswith('#'):
                continue
            acc.append(account)
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(os.path.join('results', account), exist_ok=True)
    return acc
def run_modules(account, tweets):
    """Run every analyzer found in the ``modules`` directory.

    Each ``*.py`` file in ``modules`` is imported as ``modules.<name>`` and
    its ``analyze(account, tweets)`` callable is invoked. Problems with one
    file (import failure, missing ``analyze``, exception while analyzing)
    are reported on stdout and do not stop the remaining files.

    Args:
        account: Account name passed through to each analyzer.
        tweets: Tweets passed through to each analyzer.
    """
    try:
        files = os.listdir("modules")
    except OSError:
        print("[!] Could not list modules directory!")
        return

    # Sorted so analyzers run in a stable order across runs and platforms.
    for filename in sorted(files):
        if not filename.endswith(".py"):
            continue
        # The package marker is not an analyzer; skip it rather than warn.
        if filename == "__init__.py":
            continue

        try:
            module = importlib.import_module("modules" + "." + filename[:-3])
        except Exception:
            # Importing runs arbitrary module code, so any error is possible.
            print("[!] Could not import '" + filename + "'")
            continue

        analysis = getattr(module, 'analyze', None)
        if not callable(analysis):
            print("[!] '" + filename + "' is not an Analyzer!")
            continue

        try:
            analysis(account, tweets)
        except Exception as e:
            # str(e), not print(e): print() returns None, and concatenating
            # it would raise TypeError inside this handler.
            print("[!] Error while running '" + filename + "'" + "(" + str(e) + ")")
def fetch(accounts):
    """Claim one account from the shared list and crawl it.

    Pops the first name from ``accounts`` while holding the module-level
    ``lock``, downloads its timeline with ``get_all_tweets``, drops every
    tweet whose text contains ``RT``, starts a follow-up ``Fetcher`` for
    whatever is left in ``accounts``, runs the analyzers via ``run_modules``
    and finally writes the kept tweet texts to
    ``results/<account>/tweets.txt``.

    Returns immediately, doing nothing, when ``accounts`` is empty.

    Args:
        accounts: Shared, mutable list of account names; one entry is
            removed from the front.
    """
    # Hold the lock across the emptiness check and the pop so two workers
    # can never claim the same account.
    with lock:
        if not accounts:
            return
        acc = accounts.pop(0)

    tweets = get_all_tweets(acc)
    # NOTE(review): this is a plain substring test, so any tweet containing
    # "RT" anywhere in its text (not only retweets) is dropped. Kept as in
    # the original; confirm whether a stricter retweet check is wanted.
    crawled_tweets = [tweet for tweet in tweets if 'RT' not in tweet['text']]

    # Chain the next worker before the slower analysis/writing below, so the
    # remaining accounts keep being processed.
    thread1 = Fetcher(accounts)
    threads.append(thread1)
    thread1.daemon = True
    thread1.start()

    run_modules(acc, crawled_tweets)

    try:
        f = open('results/' + acc + '/tweets.txt', 'w', encoding='utf8')
    except OSError:
        # Without a file there is nothing to write or close; skip straight
        # to the summary line.
        print('[!] Could not open output file!')
    else:
        with f:
            try:
                for t in crawled_tweets:
                    try:
                        # NOTE(review): texts are written tab-prefixed with
                        # no newline between them; format kept as-is.
                        f.write('\t' + t["text"])
                    except UnicodeError:
                        print('[!] Unknown encoding in tweet!')
                        continue
            except Exception:
                print('[!] Unknown error! Attempting to gracefully stop.')

    print("[" + acc + "] " + str(len(crawled_tweets)) + " tweets crawled.")
# Script body: everything below runs when the module is executed or
# imported. The names `api`, `accounts` and `lock` stay module-level because
# the functions above read them as globals.

# Authenticate against the Twitter API. The JSON parser is selected because
# the rest of this file indexes tweets like dicts (e.g. tweet['text']).
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

accounts = get_accounts()
print("Target List")
print("===========")
for acc in accounts:
    print("\t" + acc)

# Guards the shared `accounts` list; fetch() takes it before popping.
lock = threading.Lock()

# Start one worker per account known at startup. The count is taken once,
# before any worker begins removing entries from the list.
for _ in range(len(accounts)):
    thread1 = Fetcher(accounts)
    threads.append(thread1)
    thread1.daemon = True
    thread1.start()

# Workers are daemon threads and may append further workers to `threads`
# while they run, so keep polling the whole list until none is alive.
# Checking before sleeping avoids a needless one-second delay at the end.
while any(t.is_alive() for t in threads):
    time.sleep(1)

print("[+] Done!")