-
Notifications
You must be signed in to change notification settings - Fork 21
/
analyze_stat.py
executable file
·132 lines (121 loc) · 5.79 KB
/
analyze_stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 8 14:52:08 2019
@author: edoardottt
This file contains code for analyzing the database.
It uses the matplotlib library to display the results.
It shows a chart with likes, retweets and followers per day.
This file is under MIT License.
"""
# all libraries required
import os
import sys
import usage
import sqlite3
import hashlib
# Import matplotlib and create the figure window titled "twitterBot stats".
# Any failure here (missing package, unusable display backend, ...) is routed
# to usage.print_usage(4); `usage` is a project-local module not visible here.
# NOTE(review): if print_usage(4) returns instead of exiting, `plt` stays
# undefined and check_stat() will fail later with NameError - confirm.
try:
    import matplotlib.pyplot as plt
    plt.figure(num="twitterBot stats")
except Exception:
    usage.print_usage(4)
# Relative path: the database is looked up in the current working directory.
db_filename = "database.db"
# Must be evaluated BEFORE sqlite3.connect(): connect() creates the file when
# it is missing, so checking afterwards would always find an existing file.
db_is_new = not os.path.exists(db_filename)
# Module-level connection shared with check_stat(), which closes it.
conn = sqlite3.connect(db_filename)
# check the statistics for a user
def check_stat(username, password):
    """Plot per-day likes, retweets and followers for an authenticated user.

    The user is looked up in the ``users`` table by ``username`` and the
    SHA-256 hex digest of ``password``. When a matching row exists, every
    ``analytics`` row for that username is aggregated per calendar day and
    shown in a matplotlib chart; total likes and retweets are printed too.

    Args:
        username: account name to look up.
        password: plain-text password; only its SHA-256 hex digest is sent
            to the database.

    Raises:
        SystemExit: via ``sys.exit()`` when no ``users`` row matches the
            username/password pair.

    Note:
        The module-level connection ``conn`` is closed before this function
        returns or raises, so it cannot be reused for a second call.
    """
    try:
        if db_is_new:
            # The database file did not exist when the module was loaded,
            # so there is nothing to analyze.
            usage.print_usage(5)
            return

        cursor = conn.cursor()

        # check if that user is in the database
        password_hash = hashlib.sha256(password.encode("utf-8")).hexdigest()
        cursor.execute(
            "SELECT * FROM users WHERE username = ? and password = ?",
            (username, password_hash),
        )
        if cursor.fetchone() is None:
            print("There aren't data for this username.")
            sys.exit()

        # if that user exists
        cursor.execute("SELECT * FROM analytics WHERE username = ?", (username,))
        records = cursor.fetchall()  # always a list, possibly empty
        if not records:
            print("There aren't data for this username.")
            return

        likes_per_day, retweets_per_day, followers_per_day = _aggregate_per_day(
            records
        )
        _draw_chart(username, likes_per_day, retweets_per_day, followers_per_day)

        # Print the results (before plt.show(), which blocks until the
        # window is closed).
        print("Total likes: " + str(sum(likes_per_day.values())))
        print("Total retweets: " + str(sum(retweets_per_day.values())))
        plt.show()
    finally:
        # Release the connection on every path, including sys.exit().
        conn.close()


def _aggregate_per_day(records):
    """Group analytics rows by day in a single pass.

    Each row is read positionally: index 1 is the timestamp, 2 the likes
    count, 3 the retweets count and 4 the followers count.

    Returns:
        A ``(likes, retweets, followers)`` tuple of dicts keyed by day, in
        order of each day's first appearance. Likes and retweets are summed
        per day; followers keeps the value of the day's last row.
    """
    likes_per_day = {}
    retweets_per_day = {}
    followers_per_day = {}
    for record in records:
        # Slice from the left so the key is yyyy-mm-dd whether or not the
        # timestamp carries fractional seconds.
        # Assumes record[1] is a text timestamp starting with yyyy-mm-dd -
        # confirm against the code that writes the analytics table.
        day = record[1][:10]
        likes_per_day[day] = likes_per_day.get(day, 0) + int(record[2])
        retweets_per_day[day] = retweets_per_day.get(day, 0) + int(record[3])
        # Followers is a running total, not an increment: keep the latest.
        followers_per_day[day] = int(record[4])
    return likes_per_day, retweets_per_day, followers_per_day


def _draw_chart(username, likes_per_day, retweets_per_day, followers_per_day):
    """Draw the three per-day series, legend, point labels and title."""
    days = list(likes_per_day)
    likes_vector = [likes_per_day[day] for day in days]
    retweets_vector = [retweets_per_day[day] for day in days]
    followers_vector = [followers_per_day[day] for day in days]
    series = (
        (likes_vector, "-r", "likes"),
        (retweets_vector, "-g", "retweets"),
        (followers_vector, "-b", "followers"),
    )

    # adjust plot settings
    plt.subplots_adjust(bottom=0.2)
    plt.xticks(rotation=70)
    plt.gca().xaxis_date()

    for values, line_format, label in series:
        plt.plot(days, values, line_format, marker="o", label=label)

    # If the first likes value is greater than the last one, the curve ends
    # low on the right, so the legend goes there; otherwise on the left.
    if likes_vector[0] > likes_vector[-1]:
        plt.legend(loc="upper right")
    else:
        plt.legend(loc="upper left")

    # add the number label in all points
    for values, _line_format, _label in series:
        for day, value in zip(days, values):
            plt.text(day, value, str(value))

    plt.title("Statistics for " + username)
    plt.subplots_adjust(
        left=None,
        bottom=0.13,
        right=0.98,
        top=0.94,
        wspace=None,
        hspace=None,
    )