-
Notifications
You must be signed in to change notification settings - Fork 180
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #87 from sheefanaaz123/main
WhatsApp Chat Analyser
- Loading branch information
Showing
5 changed files
with
1,253 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
The WhatsApp Chat Analyzer is a tool designed to analyze and provide statistical insights into WhatsApp group or individual chats. By uploading the chat data, users can obtain valuable information such as total media sent, message count, word frequency, word cloud representation, and emoji usage. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import streamlit as st | ||
import preprocessor,helper | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
def _render(fig):
    """Draw ``fig`` in the current Streamlit container, then close it.

    Figures created with ``plt.subplots()`` stay registered with pyplot until
    they are explicitly closed, so each rerun of this script would otherwise
    leave another batch of figures in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


st.sidebar.title("Whatsapp Chat Analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")
if uploaded_file is not None:
    # The uploaded export is decoded as UTF-8 and parsed into a DataFrame.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df = preprocessor.preprocess(data)

    # fetch unique users
    user_list = df['user'].unique().tolist()
    # A chat may contain no system messages at all; an unconditional
    # list.remove() would raise ValueError in that case.
    if 'group_notification' in user_list:
        user_list.remove('group_notification')
    user_list.sort()
    user_list.insert(0, "Overall")

    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):

        # Stats Area
        num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
        st.title("Top Statistics")
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Media Shared")
            st.title(num_media_messages)
        with col4:
            st.header("Links Shared")
            st.title(num_links)

        # monthly timeline
        st.title("Monthly Timeline")
        timeline = helper.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        plt.xticks(rotation='vertical')
        _render(fig)

        # daily timeline
        st.title("Daily Timeline")
        daily_timeline = helper.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        plt.xticks(rotation='vertical')
        _render(fig)

        # activity map
        st.title('Activity Map')
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            plt.xticks(rotation='vertical')
            _render(fig)

        with col2:
            st.header("Most busy month")
            busy_month = helper.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            plt.xticks(rotation='vertical')
            _render(fig)

        st.title("Weekly Activity Map")
        user_heatmap = helper.activity_heatmap(selected_user, df)
        fig, ax = plt.subplots()
        # Target the freshly created axes explicitly instead of relying on
        # pyplot's notion of the "current" axes.
        sns.heatmap(user_heatmap, ax=ax)
        _render(fig)

        # finding the busiest users in the group(Group level)
        if selected_user == 'Overall':
            st.title('Most Busy Users')
            x, new_df = helper.most_busy_users(df)
            fig, ax = plt.subplots()

            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color='red')
                plt.xticks(rotation='vertical')
                _render(fig)
            with col2:
                st.dataframe(new_df)

        # WordCloud
        st.title("Wordcloud")
        df_wc = helper.create_wordcloud(selected_user, df)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        _render(fig)

        # most common words
        # Columns 0 and 1 of the helper's result are plotted as bar labels
        # and bar lengths respectively.
        most_common_df = helper.most_common_words(selected_user, df)

        fig, ax = plt.subplots()

        ax.barh(most_common_df[0], most_common_df[1])
        plt.xticks(rotation='vertical')

        st.title('Most common words')
        _render(fig)

        # emoji analysis
        emoji_df = helper.emoji_helper(selected_user, df)
        st.title("Emoji Analysis")

        col1, col2 = st.columns(2)

        with col1:
            st.dataframe(emoji_df)
        with col2:
            # Pie chart of the first five rows: column 0 supplies the labels,
            # column 1 the wedge sizes.
            fig, ax = plt.subplots()
            ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
            _render(fig)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import re | ||
import pandas as pd | ||
|
||
def _hour_period(hour):
    """Return the one-hour bucket label that starts at ``hour``.

    The labels keep the exact historical spelling (``'23-00'``, ``'00-1'``,
    ``'10-11'``) because they are stored in the ``period`` column that other
    code consumes.
    """
    if hour == 23:
        return "23-00"
    if hour == 0:
        return "00-1"
    return f"{hour}-{hour + 1}"


def preprocess(data):
    """Parse an exported WhatsApp chat into a DataFrame.

    Each entry in ``data`` is expected to start with a timestamp of the form
    ``d/m/yyyy, H:MM - ``; the text before the first such timestamp is
    discarded.

    Args:
        data: Full text of the chat export.

    Returns:
        A pandas DataFrame with one row per message and the columns
        ``date``, ``user``, ``message``, ``only_date``, ``year``,
        ``month_num``, ``month``, ``day``, ``day_name``, ``hour``,
        ``minute`` and ``period``. Entries without a ``"name: "`` prefix are
        attributed to the pseudo-user ``'group_notification'``.

    Raises:
        ValueError: Propagated from ``pd.to_datetime`` when a matched
            timestamp does not fit ``'%d/%m/%Y, %H:%M - '``.
            NOTE(review): the boundary regex accepts 2-4 digit years, but
            this format needs a 4-digit year, so 2-digit-year exports will
            presumably fail here - confirm.
    """
    pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'

    # split() yields the text between timestamps, findall() the timestamps
    # themselves; dropping the first split element keeps the two aligned.
    raw_messages = re.split(pattern, data)[1:]
    dates = re.findall(pattern, data)

    df = pd.DataFrame({'user_message': raw_messages, 'message_date': dates})
    # convert message_date type
    df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %H:%M - ')

    df.rename(columns={'message_date': 'date'}, inplace=True)

    users = []
    messages = []
    for raw in df['user_message']:
        # maxsplit=1: only the first "name: " separates sender from text.
        # Splitting on every ": " would shred message bodies that contain a
        # colon followed by whitespace.
        entry = re.split(r'([\w\W]+?):\s', raw, maxsplit=1)
        if entry[1:]:  # user name
            users.append(entry[1])
            messages.append(entry[2])
        else:
            users.append('group_notification')
            messages.append(entry[0])

    df['user'] = users
    df['message'] = messages
    df.drop(columns=['user_message'], inplace=True)

    df['only_date'] = df['date'].dt.date
    df['year'] = df['date'].dt.year
    df['month_num'] = df['date'].dt.month
    df['month'] = df['date'].dt.month_name()
    df['day'] = df['date'].dt.day
    df['day_name'] = df['date'].dt.day_name()
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute

    df['period'] = [_hour_period(hour) for hour in df['hour']]

    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
streamlit | ||
matplotlib | ||
seaborn | ||
urlextract | ||
wordcloud | ||
pandas | ||
emoji |
Oops, something went wrong.