Skip to content

Commit

Permalink
Merge pull request #87 from sheefanaaz123/main
Browse files Browse the repository at this point in the history
WhatsApp Chat Analyser
  • Loading branch information
Techiral authored Oct 10, 2023
2 parents 5832f59 + dd1dbeb commit f08a6d2
Show file tree
Hide file tree
Showing 5 changed files with 1,253 additions and 0 deletions.
1 change: 1 addition & 0 deletions W/WhatsApp Chat Analyser/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The WhatsApp Chat Analyzer is a tool that analyzes WhatsApp group or individual chats and provides statistical insights into them. After uploading an exported chat, users can see information such as total media sent, message count, word frequency, a word cloud representation, and emoji usage.
139 changes: 139 additions & 0 deletions W/WhatsApp Chat Analyser/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import streamlit as st
import preprocessor,helper
import matplotlib.pyplot as plt
import seaborn as sns

# Streamlit front end: the user uploads an exported chat in the sidebar,
# picks a participant (or "Overall"), and the page renders statistics and
# charts computed by the `helper` module on the preprocessed DataFrame.
st.sidebar.title("Whatsapp Chat Analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")
if uploaded_file is not None:
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df = preprocessor.preprocess(data)

    # fetch unique users
    user_list = df['user'].unique().tolist()
    # 'group_notification' is only present when some line has no sender;
    # an unconditional remove() raises ValueError when it is absent.
    if 'group_notification' in user_list:
        user_list.remove('group_notification')
    user_list.sort()
    user_list.insert(0, "Overall")

    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):

        # Stats Area
        num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user, df)
        st.title("Top Statistics")
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Media Shared")
            st.title(num_media_messages)
        with col4:
            st.header("Links Shared")
            st.title(num_links)

        # monthly timeline
        st.title("Monthly Timeline")
        timeline = helper.monthly_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(timeline['time'], timeline['message'], color='green')
        plt.xticks(rotation='vertical')
        st.pyplot(fig)

        # daily timeline
        st.title("Daily Timeline")
        daily_timeline = helper.daily_timeline(selected_user, df)
        fig, ax = plt.subplots()
        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
        plt.xticks(rotation='vertical')
        st.pyplot(fig)

        # activity map
        st.title('Activity Map')
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values, color='purple')
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        with col2:
            st.header("Most busy month")
            busy_month = helper.month_activity_map(selected_user, df)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color='orange')
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        st.title("Weekly Activity Map")
        user_heatmap = helper.activity_heatmap(selected_user, df)
        fig, ax = plt.subplots()
        # No ax is passed, so seaborn draws on the current axes, i.e. the
        # ones of the figure created on the previous line.
        ax = sns.heatmap(user_heatmap)
        st.pyplot(fig)

        # finding the busiest users in the group (group level only)
        if selected_user == 'Overall':
            st.title('Most Busy Users')
            x, new_df = helper.most_busy_users(df)
            fig, ax = plt.subplots()

            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color='red')
                plt.xticks(rotation='vertical')
                st.pyplot(fig)
            with col2:
                st.dataframe(new_df)

        # WordCloud
        st.title("Wordcloud")
        df_wc = helper.create_wordcloud(selected_user, df)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        st.pyplot(fig)

        # most common words
        most_common_df = helper.most_common_words(selected_user, df)

        fig, ax = plt.subplots()

        ax.barh(most_common_df[0], most_common_df[1])
        plt.xticks(rotation='vertical')

        st.title('Most commmon words')
        st.pyplot(fig)

        # emoji analysis
        emoji_df = helper.emoji_helper(selected_user, df)
        st.title("Emoji Analysis")

        col1, col2 = st.columns(2)

        with col1:
            st.dataframe(emoji_df)
        with col2:
            # NOTE(review): presumably emoji_df is an empty DataFrame with no
            # columns when the chat has no emoji, in which case emoji_df[1]
            # would raise KeyError - confirm against helper.emoji_helper.
            if not emoji_df.empty:
                fig, ax = plt.subplots()
                ax.pie(emoji_df[1].head(), labels=emoji_df[0].head(), autopct="%0.2f")
                st.pyplot(fig)











51 changes: 51 additions & 0 deletions W/WhatsApp Chat Analyser/preprocessor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re
import pandas as pd

def _period_label(hour):
    """Return the hour-bucket label for *hour*, e.g. 9 -> '9-10', 23 -> '23-00'."""
    if hour == 23:
        return f"{hour}-00"
    if hour == 0:
        return f"00-{hour + 1}"
    return f"{hour}-{hour + 1}"


def preprocess(data):
    """Parse the text of an exported WhatsApp chat into a DataFrame.

    Each message is expected to start with a timestamp of the form
    ``d/m/yyyy, hh:mm - `` (day first, 24-hour clock). Text before the first
    timestamp is discarded.

    Args:
        data: The full chat export as a single string.

    Returns:
        A DataFrame with one row per message and the columns ``date``,
        ``user``, ``message``, ``only_date``, ``year``, ``month_num``,
        ``month``, ``day``, ``day_name``, ``hour``, ``minute`` and
        ``period``. Rows with no ``sender: `` prefix get the user
        ``'group_notification'``.

    Raises:
        ValueError: If the timestamps match neither the 4-digit-year nor the
            2-digit-year day-first format.
    """
    # Raw strings: '\d' and '\s' in a normal literal are invalid escapes.
    timestamp_pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
    sender_pattern = r'([\w\W]+?):\s'

    # split() yields one leading chunk plus one chunk per timestamp, so
    # dropping the first element keeps messages and dates aligned.
    messages = re.split(timestamp_pattern, data)[1:]
    dates = re.findall(timestamp_pattern, data)

    df = pd.DataFrame({'user_message': messages, 'message_date': dates})
    # convert message_date type; the timestamp pattern also admits 2-digit
    # years, which '%Y' rejects, so fall back to '%y' in that case.
    try:
        df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%Y, %H:%M - ')
    except ValueError:
        df['message_date'] = pd.to_datetime(df['message_date'], format='%d/%m/%y, %H:%M - ')

    df.rename(columns={'message_date': 'date'}, inplace=True)

    users = []
    messages = []
    for message in df['user_message']:
        # maxsplit=1: only the first "name: " is the sender; later ": "
        # occurrences belong to the message text and must stay intact.
        entry = re.split(sender_pattern, message, maxsplit=1)
        if entry[1:]:  # user name
            users.append(entry[1])
            messages.append(entry[2])
        else:
            users.append('group_notification')
            messages.append(entry[0])

    df['user'] = users
    df['message'] = messages
    df.drop(columns=['user_message'], inplace=True)

    df['only_date'] = df['date'].dt.date
    df['year'] = df['date'].dt.year
    df['month_num'] = df['date'].dt.month
    df['month'] = df['date'].dt.month_name()
    df['day'] = df['date'].dt.day
    df['day_name'] = df['date'].dt.day_name()
    df['hour'] = df['date'].dt.hour
    df['minute'] = df['date'].dt.minute

    df['period'] = [_period_label(hour) for hour in df['hour']]

    return df
7 changes: 7 additions & 0 deletions W/WhatsApp Chat Analyser/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
streamlit
matplotlib
seaborn
urlextract
wordcloud
pandas
emoji
Loading

0 comments on commit f08a6d2

Please sign in to comment.