Merge pull request #87 from sheefanaaz123/main

WhatsApp Chat Analyser
Techiral · Oct 10, 2023 · f08a6d2 · f08a6d2
2 parents 5832f59 + dd1dbeb
commit f08a6d2
Show file tree

Hide file tree

Showing 5 changed files with 1,253 additions and 0 deletions.
diff --git a/W/WhatsApp Chat Analyser/README.md b/W/WhatsApp Chat Analyser/README.md
@@ -0,0 +1 @@
+The WhatsApp Chat Analyzer is a tool that analyzes WhatsApp group or individual chats and provides statistical insights into them. After uploading an exported chat, users can see information such as the total media sent, message count, word frequency, a word cloud representation, and emoji usage.
diff --git a/W/WhatsApp Chat Analyser/app.py b/W/WhatsApp Chat Analyser/app.py
@@ -0,0 +1,139 @@
+import streamlit as st
+import preprocessor,helper
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# Streamlit entry point: upload an exported WhatsApp chat, pick a participant
+# (or "Overall") in the sidebar and render the statistics and charts below.
+st.sidebar.title("Whatsapp Chat Analyzer")
+
+uploaded_file = st.sidebar.file_uploader("Choose a file")
+if uploaded_file is not None:
+    bytes_data = uploaded_file.getvalue()
+    data = bytes_data.decode("utf-8")
+    df = preprocessor.preprocess(data)
+
+    # fetch unique users
+    user_list = df['user'].unique().tolist()
+    # A chat without any system message has no 'group_notification' entry;
+    # an unconditional remove() would raise ValueError in that case.
+    if 'group_notification' in user_list:
+        user_list.remove('group_notification')
+    user_list.sort()
+    user_list.insert(0,"Overall")
+
+    selected_user = st.sidebar.selectbox("Show analysis wrt",user_list)
+
+    if st.sidebar.button("Show Analysis"):
+
+        # Stats Area
+        num_messages, words, num_media_messages, num_links = helper.fetch_stats(selected_user,df)
+        st.title("Top Statistics")
+        col1, col2, col3, col4 = st.columns(4)
+
+        with col1:
+            st.header("Total Messages")
+            st.title(num_messages)
+        with col2:
+            st.header("Total Words")
+            st.title(words)
+        with col3:
+            st.header("Media Shared")
+            st.title(num_media_messages)
+        with col4:
+            st.header("Links Shared")
+            st.title(num_links)
+
+        # monthly timeline
+        st.title("Monthly Timeline")
+        timeline = helper.monthly_timeline(selected_user,df)
+        fig,ax = plt.subplots()
+        ax.plot(timeline['time'], timeline['message'],color='green')
+        plt.xticks(rotation='vertical')
+        st.pyplot(fig)
+
+        # daily timeline
+        st.title("Daily Timeline")
+        daily_timeline = helper.daily_timeline(selected_user, df)
+        fig, ax = plt.subplots()
+        ax.plot(daily_timeline['only_date'], daily_timeline['message'], color='black')
+        plt.xticks(rotation='vertical')
+        st.pyplot(fig)
+
+        # activity map
+        st.title('Activity Map')
+        col1,col2 = st.columns(2)
+
+        with col1:
+            st.header("Most busy day")
+            busy_day = helper.week_activity_map(selected_user,df)
+            fig,ax = plt.subplots()
+            ax.bar(busy_day.index,busy_day.values,color='purple')
+            plt.xticks(rotation='vertical')
+            st.pyplot(fig)
+
+        with col2:
+            st.header("Most busy month")
+            busy_month = helper.month_activity_map(selected_user, df)
+            fig, ax = plt.subplots()
+            ax.bar(busy_month.index, busy_month.values,color='orange')
+            plt.xticks(rotation='vertical')
+            st.pyplot(fig)
+
+        st.title("Weekly Activity Map")
+        user_heatmap = helper.activity_heatmap(selected_user,df)
+        fig,ax = plt.subplots()
+        ax = sns.heatmap(user_heatmap)
+        st.pyplot(fig)
+
+        # finding the busiest users in the group(Group level)
+        if selected_user == 'Overall':
+            st.title('Most Busy Users')
+            x,new_df = helper.most_busy_users(df)
+            fig, ax = plt.subplots()
+
+            col1, col2 = st.columns(2)
+
+            with col1:
+                ax.bar(x.index, x.values,color='red')
+                plt.xticks(rotation='vertical')
+                st.pyplot(fig)
+            with col2:
+                st.dataframe(new_df)
+
+        # WordCloud
+        st.title("Wordcloud")
+        df_wc = helper.create_wordcloud(selected_user,df)
+        fig,ax = plt.subplots()
+        ax.imshow(df_wc)
+        st.pyplot(fig)
+
+        # most common words
+        most_common_df = helper.most_common_words(selected_user,df)
+
+        fig,ax = plt.subplots()
+
+        ax.barh(most_common_df[0],most_common_df[1])
+        plt.xticks(rotation='vertical')
+
+        st.title('Most common words')
+        st.pyplot(fig)
+
+        # emoji analysis
+        emoji_df = helper.emoji_helper(selected_user,df)
+        st.title("Emoji Analysis")
+
+        col1,col2 = st.columns(2)
+
+        with col1:
+            st.dataframe(emoji_df)
+        with col2:
+            # Nothing to plot when the selected user sent no emojis.
+            # NOTE(review): presumably an empty result has no columns 0/1,
+            # which would make the lookups below fail - confirm against
+            # helper.emoji_helper.
+            if len(emoji_df) > 0:
+                fig,ax = plt.subplots()
+                ax.pie(emoji_df[1].head(),labels=emoji_df[0].head(),autopct="%0.2f")
+                st.pyplot(fig)
+
+
+
+
+
+
+
+
+
+
+
diff --git a/W/WhatsApp Chat Analyser/preprocessor.py b/W/WhatsApp Chat Analyser/preprocessor.py
@@ -0,0 +1,51 @@
+import re
+import pandas as pd
+
+def preprocess(data):
+    """Turn the text of an exported WhatsApp chat into a pandas DataFrame.
+
+    Each message is expected to start with a ``d/m/y, H:MM - `` timestamp
+    (24-hour clock, 2- or 4-digit year). Anything before the first timestamp
+    is discarded.
+
+    Args:
+        data: Full text of the exported chat.
+
+    Returns:
+        DataFrame with one row per message and the columns ``date``,
+        ``user``, ``message``, ``only_date``, ``year``, ``month_num``,
+        ``month``, ``day``, ``day_name``, ``hour``, ``minute`` and
+        ``period``. Messages without a ``sender: `` prefix get the user
+        ``'group_notification'``.
+
+    Raises:
+        ValueError: If a matched timestamp cannot be parsed as
+            day/month/year.
+    """
+    # Raw strings keep the regex escapes (\d, \s) from being read as
+    # (invalid) string escapes.
+    pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'
+
+    messages = re.split(pattern, data)[1:]
+    dates = re.findall(pattern, data)
+
+    users = []
+    texts = []
+    for message in messages:
+        # maxsplit=1: only the first "name: " separates sender from text, so
+        # a later ": " inside the message body is left untouched.
+        entry = re.split(r'([\w\W]+?):\s', message, maxsplit=1)
+        if entry[1:]:  # user name
+            users.append(entry[1])
+            texts.append(entry[2])
+        else:
+            users.append('group_notification')
+            texts.append(entry[0])
+
+    df = pd.DataFrame({
+        'date': _parse_message_dates(dates),
+        'user': users,
+        'message': texts,
+    })
+
+    df['only_date'] = df['date'].dt.date
+    df['year'] = df['date'].dt.year
+    df['month_num'] = df['date'].dt.month
+    df['month'] = df['date'].dt.month_name()
+    df['day'] = df['date'].dt.day
+    df['day_name'] = df['date'].dt.day_name()
+    df['hour'] = df['date'].dt.hour
+    df['minute'] = df['date'].dt.minute
+
+    df['period'] = [_period_label(hour) for hour in df['hour']]
+
+    return df
+
+
+def _parse_message_dates(dates):
+    """Parse matched timestamp prefixes, accepting 4- and 2-digit years."""
+    stamps = pd.Series(dates, dtype='object')
+    parsed = pd.to_datetime(stamps, format='%d/%m/%Y, %H:%M - ', errors='coerce')
+    # The timestamp regex also matches 2-digit years, which '%Y' rejects.
+    short_year = pd.to_datetime(stamps, format='%d/%m/%y, %H:%M - ', errors='coerce')
+    parsed = parsed.fillna(short_year)
+    unparsed = parsed.isna()
+    if unparsed.any():
+        # Fail loudly instead of letting NaT rows flow into the analysis.
+        raise ValueError(
+            "unrecognised message timestamp: " + repr(stamps[unparsed].iloc[0])
+        )
+    return parsed
+
+
+def _period_label(hour):
+    """Return the one-hour bucket label for ``hour`` (e.g. ``'10-11'``)."""
+    # The 23 and 0 labels are kept exactly as before ('23-00', '00-1').
+    if hour == 23:
+        return str(hour) + "-" + "00"
+    if hour == 0:
+        return "00" + "-" + str(hour + 1)
+    return str(hour) + "-" + str(hour + 1)
diff --git a/W/WhatsApp Chat Analyser/requirements.txt b/W/WhatsApp Chat Analyser/requirements.txt
@@ -0,0 +1,7 @@
+streamlit
+matplotlib
+seaborn
+urlextract
+wordcloud
+pandas
+emoji