In [None]:
# Bar chart, ignore this 

import streamlit as st
import pandas as pd
import altair as alt

st.title('WhatsApp Group: Cousins - Across Borders')

# Load the per-sender message counts, rank the senders (rank 1 = most messages;
# method='first' gives tied senders distinct ranks in row order) and keep only
# the ten best-ranked rows.
df_no_of_messages = (
    pd.read_pickle('df_no_of_messages.pkl')
    .assign(sort=lambda frame: frame['number_of_msgs'].rank(method='first', ascending=False))
    .nsmallest(10, 'sort')
)

# Order the y axis by the precomputed rank column instead of by name.
rank_order = alt.EncodingSortField(field='sort', op='min')

# Horizontal bars: message count on x, one bar per sender on y.
bar = alt.Chart(df_no_of_messages).mark_bar().encode(
    x='number_of_msgs:Q',
    y=alt.Y('name:N', sort=rank_order),
    # Optional: add color=alt.Color('name:N', legend=None) to color bars by name.
)

# Second layer: the count as a white, left-aligned label (thousands separator),
# offset by dx=2 / dy=-3 from the bar's encoded position.
count_label = alt.Text('number_of_msgs:Q', format=',d')
text = bar.mark_text(align='left', dx=2, dy=-3, color='white').encode(text=count_label)

chart = bar + text

# Render the layered chart, stretched to the width of its Streamlit container.
st.altair_chart(chart, use_container_width=True)

In [None]:
# Use this Bar chart

import streamlit as st
import pandas as pd
import altair as alt

st.title('WhatsApp Group: Cousins - Across Borders')

# Number of senders that get their own bar; every sender ranked below this is
# folded into one combined bar. Single source of truth for the threshold that
# was previously hard-coded in two places.
TOP_N = 35
OTHERS_LABEL = 'All Others'

df_no_of_messages = pd.read_pickle('df_no_of_messages.pkl')

# Rank senders by message count (rank 1 = most messages). method='first' gives
# tied senders distinct ranks in row order, so the rank can double as a sort key.
df_no_of_messages['sort'] = df_no_of_messages['number_of_msgs'].rank(method='first', ascending=False)

# Keep the sender's own name for the top TOP_N ranks and label everyone else
# OTHERS_LABEL. Vectorised, so it also works on an empty frame (a row-wise
# apply does not return a Series in that case).
is_top_sender = df_no_of_messages['sort'] <= TOP_N
df_no_of_messages['name_display'] = df_no_of_messages['name'].where(is_top_sender, OTHERS_LABEL)

# One row per displayed name: message counts are summed, and the smallest rank
# in each group is kept as its sort key (for the combined group that is the
# best rank among the folded senders, so it sorts after every top sender).
df_no_of_messages_grouped = df_no_of_messages.groupby(
    'name_display',
    as_index=False
).agg({'number_of_msgs': 'sum', 'sort': 'min'})

# Build the axis label: '<rank>. <name>' for top senders, the bare label for
# the combined group.
rank_prefix = df_no_of_messages_grouped['sort'].map(
    lambda rank: f'{int(rank)}. ' if rank <= TOP_N else ''
)
df_no_of_messages_grouped['Name'] = rank_prefix + df_no_of_messages_grouped['name_display']

# Create a horizontal bar chart using Altair, ordered by rank
bar = alt.Chart(df_no_of_messages_grouped).mark_bar().encode(
    x='number_of_msgs:Q',
    y=alt.Y('Name:N', sort=alt.EncodingSortField(field='sort', op='min')),
    # color=alt.Color('name:N', legend=None)  # Remove if you don't want color
)

# Overlay the message count as a white, left-aligned label on each bar
text = bar.mark_text(align='left', dx=2, dy=0, color='white').encode(text=alt.Text('number_of_msgs:Q', format=',d'))

chart = bar + text

# Render the layered chart, stretched to the width of its Streamlit container
st.altair_chart(chart, use_container_width=True)


In [None]:
# Load the group-name history; the untouched frame stays available as
# df_group_name_org.
df_group_name_org = pd.read_pickle('df_group_name.pkl')

# Source column -> header shown in the table. The key order is also the
# column order of the displayed table.
column_labels = {
    'date': 'Date',
    'name': 'Changed by',
    'group_name': 'New Group Name',
}
df_group_name = df_group_name_org[list(column_labels)].rename(columns=column_labels)

# CSS to hide the row indices in the table
hide_table_row_index = """
    <style>
    thead tr th:first-child {display:none}
    tbody th {display:none}
    </style>
    """
# Inject the CSS; unsafe_allow_html is required for the <style> tag to be
# emitted as HTML.
st.markdown(hide_table_row_index, unsafe_allow_html=True)

# NOTE(review): this option concerns st.pyplot, which is not called in this
# cell - confirm it is still needed and still accepted by the installed
# Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)

st.table(df_group_name)

In [None]:
# Top 10 days: vertical bar chart of the busiest dates, highest count first

df_top10days = pd.read_pickle('df_top10days.pkl')

# Parse 'date' and re-render it as text in '%Y-%b-%d' form
# (year, abbreviated month name, day of month).
df_top10days['date'] = pd.to_datetime(df_top10days['date']).dt.strftime('%Y-%b-%d')

# Rank the days by message count (rank 1 = busiest); the rank is the sort key
# for the x axis below.
df_top10days['sort'] = df_top10days['message_count'].rank(method='first', ascending=False)

rank_order = alt.EncodingSortField(field='sort', op='min')
flat_label_axis = alt.Axis(labelAngle=0, labelFontSize=10)

# NOTE: width=800 is set here, but the chart is rendered with
# use_container_width=True below.
bar = (
    alt.Chart(df_top10days)
    .mark_bar()
    .encode(
        x=alt.X('date:N', sort=rank_order, axis=flat_label_axis),
        y='message_count:Q',
        # Optional: add color=alt.Color('name:N', legend=None) to color the bars.
    )
    .properties(width=800)
)

# Second layer: the count as a white, centred label shifted up by 10px.
count_label = alt.Text('message_count:Q', format=',d')
text = bar.mark_text(align='center', dx=0, dy=-10, color='white').encode(text=count_label)

chart = bar + text

# Render the layered chart, stretched to the width of its Streamlit container.
st.altair_chart(chart, use_container_width=True)


In [None]:
# HEAT MAP
import streamlit as st
import pandas as pd
import altair as alt
import numpy as np

st.title('WhatsApp Group: Cousins - Across Borders')

whatsapp_chat_formatted = pd.read_pickle('whatsapp_chat_formatted.pkl')

# Specify the correct order for the days of the week
ordered_days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Create an ordered categorical type with our specified day order.
# Values that are not in ordered_days become NaN.
whatsapp_chat_formatted['day'] = pd.Categorical(whatsapp_chat_formatted['day'], ordered_days, ordered=True)

# Count the number of messages per hour and day.
# observed=False is passed explicitly: 'day' is categorical, and this keeps
# every day category in the result (hour/day combinations without messages
# appear with a count of 0). Relying on the implicit default is deprecated in
# pandas and would change which cells the heat map and "Bottom 3" table show.
message_counts = (
    whatsapp_chat_formatted
    .groupby(['hour', 'day'], observed=False)
    .size()
    .reset_index(name='message_count')
)
message_counts['message_count'] = message_counts['message_count'].astype(float)

# Heat map: one rectangle per (hour, day), colored by message count with a
# reversed color scale.
chart = alt.Chart(message_counts, title="Message Counts per Hour and Day").mark_rect().encode(
    alt.X('hour:O', title='Hour'),
    alt.Y('day:O', sort=ordered_days, title='Day'),  # specify the sort order in the encoding
    alt.Color('message_count:Q', scale=alt.Scale(reverse=True), title='Message Count'),
    # Alternative palette: scale=alt.Scale(scheme='blueorange', reverse=True)
)

# Add the count as a text label on every cell. The label color is a constant:
# the previous alt.condition(...) returned 'black' in both branches, so it had
# no effect and is replaced by the equivalent fixed value.
text = chart.mark_text(baseline='middle', align='center').encode(
    text=alt.Text('message_count:Q', format='.0f'),
    color=alt.value('black'),
)

heatmap = chart + text

# Configure chart properties
heatmap = heatmap.properties(
    width=700,  # adjust width as desired
    height=400,  # adjust height as desired
).configure_view(
    step=13,
    strokeWidth=0
).configure_axis(
    domain=False
)

st.altair_chart(heatmap)

# Find top 3 message counts
top_3 = message_counts.sort_values('message_count', ascending=False).head(3)

# Find bottom 3 message counts
bottom_3 = message_counts.sort_values('message_count', ascending=True).head(3)

# Show the top 3 and bottom 3 message counts below the heat map
st.write('Top 3 Message Counts')
st.write(top_3)

st.write('Bottom 3 Message Counts')
st.write(bottom_3)


In [None]:
def _bar_with_labels(data, x_field, bar_color, x_sort):
    """Build a vertical message-count bar layer and its value-label layer.

    Shared by the hour, day and month charts below, which previously repeated
    this construction three times.

    Args:
        data: DataFrame to plot; the encodings read its 'message_count'
            column and the column named by `x_field`.
        x_field: Name of the column drawn on the x axis (encoded as nominal).
        bar_color: Fill color of the bars.
        x_sort: Sort specification forwarded to `alt.X(sort=...)`.

    Returns:
        A `(bar, text)` tuple: the bar chart and a text layer derived from it
        that shows the count in white, centred and shifted up by 10px.
    """
    bar = alt.Chart(data).mark_bar(color=bar_color).encode(
        y='message_count:Q',
        x=alt.X(f'{x_field}:N', sort=x_sort, axis=alt.Axis(labelAngle=0, labelFontSize=10)),
    )
    text = bar.mark_text(align='center', dx=0, dy=-10, color='white').encode(
        text=alt.Text('message_count:Q', format=',d')
    )
    return bar, text


st.title('WhatsApp Group: Cousins - Across Borders')

# --- Messages by hour ---
df_by_hour = pd.read_pickle('df_by_hour.pkl')

# NOTE(review): the x axis is sorted by a 'sort' field that this cell never
# creates - confirm df_by_hour.pkl already contains a 'sort' column.
bar_hour, text_hour = _bar_with_labels(
    df_by_hour, 'hour', 'lightblue', alt.EncodingSortField(field='sort', op='min')
)
chart_hour = bar_hour + text_hour
st.altair_chart(chart_hour, use_container_width=True)

# --- Messages by day of week ---
df_by_day = pd.read_pickle('df_by_day.pkl')

# Specify the correct order for the days of the week
ordered_days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# In 'day' chart, sort X-axis by days of the week
bar_day, text_day = _bar_with_labels(df_by_day, 'day', 'lightgreen', ordered_days)
chart_day = bar_day + text_day
st.altair_chart(chart_day, use_container_width=True)

# --- Messages by month ---
df_by_month = pd.read_pickle('df_by_month.pkl')

# Specify the correct order for the months of the year
ordered_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# In 'month' chart, sort X-axis by months of the year
bar_month, text_month = _bar_with_labels(df_by_month, 'month', 'steelblue', ordered_months)
chart_month = bar_month + text_month

# Render the layered chart, stretched to the width of its Streamlit container
st.altair_chart(chart_month, use_container_width=True)

In [None]:
import streamlit as st
import pandas as pd
from PIL import Image
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt

whatsapp_chat_formatted = pd.read_pickle('whatsapp_chat_formatted.pkl')

# Extra words to exclude from the cloud, on top of wordcloud's built-in list.
# They are merged into a *copy*: calling STOPWORDS.update(...) would modify the
# library's module-level set in place, changing it for all other code that
# uses it in this process.
stopwords = set(STOPWORDS).union([
    'group', 'link', 'invite', 'joined', 'message', 'deleted', 'yeah', 'hai',
    'yes', 'okay', 'ok', 'will', 'use', 'using', 'one', 'know', 'guy',
    'media', 'omitted',
])

# Flatten every message into one lower-cased, space-separated string.
# str(message) is applied first, so non-string values are stringified.
comment_words = ' '.join(word for message in whatsapp_chat_formatted.message for word in str(message).lower().split())

# Build the 800x400 word cloud on a white background, skipping the stop words
wordcloud = WordCloud(width = 800, height = 400, 
                background_color ='white', 
                stopwords = stopwords, 
                min_font_size = 10).generate(comment_words)

# Show the rendered cloud as an image in the Streamlit app
st.image(wordcloud.to_image())


In [9]:
# The pickle read below was written earlier with:
#   df_top10emojis.to_pickle('df_top10emojis.pkl')
df_top10emojis = pd.read_pickle('df_top10emojis.pkl')

# Put the columns in display order (reindex also drops any column not listed).
desired_order = ['emoji', 'emoji_description', 'emoji_count']
df_top10emojis = df_top10emojis.reindex(columns=desired_order)

# Cast the counts to int, then render them as strings with thousands
# separators (e.g. 2883 -> '2,883').
df_top10emojis['emoji_count'] = df_top10emojis['emoji_count'].astype(int).map('{:,}'.format)

# Last expression of the cell: displays the formatted table.
df_top10emojis

Unnamed: 0,emoji,emoji_description,emoji_count
0,😂,face_with_tears_of_joy,2883
1,🎂,birthday_cake,2755
2,👍,thumbs_up,2617
3,👌,OK_hand,2539
4,💐,bouquet,2258
5,😜,winking_face_with_tongue,2021
6,🎉,party_popper,1733
7,😀,grinning_face,1511
8,🏻,light_skin_tone,1330
9,😊,smiling_face_with_smiling_eyes,1073
