In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import json

# Load the chat export and keep only the fields the analysis below uses.
# JSON is UTF-8 by specification; naming the encoding explicitly avoids decode
# errors or garbled characters on platforms whose default text encoding is not
# UTF-8.
with open('result.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# One flat record per message. dict.get() yields None when an entry lacks a
# field, so incomplete entries do not abort the load.
messages = [
    {
        'from': message.get("from"),
        'text': message.get("text"),
        'date_unixtime': message.get("date_unixtime"),
    }
    for message in data['messages']
]

df = pd.DataFrame(messages)
df

In [None]:
# Clean the message table: discard rows that have no sender and replace the raw
# Unix timestamp with a proper datetime column named 'date'.
#
# The table is rebuilt from `messages` instead of being mutated in place, so
# re-running this cell always yields the same result (columns added by later
# cells, such as 'day', have to be recreated by re-running those cells).
df = (
    pd.DataFrame(messages)
    # dropna has to be applied to the frame (with subset=...) to remove rows;
    # calling it on the column Series leaves the frame's rows untouched.
    .dropna(subset=['from'])
    .assign(
        # Timestamps may arrive as numeric strings -- TODO confirm against the
        # export. Converting to numbers first keeps unit='s' meaning "seconds
        # since the epoch" regardless of how pandas treats strings.
        date=lambda frame: pd.to_datetime(
            pd.to_numeric(frame['date_unixtime']), unit='s'
        )
    )
    .drop(columns=['date_unixtime'])
    .reset_index(drop=True)
)
df

In [None]:
# Bar chart of the total number of messages written by each sender.
message_counts = df.groupby('from').size()

# Draw on the current axes and label it in a single call.
plt.gca().bar(message_counts.index, message_counts.values)
plt.gca().set(
    xlabel='Sender',
    ylabel='Number of Messages',
    title='Message Density by Sender',
)
plt.show()

In [None]:
# Number of messages per sender per calendar day, drawn as one line per sender.
df['day'] = df['date'].dt.date  # date without the time part; used as the grouping key

message_counts_by_day = df.groupby(['from', 'day']).size().reset_index(name='message_count')

plt.figure(figsize=(64, 6)) # ADJUST graph size (width, height)
# The loop variable is deliberately not called `data`: that name holds the
# parsed JSON export from the loading cell and must not be overwritten here.
for user, user_counts in message_counts_by_day.groupby('from'):
    plt.plot(user_counts['day'], user_counts['message_count'], label=user)

plt.xlabel('Date')
plt.ylabel('Number of Messages')
plt.title('Number of Messages by Day for Each User')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
def _text_length(value):
    """Return the number of characters in one message's text.

    Plain strings are measured directly. A list/tuple is treated as a sequence
    of text fragments -- either strings or dicts carrying a 'text' entry -- and
    the fragment lengths are summed (assumption about how the export encodes
    formatted messages; TODO confirm against result.json). Anything else,
    including None/NaN for messages without text, counts as 0 instead of
    raising TypeError.
    """
    if isinstance(value, str):
        return len(value)
    if isinstance(value, (list, tuple)):
        return sum(
            _text_length(part.get('text') if isinstance(part, dict) else part)
            for part in value
        )
    return 0


# Total characters written per sender per day; relies on the 'day' column
# created by the per-day message count cell.
df['message_density'] = df['text'].apply(_text_length)
message_density_by_day = df.groupby(['from', 'day'])['message_density'].sum().reset_index()

plt.figure(figsize=(64, 6))  # ADJUST graph size (width, height)
# The loop variable is deliberately not called `data`: that name holds the
# parsed JSON export from the loading cell and must not be overwritten here.
for user, user_density in message_density_by_day.groupby('from'):
    plt.plot(user_density['day'], user_density['message_density'], label=user)

plt.xlabel('Date')
plt.ylabel('Message Density (Total Text Length)')
plt.title('Message Density by Day for Each User')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
from collections import Counter
import re

def find_top_words_for_user(user, min_word_length=3, num_words=30, frame=None):
    """Return the most frequent words written by one sender.

    ADJUST `min_word_length` and `num_words` to change the minimum length of a
    counted word and the number of words returned.

    Args:
        user: Sender name, compared for equality against the 'from' column.
        min_word_length: Words shorter than this many characters are ignored.
        num_words: Maximum number of (word, count) pairs to return.
        frame: DataFrame with 'from' and 'text' columns. Defaults to the
            notebook-level `df` when omitted.

    Returns:
        A list of (word, count) tuples, most frequent first. Words are
        lower-cased and have '.' and ',' removed before counting.
    """
    def as_plain_text(value):
        # Messages without text contribute nothing, rather than the literal
        # word "None"/"nan" that str() would produce.
        if isinstance(value, str):
            return value
        # A list/tuple is treated as a sequence of text fragments (strings or
        # dicts carrying a 'text' entry) -- assumption about how the export
        # encodes formatted messages; TODO confirm against result.json.
        if isinstance(value, (list, tuple)):
            return ''.join(
                as_plain_text(part.get('text') if isinstance(part, dict) else part)
                for part in value
            )
        return ''

    if frame is None:
        frame = df  # notebook-level message table

    user_texts = frame.loc[frame['from'] == user, 'text']
    all_words = ' '.join(as_plain_text(text) for text in user_texts)
    all_words = re.sub(r'[.,]', '', all_words).lower().split()
    all_words = [word for word in all_words if len(word) >= min_word_length]
    word_counts = Counter(all_words)
    return word_counts.most_common(num_words)

# Print a table with one column per sender listing that sender's most used
# words, most frequent first.
top_words_by_user = {}
header = "Most used from: "
# Data rows carry no label, so they are indented by exactly the width of the
# header label; this keeps each word column under its sender name.
label_width = len(header)
column_width = 25
# Entries without a sender are skipped so the table has no "None" column.
unique_users = df['from'].dropna().unique()
for user in unique_users:
    top_words_by_user[user] = find_top_words_for_user(user)
    header += f"{user}".ljust(column_width)
# default=0 keeps this from raising when there are no senders at all.
max_num_words = max((len(words) for words in top_words_by_user.values()), default=0)
print(header)
for i in range(max_num_words):
    current_words = "".ljust(label_width)
    for user in unique_users:
        user_words = top_words_by_user[user]
        if i < len(user_words):
            word, count = user_words[i]
            current_words += f"{word}: {count}".ljust(column_width)
        else:
            # This sender has fewer words than the longest list: pad the cell.
            current_words += "".ljust(column_width)
    print(current_words)