In [1]:
from pymongo import MongoClient
import pandas as pd

In [2]:
client = MongoClient('mongodb://localhost:27017/')
db = client['flask_db']

In [3]:
def extract_user_info():
    collection = db['chat_client_info']
    data = list(collection.find({}, {'_id': 0, "session_id": 1, "client_id": 1, "product": 1, "grateful": 1, "ranting": 1, "expression": 1, "civil": 1}))
    df = pd.DataFrame(data)
    df.to_csv('user_info.csv', index=False)
    return df

def extract_chat_history():
    collection = db['chat_history_collection']
    data = list(collection.find({}, {'_id': 0, "session_id": 1, "client_id": 1, "turn_number": 1, "sender": 1, "receiver": 1, "message": 1, "timestamp": 1}))
    df = pd.DataFrame(data)
    df.to_csv('chat_history.csv', index=False)
    return df

def extract_user_feedback():
    collection = db['chat_emo_feedback']
    data = list(collection.find({}, {'_id': 0, "session_id": 1, "client_id": 1, "turn_number": 1, "support_type": 1, "support_content": 1, "timestamp_arrival": 1, "timestamp_feedback": 1, "user_feedback": 1}))
    df = pd.DataFrame(data)
    df.to_csv('user_feedback.csv', index=False)
    return df

In [4]:
# export csv run functions

if __name__ == "__main__":
    user_info_df = extract_user_info()
    chat_history_df = extract_chat_history()
    user_feedback_df = extract_user_feedback()

In [5]:
# Analyze the value from csv table

user_info_df = pd.read_csv('user_info.csv')
chat_history_df = pd.read_csv('chat_history.csv')
user_feedback_df = pd.read_csv('user_feedback.csv')


In [6]:
chat_history_df

Unnamed: 0,session_id,turn_number,sender,message,timestamp,client_id,receiver
0,e7810d46-f838-4a35-9654-87ea23b80bee,1,representative,"Excuse me, I have been waiting in line for ove...",2024-04-17 18:38:43.717,,
1,e7810d46-f838-4a35-9654-87ea23b80bee,1,representative,Could you please check the status of my librar...,2024-04-17 18:39:38.058,,
2,e7810d46-f838-4a35-9654-87ea23b80bee,1,representative,"Hi, I recently booked a flight with your airli...",2024-04-17 18:39:59.279,,
3,e7810d46-f838-4a35-9654-87ea23b80bee,1,client,Ask for the customer's full name and address.,2024-04-17 18:43:02.786,,
4,e7810d46-f838-4a35-9654-87ea23b80bee,2,representative,"I'm sorry, but you need to provide a response ...",2024-04-17 18:43:02.786,,
5,e7810d46-f838-4a35-9654-87ea23b80bee,1,representative,"Hi, I have been a loyal library member for yea...",2024-04-17 18:43:25.555,,
6,e7810d46-f838-4a35-9654-87ea23b80bee,1,client,What book are you looking for?,2024-04-17 18:44:11.488,,
7,e7810d46-f838-4a35-9654-87ea23b80bee,2,representative,What book are you looking for?,2024-04-17 18:44:11.488,,
8,17017ce9-cf9d-492a-b2f5-d97aced0579f,1,representative,Please provide a product and a feeling.,2024-04-17 18:55:06.919,,
9,17017ce9-cf9d-492a-b2f5-d97aced0579f,1,representative,"Hey there, I just wanted to express my frustra...",2024-04-17 18:57:33.610,,


In [16]:
# User Info
print(user_info_df.describe())  # Basic statistics
print(user_info_df['product'].value_counts())
print(user_info_df['grateful'].mean())
print(user_info_df['ranting'].mean())
print(user_info_df['expression'].mean())

          grateful      ranting   expression        civil
count  1413.000000  1413.000000  1413.000000  1212.000000
mean      0.002831     0.003539     0.002831     0.820132
std       0.053149     0.059402     0.053149     0.384236
min       0.000000     0.000000     0.000000     0.000000
25%       0.000000     0.000000     0.000000     1.000000
50%       0.000000     0.000000     0.000000     1.000000
75%       0.000000     0.000000     0.000000     1.000000
max       1.000000     1.000000     1.000000     1.000000
product
Pizza      1406
Cup           3
Speaker       2
Book          2
Name: count, dtype: int64
0.0028308563340410475
0.003538570417551309
0.0028308563340410475


In [21]:
# Chat History
chat_history_df['timestamp'] = pd.to_datetime(chat_history_df['timestamp'])

# Report average message length for client and for rep
average_msg_length_client = chat_history_df[chat_history_df['sender'] == 'client']['message'].apply(len).mean()
average_msg_length_rep = chat_history_df[chat_history_df['sender'] == 'representative']['message'].apply(len).mean()
print("Average Message Length (Client):", average_msg_length_client)
print("Average Message Length (Rep):", average_msg_length_rep)

# Report the average time of response for client and for rep
response_times = chat_history_df.sort_values(by=['session_id', 'timestamp'])
response_times['response_time'] = response_times.groupby('session_id')['timestamp'].diff().dt.seconds
average_response_time_client = response_times[response_times['sender'] == 'client']['response_time'].mean()
average_response_time_rep = response_times[response_times['sender'] == 'representative']['response_time'].mean()
print("Average Response Time (Client):", average_response_time_client)
print("Average Response Time (Rep):", average_response_time_rep)

# Report the average number of turns
chat_history_df['turn_number'] = pd.to_numeric(chat_history_df['turn_number'], errors='coerce')  # Ensure turn numbers are numeric
max_turns_per_session = chat_history_df.groupby(['client_id', 'session_id'])['turn_number'].max().reset_index()
average_turns = max_turns_per_session.groupby('client_id')['turn_number'].mean()
print("Average Number of Turns per Session:", average_turns.mean())

Average Message Length (Client): 5.0
Average Message Length (Rep): 163.23545516769337
Average Response Time (Client): 27.285714285714285
Average Response Time (Rep): 1.196927374301676
Average Number of Turns per Session: 1.019539427773901


In [20]:
# Analysis for User Feedback
average_feedback_by_type = user_feedback_df.groupby('support_type')['user_feedback'].mean()
print(average_feedback_by_type)

support_type
Be Mindful of Your Emotions           4.866667
Put Yourself in the Client's Shoes    4.812500
You might be thinking                 4.687500
Name: user_feedback, dtype: float64
