In [13]:
import pandas as pd

In [14]:
# Base directory for the TSV inputs. Nothing is loaded here; this only sets the path.
# Keep the trailing slash: file names are appended to it by plain string concatenation.
DATA_DIR = '../data/server_data/sanitized_data/'

In [15]:
# Read every tab-separated table from DATA_DIR. The unpacking order on the
# left must stay aligned with the order of names in `table_names`.
table_names = (
    'chat_client_info',
    'chat_history',
    'chat_in_task',
    'chat_post_task',
    'chat_pre_task',
)
chat_client_info, chat_history, chat_in_task, chat_post_task, chat_pre_task = (
    pd.read_csv(DATA_DIR + table + '.tsv', sep='\t') for table in table_names
)

## Compare Post-Task and Pre-Task

In [27]:
# Add a composite `interaction_total` score to both survey tables (in place).
# NOTE(review): `interaction_dignity` is summed twice and only one other item
# (`interaction_respect`) is included. This looks like a copy-paste slip for a
# different third item. Behaviour is kept as-is here; confirm the intended
# column against the survey schema before relying on this score.
for survey in (chat_pre_task, chat_post_task):
    survey['interaction_total'] = (
        survey['interaction_dignity']
        + survey['interaction_dignity']
        + survey['interaction_respect']
    )

In [28]:
# Build the merged post-task table in one chain:
#   1. left-join the client info onto each post-task row via (client_id, pid);
#   2. left-join the pre-task answers via pid alone.
# Columns present on both sides of step 2 are disambiguated with the
# '_post' (left / post-task) and '_pre' (right / pre-task) suffixes.
chat_post_task_m = (
    chat_post_task
    .merge(chat_client_info, how='left', on=['client_id', 'pid'])
    .merge(chat_pre_task, how='left', on='pid', suffixes=('_post', '_pre'))
)

In [31]:
# For every measure, store post-task minus pre-task as `<measure>_diff`.
# The columns are added to chat_post_task_m in place, in the order listed.
measures = (
    'interaction_total',
    'cognitive_demands',
    'cognitive_resources',
    'affect_arousal',
    'affect_valence',
)
for measure in measures:
    chat_post_task_m[measure + '_diff'] = (
        chat_post_task_m[measure + '_post'] - chat_post_task_m[measure + '_pre']
    )

In [35]:
# Build `df_combined`: one row per participant (pid) holding the post-minus-pre
# differences for each of the three client conditions, distinguished by suffix:
#   *_civil        -> civil == 1 and emo == 0
#   *_uncivil      -> civil == 0 and emo == 0
#   *_uncivil_emo  -> civil == 0 and emo == 1
columns = ['interaction_total_diff', 'cognitive_demands_diff', 'cognitive_resources_diff', 'affect_arousal_diff', 'affect_valence_diff']


def _condition_diffs(merged, civil, emo, suffix):
    """Return the per-pid diff columns for one (civil, emo) condition.

    Parameters:
        merged: frame containing 'pid', 'civil', 'emo' and every name in `columns`.
        civil, emo: values the 'civil' / 'emo' columns must equal for a row to be kept.
        suffix: string appended to each diff column name in the result.

    Returns:
        A frame with 'pid' plus the suffixed diff columns and exactly one row
        per pid found in that condition.
    """
    in_condition = (merged['civil'] == civil) & (merged['emo'] == emo)
    return (
        merged.loc[in_condition, ['pid'] + columns]
        # Collapse to a single row per pid. Without this, a pid that appears
        # more than once in a condition would be cross-multiplied by the outer
        # merges below and over-weighted in every downstream mean. With one row
        # per pid already, the mean leaves the values unchanged.
        # Rows whose pid is missing are dropped by groupby.
        .groupby('pid', as_index=False)
        .mean()
        .rename(columns={col: col + suffix for col in columns})
    )


df_civil = _condition_diffs(chat_post_task_m, civil=1, emo=0, suffix='_civil')
df_uncivil = _condition_diffs(chat_post_task_m, civil=0, emo=0, suffix='_uncivil')
df_uncivil_emo = _condition_diffs(chat_post_task_m, civil=0, emo=1, suffix='_uncivil_emo')

# Outer merges keep participants that are missing one or more conditions;
# their absent values show up as NaN.
df_combined = df_civil.merge(df_uncivil, on='pid', how='outer').merge(df_uncivil_emo, on='pid', how='outer')

In [36]:
# Print the mean post-minus-pre change of each measure, one line per measure,
# with the three client conditions side by side. Series.mean() skips NaN, so
# participants lacking a condition simply do not contribute to that mean.
summary_rows = (
    ('Interaction', 'interaction_total_diff'),
    ('Cognitive Demands', 'cognitive_demands_diff'),
    ('Cognitive Resources', 'cognitive_resources_diff'),
    ('Affect Arousal', 'affect_arousal_diff'),
    ('Affect Valence', 'affect_valence_diff'),
)
for label, stem in summary_rows:
    civil_mean = df_combined[stem + '_civil'].mean()
    uncivil_mean = df_combined[stem + '_uncivil'].mean()
    uncivil_emo_mean = df_combined[stem + '_uncivil_emo'].mean()
    print(f"{label} | Civil: {civil_mean}, Uncivil: {uncivil_mean}, Uncivil Emo: {uncivil_emo_mean}")

Interaction | Civil: 3.55, Uncivil: -9.15, Uncivil Emo: -8.285714285714286
Cognitive Demands | Civil: -1.3, Uncivil: 0.15, Uncivil Emo: -0.23809523809523808
Cognitive Resources | Civil: 0.8, Uncivil: 0.05, Uncivil Emo: 0.38095238095238093
Affect Arousal | Civil: -0.1, Uncivil: -0.45, Uncivil Emo: -0.3333333333333333
Affect Valence | Civil: 1.15, Uncivil: -1.65, Uncivil Emo: -1.5714285714285714
