In [1]:
import pathlib as pl
import pickle
import pandas as pd
import copy
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import math
#import wilcoxon signed-rank
from scipy.stats import wilcoxon, shapiro, ttest_rel, stats
#import cliffs delta
from effsize.effsize import two_group_difference

# Define paths.

In [2]:
processed_tables_pickle_file = pl.Path(
    '../processed_data/processed_tables.pickle')
collab_best_pickle_file = pl.Path(
    '../processed_data/collab_best_df.pickle')
learning_df_pickle_file = pl.Path(
    '../processed_data/learning_df.pickle')
cdf_human_pickle_file = pl.Path(
    '../processed_data/cdf_human.pickle')

# Load pickled data.

In [3]:
with processed_tables_pickle_file.open('rb') as handle:
    processed_tables = pickle.load(handle)

with collab_best_pickle_file.open('rb') as handle:
    collab_best = pickle.load(handle)

with learning_df_pickle_file.open('rb') as handle:
    learning_df = pickle.load(handle)
    
with cdf_human_pickle_file.open('rb') as handle:
    cdf_human = pickle.load(handle)

In [4]:
processed_tables.keys()
pd.options.display.max_columns = None
df = processed_tables[10]
df = df.loc[df['header.frame_id'] == 'collab-activity-2']
df

Unnamed: 0,Time,header.seq,header.frame_id,state.edges,state.suggested.u,state.suggested.v,state.terminal,state.submit_suggested,action.agent_name,action.type,action.edge.u,action.edge.v,next_state.edges,next_state.suggested.u,next_state.suggested.v,next_state.terminal,next_state.submit_suggested,action_no,step_no,turn_agent,world_state,action,is_submission,cost,is_mst
98,1148.278606,99,collab-activity-2,[],-1,-1,False,False,robot,0,0,3,[],0,3,False,False,0,1,human,"NetworkState(e:0+1,c:0|n:10,e:20;s:0)","suggest-pick(0,3)",False,0,False
99,1151.976628,100,collab-activity-2,[],0,3,False,False,human,7,-1,-1,[],-1,-1,False,False,1,1,human,"NetworkState(e:0+1,c:0|n:10,e:20;s:0)",disagree,False,0,False
100,1164.458253,101,collab-activity-2,[],-1,-1,False,False,human,0,7,6,[],7,6,False,False,2,1,robot,"NetworkState(e:0+1,c:0|n:10,e:20;s:0)","suggest-pick(7,6)",False,0,False
101,1175.300851,102,collab-activity-2,[],7,6,False,False,robot,0,7,6,[u: 7\nv: 6],-1,-1,False,False,3,1,robot,"NetworkState(e:0+1,c:0|n:10,e:20;s:0)","suggest-pick(7,6)",False,0,False
102,1184.124874,103,collab-activity-2,[u: 7\nv: 6],-1,-1,False,False,robot,0,6,4,[u: 7\nv: 6],6,4,False,False,4,1,human,"NetworkState(e:1+1,c:3|n:10,e:20;s:0)","suggest-pick(6,4)",False,3,False
103,1185.938908,104,collab-activity-2,[u: 7\nv: 6],6,4,False,False,human,6,-1,-1,"[u: 7\nv: 6, u: 6\nv: 4]",-1,-1,False,False,5,1,human,"NetworkState(e:1+1,c:3|n:10,e:20;s:0)",agree,False,3,False
104,1211.245186,105,collab-activity-2,"[u: 7\nv: 6, u: 6\nv: 4]",-1,-1,False,False,human,0,1,2,"[u: 7\nv: 6, u: 6\nv: 4]",1,2,False,False,6,1,robot,"NetworkState(e:2+1,c:6|n:10,e:20;s:0)","suggest-pick(1,2)",False,6,False
105,1224.448048,106,collab-activity-2,"[u: 7\nv: 6, u: 6\nv: 4]",1,2,False,False,robot,0,1,2,"[u: 1\nv: 2, u: 7\nv: 6, u: 6\nv: 4]",-1,-1,False,False,7,1,robot,"NetworkState(e:2+1,c:6|n:10,e:20;s:0)","suggest-pick(1,2)",False,6,False
106,1233.681034,107,collab-activity-2,"[u: 1\nv: 2, u: 7\nv: 6, u: 6\nv: 4]",-1,-1,False,False,robot,0,2,3,"[u: 1\nv: 2, u: 6\nv: 4, u: 7\nv: 6]",2,3,False,False,8,1,human,"NetworkState(e:3+1,c:9|n:10,e:20;s:0)","suggest-pick(2,3)",False,9,False
107,1271.644502,108,collab-activity-2,"[u: 1\nv: 2, u: 6\nv: 4, u: 7\nv: 6]",2,3,False,False,human,6,-1,-1,"[u: 2\nv: 3, u: 1\nv: 2, u: 6\nv: 4, u: 7\nv: 6]",-1,-1,False,False,9,1,human,"NetworkState(e:3+1,c:9|n:10,e:20;s:0)",agree,False,9,False


# Quantifying improvement  throughout collaborative activities.

H2.  The more a student improves during the collaborative activity, the better are the learning outcomes.

## Quantifying improvement across collaborative activities.

H2.1 The more the student improves his/her submissions, the better are the learning outcomes.
- How does error difference (collab activities) correlate with learning gain (tests)?

### Calculating error difference from collab activities.

In [5]:
collab_best.sort_values(by=['student'], inplace=True)
collab_best

Unnamed: 0,activity,error,student
1,collab-activity,0.318182,1.0
1,collab-activity-2,0.0,1.0
2,collab-activity,0.0,2.0
2,collab-activity-2,0.0,2.0
3,collab-activity,0.0,3.0
3,collab-activity-2,0.057143,3.0
4,collab-activity,0.318182,4.0
4,collab-activity-2,0.0,4.0
5,collab-activity,0.318182,5.0
5,collab-activity-2,0.0,5.0


In [24]:
collab_a = collab_best.loc[collab_best['activity'] == 'collab-activity']
collab_b = collab_best.loc[collab_best['activity'] == 'collab-activity-2']

students = list(collab_a['student'])

error_diff = {}
for i in list(collab_a['student']):
    i = int(i)
    diff = float(collab_b.loc[collab_b['student'] == i]['error']) - \
        float(collab_a.loc[collab_a['student'] == i]['error'])
    error_diff[int(i)] = diff

error_diff

{1: -0.3181818181818182,
 2: 0.0,
 3: 0.05714285714285714,
 4: -0.3181818181818182,
 5: -0.3181818181818182,
 6: -0.2298701298701299,
 7: nan,
 9: -0.045454545454545456,
 10: 0.0}

### Calculating learning gain from pre and post tests.

learning gain (improvement): posttest - pretest/ 1-pretest


In [7]:
learning_df
learning_df = learning_df.set_index(learning_df['student'])

In [9]:
learning_gain = {}

for student, row in learning_df.iterrows():
    gain = math.fabs((row['post_error'] - row['pre_error']) / (1 - row['pre_error']))
    learning_gain[student] = gain

learning_gain



{1: 0.18807339449541288,
 2: 0.6186770428015562,
 3: 0.042356055592322965,
 4: 0.2569659442724459,
 5: 0.13089836660617063,
 6: 0.17755289788408465,
 7: nan,
 9: 11.068376068376086,
 10: 0.003861003861003861}

### Perform Wilcoxon signed-rank test.

In [10]:
r = list(error_diff.values())
o = list(learning_gain.values())
w, p = wilcoxon(r, o, mode="exact")
# null hypotehsis says they are the same, p val less than threshold, reject hyp, conclude that post is larger than pretest
print('Exact:', 'W=', w, 'pvalue=', p)
a, b = wilcoxon(r, o, mode="exact", alternative="greater")
print('Greater:', 'W=', a, 'pvalue=', b)

Exact: W= 2.0 pvalue= 0.01171875
Greater: W= 2.0 pvalue= 0.99609375


### Compute effect size Cliff's Deltas.

In [11]:
r = list(error_diff.values())
o = list(learning_gain.values())
# estimate effect size by Cliff's Delta
d = two_group_difference(control=r, test=o, effect_size='cliffs_delta')
print('d', d)

d 0.9375


## Quantifying improvement over attempts across both collab activities.

In [14]:
#last error - first error
cdf_human

Unnamed: 0,activity,agent,error,student
93,collab-activity,human,0.772727,6
153,collab-activity,human,0.772727,6
181,collab-activity-2,human,0.542857,6
182,collab-activity-2,human,,6
211,collab-activity-2,human,,6
231,collab-activity-2,human,,6
105,collab-activity,human,0.318182,1
155,collab-activity,human,0.454545,1
220,collab-activity-2,human,0.342857,1
245,collab-activity-2,human,0.0,1


### Across both collab activities.

In [20]:
fl_df = cdf_human.copy()

fldf = pd.DataFrame()

# formatting df so that only the first and last submissions remain
students = [1, 2, 3, 4, 5, 6, 7, 9, 10]
for i in students:
    stdf = fl_df.loc[fl_df['student'] == i]
    fl = pd.DataFrame()
    fl = fl.append(stdf.iloc[0])
    fl = fl.append(stdf.iloc[-1])
    fldf = fldf.append(fl, ignore_index=True)


fldf.drop(['agent'], axis=1, inplace=True)
fldf

Unnamed: 0,activity,error,student
0,collab-activity,0.318182,1.0
1,collab-activity-2,0.0,1.0
2,collab-activity,0.0,2.0
3,collab-activity-2,0.0,2.0
4,collab-activity,0.318182,3.0
5,collab-activity-2,0.114286,3.0
6,collab-activity,0.454545,4.0
7,collab-activity-2,0.0,4.0
8,collab-activity,0.318182,5.0
9,collab-activity-2,0.0,5.0


### Within each collab activity.

In [23]:
#gets the first and last row of a dataframe and returns a new dataframe that contains them
def get_first_last(df):
    fl = pd.DataFrame()
    fl = fl.append(df.iloc[0])
    fl = fl.append(df.iloc[-1])
    return fl

collab_all = cdf_human.copy()

collab_1 = collab_all.loc[collab_all['activity'] == 'collab-activity']

collab_1

Unnamed: 0,activity,agent,error,student
93,collab-activity,human,0.772727,6
153,collab-activity,human,0.772727,6
105,collab-activity,human,0.318182,1
155,collab-activity,human,0.454545,1
109,collab-activity,human,0.454545,9
143,collab-activity,human,0.727273,9
167,collab-activity,human,0.045455,9
65,collab-activity,human,,7
93,collab-activity,human,0.0,7
116,collab-activity,human,0.0,2


In [25]:
for i in students:
    fdf = collab_1.loc[fl_df['student'] == i]

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0]

In [None]:

fldf = pd.DataFrame()

# formatting df so that only the first and last submissions remain
students = [1, 2, 3, 4, 5, 6, 7, 9, 10]
for i in students:
    stdf = fl_df.loc[fl_df['student'] == i]
    fl = pd.DataFrame()
    fl = fl.append(stdf.iloc[0])
    fl = fl.append(stdf.iloc[-1])
    fldf = fldf.append(fl, ignore_index=True)


fldf.drop(['agent'], axis=1, inplace=True)
fldf

- H2.2 The more the student improves his/her suggestions, the better are the learning outcomes.
- H2.3 The more the student improves his/her (dis)agreements, the better are the learning  outcomes.