Perform error analysis across activities.

In [1]:
import copy
import math
import pickle
import pathlib as pl

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import seaborn as sns

from scipy.stats import wilcoxon, shapiro, stats
from effsize.effsize import two_group_difference

### Define paths.

In [2]:
# Input pickles (read by the loading cells below).
transitions_pickle_file = pl.Path(
    '../processed_data/justhink21_transitions.pickle'
)
submissions_pickle_file = pl.Path(
    '../processed_data/justhink21_submissions.pickle'
)

# Output pickles (targets of the save cells further down).
collab_best_pickle_file = pl.Path(
    '../processed_data/collab_best_df.pickle'
)
cdf_human_pickle_file = pl.Path(
    '../processed_data/cdf_human.pickle'
)

### Load submission tables.

In [3]:
# Load the submission tables (a mapping from participant to table).
# NOTE: unpickling can execute arbitrary code; only load trusted files.
submissions = pickle.loads(submissions_pickle_file.read_bytes())

# Example:
# submissions[1].head()
submissions[7]

Unnamed: 0,participant,time,activity,state,next_state,action,attempt_no,cost,opt_cost,error,is_submission,is_mst,is_spanning
23,7,202.903518,pretest-1,"EnvironmentState(NetworkState(e:7+0,c:18|n:7,e...","EnvironmentState(NetworkState(e:7+0,c:18|n:7,e...","Action(submit,Human)",1,18,15,0.2,True,False,True
31,7,257.844031,pretest-2,"EnvironmentState(NetworkState(e:6+0,c:30|n:7,e...","EnvironmentState(NetworkState(e:6+0,c:30|n:7,e...","Action(submit,Human)",1,30,30,0.0,True,True,True
37,7,331.117831,pretest-3,"EnvironmentState(NetworkState(e:3+0,c:12|n:7,e...","EnvironmentState(NetworkState(e:3+0,c:12|n:7,e...","Action(submit,Human)",1,12,21,,True,False,False
41,7,370.827124,pretest-4,"EnvironmentState(NetworkState(e:2+0,c:14|n:7,e...","EnvironmentState(NetworkState(e:2+0,c:14|n:7,e...","Action(submit,Human)",1,14,45,,True,False,False
46,7,406.628718,pretest-5,"EnvironmentState(NetworkState(e:3+0,c:6|n:7,e:...","EnvironmentState(NetworkState(e:3+0,c:6|n:7,e:...","Action(submit,Human)",1,6,15,,True,False,False
65,7,699.655849,collaboration-1,"EnvironmentState(NetworkState(e:7+0,c:16|n:10,...","EnvironmentState(NetworkState(e:0+0,c:0|n:10,e...","Action(submit,Human)",2,16,22,,True,False,False
93,7,1031.683597,collaboration-1,"EnvironmentState(NetworkState(e:9+0,c:22|n:10,...","EnvironmentState(NetworkState(e:9+0,c:22|n:10,...","Action(submit,Human)",3,22,22,0.0,True,True,True
116,7,1333.020694,collaboration-2,"EnvironmentState(NetworkState(e:6+0,c:20|n:10,...","EnvironmentState(NetworkState(e:0+0,c:0|n:10,e...","Action(submit,Human)",2,20,35,,True,False,False
143,7,1739.775298,collaboration-2,"EnvironmentState(NetworkState(e:6+0,c:22|n:10,...","EnvironmentState(NetworkState(e:0+0,c:0|n:10,e...","Action(submit,Human)",3,22,35,,True,False,False
160,7,2019.286325,collaboration-2,"EnvironmentState(NetworkState(e:6+0,c:22|n:10,...","EnvironmentState(NetworkState(e:0+0,c:0|n:10,e...","Action(submit,Human)",4,22,35,,True,False,False


### Load transition tables.

In [4]:
# Load the transition tables.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
transitions = pickle.loads(transitions_pickle_file.read_bytes())


# # Example:
# transitions.keys()
# pd.options.display.max_columns = None
# df = transitions[9]
# df = df.loc[df['activity'] == 'collaboration-2']
# df.head()

### 1. Analyze error over collaborative activities.

### Annotate and filter for human submissions and best submissions.

In [5]:
# Annotate human submissions.
# NOTE: this adds an 'is_human' column to the loaded tables in place.
for participant, df in submissions.items():
    df['is_human'] = [action.agent.name == 'Human' for action in df.action]

# Filter for human submissions (copies, so the 'is_best' annotation below
# does not touch the loaded tables).
human_submissions = {p: df[df.is_human].copy()
                     for p, df in submissions.items()}

# Annotate the best (lowest-error) submission of each activity.
# (tests are trivial as there is only one submission allowed).
for participant, df in human_submissions.items():
    df['is_best'] = False
    for activity in df.activity.unique():
        errors = df.loc[df.activity == activity, 'error'].dropna()
        # An activity may have no submission with a defined error (in the
        # sample output above, non-spanning submissions have NaN error).
        # Calling idxmin() on such a group either raises or returns NaN,
        # and `df.loc[nan, 'is_best'] = True` would then append a junk row
        # and upcast the integer columns to float. Such an activity simply
        # has no best submission.
        if errors.empty:
            continue
        df.loc[errors.idxmin(), 'is_best'] = True

# Filter for best (human) submissions in collaborative activities.
# A participant without a valid submission in an activity contributes no
# row for that activity.
collab_activities = ['collaboration-1', 'collaboration-2']
table_list = list()
for participant in sorted(human_submissions):
    df = human_submissions[participant]
    table_list.append(df[df.activity.isin(collab_activities) & df.is_best])
best_df = pd.concat(table_list, ignore_index=True, sort=False)
best_df.participant = best_df.participant.astype(int)
best_df = best_df.drop(columns=['is_best'])

best_df.head()

Unnamed: 0,participant,time,activity,state,next_state,action,attempt_no,cost,opt_cost,error,is_submission,is_mst,is_spanning,is_human
0,1,1095.491573,collaboration-1,"EnvironmentState(NetworkState(e:11+0,c:29|n:10...","EnvironmentState(NetworkState(e:0+0,c:0|n:10,e...","Action(submit,Human)",3.0,29.0,22.0,0.318182,True,False,True,True
1,1,2461.46684,collaboration-2,"EnvironmentState(NetworkState(e:9+0,c:35|n:10,...","EnvironmentState(NetworkState(e:9+0,c:35|n:10,...","Action(submit,Human)",4.0,35.0,35.0,0.0,True,True,True,True
2,2,1750.270298,collaboration-1,"EnvironmentState(NetworkState(e:9+1,c:22|n:10,...","EnvironmentState(NetworkState(e:9+1,c:22|n:10,...","Action(submit,Human)",2.0,22.0,22.0,0.0,True,True,True,True
3,2,2569.816459,collaboration-2,"EnvironmentState(NetworkState(e:9+0,c:35|n:10,...","EnvironmentState(NetworkState(e:9+0,c:35|n:10,...","Action(submit,Human)",7.0,35.0,35.0,0.0,True,True,True,True
4,3,1588.141877,collaboration-1,"EnvironmentState(NetworkState(e:9+0,c:22|n:10,...","EnvironmentState(NetworkState(e:9+0,c:22|n:10,...","Action(submit,Human)",4.0,22.0,22.0,0.0,True,True,True,True


# TODO

In [6]:
# NOTE(review): `TODO` is an undefined name, so this cell raises NameError
# (see the recorded output) and stops a "Run All" at this point. Presumably a
# deliberate placeholder / stop marker -- confirm before removing it.
TODO

NameError: name 'TODO' is not defined

#### Cleaning Tables.

In [None]:
# # Format table for error over best submissions.
# def clean_tables():
#     student_tables = {}
#     for participant, df in transition_tables.items():
#         # list of maximum error values for each activity
#         max_error = []

#         df = df.copy()
#         df = df[df['is_submission']]

#         # removing collaborative activity rows
# #         df = df[df['header.frame_id'] != "collab-activity"]
# #         df = df[df['header.frame_id'] != "collab-activity-2"]
#         # removing duplicate rows from submission log and keeping the last submission
# #         df.drop_duplicates(subset="header.frame_id",
# #                            keep='last', inplace=True)

#         # preserving collaborative activity rows while removing duplicate test rows
#         activities = ['collaboration-1', 'collaboration-2']
#         df = pd.concat([
#             df[df['activity'] == 'collaboration-1'],
#             df[df['activity'] == 'collaboration-2'],
#             df[~df.activity.isin([activities])].drop_duplicates(
#                 ['activity'], keep='last')
#             #             df[(df['activity'] != 'collaboration-1') & (df['activity'] !=
#             #                                                         'collaboration-2')].drop_duplicates(['activity'], keep='last')
#         ])

# #         mst_costs = []
# #         spanning = []
# #         norm_error = []

# #         for i, row in df.iterrows():
# #             cost = row['world_state'].get_mst_cost()
# #             span = row['world_state'].is_spanning()
# #             mst_costs.append(cost)
# #             spanning.append(span)
# #             # compute normalized error
# #             if span:
# #                 norm_error.append((int(row['cost']) - int(row['world_state'].get_mst_cost())) /
# #                                   int(row['world_state'].get_mst_cost()))
# #                 max_error.append((int(row['world_state'].get_max_cost()) - int(row['world_state'].get_mst_cost())) /
# #                                  int(row['world_state'].get_mst_cost()))
# #             else:
# #                 norm_error.append(None)

# #         # adding mst_cost, spanning, and normalized_error columns
# #         df['mst_cost'] = mst_costs
# #         df['spanning'] = spanning
# #         df['normalized_error'] = norm_error

#         student_tables[participant] = df

#         print('Participant {}'.format(participant))
#     return student_tables, max_error


# all_tables, max_error = clean_tables()
# print(max_error)

In [None]:
# all_tables[1]

### Formatting collab submissions dataframe.

In [None]:
# # formatting dataframe for collab activities only
# cdf = pd.DataFrame()

# #dictionary containing all collaborative attempts by each student
# collab_attempts = {}

# for key, mdf in all_tables.items():
# #     collab_df = mdf.loc[mdf['header.frame_id'].isin(
# #         ['collab-activity', 'collab-activity-2'])].copy()
# #     collab_df['student'] = key
# #     collab_df.drop(columns=['world_state', 'is_submission', 'is_mst', 'cost', 'mst_cost', 'spanning', 'action'
# #                             ], inplace=True)
# #     collab_df.rename(columns={"header.frame_id": "activity",
# #                      "action.agent_name": "agent", "normalized_error": "error"}, inplace=True)
# #     cdf = cdf.append(collab_df)
# #     collab_attempts[key] = collab_df
    
# # dataframe containing all collaborative activity submissions 
# cdf

In [None]:
# submissions[7]

In [None]:
# # dataframe containing only human collaborative activity submissions
# cdf_human = pd.DataFrame()

# # removing robot submissions
# for index, row in cdf.iterrows():
#     cdf_human = cdf.loc[cdf['agent'].isin(['human'])].copy()

# display(cdf_human)

# # reformatting dataframes with only human submissions so they are activity specific

# collab_df_1 = cdf_human.loc[cdf_human['activity'].isin(
#     ['collaboration-1'])].copy()

# collab_df_2 = cdf_human.loc[cdf_human['activity'].isin(
#     ['collaboration-2'])].copy()

# display(collab_df_1)
# display(collab_df_2)

In [None]:
# with cdf_human_pickle_file.open('wb') as handle:
#     pickle.dump(cdf_human, handle, protocol=pickle.HIGHEST_PROTOCOL)

# print('Saved cdf_human dataframe to {}'.format(cdf_human_pickle_file))

### Filtering best submissions.

In [None]:
# # making a new dataframe that only contains the best submissions in each activity per student

# # takes a dataframe containing rows of submissions and returns a dataframe with the best submissions
# def bestDf(df):

#     best_df = pd.DataFrame()
#     bdf = df.copy()
#     current = bdf.iloc[[0][0]]['student']
#     temp_dict = {}
#     temp_df = pd.DataFrame()

#     for index, row in bdf.iterrows():
#         #print('current student', current)
#         if current == row['student']:
#             # compare the errors of every student so that we select the best in each activity
#             # add them to a new df
#             temp_df = temp_df.append(row)
#             # sort based on error value
#             temp_df.sort_values(
#                 by=['error'], inplace=True, ascending=True, na_position='last', ignore_index=True)

#             # gets the dataframe row with the lowest error and adds it to a temporary dictionary
#             # update dictionary with key as student, value as the temp df where the first row is the best
#             temp_dict[current] = temp_df.iloc[0]
#             # display(temp_df)
#         else:
#             current = row['student']
#             temp_df = pd.DataFrame()

#             # figure out how to compare the errors of every student so that we select the best in each activity
#             # add them to a new df
#             temp_df = temp_df.append(row)
#             # sort based on error value
#             temp_df.sort_values(
#                 by=['error'], inplace=True, ascending=True, na_position='last', ignore_index=True)

#             # gets the dataframe row with the lowest error and adds it to a temporary dictionary
#             # update dictionary with key as student, value as the temp df where the first row is the best
#             temp_dict[current] = temp_df.iloc[0]
#             # display(temp_df)

#     # display(temp_dict)

#     # now need to loop through the dictionary and for every item, the first row in the value is the best submission
#     for student, temp in temp_dict.items():
#         # appending best submission to best_df
#         # display(temp)
#         temp.name = student
#         best_df = best_df.append(temp)
#     return best_df


# bestDf(collab_df_1)

### Creating best dataframes for visualization.

In [None]:
# collab_dfs = [collab_df_1, collab_df_2]

# collab_best = pd.DataFrame()

# # creating dataframes with the best submission of each collaborative activity for every student
# for df in collab_dfs:
#     best = bestDf(df)
#     display(best)
#     collab_best = collab_best.append(best)

# collab_best.drop(columns=['agent'], inplace=True)
# collab_best

In [None]:
# with collab_best_pickle_file.open('wb') as handle:
#     pickle.dump(collab_best, handle, protocol=pickle.HIGHEST_PROTOCOL)

# print('Saved collab_best dataframe to {}'.format(collab_best_pickle_file))

### Visualization of error over collaborative activity by best submission.

In [None]:
def plot_collaborations(
        learning_df, getter,
        participants=None,
        fig=None, ax=None, offset=0.01,
        zero_offset=0,
        ylabel='', title='',
        filename=None,
        ylim=(-0.05, 1.05),
        ygrid=False, save=True,
        yticks=False, verbose=False):
    """Plot one two-point line per participant on a percentage y-axis.

    The two points are drawn at the x categories 'pre-test' and 'post-test'.

    Relies on the notebook-level names ``colors`` (participant -> color) and
    ``figs_dir`` (output directory); neither is defined in the part of the
    notebook visible to this review.

    Args:
        learning_df: table indexed by participant.
        getter: callable mapping a row of ``learning_df`` to a pair of
            values (left point, right point).
        participants: participants to plot, in the order used to assign
            overlap offsets; defaults to the sorted index of ``learning_df``.
        fig: accepted for interface compatibility; not read.
        ax: axes to draw on; a new 5x7 figure is created when None.
        offset: downward shift applied once per earlier participant that
            has the same value (compared after rounding to 2 decimals).
        zero_offset: extra shift added to points whose value is exactly 0;
            None disables it.
        ylabel: y-axis label.
        title: axes title.
        filename: file name under ``figs_dir`` to save the figure to.
        ylim: (low, high) y-limits; a falsy value leaves the limits alone.
        ygrid: passed to ``ax.grid``.
        save: the figure is saved only if this is true and ``filename`` is
            not None.
        yticks: currently unused.
        verbose: print each participant's values and plotted positions.
    """
    if participants is None:
        participants = sorted(learning_df.index)
    if ax is None:
        fig, ax = plt.subplots(1, figsize=(5, 7))

    # Positions.
    prelist = list()
    postlist = list()
    positions = dict()
    for participant in participants:
        row = learning_df.loc[participant]
        values = getter(row)

        # Compute offset to prevent overlapping lines. The rounded values
        # are both looked up and stored, so that values equal up to two
        # decimals are counted as overlapping (looking up a rounded value
        # among unrounded ones would almost never match).
        rounded0 = round(values[0], 2)
        rounded1 = round(values[1], 2)
        p0 = values[0] - prelist.count(rounded0) * offset
        p1 = values[1] - postlist.count(rounded1) * offset
        if zero_offset is not None:
            if values[0] == 0:
                p0 = p0 + zero_offset
            if values[1] == 0:
                p1 = p1 + zero_offset

        prelist.append(rounded0)
        postlist.append(rounded1)
        if verbose:
            print(participant, values, p0, p1)
        positions[participant] = (p0, p1)

    # Plot.
    for participant in sorted(participants):
        position = positions[participant]
        # Plot a line segment for the participant.
        sns.lineplot(
            x=['pre-test', 'post-test'], y=position,
            marker='o', color=colors[participant],
            label=str(participant),
            legend=False, ax=ax)

    ax.set_ylabel(ylabel)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
    if ylim:
        ax.set_ylim(ylim[0], ylim[1])

    # Turn on/off grid on the left Axis.
    ax.grid(ygrid)

    ax.set_title(title)

    # Attach the legend to the axes that was drawn on (which need not be
    # pyplot's current axes when `ax` is passed in).
    leg = ax.legend(sorted(participants), loc='upper left', frameon=True)
    # Get the bounding box of the original legend in axes coordinates.
    # (BboxBase.inverse_transformed was removed from Matplotlib;
    # transformed(<transform>.inverted()) is the documented replacement.)
    bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
    # Move the legend to the right of the axes.
    xOffset = 1.05
    bb.x0 += xOffset
    bb.x1 += xOffset
    leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

    if filename is not None and save:
        export_file = figs_dir.joinpath(filename)
        ax.figure.savefig(export_file, bbox_inches='tight')
        print(export_file)


def plot_feasible(learning_df, ax=None, save=True):
    """Plot each participant's pretest_feasible vs. posttest_feasible value.

    Args:
        learning_df: table indexed by participant with `pretest_feasible`
            and `posttest_feasible` columns.
        ax: axes to draw on (forwarded to plot_collaborations).
        save: whether to save the figure (forwarded to plot_collaborations).
    """
    # Order participants to have aligned offsets of repeating values.
    participants = [7, 1, 3, 8, 9, 2, 4, 5, 6]

    # Define function to retrieve and plot the feasible scores.
    def get_values_from_row(row):
        return (row.pretest_feasible, row.posttest_feasible)

    # Plot the changes from pre-test to post-test.
    # `plot_pretest_posttest` is not defined in this notebook; the helper
    # defined above with the matching signature is `plot_collaborations`.
    plot_collaborations(
        learning_df,
        participants=participants,
        ylabel='%  feasible solutions',
        title='% Valid in Tests',
        filename='learning_outcomes_is_feasible.pdf',
        getter=get_values_from_row,
        ax=ax, save=save)


# NOTE(review): `learning_df` is not defined in the part of the notebook
# visible to this review -- TODO confirm where it comes from.
plot_feasible(learning_df)

In [None]:
# Visualize how the error of the best submission changes from the first to
# the second collaborative activity, one line per participant.
# NOTE(review): `collab_best` is only built by commented-out cells above;
# the columns used here ('participant', 'activity', 'error') match `best_df`
# -- TODO confirm which table is intended.
participants = [1, 2, 3, 4, 5, 6, 7, 8, 9]
separation = 0.01

fig, ax = plt.subplots(1, figsize=(5, 5))

prelist = list()
postlist = list()
for i in participants:
    temp = collab_best[collab_best['participant'] == i]
    # Expects exactly one row per collaborative activity, in order.
    values = list(temp.error)
    # Shift a point up once per earlier participant with the same value, so
    # overlapping lines stay distinguishable. Each endpoint is compared
    # against the earlier values of the same endpoint.
    p0 = values[0] + prelist.count(values[0])*separation
    p1 = values[1] + postlist.count(values[1])*separation
    prelist.append(values[0])
    postlist.append(values[1])
    # plotting
    plt.plot(temp.activity, [p0, p1], marker='o', markersize=5)
# NOTE(review): the plotted quantity is the error column; the 'Score' label
# is kept as in the original.
plt.ylabel('Score')
plt.title('Collab Activity Success\n', loc='center', fontsize=20)

# One legend entry per plotted participant (the original referenced an
# undefined name `students`).
leg = plt.legend(participants, loc='upper left', frameon=True)
# get the bounding box of the original legend in axes coordinates
# (BboxBase.inverse_transformed was removed from Matplotlib;
# transformed(<transform>.inverted()) is the documented replacement)
bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
# change to location of the legend
xOffset = 1.1
bb.x0 += xOffset
bb.x1 += xOffset
leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

# set y axis ticks to percentages (dropping the outermost ticks)
yticks = plt.yticks()[0]
plt.yticks(yticks[1:-1], [str(round(i*100)) + '%' for i in yticks[1:-1]])
plt.show()


In [None]:
# # visualization for error evolution over the best submission in each collaborative activity
# students = [1, 2, 3, 4, 5, 6, 7, 9, 10]
# separation = 0.01

# # graphing the pretest versus posttest is_spanning scores
# fig, ax = plt.subplots(1, figsize=(5, 5))

# prelist = list()
# postlist = list()
# for i in students:
#     temp = collab_best[collab_best['student'] == i]
#     values = list(temp.error)
#     #computing offset between overlapping lines
#     p0 = values[0] + prelist.count(values[0])*separation
#     p1 = values[1] + prelist.count(values[1])*separation
#     prelist.append(values[0])
#     postlist.append(values[1])
#     #plotting
#     plt.plot(temp.activity, [p0,p1], marker='o', markersize=5)
# plt.ylabel('Score')
# plt.title('Collab Activity Success\n', loc='center', fontsize=20)

# leg = plt.legend(students, loc='upper left', frameon=True)
# # get the bounding box of the original legend
# bb = leg.get_bbox_to_anchor().inverse_transformed(ax.transAxes)
# # change to location of the legend
# xOffset = 1.1
# bb.x0 += xOffset
# bb.x1 += xOffset
# leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

# # set y axis ticks to percentages
# yticks = plt.yticks()[0]
# plt.yticks(yticks[1:-1], [str(round(i*100)) + '%' for i in yticks[1:-1]])
# plt.show()


## Compute temporal dynamics across collaborative activities.

- Compare the error of the best attempt in activity 1 vs. the best attempt in activity 2 with Wilcoxon's signed-rank test, and compute the effect size (Cliff's Delta) (extension of notebook 3)
    - evolution overall significant if Wilcoxon’s says p < 0.05. 
    - Direction (improve or not) by Cliff’s Delta’s sign.
- Note: must be spanning, for collaborative activities
- report Magnitude overall: Cliff’s Delta’s absolute value (large, small, negligible, etc.)


### Reformatting best error dataframe. 

In [None]:
def reformat_collab_df(df):
    """Pivot best-submission rows into one row per student.

    Both schemas used in this notebook are accepted: the id column may be
    'student' or 'participant', and the activities may be labelled
    'collab-activity' / 'collab-activity-2' or
    'collaboration-1' / 'collaboration-2'.

    Args:
        df: table with an id column, an 'activity' column and an 'error'
            column. If a student has several rows for one activity, the
            first one wins.

    Returns:
        DataFrame indexed by student id with columns 'collab_error_1',
        'collab_error_2' and 'student'. Only students that have a row in
        both activities are included (inner join).
    """
    id_column = 'student' if 'student' in df.columns else 'participant'
    first_labels = ('collab-activity', 'collaboration-1')
    second_labels = ('collab-activity-2', 'collaboration-2')

    c_data = {}
    c2_data = {}

    for index, row in df.iterrows():
        if row['activity'] in first_labels:
            # setdefault keeps the first error seen for this student.
            c_data.setdefault(int(row[id_column]), row['error'])
        elif row['activity'] in second_labels:
            c2_data.setdefault(int(row[id_column]), row['error'])

    df1 = pd.DataFrame.from_dict(
        data=c_data, orient='index', columns=['collab_error_1'])
    df2 = pd.DataFrame.from_dict(
        data=c2_data, orient='index', columns=['collab_error_2'])

    final_df = pd.merge(df1, df2, left_index=True, right_index=True)
    final_df['student'] = final_df.index
    return final_df


# NOTE(review): no active cell above defines `collab_best` (the cells that
# build it are commented out), so this raises NameError on a fresh kernel
# -- TODO confirm which table should be passed here.
df = reformat_collab_df(collab_best)

### Perform Wilcoxon signed-rank test.

In [None]:
# Paired samples: best error per student in the first (r) and second (o)
# collaborative activity.
r = df['collab_error_1'].tolist()
o = df['collab_error_2'].tolist()

# Two-sided exact test. The null hypothesis is that the paired differences
# (r - o) are symmetric about zero; reject it if the p-value is below the
# chosen threshold.
w, p = wilcoxon(r, o, mode="exact")
print('Exact:', 'W=', w, 'pvalue=', p)

# One-sided exact test; the alternative is that the differences r - o tend
# to be greater than zero.
a, b = wilcoxon(r, o, alternative="greater", mode="exact")
print('Greater:', 'W=', a, 'pvalue=', b)

### Compute effect size Cliff's Deltas.

In [None]:
# Estimate the effect size between the first-activity errors (control) and
# the second-activity errors (test) by Cliff's Delta.
r = df['collab_error_1'].tolist()
o = df['collab_error_2'].tolist()
d = two_group_difference(
    control=r,
    test=o,
    effect_size='cliffs_delta',
)
print('d', d)


# 2 Error over collab activity by attempt.

In [None]:
# Build the x tick labels for each student's attempts plot.
custom_xticks = {}
for student, df in collab_attempts.items():
    # Number the attempts 1..n across both activities (modifies the table
    # in place; the 'attempt' column is used by the cells below).
    df.reset_index(drop=True, inplace=True)
    df['attempt'] = df.index + 1

    # Attempts of the first activity keep their overall number; attempts of
    # 'collab-activity-2' are labelled with a counter restarting at 1.
    labels = []
    count = 1
    for activity, attempt in zip(df['activity'], df['attempt']):
        if activity != 'collab-activity-2':
            labels.append(attempt)
            continue
        labels.append(count)
        count += 1

    custom_xticks[student] = labels
    display(df)

In [None]:
# Scatter plot of error per attempt, one figure per student.
for student, df in collab_attempts.items():
    plt.figure()
    markers = {'human': 'o', 'robot': 'X'}
    sns.scatterplot(
        data=df, x='attempt', y='error',
        hue='activity', style='agent', markers=markers)
    plt.ylim(-0.1, 1)

    # Percentage labels at 0%, 10%, ..., 100%.
    yticks = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    plt.yticks(yticks, [str(round(i*100)) + '%' for i in yticks])

    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
    title = 'Student ' + str(int(df['student'][0])) + ' Collab Attempts\n'
    plt.title(title, loc='center', fontsize=20)

    # vertical line to divide plot by collaborative activity
    # (skipped when the student has no rows for the second activity)
    # NOTE(review): the line is drawn at the index label of the first
    # second-activity row, which equals the attempt number of the row
    # before it -- confirm this placement is intended.
    x = df.index[df['activity'] == 'collab-activity-2'].tolist()
    if x:
        plt.axvline(x[0], 0, 1, ls='--', c='grey')

    # custom xticks for student attempts during each collab activity
    # Work on a copy so that re-running this cell never changes
    # custom_xticks, and prepend a label for position 0.
    t = list(custom_xticks[student])
    if not t or t[0] != 0:
        t.insert(0, 0)

    # One tick per label; a fixed number of ticks only matches the labels
    # for one particular number of attempts.
    plt.xticks(np.arange(len(t)), labels=t)

## Computing slope and p-value of linear regressions analysis.
Compare error progression among attempts in a collaborative activity by
computing the slope and p-value of a simple linear regression analysis 

### Reformatting dataframe.

In [None]:
# Display the per-student collaborative-attempt tables.
# NOTE(review): this prints the entire mapping; `collab_attempts` is only
# built by a commented-out cell above.
collab_attempts

In [None]:
linregress_df = pd.DataFrame()

# Per-student results, filled in the same order.
stud = []
collab_p = []
collab_s = []
collab2_p = []
collab2_s = []

for student, df in collab_attempts.items():
    # NOTE(review): student 7 is skipped -- presumably because they lack
    # enough valid errors for a regression; confirm.
    if student != 7:
        # Drop rows with any missing value on a copy: an in-place dropna
        # would permanently remove those attempts from collab_attempts and
        # change the output of every other cell that reads it.
        df = df.dropna()
#         df.fillna(0, inplace=True) #should the na's be replaced with the maximum error or not counted at all
        first = df.loc[df['activity'] == 'collab-activity']
        second = df.loc[df['activity'] == 'collab-activity-2']
        c1 = list(first['error'])
        ac1 = list(first['attempt'])
        c2 = list(second['error'])
        ac2 = list(second['attempt'])
        error = list(df['error'])
        attempt = list(df['attempt'])
        print(c1, ac1, c2, ac2)

        # Regress error on attempt number: per activity (1, 2) and over all
        # attempts of both activities together (3).
        slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(
            ac1, c1)
        slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(
            ac2, c2)
        slope3, intercept3, r_value3, p_value3, std_err3 = stats.linregress(
            attempt, error)
        print('Stud.', student, '| len c1:', len(c1), 'len c2:', len(c2))
        print('\tcollab-activity:', 'p_value1:', p_value1, 'slope1:', slope1)
        print('\tcollab-activity-2:', 'p_value2:', p_value2, 'slope2:', slope2)
        # NOTE(review): this line reports the regression over both
        # activities; the 'collab-activity-3' label is kept as is.
        print('\tcollab-activity-3:', 'p_value3:', p_value3, 'slope3:', slope3)

        stud.append(student)
        collab_p.append(p_value1)
        collab_s.append(slope1)
        collab2_p.append(p_value2)
        collab2_s.append(slope2)

In [None]:
# Collect the per-student regression results into one table, one column at
# a time and in this order.
linregress_df = pd.DataFrame()
for column, column_values in (
        ('student', stud),
        ('collab_pval', collab_p),
        ('collab_slope', collab_s),
        ('collab2_pval', collab2_p),
        ('collab2_slope', collab2_s)):
    linregress_df[column] = column_values

linregress_df

In [None]:
# Percentage of students whose first-activity regression has p < 0.05.
significant = int((linregress_df['collab_pval'] < 0.05).sum())
percentage = (significant / len(linregress_df)) * 100
percentage

### Computing Spearman correlation.

In [None]:
# Spearman correlation between error and attempt number, per student and
# per collaborative activity.
for student, df in collab_attempts.items():
    # NOTE(review): student 7 is skipped, as in the regression cell above;
    # confirm the reason.
    if student != 7:
        # Drop rows with any missing value on a copy: an in-place dropna
        # would permanently remove those attempts from collab_attempts.
        df = df.dropna()
#         df.fillna(0, inplace=True) #should the na's be replaced with the maximum error or not counted at all
        first = df.loc[df['activity'] == 'collab-activity']
        second = df.loc[df['activity'] == 'collab-activity-2']
        c1 = list(first['error'])
        ac1 = list(first['attempt'])
        c2 = list(second['error'])
        ac2 = list(second['attempt'])

        rho1, pval1 = stats.spearmanr(c1, ac1, nan_policy='omit')
        rho2, pval2 = stats.spearmanr(c2, ac2, nan_policy='omit')
        print('Stud.',student, '| len c1:', len(c1), 'len c2:', len(c2))
        print('\tcollab-activity:','rho1:', rho1,'pval1:',pval1)
        print('\tcollab-activity-2:','rho2:', rho2,'pval2:',pval2)


# 3 Error over entire experiment evolution.

In [None]:
collab_best_copy = collab_best.copy()
# make dataframe for entire experiment for each student, using the best submissions of the collaborative activities
evolution_dfs = {}

collab_frames = ["collab-activity", "collab-activity-2"]
posttest_activities = [
    'posttest-1', 'posttest-2', 'posttest-3', 'posttest-4', 'posttest-5']

for student, df in all_tables.items():
    print('student', student)
    # removing collaborative activity rows
    df = df[~df['header.frame_id'].isin(collab_frames)]

    # rename on a new frame (an in-place rename of a filtered slice
    # triggers SettingWithCopyWarning)
    df = df.rename(columns={"header.frame_id": "activity",
                            "action.agent_name": "agent",
                            "normalized_error": "error"})

    # adding this student's best collaborative submissions
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    best_rows = collab_best_copy[
        collab_best_copy['student'].astype(int) == int(student)]
    df = pd.concat([df, best_rows])

    # removing excess columns
    df = df.drop(columns=['world_state', 'is_submission', 'is_mst', 'cost',
                          'mst_cost', 'spanning', 'action'])

    # setting student column values for every row (the original read the
    # value back from the last appended row, which equals int(student))
    df['student'] = int(student)

    # reset index
    df = df.reset_index(drop=True)

    # reorganizing dataframe so that collab activities are between tests:
    # move the post-test rows to the end. They are selected by activity
    # name rather than by the fixed row labels 5..9, which are only right
    # when there are exactly five pre-test rows before five post-test rows.
    is_posttest = df['activity'].isin(posttest_activities)
    df = pd.concat([df[~is_posttest], df[is_posttest]])
    df = df.reset_index(drop=True)

    # adding to dictionary
    evolution_dfs[student] = df
    display(df)
evolution_dfs


# Visualizing student complete evolution.

In [None]:
test = evolution_dfs[9]


def generate_visualization(df):
    """Plot the error of every activity in ``df`` on the current figure.

    Draws with the pyplot state machine, so repeated calls within one cell
    add lines to the same figure. Reads the notebook-level names
    ``evolution_dfs`` (its keys are used as legend labels) and ``max_error``
    (its element 1 is the upper y-limit).

    Args:
        df: table with 'activity' and 'error' columns.
    """
    plt.plot(df.activity, df.error, marker='o', markersize=5)
    fig = plt.gcf()
    fig.set_size_inches(15, 5)

    plt.ylabel('Error')
    title = "Evolution of All Students\n"
    plt.title(title, loc='center', fontsize=20)

    plt.legend(evolution_dfs.keys(), loc='upper right', frameon=True)

    # ensure that y-axis labelling is proportional to maximum possible error
    plt.ylim(-0.1, max_error[1])
    locs, labels = plt.yticks()
    labels = [str(round(i*100)) + '%' for i in locs]
    # blank out the lowest tick label
    labels[0] = ''
    plt.yticks(locs, labels)


generate_visualization(test)

In [None]:
# Plot every student's evolution table. generate_visualization draws on the
# current pyplot figure, so all lines of this cell end up in one figure.
for student, df in evolution_dfs.items():
    generate_visualization(df)


# Find task intervals for all logs from their tables.
- slicers: put markers or checkpoints that point to the start and end of an activity
    - intervals dict for each student with start and end timestamps tuples
    - confirm the checkpoints: delete overlap and check contiguity and continuity
    
- One way to have a simpler check is to count number of transitions into that activity and move on if it is one, handle the case if it is more than one (for the case of going into and out from that activity).


In [None]:
def find_task_intervals(df, task):
    """Return the (first, last) 'Time' value among the rows of one task.

    Args:
        df: table with 'header.frame_id' and 'Time' columns; not modified.
        task: value of 'header.frame_id' that selects the rows.

    Returns:
        Tuple (start, end): 'Time' of the first and of the last matching
        row, in table order.

    Raises:
        IndexError: if no row belongs to ``task``.
    """
    task_times = df.loc[df['header.frame_id'] == task, 'Time']
    return task_times.iloc[0], task_times.iloc[-1]

# Activities in the order they are looked up for each student.
headers = ['pretest-1', 'pretest-2', 'pretest-3', 'pretest-4', 'pretest-5', 'collab-activity',
           'collab-activity-2', 'posttest-1', 'posttest-2', 'posttest-3', 'posttest-4', 'posttest-5']

# Map every student to {activity: (start, end)} for all activities above.
student_intervals = {
    student: {
        header: find_task_intervals(table.copy(), header)
        for header in headers
    }
    for student, table in transition_tables.items()
}
display(student_intervals)


In [None]:
def gantt_chart(df):
    """Draw a Gantt-style chart with one horizontal bar per row of ``df``.

    Args:
        df: table with 'start' and 'duration' columns giving the left edge
            and the width of each bar; rows are drawn bottom-up in row
            order. The y tick labels are the twelve fixed activity names
            below.
    """
    # Declaring a figure "gnt"
    fig, gnt = plt.subplots()

    # Setting Y-axis limits. Bar i occupies y in [(i+1)*10, (i+1)*10 + 9],
    # so the original 0-100 range is kept as a minimum and extended when
    # the rows reach higher (twelve rows go up to 129 and were cut off).
    gnt.set_ylim(0, max(100, (len(df) + 1) * 10))

    # Setting X-axis limits
    gnt.set_xlim(0, 4000)

    # Setting labels for x-axis and y-axis
    gnt.set_xlabel('seconds since start')
    gnt.set_ylabel('Activity')

    # Setting ticks on y-axis (the vertical centre of each bar)
    gnt.set_yticks([15, 25, 35, 45, 55, 65, 75, 85, 95, 105, 115, 125])
    # Labelling ticks of y-axis
    gnt.set_yticklabels(['pretest-1', 'pretest-2', 'pretest-3', 'pretest-4', 'pretest-5', 'collab-activity', 'collab-activity-2', 'posttest-1', 'posttest-2', 'posttest-3', 'posttest-4', 'posttest-5'])

    # Setting graph attribute
    gnt.grid(True)

    # Declaring all the bars for each activity. Iterating by position means
    # the table does not need a 0..n-1 index.
    for i, (start, duration) in enumerate(zip(df['start'], df['duration'])):
        gnt.broken_barh([(start, duration)], ((i + 1) * 10, 9), facecolors=('tab:red'))

In [None]:
# One Gantt chart per student, built from that student's activity intervals.
# NOTE: a single `time_df` is reused and overwritten column by column on
# every iteration, so after the loop it holds the last student's values.
time_df = pd.DataFrame()

for student,values in student_intervals.items():
    #time_df = pd.DataFrame.from_dict(values)
    time_df['activity'] = values.keys()
    times = values.values()
    start = []
    end = []
    duration = []
    # Split each (start, end) pair and derive the duration.
    for i in times:
        s,e = i
        start.append(s)
        end.append(e) 
        duration.append(e-s)
    time_df['start'] = start
#     time_df['end'] = end
    time_df['duration'] = duration
#     display(time_df)
    
    gantt_chart(time_df)

In [None]:
#compute averages on start end pairs and slices averages

# Evolution across human actions in a collaborative activity.

S = optimal suggestions by the human, 
A = optimal agreements by the human, 
D = optimal disagreements by the human


In [None]:
# Action type name -> integer code; the code is the position in this list.
action_type_dict = {
    type_name: type_code
    for type_code, type_name in enumerate([
        'TYPE_SUGGEST_PICK',
        'TYPE_PICK',
        'TYPE_UNPICK',
        'TYPE_SUBMIT',
        'TYPE_SUGGEST_SUBMIT',
        'TYPE_CLEAR_SUGGEST_SUBMIT',
        'TYPE_AGREE',
        'TYPE_DISAGREE',
        'TYPE_CLEAR',
        'TYPE_GUESS',
    ])
}

In [None]:
# Show all rows when a table is displayed (changes a global pandas option).
pd.options.display.max_rows = None
a = transition_tables[6]

# Keep the human actions of type 7 (the code of 'TYPE_DISAGREE' in
# action_type_dict), then show the time of the first one.
human_disagree = (a['action.agent_name'] == 'human') & (a['action.type'] == 7)
a = a.loc[human_disagree]
a.iloc[0]['Time']

### Assembling action indices and time intervals.

In [None]:
# Per-student bookkeeping for the collaborative part of each log:
#   lens       -> number of rows
#   start      -> index label of the first row
#   lens_time  -> time between the first and the last row
#   start_time -> time of the first row
lens = {}
start = {}
lens_time = {}
start_time = {}

collab_frames = ['collab-activity', 'collab-activity-2']
for student, df in transition_tables.items():
    in_collab = df['header.frame_id'].isin(collab_frames)
    collabs = df.loc[in_collab].copy()
    lens[student] = len(collabs)

    first_time = collabs.iloc[0]['Time']
    last_time = collabs.iloc[-1]['Time']
    print(first_time, last_time)

    start[student] = collabs.index[0]
    start_time[student] = first_time
    lens_time[student] = float(last_time) - float(first_time)
lens_time

### Checking if an action is optimal.

In [None]:
def is_opt(df,action,student,progress_by):
    """Flag every human action of one type by whether its edge is in the MST.

    Reads the notebook-level ``action_type_dict``, ``start`` and
    ``start_time``.

    Args:
        df: transition table with the columns 'action.agent_name',
            'action.type' and 'world_state', plus 'action' (read for
            suggestions), 'state.suggested.u' / 'state.suggested.v' (read for
            agreements and disagreements) and 'Time' (read when progress_by
            is 'time').
        action: 'TYPE_SUGGEST_PICK', 'TYPE_AGREE' or 'TYPE_DISAGREE'.
        student: key into ``start`` / ``start_time``.
        progress_by: 'action' or 'time'.

    Returns:
        A ``(flags, actions)`` tuple. ``flags`` is a list with one boolean
        per selected row. ``actions`` is a copy of the selected rows with the
        flags stored in 'opt_suggestion', 'opt_agree' or 'opt_disagree'.
        With progress_by 'action' it is indexed by 'new_index' (original
        index label minus ``start[student]``); with 'time' it has a
        'new_time' column ('Time' minus ``start_time[student]``).

    Raises:
        KeyError: if ``action`` is not a key of ``action_type_dict``.
        ValueError: if ``action`` is not one of the three supported types or
            ``progress_by`` is neither 'action' nor 'time'.
    """
    action_code = action_type_dict[action]
    suggest_code = action_type_dict['TYPE_SUGGEST_PICK']
    flag_columns = {
        suggest_code: 'opt_suggestion',
        action_type_dict['TYPE_AGREE']: 'opt_agree',
        action_type_dict['TYPE_DISAGREE']: 'opt_disagree',
    }
    # Fail loudly instead of silently returning None (unsupported action) or
    # hitting an unbound local variable (unknown progress_by).
    if action_code not in flag_columns:
        raise ValueError('is_opt does not support action ' + repr(action))
    if progress_by not in ('action', 'time'):
        raise ValueError("progress_by must be 'action' or 'time', got "
                         + repr(progress_by))

    # Keep only the human's actions of the requested type. Copy the selection
    # so the columns added below are written to an independent frame rather
    # than to a slice of the caller's table.
    selected = ((df['action.agent_name'] == 'human')
                & (df['action.type'] == action_code))
    actions = df.loc[selected].copy()

    flags = []
    for _, row in actions.iterrows():
        # MST of the network in the state this action was taken in.
        mst = row['world_state'].get_mst()

        # NOTE(review): the previous version also built, for every row, a copy
        # of the MST with edges (2, 8) and (9, 4) added and (1, 9) removed --
        # per its comment, to cover activities with two MST solutions -- but
        # never consulted it. Membership was, and still is, tested against the
        # single MST returned by get_mst(). TODO confirm whether edges of the
        # alternative solution should count as optimal too.
        if action_code == suggest_code:
            u, v = row['action'].edge
        else:
            u = row['state.suggested.u']
            v = row['state.suggested.v']

        # NOTE(review): a disagreement is flagged True when the suggested edge
        # IS in the MST, exactly like an agreement. If an "optimal
        # disagreement" means rejecting a non-MST edge, this flag is inverted
        # for TYPE_DISAGREE -- confirm the intended meaning.
        flags.append(mst.has_edge(u, v))

    actions[flag_columns[action_code]] = flags

    # Shift the progress axis so that 0 is where the collaborative activities
    # begin for this student.
    if progress_by == 'action':
        shift = start[student]
        actions['new_index'] = [label - shift for label in actions.index]
        actions = actions.set_index('new_index')
    else:
        shift = start_time[student]
        actions['new_time'] = [t - shift for t in actions['Time']]

    return (flags, actions)

# Smoke-check is_opt on one student for each supported action type. Only the
# result of the last call is rendered as the cell output.
demo_student = 6
demo_table = transition_tables[demo_student]
is_opt(demo_table, 'TYPE_SUGGEST_PICK', demo_student, 'time')
is_opt(demo_table, 'TYPE_AGREE', demo_student, 'time')
is_opt(demo_table, 'TYPE_DISAGREE', demo_student, 'action')

## 1 Evolution over actions.

### Computing quartiles and medians by action.

In [None]:
# For every student: where each human suggestion (S), agreement (A) and
# disagreement (D) falls, as its index offset from the first collaborative row
# divided by the student's number of collaborative rows.
# NOTE(review): the optimality flags returned by is_opt (S, A, D) are not used
# here, so the lists cover all such actions, not only the optimal ones --
# confirm whether filtering on the flags was intended.
sad_dict = {}

for student, df in transition_tables.items():
    S, sdf = is_opt(df, 'TYPE_SUGGEST_PICK', student, 'action')
    A, adf = is_opt(df, 'TYPE_AGREE', student, 'action')
    D, ddf = is_opt(df, 'TYPE_DISAGREE', student, 'action')

    n_collab = lens[student]
    sad_dict[student] = {
        key: [offset / n_collab for offset in frame.index]
        for key, frame in (('S', sdf), ('A', adf), ('D', ddf))
    }

sad_dict


### Calculating descriptive statistics for action optimality.

- all optimal/all actions
- s/all actions
- a/all actions
- d/all actions


In [None]:
# Share of each student's collaborative rows taken by the S, A and D lists,
# as percentages rounded to two decimals.
for student, sad in sad_dict.items():
    n_rows = lens[student]
    n_s, n_a, n_d = len(sad['S']), len(sad['A']), len(sad['D'])

    p_all = round(((n_s + n_a + n_d) / n_rows) * 100, 2)
    p_s = round((n_s / n_rows) * 100, 2)
    p_a = round((n_a / n_rows) * 100, 2)
    p_d = round((n_d / n_rows) * 100, 2)

    print('Student', student, 'S:', p_s, 'A:', p_a, 'D:', p_d,
          '\t|', str(p_all) + '% optimal actions')
    

### Computing medians through progress by action.

In [None]:
# Quartiles and median of the normalized action positions, per student and
# action type, followed by the medians averaged over students.
average_medians = []
average_s, average_a, average_d = [], [], []

# NOTE(review): the printed label says "suggestions" for every action type.
quantile_labels = (
    ("Q1 quantile of Student", .25),
    ("Q2 quantile of Student", .50),
    ("Q3 quantile of Student", .75),
    ("Q4 quantile of Student", 1.0),
)

for student, sad in sad_dict.items():
    medians = []
    for action, positions in sad.items():
        print("Action", action)
        for label, q in quantile_labels:
            print(label, student, "suggestions: ",
                  np.quantile(positions, q, interpolation='midpoint'))
        median = np.median(positions)
        print("Median:", median, '\n')
        medians.append(median)

    average_s.append(np.median(sad['S']))
    average_a.append(np.median(sad['A']))
    average_d.append(np.median(sad['D']))

    # Mean of this student's per-action medians.
    avg_median = sum(medians) / len(medians)
    print('Average median:', avg_median, '\n\n')
    average_medians.append(avg_median)

# Means over students.
total_avg = sum(average_medians) / len(average_medians)
total_s = sum(average_s) / len(average_s)
total_a = sum(average_a) / len(average_a)
total_d = sum(average_d) / len(average_d)

print('Average S medians:', total_s)
print('Average A medians:', total_a)
print('Average D medians:', total_d)
print('Average of all medians:', total_avg)


In [None]:
# One figure per student: box plots of the normalized positions of the
# S, A and D actions.
for student, sad in sad_dict.items():
    fig, ax = plt.subplots()
    ax.boxplot([sad[key] for key in ('S', 'A', 'D')])
    ax.set_title(f'Student {student} Progress by Action')
    ax.set_xticklabels(['Suggestion', 'Agree', 'Disagree'])
    ax.set_ylabel('Normalized Action')

## 2 Evolution over time.

### Computing quartiles and medians by time.

In [None]:
# For every student: when each human suggestion (S), agreement (A) and
# disagreement (D) happens, as time since the first collaborative row divided
# by the duration of the student's collaborative activities.
# NOTE(review): the optimality flags returned by is_opt (S, A, D) are not used
# here, so the lists cover all such actions, not only the optimal ones --
# confirm whether filtering on the flags was intended.
sad_dict_time = {}

for student, df in transition_tables.items():
    S, sdf = is_opt(df, 'TYPE_SUGGEST_PICK', student, 'time')
    A, adf = is_opt(df, 'TYPE_AGREE', student, 'time')
    D, ddf = is_opt(df, 'TYPE_DISAGREE', student, 'time')

    duration = lens_time[student]
    sad_dict_time[student] = {
        key: [elapsed / duration for elapsed in frame['new_time']]
        for key, frame in (('S', sdf), ('A', adf), ('D', ddf))
    }

sad_dict_time


### Computing medians through progress by time.

In [None]:
# Quartiles and median of the normalized action times, per student and
# action type, followed by the medians averaged over students.
average_medians_time = []
average_st, average_at, average_dt = [], [], []

# NOTE(review): the printed label says "suggestions" for every action type.
quantile_labels = (
    ("Q1 quantile of Student", .25),
    ("Q2 quantile of Student", .50),
    ("Q3 quantile of Student", .75),
    ("Q4 quantile of Student", 1.0),
)

for student, sad in sad_dict_time.items():
    medians = []
    for action, times in sad.items():
        print("Action", action)
        for label, q in quantile_labels:
            print(label, student, "suggestions: ",
                  np.quantile(times, q, interpolation='midpoint'))
        median = np.median(times)
        print("Median:", median, '\n')
        medians.append(median)

    average_st.append(np.median(sad['S']))
    average_at.append(np.median(sad['A']))
    average_dt.append(np.median(sad['D']))

    # Mean of this student's per-action medians.
    avg_median = sum(medians) / len(medians)
    print('Average median:', avg_median, '\n\n')
    average_medians_time.append(avg_median)

# Means over students.
total_avg_time = sum(average_medians_time) / len(average_medians_time)
total_st = sum(average_st) / len(average_st)
total_at = sum(average_at) / len(average_at)
total_dt = sum(average_dt) / len(average_dt)

print('Average S medians:', total_st)
print('Average A medians:', total_at)
print('Average D medians:', total_dt)
print('Average of all medians:', total_avg_time)


### Plotting evolution over absolute time.

In [None]:

# plot the points in the above plots
# boxplots over time

# plot_establishment_absolute

### Plotting evolution over normalized time.

In [None]:
# plot_establishment_normalised

# Comparing first and last collab activity submissions.

In [None]:
fl_df = cdf_human.copy()

# Keep only the first and the last row of every student.
students = [1, 2, 3, 4, 5, 6, 7, 9, 10]
first_last_rows = []
for i in students:
    stdf = fl_df.loc[fl_df['student'] == i]
    # Positions 0 and -1: a student with a single row contributes it twice.
    first_last_rows.append(stdf.iloc[[0, -1]])

# Concatenate once instead of growing frames with DataFrame.append, which was
# deprecated in pandas 1.4 and removed in pandas 2.0.
fldf = pd.concat(first_last_rows, ignore_index=True).drop(['agent'], axis=1)
fldf

### Visualizing first to last submission progress over time.

In [None]:
# Error of each student's first and last collaborative submission, drawn as
# one line per student.
students = [1, 2, 3, 4, 5, 6, 7, 9, 10]
separation = 0.01

fig, ax = plt.subplots(1, figsize=(5, 5))

prelist = list()
postlist = list()
for i in students:
    temp = fldf[fldf['student'] == i]
    values = list(temp.error)
    # Nudge a point up by `separation` for every earlier student that has the
    # same value at the same end of the line, so identical lines stay
    # distinguishable. The last-submission offset is counted among the last
    # submissions (postlist), not among the first ones.
    p0 = values[0] + prelist.count(values[0])*separation
    p1 = values[1] + postlist.count(values[1])*separation
    prelist.append(values[0])
    postlist.append(values[1])
    ax.plot(temp.activity, [p0, p1], marker='o', markersize=5)
# The plotted quantity is the 'error' column.
ax.set_ylabel('Error')
ax.set_title('First to Last Collab Submission Progress\n', loc='center', fontsize=20)

leg = ax.legend(students, loc='upper left', frameon=True)
# Bounding box of the legend in axes coordinates. Bbox.inverse_transformed was
# removed in Matplotlib 3.5; transformed(<transform>.inverted()) is the
# equivalent spelling and also works on older releases.
bb = leg.get_bbox_to_anchor().transformed(ax.transAxes.inverted())
# Move the legend to the right of the axes.
xOffset = 1.1
bb.x0 += xOffset
bb.x1 += xOffset
leg.set_bbox_to_anchor(bb, transform=ax.transAxes)

# Show the y ticks as percentages, dropping the outermost tick at each end.
yticks = ax.get_yticks()
ax.set_yticks(yticks[1:-1])
ax.set_yticklabels([str(round(i*100)) + '%' for i in yticks[1:-1]])
plt.show()


### Perform Wilcoxon signed-rank test

In [None]:
# Errors of the rows recorded in each of the two collaborative activities.
# NOTE(review): the paired tests further down match these two lists by
# position, which assumes every student has exactly one row in each activity,
# in the same student order -- confirm.
in_first_activity = fldf['activity'] == 'collab-activity'
in_second_activity = fldf['activity'] == 'collab-activity-2'

collab_a = list(fldf.loc[in_first_activity, 'error'])
collab_b = list(fldf.loc[in_second_activity, 'error'])


In [None]:
r, o = collab_a, collab_b

# Two-sided exact test. The null hypothesis is that the paired differences
# r - o are symmetric about zero; a p-value below the chosen threshold
# rejects it.
w, p = wilcoxon(r, o, mode="exact")
print('Exact:', 'W=', w, 'pvalue=', p)

# One-sided exact test. With alternative="greater" the alternative hypothesis
# is that the differences r - o tend to be positive, i.e. that the collab_a
# errors are larger than the collab_b errors.
a, b = wilcoxon(r, o, mode="exact", alternative="greater")
print('Greater:', 'W=', a, 'pvalue=', b)

### Compute effect size (Cliff's delta).

In [None]:
# Effect size between the two error lists, estimated with Cliff's delta;
# collab_a is passed as the control group and collab_b as the test group.
r, o = collab_a, collab_b
d = two_group_difference(
    control=r,
    test=o,
    effect_size='cliffs_delta',
)
print('d', d)
