In [None]:
def levenshtein(source, target):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn ``source`` into ``target``.

    Args:
        source: A sized sequence (string, list, tuple, ...) of items that
            can be compared with ``!=`` once placed in a numpy array.
        target: A sequence of the same kind as ``source``.

    Returns:
        The edit distance as a plain ``int``.
    """
    if len(source) < len(target):
        return levenshtein(target, source)

    # So now we have len(source) >= len(target).
    if len(target) == 0:
        return len(source)

    # We call tuple() to force strings to be used as sequences
    # ('c', 'a', 't', 's') - numpy uses them as values by default.
    source = np.array(tuple(source))
    target = np.array(tuple(target))

    # Dynamic programming over the edit matrix, keeping only the previous
    # row.  Row i, column j holds the distance between the first i items of
    # source and the first j items of target.
    offsets = np.arange(target.size + 1)
    previous_row = offsets
    for s in source:
        # Drop `s` from source: same column, one row down, cost 1.
        # (`+ 1` allocates a new array, so `offsets` is never modified.)
        current_row = previous_row + 1

        # Substitution or matching:
        # Target and source items are aligned, and either
        # are different (cost of 1), or are the same (cost of 0).
        current_row[1:] = np.minimum(
                current_row[1:],
                previous_row[:-1] + (target != s))

        # Insert target items: column j can be reached from ANY column
        # k <= j of this row at a cost of (j - k).  A single shifted
        # minimum would only propagate one column, so take the running
        # minimum instead:
        #   min_k(row[k] + j - k) == j + min_k(row[k] - k)
        current_row = np.minimum.accumulate(current_row - offsets) + offsets

        previous_row = current_row

    return int(previous_row[-1])

In [None]:
# Edit distance between the two subjects' strategy lists for every pair,
# gathered per group, then compared across groups with stats.f_oneway and
# shown as one box per group.
averages = []  # despite the name, holds one list of raw edit distances per group
for group in (GROUP_1, GROUP_2, GROUP_3):
    edit_vals = [
        levenshtein(pair.subjA.strategy_list, pair.subjB.strategy_list)
        for pair in group
    ]
    # Group total; computed as in the original cell but not read here.
    total_edit_dist = sum(edit_vals)
    averages.append(edit_vals)

val_1, val_2, val_3 = averages

print(stats.f_oneway(val_1, val_2, val_3))

plt.boxplot(averages)
plt.xlabel("Group")
plt.ylabel("Edit Distance")
plt.show()

In [None]:
# Finds how many times each strategy is used per group.
def analysis_by_group(g, group_type):
    """Plot and return the average per-pair usage of each strategy in a group.

    For every pair in ``g`` the counts of subject A and subject B are added
    together per strategy.  The group totals are then divided by the number
    of pairs and drawn as a bar chart, most-used strategy first.

    Args:
        g: Sized collection of pairs.  Each pair exposes ``subjA`` and
            ``subjB``, each with a ``strategy_dictionary`` mapping a
            strategy key to a count.  Only the keys of subject A's
            dictionary are tallied, and subject B's dictionary must contain
            them (``KeyError`` otherwise).  Keys must be accepted by
            ``float()``.
        group_type: Label appended to the plot title.

    Returns:
        A ``(strategies, values)`` tuple of numpy arrays: the strategy keys
        converted to float, ordered by descending average usage, and the
        matching averages.  Both are empty when ``g`` is empty.
    """
    group_dict = {}

    for pair in g:
        dict_A = pair.subjA.strategy_dictionary
        dict_B = pair.subjB.strategy_dictionary

        for k in sorted(dict_A):
            group_dict[k] = group_dict.get(k, 0) + dict_A[k] + dict_B[k]

    # Average over the accumulated totals themselves rather than over the
    # last pair's dictionary: this keeps every tallied strategy and works
    # for an empty group (nothing to divide, so no division by zero).
    num_pairs = len(g)
    average_dictionary = {
        k: float(total) / num_pairs for k, total in group_dict.items()
    }

    # Most-used strategy first; sorted() is stable, so ties keep tally order.
    ranked = sorted(average_dictionary, key=average_dictionary.get, reverse=True)
    strategies = [float(k) for k in ranked]
    values = [average_dictionary[k] for k in ranked]

    plt.figure(figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.bar(np.arange(len(strategies)), values)
    plt.xticks(range(len(strategies)), strategies)
    plt.ylim((0, 41))  # fixed y-range so the per-group charts share a scale
    plt.ylabel("Average Times Used")
    plt.xlabel("Strategy")
    plt.title("group " + str(group_type))
    plt.show()

    return np.array(strategies), np.array(values)

In [None]:
# For every group, count how often each strategy is used and plot the
# strategies ordered by frequency; keep the returned arrays per group.
(strat1, val1), (strat2, val2), (strat3, val3) = (
    analysis_by_group(members, label)
    for label, members in enumerate((GROUP_1, GROUP_2, GROUP_3), start=1)
)

In [None]:
# Calculates OR alignment with multiple delays.
#
# For each pair and each delay d, the alignment is the fraction of trials
# whose strategy equals the strategy d trials earlier OR d trials later.
# Alignments are averaged over the pairs of each group and drawn as a
# grouped bar chart (one colour per group, one cluster per delay).

num_trials = 80  # only the first 80 entries of pair.strategies are examined
delays = [1, 3, 5, 7, 9, 11, 13, 15]

averages = []  # averages[g][d]: mean alignment of group g at delays[d]
for group in GROUPS:

    group_delay_corr = []  # one row per pair, one column per delay

    for pair in group:
        strategies = pair.strategies

        delay_corr = []  # one alignment value per delay for this pair
        for delay in delays:
            matches = 0

            for trial in range(num_trials):
                current = strategies[trial]
                # Only compare against neighbours that exist.  Bounds are
                # checked explicitly instead of substituting 0 for a missing
                # neighbour, which would falsely match a strategy equal to 0.
                same_before = (trial >= delay
                               and current == strategies[trial - delay])
                same_after = (trial + delay < num_trials
                              and current == strategies[trial + delay])
                if same_before or same_after:
                    matches += 1

            delay_corr.append(matches / float(num_trials))
        group_delay_corr.append(delay_corr)

    # Transpose once per group (after all pairs are collected) so each row
    # holds one delay's values across the pairs.  An empty group gets NaNs
    # rather than reusing the previous group's matrix.
    if group_delay_corr:
        corrs_by_delay = np.array(group_delay_corr).T
        delay_averages = [sum(row) / float(len(row)) for row in corrs_by_delay]
    else:
        delay_averages = [float("nan")] * len(delays)
    averages.append(delay_averages)
print(np.array(averages))

width = 0.25
ind = np.arange(len(delays))
fig, ax = plt.subplots()
# The chart draws the first three groups side by side within each cluster.
rects1 = ax.bar(ind, averages[0], width, color='r')
rects2 = ax.bar(ind + width, averages[1], width, color='b')
rects3 = ax.bar(ind + 2*width, averages[2], width, color='g')
ax.set_xticks(ind + width)  # tick under the middle bar of each cluster
ax.set_xticklabels([str(d) for d in delays])
ax.set_title('Alignments with Delay')
ax.set_ylabel('Alignment')
ax.set_xlabel('Delay')
plt.show()