# Notebook for analyzing task continuum results
This notebook provides the following functionality:

1. Load the task continuum result files (provided in data/msmt_continuum.gzip; please unzip the archive before use).
2. Calculate winning method per task
3. Plot analysis results for patterns P1-P4

In [None]:
import pandas as pd
import os
from os.path import isfile, join
from os import listdir
import similaritymeasures
import numpy as np
import re
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='ticks')
from copy import copy
import statistics
import networkx as nx


# Load all result files

In [None]:
# Root directory that is scanned for setting folders; every entry found here is
# expected to contain an "AL_results" sub-folder with the per-method CSV files.
main_path = "../msmt_continuum"

def getAUC(f1_scores_all, ignore_first=20):
    """Score an F1 curve by comparing it against an all-zero curve.

    The first ``ignore_first`` entries of ``f1_scores_all`` are dropped. The
    remaining scores are paired with their positions 0..n-1 and passed,
    together with a curve that is 0 at each of the same positions, to
    ``similaritymeasures.dtw``.

    Parameters:
        f1_scores_all: sliceable sequence of F1 scores, one per iteration.
        ignore_first: number of leading entries excluded from the score.

    Returns:
        The first element of the pair returned by ``similaritymeasures.dtw``
        (the second element is discarded).
    """
    kept_scores = f1_scores_all[ignore_first:]
    n_points = len(kept_scores)
    positions = np.arange(0, n_points)

    # Reference curve: column 0 stays at zero, column 1 holds the position.
    baseline = np.zeros((n_points, 2))
    baseline[:, 1] = positions

    # Observed curve: column 0 holds the F1 score, column 1 the position.
    curve = np.zeros((n_points, 2))
    curve[:, 0] = kept_scores
    curve[:, 1] = positions

    score, _ = similaritymeasures.dtw(curve, baseline)
    return score

# Number of leading iterations excluded from the AUC calculation.
ignore_first = 50

result_type = 'micro'  # one of ['micro', 'macro']
methods = ['HeALER', 'ALMSER', 'ALMSERgroup']

# Column order of the combined result table.
result_columns = ['method', 'EO', 'VPO', 'VH', 'AUC',
                  'F1_85', 'F1_150', 'F1_last_it', 'STD_last_it']

# Setting folders encode their parameters in the name as EO<eo>VH<vh>VPO<vpo>.
setting_pattern = re.compile(r'EO(.+?)VH(.+?)VPO(.*)')


def _summarize_run(results, result_type, ignore_first):
    """Return the AUC and F1 summary values of one method's result table.

    Parameters:
        results: DataFrame read from a method's CSV file; must provide the
            columns 'F1_<result_type>' and 'F1_<result_type>_std'.
        result_type: 'micro' or 'macro', selects the F1 columns.
        ignore_first: forwarded to getAUC.

    Returns:
        dict with the keys 'AUC', 'F1_85', 'F1_150', 'F1_last_it' and
        'STD_last_it'.

    Raises:
        KeyError: if a required column or row label (75, 150) is missing.
    """
    f1_scores = results['F1_' + result_type]
    f1_std = results['F1_' + result_type + '_std']
    return {
        'AUC': getAUC(f1_scores, ignore_first),
        # NOTE(review): the column is called F1_85 but row 75 is read, while
        # F1_150 reads row 150 - confirm which of label/index is intended.
        'F1_85': f1_scores[75],
        'F1_150': f1_scores[150],
        'F1_last_it': f1_scores.tail(1).values[0],
        'STD_last_it': f1_std.tail(1).values[0],
    }


# Get all results and calculate the AUC per method and setting.
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the frame on each call.
result_rows = []
finished_settings = 0

for setting_path in os.listdir(main_path):
    name_match = setting_pattern.search(setting_path)
    results_dir = join(main_path, setting_path, "AL_results")

    # Skip entries that are not setting folders (stray files, hidden folders).
    if name_match is None or not os.path.isdir(results_dir):
        continue

    eo, vh, vpo = (float(value) for value in name_match.groups())

    # only consider settings with complete results for comparison
    method_files = {method: join(results_dir, method + ".csv") for method in methods}
    if not all(isfile(path) for path in method_files.values()):
        continue

    finished_settings += 1

    for method in methods:
        summary = _summarize_run(pd.read_csv(method_files[method]),
                                 result_type, ignore_first)
        result_rows.append({'method': method, 'EO': eo, 'VPO': vpo, 'VH': vh,
                            **summary})

# Passing the columns explicitly keeps the schema even if no setting was found.
all_results = pd.DataFrame(result_rows, columns=result_columns)

print("Finished settings: %i " %finished_settings)
display(all_results.head(5))


# Group by setting, find winning method and calculate AUC-F1 diff.

In [None]:
from copy import copy


def _top_auc(auc_values):
    """Largest AUC within one setting."""
    return auc_values.nlargest(1).max()


def _runner_up_auc(auc_values):
    """Smaller of the two largest AUC values within one setting."""
    return auc_values.nlargest(2).min()


# Work on a copy so that all_results itself is left untouched.
best_results = copy(all_results)

# AUC values of all methods that share one (EO, VPO, VH) setting.
grouped = best_results.groupby(['EO', 'VPO', 'VH'])['AUC']

# Broadcast the best and second-best AUC of a setting onto each of its rows.
best_results['best'] = grouped.transform(_top_auc)
best_results['second_best'] = grouped.transform(_runner_up_auc)

# Keep the rows whose AUC equals the best AUC of their setting.
is_winner = best_results['AUC'] == best_results['best']
best_of_setting = best_results[is_winner].reset_index()
best_of_setting['auc_diff'] = best_of_setting['best'] - best_of_setting['second_best']

n_methods = len(set(best_results['method'].values))
print("Total settings: ", int(best_results.shape[0] / n_methods))
print(Counter(best_of_setting.method))

# Plot in 3D and 2D

In [None]:
import plotly.express as px
import plotly.io as pio

x_eye = -1.25
y_eye = 2
z_eye = 0.5

separator = "----------------------------------------------------------"

# One fixed colour per method so that all figures are directly comparable.
method_colors = {'ALMSER': 'magenta', 'HeALER': 'green', 'ALMSERgroup': 'blue'}


def _report_pattern(title, selection_filter):
    """Print and plot the winning methods of the settings matching a pattern.

    Prints, for the rows of best_of_setting selected by the filter, the mean
    auc_diff per winning method and the number of wins per method, then shows
    a VH/VPO scatter plot of those rows.

    Parameters:
        title: heading printed above the statistics.
        selection_filter: boolean mask over the rows of best_of_setting.
    """
    selected = best_of_setting[selection_filter]

    print(separator)
    print(title)
    for method in ("ALMSER", "HeALER", "ALMSERgroup"):
        print("AVG auc difference %s: " % method,
              selected[selected.method == method].auc_diff.mean())
    print(Counter(selected.method))

    # The figure shows the selected settings. Previously every pattern plotted
    # the fixed EO == 0 slice, which ignored the filter, produced the same
    # figure four times and could never show P2 (which requires EO > 0).
    if selected.empty:
        print("No settings match this pattern; nothing to plot.")
    else:
        fig = px.scatter(data_frame=selected, x='VH', y='VPO', color='method',
                         size='auc_diff_rescaled',
                         color_discrete_map=method_colors,
                         size_max=70, template='ggplot2', height=450)
        pio.show(fig)
    print(separator)


print("In total: %i settings" %best_of_setting.shape[0])

# Min-max rescale auc_diff to [0, 1]; the result is used as marker size.
rescaled = best_of_setting['auc_diff'] - best_of_setting['auc_diff'].min()
value_range = rescaled.max()
if value_range > 0:
    best_of_setting['auc_diff_rescaled'] = rescaled / value_range
else:
    # All differences are equal (or there is no data): dividing would yield
    # NaN marker sizes, so fall back to one uniform size.
    best_of_setting['auc_diff_rescaled'] = 1.0


print(separator)
print("OVERALL RESULTS")
selected_settings = best_of_setting
print("Selected settings size: ", selected_settings.shape[0])

fig = px.scatter_3d(data_frame=selected_settings, x='EO', y='VPO', z='VH', color='method',
                    size='auc_diff_rescaled',
                    color_discrete_map=method_colors,
                    size_max=70, template='ggplot2', height=500)
pio.show(fig)
print(separator)


_report_pattern(
    "P1: EASY TASKS",
    (best_of_setting.EO > 0.6) & (best_of_setting.VH < 0.4) & (best_of_setting.VPO > 0.6))

_report_pattern(
    "P2: HARD TASKS; NON-ZERO ENTITY OVERLAP",
    (best_of_setting.VH > 0.5) & (best_of_setting.EO > 0.0) & (best_of_setting.VPO < 0.7))

_report_pattern(
    "P3: MEDIUM-HARD TASKS; MANY GROUPS",
    (best_of_setting.VH < 0.5) & (best_of_setting.VPO < 0.5))

_report_pattern(
    "P4: MEDIUM-HARD TASKS; FEW GROUPS",
    (best_of_setting.VH < 0.5) & (best_of_setting.VPO > 0.5))
