In [1]:
## Import packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import os, alive_progress
from alive_progress import alive_bar

from scipy.stats import mannwhitneyu as mwu
from utils import VD_A_DF as vdadf
from utils import apfd
from utils import derive_data
from utils import sort_vda
from utils import calc_s12


# render matplotlib figures inline in the notebook
%matplotlib inline

# Load CSV, calculate APFD and filter Equivalent==OK and Extra States==2

In [2]:
# The two CSV logs that are concatenated into the "fixed" data set.
FIXED_LOGS = ["k_1_k_2_hads_hsi_w_wp_fixed.csv", "k_1_k_2_spy_spyh.csv"]

# Row filter shared by both data sets: keep rows whose `Equivalent` column is "OK"
# and whose `Extra States` column equals 2.
EQUIV_QUERY = '`Equivalent`=="OK" and `Extra States`==2'

# SUL metadata: parsed once and left-joined onto both log tables on 'SUL name'.
sul_list = pd.read_csv("SUL_list.csv")

# derive_data comes from utils; presumably it adds the derived columns (e.g. APFDx)
# that the sort below relies on — confirm in utils.
df_fixed = derive_data(
    pd.merge(
        pd.concat([pd.read_csv(path_log) for path_log in FIXED_LOGS]),
        sul_list,
        how='left', on='SUL name',
    )
)
equiv_fixed = df_fixed.query(EQUIV_QUERY).sort_values(by=['APFDx'], ascending=False)

df_random = derive_data(
    pd.merge(
        pd.read_csv("random_logs.csv"),
        sul_list,
        how='left', on='SUL name',
    )
)
equiv_random = df_random.query(EQUIV_QUERY).sort_values(by=['APFDx'], ascending=False)

# Analyze TQ and APFDx

In [12]:
# calc_s12 (from utils) summarises the filtered fixed runs; reset_index() turns its
# index into ordinary columns so that 'CTT' can be used as the x variable below.
metrics_s12 = calc_s12(equiv_fixed).reset_index()

# Each inner list becomes one figure with one boxplot panel per listed metric.
list_of_metrics = [['TQ_s1', 'TQ_s2'], ['APFDx_s1',  'APFDx_s2']]

for metrics_to_plot in list_of_metrics:
    n_panels = len(metrics_to_plot)

    # one row of panels; the figure width scales with the number of panels
    fig, ax = plt.subplots(1, n_panels, figsize=(20*(n_panels/4),3))

    # leave horizontal room between neighbouring panels
    plt.subplots_adjust(wspace=0.4)

    for idx, metric in enumerate(metrics_to_plot):
        panel = ax[idx]
        sns.boxplot(data=metrics_s12, ax=panel, x='CTT',y=metric)
        panel.set_xlabel('')
        panel.tick_params(axis='x', rotation=90)
        # log scale only for "_s1" metrics that are not APFDx (i.e. TQ_s1 here)
        if "_s1" in metric and "APFDx_" not in metric:
            panel.set(yscale='log')
    plt.show()

Unnamed: 0_level_0,SUL name,TQ_s1,APFDx_s1,TQ_s2,APFDx_s2
CTT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
W,GnuTLS_3.3.12_server_full.dot,3754,0.973029,0.054559,0.987083
W,model1.dot,1896214,0.944248,1.0,0.975437
W,TCP_Windows8_Client.dot,51771,0.941312,0.355358,0.967504
W,4_learnresult_MAESTRO_fix.dot,31524,0.939064,0.255011,0.975835
W,ASN_learnresult_MAESTRO_fix.dot,31524,0.939064,0.255011,0.975835
...,...,...,...,...,...
HadsInt,NSS_3.17.4_client_regular.dot,5981,0.721628,0.476574,0.896338
HadsInt,DropBear.dot,421014,0.719828,0.85966,0.929967
HadsInt,learnresult_fix.dot,63808,0.701325,0.703848,0.774613
HadsInt,1_learnresult_MasterCard_fix.dot,14278,0.690422,0.538386,0.912419


In [75]:

# s1or2=1
# display(metrics_s12.groupby('CTT').sum()[[f'TQ_s{s1or2}']].sort_values(f'TQ_s{s1or2}'))
# display(metrics_s12.groupby('CTT').sum()[[f'APFDx_s{s1or2}']].sort_values(f'APFDx_s{s1or2}', ascending=False))

# for col in ['TQ_s1','TQ_s2','APFDx_s1','APFDx_s2']:
# #     display(metrics_s12.sort_values('SUL name').pivot_table([col],'SUL name','CTT'))
#     display(metrics_s12.sort_values('SUL name').groupby('CTT').apply(lambda x: x[col].tolist()))
# importing combinations
from itertools import product,combinations

# One row per CTT; column 0 holds that CTT's TQ_s1 values as a plain list
# (rows are ordered by SUL name, then CTT, before grouping).
tq_s1_by_ctt = (
    metrics_s12
    .sort_values(['SUL name','CTT'])
    .groupby(['CTT'])
    .apply(lambda ctt_rows: ctt_rows['TQ_s1'].tolist())
    .reset_index()
)

# Map every unordered pair of CTTs to the matching pair of TQ_s1 samples.
ctt_pairs = list(combinations(tq_s1_by_ctt['CTT'], 2))
sample_pairs = list(combinations(tq_s1_by_ctt[0], 2))
results = dict(zip(ctt_pairs, sample_pairs))

# Print the Mann-Whitney U p-value for each pair, tab-separated.
for (ctti, cttj), (sample_i, sample_j) in results.items():
    print(f'{ctti}\t{cttj}\t{mwu(sample_i,sample_j).pvalue:.3f}')

W	Wp	0.136
W	Hsi	0.021
W	SPY	0.112
W	SPYH	0.432
W	HadsInt	0.024
Wp	Hsi	0.388
Wp	SPY	0.897
Wp	SPYH	0.384
Wp	HadsInt	0.419
Hsi	SPY	0.410
Hsi	SPYH	0.074
Hsi	HadsInt	0.910
SPY	SPYH	0.384
SPY	HadsInt	0.446
SPYH	HadsInt	0.074


In [None]:
# NOTE: everything in this cell is commented out, so running it does nothing.
# The disabled code builds a per-'SUL group' table for TQ via vdadf/sort_vda,
# pivots TQ by CTT, and (doubly commented) prints pairwise mwu p-values per group.
# vda_tq=equiv_fixed.sort_values(['SUL group','SUL name','EquivalenceOracle','Seed']).groupby('SUL group').apply(
#     lambda x: sort_vda(vdadf(x,'TQ','CTT'))
# )
# pvt_tq=vda_tq.pivot_table(['estimate','magnitude'],['A', 'SUL group'],'B',aggfunc='first')
# display(pvt_tq.round(3).fillna(''))
# xxx=equiv_fixed.sort_values(['SUL group','SUL name','EquivalenceOracle','Seed'])\
#     .pivot_table('TQ',['SUL group','SUL name','Seed'],'CTT',aggfunc='first')

# display(xxx)
# # all_ctts = ["HadsInt", "Hsi", "SPY", "SPYH", "W", "Wp"]
# # for gp in equiv_fixed['SUL group'].unique():
# #     xx = xxx.query(f'`SUL group`=="{gp}"')
# #     for i in range(len(all_ctts)):
# #         ctti=all_ctts[i]
# #         for j in range(i+1,len(all_ctts)):
# #             cttj=all_ctts[j]
# #             print(f'{gp}\t{ctti}\t{cttj}\t{mwu(xx[ctti],xx[cttj]).pvalue:.3f}')


# Analyze APFDx

In [None]:
# For every SUL group, pass that group's rows to vdadf (VD_A_DF from utils) with
# 'APFDx' as the value column and 'CTT' as the grouping column, then post-process
# the result with sort_vda. Rows are put in a fixed order first.
vda_apfdx = (
    equiv_fixed
    .sort_values(['SUL group','SUL name','EquivalenceOracle','Seed'])
    .groupby('SUL group')
    .apply(lambda group_rows: sort_vda(vdadf(group_rows,'APFDx','CTT')))
)

# Wide layout: one row per (SUL group, A), one column block per B.
pvt_apfdx = vda_apfdx.pivot_table(
    values=['estimate','magnitude'],
    index=['SUL group', 'A'],
    columns='B',
    aggfunc='first',
)
display(pvt_apfdx.round(3).fillna(''))

# Plot the percentage of states detected per test case (for all methods)

In [None]:
# NOTE(review): this cell originally read `df_equiv`, which is only assigned in a
# later cell (and there from a `df` that is never defined), so it failed on
# Restart & Run All. `equiv_fixed` is built with the same Equivalent / Extra States
# filter and matches the img/fixed_mode/ output folder — confirm it is the
# intended data set.
plot_runs = equiv_fixed

all_qtype = ['Testing symbols'] # alternative
all_runs = plot_runs[['SUL name', 'Seed']].drop_duplicates()
total = len(all_qtype)*len(all_runs)

# define figure size
sns.set(rc={'figure.figsize':(10,5),'figure.dpi':300})

os.makedirs('img/fixed_mode/', exist_ok=True)

with alive_bar(total, force_tty=True, title='Plotting APFD') as bar:
    for _, run in all_runs.iterrows():
        # get an entry <SUL, seed>
        sulname, seed = run['SUL name'], run['Seed']

        # Boolean mask instead of a query string, so SUL names containing quotes work.
        # NOTE(review): rows are selected by SUL name only; the seed is used just for
        # the file name, so every seed of one SUL plots the same rows — confirm
        # whether a Seed filter is intended here.
        subj = plot_runs[plot_runs['SUL name'] == sulname].copy()

        # add percent columns
        # assumes each HypSize cell holds a NumPy array (element-wise division) — TODO confirm
        subj['HypSizePercent'] = subj['HypSize'].apply(lambda x: x/np.max(x)*100)

        # explode column with % of symbols and hypothesis sizes in the learning process
        subj = subj.explode(['HypSizePercent', *all_qtype])

        for qtype in all_qtype:
            # create line chart
            apfd_plot = sns.lineplot(data=subj, x=qtype, y='HypSizePercent',
                                     markers=True,
                                     style='CTT', hue='CTT',
                                     palette='tab10'
                                    )
            apfd_plot.set(xscale='log')
            locator = ticker.LogLocator()
            locator.MAXTICKS = np.max(subj[qtype])
            apfd_plot.xaxis.set_major_locator(locator)

            apfd_plot.yaxis.set_major_locator(ticker.MultipleLocator(10))
            apfd_plot.set_ylim(0,100)

            # add plot labels, titles and legends
            plt.xlabel(f'Number of {qtype.title()} (log scale)')
            plt.ylabel('Fraction of the SUL learned')
            plt.title(f'Subject: {sulname}')

            # get handles and labels
            handles, labels = plt.gca().get_legend_handles_labels()

            # legend entries ordered by descending APFDx
            ranked_ctts = (
                subj[['CTT','APFDx']]
                .sort_values(['APFDx'], ascending=False)
                .drop_duplicates()
                .CTT.to_list()
            )
            order = [labels.index(ctt) for ctt in ranked_ctts]

            # add legend to plot
            plt.legend([handles[pos] for pos in order], [labels[pos] for pos in order],
                       title='Testing Technique', loc='lower right',
                       fontsize='xx-small', title_fontsize='xx-small')

            # save line chart, then clear the figure so the next chart starts empty
            fig = apfd_plot.get_figure()
            fname = sulname.replace('.dot','')
            fig.savefig(f'img/fixed_mode/cumsum_{fname}_{qtype}_{seed}.jpg')
            fig.clf()
            bar()

In [None]:
# define figure size
sns.set(rc={'figure.figsize':(10,5),'figure.dpi':300})

os.makedirs('img/fixed_mode/', exist_ok=True)

# NOTE(review): this cell originally plotted `df_equiv`, which is only assigned in
# a later cell, so it failed on Restart & Run All. `equiv_fixed` is built with the
# same Equivalent / Extra States filter and matches the img/fixed_mode/ folder
# created above — confirm it is the intended data set.
tq_plot = sns.lineplot(data=equiv_fixed, x='Qsize', y='TQ [Symbols]',
             markers=True,
             style='CTT', hue='CTT',
             palette='tab10'
            )

# Calculate the effect size of the APFDx

In [None]:
# NOTE(review): the original filtered a frame named `df`, which no cell in this
# notebook defines (NameError on a fresh kernel). `df_fixed` is used instead because
# the same filter is applied to it in the loading cell and the CTT names handled by
# sort_vda match the fixed-mode log files — confirm this is the intended source.
df_equiv = df_fixed.query('`Equivalent`=="OK" and `Extra States`==2').sort_values(by=['APFDx'], ascending=False)

# One KDE panel per SUL group (three panels per row), one curve per CTT.
p = sns.displot(data=df_equiv, x="APFDx", hue="CTT", col="SUL group", kind="kde",
                palette='tab10', height=3, aspect=1.5, col_wrap=3)
p.fig.set_dpi(200)

In [None]:
def sort_vda(df_vda):
    """Tag each effect-size row with a technique name and index the table by (A, B).

    NOTE(review): this definition shadows the ``sort_vda`` imported from ``utils``
    at the top of the notebook — confirm which of the two is meant to be used.

    Parameters
    ----------
    df_vda : pandas.DataFrame
        Must provide the columns ``A``, ``B``, ``estimate`` and ``magnitude``.
        The frame is not modified; all work happens on a copy.

    Returns
    -------
    pandas.DataFrame
        Columns ``estimate`` (cast to float) and ``magnitude``, indexed by
        ``(A, B)``. ``A`` and ``B`` are categoricals whose categories are listed
        as W, Wp, Hsi, SPY, SPYH, HadsInt. ``magnitude`` gets ``(<A>)`` appended
        when ``estimate < 0.5`` and ``(<B>)`` otherwise.
    """
    ctt_levels = ["W", "Wp", "Hsi", "SPY", "SPYH", "HadsInt"]

    # Work on a copy so the caller's frame keeps its dtypes and columns.
    table = df_vda.copy()
    table['estimate'] = table['estimate'].astype(float)
    table['A'] = pd.Categorical(table['A'], ctt_levels)
    table['B'] = pd.Categorical(table['B'], ctt_levels)

    # Append the A label below 0.5 and the B label otherwise.
    table['magnitude'] = table.apply(
        lambda row: row['magnitude'] + (f"({row['A']})" if row['estimate'] < 0.5 else f"({row['B']})"),
        axis=1,
    )
    return table[['A','B','estimate','magnitude']].set_index(['A','B'])

# For every SUL group, order that group's rows by SUL name and CTT, pass them to
# vdadf (VD_A_DF from utils) with 'APFDx' as the value column and 'CTT' as the
# grouping column, and post-process the result with sort_vda.
df_vda = df_equiv.groupby('SUL group').apply(
    lambda group_rows: sort_vda(
        vdadf(group_rows.sort_values(['SUL name','CTT']), 'APFDx', 'CTT')
    )
)

# Wide layout: one row per (SUL group, A), one column block per B.
pvt = df_vda.pivot_table(
    values=['estimate','magnitude'],
    index=['SUL group', 'A'],
    columns='B',
    aggfunc='first',
)
display(pvt.round(3).fillna(''))