In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from os import listdir
from os.path import isfile, join
import argparse
import os.path
import sys
import struct
import time
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams["mathtext.fontset"] = "cm"
import random
import math
import re
import collections

In [None]:
#Fix the seed of NumPy's global random number generator so that any later random draws are repeatable
np.random.seed(10)

In [None]:
#Root folder from which every results/validation path below is built
root_path = "../"

sel_dataset = "speech_commands"

sel_attack  = "deepfool"  #DeepFool
#sel_attack  = "cw_l2"    #Carlini & Wagner
#sel_attack  = "fgsm_v2"  #Fast Gradient Sign Method
#sel_attack  = "rand_pgd" #Projected Gradient Descent

#Attack parameters: iteration budget and the list of maximum distortion thresholds to be analyzed.
#An unsupported attack name stops the execution here, so everything below can assume a valid attack.
if sel_attack=="deepfool":
    overshoot_df = 0.02
    max_iter_thr = 30
    max_dist_vec = [0.0005, 0.001, 0.0025, 0.005, 0.01, 0.05, 0.1, 0.15] #l2 norm
elif sel_attack in ["cw_l2"]:
    max_iter_thr = 1000
    max_dist_vec = [0.01, 0.05, 0.1, 0.15] #l2 norm
elif sel_attack in ["fgsm_v2"]:
    max_iter_thr = 1000
    max_dist_vec = [0.00005, 0.0001, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02] #linf norms
elif sel_attack in ["rand_pgd"]:
    max_iter_thr = 30
    max_dist_vec = [0.00005, 0.0001, 0.0005, 0.001, 0.002, 0.005] #linf norms
else: sys.exit("Supported attacks: [deepfool, rand_pgd, cw_l2, fgsm_v2]")

#Filename suffix pattern of the result files. The remaining placeholders are filled later with
#(distortion threshold, max. iterations, start index, fold index).
param_appendix_patt = "_eps_%s_iters_%d_start_%d_fold_%d.npy"
if sel_attack=="deepfool":
    #DeepFool files additionally encode the overshoot parameter
    param_appendix_patt = "_ov_%s"%(str(overshoot_df)) + param_appendix_patt

#ID of the set of target distributions
p_y_obj_set_idx = 1 #1: 100 random dirichlets, 2: uniform distribution (1/k,...1/k)
validation_folder = root_path + "optimization/validation/"
p_y_obj_set = np.load(validation_folder + "p_y_obj_set_%d.npy"%p_y_obj_set_idx)

#Number of target distributions in the selected set (displayed as the cell output)
len(p_y_obj_set)

# Visualizing all the results

In [None]:
#Configuration: main methods + baselines.
#One row per method: (results-folder ID, plot label, line color, line marker, "get prob" flag).
#NOTE(review): the meaning of the last flag cannot be told from this notebook -- confirm with the generator code.
method_setup = [
    ("m1",   "AM",   '#1f77b4', "s", 1),
    ("m2",   "UBM",  '#ff7f0e', "o", 1),
    ("m3",   "EWTM", '#2ca02c', "v", 1),
    ("m4",   "CRM",  '#d62728', "^", 1),
    ("mab",  "MAB",  '#9467bd', "X", 1),
    ("mfrb", "MFRB", '#8c564b', "*", 1),
]

##Configuration: main methods only (keep just the first four rows)
#method_setup = method_setup[:4]

#Split the table into the parallel lists used throughout the notebook
method_names, plot_labels, plot_colors, marker_types, get_prob_methods = map(list, zip(*method_setup))

N_methods = len(method_names)

In [None]:
#Cross-validation layout
N_per_class       = 1000
N_per_class_train = 500  #Number of samples used to generate the transition matrices
N_multistarts     = 50   #Number of k-fold cross validation repetitions

#Number of folds in each cross-validation
N_folds = N_per_class//N_per_class_train

print("N_multistarts: %d"%N_multistarts, "N_folds: %d"%N_folds, sep="\n")

# Success in the optimization

In [None]:
n_dists   = len(max_dist_vec)
n_targets = len(p_y_obj_set)

#Global record of whether the transition matrix was generated (=1) or not (=0), i.e., whether the
#linear program was successfully solved, for every (method, threshold, start, fold, target distribution).
success_ref_full = np.zeros((N_methods, n_dists, N_multistarts, N_folds, n_targets), dtype=int)

#Summary for each method and max. dist. threshold: percentage of success
data_full = np.zeros((N_methods, n_dists))

for i_m in range(N_methods):
    sel_method = method_names[i_m] #Method to be processed

    #Folder containing the results for the current method & setup
    load_path = (root_path + "optimization/%s/%s/results_%s/"%(sel_dataset, sel_attack, sel_method)
                 + "p_y_obj_set_%d/"%p_y_obj_set_idx
                 + "N_train_%d/"%(N_per_class_train))
    print(load_path)

    for i_d in range(n_dists):
        max_dist_thr = np.copy(max_dist_vec[i_d]) #Distance threshold

        #start_ok[i, s] = 1 only if, for target distribution i, every fold of start s succeeded
        #(a single failed fold makes the whole start count as a failure).
        start_ok = np.zeros((n_targets, N_multistarts), dtype=int)

        for i_start in range(N_multistarts):
            #Success flags of the current k-fold cross validation (one column per fold)
            fold_ok = np.zeros((n_targets, N_folds), dtype=int)

            for i_fold in range(N_folds):
                param_appendix = param_appendix_patt%(str(max_dist_thr), max_iter_thr, i_start, i_fold)

                #SUCCESS (for every target distribution in the set)
                success_optimization = np.load(load_path + "success_opt" + param_appendix)

                #Keep the flags both for the reduction below and in the global record
                fold_ok[:,i_fold] = success_optimization
                success_ref_full[i_m,i_d,i_start,i_fold,:] = success_optimization

            #Reduce by fold: success only when no fold has a zero flag
            start_ok[:, i_start] = np.all(fold_ok != 0, axis=1)

        #Reduce by multistart: first the success rate of each target distribution over the starts...
        per_target_rate = np.mean(start_ok, axis=1)
        if len(per_target_rate)!=n_targets: sys.exit("Wrong axis")

        #...and then the average over all the target distributions
        cur_val = np.mean(per_target_rate)
        data_full[i_m, i_d] = cur_val*100

        #Sanity check: the two-step mean must match the plain mean over all the entries
        tmp_check = np.sum(start_ok)/(start_ok.shape[0]*start_ok.shape[1])
        if np.abs(cur_val-tmp_check)>1e-8:
            sys.exit("Means are not the same")

In [None]:
#Display the success percentages (rows: methods, columns: distortion thresholds), rounded to one decimal
np.round(data_full,1)

# Fooling rate

### Maximum possible fooling rate

In [None]:
#Note that the maximum possible fooling rate only depends on the validation set, not on the target prob. distr.
#It is nevertheless loaded from the results of every method so that the values can be cross-checked.
fr_clean_full = np.zeros((N_methods, len(max_dist_vec), N_multistarts, N_folds))

for i_d, max_dist_thr in enumerate(max_dist_vec): #Current max. distortion threshold
    for i_start in range(N_multistarts):
        for i_fold in range(N_folds):
            #The filename suffix is shared by all the methods
            param_appendix = param_appendix_patt%(str(max_dist_thr), max_iter_thr, i_start, i_fold)

            for i_m in range(N_methods):
                sel_method = method_names[i_m] #Current method

                load_path = (root_path + "optimization/%s/%s/results_%s/"%(sel_dataset, sel_attack, sel_method)
                             + "p_y_obj_set_%d/"%p_y_obj_set_idx
                             + "N_train_%d/"%(N_per_class_train))

                #Optimum fooling rate (proportion of inputs for which a successful targeted attack was created)
                fr_clean_full[i_m,i_d,i_start,i_fold] = np.load(load_path + "opt_fr" + param_appendix)

            #Sanity check: consecutive methods must report the same value
            fr_all_methods = fr_clean_full[:,i_d,i_start,i_fold]
            if np.any(np.abs(np.diff(fr_all_methods))>1e-8): sys.exit("Not the same opt FR???")

### Achieved fooling rate

In [None]:
#Average fooling rate (for each method and distortion threshold)
fr_adv_full = np.zeros((N_methods, len(max_dist_vec)))
n_targets   = len(p_y_obj_set)

for i_m in range(N_methods):
    sel_method = method_names[i_m] #Current method

    #Folder with the results of the current method (the same for every threshold, start and fold)
    load_path = (root_path + "optimization/%s/%s/results_%s/"%(sel_dataset, sel_attack, sel_method)
                 + "p_y_obj_set_%d/"%p_y_obj_set_idx
                 + "N_train_%d/"%(N_per_class_train))

    for i_d in range(len(max_dist_vec)):
        max_dist_thr = np.copy(max_dist_vec[i_d]) #Current max. dist. threshold

        #Fold-averaged fooling rate of every (target distribution, start) pair
        tmp_start_fr_vec = np.zeros((n_targets, N_multistarts))

        for i_start in range(N_multistarts):
            #Values of the current k-fold cross validation (one column per fold)
            tmp_fold_fr_vec = np.zeros((n_targets, N_folds))

            for i_fold in range(N_folds):
                param_appendix = param_appendix_patt%(str(max_dist_thr), max_iter_thr, i_start, i_fold)

                #Load the SUCCESS and FR
                success_opt   = np.load(load_path + "success_opt" + param_appendix)
                fooling_rates = np.load(load_path + "fr"          + param_appendix)

                #Sanity check: the success flags must match those recorded in the global variable
                if np.any(success_opt!=success_ref_full[i_m,i_d,i_start,i_fold,:]): sys.exit("Check success")

                tmp_fold_fr_vec[:,i_fold] = fooling_rates

            #Average by fold (the starts in which one of the folds failed are discarded below)
            fold_mean = np.mean(tmp_fold_fr_vec, axis=1)
            tmp_start_fr_vec[:,i_start] = fold_mean

            #Sanity check
            if len(fold_mean)!=n_targets: sys.exit("Wrong axis")

        #start_ok[s, p] is True when no fold of start s failed for target distribution p
        start_ok = np.all(success_ref_full[i_m,i_d]!=0, axis=1)

        #Average result of each target distribution, and whether it has any result to be averaged
        tmp_pyobj_fr_vec = np.zeros(n_targets)
        mask_pyobj_with_any_success = np.zeros(n_targets, dtype=int)

        for i_p in range(n_targets):
            ok_starts = start_ok[:,i_p]
            if not ok_starts.any():
                #No start without failures: nothing to average for this target distribution
                continue

            #Average only the starts without failures
            selected = tmp_start_fr_vec[i_p,ok_starts]
            tmp_pyobj_fr_vec[i_p] = np.mean(selected)
            if np.any(selected!=tmp_start_fr_vec[i_p,:][ok_starts]):
                sys.exit("Check maskings")
            mask_pyobj_with_any_success[i_p] = 1

        #Finally, average the results obtained for all the target distributions
        has_results = mask_pyobj_with_any_success==1
        if has_results.any():
            fr_adv_full[i_m, i_d] = np.mean(tmp_pyobj_fr_vec[has_results])
        else:
            print("(%s - %s) --> All the results failed"%(method_names[i_m], str(max_dist_vec[i_d])))
            fr_adv_full[i_m, i_d] = np.nan

In [None]:
#Plot the obtained fooling rates
matplotlib.rc('figure', figsize=(6, 4))
fig, ax = plt.subplots()

#Take the results corresponding to the first method (the results are the same for all the methods)
red_fr_clean_full = np.copy(fr_clean_full[0,:,:,:])
#Average by folds
red_fr_clean_full = np.mean(red_fr_clean_full, axis=2)
#Sanity check
if red_fr_clean_full.shape!=(len(max_dist_vec), N_multistarts): sys.exit("Wrong axis (1)")
#Average by starts
red_fr_clean_full = np.mean(red_fr_clean_full, axis=1)
#Sanity check
if red_fr_clean_full.shape!=(len(max_dist_vec),): sys.exit("Wrong axis (2)")

#Plot the maximum fooling rates
ax.plot(red_fr_clean_full*100, label="Max. fooling rate", marker="d", color="black", linewidth=2, markersize=9)

#Plot the fooling rates
for i_m in range(N_methods):
    ax.plot(fr_adv_full[i_m,:]*100, label=plot_labels[i_m], marker=marker_types[i_m], alpha=0.85, color=plot_colors[i_m])

    #Sanity check: no achieved fooling rate may exceed the maximum one by more than the tolerance.
    #The excess is evaluated per threshold, so a threshold where the method is (legitimately) below
    #the maximum can never trigger the error. NaN entries compare as False and are ignored.
    if np.any(fr_adv_full[i_m,:]-red_fr_clean_full>1e-8):
        sys.exit("AN EMPIRICAL FR > OPT FR!!!")

#Raw string: "\e" is not a valid Python escape sequence
ax.set_xlabel(r'$\epsilon$', size=25, labelpad=-12)
ax.set_ylabel('Fooling rate (%)', size=15, labelpad=-5)
ax.set_xticks(range(len(max_dist_vec)))
ax.set_xticklabels(max_dist_vec, size=12, rotation=30)
ax.tick_params(axis="y", which='major', labelsize=12)
ax.tick_params(axis="x", which='major', labelsize=12, pad=0)
ax.set_ylim([-3,105])
plt.title("\nFooling rate percentage", size=14)
fig.tight_layout()
matplotlib.pyplot.gcf().set_size_inches(4, 3.6)

#Save figure without legend
plt.savefig("/tmp/%s_fooling_rates_noleg.pdf"%sel_attack, bbox_inches='tight', dpi=350)

plt.legend(fontsize=12, borderaxespad=0.25, loc='upper left', framealpha=0.0, labelspacing=0.14)
#plt.legend(fontsize=12, borderaxespad=0.25, loc='lower right', framealpha=0.0, labelspacing=0.2) #for cw_l2 and rand_pgd

#Save figure with legend
plt.savefig("/tmp/%s_fooling_rates.pdf"%sel_attack, bbox_inches='tight', dpi=350)
plt.show()

In [None]:
#Print the fooling rates (in %) as the rows of a LaTeX table: "<label> & v1 & v2 ... \\"
for i_m, fr_row in enumerate(fr_adv_full):
    row_cells = [str(val) for val in np.round(fr_row*100,2)]
    print(plot_labels[i_m] + " & " + " & ".join(row_cells) + " \\\\\n")

In [None]:
#Maximum fooling rate for each distortion threshold (in %, rounded to two decimals), as averaged
#over folds and starts in the plotting cell above
np.round(red_fr_clean_full*100,2)

# Distance metrics

In [None]:
#Some formatted labels (mathtext). Raw strings are used because "\m", "\w" and "\h" are not valid
#Python escape sequences; the resulting text is unchanged.
p_orig_str = r"$\mathcal{P}(Y)$"
p_obj_str = r"$\widetilde{\mathcal{P}}(Y)$"
p_emp_str = r"$\hat{\mathcal{P}}(Y)$"
#Short metric identifiers ("ID"s). They are also the filename prefixes of the stored results.
#The three lists below are parallel: position i of each one refers to the same metric.
metric_vec = ["kl_obj", "kl_orig", "kl_orig_obj", 
              "max_dif_obj", "mean_dif_obj", "max_dif_orig", "mean_dif_orig",
              "spearman_obj", "pearson_obj", "success_opt"]
#Explanation of each metric (used as the plot titles)
metric_expls = ["KL divergence \n between %s and %s"%(p_obj_str, p_emp_str),
               "KL divergence \n between %s and %s"%(p_orig_str, p_emp_str),
               "KL divergence \n between %s and %s"%(p_orig_str, p_obj_str),
               "Max. abs. difference\nbetween %s and %s"%(p_obj_str, p_emp_str),
               "Mean abs. difference\nbetween %s and %s"%(p_obj_str, p_emp_str),
               "Max. abs. difference\nbetween %s and %s"%(p_orig_str, p_emp_str),
               "Mean abs. difference\nbetween %s and %s"%(p_orig_str, p_emp_str),
               "Spearman correlation\nbetween %s and %s"%(p_obj_str, p_emp_str),
               "Pearson correlation\nbetween %s and %s"%(p_obj_str, p_emp_str),
               "Success\nin generating a valid transition matrix $T$"]
#Vertical axis label for each metric
ylabels = ["$D_{KL}$(%s, %s)"%(p_obj_str, p_emp_str),
          "$D_{KL}$(%s, %s)"%(p_orig_str, p_emp_str),
          "$D_{KL}$(%s, %s)"%(p_orig_str, p_obj_str),
          "Max abs. difference", "Mean abs. difference",
          "Max abs. difference", "Mean abs. difference",
          "Correlation", "Correlation", "Success rate"]

In [None]:
#General function to compute the results given the desired performance metric
def get_performance(metric, method_names, get_prob_methods):
    """Average the stored values of `metric` for every method and distortion threshold.

    For each (method, threshold, start, fold) the file "<metric><suffix>" is loaded from the results
    folder of the method, together with the corresponding "success_opt" file. The values are averaged
    by fold, then over the starts in which no fold failed (according to the global `success_ref_full`),
    and finally over the target distributions that have at least one such start.

    The function relies on the notebook globals (paths, attack setup, `N_methods`, `N_multistarts`,
    `N_folds`, `max_dist_vec`, `p_y_obj_set`, `success_ref_full`), so the cells defining them must
    have been executed first.

    Parameters:
        metric: identifier of the metric (filename prefix of the stored results).
        method_names: list with the ID of each method; its length must be `N_methods`.
        get_prob_methods: not used by the computation; kept so that existing calls remain valid.

    Returns:
        Array of shape (N_methods, len(max_dist_vec)). An entry is NaN when, for that method and
        threshold, no target distribution has a start without failures.

    The execution is stopped with sys.exit if the number of methods is wrong or if the loaded
    success flags differ from those stored in `success_ref_full`.
    """
    print("Metric: ", metric)

    #Sanity check
    if len(method_names)!=N_methods: sys.exit("You passed less method names than N_methods!")

    n_dists   = len(max_dist_vec)
    n_targets = len(p_y_obj_set)

    #Variable to store the average results of each method and distortion threshold
    data_full = np.zeros((N_methods, n_dists))

    for i_m in range(N_methods):
        sel_method = method_names[i_m] #Current method

        #Results folder of the current method (it does not depend on the threshold, start or fold)
        results_root = root_path + "optimization/%s/%s/results_%s/"%(sel_dataset, sel_attack, sel_method)
        load_path = results_root + "p_y_obj_set_%d/"%p_y_obj_set_idx
        load_path = load_path    + "N_train_%d/"%(N_per_class_train)
        print(load_path)

        for i_d in range(n_dists):
            max_dist_thr = max_dist_vec[i_d]  #Current distortion threshold

            #Fold-averaged value of every (target distribution, start) pair
            tmp_start_vec = np.zeros((n_targets, N_multistarts))

            for i_start in range(N_multistarts):
                #Values of the current k-fold cross validation (one column per fold)
                tmp_fold_vec = np.zeros((n_targets, N_folds))

                for i_fold in range(N_folds):
                    param_appendix = param_appendix_patt%(str(max_dist_thr), max_iter_thr, i_start, i_fold)

                    #Load the SUCCESS and the RESULTS OBTAINED WITH THE SPECIFIED METRIC
                    success_opt = np.load(load_path + "success_opt" + param_appendix)
                    data        = np.load(load_path +  metric       + param_appendix)

                    #Sanity check: the success flags must match those recorded in the global variable
                    if np.any(success_opt!=success_ref_full[i_m,i_d,i_start,i_fold,:]):
                        sys.exit("Check success")

                    tmp_fold_vec[:,i_fold] = data

                #Average by fold (the starts in which one of the folds failed are discarded below)
                tmp_start_vec[:,i_start] = np.mean(tmp_fold_vec, axis=1)

            #start_ok[s, p] is True when no fold of start s failed for target distribution p
            start_ok = np.all(success_ref_full[i_m,i_d]!=0, axis=1)

            #Average, for each target distribution with at least one valid start, its valid starts
            target_means = [np.mean(tmp_start_vec[i_p, start_ok[:,i_p]])
                            for i_p in range(n_targets) if start_ok[:,i_p].any()]

            #Finally, average the results obtained for all the target distributions
            if target_means:
                data_full[i_m, i_d] = np.mean(target_means)
            else:
                print("(%s - %s) --> All the results failed"%(method_names[i_m], str(max_dist_vec[i_d])))
                data_full[i_m, i_d] = np.nan

    return data_full

In [None]:
#Default size of the figures created below
matplotlib.rc('figure', figsize=(6, 4))

#One figure per metric: average value of every method as a function of the distortion threshold
for i_metric in range(len(metric_vec)):

    metric = metric_vec[i_metric]   #Current metric identifier
    expl   = metric_expls[i_metric] #Current metric explanation

    #Compute the average results
    data_full = get_performance(metric, method_names, get_prob_methods)
    print(np.round(data_full,4))

    #Plot the results for each method
    for i_m in range(N_methods):
        plt.plot(data_full[i_m,:], label=plot_labels[i_m], marker=marker_types[i_m], color=plot_colors[i_m])

    #Raw string: "\e" is not a valid Python escape sequence
    plt.xlabel(r'$\epsilon$', size=25, labelpad=-12)
    plt.ylabel(ylabels[i_metric], size=15)
    plt.xticks(range(len(max_dist_vec)), max_dist_vec, size=12, rotation=30)
    plt.yticks(size=12)
    #ax.set_ylim([20,100])
    plt.title("%s"%expl, size=15)
    #Adjust the layout of the figure being drawn (not of a figure left over from a previous cell)
    plt.tight_layout()
    matplotlib.pyplot.gcf().set_size_inches(4.3, 3)

    #Save figure without legend
    plt.savefig("/tmp/%s_comp_%s_noleg.pdf"%(sel_attack, metric), bbox_inches='tight', dpi=300)
    plt.legend(fontsize=11.5, framealpha=0.5, labelspacing=0.1)

    #Save figure with legend
    plt.savefig("/tmp/%s_comp_%s.pdf"%(sel_attack, metric), bbox_inches='tight', dpi=300)
    plt.show()

In [None]:
#Display any figure that is still open
plt.show()

# Pareto plots

In [None]:
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle

In [None]:
#One marker per distortion threshold (indexed by the position of the threshold in max_dist_vec)
cur_marker_types = [".", "P", "v", "^", "H", "s", "<", ">"] #markers for the distortion thresholds

In [None]:
#Auxiliary function to select those values that form the "Pareto front".
def get_pareto_front_one(values, strict=True):
    """Return a boolean mask marking the rows of `values` that belong to the Pareto front.

    All the factors (columns) are minimized; negate a column beforehand to maximize it.

    Parameters:
        values: 2-D array with one row per point and one column per factor.
        strict: if True, a point is kept against a front point only if it is strictly better (<) in
                at least one factor; if False, being equal (<=) in one factor is enough.

    Returns:
        Boolean array with one entry per row (True: the point is in the Pareto front).

    Rows containing NaN (e.g., setups in which every run failed) are never part of the front and
    are never used to discard other points: every comparison against NaN is False, so a NaN row
    would otherwise eliminate all the remaining points.
    """
    values = np.asarray(values)
    #Start from the rows that have a value for every factor
    is_efficient = ~np.isnan(values).any(axis=1)
    for i, v in enumerate(values):
        if is_efficient[i]:
            #Among the surviving points, keep only those that beat v in at least one factor
            if strict:
                is_efficient[is_efficient] = np.any(values[is_efficient]<v, axis=1)
            else:
                is_efficient[is_efficient] = np.any(values[is_efficient]<=v, axis=1)
            #v itself never beats v under the strict comparison, so restore it explicitly
            is_efficient[i] = True
    return is_efficient

#Pareto front of every column (distortion threshold) of two result matrices.
def get_pareto_front_all(data1, data2, c1=1.0, c2=1.0, strict=True):
    """Compute, column by column, which (data1, data2) pairs are in the Pareto front.

    Parameters:
        data1, data2: arrays of identical shape; each column is processed independently.
        c1, c2: sign applied to data1 and data2: 1.0 to minimize the factor, -1.0 to maximize it.
        strict: forwarded to get_pareto_front_one (strict or non-strict comparison).

    Returns:
        pareto_mask:  boolean array (same shape as the data), True for the values in the front.
        pareto_order: integer array (same shape); at the front positions of each column it holds the
                      np.lexsort indices of the negated, signed front values (second factor as the
                      primary key). It is zero elsewhere.
    """
    assert data1.shape==data2.shape
    pareto_mask  = np.zeros(data1.shape, dtype=bool)
    pareto_order = np.zeros(data1.shape, dtype=int)

    for col in range(data1.shape[1]):
        #Signed (factor 1, factor 2) pairs of the current column, one row per point
        signed_pairs = np.stack((c1*data1[:,col], c2*data2[:,col]), axis=1)

        on_front = get_pareto_front_one(signed_pairs, strict)
        pareto_mask[:,col] = on_front

        #"Order" of the values in the pareto front
        flipped = -signed_pairs[on_front,:]
        pareto_order[on_front,col] = np.lexsort((flipped[:,0], flipped[:,1]))

    return pareto_mask, pareto_order


#Scatter plot of the results, highlighting the Pareto front of each distortion threshold
def plot_scatter_pareto(data1, data2, pareto_mask, pareto_order, plot_colors, cur_marker_types, plt):
    """Draw every (data1, data2) pair and join the Pareto front of each column with a dotted line.

    Parameters:
        data1, data2: 2-D arrays (rows: methods, columns: distortion thresholds) with the horizontal
                      and vertical coordinates.
        pareto_mask:  boolean array of the same shape, True for the values in the Pareto front.
        pareto_order: integer array of the same shape; its entries at the front positions of a column
                      are the indices used to order the front values before joining them.
        plot_colors:  one color per method (row).
        cur_marker_types: one marker per distortion threshold (column).
        plt: object providing `plot` and `scatter` (pyplot itself or an Axes).

    Front values are drawn filled (alpha 0.85); the remaining ones hollow (alpha 0.65).
    """
    n_rows, n_cols = data1.shape[0], data1.shape[1]

    for i_d in range(n_cols):
        #Dotted line joining the front values of the current threshold, in the given order
        on_front = pareto_mask[:,i_d]
        front_idx = pareto_order[on_front,i_d]
        plt.plot(data1[on_front,i_d][front_idx], data2[on_front,i_d][front_idx],
                 color="black", alpha=0.35, linestyle=":")

        #One marker per method
        for i_m in range(n_rows):
            if pareto_mask[i_m,i_d]:
                face_color, marker_alpha = plot_colors[i_m], 0.85
            else:
                face_color, marker_alpha = "none", 0.65
            plt.scatter(data1[i_m,i_d], data2[i_m,i_d], facecolors=face_color, edgecolors=plot_colors[i_m],
                        marker=cur_marker_types[i_d], s=80, alpha=marker_alpha)

## Spearman correlation & fooling rate

In [None]:
#Horizontal axis: achieved fooling rates (a copy, so that the original array is left untouched)
data1 = fr_adv_full.copy()

#Vertical axis: Spearman correlation, located by its identifier in the list of metrics
metric2_idx = np.flatnonzero(np.array(metric_vec)=="spearman_obj")[0]
data2 = get_performance(metric_vec[metric2_idx], method_names, get_prob_methods)

In [None]:
#Data1 (Fooling rate)--> maximize
#Data2 (Correlation) --> maximize
pareto_mask, pareto_order = get_pareto_front_all(data1, data2, c1=-1.0, c2=-1.0, strict=True)


#Visualize the results
fig, ax = plt.subplots()

plot_scatter_pareto(data1, data2, pareto_mask, pareto_order, plot_colors, cur_marker_types, plt)
        
#Formatting... (both axes are inverted: the first limit is the larger one).
#NaN-aware min/max are used because the results contain NaN for the setups in which every run failed,
#and NaN is not a valid axis limit.
plt.xlim(1.0, np.nanmin(data1) -0.03)
plt.ylim(np.nanmax(data2)+0.05, np.nanmin(data2) -0.03)
plt.xlabel("Fooling rate", size=14)
plt.ylabel(ylabels[metric2_idx], size=14)
plt.xticks(size=12) ; plt.yticks(size=12)
#Inset axes: zoomed view of a region of the main plot (hard-coded coordinates)
zoom_x1, zoom_x2, zoom_y1, zoom_y2 = 0.97, 0.69, 0.92, 0.795
plt.arrow(0.84, 0.77, -0.03, -0.21, head_width=0.025, head_length=0.04, color="black", linewidth=0.2, linestyle="-")
ax2 = plt.axes([.138, .5, .27, .35], facecolor='none')
plt.setp(ax2, xticks=[], yticks=[])
plot_scatter_pareto(data1, data2, pareto_mask, pareto_order, plot_colors, cur_marker_types, ax2)
ax2.set_xlim(zoom_x1, zoom_x2)
ax2.set_ylim(zoom_y1, zoom_y2)        
#Rectangle drawn on the main axes (hard-coded coordinates)
rect = Rectangle((0.69, 0.77), 0.285, 0.17, edgecolor="black", lw=1.0, facecolor='none')
ax.add_patch(rect)

matplotlib.pyplot.gcf().set_size_inches(5, 3.5)

#Save figure
plt.savefig("/tmp/pareto_corr_fr.pdf", bbox_inches='tight', dpi=300)
plt.show()

## Maximum absolute difference & fooling rate

In [None]:
#Horizontal axis: achieved fooling rates (a copy, so that the original array is left untouched)
data1 = fr_adv_full.copy()

#Vertical axis: max. abs. difference metric, located by its identifier in the list of metrics
metric2_idx = np.flatnonzero(np.array(metric_vec)=="max_dif_obj")[0]
data2 = get_performance(metric_vec[metric2_idx], method_names, get_prob_methods)

In [None]:
#Data1 (Fooling rate)--> maximize
#Data2 (max. dif.)   --> minimize
pareto_mask, pareto_order = get_pareto_front_all(data1, data2, c1=-1.0, c2=1.0, strict=True)

#Visualize the results
fig, ax = plt.subplots()

plot_scatter_pareto(data1, data2, pareto_mask, pareto_order, plot_colors, cur_marker_types, plt)

        
#Formatting...
#The vertical limits follow the data (ignoring NaN); the horizontal axis uses a fixed, inverted range.
plt.ylim(np.nanmin(data2)-0.005, np.nanmax(data2) + 0.005)
plt.xlabel("Fooling rate", size=14)
plt.ylabel(ylabels[metric2_idx], size=14)
plt.xticks(size=12) ; plt.yticks(size=12)
plt.rc('ytick', labelsize=11)
plt.xlim(1,-0.02)

matplotlib.pyplot.gcf().set_size_inches(5, 3.5)

#Save the figure
plt.savefig("/tmp/pareto_max_fr.pdf", bbox_inches='tight', dpi=300)
plt.show()

# Pareto Plots with the number of parameters

In [None]:
#Horizontal position of each method (one row per method, the same value for every threshold).
#The three positions are later labelled with the number of parameters of the methods.
base_x_ticks = np.array([0.5]*len(max_dist_vec))
low_x_ticks  = np.array([0]*len(max_dist_vec))
high_x_ticks = np.array([1]*len(max_dist_vec))

#Rows stacked directly in the order of the methods (AM, UBM, EWTM, CRM, MAB, MFRB):
#AM, UBM and EWTM at 0.5, CRM at 1, and the two baselines (MAB, MFRB) at 0.
data1 = np.vstack((base_x_ticks, base_x_ticks, base_x_ticks,
                   high_x_ticks,
                   low_x_ticks, low_x_ticks))

## Mean difference & Number of parameters

In [None]:
#Vertical axis: mean abs. difference metric, located by its identifier in the list of metrics
metric2_idx = np.flatnonzero(np.array(metric_vec)=="mean_dif_obj")[0]
data2 = get_performance(metric_vec[metric2_idx], method_names, get_prob_methods)

In [None]:
#Both factors are minimized here:
#  data1 --> position given by the number of parameters
#  data2 --> mean abs. difference
pareto_mask, pareto_order = get_pareto_front_all(data1, data2, c1=1.0, c2=1.0, strict=True)

#Visualize the results
fig, ax = plt.subplots()
plot_scatter_pareto(data1, data2, pareto_mask, pareto_order, plot_colors, cur_marker_types, plt)

#Axis labels
plt.xlabel("Number of parameters", size=14, labelpad=0)
plt.ylabel(ylabels[metric2_idx], size=14)

#Horizontal axis: the three fixed positions, labelled with the corresponding complexity
plt.xlim(-0.2,1.2)
plt.xticks([0.0, 0.5, 1],["$O(k)$", "$O(k^2)$","$O(2^kk^2)$"], size=17, y=0)

#Vertical axis: range of the data (ignoring NaN) plus a small margin
plt.yticks(size=12)
plt.ylim(np.nanmin(data2)-0.002, np.nanmax(data2)+0.002)

matplotlib.pyplot.gcf().set_size_inches(5, 3.5)

#Save the figure
plt.savefig("/tmp/pareto_params_mean.pdf", bbox_inches='tight', dpi=350)
plt.show()

# Legend

In [None]:
#Stand-alone legend for the Pareto plots: one column per entry pair, with the distortion thresholds
#in the top row and the methods in the bottom row (shifted one column to the right).
#The legend is built from the configured thresholds and methods, so it also works for the attacks
#that use fewer than 8 thresholds.
cur_marker_types = [".", "P", "v", "^", "H", "s", "<", ">"] #one marker per threshold (at most 8 thresholds)
plt.plot()

n_legend_cols = max(len(max_dist_vec), N_methods + 1)

legend_elems = []
for i_col in range(n_legend_cols):
    #Top row: distortion threshold (or an empty entry once the thresholds are exhausted)
    if i_col < len(max_dist_vec):
        #Raw string: "\e" is not a valid Python escape sequence
        legend_elems.append(Line2D([0], [0], marker=cur_marker_types[i_col],
                                   label=r"$\epsilon=$"+str(max_dist_vec[i_col]),
                                   color="white", markerfacecolor="grey", markersize=13)) #Distortion
    else:
        legend_elems.append(Line2D([0], [0], marker="", color="white", markerfacecolor="white", label="")) #empty

    #Bottom row: method i_col-1 (the first column has no method)
    if 1 <= i_col <= N_methods:
        legend_elems.append(Line2D([0], [0], marker="o", color="white", markerfacecolor=plot_colors[i_col-1],
                                   label=plot_labels[i_col-1], markersize=13)) #Method
    elif i_col < n_legend_cols - 1:
        #An empty entry keeps the columns aligned; it is not needed after the last column
        legend_elems.append(Line2D([0], [0], marker="", color="white", markerfacecolor="white", label="")) #empty

legend = plt.legend(handles=legend_elems, ncol=n_legend_cols, columnspacing=-0.0, handletextpad=-0.2, fontsize=11, 
           borderpad=0.3, frameon=True, borderaxespad=0.15, bbox_to_anchor=(0.96, 1.2))

#Save only the area covered by the legend
fig   = legend.figure
fig.canvas.draw()
bbox  = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
fig.savefig("/tmp/legend_pareto.pdf", dpi="figure", bbox_inches=bbox)
plt.close()
plt.show()