In [1]:
import json
import os
import re
import logging
from functools import reduce

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from sklearn.model_selection import ParameterGrid

from nnattack.variables import auto_var, get_file_name
from params import (
    nn_k1, nn_k3, nn_k5, nn_k7,
    opt_of_rf_attack, rf_attack, rf500_attack,
    opt_of_nnopt,
    robust_nn_k1, robust_nn_k3,
    robust_rf,
)

logging.basicConfig(level=0)

Using TensorFlow backend.


In [2]:
def get_result(auto_var):
    file_name = get_file_name(auto_var, name_only=True).replace("_", "-")
    file_path = f"./results/{file_name}.json"
    if not os.path.exists(file_path):
        return None
    try:
        with open(file_path, "r") as f:
            ret = json.load(f)
    except:
        print("problem with %s" % file_path)
        raise
    return ret


def params_to_dataframe(grid_param, column=None):
    params, loaded_results = auto_var.run_grid_params(get_result, grid_param, with_hook=False, verbose=0, n_jobs=1)
    if column is None:
        results = [r['results'] if isinstance(r, dict) else r for r in loaded_results]
    else:
        results = loaded_results
    
    params, results = zip(*[(params[i], results[i]) for i in range(len(params)) if results[i]])
    params, results = list(params), list(results)
    #print(len(results))
    accs = []
    for i, param in enumerate(params):
        if column is None:
            for r in results[i]:
                #params[i][f'eps_{r["eps"]:.2f}_trn'] = r['trn_acc']
                params[i][f'eps_{r["eps"]:.2f}_tst'] = r['tst_acc']
        else:
            if column not in results[i]:
                params[i][column] = np.nan
            else:
                if column == 'avg_pert':
                    params[i][column] = results[i][column]['avg']
                    if 'missed_count' in results[i]['avg_pert']:
                        params[i]['missed_count'] = results[i]['avg_pert']['missed_count']
                    else:
                        params[i]['missed_count'] = 0
                else:
                    params[i][column] = results[i][column]
                          
    df = pd.DataFrame(params)
    return df

def set_plot(fig, ax, ord=np.inf):
    fig.autofmt_xdate()
    ax.legend()
    ax.set_ylim(0, 1)
        #ax.legend(bbox_to_anchor=(1.5, 0., 0.5, 0.5))
    ax.legend()
    ax.set_ylabel('Accuracy')
    xlabel = 'Adversarial Perturbation'
    if ord == np.inf:
        ax.set_xlabel(xlabel + ' (Linf)')
    else:
        ax.set_xlabel(xlabel)
        
                          
def result_latex_figs(exp_name, control_var, caption):
    control = ParameterGrid(control_var)
    ret = """
\\begin{figure}[ht!]
\\centering"""
    img_paths = []
    for i, g in enumerate(control):
        dataset, ord = g['dataset'], g['ord']
        img_path = f'./figs/{exp_name}_{dataset}_{ord}.eps'
        dataset = dataset.replace("_", " ")
        ret += """
\\subfloat[%s]{
    \\includegraphics[width=.45\\textwidth]{%s}}""" % (dataset, img_path)
        if i % 2 == 1:
            ret += "\n"
    ret += """
\\caption{%s}
\\label{fig:%s}
\\end{figure} 
""" % (caption, exp_name)
    return ret
                      
def plot_result(df, exp_nme, control_var, variables, show_plot=True):
    control = ParameterGrid(control_var)
    for g in control:
        title = exp_name
        temp_df = df
                      
        for k, v in g.items():
            if v in variable_name[k]:
                title = title + f"_{variable_name[k][v]}"
            else:
                title = title + f"_{v}"
            temp_df = temp_df.loc[df[k] == v]
                      
        fig, ax = plt.subplots()
        ax.set_title(title)
        for name, group in temp_df.groupby(variables):
            #print(name, len(group))
            eps_list = [re.findall(r'[-+]?\d*\.\d+|\d+', t)[0] for t in group.mean().index.tolist()[:-1]]
            s = [r for r in group.mean().tolist()[:-1] if not np.isnan(r)]
            x = [float(eps_list[i]) for i, r in enumerate(group.mean().tolist()[:-1]) if not np.isnan(r)]
                      
            if isinstance(name, str):
                if name in variable_name[variables[0]]:
                    label = variable_name[variables[0]][name]
                else:
                    label = name
            else:
                mod_names = []
                for i, n in enumerate(name):
                    if n in variable_name[variables[i]]:
                        mod_names.append(variable_name[variables[i]][n])
                    else:
                        mod_names.append(n)
                label = mod_name.join("_")
            ax.plot(x, s, label=label)

        dataset = g['dataset']
        ord = g['ord']
        set_plot(fig, ax)
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.eps', format='eps')
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.png', format='png')
        if show_plot:
            plt.show()
        else:
            plt.close()
                      
def get_avg(df, exp_nme, control_var, variables):
    control = ParameterGrid(control_var)
    for g in control:
        title = exp_name
        temp_df = df
                      
        for k, v in g.items():
            if v in variable_name[k]:
                title = title + f"_{variable_name[k][v]}"
            else:
                title = title + f"_{v}"
            temp_df = temp_df.loc[df[k] == v]
                      
        for name, group in temp_df.groupby(variables):
            print(name, len(group))
            eps_list = [re.findall(r'[-+]?\d*\.\d+|\d+', t)[0] for t in group.mean().index.tolist()[:-1]]
            s = [r for r in group.mean().tolist()[:-1] if not np.isnan(r)]
            x = [float(eps_list[i]) for i, r in enumerate(group.mean().tolist()[:-1]) if not np.isnan(r)]
                      
variable_name = {
    'dataset': {
        'fashion_mnist35_2200_pca5': 'f-mnist35-pca5',
        'mnist35_2200_pca5': 'mnist35-pca5',
        'fashion_mnist06_2200_pca5': 'f-mnist06-pca5',
        'fashion_mnist35_2200_pca10': 'f-mnist35-pca10',
        'mnist35_2200_pca10': 'mnist35-pca10',
        'fashion_mnist06_2200_pca10': 'f-mnist06-pca10',
        'fashion_mnist35_2200_pca15': 'f-mnist35-pca15',
        'mnist35_2200_pca15': 'mnist35-pca15',
        'fashion_mnist06_2200_pca15': 'f-mnist06-pca15',
        'fashion_mnist35_2200_pca25': 'f-mnist35-pca25',
        'mnist35_2200_pca25': 'mnist35-pca25',
        'fashion_mnist06_2200_pca25': 'f-mnist06-pca25',
        'digits_pca5': 'digits-pca5',
        'halfmoon_2200': 'halfmoon',
        
        'abalone': 'abalone',
        'iris': 'iris',
        'digits_pca5': 'digits-pca5',
        'wine': 'wine',
        
        'halfmoon_300': 'halfmoon',
        'fashion_mnist35_300_pca5': 'f-mnist35-pca5',
        'mnist35_300_pca5': 'mnist35-pca5',
        'fashion_mnist06_300_pca5': 'f-mnist06-pca5',
        'fashion_mnist35_300_pca15': 'f-mnist35-pca15',
        'mnist35_2300_pca15': 'mnist35-pca15',
        'fashion_mnist06_300_pca15': 'f-mnist06-pca15',
        'fashion_mnist35_300_pca10': 'f-mnist35-pca10',
        'mnist35_2300_pca10': 'mnist35-pca10',
        'fashion_mnist06_300_pca10': 'f-mnist06-pca10',
        
        
    },
    'attack': {
        'blackbox': 'blackbox (Cheng\'s)',
        'kernelsub_c10000_pgd': 'kernelsub',
        'kernelsub_c1000_pgd': 'kernelsub',
        'rev_nnopt_k1_20': 'nnopt-20',
        'rev_nnopt_k1_50': 'nnopt-50',
        'rev_nnopt_k3_20': 'nnopt-20',
        'rev_nnopt_k3_50': 'nnopt-50',
        'rev_nnopt_k3_20_region': 'nnopt-20-region',
        'rev_nnopt_k3_50_region': 'nnopt-50-region',
        'rev_nnopt_k5_20': 'nnopt-20',
        'rev_nnopt_k5_50': 'nnopt-50',
        'rev_nnopt_k5_20_region': 'nnopt-20-region',
        'rev_nnopt_k5_50_region': 'nnopt-50-region',
        'rev_nnopt_k7_20': 'nnopt-20',
        'rev_nnopt_k7_50': 'nnopt-50',
        'rev_nnopt_k7_20_region': 'nnopt-20-region',
        'rev_nnopt_k7_50_region': 'nnopt-50-region',
        
        'nnopt_k1_all': 'nnopt-all',
        'nnopt_k3_all': 'nnopt-all',
        
        'direct_k1': 'direct attack',
        'direct_k3': 'direct attack',
        'direct_k5': 'direct attack',
        'direct_k7': 'direct attack',
        
        'rf_attack_all': 'RF-all',
        'rf_attack_rev': 'RF-rev',
        'rf_attack_rev_20': 'RF-rev-20',
        'rf_attack_rev_50': 'RF-rev-50',
        'rf_attack_rev_100': 'RF-rev-100',
    },
    'ord': {},
}


In [3]:
def knn_attack_plots(exp_name, grid_param, caption='', show_plot=True):
    df = params_to_dataframe(grid_param)
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']

    control = {
        'dataset': datasets,
        'ord': grid_param[0]['ord'],
    }
    variables = ['attack']
    plot_result(df, exp_name, control, variables, show_plot)
    return result_latex_figs(exp_name, control, caption)
        

def avg_pert_table(exp_name, grid_param, columns, obj='avg_pert'):
    df = params_to_dataframe(grid_param, 'avg_pert')
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']
    d = {}
    if len(grid_param[0]['model']) == 1:
        temp_df = df.groupby(["dataset", "attack"])[obj].mean()
        temp_df_sem = df.groupby(["dataset", "attack"])[obj].sem()
        for col in columns:
            aug_col = variable_name['attack'][col]
            d[aug_col] = {}
            for dataset in datasets:
                row_name = variable_name['dataset'][dataset]
                try:
                    d[aug_col][row_name] = "$%.4f \pm %.4f$" % (temp_df[dataset, col], temp_df_sem[dataset, col])
                except:
                    print("missing: ", dataset, col)
                    d[aug_col][row_name] = -1
    else:
        models = grid_param[0]['model']
        temp_df = df.groupby(["model", "dataset", "attack"])[obj].mean()
        temp_df_sem = df.groupby(["model", "dataset", "attack"])[obj].sem()
        for col in columns:
            aug_col = variable_name['attack'][col]
            d[aug_col] = {}
            for dataset in datasets:
                for model in models:
                    model_name = "%s_%02d" % ("_".join(model.split("_")[:-1]), int(model.split("_")[-1]))
                    dataset_name = variable_name['dataset'][dataset]
                    row_name = '%s %s' % (dataset_name, model_name)
                    row_name = row_name.replace('_', '-')
                    try:
                        d[aug_col][row_name] = "$%.4f \pm %.4f$" % (temp_df[model, dataset, col], temp_df_sem[model, dataset, col])
                        #d[aug_col][dataset][model] = "$%.4f \pm %.4f$" % (temp_df[model, dataset, col], temp_df_sem[model, dataset, col])
                    except:
                        print("missing: ", dataset, col)
                        d[aug_col][row_name] = -1
                        #d[aug_col][dataset][model] = -1
                
    return pd.DataFrame(d)

def table_wrapper(exp_name, grid_param, columns, obj='avg_pert', caption=""):
    t = """
\\begin{table}[h!]
\\centering
"""
    t += avg_pert_table(exp_name, grid_param, columns, obj).to_latex(escape=False)
    t += """\\caption{%s}
\\label{table:%s_%s}
\\end{table}
""" % (caption, exp_name, obj)
    return t

tex_base = "./tex_files"

def write_to_tex(s, file_name):
    with open(os.path.join(tex_base, file_name), 'w') as f:
        f.write(s)

In [4]:
experiments = [rf_attack, opt_of_rf_attack, robust_rf]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in experiments:
    _, exp_name, grid_param, _ = fn()
    print(exp_name)
    #columns = ['blackbox', 'rf_attack_rev_20', 'rf_attack_rev_100']
    columns = grid_param[0]['attack']
    table_str = table_wrapper(exp_name, grid_param, columns, caption=avg_caption)
    write_to_tex(table_str, exp_name + '_table.tex')
    fig_str = knn_attack_plots(exp_name, grid_param, show_plot=False)
    write_to_tex(fig_str, exp_name + '_fig.tex')

random_forest
optimality_rf
Robust-RF
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  fashion_mnist06_2200_pca5 rf_attack_rev_20
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
missing:  mnist35_2200_pca5 rf_attack_rev_100
miss

In [8]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(grid_param)
    variables = grid_param[0]['attack']
    #variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    variables = list(filter(lambda v: 'kernelsub' not in v, variables))
    print(variables)
    table_str = table_wrapper(exp_name, grid_param, variables, caption=avg_caption)
    table_str += table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption)
    #print(table_str)
    write_to_tex(table_str, exp_name + '_table.tex')

[{'model': ['knn1'], 'ord': ['inf'], 'dataset': ['digits_pca5'], 'attack': ['rev_nnopt_k1_20', 'rev_nnopt_k1_50', 'direct_k1', 'kernelsub_c1000_pgd', 'blackbox'], 'random_seed': [0, 1]}, {'model': ['knn1'], 'ord': ['inf'], 'dataset': ['abalone', 'mnist35_2200_pca5', 'fashion_mnist06_2200_pca5', 'fashion_mnist35_2200_pca5'], 'attack': ['rev_nnopt_k1_20', 'rev_nnopt_k1_50', 'direct_k1', 'kernelsub_c10000_pgd', 'blackbox'], 'random_seed': [0, 1]}, {'model': ['knn1'], 'ord': ['inf'], 'dataset': ['halfmoon_2200'], 'attack': ['rev_nnopt_k1_20', 'rev_nnopt_k1_50', 'direct_k1', 'kernelsub_c1000_pgd', 'blackbox'], 'random_seed': [0, 1]}]
['rev_nnopt_k1_20', 'rev_nnopt_k1_50', 'direct_k1', 'blackbox']
[{'model': ['knn3'], 'ord': ['inf'], 'dataset': ['digits_pca5', 'abalone', 'mnist35_2200_pca5', 'fashion_mnist35_2200_pca5', 'fashion_mnist06_2200_pca5', 'mnist35_2200_pca25', 'fashion_mnist35_2200_pca25', 'halfmoon_2200'], 'attack': ['rev_nnopt_k3_50', 'direct_k3', 'blackbox', 'rev_nnopt_k3_50_reg

In [7]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(exp_name)
    fig_str = knn_attack_plots(exp_name, grid_param, show_plot=False)
    write_to_tex(fig_str, exp_name + '_fig.tex')

1nn
3nn
5nn
7nn
Optimality3NNOPT


In [6]:
nn_experiments = [robust_nn_k1, robust_nn_k3]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    variables = grid_param[0]['attack']
    variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    print(variables)
    table_str = table_wrapper(exp_name, grid_param, variables, caption=avg_caption)
    table_str += table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption)
    write_to_tex(table_str, exp_name + '_table.tex')

['rev_nnopt_k1_20', 'blackbox']
missing:  halfmoon_2200 rev_nnopt_k1_20
missing:  halfmoon_2200 rev_nnopt_k1_20
missing:  fashion_mnist06_2200_pca5 blackbox
missing:  halfmoon_2200 blackbox
missing:  halfmoon_2200 blackbox
missing:  halfmoon_2200 rev_nnopt_k1_20
missing:  halfmoon_2200 rev_nnopt_k1_20
missing:  fashion_mnist06_2200_pca5 blackbox
missing:  halfmoon_2200 blackbox
missing:  halfmoon_2200 blackbox
['rev_nnopt_k3_50', 'blackbox', 'rev_nnopt_k3_50_region']
missing:  mnist35_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashion_mnist06_2200_pca5 rev_nnopt_k3_50
missing:  fashio