In [1]:
import json
import os
import re
import logging
from functools import reduce

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from sklearn.model_selection import ParameterGrid

from nnattack.variables import auto_var, get_file_name
from params import (
    nn_k1, nn_k3, nn_k5, nn_k7,
    opt_of_rf_attack, rf_attack, rf500_attack,
    opt_of_nnopt,
    robust_nn_k1, robust_nn_k3,
)

logging.basicConfig(level=0)

Using TensorFlow backend.


In [100]:
def get_result(auto_var):
    file_name = get_file_name(auto_var, name_only=True).replace("_", "-")
    file_path = f"./results/{file_name}.json"
    if not os.path.exists(file_path):
        return None
    try:
        with open(file_path, "r") as f:
            ret = json.load(f)
    except:
        print("problem with %s" % file_path)
        raise
    return ret


def params_to_dataframe(grid_param, column=None):
    params, loaded_results = auto_var.run_grid_params(get_result, grid_param, with_hook=False, verbose=0, n_jobs=1)
    if column is None:
        results = [r['results'] if isinstance(r, dict) else r for r in loaded_results]
    else:
        results = loaded_results
    
    params, results = zip(*[(params[i], results[i]) for i in range(len(params)) if results[i]])
    params, results = list(params), list(results)
    #print(len(results))
    accs = []
    for i, param in enumerate(params):
        if column is None:
            for r in results[i]:
                #params[i][f'eps_{r["eps"]:.2f}_trn'] = r['trn_acc']
                params[i][f'eps_{r["eps"]:.2f}_tst'] = r['tst_acc']
        else:
            if column not in results[i]:
                params[i][column] = np.nan
            else:
                if column == 'avg_pert':
                    params[i][column] = results[i][column]['avg']
                    if 'missed_count' in results[i]['avg_pert']:
                        params[i]['missed_count'] = results[i]['avg_pert']['missed_count']
                    else:
                        params[i]['missed_count'] = 0
                else:
                    params[i][column] = results[i][column]
                          
    df = pd.DataFrame(params)
    return df

def set_plot(fig, ax, ord=np.inf):
    fig.autofmt_xdate()
    ax.legend()
    ax.set_ylim(0, 1)
        #ax.legend(bbox_to_anchor=(1.5, 0., 0.5, 0.5))
    ax.legend()
    ax.set_ylabel('Accuracy')
    xlabel = 'Adversarial Perturbation'
    if ord == np.inf:
        ax.set_xlabel(xlabel + ' (Linf)')
    else:
        ax.set_xlabel(xlabel)
        
                          
def result_latex_figs(exp_name, control_var, caption):
    control = ParameterGrid(control_var)
    ret = """
\\begin{figure}[ht!]
\\centering"""
    img_paths = []
    for i, g in enumerate(control):
        dataset, ord = g['dataset'], g['ord']
        img_path = f'./figs/{exp_name}_{dataset}_{ord}.eps'
        dataset = dataset.replace("_", " ")
        ret += """
\\subfloat[%s]{
    \\includegraphics[width=.45\\textwidth]{%s}}""" % (dataset, img_path)
        if i % 2 == 1:
            ret += "\n"
    ret += """
\\caption{%s}
\\label{fig:%s}
\\end{figure} 
""" % (caption, exp_name)
    return ret
                      
def plot_result(df, exp_nme, control_var, variables, show_plot=True):
    control = ParameterGrid(control_var)
    for g in control:
        title = exp_name
        temp_df = df
                      
        for k, v in g.items():
            if v in variable_name[k]:
                title = title + f"_{variable_name[k][v]}"
            else:
                title = title + f"_{v}"
            temp_df = temp_df.loc[df[k] == v]
                      
        fig, ax = plt.subplots()
        ax.set_title(title)
        for name, group in temp_df.groupby(variables):
            #print(name, len(group))
            eps_list = [re.findall(r'[-+]?\d*\.\d+|\d+', t)[0] for t in group.mean().index.tolist()[:-1]]
            s = [r for r in group.mean().tolist()[:-1] if not np.isnan(r)]
            x = [float(eps_list[i]) for i, r in enumerate(group.mean().tolist()[:-1]) if not np.isnan(r)]
                      
            if isinstance(name, str):
                if name in variable_name[variables[0]]:
                    label = variable_name[variables[0]][name]
                else:
                    label = name
            else:
                mod_names = []
                for i, n in enumerate(name):
                    if n in variable_name[variables[i]]:
                        mod_names.append(variable_name[variables[i]][n])
                    else:
                        mod_names.append(n)
                label = mod_name.join("_")
            ax.plot(x, s, label=label)

        dataset = g['dataset']
        ord = g['ord']
        set_plot(fig, ax)
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.eps', format='eps')
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.png', format='png')
        if show_plot:
            plt.show()
        else:
            plt.close()
                      
def get_avg(df, exp_nme, control_var, variables):
    control = ParameterGrid(control_var)
    for g in control:
        title = exp_name
        temp_df = df
                      
        for k, v in g.items():
            if v in variable_name[k]:
                title = title + f"_{variable_name[k][v]}"
            else:
                title = title + f"_{v}"
            temp_df = temp_df.loc[df[k] == v]
                      
        for name, group in temp_df.groupby(variables):
            print(name, len(group))
            eps_list = [re.findall(r'[-+]?\d*\.\d+|\d+', t)[0] for t in group.mean().index.tolist()[:-1]]
            s = [r for r in group.mean().tolist()[:-1] if not np.isnan(r)]
            x = [float(eps_list[i]) for i, r in enumerate(group.mean().tolist()[:-1]) if not np.isnan(r)]
                      
variable_name = {
    'dataset': {
        'fashion_mnist35_2000_pca5': 'f-mnist35-pca5',
        'mnist35_2000_pca5': 'mnist35-pca5',
        'fashion_mnist06_2000_pca5': 'f-mnist06-pca5',
        'fashion_mnist35_2000_pca10': 'f-mnist35-pca10',
        'mnist35_2000_pca10': 'mnist35-pca10',
        'fashion_mnist06_2000_pca10': 'f-mnist06-pca10',
        'fashion_mnist35_2000_pca15': 'f-mnist35-pca15',
        'mnist35_2000_pca15': 'mnist35-pca15',
        'fashion_mnist06_2000_pca15': 'f-mnist06-pca15',
        'fashion_mnist35_2000_pca25': 'f-mnist35-pca25',
        'mnist35_2000_pca25': 'mnist35-pca25',
        'fashion_mnist06_2000_pca25': 'f-mnist06-pca25',
        'digits_pca5': 'digits-pca5',
        'halfmoon_2000': 'halfmoon',
        
        'abalone': 'abalone',
        'iris': 'iris',
        'digits_pca5': 'digits-pca5',
        'wine': 'wine',
        
        'halfmoon_200': 'halfmoon',
        'fashion_mnist35_200_pca5': 'f-mnist35-pca5',
        'mnist35_200_pca5': 'mnist35-pca5',
        'fashion_mnist06_200_pca5': 'f-mnist06-pca5',
        'fashion_mnist35_200_pca15': 'f-mnist35-pca15',
        'mnist35_2000_pca15': 'mnist35-pca15',
        'fashion_mnist06_200_pca15': 'f-mnist06-pca15',
        'fashion_mnist35_200_pca10': 'f-mnist35-pca10',
        'mnist35_2000_pca10': 'mnist35-pca10',
        'fashion_mnist06_200_pca10': 'f-mnist06-pca10',
        
        
    },
    'attack': {
        'blackbox': 'blackbox (Cheng\'s)',
        'kernelsub_c10000_pgd': 'kernelsub',
        'kernelsub_c1000_pgd': 'kernelsub',
        'rev_nnopt_k1_20': 'nnopt-20',
        'rev_nnopt_k1_50': 'nnopt-50',
        'rev_nnopt_k3_20': 'nnopt-20',
        'rev_nnopt_k3_50': 'nnopt-50',
        'rev_nnopt_k3_20_region': 'nnopt-20-region',
        'rev_nnopt_k3_50_region': 'nnopt-50-region',
        'rev_nnopt_k5_20': 'nnopt-20',
        'rev_nnopt_k5_50': 'nnopt-50',
        'rev_nnopt_k5_20_region': 'nnopt-20-region',
        'rev_nnopt_k5_50_region': 'nnopt-50-region',
        'rev_nnopt_k7_20': 'nnopt-20',
        'rev_nnopt_k7_50': 'nnopt-50',
        'rev_nnopt_k7_20_region': 'nnopt-20-region',
        'rev_nnopt_k7_50_region': 'nnopt-50-region',
        
        'nnopt_k1_all': 'nnopt-all',
        'nnopt_k3_all': 'nnopt-all',
        
        'direct_k1': 'direct attack',
        'direct_k3': 'direct attack',
        'direct_k5': 'direct attack',
        'direct_k7': 'direct attack',
        
        'rf_attack_all': 'RF-all',
        'rf_attack_rev': 'RF-rev',
        'rf_attack_rev_20': 'RF-rev-20',
        'rf_attack_rev_50': 'RF-rev-50',
        'rf_attack_rev_100': 'RF-rev-100',
    },
    'ord': {},
}


In [101]:
def knn_attack_plots(exp_name, grid_param, caption='', show_plot=True):
    df = params_to_dataframe(grid_param)
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']

    control = {
        'dataset': datasets,
        'ord': grid_param[0]['ord'],
    }
    variables = ['attack']
    plot_result(df, exp_name, control, variables, show_plot)
    return result_latex_figs(exp_name, control, caption)
        

def avg_pert_table(exp_name, grid_param, columns, obj='avg_pert'):
    df = params_to_dataframe(grid_param, 'avg_pert')
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']
    d = {}
    if len(grid_param[0]['model']) == 1:
        temp_df = df.groupby(["dataset", "attack"])[obj].mean()
        temp_df_sem = df.groupby(["dataset", "attack"])[obj].sem()
        for col in columns:
            aug_col = variable_name['attack'][col]
            d[aug_col] = {}
            for dataset in datasets:
                row_name = variable_name['dataset'][dataset]
                try:
                    d[aug_col][row_name] = "$%.4f \pm %.4f$" % (temp_df[dataset, col], temp_df_sem[dataset, col])
                except:
                    print("missing: ", dataset, col)
                    d[aug_col][row_name] = -1
    else:
        models = grid_param[0]['model']
        temp_df = df.groupby(["model", "dataset", "attack"])[obj].mean()
        temp_df_sem = df.groupby(["model", "dataset", "attack"])[obj].sem()
        for col in columns:
            aug_col = variable_name['attack'][col]
            d[aug_col] = {}
            for dataset in datasets:
                for model in models:
                    model_name = "%s_%02d" % ("_".join(model.split("_")[:-1]), int(model.split("_")[-1]))
                    dataset_name = variable_name['dataset'][dataset]
                    row_name = '%s %s' % (dataset_name, model_name)
                    row_name = row_name.replace('_', '-')
                    try:
                        d[aug_col][row_name] = "$%.4f \pm %.4f$" % (temp_df[model, dataset, col], temp_df_sem[model, dataset, col])
                        #d[aug_col][dataset][model] = "$%.4f \pm %.4f$" % (temp_df[model, dataset, col], temp_df_sem[model, dataset, col])
                    except:
                        print("missing: ", dataset, col)
                        d[aug_col][row_name] = -1
                        #d[aug_col][dataset][model] = -1
                
    return pd.DataFrame(d)

def table_wrapper(exp_name, grid_param, columns, obj='avg_pert', caption=""):
    t = """
\\begin{table}[h!]
\\centering
"""
    t += avg_pert_table(exp_name, grid_param, columns, obj).to_latex(escape=False)
    t += """\\caption{%s}
\\label{table:%s_%s}
\\end{table}
""" % (caption, exp_name, obj)
    return t
    

_, exp_name, grid_param, _ = opt_of_rf_attack()
columns = ['blackbox', 'rf_attack_all', 'rf_attack_rev', 'rf_attack_rev_20', 'rf_attack_rev_50']
print(table_wrapper(exp_name, grid_param, columns))
_, exp_name, grid_param, _ = rf_attack()
columns = ['blackbox', 'rf_attack_rev_20', 'rf_attack_rev_100']
print(table_wrapper(exp_name, grid_param, columns))


\begin{table}[h!]
\centering
\begin{tabular}{llllll}
\toprule
{} &   blackbox (Cheng's) &               RF-all &               RF-rev &            RF-rev-20 &            RF-rev-50 \\
\midrule
f-mnist06-pca5 &  $0.1533 \pm 0.0109$ &  $0.0713 \pm 0.0017$ &  $0.1186 \pm 0.0054$ &  $0.1204 \pm 0.0068$ &  $0.1186 \pm 0.0054$ \\
f-mnist35-pca5 &  $0.2405 \pm 0.0058$ &  $0.1719 \pm 0.0050$ &  $0.2144 \pm 0.0070$ &  $0.2481 \pm 0.0264$ &  $0.2144 \pm 0.0070$ \\
halfmoon       &  $0.1572 \pm 0.0025$ &  $0.0924 \pm 0.0005$ &  $0.1150 \pm 0.0114$ &  $0.1150 \pm 0.0114$ &  $0.1150 \pm 0.0114$ \\
mnist35-pca5   &  $0.1379 \pm 0.0211$ &  $0.0928 \pm 0.0075$ &  $0.1236 \pm 0.0050$ &  $0.1286 \pm 0.0011$ &  $0.1238 \pm 0.0048$ \\
\bottomrule
\end{tabular}
\caption{}
\label{table:optimality_rf_avg_pert}
\end{table}


\begin{table}[h!]
\centering
\begin{tabular}{llll}
\toprule
{} &   blackbox (Cheng's) &            RF-rev-20 &           RF-rev-100 \\
\midrule
f-mnist06-pca10 &  $0.1491 \pm 0.0104$ &  $

In [77]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    variables = grid_param[0]['attack']
    variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    print(variables)
    print(table_wrapper(exp_name, grid_param, variables, caption=avg_caption))
    print(table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption))


['rev_nnopt_k1_20', 'rev_nnopt_k1_50', 'blackbox']

\begin{table}[h!]
\centering
\begin{tabular}{llll}
\toprule
{} &             nnopt-20 &             nnopt-50 &   blackbox (Cheng's) \\
\midrule
abalone         &  $0.0077 \pm 0.0017$ &  $0.0077 \pm 0.0017$ &  $0.0292 \pm 0.0017$ \\
digits-pca5     &  $0.0453 \pm 0.0012$ &  $0.0453 \pm 0.0012$ &  $0.0734 \pm 0.0007$ \\
f-mnist06-pca5  &  $0.0282 \pm 0.0029$ &  $0.0282 \pm 0.0029$ &  $0.0875 \pm 0.0016$ \\
f-mnist35-pca15 &  $0.0954 \pm 0.0022$ &  $0.0951 \pm 0.0020$ &  $0.2299 \pm 0.0011$ \\
f-mnist35-pca5  &  $0.1140 \pm 0.0013$ &  $0.1140 \pm 0.0014$ &  $0.1807 \pm 0.0067$ \\
halfmoon        &  $0.0723 \pm 0.0011$ &  $0.0722 \pm 0.0012$ &  $0.1331 \pm 0.0066$ \\
iris            &  $0.1236 \pm 0.0045$ &  $0.1236 \pm 0.0045$ &  $0.1607 \pm 0.0096$ \\
mnist35-pca5    &  $0.0549 \pm 0.0005$ &  $0.0549 \pm 0.0005$ &  $0.1088 \pm 0.0016$ \\
wine            &  $0.0902 \pm 0.0008$ &  $0.0902 \pm 0.0008$ &  $0.2042 \pm 0.0009$ \\
\bottomrule


In [5]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(knn_attack_plots(exp_name, grid_param, show_plot=False))


\begin{figure}[ht!]
\centering
\subfloat[fashion mnist35 2000 pca15]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_fashion_mnist35_2000_pca15_inf.eps}}
\subfloat[mnist35 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_mnist35_2000_pca5_inf.eps}}

\subfloat[fashion mnist35 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_fashion_mnist35_2000_pca5_inf.eps}}
\subfloat[fashion mnist06 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_fashion_mnist06_2000_pca5_inf.eps}}

\subfloat[digits pca5]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_digits_pca5_inf.eps}}
\subfloat[halfmoon 2000]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_halfmoon_2000_inf.eps}}

\subfloat[abalone]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_abalone_inf.eps}}
\subfloat[iris]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_iris_inf.eps}}

\subfloat[wine]{
    \includegraphics[width=.45\textwidth]{./figs/1nn_wine_inf.eps}}
\captio

In [6]:
nn_experiments = [rf_attack, rf500_attack, opt_of_rf_attack]
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(knn_attack_plots(exp_name, grid_param, show_plot=False))


\begin{figure}[ht!]
\centering
\subfloat[fashion mnist06 2000 pca10]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_fashion_mnist06_2000_pca10_inf.eps}}
\subfloat[fashion mnist35 2000 pca10]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_fashion_mnist35_2000_pca10_inf.eps}}

\subfloat[mnist35 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_mnist35_2000_pca5_inf.eps}}
\subfloat[halfmoon 2000]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_halfmoon_2000_inf.eps}}

\subfloat[fashion mnist35 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_fashion_mnist35_2000_pca5_inf.eps}}
\subfloat[mnist35 2000 pca10]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_mnist35_2000_pca10_inf.eps}}

\subfloat[fashion mnist06 2000 pca5]{
    \includegraphics[width=.45\textwidth]{./figs/random_forest_fashion_mnist06_2000_pca5_inf.eps}}
\caption{}
\label{fig:random_forest}
\end{figure} 


\be

In [102]:
nn_experiments = [robust_nn_k1, robust_nn_k3]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    variables = grid_param[0]['attack']
    variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    print(variables)
    print(table_wrapper(exp_name, grid_param, variables, caption=avg_caption))
    print()
    print(table_wrapper(exp_name, grid_param, variables, 'missed_count', caption=miss_caption))


['rev_nnopt_k1_20', 'blackbox']
missing:  halfmoon_2000 blackbox

\begin{table}[h!]
\centering
\begin{tabular}{lll}
\toprule
{} &             nnopt-20 &   blackbox (Cheng's) \\
\midrule
abalone adv-nn-k1-00             &  $0.0077 \pm 0.0017$ &  $0.0358 \pm 0.0064$ \\
abalone adv-nn-k1-05             &  $0.0080 \pm 0.0018$ &  $0.0278 \pm 0.0043$ \\
abalone adv-nn-k1-10             &  $0.0082 \pm 0.0020$ &  $0.0334 \pm 0.0106$ \\
abalone adv-nn-k1-15             &  $0.0082 \pm 0.0020$ &  $0.0484 \pm 0.0064$ \\
abalone adv-nn-k1-20             &  $0.0082 \pm 0.0020$ &  $0.0321 \pm 0.0026$ \\
abalone adv-nn-k1-25             &  $0.0082 \pm 0.0020$ &  $0.0318 \pm 0.0046$ \\
abalone adv-nn-k1-30             &  $0.0082 \pm 0.0020$ &  $0.0392 \pm 0.0068$ \\
abalone adv-nn-k1-35             &  $0.0082 \pm 0.0020$ &     $0.0418 \pm nan$ \\
abalone adv-nn-k1-40             &  $0.0082 \pm 0.0020$ &  $0.0422 \pm 0.0142$ \\
abalone robustv1-nn-k1-00        &  $0.0077 \pm 0.0017$ &  $0.0373 \pm 0.009

missing:  abalone blackbox

\begin{table}[h!]
\centering
\begin{tabular}{llll}
\toprule
{} &               nnopt-50 &     blackbox (Cheng's) &        nnopt-50-region \\
\midrule
abalone adv-nn-k3-00             &    $0.0000 \pm 0.0000$ &       $0.0000 \pm nan$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-05             &    $0.0000 \pm 0.0000$ &    $1.5000 \pm 1.5000$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-10             &    $0.0000 \pm 0.0000$ &                     -1 &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-15             &    $0.0000 \pm 0.0000$ &    $0.0000 \pm 0.0000$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-20             &    $0.0000 \pm 0.0000$ &    $2.5000 \pm 0.5000$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-25             &    $0.0000 \pm 0.0000$ &    $2.0000 \pm 1.0000$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-30             &    $0.0000 \pm 0.0000$ &    $2.0000 \pm 0.0000$ &    $0.0000 \pm 0.0000$ \\
abalone adv-nn-k3-35             &    $0.0000