In [1]:
import json
import os
import re
import logging
from functools import reduce, partial
from collections import OrderedDict
from typing import Dict, List, Tuple, Union, Callable
import pprint
from mkdir_p import mkdir_p

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
from sklearn.model_selection import ParameterGrid

from nnattack.variables import auto_var
from params import (
    compare_attacks,
    compare_defense,
    
    compare_nns,
    nn_k1_robustness,
    nn_k3_robustness,
    
    rf_robustness,
    dt_robustness,
    tst_scores,
    
    dt_robustness_figs,
    nn_k1_robustness_figs,
    nn_k3_robustness_figs,
    rf_robustness_figs,
    rf_optimality_figs,
    nn_k1_optimality_figs,
    nn_k3_optimality_figs,
)
from utils import set_plot, get_result, write_to_tex, union_param_key, params_to_dataframe, table_wrapper

auto_var.set_variable_value('random_seed', 0)
auto_var.set_variable_value('ord', 'inf')
auto_var.set_logging_level(0)

compare_attacks = compare_attacks()
compare_defense = compare_defense()
tst_scores = tst_scores()

compare_nns = compare_nns()
nn_k1_robustness = nn_k1_robustness()
nn_k3_robustness = nn_k3_robustness()
rf_robustness = rf_robustness()
dt_robustness = dt_robustness()
dt_robustness_figs = dt_robustness_figs()
nn_k1_robustness_figs = nn_k1_robustness_figs()
nn_k3_robustness_figs = nn_k3_robustness_figs()
rf_optimality_figs = rf_optimality_figs()
nn_k1_optimality_figs = nn_k1_optimality_figs()
nn_k3_optimality_figs = nn_k3_optimality_figs()
rf_robustness_figs = rf_robustness_figs()


Using TensorFlow backend.


In [2]:
def result_latex_figs(exp_name, control_var, caption):
    control = ParameterGrid(control_var)
    ret = """
\\begin{figure}[ht!]
\\centering"""
    img_paths = []
    for i, g in enumerate(control):
        dataset, ord = g['dataset'], g['ord']
        img_path = f'./figs/{exp_name}_{dataset}_{ord}.eps'
        dataset = dataset.replace("_", " ")
        ret += """
\\subfloat[%s]{
    \\includegraphics[width=.45\\textwidth]{%s}}""" % (dataset, img_path)
        if i % 2 == 1:
            ret += "\n"
    ret += """
\\caption{%s}
\\label{fig:%s}
\\end{figure} 
""" % (caption, exp_name)
    return ret
                      
def plot_result(df, exp_name, control_var, variables,
                get_title_fn: Union[Callable[[Dict], str], None]=None,
                get_label_name_fn: Union[Callable[[Dict], str], None]=None,
                get_label_color_fn: Union[Callable[[Dict], str], None]=None, show_plot=True):
    ret = []
    for g in ParameterGrid(control_var):
        temp_df = df
                      
        if get_title_fn is None:
            title = exp_name
            for k, v in g.items():
                if v in variable_name[k]:
                    title = title + f"_{variable_name[k][v]}"
                else:
                    title = title + f"_{v}"
        else:
            title = get_title_fn(g)
            
        for k, v in g.items():
            temp_df = temp_df.loc[df[k] == v]
                      
        fig, ax = plt.subplots()
        ax.set_title(title)
        for name, group in temp_df.groupby(variables):
            #print(name, len(group))
            eps_list = [re.findall(r'[-+]?\d*\.\d+|\d+', t)[0] for t in group.mean().index.tolist()[:-1]]
            s = [r for r in group.mean().tolist()[:-1] if not np.isnan(r)]
            x = [float(eps_list[i]) for i, r in enumerate(group.mean().tolist()[:-1]) if not np.isnan(r)]
                      
            if get_label_name_fn is not None:
                label = get_label_name(name)
            elif isinstance(name, str):
                if variables[0] not in variable_name:
                    label = name
                elif name in variable_name[variables[0]]:
                    label = variable_name[variables[0]][name]
                else:
                    label = name
            else:
                mod_names = []
                for i, n in enumerate(name):
                    if n in variable_name[variables[i]]:
                        mod_names.append(variable_name[variables[i]][n])
                    else:
                        mod_names.append(n)
                label = mod_name.join("_")

            if get_label_color_fn is not None:
                ax.plot(x, s, label=label, linewidth=3.5, color=get_label_color_fn(name))
            else:
                ax.plot(x, s, label=label, linewidth=3.5)

        dataset = g['dataset']
        ord = g['ord']
        set_plot(fig, ax)
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.eps', format='eps')
        plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.png', format='png')
        ret.append((g, f'./figs/{exp_name}_{dataset}_{ord}.eps'))
        if show_plot:
            plt.show()
        else:
            plt.close()
    return ret
                      
variable_name = {
    #'model': {
    #    ''
    #},
    'dataset': {
        r"fashion_mnist35_(?P<samples>\d+)(?P<dims>_pca\d+)?": 'f-mnist35',
        r"fashion_mnist06_(?P<samples>\d+)(?P<dims>_pca\d+)?": 'f-mnist06',
        r"mnist17_(?P<samples>\d+)(?P<dims>_pca\d+)?": 'mnist17',
        r"mnist35_(?P<samples>\d+)(?P<dims>_pca\d+)?": 'mnist35',
        r"digits(?P<dims>_\d+)?": 'digits',
        r"halfmoon_(?P<samples>\d+)": 'halfmoon',
        r"ijcnn1_(?P<samples>\d+)": 'ijcnn',
        r'covtypebin_(?P<samples>\d+)': 'covtype',
        
        'abalone': 'abalone',
        'iris': 'iris',
        'wine': 'wine',
        'covtype_3200': 'covtype',
    },
    'attack': {
        'blackbox': 'Cheng\'s',
        'kernelsub_c10000_pgd': 'kernelsub',
        'kernelsub_c1000_pgd': 'kernelsub',
        'rev_nnopt_k1_20': 'nnopt-20-ori',
        'rev_nnopt_k1_50': 'nnopt-50-ori',
        'rev_nnopt_k1_20_region': 'nnopt-20',
        'rev_nnopt_k1_50_region': 'nnopt-50',
        'rev_nnopt_k3_20': 'nnopt-20-ori',
        'rev_nnopt_k3_50': 'nnopt-50-ori',
        'rev_nnopt_k3_20_region': 'nnopt-20',
        'rev_nnopt_k3_50_region': 'nnopt-50',
        'rev_nnopt_k5_20': 'nnopt-20-ori',
        'rev_nnopt_k5_50': 'nnopt-50-ori',
        'rev_nnopt_k5_20_region': 'nnopt-20',
        'rev_nnopt_k5_50_region': 'nnopt-50',
        'rev_nnopt_k7_20': 'nnopt-20-ori',
        'rev_nnopt_k7_50': 'nnopt-50-ori',
        'rev_nnopt_k7_20_region': 'nnopt-20',
        'rev_nnopt_k7_50_region': 'nnopt-50',
        
        'nnopt_k1_all': 'nnopt-all',
        'nnopt_k3_all': 'nnopt-all',
        
        'direct_k1': 'direct attack',
        'direct_k3': 'direct attack',
        'direct_k5': 'direct attack',
        'direct_k7': 'direct attack',
        
        'rf_attack_all': 'RF-all',
        'rf_attack_rev': 'RF-rev',
        'rf_attack_rev_20': 'RF-rev-20',
        'rf_attack_rev_50': 'RF-rev-50',
        'rf_attack_rev_100': 'RF-rev-100',
    },
    'ord': {},
}


In [28]:
def knn_attack_plots(exp_name, grid_param, caption='', show_plot=True):
    df = params_to_dataframe(grid_param)
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']

    control = {
        'dataset': datasets,
        'ord': grid_param[0]['ord'],
    }
    variables = ['attack']
    plot_result(df, exp_name, control, variables, show_plot)
    return result_latex_figs(exp_name, control, caption)
    
def get_var_name(var, arg):
    if var == 'model':
        if 'adv' in arg or 'robust' in arg:
            arg = "%s_%02d" % ("_".join(arg.split("_")[:-1]), int(arg.split("_")[-1]))
        else:
            arg = arg
    else:
        arg = variable_name[var].get(arg, arg)
    return arg.replace('_', '-')

def get_var_name(var, arg):
    
    if var == 'dataset':
        for k, v in variable_name['dataset'].items():
            arg = re.sub(k, v, arg)
        return arg
    return arg.replace('_', '-')

def avg_pert_table(exp_name, grid_param, columns, rows, objs:list=None, obj_formats:list=None):
    if objs is None:
        objs = ['avg_pert']
    columns = list(filter(lambda a: a not in ['n_features', 'n_samples', 'n_classes'], columns))
    if len(columns) == 0 or len(rows) == 0:
        return pd.DataFrame({})
    df = params_to_dataframe(grid_param, objs)
    
    d = OrderedDict()
    col_grid = OrderedDict([(c, union_param_key(grid_param, c)) for c in columns])
    row_grid = OrderedDict([(r, union_param_key(grid_param, r)) for r in rows])
    for i, obj in enumerate(objs):
        temp_df = df.groupby(columns + rows)[obj].mean()
        temp_df_sem = df.groupby(columns + rows)[obj].sem()
        
        if obj == 'tst_score':
            assert columns[0] == 'model'
        for col in ParameterGrid(col_grid):
            col_k = tuple(col[c] for c in columns)
            col_name = tuple([get_var_name(c, col[c]) for c in columns[:-1]] \
                             + ["%s-%s" % (get_var_name(columns[-1], col[columns[-1]]), obj.replace("_", "-"))])
            d[col_name] = {}
            for row in ParameterGrid(row_grid):
                row_k = tuple(row[r] for r in rows)
                row_name = tuple(get_var_name(r, row[r]) for r in rows)
                if (col_k + row_k) in temp_df:
                    #d[col_name][row_name] = "$%.3f \pm %.3f$" % (temp_df[col_k + row_k], temp_df_sem[col_k + row_k])
                    if obj_formats is None:
                        str_format = "$%.3f$"
                    else:
                        str_format = obj_formats[i]
                    d[col_name][row_name] = str_format % (temp_df[col_k + row_k])
                    d[col_name][row_name] = d[col_name][row_name].replace("0.", ".")
                else:
                    d[col_name][row_name] = -1

    #d = OrderedDict([(k, d[k]) for k in d.keys()])
    return pd.DataFrame(d)

def dataset_stat_column(df, grid_param, columns, rows):
    if ("n_features" not in columns) and ("n_samples" not in columns) and ("n_classes" not in columns) \
        and ("n_train" not in columns) and ("n_test" not in columns):
        return df
    
    column_names = {
        'n_train': '\# training',
        'n_test': '\# testing',
        'n_features': '\# features',
        'n_samples': '\# examples',
        'n_classes': '\# classes',
    }
    
    d = df.to_dict(into=OrderedDict)
    datasets = union_param_key(grid_param, "dataset")
    if len(d.keys()) > 0:
        first_key = list(d.keys())[0]
        row_len = 1 if isinstance(d[first_key], str) else len(first_key)
        col_len = 1 if isinstance(first_key, str) else len(first_key)
        ori_cols = list(d.keys())
    else:
        row_len = 1
        col_len = 1
        ori_cols = []
    
    for dataset in datasets:
        X, y, _ = auto_var.get_var_with_argument("dataset", dataset)
        row_name = (get_var_name("dataset", dataset), )
        for col in columns:
            if col not in column_names:
                continue
            column_name = tuple(['-' for _ in range(col_len-1)] + [column_names[col]])
            if col == "n_features":
                d.setdefault(column_name, {})[row_name] = X.shape[1]
            elif col == "n_samples":
                d.setdefault(column_name, {})[row_name] = X.shape[0]
            elif col == "n_train":
                d.setdefault(column_name, {})[row_name] = X.shape[0] - 200
            elif col == "n_test":
                d.setdefault(column_name, {})[row_name] = 100
            elif col == "n_classes":
                d.setdefault(column_name, {})[row_name] = len(np.unique(y))
                
    for col in ori_cols:
        d.move_to_end(col)
        
    return pd.DataFrame(d)
    
def cmp_ratio(df):
    ret = OrderedDict()
    d = df.to_dict(into=OrderedDict)
    cmp_base = []
    
    i = 0
    for col, col_dict in d.items():
        ret[col] = col_dict
        if 'avg-pert' not in col[1]:
            continue
        if i == 0 or i == 1:
            cmp_base.append(col_dict)
            i += 1
            continue
        temp = {}
        for k, v in col_dict.items():
            if v == -1 or cmp_base[i % 2][k] == -1:
                temp[k] = int(-1)
            else:
                v = v.replace("$", "")
                t = cmp_base[i % 2][k].replace("$", "")
                temp[k] = "$%.2f$" % (float(v) / float(t))
        
        ret[tuple([c for c in col[:-1]] + ["%s imp." % col[-1]])] = temp
        i += 1
        
    return pd.DataFrame(ret)

def max_imp(df):
    ret = OrderedDict()
    d = df.to_dict(into=OrderedDict)
    
    def add_new_col(col_list, ret):
        new_col = {}
        
        for attack_name in [col_list[0][0][1], col_list[1][0][1]]:
            temp = list(filter(lambda t: t[0][1] == attack_name, col_list))
            imps = []
            for c in temp:
                imps.append([float(v.replace("$", "")) if v != -1 else -1 for _, v in c[1].items()])
            imps = (np.array(imps).T).argmax(axis=1)

            new_col = {}
            new_col_imp = {}
            new_col_eps = {}
            pcol = temp[0][0]
            
            if 'd' in pcol[0].split("-")[-1]:
                tt = pcol[0].split("-")
                tt.pop(-2)
            else:
                tt = pcol[0].split("-")[:-1]
            new_col_name = ("-".join(tt), pcol[1])
            new_col_imp_name = ("-".join(tt), ("%s imp." % pcol[1]))
            new_col_eps_name = ("-".join(tt), ("%s $\\epsilon$" % pcol[1]))
            for i, idx in enumerate(imps):
                k, v = list(temp[idx][1].items())[i]
                new_col[k] = v
                k, v = list(temp[idx][2].items())[i]
                new_col_imp[k] = v 

                if 'd' in temp[idx][0][0].split("-")[-1]:
                    new_col_eps[k] = "$" + ("%.1f$" % (float(temp[idx][0][0].split("-")[-2]) * 0.01))[1:]
                else:
                    new_col_eps[k] = "$" + ("%.1f$" % (float(temp[idx][0][0].split("-")[-1]) * 0.01))[1:]

            ret[new_col_eps_name] = new_col_eps
            ret[new_col_name] = new_col
            ret[new_col_imp_name] = new_col_imp
    
    prev_col = None
    temp = []
    for i, (col, col_dict) in enumerate(d.items()):
        if 'd' in col[0].split("-")[-1]:
            check_idx = -2
        else:
            check_idx = -1
            
        if i == 0 or i == 1:
            ret[col] = col_dict
            continue
            
        if len(temp) == 0:
            temp.append(col_dict)
        elif i % 2 == 1:
            temp[-1] = (prev_col, temp[-1], col_dict)
            if i == (len(d.items())-1):
                add_new_col(temp, ret)
        else:
            if col[0].split("-")[:check_idx] != prev_col[0].split("-")[:check_idx]:
                add_new_col(temp, ret)
                temp = [col_dict]
            else:
                temp.append(col_dict)
                
        prev_col = col
        
    return pd.DataFrame(ret)

def bold_best(df, reverse=False):
    d = df.to_dict(into=OrderedDict)
    
    temp = []
    for i, (col, col_dict) in enumerate(d.items()):
        temp.append([])
        for row, row_value in col_dict.items():
            if isinstance(row_value, str):
                temp[-1].append(float(row_value.replace("$", '')))
            else:
                temp[-1].append(np.inf if reverse else -np.inf)
            
    temp = np.array(temp).T
    if reverse:
        best_idx = temp.argmin(axis=1)
    else:
        best_idx = temp.argmax(axis=1)
        
    ret = OrderedDict()
    for i, (col, col_dict) in enumerate(d.items()):
        ret[col] = {}
        for j, (row, row_value) in enumerate(col_dict.items()):
            if not isinstance(row_value, str):
                ret[col][row] = row_value
            else:
                if float(row_value[1:-1]) == temp[j][best_idx[j]]:
                    ret[col][row] = "$\\mathbf{" + row_value[1:-1] + "}$"
                else:
                    ret[col][row] = row_value

    return pd.DataFrame(ret)

def gen_table(exp_name, grid_params, columns, rows, objs=None,
              combine_method=None, additionals=None, obj_formats=None):
    if objs is None:
        objs = ['avg_pert']
    df = pd.DataFrame({})
    if combine_method is None:
        df = avg_pert_table(exp_name, grid_params, columns, rows, objs, obj_formats)
        if additionals:
            for fn in additionals:
                df = fn(df)
    else:
        dfs = []
        for g in grid_params:
            df = avg_pert_table(exp_name, g, columns, rows, objs, obj_formats)
            if additionals:
                for fn in additionals:
                    df = fn(df)
            dfs.append(df)
        df = pd.concat(dfs, axis=combine_method)
    
    if 'dataset' in rows:
        df = dataset_stat_column(df, grid_param, columns, rows)
    return df


In [4]:
def model_acc(df, grid_param):
    # col = ['model', 'attack']
    ret = OrderedDict()
    tst_df = params_to_dataframe(grid_param, ['tst_score'])

    d = df.to_dict(into=OrderedDict)
    models = set([c[0] for c, _ in d.items()])
    
    prev_col =None
    for i, (col, col_dict) in enumerate(d.items()):
        new_col_dict = OrderedDict({})
        if i == 0:
            for row, _ in col_dict.items():
                temp_df = tst_df[(tst_df['model'] == col[0].replace("-", "_"))
                                 & (tst_df['attack'] == 'blackbox') 
                                 & (tst_df['dataset'] == row[0].replace("-", "_"))]
                new_col_dict[row] = "$%.2f$" % temp_df['tst_score'].mean()
            ret[(col[0], col[1].replace('-avg-pert', ' tst acc.'))] = new_col_dict
            
        elif '\\epsilon' in col[1]:
            m = re.match(r"(?P<attack>[a-zA-Z_0-9'-]+) \$\\epsilon\$", col[1])
            attack_name = m.group("attack")[:-9].replace("-", "_") # remove '$epsilon$'
            for row, row_val in col_dict.items():
                if 'd' in col[0].split('-')[-1]:
                    model_name = '%s-%d-%s' % ('-'.join(col[0].split('-')[:-1]),
                                               int(float(row_val.replace("$", ""))*100),
                                               col[0].split('-')[-1],)
                else:
                    model_name = "%s-%d" % (col[0], int(float(row_val.replace("$", ""))*100))
                model_name = model_name.replace("-", "_")
                temp_df = tst_df[(tst_df['model'] == model_name)
                                 & (tst_df['attack'] == attack_name) 
                                 & (tst_df['dataset'] == row[0].replace("-", "_"))]
                new_col_dict[row] = "$%.2f$" % temp_df['tst_score'].mean()
            ret[(col[0], col[1].replace('-avg-pert $\\epsilon$', ' tst acc.'))] = new_col_dict
            
        prev_col = col
        ret[col] = col_dict
    return pd.DataFrame(ret)

In [5]:
#_, exp_name, grid_param, _ = nn_k1_robustness()
#params_to_dataframe(grid_param, ['trnX_len', 'aug_len']).groupby(['dataset', 'model'])['aug_len'].mean()

In [6]:
#_, exp_name, grid_param, _ = nn_k1_robustness()
#_, exp_name, grid_param, _ = dt_robustness()
#_, exp_name, grid_param, _ = rf_robustness()
#_, exp_name, grid_param, _ = nn_k3_robustness()
#df = params_to_dataframe(grid_param, ['avg_pert', 'tst_score']).groupby(['dataset', 'model'])
#df.mean()

In [7]:
def improvement(df):
    ret = OrderedDict()
    d = df.to_dict(into=OrderedDict)
    
    for i, (col, col_dict) in enumerate(d.items()):
        ret[col] = {}
        for row, row_value in col_dict.items():
            if i == 0:
                ref = col_dict
                value = 1.0
            elif ref[row] == -1 or row_value == -1:
                value = -1.
            else:
                value = (float(row_value.replace("$", '')) / float(ref[row].replace("$", '')))
                
            ret[col][row] = "$%.2f$" % value
        
    return pd.DataFrame(ret)

_, exp_name, grid_param, _ = compare_defense()
avg_caption = """
Improvement Factor. We measure the ratio of the empirical robustness of the defended classifier over that of the regular (undefended) classifier. A number greater than one indicates that the defense yields a more robust model, 
while less than one indicates less robustness (higher is better; best is in bold).
"""
table_str = table_wrapper(gen_table(
                exp_name, grid_param, ['model', 'attack'], ['dataset'], combine_method=1,
                objs=['avg_pert'], additionals=[improvement, bold_best]
            ), '%s_%s' % (exp_name, 'avg_pert'), caption=avg_caption)
table_str = table_str.replace("                 knn1 &          adv-nn-k1-30 &     robustv2-nn-k1-30 &     robustv1-nn-k1-30",
                              "\multicolumn{4}{c}{1-NN}")
table_str = table_str.replace("                           knn3 &                    adv-nn-k3-30 &               robustv1-nn-k3-30",
                              "\multicolumn{3}{c}{3-NN}")
table_str = table_str.replace("      decision-tree-d5 & adv-decision-tree-d5-30 & robust-decision-tree-d5-30 & robustv1-decision-tree-d5-30",
                              "\multicolumn{4}{c}{DT}")
table_str = table_str.replace("      random-forest-100-d5 &           adv-rf-100-30-d5 &        robust-rf-100-30-d5 &      robustv1-rf-100-30-d5",
                              "\multicolumn{4}{c}{RF}")
table_str = table_str.replace(" mlp &        adv-mlp-30 &  robustv1-mlp-30",
                              "\multicolumn{3}{c}{MLP}")
table_str = table_str.replace("nnopt-k1-all-avg-pert & nnopt-k1-all-avg-pert & nnopt-k1-all-avg-pert & nnopt-k1-all-avg-pert",
                              "regular & AT & Wang's & AP")
table_str = table_str.replace("rev-nnopt-k3-50-region-avg-pert & rev-nnopt-k3-50-region-avg-pert & rev-nnopt-k3-50-region-avg-pert",
                              "regular & AT & AP")
table_str = table_str.replace("dt-attack-opt-avg-pert &  dt-attack-opt-avg-pert &     dt-attack-opt-avg-pert &       dt-attack-opt-avg-pert",
                              "regular & AT & RS & AP")
table_str = table_str.replace("rf-attack-rev-100-avg-pert & rf-attack-rev-100-avg-pert & rf-attack-rev-100-avg-pert & rf-attack-rev-100-avg-pert",
                              "regular & AT & RS & AP")
table_str = table_str.replace("pgd-avg-pert &      pgd-avg-pert &     pgd-avg-pert",
                              "regular & AT & AP")
table_str = table_str.replace("adv-nnopt-k1-all-avg-pert", "AT")
table_str = table_str.replace("robustv1-nn-k1-30", "AP")
table_str = table_str.replace("adv-decision-tree-d5-30", "AT")
table_str = table_str.replace("robustv1-decision-tree-d5-30", "AP")
table_str = table_str.replace("lllllllllllllllllll", "l|cccc|ccc|cccc|cccc|ccc")
print(table_str)
#for k, v in variable_name['dataset'].items():
#    table_str = re.sub(k.replace('_', '-'), v, table_str)
write_to_tex(table_str, exp_name + '_table.tex')


\begin{table}[h!]
\tiny
\centering
\setlength{\tabcolsep}{2.0pt}
\begin{tabular}{l|cccc|ccc|cccc|cccc|ccc}
\toprule
        & \multicolumn{4}{c}{1-NN} & \multicolumn{3}{c}{3-NN} & \multicolumn{4}{c}{DT} & \multicolumn{4}{c}{RF} &         \multicolumn{3}{c}{MLP} \\
        & regular & AT & Wang's & AP & regular & AT & AP & regular & AT & RS & AP & regular & AT & RS & AP & regular & AT & AP \\
\midrule
australian &                $1.00$ &                $0.64$ &       $\mathbf{1.65}$ &       $\mathbf{1.65}$ &                          $1.00$ &                          $0.68$ &                 $\mathbf{1.20}$ &                 $1.00$ &                  $2.36$ &            $\mathbf{5.86}$ &                       $2.37$ &                     $1.00$ &                     $1.07$ &            $\mathbf{1.12}$ &                     $1.04$ &       $1.00$ &  $\mathbf{12.10}$ &           $1.22$ \\
cancer &                $1.00$ &                $0.82$ &                $1.05$ &       $\mathbf{1.41}$

In [52]:
_, exp_name, grid_param, _ = compare_attacks()
avg_caption = "Empirical robustness (measured in $\ell_\infty$) to alter all predictions (lower is better; best is in bold)."
table_str = table_wrapper(gen_table(
                exp_name, grid_param, ['model', 'attack'], ['dataset'], combine_method=1,
                objs=['avg_pert'], additionals=[partial(bold_best, reverse=True)]
            ), '%s_%s' % (exp_name, 'avg_pert'), caption=avg_caption)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) imp\.", "imp.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) tst acc\.", "tst acc.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) \$\\epsilon\$", "$\\epsilon$", table_str)
table_str = table_str.replace("llllllllllll", "l|ccc|ccc|ccc|cc")
table_str = table_str.replace("-avg-pert", "")
table_str = table_str.replace("direct-k1", "Direct")
table_str = table_str.replace("direct-k3", "Direct")
table_str = table_str.replace("nnopt-k1-all", "RBA-Exact")
table_str = table_str.replace("dt-papernots", "Papernot's")
table_str = table_str.replace("rev-nnopt-k3-50-region", "RBA-Approx")
table_str = table_str.replace("rf-attack-rev-100", "RBA-Approx")
table_str = table_str.replace("dt-attack-opt", "RBA-Exact")
table_str = table_str.replace("decision-tree-d5", "DT")
table_str = table_str.replace("random-forest-100-d5", "RF")
table_str = table_str.replace("knn1", "1-NN")
table_str = table_str.replace("knn3", "3-NN")
table_str = table_str.replace("\multicolumn{3}{l}", "\multicolumn{3}{c}")
table_str = table_str.replace("\multicolumn{2}{l}", "\multicolumn{2}{c}")
table_str = table_str.replace("blackbox", "BBox")
print(table_str)
write_to_tex(table_str, exp_name + '_table.tex')


\begin{table}[h!]
\tiny
\centering
\setlength{\tabcolsep}{2.0pt}
\begin{tabular}{l|ccc|ccc|ccc|cc}
\toprule
        & \multicolumn{3}{c}{1-NN} & \multicolumn{3}{c}{3-NN} & \multicolumn{3}{c}{DT} & \multicolumn{2}{c}{RF} \\
        & Direct & BBox & RBA-Exact & Direct & BBox & RBA-Approx & Papernot's & BBox & RBA-Exact &    BBox & RBA-Approx \\
\midrule
australian &             $.442$ &            $.336$ &       $\mathbf{.151}$ &             $.719$ &            $.391$ &                 $\mathbf{.278}$ &                $.140$ &            $.139$ &        $\mathbf{.070}$ &      $\mathbf{.364}$ &                     $.446$ \\
cancer &             $.354$ &            $.339$ &       $\mathbf{.137}$ &             $.403$ &            $.385$ &                 $\mathbf{.204}$ &                $.459$ &            $.309$ &        $\mathbf{.255}$ &               $.425$ &            $\mathbf{.383}$ \\
covtype &             $.320$ &            $.207$ &       $\mathbf{.076}$ &             $.443$ &   

In [9]:
exp_fns = [nn_k1_robustness_figs, nn_k3_robustness_figs, dt_robustness_figs, rf_robustness_figs]
model_names = ["1-NN", "3-NN", "Decision tree", "Random forest"]
def get_label_name(name):
    if 'robustv1' in name:
        return "AP"
    elif 'robust' in name:
        return "RS"
    elif 'decision_tree' in name:
        return "Reg."
    elif 'knn1' in name:
        return "Reg."
    elif 'knn3' in name:
        return "Reg."
    elif 'random_forest' in name:
        return "Reg."
        
    return name

def get_label_color(name):
    if 'robustv1' in name:
        return "#ff7f0e"
    elif 'robust' in name:
        return "#1f77b4"
    elif 'decision_tree' in name:
        return "#7f7f7f"
    elif 'knn1' in name:
        return "#7f7f7f"
    elif 'knn3' in name:
        return "#7f7f7f"
    elif 'random_forest' in name:
        return "#7f7f7f"
        
    return name

def compare_nn_plots(exp_name, grid_param, caption='', show_plot=False):
    df = params_to_dataframe(grid_param)
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']

    control = {
        'dataset': datasets,
        'ord': grid_param['ord'],
    }
    variables = ['model']
    
    fig_paths = plot_result(df, exp_name, control, variables,
                            get_title_fn=lambda g: g['dataset'],
                            get_label_name_fn=get_label_name,
                            get_label_color_fn=get_label_color,
                            show_plot=show_plot)
    return fig_paths

def fig_paths_latex(fig_paths: List[List[Tuple[Dict, str]]], fig_label, caption):
    ret = """
\\begin{figure}[ht!]
\\centering"""
    img_paths = []
    for row in fig_paths:
        for entry in row:
            g, img_path = entry
            ret += """
\\subfloat[%s]{
    \\includegraphics[width=%.2f\\textwidth]{%s}}""" % (g['subfig_label'], 1/len(fig_paths[0]), img_path)
        ret += "\n"
    ret += """
\\caption{%s}
\\label{fig:%s}
\\end{figure} 
""" % (caption, fig_label)
    return ret

fig_paths = []
for i, fn in enumerate(exp_fns):
    _, exp_name, grid_param, _ = fn()
    fig_path = compare_nn_plots(exp_name, grid_param, show_plot=False)
    for g, _ in fig_path:
        g['subfig_label'] = model_names[i]
        for k, v in variable_name['dataset'].items():
            g['subfig_label'] = re.sub(k, v, g['subfig_label'])
    fig_paths.append(fig_path)
transpose = [list() for c in fig_paths[0]]
for i, col in enumerate(fig_paths):
    for j, r in enumerate(col):
        transpose[j].append(r)
        
caption = "The maximum perturbation distance allowed versus accuracy"
fig_str = fig_paths_latex(transpose[:5], "defense-cmp", caption)
write_to_tex(fig_str, 'defense_cmp_fig.tex')

fig_str = fig_paths_latex(transpose[5:], "defense-cmp2", caption)
write_to_tex(fig_str, 'defense_cmp2_fig.tex')

In [10]:
_, exp_name, grid_param, _ = compare_nns()

def get_title_fn(g):
    ret = g['dataset']
    for k, v in variable_name['dataset'].items():
        ret = re.sub(k.replace('_', '-'), v, ret)
    return ret

def compare_nn_plots(exp_name, grid_param, caption='', show_plot=True):
    df = params_to_dataframe(grid_param)
    datasets = set.union(*[set(g['dataset']) for g in grid_param]) if isinstance(grid_param, list) else grid_param['dataset']

    control = {
        'dataset': datasets,
        'ord': grid_param[0]['ord'],
    }
    variables = ['model']
    figs = plot_result(df, exp_name, control, variables, get_title_fn=get_title_fn, show_plot=show_plot)
    fig_paths = []
    for i, f in enumerate(figs):
        if i % 4 == 0:
            fig_paths.append([])
        f[0]['subfig_label'] = f[0]['dataset']
        for k, v in variable_name['dataset'].items():
            f[0]['subfig_label'] = re.sub(k, v, f[0]['subfig_label'])
        fig_paths[-1].append(f)
    
    return fig_paths_latex(fig_paths, exp_name, caption=caption)
    #return result_latex_figs(exp_name, control, caption)
fig_str = compare_nn_plots(exp_name, grid_param, show_plot=False)
print(fig_str)
write_to_tex(fig_str, exp_name + '_fig.tex')


\begin{figure}[ht!]
\centering
\subfloat[cancer]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_cancer_inf.eps}}
\subfloat[australian]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_australian_inf.eps}}
\subfloat[fourclass]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_fourclass_inf.eps}}
\subfloat[f-mnist35]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_fashion_mnist35_2200_pca25_inf.eps}}

\subfloat[f-mnist06]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_fashion_mnist06_2200_pca25_inf.eps}}
\subfloat[covtype]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_covtypebin_1200_inf.eps}}
\subfloat[mnist17]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_mnist17_2200_pca25_inf.eps}}
\subfloat[diabetes]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_diabetes_inf.eps}}

\subfloat[halfmoon]{
    \includegraphics[width=0.25\textwidth]{./figs/compare_nns_halfmoon_220

In [11]:
_, exp_name, grid_param, _ = tst_scores()
avg_caption = "test accuracy with different defense strength"
df = gen_table(exp_name, grid_param, ['model', 'attack'], ['dataset'],
               combine_method=1, objs=['tst_score'], additionals=[])
table_str = table_wrapper(df, table_name=exp_name, caption=avg_caption,)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) imp\.", "imp.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) tst acc\.", "tst acc.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) \$\\epsilon\$", "$\\epsilon$", table_str)
table_str = table_str.replace("lllllllllllllllll", "lcccc|cccc|cccc|cccc")
table_str = table_str.replace("                  knn1 &      robustv1-nn-k1-10 &      robustv1-nn-k1-30 &      robustv1-nn-k1-50",
                              "\multicolumn{4}{c}{1-NN}")
table_str = table_str.replace("                            knn3 &                robustv1-nn-k3-10 &                robustv1-nn-k3-30 &                robustv1-nn-k3-50",
                              "\multicolumn{4}{c}{3-NN}")
table_str = table_str.replace("       decision-tree-d5 & robustv1-decision-tree-d5-10 & robustv1-decision-tree-d5-30 & robustv1-decision-tree-d5-50",
                              "\multicolumn{4}{c}{DT}")
table_str = table_str.replace("       random-forest-100-d5 &       robustv1-rf-100-10-d5 &       robustv1-rf-100-30-d5 &       robustv1-rf-100-50-d5",
                              "\multicolumn{4}{c}{RF}")
table_str = table_str.replace("nnopt-k1-all-tst-score & " * 4,
                              "Regular & AP-10 & AP-30 & AP-50 & ")
table_str = table_str.replace("& rev-nnopt-k3-50-region-tst-score " * 4,
                              "& Regular & AP-10 & AP-30 & AP-50 ")
table_str = table_str.replace("& dt-attack-opt-tst-score &      dt-attack-opt-tst-score &      dt-attack-opt-tst-score &      dt-attack-opt-tst-score",
                              "& Regular & AP-10 & AP-30 & AP-50 ")
table_str = table_str.replace("& rf-attack-rev-100-tst-score " * 4,
                              "& Regular & AP-10 & AP-30 & AP-50 ")
for k, v in variable_name['dataset'].items():
    table_str = re.sub(k.replace('_', '-'), v, table_str)
write_to_tex(table_str, exp_name + '_table.tex')

In [25]:
from params import datasets, tree_datasets
_, _, grid_param, _ = tst_scores()

col_names = [
    "\\parbox{15mm}{\\centering \# training \\\\ (1-NN, 3-NN)}",
    "\\parbox{15mm}{\\centering \# training \\\\ (DT, RF, MLP)}",
    "\\parbox{15mm}{\\centering \# testing \\\\ (perturbation)}",
    "\\parbox{15mm}{\\centering \# testing \\\\ (test accuracy)}",
    "\# features",
    "\# classes",
]
ret = OrderedDict()
for i, ds in enumerate(datasets):
    print(ds, auto_var.get_var_shown_name("dataset", ds))
    X, y, _ = auto_var.get_var_with_argument("dataset", ds)
    tX, _, _ = auto_var.get_var_with_argument("dataset", tree_datasets[i])
    ret[auto_var.get_var_shown_name("dataset", ds)] = OrderedDict([
        (col_names[0], X.shape[0]-200),
        (col_names[1], tX.shape[0]-200),
        (col_names[2], 100),
        (col_names[3], 200),
        (col_names[4], X.shape[1]),
        (col_names[5], 2),
    ])
df = pd.DataFrame(ret).T
df = df[[c for c in col_names]]

exp_name = "dataset-stats"
caption = "Dataset statistics."
table_str = table_wrapper(df, table_name=exp_name, caption=caption)
table_str = table_str.replace("lrrrrrr", "lcccccc")
print(table_str)
write_to_tex(table_str, exp_name + '_table.tex')

australian australian
fourclass fourclass
diabetes diabetes
cancer cancer
halfmoon_2200 halfmoon
covtypebin_1200 covtype
fashion_mnist35_2200_pca25 f-mnist35
fashion_mnist06_2200_pca25 f-mnist06
mnist17_2200_pca25 mnist17

\begin{table}[h!]
\tiny
\centering
\setlength{\tabcolsep}{2.0pt}
\begin{tabular}{lcccccc}
\toprule
{} &  \parbox{15mm}{\centering \# training \\ (1-NN, 3-NN)} &  \parbox{15mm}{\centering \# training \\ (DT, RF, MLP)} &  \parbox{15mm}{\centering \# testing \\ (perturbation)} &  \parbox{15mm}{\centering \# testing \\ (test accuracy)} &  \# features &  \# classes \\
\midrule
australian &                                                490 &                                                490 &                                                100 &                                                200 &           14 &           2 \\
fourclass  &                                                662 &                                                662 &                             

In [13]:
auto_var.var_shown_name
print(re.fullmatch('mnist35_(?P<n_samples>\\d+)(?P<n_dims>_pca\\d+)?', 'mnist35_2200_pca25jlkjl'))

None


In [53]:
%%bash
bash ./sync_report.sh

In [15]:
assert 1==0

AssertionError: 

In [47]:
_, exp_name, grid_param, _ = nn_k3_robustness()
avg_caption = "3-NN average perturbation distance (Linf)"
df = gen_table(exp_name, grid_param, ['model', 'attack'], ['dataset'],
               combine_method=1, objs=['tst_score', 'avg_pert', 'aug_len'],
               additionals=[])

col_names = [
    ('knn3', 'rev-nnopt-k3-50-region-avg-pert'),
    ('knn3', 'rev-nnopt-k3-50-region-tst-score'),
    ('knn3', 'rev-nnopt-k3-50-region-aug-len'),
    ('robustv1-nn-k3-10', 'rev-nnopt-k3-50-region-avg-pert'),
    ('robustv1-nn-k3-10', 'rev-nnopt-k3-50-region-tst-score'),
    ('robustv1-nn-k3-10', 'rev-nnopt-k3-50-region-aug-len'),
    ('robustv1-nn-k3-10', 'imp.'),
    ('robustv1-nn-k3-30', 'rev-nnopt-k3-50-region-avg-pert'),
    ('robustv1-nn-k3-30', 'rev-nnopt-k3-50-region-tst-score'),
    ('robustv1-nn-k3-30', 'rev-nnopt-k3-50-region-aug-len'),
    ('robustv1-nn-k3-30', 'imp.'),
    ('robustv1-nn-k3-50', 'rev-nnopt-k3-50-region-avg-pert'),
    ('robustv1-nn-k3-50', 'rev-nnopt-k3-50-region-tst-score'),
    ('robustv1-nn-k3-50', 'rev-nnopt-k3-50-region-aug-len'),
    ('robustv1-nn-k3-50', 'imp.'),
]
df = df.apply(lambda a: a.apply(lambda b: float(b.replace("$", "")) if b else b))
df[('robustv1-nn-k3-10', 'imp.')] = df[('knn3', 'rev-nnopt-k3-50-region-avg-pert')] / df[('robustv1-nn-k3-10', 'rev-nnopt-k3-50-region-avg-pert')]
df[('robustv1-nn-k3-30', 'imp.')] = df[('knn3', 'rev-nnopt-k3-50-region-avg-pert')] / df[('robustv1-nn-k3-30', 'rev-nnopt-k3-50-region-avg-pert')]
df[('robustv1-nn-k3-50', 'imp.')] = df[('knn3', 'rev-nnopt-k3-50-region-avg-pert')] / df[('robustv1-nn-k3-50', 'rev-nnopt-k3-50-region-avg-pert')]
df = df[col_names]
df = df.rename(index=str, columns={
    "robustv1-nn-k3-50": "AP .5",
    "robustv1-nn-k3-30": "AP .3",
    "robustv1-nn-k3-10": "AP .1",
    "rev-nnopt-k3-50-region-aug-len": "\# train",
    "rev-nnopt-k3-50-region-tst-score": "accuracy",
    "rev-nnopt-k3-50-region-avg-pert": "pert",
})
def 
df.apply()
table_str = table_wrapper(df, table_name=exp_name, caption=avg_caption,)

for k, v in variable_name['dataset'].items():
    table_str = re.sub(k.replace('_', '-'), v, table_str)
print(table_str)
#write_to_tex(table_str, exp_name + '_table.tex')


\begin{table}[h!]
\tiny
\centering
\setlength{\tabcolsep}{2.0pt}
\begin{tabular}{lrrrrrrrrrrrrrrr}
\toprule
        & \multicolumn{3}{l}{knn3} & \multicolumn{4}{l}{AP .1} & \multicolumn{4}{l}{AP .3} & \multicolumn{4}{l}{AP .5} \\
        &   pert & accuracy & \# train &   pert & accuracy & \# train &      imp. &   pert & accuracy & \# train &      imp. &   pert & accuracy & \# train &      imp. \\
\midrule
australian &  0.278 &    0.805 &      NaN &  0.317 &    0.810 &    484.0 &  0.876972 &  0.333 &    0.815 &    458.0 &  0.834835 &  0.371 &    0.825 &    427.0 &  0.749326 \\
cancer &  0.204 &    0.975 &      NaN &  0.204 &    0.975 &    483.0 &  1.000000 &  0.283 &    0.960 &    473.0 &  0.720848 &  0.350 &    0.970 &    458.0 &  0.582857 \\
covtype &  0.120 &    0.690 &      NaN &  0.127 &    0.675 &    977.0 &  0.944882 &  0.306 &    0.685 &    724.0 &  0.392157 &  0.375 &    0.710 &    695.0 &  0.320000 \\
diabetes &  0.078 &    0.755 &      NaN &  0.078 &    0.750 &    535.0 &  

In [None]:
_, exp_name, grid_param, _ = rf_robustness()
avg_caption = "Random forest average perturbation distance (Linf)"
table_str = table_wrapper(exp_name, grid_param, ['n_samples', 'n_features', 'model', 'attack'], ['dataset'], caption=avg_caption, 
                          objs=['avg_pert'],
                          additionals=[cmp_ratio, max_imp, partial(model_acc, grid_param=grid_param)])
table_str = re.sub(r"([a-zA-Z_0-9'-]+) imp\.", "imp.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) tst acc\.", "tst acc.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) \$\\epsilon\$", "$\\epsilon$", table_str)
table_str = table_str.replace("lrrlllllllllllllllllllllllllll", "lcc|lll|llll|llll|llll|llll|llll|llll")
table_str = table_str.replace("rf-attack-rev-", "our-")
table_str = table_str.replace("blackbox", "Cheng's")
table_str = table_str.replace("random-forest-100-d5", "random forest")
table_str = table_str.replace("robust-rf-100-d5", "random forest with robust splitting")
table_str = table_str.replace("robustv1-rf-100-d5", "random forest with adversarial pruning")

table_str = table_str.replace("multicolumn{8}{l}", "multicolumn{8}{|c}")
table_str = table_str.replace("multicolumn{3}{l}", "multicolumn{3}{|c}")
for k, v in variable_name['dataset'].items():
    table_str = re.sub(k.replace('_', '-'), v, table_str)
write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
_, exp_name, grid_param, _ = dt_robustness()
avg_caption = "Decision tree average perturbation distance (Linf)"
table_str = table_wrapper(exp_name, grid_param, ['model', 'attack'], ['dataset'], caption=avg_caption, 
                          objs=['avg_pert'],
                          additionals=[cmp_ratio, max_imp, partial(model_acc, grid_param=grid_param)])
table_str = re.sub(r"([a-zA-Z_0-9'-]+) imp\.", "imp.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) tst acc\.", "tst acc.", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+) \$\\epsilon\$", "$\\epsilon$", table_str)
table_str = table_str.replace("llllllllllllllllllllllllllll", "llll|llll|llll|llll|llll|llll|llll")
table_str = table_str.replace("dt-attack-opt", "opt")
write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
_, exp_name, grid_param, _ = nn_k1_optimality_figs()
df = params_to_dataframe(grid_param, ['avg_pert'])
datasets = union_param_key(grid_param, 'dataset')
for dataset in datasets:
    temp_df = df[(df['dataset'] == dataset)]
    fig, ax = plt.subplots()
    ax.set_title(dataset)
    x = []
    for k, v in temp_df.groupby("attack"):
        avg_pert = v['avg_pert'].mean()
        if k == 'blackbox':
            bb_avg = avg_pert
            ax.hlines(avg_pert, xmin=0, xmax=100, label='Cheng', colors='c')
        elif k.split('_')[-1] == 'all':
            opt_avg = avg_pert
            ax.hlines(avg_pert, xmin=0, xmax=100, label='opt')
        elif k.split('_')[-1] == 'rev':
            x.append((100, avg_pert))
        else:
            if k.split('_')[-1] != 'region':
                x.append((int(k.split('_')[-1]), avg_pert))
            else:
                x.append((int(k.split('_')[-2]), avg_pert))
    x = sorted(x, key=lambda t: t[0])
    x, y = zip(*x)
    ax.plot(x, y)
    set_plot(fig, ax)
    ax.set_ylim(opt_avg-0.03, max(list(y) + [bb_avg])+0.03)
    ax.set_xlim(0, 100)
    #plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.eps', format='eps')
    #plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.png', format='png')
    plt.show()

In [None]:
for fn in [nn_k1_robustness_figs, nn_k3_robustness_figs]:
    _, exp_name, grid_param, _ = fn()
    df = params_to_dataframe(grid_param, ['avg_pert'])
    datasets = union_param_key(grid_param, 'dataset')
    for dataset in datasets:
        temp_df = df[(df['dataset'] == dataset)]
        fig, ax = plt.subplots()
        ax.set_title(dataset)
        x = []
        for k, v in temp_df.groupby("attack"):
            avg_pert = v['avg_pert'].mean()
            if k == 'blackbox':
                ax.hlines(avg_pert, xmin=0, xmax=50, label='Cheng')
            elif k.split('_')[-1] == 'all':
                opt_pert = avg_pert
                ax.hlines(avg_pert, xmin=0, xmax=50, label='opt')
            else:
                x.append((int(k.split('_')[-2]), avg_pert))
        x = sorted(x, key=lambda t: t[0])
        x, y = zip(*x)
        ax.plot(x, y)
        set_plot(fig, ax)
        ax.set_ylim(opt_pert-0.03, max(y)+0.03)
        ax.set_xlim(0, 50)
        #ax.set_xscale('log')
        #plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.eps', format='eps')
        #plt.savefig(f'./figs/{exp_name}_{dataset}_{ord}.png', format='png')
        plt.show()

In [None]:
_, exp_name, grid_param, _ = rf_optimality()
avg_caption = "RF average perturbation distance (Linf)"
table_str = table_wrapper(exp_name, grid_param,
                          ['n_samples', 'n_features', 'n_classes', 'model', 'attack'], ['dataset'],
                          caption=avg_caption, combine_method=1)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-region", r"\1", table_str)
table_str = table_str.replace("rf-attack-rev-", "our-")
write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
_, exp_name, grid_param, _ = nn_optimality()
avg_caption = "3NN average perturbation distance (Linf)"
table_str = table_wrapper(exp_name, grid_param,
                          ['n_samples', 'n_features', 'n_classes', 'model', 'attack'], ['dataset'],
                          caption=avg_caption, combine_method=1)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-avg-pert", r"\1", table_str)
table_str = re.sub(r"([a-zA-Z_0-9'-]+)-region", r"\1", table_str)
table_str = table_str.replace("nnopt-k3-", "our-")
write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
experiments = [rf_attack, opt_of_rf_attack, robust_rf]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in experiments:
    _, exp_name, grid_param, _ = fn()
    print(exp_name)
    #columns = ['blackbox', 'rf_attack_rev_20', 'rf_attack_rev_100']
    table_str = table_wrapper(exp_name, grid_param, ['attack'], ['dataset'], caption=avg_caption)
    write_to_tex(table_str, exp_name + '_table.tex')
    fig_str = knn_attack_plots(exp_name, grid_param, show_plot=False)
    write_to_tex(fig_str, exp_name + '_fig.tex')

In [None]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(grid_param)
    variables = grid_param[0]['attack']
    #variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    variables = list(filter(lambda v: 'kernelsub' not in v, variables))
    print(variables)
    table_str = table_wrapper(exp_name, grid_param, variables, caption=avg_caption)
    table_str += table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption)
    #print(table_str)
    write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
nn_experiments = [nn_k1, nn_k3, nn_k5, nn_k7, opt_of_nnopt]
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    print(exp_name)
    fig_str = knn_attack_plots(exp_name, grid_param, show_plot=False)
    write_to_tex(fig_str, exp_name + '_fig.tex')

In [None]:
nn_experiments = [robust_nn_k1, robust_nn_k3]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    variables = grid_param[0]['attack']
    variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    print(variables)
    table_str = table_wrapper(exp_name, grid_param, variables, caption=avg_caption)
    table_str += table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption)
    write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
nn_experiments = [robust_nn_k1, robust_nn_k3]
avg_caption = "average purturbation distance (Linf)"
miss_caption = "\\# of data algorithms is not able to generate successful attack (total 100 data points)"
for fn in nn_experiments:
    _, exp_name, grid_param, _ = fn()
    variables = grid_param[0]['attack']
    variables = list(filter(lambda v: 'kernelsub' not in v and 'direct' not in v, variables))
    print(variables)
    table_str = table_wrapper(exp_name, grid_param, variables, caption=avg_caption)
    table_str += table_wrapper(exp_name, grid_param, variables, obj='missed_count', caption=miss_caption)
    write_to_tex(table_str, exp_name + '_table.tex')

In [None]:
from nnattack.models.robust_nn.eps_separation import build_collision_graph, find_min_cover
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
nn = NearestNeighbors(n_neighbors=3)

auto_var.set_variable_value("random_seed", 0)
np.random.seed(0)
X, y, _ = auto_var.get_var_with_argument("dataset", "mnist17_2200")
pts = PCA().fit_transform(X)
idx = np.arange(len(X))
np.random.shuffle(idx)
pts = MinMaxScaler().fit_transform(pts)

nn.fit(pts)
y_pts = [1 if i>0 else -1 for i in y]
adj_lst, graph = build_collision_graph(0.15, pts, y_pts, np.inf)
matching, min_cover = find_min_cover(graph, adj_lst, y_pts)

In [None]:
min_cover

In [None]:
y[list(min_cover)]

In [None]:
for i in min_cover:
    _, idx = nn.kneighbors([pts[i]])
    plt.imshow(X[i].reshape(28, 28))
    plt.savefig(f'/home/arbiter/figs/{i}.png', format='png')
    plt.close()
    mkdir_p(f'/home/arbiter/figs/{i}')
    for j in idx[0]:
        print(j, X.shape)
        plt.imshow(X[j].reshape(28, 28))
        plt.savefig(f'/home/arbiter/figs/{i}/{j}.png', format='png')
        plt.close()
    

In [None]:
plt.imshow(X[0].reshape(28, 28))

In [None]:
y