In [None]:
import torch
import numpy as np
from pathlib import Path
from argparse import Namespace
from matplotlib import pyplot as plt

def iter_results(main_dir):
    """Walk a results directory and yield one ``(experiment, results)`` pair per experiment.

    Expected layout (entries that do not match are skipped)::

        main_dir/<dataset>-<model>-<epsilon>/<learning_rate>-<factor>-<temperature>/pgd-*

    Every ``pgd-*`` file is read with ``torch.load`` and is indexed with the
    keys ``'test_epsilon'``, ``'fooling_rate'`` and ``'accuracy'``.

    Args:
        main_dir: Path (or string) of the directory holding the experiment folders.

    Yields:
        tuple: ``(experiment, results)``. ``experiment`` is a ``Namespace`` with
        ``dataset``, ``model`` and ``epsilon`` (float). ``results`` is a callable
        that takes no arguments and returns a generator of ``Namespace`` objects,
        one per run folder, with the fields ``label``, ``experiment``,
        ``parameters`` (``learning_rate``, ``factor``, ``temperature``),
        ``results`` (``accuracy``, ``pgd_accuracy``) and ``pgd`` (``epsilons``,
        ``accuracies``, sorted by epsilon).
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    # Sorted so that the order of experiments (and thus of plots) is reproducible.
    for exp in sorted(Path(main_dir).iterdir()):
        if not exp.is_dir():
            continue
        try:
            dataset, model, epsilon = exp.name.split('-')
            epsilon = float(epsilon)
        except ValueError:
            # Wrong number of '-' separated fields, or a non-numeric epsilon.
            continue
        experiment = Namespace(dataset=dataset, model=model, epsilon=epsilon)

        # `exp` and `experiment` are bound as defaults so that each callable keeps
        # referring to its own experiment even if it is invoked after the outer
        # loop has moved on (e.g. when the caller does list(iter_results(...))).
        def results(exp=exp, experiment=experiment):
            """Yield one Namespace per run folder of this experiment."""
            for folder in sorted(exp.iterdir()):
                if not folder.is_dir():
                    continue
                try:
                    learning_rate, factor, temperature = (float(x) for x in folder.name.split('-'))
                except ValueError:
                    continue

                test_epsilons, pgd_accuracies = [], []
                accuracy = None
                for pgd in sorted(folder.iterdir()):
                    if not pgd.name.startswith('pgd-'):
                        continue
                    # NOTE: torch.load unpickles the file; only use it on trusted results.
                    loaded = torch.load(pgd)
                    test_epsilons.append(loaded['test_epsilon'])
                    pgd_accuracies.append(1 - loaded['fooling_rate'])
                    # The clean accuracy is taken from the last file that was read.
                    accuracy = loaded['accuracy'] / 100
                if accuracy is None:
                    # No PGD evaluation in this folder: nothing to report.
                    continue

                if learning_rate == 0:
                    label = 'Deep Mind'
                elif factor == 0:
                    label = f'Nominal lr = {learning_rate}'
                else:
                    label = f'lr = {learning_rate}, f = {factor}, t = {temperature}'

                try:
                    test_epsilons, pgd_accuracies = zip(*sorted(zip(test_epsilons, pgd_accuracies)))
                    if len(test_epsilons) < 2:
                        denominator = 1
                    else:
                        denominator = test_epsilons[-1] - test_epsilons[0]
                        if denominator == 0:
                            # All evaluations share one epsilon; avoid a 0/0 division.
                            denominator = 1
                    # Area under the accuracy curve, normalised by the epsilon span.
                    auc = integrate(pgd_accuracies, test_epsilons) / denominator
                except (TypeError, ValueError):
                    # Best effort: values that cannot be sorted/integrated give AUC 0.
                    auc = 0

                yield Namespace(
                    label=label,
                    experiment=experiment,
                    parameters=Namespace(learning_rate=learning_rate, factor=factor, temperature=temperature),
                    results=Namespace(accuracy=accuracy, pgd_accuracy=auc),
                    pgd=Namespace(epsilons=test_epsilons, accuracies=pgd_accuracies),
                )

        yield experiment, results

In [None]:
# for experiment, results in iter_results(Path.home() / 'results'):
#     for r in results():
#         label = f'[{100 * r.results.accuracy:.2f}% - {100 * r.results.pgd_accuracy:.2f}%] {r.label}'
#         plt.plot(r.pgd.epsilons, r.pgd.accuracies, marker='o', label=label)
#     eps = experiment.epsilon
#     if len(str(eps)) > 6:
#         eps = f'{int(experiment.epsilon * 255)}/255'
#     plt.title(f'{experiment.dataset} - {experiment.model} - eps = {eps}')
#     plt.grid()
#     plt.legend(bbox_to_anchor=(1, 1))
#     plt.show()

In [None]:
def show(dataset, model, test_epsilon=None, threshold=None, marker=100, results_dir=None):
    """Scatter-plot clean accuracy against PGD accuracy for one dataset/model.

    Runs are read through ``iter_results`` and split into three groups:
    ``learning_rate == 0`` ("DeepMind"), ``factor == 0`` ("Nominal") and the
    rest ("Ours"). Of the "Ours" runs only the one with the highest
    ``results.pgd_accuracy`` is plotted per experiment.

    Args:
        dataset: Dataset name; experiments with another dataset are ignored.
        model: Model name; experiments with another model are ignored.
        test_epsilon: If None, the y-axis shows ``results.pgd_accuracy``.
            Otherwise it shows the PGD accuracy recorded at this epsilon;
            runs that were not evaluated at this epsilon are left out.
        threshold: If given, runs whose accuracy is below it are dropped.
        marker: Marker size passed to ``scatter`` as ``s``.
        results_dir: Directory handed to ``iter_results``. Defaults to
            ``Path.home() / 'results'``.
    """
    if results_dir is None:
        results_dir = Path.home() / 'results'

    _, ax = plt.subplots()

    def accuracy_at(r):
        """Return the PGD accuracy of ``r`` at ``test_epsilon``, or None if absent."""
        for eps, acc in zip(r.pgd.epsilons, r.pgd.accuracies):
            # Tolerant comparison: exact float equality is fragile for values like 8/255.
            if np.isclose(float(eps), float(test_epsilon)):
                return acc
        return None

    def plot(results, *args, **kwargs):
        """Scatter the given runs; extra arguments are forwarded to ``ax.scatter``."""
        if test_epsilon is None:
            points = [(r.results.accuracy, r.results.pgd_accuracy) for r in results]
        else:
            points = [(r.results.accuracy, accuracy_at(r)) for r in results]
            points = [(x, y) for x, y in points if y is not None]
        if not points:
            return
        xs, ys = zip(*points)
        # No `cmap` here: without `c` matplotlib ignores it and emits a warning.
        ax.scatter(xs, ys, *args, s=marker, alpha=0.6, **kwargs)

    def get_eps(epsilon, train):
        """Format an epsilon for titles/legends; None is rendered as 'AUC'."""
        phase = 'train' if train else 'test'
        if epsilon is None:
            return 'AUC'
        if len(str(epsilon)) > 6:
            # Long decimal expansion: display as k/255. round() rather than int()
            # so float error (e.g. 15.999999...) does not yield k - 1.
            return rf'$\epsilon_\mathrm{{{phase}}}={round(epsilon * 255)}/255$'
        return rf'$\epsilon_\mathrm{{{phase}}}={epsilon}$'

    for experiment, results in iter_results(results_dir):
        if experiment.dataset != dataset or experiment.model != model:
            continue

        nominal = []
        deep_mind = []
        ours = []
        for r in results():
            if threshold is not None and r.results.accuracy < threshold:
                continue
            if r.parameters.learning_rate == 0:
                deep_mind.append(r)
            elif r.parameters.factor == 0:
                nominal.append(r)
            else:
                ours.append(r)

        if ours:
            # Keep only the run with the best pgd_accuracy (ties: the later run wins).
            _, max_index = max((r.results.pgd_accuracy, i) for i, r in enumerate(ours))
            ours = [ours[max_index]]

        train_eps = get_eps(experiment.epsilon, True)
        plot(nominal, marker='d', label='Nominal')
        plot(ours, marker='o', label=f'Ours [{train_eps}]')
        plot(deep_mind, marker='s', label=f'DeepMind [{train_eps}]')

    ax.set_title(f'{dataset} - {model}: {get_eps(test_epsilon, False)}')
    ax.set_xlabel('Accuracy')
    ax.set_ylabel('PGD Accuracy')
#     ax.set_xlim([0.45, 0.7])
#     ax.set_ylim([-0.1, 0.6])
    ax.grid()
    ax.legend(bbox_to_anchor=(1.5, 1))
    plt.show()

dataset = 'CIFAR10'
model = 'small_cnn'

# Per-dataset plotting settings: the accuracy threshold handed to `show` and
# the test epsilons for which one figure each is drawn.
EVAL_SETTINGS = {
    'CIFAR10': {'threshold': None, 'epsilons': [2/255, 8/255, 16/255, 0.1]},  # threshold candidate: 0.5
    'MNIST': {'threshold': None, 'epsilons': [0.1, 0.2, 0.3, 0.4]},  # threshold candidate: 0.975
}

# Fail loudly for an unknown dataset instead of hitting a NameError or silently
# reusing `threshold`/`epsilons` left in the kernel by an earlier run.
if dataset not in EVAL_SETTINGS:
    raise ValueError(f'No evaluation settings for dataset {dataset!r}; known: {sorted(EVAL_SETTINGS)}')

threshold = EVAL_SETTINGS[dataset]['threshold']
epsilons = EVAL_SETTINGS[dataset]['epsilons']

# One figure per test epsilon, then one for the AUC over all test epsilons.
for eps in epsilons:
    show(dataset, model, eps, threshold=threshold)
show(dataset, model, threshold=threshold)