In [None]:
import torch
from pathlib import Path
from ibp_neurips19 import models
from argparse import Namespace

def iter_results(main_dir=Path.home() / 'results', exclude=None, with_models=False):
    """Yield one ``Namespace`` per PGD result file found in ``main_dir / 'pgd'``.

    Every regular file in that directory is read with ``torch.load`` (in sorted
    path order) and its top-level keys become attributes of the namespace. The
    keys ``dataset``, ``model``, ``epsilon`` and ``learning_rate`` must be
    present because they are used to locate the matching checkpoint.

    Attributes added to each yielded namespace:
        checkpoint_file: ``Path`` of the checkpoint. For ``learning_rate == 0``
            it is ``dm_torch/<dataset lowercased>-<model>-<epsilon>.pth``,
            otherwise ``<dataset>-<model>_cnn-<epsilon>/<learning_rate>/checkpoint.pth``
            (both relative to ``main_dir``).
        accuracy: ``checkpoint['accuracy']`` when ``learning_rate == 0``,
            otherwise ``checkpoint['best_acc1'] / 100``.
        net: only when ``with_models`` is true; the ``<model>_cnn`` network
            with ``checkpoint['state_dict']`` loaded.

    Args:
        main_dir: root results directory (anything accepted by ``Path``).
        exclude: optional predicate called with the namespace; a truthy return
            skips the result before its checkpoint is read.
        with_models: whether to also build and attach the network.

    Yields:
        argparse.Namespace: the enriched result described above.
    """
    main_dir = Path(main_dir)
    # Single-entry memo: consecutive results frequently point at the same
    # checkpoint, so avoid re-reading it from disk each time.
    cached_path, cached_checkpoint = None, None
    for file in sorted((main_dir / 'pgd').iterdir()):
        if not file.is_file():
            # Skip stray directories (e.g. ``.ipynb_checkpoints``) instead of
            # crashing inside ``torch.load``.
            continue
        # map_location='cpu' lets files saved from a GPU run be inspected on a
        # machine without CUDA.
        r = Namespace(**torch.load(file, map_location='cpu'))
        if exclude and exclude(r):
            continue
        if r.learning_rate == 0:
            r.checkpoint_file = (main_dir / 'dm_torch'
                                 / f'{r.dataset.lower()}-{r.model}-{r.epsilon}.pth')
        else:
            r.checkpoint_file = (main_dir / f'{r.dataset}-{r.model}_cnn-{r.epsilon}'
                                 / f'{r.learning_rate}' / 'checkpoint.pth')
        if r.checkpoint_file != cached_path:
            cached_checkpoint = torch.load(r.checkpoint_file, map_location='cpu')
            cached_path = r.checkpoint_file
        checkpoint = cached_checkpoint
        if r.learning_rate == 0:
            r.accuracy = checkpoint['accuracy']
        else:
            # 'best_acc1' is divided by 100 to put it on the same scale as 'accuracy'.
            r.accuracy = checkpoint['best_acc1'] / 100
        if with_models:
            r.net = models.__dict__[f'{r.model}_cnn']()
            # NOTE(review): the return value of fit_to_dataset is only used for
            # .eval(); this presumably relies on r.net being adapted in place -- confirm.
            models.fit_to_dataset(r.net, r.dataset).eval()
            r.net.load_state_dict(checkpoint['state_dict'])
        yield r

In [None]:
# exclude = lambda r: r.dataset != 'MNIST' or r.model != 'large' or r.epsilon != 0.1 or r.test_epsilon != 0.1

# for r in iter_results(exclude=exclude):
#     exp = f'{r.dataset}-{r.model}-{r.epsilon}:{r.test_epsilon:.4e}-{r.learning_rate}'
#     print(f"{exp} [{100 * r.accuracy:0.2f}%] = {100 * (1 - r.fooling_rate):.2f}%")

In [None]:
from collections import defaultdict

# Nested index of PGD results:
#   results[(dataset, model, epsilon)][learning_rate][test_epsilon] -> Namespace
# Only MNIST runs of the 'medium' model are kept; everything else is excluded
# before its checkpoint is loaded.
results = defaultdict(lambda: defaultdict(dict))
for r in iter_results(exclude=lambda r: not (r.dataset == 'MNIST' and r.model == 'medium')):
    summary = Namespace(accuracy=r.accuracy, fooling_rate=r.fooling_rate)
    results[r.dataset, r.model, r.epsilon][r.learning_rate][r.test_epsilon] = summary

In [None]:
import numpy as np
from matplotlib import pyplot as plt

def _normalized_auc(xs, ys):
    """Return the trapezoidal area under ``ys`` over ``xs`` divided by the x-span.

    ``xs`` must be sorted in increasing order. When all x values coincide the
    span is zero, so the mean of ``ys`` is returned instead of dividing by zero.
    """
    span = xs[-1] - xs[0]
    if span == 0:
        return float(np.mean(ys))
    # ``np.trapz`` was renamed ``np.trapezoid`` in NumPy 2.0; use whichever exists.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    return trapezoid(ys, xs) / span


# One figure per (dataset, model, epsilon) key, one curve per learning rate.
for key, by_learning_rate in results.items():
    fig, ax = plt.subplots()
    for learning_rate, by_test_epsilon in by_learning_rate.items():
        # Dict keys are unique, so sorting them orders the curve by test epsilon.
        test_epsilons = sorted(by_test_epsilon)
        pgd_accuracy = [1 - by_test_epsilon[eps].fooling_rate for eps in test_epsilons]
        # Clean accuracy comes from the checkpoint, so it is the same for every
        # test epsilon of a given run; take the first one.
        accuracy = next(iter(by_test_epsilon.values())).accuracy
        auc = _normalized_auc(test_epsilons, pgd_accuracy)
        # learning_rate == 0 marks the runs labelled 'Deep Mind' in the legend.
        if learning_rate == 0:
            label = f'[{100 * accuracy:.2f}% - {100 * auc:.2f}%] Deep Mind'
        else:
            label = f'[{100 * accuracy:.2f}% - {100 * auc:.2f}%] lr = {learning_rate}'
        ax.plot(test_epsilons, pgd_accuracy, marker='o', label=label)

    ax.set_title(str(key))
    ax.set_ylabel('PGD accuracy')
    ax.set_xlabel('eps_test')
    ax.legend()
    plt.show()
    # Release the figure so long sweeps do not accumulate open figures.
    plt.close(fig)