In [2]:
%load_ext autoreload
%autoreload 2
import torch
from utils import get_mnist_data
from models import ConvNN
from training_and_evaluation import evaluate_robustness_smoothing

  import pandas.util.testing as tm


# Part 4: Randomized smoothing certification
In this notebook we compare the robustness of the classifiers from Parts 1-3 via randomized smoothing.

In [3]:
# Test split and base classifier used for certification.
use_cuda = torch.cuda.is_available()
mnist_testset = get_mnist_data(train=False)
model = ConvNN().cuda() if use_cuda else ConvNN()

# Settings forwarded to evaluate_robustness_smoothing below.
# num_samples_1 / num_samples_2 are the two sample counts it accepts; both are
# already at a reduced setting. Lower them further if certification is too slow
# (e.g. no CUDA available).
num_samples_1 = 100
num_samples_2 = 1_000
# Lower this (e.g. to 500) if the machine runs out of memory.
certification_batch_size = 5_000
# NOTE(review): presumably the smoothing noise level and the allowed failure
# probability of the certificate -- confirm against evaluate_robustness_smoothing.
sigma = 1
alpha = 0.05

In [4]:
# Names of the training procedures; each one has a checkpoint stored as
# models/<name>.checkpoint.
training_types = [
    "standard_training",
    "adversarial_training",
    "randomized_smoothing",
]
# Show whether CUDA was detected.
use_cuda

True

In [5]:
# Quick check of Tensor.repeat: asking for more repeat dimensions than the
# tensor has prepends new leading dimensions, so a 1-D tensor of length 3
# repeated as (2, 1, 1, 1) has shape [2, 1, 1, 3].
a = torch.tensor([1, 2, 3])
a.repeat(2, 1, 1, 1).shape


torch.Size([2, 1, 1, 3])

### Robustness certification
Here we first load the checkpoints of the base classifiers obtained with the different training methods of Parts 1-3. Then we perform robustness certification of the corresponding smooth classifiers via randomized smoothing.

In [6]:
# Certify the smoothed version of every base classifier and collect the
# per-training-type results in `results` (keyed by training type).
results = {}
training_types = ["standard_training", "adversarial_training", "randomized_smoothing"]
for training_type in training_types:
    # Deserialize onto the CPU so that checkpoints saved from a CUDA model can
    # also be loaded on machines without a GPU; load_state_dict then copies the
    # weights into the model's parameters on whatever device they live on.
    state_dict = torch.load(f"models/{training_type}.checkpoint", map_location="cpu")
    model.load_state_dict(state_dict)
    certification_results = evaluate_robustness_smoothing(
        model,
        sigma,
        mnist_testset,
        num_samples_1=num_samples_1,
        num_samples_2=num_samples_2,
        alpha=alpha,
        certification_batch_size=certification_batch_size,
    )
    results[training_type] = certification_results

HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10000.0), HTML(value='')))




### Robustness comparison
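Compare the robustness of the classifiers obtained with the different training types. As the output below shows, robust training via randomized smoothing yields by far the largest number of correctly certified test samples, i.e. it leads to the best certified robustness.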

In [7]:
# One summary line per training type: number of correctly certified samples
# and the average certifiable radius reported by the certification run.
for name, stats in results.items():
    n_certified = stats['correct_certified']
    mean_radius = stats['avg_radius']
    print(f"{name}: correct_certified {n_certified}, avg. certifiable radius: {mean_radius}")
# NOTE(review): the three values below were left here without explanation --
# presumably reference numbers from an earlier run; confirm or remove.
# 0.1947
# 0.0508
# 0.8009

standard_training: correct_certified 978, avg. certifiable radius: 1.8696490264975123
adversarial_training: correct_certified 2129, avg. certifiable radius: 0.5519319865542822
randomized_smoothing: correct_certified 7820, avg. certifiable radius: 0.9933074982593139
