In [None]:
# Download the pretrained ensemble checkpoint (an .h5 weights file) from the
# robustness_workshop v0.0.1 GitHub release. wget saves it under its original
# file name, which is the name `filepath` in the parameters cell points to.
!wget https://github.com/wielandbrendel/robustness_workshop/releases/download/v0.0.1/diversity_cifar10_ResNet20v1_model.159.h5

In [None]:
# Make sure the right version of TensorFlow is installed: pin it to exactly 2.0.0.
# NOTE(review): if a different TensorFlow version was already imported in this
# kernel, a kernel restart is presumably needed before the pin takes effect.
!pip3 install --upgrade tensorflow==2.0.0

In [None]:
import tensorflow as tf

from tensorflow.keras.layers import AveragePooling2D, Input, Flatten
from tensorflow.keras.models import Model, load_model

import os
import numpy as np
import foolbox as fb
from model import resnet_v1

In [None]:
# ---- notebook configuration -------------------------------------------------

# Number of target classes; forwarded to resnet_v1 when the ensemble is built.
num_classes = 10

# Not referenced elsewhere in this notebook.
# NOTE(review): presumably the training-time loss weights and augmentation
# switch of the checkpoint being evaluated -- confirm against the training code.
lamda = 2.0
log_det_lamda = 0.5
augmentation = False

# Checkpoint file handed to `model.load_weights`.
filepath = 'diversity_cifar10_ResNet20v1_model.159.h5'

# Network depth is derived from n as 6n + 2, i.e. 20 for n = 3.
n = 3
depth = 6 * n + 2
version = 1

# Shape of a single input image, used for the Keras Input layer.
input_shape = (32, 32, 3)

In [None]:
# One shared input tensor feeds every ensemble member.
model_input = Input(shape=input_shape)

# Build the three members in order. Index 2 of whatever resnet_v1 returns is
# collected as the member's output tensor.
# NOTE(review): presumably the prediction tensor -- confirm in model.py.
model_dic = {}
model_out = []
for i in range(3):
    member_key = str(i)
    member = resnet_v1(
        input=model_input,
        depth=depth,
        num_classes=num_classes,
        dataset='cifar10',
    )
    model_dic[member_key] = member
    model_out.append(member[2])

# `model` concatenates the member outputs; the checkpoint is loaded into it.
model_output = tf.keras.layers.concatenate(model_out)
model = Model(inputs=model_input, outputs=model_output)

# `model_ensemble` averages the very same member output tensors, so it is built
# on the same layers as `model`.
model_ensemble = Model(
    inputs=model_input,
    outputs=tf.keras.layers.Average()(model_out),
)

# load model
model.load_weights(filepath)

# compile model
model_ensemble.compile('sgd')

In [None]:
# Subtracting pixel mean improves accuracy
subtract_pixel_mean = True

# Use the Keras that ships with TensorFlow (the one the model above is built
# with) instead of the standalone `keras` package, so the notebook depends on a
# single Keras implementation. The names `keras` and `cifar10` stay bound.
from tensorflow import keras
from tensorflow.keras.datasets import cifar10

# Number of test images kept for the (slow) adversarial evaluation.
num_test_images = 200

# Load the data.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Input image dimensions.
input_shape = x_train.shape[1:]

# Normalize data to [0, 1]; only the first `num_test_images` test images are kept.
x_train = x_train.astype('float32') / 255
x_test = x_test[:num_test_images].astype('float32') / 255

# Valid pixel range. When the mean is subtracted these become per-pixel arrays
# (0 - mean and 1 - mean) expressed in the shifted coordinate system.
clip_min = 0.0
clip_max = 1.0
if subtract_pixel_mean:
    # Per-pixel mean over the training set, applied to both splits.
    x_train_mean = np.mean(x_train, axis=0)
    x_train -= x_train_mean
    x_test -= x_train_mean
    clip_min -= x_train_mean
    clip_max -= x_train_mean

# Flatten the labels to 1-D and keep the test labels aligned with x_test.
y_test, y_train = y_test[:num_test_images].flatten(), y_train.flatten()

In [None]:
# Predicted class = argmax over the ensemble's output for each test image.
ensemble_scores = model_ensemble.predict(x_test)
pred = ensemble_scores.argmax(1)

# Fraction of test images whose predicted class matches the label.
clean_accuracy = np.mean(pred == y_test)
print(f'Clean accuracy: {clean_accuracy:.3f}')

In [None]:
# Convert to a Foolbox model.
# The inputs are scaled to [0, 1] and (optionally) mean-subtracted, so they lie
# inside [-1, 1]; (-2, 2) is a loose box around that range.
# NOTE(review): the per-pixel clip_min / clip_max computed in the data cell are
# not used anywhere, so adversarials are presumably only constrained to this
# box rather than to valid pixel values -- confirm this is intended.
input_bounds = (-2, 2)
fmodel = fb.models.TensorFlowModel(model_ensemble, bounds=input_bounds)

In [None]:
# TensorFlow tensors for the whole evaluation subset. Both names are rebound
# per batch inside the attack loop further down.
images, labels = (
    tf.convert_to_tensor(x_test, dtype=tf.float32),
    tf.convert_to_tensor(y_test),
)

In [None]:
# a simple wrapper for the init attack in BB
class init_attack(object):
    """Adapter exposing a regular attack through a ``run(model, originals, criterion_)`` method.

    The wrapped ``attack`` is called as
    ``attack(model, originals, criterion=criterion_, epsilons=epsilons)`` and the
    element at index 1 of its result is returned. For the attacks used in this
    notebook, which return ``(adv, adv_clipped, adv_mask)``, that is the clipped
    adversarial batch.

    Args:
        attack: Callable attack to wrap.
        epsilons: Perturbation budget given to the wrapped attack. Defaults to
            0.3, the value that was previously hard-coded.
    """

    def __init__(self, attack, epsilons=0.3):
        self.attack = attack
        self.epsilons = epsilons

    def run(self, model, originals, criterion_):
        """Run the wrapped attack on ``originals`` and return element 1 of its result."""
        # Attack the batch that was actually passed in. Reading a global
        # `images` here would silently use whatever batch that name happens to
        # hold (or fail with NameError when it does not exist).
        result = self.attack(model, originals, criterion=criterion_, epsilons=self.epsilons)
        return result[1]

In [None]:
# Robust accuracy at a fixed L-inf budget `epsilon`.
#
# Every batch goes through a cascade of attacks, cheapest first. An image counts
# as robust only if none of the attacks (each repeated `repetitions` times)
# finds an adversarial for it; images that have been broken are dropped from
# the batch so later, more expensive attacks only see the survivors.
acc = 0
total_images = 0

epsilon = 0.01
repetitions = 3
num_batches = 10  # np.split requires the test set size to be divisible by this

pdg20_attack = fb.attacks.LinfPGD(steps=20, abs_stepsize=epsilon/10, random_start=True)
pdg200_attack = fb.attacks.LinfPGD(steps=200, abs_stepsize=epsilon/20, random_start=True)
pdg_init_attack = fb.attacks.LinfPGD(steps=20, abs_stepsize=epsilon/2, random_start=True)
bb_attack = fb.attacks.LinfinityBrendelBethgeAttack(init_attack(pdg_init_attack), steps=200)

# Order matters: quick PGD, then longer PGD, then the Brendel & Bethge attack.
attack_schedule = (pdg20_attack, pdg200_attack, bb_attack)

for _images, _labels in zip(np.split(x_test, num_batches), np.split(y_test, num_batches)):
    # mask[i] stays True while image i of this batch has survived every attack so far.
    mask = np.ones(len(_images), dtype=bool)

    for attack in attack_schedule:
        for r in range(repetitions):
            if not mask.any():
                # Every image in the batch is already broken; nothing left to attack.
                break

            # Only the still-robust images are attacked.
            images = tf.convert_to_tensor(_images[mask], dtype=tf.float32)
            labels = tf.convert_to_tensor(_labels[mask])

            adv, adv_clipped, adv_mask = attack(fmodel, images, criterion=fb.criteria.Misclassification(labels), epsilons=epsilon)

            # adv_mask has one entry per attacked (surviving) image; clear the
            # survivors for which an adversarial was found.
            mask[mask] = ~adv_mask.numpy()

    # Survivors are counted directly from the mask instead of being
    # reconstructed from the result of whichever attack happened to run last.
    acc += int(mask.sum())
    total_images += _images.shape[0]

    print(total_images, acc / total_images)

print(f'Model accuracy on adversarials: {acc / total_images:.3f}')