In [1]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from nn_robust_attacks.setup_mnist import MNIST, MNISTModel

Using TensorFlow backend.


In [2]:
# Create a TF1-style session with a default ConfigProto and register it with
# the Keras backend imported above as K (tensorflow.keras.backend).
# NOTE(review): the models below are built/loaded with the standalone `keras`
# package, not tensorflow.keras — confirm both share this session in the
# installed versions, since `sess` is later handed to CarliniL2.
sess = tf.Session(config=tf.ConfigProto())
K.set_session(sess)

# Dataset wrapper from nn_robust_attacks.setup_mnist; the training cells read
# its train_data/train_labels/validation_data/validation_labels attributes.
data = MNIST()

# CNN Model for MNIST 
class MNIST_Model:
    """Small convolutional classifier for MNIST wrapped around a Keras model.

    The network is two blocks of (Conv-ReLU, Conv-ReLU, MaxPool) followed by
    two 200-unit ReLU dense layers and a final dense layer with one output
    per label. No softmax is applied, so the outputs are raw logits.

    Attributes:
        num_channels: number of image channels (1).
        image_size: height/width of the square input images (28).
        num_labels: number of output classes (10).
        model: the underlying Keras ``Sequential`` model.
    """

    def __init__(self, session=None):
        # `session` is accepted for interface compatibility but is not used here.
        self.num_channels = 1
        self.image_size = 28
        self.num_labels = 10

        input_shape = (self.image_size, self.image_size, self.num_channels)

        # Layers are listed in forward order; Sequential adds them one by one.
        layers = [
            Conv2D(32, (3, 3), input_shape=input_shape),
            Activation('relu'),
            Conv2D(32, (3, 3)),
            Activation('relu'),
            MaxPooling2D(pool_size=(2, 2)),

            Conv2D(64, (3, 3)),
            Activation('relu'),
            Conv2D(64, (3, 3)),
            Activation('relu'),
            MaxPooling2D(pool_size=(2, 2)),

            Flatten(),
            Dense(200),
            Activation('relu'),
            Dense(200),
            Activation('relu'),
            Dense(self.num_labels),
        ]

        self.model = Sequential(layers)

    def predict(self, data):
        """Apply the wrapped model to `data` by calling it, and return the result."""
        return self.model(data)

In [3]:
# Training variables
num_epochs = 10   # number of epochs passed to model.fit in the training cells
batch_size = 128  # mini-batch size passed to model.fit
train_temp = 1    # divisor applied to the logits inside the loss `fn` (softmax temperature)

# When True the model cells train from scratch and save to models/...;
# when False they load the previously saved models instead.
training = False

# Model Training

In [4]:
from keras.models import load_model, Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten
from keras.optimizers import SGD

def fn(correct, predicted):
    """Softmax cross-entropy loss on temperature-scaled logits.

    Args:
        correct: one-hot (or soft) label tensor.
        predicted: raw logits from the model; they are divided by the
            module-level `train_temp` before the softmax.

    Returns:
        A tensor holding the per-example cross-entropy.
    """
    # The non-v2 op is deprecated (see the warning in the recorded output).
    # Wrapping the labels in stop_gradient keeps the old behaviour of not
    # back-propagating into the labels.
    return tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tf.stop_gradient(correct),
        logits=predicted / train_temp)

# Train first model
modelname = "models/trained_model1"
model1 = MNIST_Model()
if training:
    # Train from scratch with Nesterov-momentum SGD, then persist the model.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model1.model.compile(loss=fn,
                         optimizer=sgd,
                         metrics=['accuracy'])
    # `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
    model1.model.fit(data.train_data, data.train_labels,
                     batch_size=batch_size,
                     validation_data=(data.validation_data, data.validation_labels),
                     epochs=num_epochs,
                     shuffle=True)
    model1.model.save(modelname)
else:
    # The saved model references the custom loss by name, so it must be supplied.
    model1.model = load_model(modelname, custom_objects={'fn': fn})

model1.model.summary()


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
activation_25 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_26 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 12, 12, 32)        0         
___________________________________________________________

In [5]:
# Train second model
modelname = "models/trained_model2"
model2 = MNIST_Model()
if training:
    # Train from scratch with Nesterov-momentum SGD, then persist the model.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model2.model.compile(loss=fn,
                         optimizer=sgd,
                         metrics=['accuracy'])
    # `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
    model2.model.fit(data.train_data, data.train_labels,
                     batch_size=batch_size,
                     validation_data=(data.validation_data, data.validation_labels),
                     epochs=num_epochs,
                     shuffle=True)
    model2.model.save(modelname)
else:
    # The saved model references the custom loss by name, so it must be supplied.
    model2.model = load_model(modelname, custom_objects={'fn': fn})

model2.model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
activation_43 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_44 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_45 (Activation)   (None, 10, 10, 64)       

In [6]:
# Train third model
modelname = "models/trained_model3"
model3 = MNIST_Model()
if training:
    # Train from scratch with Nesterov-momentum SGD, then persist the model.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model3.model.compile(loss=fn,
                         optimizer=sgd,
                         metrics=['accuracy'])
    # `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
    model3.model.fit(data.train_data, data.train_labels,
                     batch_size=batch_size,
                     validation_data=(data.validation_data, data.validation_labels),
                     epochs=num_epochs,
                     shuffle=True)
    model3.model.save(modelname)
else:
    # The saved model references the custom loss by name, so it must be supplied.
    model3.model = load_model(modelname, custom_objects={'fn': fn})

model3.model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_25 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
activation_37 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_38 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_27 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_39 (Activation)   (None, 10, 10, 64)       

In [7]:
# Train fourth model
modelname = "models/trained_model4"
model4 = MNIST_Model()
if training:
    # Train from scratch with Nesterov-momentum SGD, then persist the model.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model4.model.compile(loss=fn,
                         optimizer=sgd,
                         metrics=['accuracy'])
    # `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
    model4.model.fit(data.train_data, data.train_labels,
                     batch_size=batch_size,
                     validation_data=(data.validation_data, data.validation_labels),
                     epochs=num_epochs,
                     shuffle=True)
    model4.model.save(modelname)
else:
    # The saved model references the custom loss by name, so it must be supplied.
    model4.model = load_model(modelname, custom_objects={'fn': fn})

model4.model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
activation_43 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_44 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_45 (Activation)   (None, 10, 10, 64)       

In [8]:
# Train fifth model
modelname = "models/trained_model5"
model5 = MNIST_Model()
if training:
    # Train from scratch with Nesterov-momentum SGD, then persist the model.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model5.model.compile(loss=fn,
                         optimizer=sgd,
                         metrics=['accuracy'])
    # `epochs` is the Keras 2 keyword; `nb_epoch` is the deprecated Keras 1 name.
    model5.model.fit(data.train_data, data.train_labels,
                     batch_size=batch_size,
                     validation_data=(data.validation_data, data.validation_labels),
                     epochs=num_epochs,
                     shuffle=True)
    model5.model.save(modelname)
else:
    # The saved model references the custom loss by name, so it must be supplied.
    model5.model = load_model(modelname, custom_objects={'fn': fn})

model5.model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
activation_25 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
activation_26 (Activation)   (None, 24, 24, 32)        0         
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
activation_27 (Activation)   (None, 10, 10, 64)       

# Preprocess / Setup

In [9]:
from keras.datasets import mnist as data_keras
from keras.utils import to_categorical

# Load the Keras copy of MNIST (separate from the MNIST() wrapper used for training).
(x_train, y_train), (x_test, y_test) = data_keras.load_data()

# Append a trailing channel axis and divide the pixel values by 255.
# NOTE(review): confirm this scaling matches what MNIST() applied to
# data.train_data, because the trained models are evaluated on x_test below.
x_train = np.expand_dims(x_train, axis=-1) / 255.0
x_test = np.expand_dims(x_test, axis=-1) / 255.0

# One-hot encode the integer class labels.
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [10]:
# Model performances: evaluate each trained model on the Keras test split and
# print its loss and accuracy, one line per model in order model1..model5.
for m in (model1, model2, model3, model4, model5):
    scores = m.model.evaluate(x_test, y_test)
    print("loss={}, accuracy={}".format(*scores))

loss=0.060424025029380574, accuracy=0.9833999872207642
loss=0.060767219057303735, accuracy=0.9829999804496765
loss=0.1604284207782708, accuracy=0.9513999819755554
loss=0.06374289788251045, accuracy=0.979200005531311
loss=0.1024473569555208, accuracy=0.9670000076293945


In [11]:
# Use the reversed greyscale colormap as the default for later image plots.
plt.set_cmap('Greys_r')
# Open a new figure; nothing is drawn on it in this cell (the recorded
# output shows only empty figures).
plt.figure()

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

In [13]:
true_label = 0     # true digit class of the images stored in inputs_0.csv.npy
num_img = 25       # number of images to attack
target_label = 9   # class the attack should make the models predict
num_classes = 10   # width of the one-hot target vectors

# One identical one-hot target row per image, shape (num_img, num_classes).
# num_classes is passed explicitly: to_categorical otherwise infers the width
# from the largest label it sees, which only equals 10 when the target is 9.
targets = np.tile(to_categorical(target_label, num_classes=num_classes),
                  (num_img, 1))

# Pre-saved batch of input images to perturb.
inputs = np.load('inputs_0.csv.npy')

# Helper Functions

In [14]:
def plot_adversarial(inputs, adv, index=0):
    """Show an input image, its adversarial version and their difference.

    Args:
        inputs: batch of original images indexed as inputs[i][:, :, 0].
        adv: batch of adversarial images with the same layout as `inputs`.
        index: which sample of the batch to plot (defaults to the first).
    """
    original = inputs[index][:, :, 0]
    adversarial = adv[index][:, :, 0]

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
    ax1.imshow(original)
    ax1.set_title('Original')
    ax2.imshow(adversarial)
    ax2.set_title('Adversarial')
    # Third panel: the perturbation that was added to the original.
    ax3.imshow(adversarial - original)
    ax3.set_title('Perturbation')

In [16]:
from numpy import savetxt

def save_results(inputs, adv, targets, subset_attacked):
    """Save the inputs, adversarial examples and targets of one attack run.

    Three arrays are written under ``four_subset_data/``. np.save appends
    ``.npy`` because the names end in ``.csv``, so the files on disk are
    ``*.csv.npy`` binary NumPy files, not CSV text.

    Args:
        inputs: original images given to the attack.
        adv: adversarial examples returned by the attack.
        targets: target labels given to the attack.
        subset_attacked: string tag identifying the run; embedded in the
            file names.
    """
    import os

    out_dir = 'four_subset_data'
    # Create the output directory up front so an expensive attack result is
    # not lost to a FileNotFoundError.
    os.makedirs(out_dir, exist_ok=True)

    # NOTE(review): the inputs file name has a double underscore
    # ('inputs_att__<tag>') unlike the other two; kept as-is so existing
    # files and readers keep working.
    np.save(out_dir + '/inputs_att_' + '_' + subset_attacked + '.csv', inputs)
    np.save(out_dir + '/adv_att_' + subset_attacked + '.csv', adv)
    np.save(out_dir + '/targets_att_' + subset_attacked + '.csv', targets)

In [17]:
def get_label_confidence(image, model):
    """Return the predicted class and its softmax confidence for one image.

    Args:
        image: a batch holding a single image; only the first sample's
            prediction is used.
        model: object whose ``predict(image)`` returns one row of raw logits
            per sample.

    Returns:
        Tuple ``(label, confidence)``: the index of the largest logit and the
        softmax probability assigned to it.
    """
    # One forward pass; the class is the argmax of the same logits, which is
    # what predict_classes returns for a multi-class output.
    logits = np.asarray(model.predict(image))[0]

    # Subtract the maximum before exponentiating so large logits do not
    # overflow to inf and turn the result into NaN.
    exp_shifted = np.exp(logits - np.max(logits))
    probabilities = exp_shifted / np.sum(exp_shifted)

    return np.argmax(logits), np.max(probabilities)

In [18]:
def attack_evaluation(num_img, adv, num_models):
    """Print each model's predicted label and confidence per adversarial image.

    Args:
        num_img: number of images from `adv` to report on.
        adv: batch of adversarial images.
        num_models: how many of the notebook models model1..model4 to
            report (1 prints only model1, 4 prints all four).
    """
    for i in range(num_img):
        print(i)
        sample = adv[i][np.newaxis, ...]  # re-add the batch axis for predict()
        print('Model 1', get_label_confidence(sample, model1.model))
        if num_models > 1:
            # Report only as many additional models as requested instead of
            # always printing models 2-4.
            others = (model2, model3, model4)[:num_models - 1]
            for number, wrapper in enumerate(others, start=2):
                print('Model {}'.format(number),
                      get_label_confidence(sample, wrapper.model))

In [19]:
def size_of_attack(adv, inputs, num_img):
    """Print the mean and standard deviation of the per-image perturbation sum.

    The perturbation is ``adv - inputs``. For every image its entries are
    summed (signed, not a norm), and the mean and standard deviation of
    those per-image totals are printed.

    Args:
        adv: batch of adversarial images, first axis indexes images.
        inputs: batch of original images with the same shape as `adv`.
        num_img: number of images; used as the divisor for the mean.
    """
    perturb = np.asarray(adv) - np.asarray(inputs)

    # Reduce every axis except the image axis. The builtin sum() only reduces
    # the first axis of each image, which made the old std describe column
    # sums rather than the per-image totals used for the mean.
    per_image_total = perturb.sum(axis=tuple(range(1, perturb.ndim)))

    perturb_mean = np.sum(perturb) / num_img
    perturb_std = np.std(per_image_total)
    print('Mean', perturb_mean)
    print('Standard Deviation', perturb_std)

In [20]:
def transferability(num_img, adv, n):
    """Print model5's predicted label and confidence for each adversarial image.

    Args:
        num_img: number of images from `adv` to report on.
        adv: batch of adversarial images.
        n: accepted for call compatibility; not used.
    """
    for idx in range(num_img):
        print(idx)
        single = adv[idx][np.newaxis, ...]  # batch of one image
        verdict = get_label_confidence(single, model5.model)
        print('Model 5', verdict)

In [21]:
def print_results(num_img, adv, num_models, inputs):
    """Print the full report for one attack run.

    Runs, in order, the per-model evaluation, the perturbation-size
    statistics and the model5 transfer check. The helpers print their own
    output and return None, so they are called directly rather than wrapped
    in print(), which only added stray "None" lines.

    Args:
        num_img: number of adversarial images to report on.
        adv: batch of adversarial images.
        num_models: number of models passed on to attack_evaluation.
        inputs: original images that `adv` was derived from.
    """
    attack_evaluation(num_img, adv, num_models)
    size_of_attack(adv, inputs, num_img)
    transferability(num_img, adv, num_models)

# Baseline (Attacking One Model)

In [None]:
# Single-model attack class. Later cells re-import the name CarliniL2 from
# other nn_robust_attacks modules, so this import must be the most recently
# executed one when this cell runs.
from nn_robust_attacks.l2_attack import CarliniL2

# Tag embedded in the file names written by save_results().
subset = 'baseline'
# Run the attack against model1 only, using the session created at the top.
adv = CarliniL2(sess, model1).attack(inputs, targets)
save_results(inputs, adv, targets, subset)



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

go up to 25
tick 0
tick 1
tick 2


In [None]:
# Report on `adv` from the baseline attack cell; num_models=1 limits the
# per-model evaluation to model1 (model5 transfer is still printed).
print_results(num_img, adv, 1, inputs)

# Four Model Attack (1 Target)

## D : (A, D, D, D)

In [1]:
# Rebinds CarliniL2 to the four-model variant that takes an `attack` flag;
# the cells for subsets '2', '3' and '4' below rely on this binding.
from nn_robust_attacks.l2_attack_quad_single_triple import CarliniL2

# Tag embedded in the file names written by save_results().
subset = '1'
# model1 is passed first, followed by the other three models.
# NOTE(review): presumably attack=False treats only the first model as the
# attacked one, per the "(A, D, D, D)" heading — confirm in the attack module.
adv = CarliniL2(sess, model1, model2, model3, model4, attack=False).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [1]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [138]:
# Same call shape as subset '1' with model2 in the first position. Uses
# whichever CarliniL2 was imported last; the attack=False keyword matches the
# l2_attack_quad_single_triple import.
subset = '2'
adv = CarliniL2(sess, model2, model1, model3, model4, attack=False).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [138]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [138]:
# Same call shape as subset '1' with model3 in the first position. Uses
# whichever CarliniL2 was imported last; the attack=False keyword matches the
# l2_attack_quad_single_triple import.
subset = '3'
adv = CarliniL2(sess, model3, model1, model2, model4, attack=False).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [138]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [138]:
# Same call shape as subset '1' with model4 in the first position. Uses
# whichever CarliniL2 was imported last; the attack=False keyword matches the
# l2_attack_quad_single_triple import.
subset = '4'
adv = CarliniL2(sess, model4, model1, model2, model3, attack=False).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [138]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

# Four Model Attack (2 Targets)
## A : (A, A, D, D)

In [2]:
# Rebinds CarliniL2 to the l2_attack_quad_double variant; the cells for
# subsets '13' through '34' below rely on this binding.
from nn_robust_attacks.l2_attack_quad_double import CarliniL2

# Tag embedded in the file names written by save_results().
subset = '12'
# The two models named in `subset` are passed first, then the remaining two.
# NOTE(review): presumably the first two are the attacked models, per the
# "(A, A, D, D)" heading — confirm in the attack module.
adv = CarliniL2(sess, model1, model2, model3, model4).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-2-5915713f6fa7>, line 6)

In [142]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [144]:
# model1 and model3 are passed first, then model2 and model4. Uses whichever
# CarliniL2 was imported last (this section imports l2_attack_quad_double).
subset = '13'
adv = CarliniL2(sess, model1, model3, model2, model4).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [144]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [144]:
# model1 and model4 are passed first, then model2 and model3. Uses whichever
# CarliniL2 was imported last (this section imports l2_attack_quad_double).
subset = '14'
adv = CarliniL2(sess, model1, model4, model2, model3).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [144]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [None]:
# model2 and model3 are passed first, then model1 and model4. Uses whichever
# CarliniL2 was imported last (this section imports l2_attack_quad_double).
subset = '23'
adv = CarliniL2(sess, model2, model3, model1, model4).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [None]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [None]:
# model2 and model4 are passed first, then model1 and model3. Uses whichever
# CarliniL2 was imported last (this section imports l2_attack_quad_double).
subset = '24'
adv = CarliniL2(sess, model2, model4, model1, model3).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [None]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [None]:
# model3 and model4 are passed first, then model1 and model2. Uses whichever
# CarliniL2 was imported last (this section imports l2_attack_quad_double).
subset = '34'
adv = CarliniL2(sess, model3, model4, model1, model2).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [None]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

# Four Model Attack (3 Targets)
## A : (A, A, A, D)

In [3]:
# Rebinds CarliniL2 back to the variant that takes an `attack` flag (the
# previous section bound it to l2_attack_quad_double); the cells for subsets
# '124', '134' and '234' below rely on this binding.
from nn_robust_attacks.l2_attack_quad_single_triple import CarliniL2

# Tag embedded in the file names written by save_results().
subset = '123'
# The three models named in `subset` are passed first, the remaining one last.
# NOTE(review): presumably attack=True treats the first three models as the
# attacked ones, per the "(A, A, A, D)" heading — confirm in the attack module.
adv = CarliniL2(sess, model1, model2, model3, model4, attack=True).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [3]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [3]:
# model1, model2 and model4 are passed first, model3 last. Uses whichever
# CarliniL2 was imported last; the attack=True keyword matches the
# l2_attack_quad_single_triple import.
subset = '124'
adv = CarliniL2(sess, model1, model2, model4, model3, attack=True).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [3]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [3]:
# model1, model3 and model4 are passed first, model2 last. Uses whichever
# CarliniL2 was imported last; the attack=True keyword matches the
# l2_attack_quad_single_triple import.
subset = '134'
adv = CarliniL2(sess, model1, model3, model4, model2, attack=True).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [3]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

In [None]:
# model2, model3 and model4 are passed first, model1 last. Uses whichever
# CarliniL2 was imported last; the attack=True keyword matches the
# l2_attack_quad_single_triple import.
subset = '234'
adv = CarliniL2(sess, model2, model3, model4, model1, attack=True).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

In [None]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)

# Four Model Attack (4 Targets)
## A: (A, A, A, A)

In [4]:
# Rebinds CarliniL2 to the l2_attack_quad_all variant, replacing the binding
# made in the previous section.
from nn_robust_attacks.l2_attack_quad_all import CarliniL2

# Tag embedded in the file names written by save_results().
subset = '1234'
# All four models are passed to the attack.
# NOTE(review): presumably all four are attacked, per the "(A, A, A, A)"
# heading — confirm in the attack module.
adv = CarliniL2(sess, model1, model2, model3, model4).attack(inputs, targets)
save_results(inputs, adv, targets, subset)

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-4-c4e2b39d9fe7>, line 6)

In [None]:
# Report on whatever `adv` the most recently executed attack cell produced.
print_results(num_img, adv, 4, inputs)