In [1]:
# https://www.kaggle.com/angomcha/adversarial-attact-on-mnist/data

In [54]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '0'

import logging
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import flags

from cleverhans.loss import CrossEntropy
from cleverhans.dataset import MNIST
from cleverhans.utils_tf import model_eval
from cleverhans.train import train
from cleverhans.attacks import FastGradientMethod
from cleverhans.utils import AccuracyReport, set_log_level
from cleverhans_tutorials.tutorial_models import ModelBasicCNN

In [55]:
# Raise TensorFlow's Python-side logging threshold to FATAL.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.FATAL)
# Reset the default graph before any ops are created below.
# NOTE(review): presumably here so that re-running the notebook in the same
# kernel does not pile up ops from a previous run -- confirm intent.
tf.reset_default_graph()

In [56]:
# Notebook-wide settings.

# When true, accuracy on the training set is evaluated after training too.
testing = True

# Training hyperparameters; nb_epochs, batch_size and learning_rate are
# collected into train_params, and batch_size is also used for evaluation.
nb_epochs = 3
batch_size = 128
learning_rate = 0.001
# NOTE(review): clean_train and backprop_through_attack are not referenced
# by any cell visible in this notebook.
clean_train = True
backprop_through_attack = False
# Passed to ModelBasicCNN as its third positional argument.
nb_filters = 64

# Start/end bounds handed to the MNIST loader for the train and test sets.
train_start = 0
train_end = 60000
test_start = 0
test_end = 10000
# Smoothing value given to the CrossEntropy loss.
label_smoothing = 0.1

In [57]:
# Object used to keep track of (and return) key accuracies;
# do_eval() stores each result on it with setattr.
report = AccuracyReport()

# Set TF random seed to improve reproducibility
tf.set_random_seed(1234)

# Set the cleverhans log level to DEBUG; the per-batch "Batch N" lines in the
# recorded output are DEBUG records.
set_log_level(logging.DEBUG)

# Create TF session. config_args is empty, so ConfigProto is constructed
# without any keyword arguments.
config_args = {}
sess = tf.Session(config=tf.ConfigProto(**config_args))

In [58]:
# Get MNIST data, restricted to the configured train/test index bounds
mnist = MNIST(train_start=train_start, train_end=train_end,
            test_start=test_start, test_end=test_end)
x_train, y_train = mnist.get_set('train')
x_test, y_test = mnist.get_set('test')

# Read the image dimensions (axes 1-3 of x_train) and the number of classes
# (axis 1 of y_train) from the loaded arrays instead of hard-coding them
img_rows, img_cols, nchannels = x_train.shape[1:4]
nb_classes = y_train.shape[1]

# Define input TF placeholders: x for image batches, y for label batches.
# The leading None leaves the batch size unspecified.
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                    nchannels))
y = tf.placeholder(tf.float32, shape=(None, nb_classes))

In [59]:
# Show the first training label; the recorded output is a 10-element vector
# with a single 1 at index 5.
print(y_train[0])

[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [60]:
# Parameter dictionaries used later for training, evaluation and the attack
# (nothing is trained in this cell).

# Passed to train() as args.
train_params = {
  'nb_epochs': nb_epochs,
  'batch_size': batch_size,
  'learning_rate': learning_rate
}
# Passed to model_eval() as args inside do_eval().
eval_params = {'batch_size': batch_size}
# Keyword arguments forwarded to fgsm.generate().
fgsm_params = {
  'eps': 0.3,
  'clip_min': 0.,
  'clip_max': 1.
}
# Seeded NumPy random state handed to train() as rng.
rng = np.random.RandomState([2017, 8, 30])

In [61]:
def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    """Evaluate `preds` on a dataset and record the resulting accuracy.

    The accuracy is computed with model_eval() using the notebook-level
    session, placeholders and eval_params, then stored on the global
    `report` object under the attribute name `report_key`.

    Args:
        preds: Tensor passed to model_eval() as the model predictions.
        x_set: Inputs to evaluate on.
        y_set: Labels matching `x_set`.
        report_key: Attribute name on `report` that receives the accuracy.
        is_adv: None to stay silent; a truthy value prints the accuracy
            labelled 'adversarial', a falsy non-None value prints it
            labelled 'legitimate'.

    Returns:
        None.
    """
    accuracy = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, accuracy)

    # Nothing to print unless the caller said which kind of examples these are.
    if is_adv is None:
        return

    example_kind = 'adversarial' if is_adv else 'legitimate'
    print('Test accuracy on %s examples: %0.4f' % (example_kind, accuracy))

In [62]:
# Build the first CNN ('model1'), take its logits for the input placeholder x,
# and create the cross-entropy training loss with label smoothing.
model = ModelBasicCNN('model1', nb_classes, nb_filters)
preds = model.get_logits(x)
loss = CrossEntropy(model, smoothing=label_smoothing)



In [64]:
def evaluate():
    """Evaluate `preds` on the test set; passed to train() as its evaluate callback.

    `preds` is resolved from the notebook's global namespace each time this
    runs. The accuracy is stored on `report.clean_train_clean_eval` and
    printed as 'legitimate' accuracy.
    """
    do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

# Train model 1, restricting the trained variables to model.get_params()
train(sess, loss, x_train, y_train, evaluate=evaluate,
      args=train_params, rng=rng, var_list=model.get_params())

# Calculate accuracy on the training set (nothing is printed because is_adv
# is left at its default of None; the value is only stored on the report)
if testing:
    do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

# Initialize the Fast Gradient Sign Method (FGSM) attack object and build the
# adversarial-example tensor for model 1, then model 1's logits on it.
# target_label is a one-hot vector with the 1 at index 5.
# NOTE(review): y_target is handed to the FastGradientMethod constructor
# rather than to generate(); confirm that the installed cleverhans version
# honours it there -- if it does not, this attack is not actually targeted.
target_label = np.array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
fgsm = FastGradientMethod(model, sess=sess, y_target=target_label)
adv_x = fgsm.generate(x, **fgsm_params)
preds_adv = model.get_logits(adv_x)



num_devices:  1


[INFO 2019-10-22 09:32:38,082 cleverhans] Epoch 0 took 19.54606580734253 seconds


Test accuracy on legitimate examples: 0.9884


[INFO 2019-10-22 09:32:57,557 cleverhans] Epoch 1 took 18.56864833831787 seconds


Test accuracy on legitimate examples: 0.9922


[INFO 2019-10-22 09:33:17,288 cleverhans] Epoch 2 took 18.855457067489624 seconds


Test accuracy on legitimate examples: 0.9923


[DEBUG 2019-10-22 09:33:19,068 cleverhans] Batch 100
[DEBUG 2019-10-22 09:33:20,040 cleverhans] Batch 200
[DEBUG 2019-10-22 09:33:21,008 cleverhans] Batch 300
[DEBUG 2019-10-22 09:33:21,975 cleverhans] Batch 400




In [65]:
# Build and train a second ModelBasicCNN ('model2') with the same settings.
# Its logits get their own name: rebinding the global `preds` here would
# silently change what evaluate() (defined for model 1) measures and would
# make re-running the model 1 cell evaluate model 2 instead.
model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
preds2 = model2.get_logits(x)
loss2 = CrossEntropy(model2, smoothing=label_smoothing)


def evaluate2():
    """Evaluate model 2's logits on the test set after each training epoch.

    The accuracy is stored under its own report attribute so that model 1's
    `clean_train_clean_eval` entry is not overwritten.
    """
    do_eval(preds2, x_test, y_test, 'model2_clean_train_clean_eval', False)


train(sess, loss2, x_train, y_train, evaluate=evaluate2,
      args=train_params, rng=rng, var_list=model2.get_params())

# Use the attack built on model 1 to see its viability on model 2: the
# adversarial examples are generated by `fgsm` (which wraps model 1) and then
# fed through model 2.
adv_x2 = fgsm.generate(x, **fgsm_params)
preds_adv2 = model2.get_logits(adv_x2)





num_devices:  1


[INFO 2019-10-22 09:33:48,761 cleverhans] Epoch 0 took 20.096494913101196 seconds


Test accuracy on legitimate examples: 0.9884


[INFO 2019-10-22 09:34:09,075 cleverhans] Epoch 1 took 19.350675344467163 seconds


Test accuracy on legitimate examples: 0.9920


[INFO 2019-10-22 09:34:32,519 cleverhans] Epoch 2 took 22.553614139556885 seconds


Test accuracy on legitimate examples: 0.9924


In [66]:
# Evaluate the accuracy of the MNIST model on adversarial examples
do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

# Evaluate the accuracy of the second model on the attack built on the first.
# This is stored under its own report key; reusing 'clean_train_adv_eval'
# would overwrite model 1's result recorded just above.
do_eval(preds_adv2, x_test, y_test, 'model2_clean_train_adv_eval', True)

# Adversarial accuracy on the training set is intentionally disabled. It is
# kept as comments rather than a bare string literal, which would be echoed
# as the cell's output.
# if testing:
#     do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

Test accuracy on adversarial examples: 0.0968
Test accuracy on adversarial examples: 0.2087


"\nif testing:\n    do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')\n"