In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): no cell visible in this notebook reads from or writes to Drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# Notebook-wide matplotlib defaults: no grid lines on axes and small square
# figures unless a cell asks for a different size explicitly.
mpl.rcParams.update({
    'axes.grid': False,
    'figure.figsize': (4, 4),
})

Let's load the pretrained MobileNetV2 model and the ImageNet class names.

In [None]:
# MobileNetV2 with ImageNet weights and its classification head attached.
# The weights are frozen: the model is only queried, never trained, here.
pretrained_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=True,
)
pretrained_model.trainable = False

# Keras helper used below to turn the model's output vector into ImageNet labels.
decode_predictions = tf.keras.applications.mobilenet_v2.decode_predictions

In [None]:
def preprocess(image):
  """Turn a decoded image into a single-image batch that MobileNetV2 accepts.

  The image is cast to float32, resized to 224x224, passed through the
  MobileNetV2 `preprocess_input` scaling, and given a leading batch axis.
  """
  resized = tf.image.resize(tf.cast(image, tf.float32), (224, 224))
  scaled = tf.keras.applications.mobilenet_v2.preprocess_input(resized)
  # Prepend the batch dimension expected by the model.
  return scaled[tf.newaxis, ...]

def get_imagenet_label(probs):
  """Return the top-1 decoded ImageNet entry for the first image in `probs`.

  `probs` is a batch of model outputs; only the best match of the first
  batch element is returned, exactly as produced by `decode_predictions`.
  """
  top_per_image = decode_predictions(probs, top=1)
  first_image_matches = top_per_image[0]
  return first_image_matches[0]

## Original image
Let's use a sample image of a [Labrador Retriever](https://commons.wikimedia.org/wiki/File:YellowLabradorLooking_new.jpg) by Mirko [CC-BY-SA 3.0](https://creativecommons.org/licenses/by-sa/3.0/) from Wikimedia Commons and create adversarial examples from it. The first step is to preprocess it so that it can be fed as an input to the MobileNetV2 model.

In [None]:
# Fetch the sample image with Keras' get_file helper and keep the path it returns.
image_path = tf.keras.utils.get_file('YellowLabradorLooking_new.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/YellowLabradorLooking_new.jpg')

# Read the file contents and decode them into an image tensor.
image_raw = tf.io.read_file(image_path)
image = tf.image.decode_image(image_raw)

# From here on `image` is the preprocessed, batched model input rather than
# the decoded pixels; later cells rely on this preprocessed form.
image = preprocess(image)
# Model output for the unperturbed image; reused for the title and the label size.
image_probs = pretrained_model.predict(image)

In [None]:
# Look up the model's top-1 guess first so the title can report it.
_, image_class, class_confidence = get_imagenet_label(image_probs)

plt.figure()
# The preprocessed image lives in [-1, 1]; imshow wants floats in [0, 1].
plt.imshow(image[0] * 0.5 + 0.5)
plt.title('{} : {:.2f}% Confidence'.format(image_class, class_confidence*100))
plt.show()

## Create the adversarial image

### Implementing fast gradient sign method
The first step is to create perturbations which will be used to distort the original image, resulting in an adversarial image. For this task, the gradients of the loss are taken with respect to the input image, and the perturbation is applied only inside a square patch at the centre of the image.

In [None]:
# Loss that the adversarial perturbation pushes upward: categorical
# cross-entropy (default settings) between the one-hot label and the model output.
loss_object = tf.keras.losses.CategoricalCrossentropy()

def _center_patch_bounds(size, patch_size):
  """Return (start, stop) of a centred window of `patch_size`, clamped to [0, size]."""
  start = size // 2 - patch_size // 2
  stop = start + patch_size
  # Clamp so an oversized patch covers the whole axis instead of producing a
  # negative start index, which NumPy slicing would wrap around to the far end.
  return max(start, 0), min(stop, size)


def create_adversarial_pattern_PATCH(input_image, input_label, patch_size):
  """Compute the FGSM sign-of-gradient perturbation, restricted to a centre patch.

  Args:
    input_image: batched model input of shape (batch, height, width, channels).
    input_label: target passed to `loss_object` together with the prediction.
    patch_size: side length, in pixels, of the square patch that is perturbed.
      The patch is exactly `patch_size` wide (for even sizes it sits half a
      pixel towards the top-left of the centre) and is clipped to the image
      bounds. A non-positive value yields an all-zero perturbation.

  Returns:
    The sign of the loss gradient w.r.t. `input_image`, with every position
    outside the centre patch set to zero. Same shape as `input_image`.
  """
  with tf.GradientTape() as tape:
    # The input is not a trainable variable, so it must be watched explicitly.
    tape.watch(input_image)
    prediction = pretrained_model(input_image)
    loss = loss_object(input_label, prediction)

  # Gradient of the loss with respect to the input image.
  gradient = tape.gradient(loss, input_image)

  # Only the direction of the gradient is used for the perturbation.
  signed_grad = tf.sign(gradient)

  # Build a 0/1 mask that keeps the perturbation inside the centre patch only.
  height, width = signed_grad.shape[1:3]
  top, bottom = _center_patch_bounds(height, patch_size)
  left, right = _center_patch_bounds(width, patch_size)
  perturbation_mask = np.zeros_like(signed_grad[0])
  perturbation_mask[top:bottom, left:right, :] = 1

  # The (height, width, channels) mask broadcasts over the batch axis.
  return signed_grad * perturbation_mask

The resulting perturbations can also be visualised.

In [None]:
# One-hot ground-truth label for the image, shaped (1, num_classes) like the model output.
# Labrador retriever is class 208 in the Keras ImageNet class index; the
# previous value (905) pointed at a different class, so the gradient was
# computed against the wrong label.
labrador_retriever_index = 208
label = tf.one_hot(labrador_retriever_index, image_probs.shape[-1])
label = tf.reshape(label, (1, image_probs.shape[-1]))

# Side length, in pixels, of the centre patch that receives the perturbation.
patch_size = 60
perturbations = create_adversarial_pattern_PATCH(image, label, patch_size)
# Map [-1, 1] to [0, 1] for display; untouched (zero) pixels show as mid-grey.
plt.imshow(perturbations[0] * 0.5 + 0.5);

Let's try this out for different values of epsilon and observe the resultant image. You'll notice that as the value of epsilon is increased, it becomes easier to fool the network. However, this comes as a trade-off which results in the perturbations becoming more identifiable.

In [None]:
def display_images(images, descriptions):
  """Show images in a 4-column grid, each titled with its description and top-1 prediction.

  Args:
    images: sequence of batched model inputs (one image per batch) with values
      in [-1, 1].
    descriptions: sequence of strings, one per image, used as the first title line.

  Each image is classified with `pretrained_model` and the predicted label and
  confidence are added to its title. Grid cells beyond `len(images)` are removed.
  Does nothing when `images` is empty.
  """
  num_images = len(images)
  if num_images == 0:
    # plt.subplots cannot build a grid with zero rows; there is nothing to draw.
    return

  num_cols = 4
  num_rows = (num_images + num_cols - 1) // num_cols  # ceiling division

  # squeeze=False keeps `axs` two-dimensional even when there is a single row;
  # without it a 1-row grid comes back 1-D and [row, col] indexing fails.
  fig, axs = plt.subplots(num_rows, num_cols, figsize=(16, 4 * num_rows), squeeze=False)
  flat_axes = axs.ravel()  # row-major order, matching i // 4, i % 4

  for ax, image, description in zip(flat_axes, images, descriptions):
    _, label, confidence = get_imagenet_label(pretrained_model.predict(image))

    # Map pixel values from [-1, 1] back to [0, 1] for display.
    ax.imshow(image[0] * 0.5 + 0.5)
    ax.set_title('{} \n {} : {:.2f}% Confidence'.format(description, label, confidence * 100))
    ax.axis('off')

  # Remove any unused subplots in the last row.
  for ax in flat_axes[num_images:]:
    fig.delaxes(ax)

  plt.tight_layout()
  plt.show()

In [None]:
# Perturbation strengths to try; epsilon 0 reproduces the unmodified input,
# so it is labelled 'Input' rather than shown a second time.
epsilons = [0, 0.01, 0.1, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 1.0]
descriptions = [('Epsilon = {:0.3f}'.format(eps) if eps else 'Input') for eps in epsilons]

images = []
for eps in epsilons:
  adv_x = image + eps * perturbations
  # Keep the adversarial image inside the [-1, 1] range the model expects.
  adv_x = tf.clip_by_value(adv_x, -1, 1)
  images.append(adv_x)

display_images(images, descriptions)