# Testing adversarial attack on CIFAR10

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt, matplotlib.image as mpimg
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms

The output of torchvision datasets are PILImage images of range [0, 1].
We transform them to Tensors of normalized range [-1, 1]

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Using downloaded and verified file: ./data/cifar-10-python.tar.gz
Files already downloaded and verified


## Training and testing a neural network on binary data

In [100]:
# function to invert binary images
def invert_binary_image(img):
    img[img == 1] = -1
    img[img == 0] = 1
    img[img == -1] = 0
    return img

In [101]:
# invert test data and compute accuracy
x_test_inv = invert_binary_image(x_test)
print("Test score:",clf.score(x_test_inv,y_test))

Test score: 0.007


The accuracy is very low, almost 0. 

## Training on mixture of training data 
Now lets invert train data and mix it with original training data and then train the network.

In [102]:
# invert train data
x_train_inv = invert_binary_image(x_train)

In [103]:
# concatenate x_train and x_train_inv
x_train_mix = np.concatenate((x_train,x_train_inv),axis=0)
y_train_mix = np.concatenate((y_train,y_train),axis=0)

In [104]:
x_test_inv.shape

(1000, 784)

In [105]:
# train a new neural network
clf_mix = MLPClassifier()
clf_mix.fit(x_train_mix, y_train_mix)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [106]:
# check accuracy
print("training score:", clf_mix.score(x_train_mix,y_train_mix))
print("Test score:",clf_mix.score(x_test,y_test))
print("Inverted Test score:",clf_mix.score(x_test_inv,y_test))

training score: 1.0
Test score: 0.896
Inverted Test score: 0.896


## Train on gradients of images
In this part we will find gradients of the images and then train a neural network on that data. This should make it robust to colar changes.

In [107]:
# a function that takes a matrix of images and ouput a matrix of edges/gradients
import cv2
def edges_in_image(img_mat):
    edge_mat = np.empty_like(img_mat)
    for i in range(img_mat.shape[0]):
        img = np.reshape(img_mat[i,:],(28,28))
        img = cv2.Laplacian(img,cv2.CV_64F)
        img = np.reshape(img,(28*28))
        edge_mat[i,:] = img
    return edge_mat

In [108]:
# find gradients of x_train, x_test and x_test_inv
x_train_edge = edges_in_image(x_train)
x_test_edge = edges_in_image(x_test)
x_test_inv_edge = edges_in_image(x_test_inv)

In [109]:
# train a neural network on edges
clf_edge = MLPClassifier()
clf_edge.fit(x_train_edge, y_train)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [110]:
# check accuracy
print("training score:", clf_edge.score(x_train_edge,y_train))
print("Test score:",clf_edge.score(x_test_edge,y_test))
print("Inverted Test score:",clf_edge.score(x_test_inv_edge,y_test))

training score: 1.0
Test score: 0.759
Inverted Test score: 0.759


Accuracy is not that high on test images (probably overfitting) but its the same for both normal test and inverted test images.