# Clone the GitHub Repo

In [None]:
# Fetch the project repository into the working directory, then list the
# directory contents to confirm the clone.
! git clone https://github.com/yungyuchen521/Graduation_Project.git
! ls

Cloning into 'Graduation_Project'...
remote: Enumerating objects: 2107, done.[K
remote: Counting objects: 100% (1290/1290), done.[K
remote: Compressing objects: 100% (1254/1254), done.[K
remote: Total 2107 (delta 43), reused 1279 (delta 36), pack-reused 817[K
Receiving objects: 100% (2107/2107), 89.79 MiB | 27.42 MiB/s, done.
Resolving deltas: 100% (59/59), done.
Graduation_Project  sample_data


In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package).
# NOTE(review): none of the visible cells read from the mounted drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Extract the Selected People

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# Read the tab-separated (name, image count) table straight from the
# repository; the file has no header row, so the column names are supplied.
df = pd.read_csv(
    'https://raw.githubusercontent.com/yungyuchen521/Graduation_Project/main/names.txt',
    sep='\t',
    header=None,
    names=['name', 'count'],
)
df.head()

Unnamed: 0,name,count
0,AJ_Cook,1
1,AJ_Lamas,1
2,Aaron_Eckhart,1
3,Aaron_Guiel,1
4,Aaron_Patterson,1


In [None]:
# Number of identities to keep: the N people with the most images.
N = 5

df = df.sort_values('count', ascending=False).head(N)
df

Unnamed: 0,name,count
1871,George_W_Bush,530
1047,Colin_Powell,236
5458,Tony_Blair,144
1404,Donald_Rumsfeld,121
1892,Gerhard_Schroeder,109


In [None]:
# Map each selected name to its image count. to_dict('list') wraps every
# count in a one-element list, so it is unwrapped while building the dict.
people = {
    name: counts[0]
    for name, counts in df.set_index('name').T.to_dict('list').items()
}

people

{'Colin_Powell': 236,
 'Donald_Rumsfeld': 121,
 'George_W_Bush': 530,
 'Gerhard_Schroeder': 109,
 'Tony_Blair': 144}

# Data Preprocessing

In [None]:
def get_file_name(name, num):
  '''
    Build the path of the `num`-th image of person `name`.

    name: person's directory name and file prefix, e.g. 'Tony_Blair'
    num: image index (int or numeric string)
    return: 'Graduation_Project/img/<name>/<name>_<num>.jpg', with num
            zero-padded to at least 4 digits
  '''
  path = 'Graduation_Project/img/'

  # zfill pads short numbers and leaves longer ones untouched; the previous
  # `while len(num) != 4` loop never terminated for numbers with 5+ digits
  num = str(num).zfill(4)

  return path + name + '/' + name + '_' + num + '.jpg'

def get_all_img(map, dtype=np.float64):
  '''
    Load every image of every person listed in `map`.

    map: dict of person name -> number of images; files 1..count are
         located through get_file_name
    dtype: floating dtype of the returned pixels. The default (float64)
           matches the previous behaviour; np.float32 halves the memory
           needed for the image array
    return: (imgs, labels) numpy arrays where pixel values are divided by
            255 and labels[k] is the person name of imgs[k]
  '''
  imgs = []
  labels = []

  for key, value in map.items():
    for i in range(1, value+1):
      # the context manager closes the file even if decoding raises
      with Image.open(get_file_name(key, i)) as jpg:
        imgs.append(np.asarray(jpg, dtype=dtype) / 255.0) # make the range within [0, 1]

      labels.append(key)

  return np.array(imgs), np.array(labels)

def horizontal_flip(img):
  '''
    Mirror an image left-to-right.

    img: array whose second axis is the one to reverse, e.g. an image laid
         out as (height, width, channels)
    return: `img` with the order of axis 1 reversed
  '''
  return np.fliplr(img)

# NOTE: running augmentation exhausts RAM (it doubles the number of images held in memory)
def augmentation(data):
  '''
    Double a dataset by pairing every image with its mirrored copy.

    data: (imgs, labels) pair, e.g. the return value of get_all_img
    return: (aug_imgs, aug_labels) numpy arrays laid out as
            [img0, flip(img0), img1, flip(img1), ...] with each label
            repeated twice to match
  '''
  imgs, labels = data
  count = len(labels)

  aug_imgs = [
      version
      for idx in range(count)
      for version in (imgs[idx], horizontal_flip(imgs[idx]))
  ]
  aug_labels = [labels[idx] for idx in range(count) for _ in range(2)]

  return np.array(aug_imgs), np.array(aug_labels)

In [None]:
# Load the images without augmentation; the augmented variant is kept
# commented out on the next line.
#imgs, labels = augmentation(get_all_img(people))
imgs, labels = get_all_img(people)

# check the images
# NOTE: the preview code below is disabled by wrapping it in a string
# literal, so this cell only evaluates (and echoes) that string instead of
# drawing the 5x5 grid.
'''
plt.figure(figsize=(10, 10))
for i in range(25):
  plt.subplot(5, 5, i+1)
  plt.xticks([])
  plt.yticks([])
  plt.grid(False)
  plt.imshow(imgs[(i*123214 + 2021) % len(imgs)])

plt.show()
'''

'\nplt.figure(figsize=(10, 10))\nfor i in range(25):\n  plt.subplot(5, 5, i+1)\n  plt.xticks([])\n  plt.yticks([])\n  plt.grid(False)\n  plt.imshow(imgs[(i*123214 + 2021) % len(imgs)])\n\nplt.show()\n'

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encode the name labels as integer class ids.
tmp = LabelEncoder().fit(labels).transform(labels)
tmp

array([2, 2, 2, ..., 3, 3, 3])

In [None]:
# One-hot encode the class ids. The dtype is pinned because pandas >= 2.0
# makes get_dummies return booleans by default, which would turn Y into a
# bool array instead of the 0/1 integers the rest of the notebook expects.
Y = np.array(pd.get_dummies(tmp, dtype=np.uint8))

Y.shape

(1140, 5)

In [None]:
# Stratified 67/33 split. random_state pins the shuffle so the split, and
# every accuracy figure derived from it, is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    imgs, Y, test_size=0.33, stratify=Y, random_state=42
)

# Pre-trained Model

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Input, Activation, Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# VGG16 with ImageNet weights, fed through an Input shaped like one
# training image.
vgg = VGG16(
    weights='imagenet',
    input_tensor=Input(shape=x_train.shape[1:]),
)
vgg.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     14758

**Remove the Last 2 Layers**

In [None]:
# Rebuild VGG16 as a Sequential model without its final two layers.
model = Sequential(vgg.layers[:-2])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 62, 62, 256)       2

In [None]:
# Freeze every layer except the last two currently in the model.
for frozen_layer in model.layers[:-2]:
  frozen_layer.trainable = False

In [None]:
# Classification head: dropout -> 256-unit ReLU layer -> dropout -> one
# softmax output per selected person.
head_layers = (
    Dropout(0.1),
    Dense(256, activation='relu'),
    Dropout(0.1),
    Dense(N, activation='softmax'),
)
for head_layer in head_layers:
  model.add(head_layer)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 62, 62, 256)       2

In [None]:
# `metrics` is passed as a list: that is the documented form, and newer
# Keras releases reject a bare string.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Save the model to 'best_model' whenever validation accuracy improves.
checkpoint = ModelCheckpoint('best_model', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callback_list = [checkpoint]

# NOTE: the held-out test split doubles as the validation set here.
# The comma after `epochs` had been commented out (`15#,`), which made the
# following `callbacks` argument a SyntaxError.
history = model.fit(
    x_train, y_train,
    validation_data = (x_test, y_test),
    epochs = 15,
    callbacks = callback_list
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Evaluate the Model

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
pred = model.predict(x_test)

In [None]:
def max_index(arr):
  '''
    Index of the largest element of `arr`.

    Ties resolve to the earliest position, and an empty sequence yields 0.
  '''
  return max(range(len(arr)), key=lambda idx: arr[idx], default=0)

In [None]:
# Collapse predicted score vectors and one-hot targets to class indices.
yhat = [max_index(p) for p in pred]
ytrue = [max_index(y) for y in y_test]

In [None]:
# Share of test samples whose predicted class index equals the true one.
accuracy_score(ytrue, yhat)

0.9708222811671088

# Target-Agnostic Attack

In [None]:
# Feature extractor: the layer objects of `vgg` minus the final two,
# reused rather than copied.
FE = Sequential(vgg.layers[:-2])

FE.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 62, 62, 256)      

In [None]:
from tensorflow import GradientTape
from tensorflow.keras import backend
import tensorflow as tf

In [None]:
# Length of the target vector built in generate_attack_img and the number
# of neurons attack() iterates over; presumably the width of FE's output
# layer — TODO confirm against FE.summary()
output_neurons = 4096

In [None]:
def generate_attack_img(img, FE, i, alpha=0.1, beta=0.01, gamma=1, epochs=20):
  '''
    Perturb `img` by gradient descent so that output neuron `i` of the
    feature extractor is driven towards a large activation.

    img: initial image batch; only the first item of FE's output is used
    FE: feature extractor (callable Keras model)
    i: index of the feature neuron to excite
    alpha: gradient-descent step size
    beta: weight of the penalty on activations exceeding the target vector
    gamma: weight of the squared error on neuron `i`
    epochs: number of gradient steps (default 20, the former fixed value)
    return: the perturbed image as a float32 tensor
  '''
  # target vector: 1000 for the chosen neuron, 0 everywhere else
  target = np.zeros(output_neurons, dtype=np.float32)
  target[i] = 1000

  beta  = tf.constant(beta,  dtype=tf.float32)
  gamma = tf.constant(gamma, dtype=tf.float32)

  X = tf.convert_to_tensor(img, dtype=tf.float32)

  # Compile FE.call only once. This function is invoked once per neuron, and
  # re-wrapping on every invocation stacked tf.function wrappers on the model.
  if not hasattr(FE.call, 'get_concrete_function'):
    FE.call = tf.function(FE.call)

  # The loop variable must not be named `i`: doing so overwrote the target
  # neuron index with the step counter.
  for _ in range(epochs):
    with GradientTape() as tape:
      tape.watch(X)
      features = FE(X)[0]  # single forward pass shared by both loss terms

      #   gamma * (F(X)[i] - target[i])^2 + beta * sigma[ RELU(F(X)[l] - target[l])^2 ], l != i
      # ~ gamma * (F(X)[i] - target[i])^2 + beta * sigma[ RELU(F(X)[l] - target[l])^2 ], l = 1~output_neurons, for gamma >> beta
      loss = gamma * backend.square( features[i] - target[i] ) + beta * backend.sum( backend.square( backend.relu(features - target) ) )

    gradients = tape.gradient(loss, X)
    X = X - alpha * gradients

  return X

def attack(n, I, FE, model, alpha=0.1, beta=0.01, gamma=1):
  '''
    Try one generated image per feature neuron until the model accepts one.

    n: number of feature-extractor output neurons to try (indices 0..n-1)
    I: initial image; a fresh copy is perturbed on every attempt
    FE: feature extractor handed to generate_attack_img
    model: target model to attack
    alpha, beta, gamma: forwarded unchanged to generate_attack_img
    return: the first generated image that passes auth(), or None when no
            neuron produces one
  '''

  for neuron in range(n):
    candidate = generate_attack_img(np.copy(I), FE, neuron, alpha, beta, gamma)
    batch = tf.reshape(candidate, [1, 250, 250, 3])
    scores = model.predict(batch)[0]

    if not auth(scores):
      # report the best score reached, then move on to the next neuron
      print(max(scores))
      continue

    print(neuron)
    return candidate

  return None

# authenticate the prediction
def auth(pred):
  '''
    Authenticate a prediction: True when any score in `pred` is strictly
    greater than 0.90, False otherwise (including for an empty `pred`).
  '''
  threshold = 0.90

  return any(score > threshold for score in pred)


In [None]:
# Start the attack from an all-zero array of shape (1, 250, 250, 3).
init_img = np.zeros((1, 250, 250, 3))

# attack() returns the first image accepted by auth(), or None if no
# neuron index in range(output_neurons) produces one.
attacker = attack(output_neurons, init_img,  FE, model)

ResourceExhaustedError: ignored