# Clone the GitHub Repo

In [None]:
# Clone only when the checkout is missing, so re-running this cell does not fail
# because the destination directory 'Graduation_Project' already exists.
! [ -d Graduation_Project ] || git clone https://github.com/yungyuchen521/Graduation_Project.git
! ls

Cloning into 'Graduation_Project'...
remote: Enumerating objects: 2088, done.[K
remote: Counting objects: 100% (1271/1271), done.[K
remote: Compressing objects: 100% (1235/1235), done.[K
remote: Total 2088 (delta 38), reused 1265 (delta 36), pack-reused 817[K
Receiving objects: 100% (2088/2088), 89.78 MiB | 31.39 MiB/s, done.
Resolving deltas: 100% (54/54), done.
Graduation_Project  sample_data


# Extract the Selected People

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
# The listing is tab-separated and has no header row, so the two column names
# are supplied while reading.
df = pd.read_csv(
    'https://raw.githubusercontent.com/yungyuchen521/Graduation_Project/main/names.txt',
    sep='\t',
    header=None,
    names=['name', 'count'],
)
df.head()

Unnamed: 0,name,count
0,AJ_Cook,1
1,AJ_Lamas,1
2,Aaron_Eckhart,1
3,Aaron_Guiel,1
4,Aaron_Patterson,1


In [None]:
N = 20  # how many people to keep: the ones with the most images

# Order by image count, largest first, then keep the first N rows.
df = df.sort_values(by='count', ascending=False).head(N)
df

Unnamed: 0,name,count
1871,George_W_Bush,530
1047,Colin_Powell,236
5458,Tony_Blair,144
1404,Donald_Rumsfeld,121
1892,Gerhard_Schroeder,109
373,Ariel_Sharon,77
2175,Hugo_Chavez,71
2941,Junichiro_Koizumi,60
2468,Jean_Chretien,55
2682,John_Ashcroft,53


In [None]:
# Map each selected person's name to their image count.
people = dict(zip(df['name'], df['count']))

people

{'Ariel_Sharon': 77,
 'Arnold_Schwarzenegger': 42,
 'Colin_Powell': 236,
 'Donald_Rumsfeld': 121,
 'George_W_Bush': 530,
 'Gerhard_Schroeder': 109,
 'Gloria_Macapagal_Arroyo': 44,
 'Hans_Blix': 39,
 'Hugo_Chavez': 71,
 'Jacques_Chirac': 52,
 'Jean_Chretien': 55,
 'Jennifer_Capriati': 42,
 'John_Ashcroft': 53,
 'Junichiro_Koizumi': 60,
 'Laura_Bush': 41,
 'Lleyton_Hewitt': 41,
 'Luiz_Inacio_Lula_da_Silva': 48,
 'Serena_Williams': 52,
 'Tony_Blair': 144,
 'Vladimir_Putin': 49}

# Data Preprocessing

In [None]:
def get_file_name(name, num):
  """Build the path of one image of a person inside the cloned repository.

  The image number is left-padded with zeros to at least four digits, giving
  'Graduation_Project/img/<name>/<name>_<NNNN>.jpg'.

  Args:
    name: Person identifier, used both as directory name and file-name prefix.
    num: Image number; it is converted with str() before padding.

  Returns:
    The relative path of the image file as a string.
  """
  path = 'Graduation_Project/img/'
  # zfill pads up to four characters and leaves longer numbers untouched; the
  # earlier `while len(num) != 4` loop never terminated for five or more digits.
  num = str(num).zfill(4)

  return path + name + '/' + name + '_' + num + '.jpg'

def get_all_img(map):
  """Load every image of every listed person and divide the pixels by 255.

  Args:
    map: Dict of person name -> number of images. Images numbered 1..count are
      opened at the paths produced by get_file_name. (The parameter name
      shadows the builtin; it is kept so existing callers keep working.)

  Returns:
    Tuple (imgs, labels) of NumPy arrays: the scaled pixel data, and the person
    name repeated once per image, in matching order.
  """
  imgs = []
  labels = []

  for name, count in map.items():
    for i in range(1, count + 1):
      # The context manager closes the file even if decoding the pixels raises.
      with Image.open(get_file_name(name, i)) as jpg:
        # make the range within [0, 1] (assumes 8-bit channels)
        imgs.append(np.array(jpg) / 255.0)

      labels.append(name)

  return np.array(imgs), np.array(labels)

def horizontal_flip(img):
  """Return `img` with the order of its entries reversed along axis 1.

  For arrays laid out as (height, width, channels) this mirrors the picture
  left-to-right.
  """
  mirrored = np.flip(img, axis=1)
  return mirrored

# RAM crashes if doing augmentation
def augmentation(data):
  """Double a dataset by pairing every image with its horizontally flipped copy.

  Args:
    data: Tuple (imgs, labels), indexable in parallel.

  Returns:
    Tuple (aug_imgs, aug_labels) of NumPy arrays in which each original image
    is immediately followed by its flipped copy, both carrying the same label.
  """
  imgs, labels = data
  aug_imgs = []
  aug_labels = []

  for idx, label in enumerate(labels):
    original = imgs[idx]
    aug_imgs.extend((original, horizontal_flip(original)))
    aug_labels.extend((label, label))

  return np.array(aug_imgs), np.array(aug_labels)

In [None]:
# Augmentation stays disabled: doubling the dataset exhausted the available RAM.
#imgs, labels = augmentation(get_all_img(people))
imgs, labels = get_all_img(people)

# Optional visual check of the loaded images. It is gated by a flag rather than
# wrapped in a string literal, which was evaluated as the cell's last expression
# and echoed back as output.
SHOW_SAMPLE_IMAGES = False

if SHOW_SAMPLE_IMAGES:
  plt.figure(figsize=(10, 10))
  for i in range(25):
    plt.subplot(5, 5, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    # the index formula jumps around the dataset instead of taking the first 25
    plt.imshow(imgs[(i*123214 + 2021) % len(imgs)])

  plt.show()

'\nplt.figure(figsize=(10, 10))\nfor i in range(25):\n  plt.subplot(5, 5, i+1)\n  plt.xticks([])\n  plt.yticks([])\n  plt.grid(False)\n  plt.imshow(imgs[(i*123214 + 2021) % len(imgs)])\n\nplt.show()\n'

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Keep the fitted encoder instead of discarding it, so integer class ids can be
# mapped back to person names (label_encoder.inverse_transform / .classes_).
label_encoder = LabelEncoder()
tmp = label_encoder.fit_transform(labels)
tmp

array([4, 4, 4, ..., 7, 7, 7])

In [None]:
# One-hot encode the integer class ids: one row per image, one column per class.
Y = pd.get_dummies(tmp).to_numpy(copy=True)

Y.shape

(1906, 20)

In [None]:
# Fix the shuffle seed so the stratified split, and every metric computed from
# it, is the same on each run.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    imgs, Y, test_size=0.33, stratify=Y, random_state=SPLIT_SEED
)

# Target-Agnostic Attack

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Input, Activation, Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# VGG16 without its classifier head, initialised with ImageNet weights and
# sized to the shape of one training image.
# NOTE(review): the images are divided by 255 when loaded, while Keras documents
# vgg16.preprocess_input as the input preparation for this model -- confirm the
# [0, 1] scaling is intended.
vgg = VGG16(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=x_train[0].shape),
)
vgg.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 250, 250, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 250, 250, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 250, 250, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 125, 125, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 125, 125, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 125, 125, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 62, 62, 128)       0     

In [None]:
# Mark every layer of the pretrained base as non-trainable.
for base_layer in vgg.layers:
  base_layer.trainable = False

In [None]:
# Classifier stacked on the VGG16 base: flatten the feature maps, then two small
# ReLU layers with light dropout, ending in an N-way softmax.
model = Sequential()
model.add(vgg)
model.add(Flatten())
model.add(Dropout(0.1))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(N))
model.add(Activation('softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dropout (Dropout)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 64)                1605696   
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout_2 (Dropout)         (None, 32)                0

In [None]:
# `metrics` is passed as a list: Keras documents it as a list of metrics, and
# Keras 3 rejects a bare string here.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [None]:
# Write the model to 'best_model' each time validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    'best_model',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)
callback_list = [checkpoint]

# NOTE(review): the test split is also used as validation data, so the saved
# checkpoint is selected on the test set -- confirm this is intended.
history = model.fit(
    x_train,
    y_train,
    epochs=25,
    batch_size=20,
    validation_data=(x_test, y_test),
    callbacks=callback_list,
)

Epoch 1/25
Epoch 00001: val_accuracy improved from -inf to 0.44038, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 2/25
Epoch 00002: val_accuracy improved from 0.44038 to 0.55803, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 3/25
Epoch 00003: val_accuracy improved from 0.55803 to 0.66932, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 4/25
Epoch 00004: val_accuracy improved from 0.66932 to 0.73132, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 5/25
Epoch 00005: val_accuracy did not improve from 0.73132
Epoch 6/25
Epoch 00006: val_accuracy improved from 0.73132 to 0.74404, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 7/25
Epoch 00007: val_accuracy improved from 0.74404 to 0.78537, saving model to best_model
INFO:tensorflow:Assets written to: best_model/assets
Epoch 8/25
Epoch 00008: val_acc