A first-pass trial notebook experimenting with creating a CNN for our FakeFaces dataset

In [1]:
# imports 
import pandas as pd # to read the csv files 
import os.path
import pickle
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Enable on-demand GPU memory growth so TensorFlow does not try to reserve
# (nearly) all GPU memory up front; see https://www.tensorflow.org/guide/gpu
# `gpu` is kept as the list of detected devices. Every GPU is configured, and a
# CPU-only machine simply skips the loop instead of failing on `gpu[0]`.
gpu = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in gpu:
    try:
        tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as err:
        # Memory growth has to be set before the GPUs are initialised; this is
        # raised when the cell is re-run in an already-initialised kernel.
        print(err)

In [3]:
# Stream the images from disk with Keras' ImageDataGenerator (open question
# from the original notebook: maybe we should use IMG instead...).
# Thanks so much to Alex Kyllo for pointing this out to us!
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
rescale_datagen = ImageDataGenerator(rescale=1./255)  # scale pixel values by 1/255

# Both splits are read with the same settings: binary labels, batches of 32,
# shuffled with a fixed seed.
# NOTE(review): no target_size is passed, so the generator's default image size
# is used - confirm it matches the (256, 256, 3) input shape of the models below.
flow_options = dict(class_mode='binary', batch_size=32, shuffle=True, seed=42)

train_generator = rescale_datagen.flow_from_directory(
    'data/archive/real_vs_fake/train', **flow_options)
validation_generator = rescale_datagen.flow_from_directory(
    'data/archive/real_vs_fake/valid', **flow_options)

Found 99999 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


Let's remind ourselves that in the labels, a 1 is real, while a 0 is fake.

In [None]:
# let's try plotting some of the images 
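# NOTE: don't run this cell as-is; right now it doesn't work because the data-loading setup changed (it still refers to the old `dataset`, `train_img_filenames` and `train_img_labels` variables). Adapt it to the current setup if you are curious and want to preview some images.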
# plt.figure(figsize=(10, 10))
# x = 1
# for i in dataset.sample(n=9, random_state=1).index:
#     ax = plt.subplot(3, 3, x)
#     x += 1
#     plt.imshow(mpimg.imread(train_img_filenames[i].numpy().decode('utf-8')))
#     img_label = 'real' if (int.from_bytes(train_img_labels[i].numpy(), byteorder='little')== 1) else 'fake'
#     plt.title('img ' + str(i) + ': ' + img_label)
#     plt.axis('off') 

In [None]:
# Practice network adapted from the CNN tutorial: three conv layers (the first
# two followed by 2x2 max-pooling), then a batch-normalised dense head that
# ends in a 2-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform',
                  input_shape=(256, 256, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    layers.Flatten(),
    layers.BatchNormalization(),
    layers.Dense(64, activation='elu', kernel_initializer='he_normal'),
    layers.BatchNormalization(),
    layers.Dense(32, activation='elu', kernel_initializer='he_normal'),
    layers.BatchNormalization(),
    layers.Dense(2, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the practice CNN defined above.
model.summary()

In [5]:
# VGG-style network: four conv stages (64, 128, 256, 512 filters) each closed
# by 2x2 max-pooling, followed by two 4096-unit dense layers with batch
# normalisation and a 2-way softmax output.
vgg_conv = dict(kernel_size=(3, 3), activation='relu', kernel_initializer='he_uniform')
vgg_dense = dict(activation='elu', kernel_initializer='he_normal')

model_vgg = models.Sequential([
    # stage 1: 2 x 64 filters
    layers.Conv2D(64, input_shape=(256, 256, 3), **vgg_conv),
    layers.Conv2D(64, **vgg_conv),
    layers.MaxPooling2D((2, 2)),
    # stage 2: 2 x 128 filters
    layers.Conv2D(128, **vgg_conv),
    layers.Conv2D(128, **vgg_conv),
    layers.MaxPooling2D((2, 2)),
    # stage 3: 3 x 256 filters
    layers.Conv2D(256, **vgg_conv),
    layers.Conv2D(256, **vgg_conv),
    layers.Conv2D(256, **vgg_conv),
    layers.MaxPooling2D((2, 2)),
    # stage 4: 3 x 512 filters
    layers.Conv2D(512, **vgg_conv),
    layers.Conv2D(512, **vgg_conv),
    layers.Conv2D(512, **vgg_conv),
    layers.MaxPooling2D((2, 2)),
    # classifier head
    layers.Flatten(),
    layers.BatchNormalization(),
    layers.Dense(4096, **vgg_dense),
    layers.BatchNormalization(),
    layers.Dense(4096, **vgg_dense),
    layers.BatchNormalization(),
    layers.Dense(2, activation='softmax'),
])

In [4]:
# Parent directory that holds one sub-directory of logs per training run.
root_logdir = os.path.join(os.curdir, 'my_logs')


def get_run_logdir():
    """Return a log directory path under ``root_logdir`` named after the current local time.

    The final path component has the form ``run_YYYY_MM_DD-HH_MM_SS``. The
    directory itself is not created here.
    """
    from time import strftime
    return os.path.join(root_logdir, strftime("run_%Y_%m_%d-%H_%M_%S"))

In [8]:
# Compile the VGG-style model: Adam with a small learning rate, and sparse
# categorical cross-entropy to pair integer class labels with the 2-way softmax.
vgg_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
vgg_loss = tf.keras.losses.SparseCategoricalCrossentropy()
model_vgg.compile(optimizer=vgg_optimizer, loss=vgg_loss, metrics=['accuracy'])

# Callbacks used during training:
#  - ModelCheckpoint: save to models/model_vgg, keeping only the best model
#  - EarlyStopping: stop after 10 epochs without improvement and restore the best weights
#  - TensorBoard: log this run to a fresh timestamped directory
vgg_callbacks = [
    tf.keras.callbacks.ModelCheckpoint('models/model_vgg', save_best_only=True),
    tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    tf.keras.callbacks.TensorBoard(get_run_logdir()),
]

# Train for at most 50 epochs, validating on the validation generator.
model_vgg_history = model_vgg.fit(
    train_generator,
    epochs=50,
    validation_data=validation_generator,
    callbacks=vgg_callbacks,
)

Epoch 1/50
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: models/model_vgg\assets
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


In [9]:
# Held-out test split, read with the same rescaling generator as train/valid.
# Shuffling is turned off: it has no benefit for evaluation, and a fixed order
# keeps any later predict() output aligned with test_generator.classes and
# test_generator.filenames. With no shuffling the seed is unused, so it is
# dropped.
test_generator = rescale_datagen.flow_from_directory('data/archive/real_vs_fake/test',
                                                     class_mode='binary',
                                                     batch_size=32,
                                                     shuffle=False)

Found 20000 images belonging to 2 classes.


In [10]:
# Evaluate on the held-out test split; returns [loss, accuracy].
# batch_size is deliberately not passed: the generator already yields batches
# of 32, and Keras documents that batch_size must not be specified for
# generator / Sequence inputs (newer versions raise an error if it is).
model_vgg.evaluate(test_generator)



[0.03162318840622902, 0.9897000193595886]