A first-pass trial notebook experimenting with creating a CNN for our FakeFaces dataset

In [1]:
# imports 
import pandas as pd # to read the csv files 
import os.path
import pickle
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# need to allow for the memory limit to be able to grow (?) https://www.tensorflow.org/guide/gpu
gpu = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

In [3]:
# maybe we should use IMG instead...
# thanks so much to Alex Kyllo for pointing this out to us!
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
rescale_datagen = ImageDataGenerator(rescale=1./255)
train_generator = rescale_datagen.flow_from_directory('data/archive/real_vs_fake/train',
                                                      class_mode='binary',
                                                     batch_size=32, 
                                                     shuffle=True,
                                                     seed=42)
validation_generator = rescale_datagen.flow_from_directory('data/archive/real_vs_fake/valid',
                                                      class_mode='binary',
                                                     batch_size=32, 
                                                     shuffle=True,
                                                     seed=42)

Found 99999 images belonging to 2 classes.
Found 20000 images belonging to 2 classes.


Let's remind ourselves that in the labels, a 1 is real, while a 0 is fake.

In [None]:
# let's try plotting some of the images 
# don't try running this; rigth now this doesnt work because I changed the setup a little, but you can change it a bit to get it to work if you are curious 
# plt.figure(figsize=(10, 10))
# x = 1
# for i in dataset.sample(n=9, random_state=1).index:
#     ax = plt.subplot(3, 3, x)
#     x += 1
#     plt.imshow(mpimg.imread(train_img_filenames[i].numpy().decode('utf-8')))
#     img_label = 'real' if (int.from_bytes(train_img_labels[i].numpy(), byteorder='little')== 1) else 'fake'
#     plt.title('img ' + str(i) + ': ' + img_label)
#     plt.axis('off') 

In [4]:
# just try adapting the cnn tutorial network for practice purposes 
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.Flatten())
model.add(layers.BatchNormalization())
model.add(layers.Dense(64, activation='elu', kernel_initializer='he_normal'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(32, activation='elu', kernel_initializer='he_normal'))
model.add(layers.BatchNormalization())
model.add(layers.Dense(2, activation='softmax'))

In [5]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 127, 127, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 125, 125, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 60, 60, 64)        36928     
_________________________________________________________________
flatten (Flatten)            (None, 230400)            0         
_________________________________________________________________
batch_normalization (BatchNo (None, 230400)            9

In [6]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

history = model.fit(train_generator, epochs=10, validation_data=validation_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 246/3125 [=>............................] - ETA: 2:36 - loss: 0.6932 - accuracy: 0.4959

KeyboardInterrupt: 