<a href="https://colab.research.google.com/github/vishnucramesh/deepfake-detection/blob/master/DeepfakeDetector_VGG_TF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
Step 1: Upload the API key file provided by Kaggle to a location in Google Drive
Step 2: Set the directory containing the config file as the KAGGLE_CONFIG_DIR environment variable
Step 3: Change the working directory to that location
Step 4: Copy the dataset's download command from Kaggle and run it
"""

import os

!pip install kaggle


PATH = '/Users/vishnu/Work/uni/VISOPE/deepfake-image-detector/dataset'

# set kaggle config file directory
os.environ['KAGGLE_CONFIG_DIR'] = PATH

%cd $PATH

!kaggle datasets download -d xhlulu/140k-real-and-fake-faces --unzip

In [None]:
pip install tensorflow matplotlib pillow sklearn git+https://github.com/yaledhlab/vggface.git


In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow


from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Activation, Convolution2D, MaxPooling2D, \
                                    BatchNormalization, BatchNormalization, Conv2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [None]:
# Root directory containing the 'train/', 'valid/' and 'test/' image folders.
base_path = '/Users/vishnu/Work/uni/VISOPE/deepfake-image-detector/dataset/real_vs_fake/real-vs-fake/'
batch_size = 64

# The only preprocessing applied is rescaling pixel values by 1/255.
image_gen = ImageDataGenerator(rescale=1./255.)

# Arguments shared by every split: images resized to 224x224, binary labels.
common_flow_args = dict(target_size=(224, 224), class_mode='binary')

train_flow = image_gen.flow_from_directory(
    base_path + 'train/', batch_size=batch_size, **common_flow_args
)
valid_flow = image_gen.flow_from_directory(
    base_path + 'valid/', batch_size=batch_size, **common_flow_args
)
# The test split is read one image at a time and without shuffling, so the
# order of the batches is the order of test_flow.classes.
test_flow = image_gen.flow_from_directory(
    base_path + 'test/', batch_size=1, shuffle=False, **common_flow_args
)

In [None]:
import matplotlib.pyplot as plt

# Take the first batch produced by the training iterator and display its
# first nine images in a 3x3 grid, each titled with its integer label.
images, labels = next(iter(train_flow))

fig = plt.figure(figsize=(10, 10))
for idx in range(9):
    ax = fig.add_subplot(3, 3, idx + 1)
    ax.imshow(images[idx])
    ax.set_title(int(labels[idx]))
    ax.axis("off")

In [None]:
input_shape = (224,224,3)
# The four settings below are not referenced by the model built in this cell.
activation = 'relu'
padding = 'same'
droprate = 0.1
epsilon=0.001

# VGG16 convolutional base with ImageNet weights and without its classifier
# head. The module is reached through the `tensorflow` name because the
# notebook imports it as `import tensorflow` (there is no `tf` alias).
vgg = tensorflow.keras.applications.vgg16.VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Flatten the VGG16 feature maps into a single vector per image.
x = Flatten()(vgg.output)
# Binary classification head: one sigmoid unit producing a probability in
# [0, 1], as expected by the binary labels and binary cross-entropy loss.
# (Softmax over a single unit always outputs 1.0, so it cannot learn.)
prediction = Dense(1, activation='sigmoid')(x)

# Functional model from the VGG16 input to the new head; it is already built,
# so no explicit model.build() call is needed.
model = Model(inputs=vgg.input, outputs=prediction)

model.summary()

In [None]:
# for layer in model.layers:
#   print(layer.name)
#   layer.trainable = False

In [None]:
# x = Flatten()(model.output) #Output obtained on vgg19 is now flattened. 
# prediction = Dense(2, activation='softmax')(x)

In [None]:
# Configure training: Adam with a learning rate of 1e-4, binary cross-entropy
# loss, and accuracy reported as the metric.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Batches drawn per epoch: 2 for training and 1 for validation.
# NOTE(review): with batch_size = 64 this covers only a tiny part of the data
# each epoch -- presumably smoke-test values; confirm before a real run.
steps_per_epoch = 2
validation_steps = 1

# Stop once the training loss has not improved for 3 consecutive epochs.
# NOTE(review): this monitors 'loss' while the checkpoint monitors 'val_loss'
# -- confirm the difference is intended.
earlystop_callback = EarlyStopping(monitor='loss', patience=3)

# Keep only the model with the lowest validation loss seen so far.
# NOTE(review): recent Keras releases appear to accept only '.keras' or '.h5'
# file names here -- verify against the installed version.
checkpoint_callback = ModelCheckpoint(
    filepath="model.hdf5",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

history = model.fit(
    train_flow,
    validation_data=valid_flow,
    epochs=10,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[earlystop_callback, checkpoint_callback],
)

In [None]:
def _plot_history(epochs, train_values, val_values, train_label, val_label, title, ylabel):
    """Draw one training curve and one validation curve on the current figure.

    Args:
        epochs: x values (epoch numbers) shared by both curves.
        train_values: per-epoch training values, drawn as blue dots.
        val_values: per-epoch validation values, drawn as an orange line.
        train_label: legend label of the training curve.
        val_label: legend label of the validation curve.
        title: figure title.
        ylabel: label of the y axis.
    """
    plt.plot(epochs, train_values, 'bo', label=train_label)
    plt.plot(epochs, val_values, 'orange', label=val_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


def plot_loss(epochs, loss, val_loss):
    """Plot the training and validation loss against the epoch number.

    Args:
        epochs: epoch numbers used as x values.
        loss: per-epoch training loss.
        val_loss: per-epoch validation loss.
    """
    _plot_history(epochs, loss, val_loss,
                  'Training Loss', 'Validation Loss',
                  'Training and Validation Loss', 'Loss')


def plot_accuracy(epochs, acc, val_acc):
    """Plot the training and validation accuracy against the epoch number.

    Args:
        epochs: epoch numbers used as x values.
        acc: per-epoch training accuracy.
        val_acc: per-epoch validation accuracy.
    """
    _plot_history(epochs, acc, val_acc,
                  'Training accuracy', 'Validation accuracy',
                  'Training and Validation Accuracy', 'Accuracy')

In [None]:
# Per-epoch curves recorded by model.fit, keyed by metric name.
metric_history = history.history
loss, val_loss = metric_history['loss'], metric_history['val_loss']
acc, val_acc = metric_history['accuracy'], metric_history['val_accuracy']

In [None]:
# Epochs are numbered from 1; one x value per recorded loss entry.
epochs = range(1, len(loss) + 1)
plot_loss(epochs, loss, val_loss)
plot_accuracy(epochs, acc, val_acc)

In [None]:
# Ground-truth class indices of the test images, as listed by the iterator.
y_test = test_flow.classes
# Model outputs for the test set. test_flow was created with shuffle=False,
# which is what lets these be compared position by position with y_test.
y_pred = model.predict(test_flow)

In [None]:
from sklearn import metrics

# Ranking metrics are computed from the raw model outputs.
roc_auc = metrics.roc_auc_score(y_test, y_pred)
print("ROC AUC Score:", roc_auc)

average_precision = metrics.average_precision_score(y_test, y_pred)
print("AP Score:", average_precision)

# The per-class report needs hard labels: outputs above 0.5 count as class 1.
report = metrics.classification_report(y_test, y_pred > 0.5)
print(report)
