In [2]:
!pip install tensorflow kagglehub



In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import mixed_precision

In [4]:
# Set mixed precision policy for faster training on GPUs
mixed_precision.set_global_policy('mixed_float16')
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  2


In [5]:
# Data Augmentation Layers
random_rotation = tf.keras.layers.RandomRotation(0.2, dtype=tf.float32)
random_zoom = tf.keras.layers.RandomZoom(0.2, dtype=tf.float32)
random_flip = tf.keras.layers.RandomFlip("horizontal", dtype=tf.float32)
random_contrast = tf.keras.layers.RandomContrast(0.3, dtype=tf.float32)

def augment_image(image, label):
    image = tf.image.convert_image_dtype(image, tf.float32)
    
    def apply_augmentation(img):
        choice = tf.random.uniform(shape=[], minval=0, maxval=7, dtype=tf.int32)
        
        def apply_flip(): return random_flip(img)
        def apply_brightness(): return tf.image.random_brightness(img, 0.4)
        def apply_contrast(): return random_contrast(img)
        def apply_rotation(): return random_rotation(tf.expand_dims(img, 0))[0]
        def apply_zoom(): return random_zoom(tf.expand_dims(img, 0))[0]
        def apply_hue(): return tf.image.random_hue(img, 0.3)
        def apply_saturation(): return tf.image.random_saturation(img, 0.6, 1.6)
            
        return tf.switch_case(choice, [
            apply_flip, apply_brightness, apply_contrast, apply_rotation,
            apply_zoom, apply_hue, apply_saturation
        ])

    # 50% chance to apply augmentation
    image = tf.cond(
        tf.random.uniform(()) < 0.5,
        lambda: apply_augmentation(image),
        lambda: image
    )
    
    image = preprocess_input(image)
    return tf.cast(image, tf.float16), label

In [6]:
AUTOTUNE = tf.data.AUTOTUNE

def create_dataset(directory, augment=False):
    ds = tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(256, 256),
        batch_size=None,
        shuffle=augment,
        seed=42 if augment else None
    )
    if augment:
        ds = ds.map(augment_image, num_parallel_calls=AUTOTUNE)
    else:
        ds = ds.map(lambda x, y: (preprocess_input(x), y), num_parallel_calls=AUTOTUNE)
    return ds.batch(256).prefetch(AUTOTUNE)

In [7]:
# Download dataset using kagglehub
import kagglehub
dataset_path = kagglehub.dataset_download("manjilkarki/deepfake-and-real-images")
base_dir = os.path.join(dataset_path, "Dataset")

train_ds = create_dataset(os.path.join(base_dir, "Train"), augment=True)
val_ds = create_dataset(os.path.join(base_dir, "Validation"))
test_ds = create_dataset(os.path.join(base_dir, "Test"))

Found 140002 files belonging to 2 classes.
Found 39428 files belonging to 2 classes.
Found 10905 files belonging to 2 classes.


In [8]:
# Model Setup: Use ResNet50 as a feature extractor
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
base_model.trainable = False  # Freeze base model for initial training

x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
x = tf.keras.layers.BatchNormalization()(x)
x = tf.keras.layers.Activation('relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
output = tf.keras.layers.Dense(1, activation='sigmoid', dtype='float32')(x)

model = tf.keras.Model(inputs=base_model.input, outputs=output)

# Callbacks: Checkpointing, EarlyStopping, and Learning Rate Reduction
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_auc', mode='max', factor=0.5, patience=2, min_lr=1e-7, verbose=1
)
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "best_model.keras", monitor='val_auc', mode='max', save_best_only=True, verbose=1
)
earlystop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc', mode='max', patience=5, restore_best_weights=True, verbose=1
)

# Use BinaryCrossentropy with label smoothing to prevent overconfidence
loss_fn = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.1)

# Phase 1: Train with frozen base
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=loss_fn,
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [9]:
history_phase1 = model.fit(
    train_ds,
    epochs=30,
    validation_data=val_ds,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr]
)

Epoch 1/30
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 848ms/step - accuracy: 0.7203 - auc: 0.7967 - loss: 5.9587
Epoch 1: val_auc improved from -inf to 0.90867, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m591s[0m 1s/step - accuracy: 0.7204 - auc: 0.7967 - loss: 5.9553 - val_accuracy: 0.8073 - val_auc: 0.9087 - val_loss: 1.7008 - learning_rate: 1.0000e-04
Epoch 2/30
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676ms/step - accuracy: 0.8033 - auc: 0.8870 - loss: 1.3961
Epoch 2: val_auc improved from 0.90867 to 0.91724, saving model to best_model.keras
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m422s[0m 772ms/step - accuracy: 0.8033 - auc: 0.8870 - loss: 1.3957 - val_accuracy: 0.8353 - val_auc: 0.9172 - val_loss: 0.8262 - learning_rate: 1.0000e-04
Epoch 3/30
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 673ms/step - accuracy: 0.8216 - auc: 0.9051 - loss: 0.7789


In [10]:
# Evaluate Phase 1
model.load_weights("best_model.keras")
test_loss, test_acc, test_auc = model.evaluate(test_ds)
print(f"Phase 1 - Test Loss: {test_loss:.4f}")
print(f"Phase 1 - Test Accuracy: {test_acc:.4f}")
print(f"Phase 1 - Test AUC: {test_auc:.4f}")

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 940ms/step - accuracy: 0.8139 - auc: 0.4395 - loss: 0.5174
Phase 1 - Test Loss: 0.6631
Phase 1 - Test Accuracy: 0.7334
Phase 1 - Test AUC: 0.8175


In [11]:
# Phase 2: Fine-Tuning
# Instead of unfreezing the entire base model, unfreeze only the top layers.
fine_tune_at = len(base_model.layers) - 50
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False
for layer in base_model.layers[fine_tune_at:]:
    layer.trainable = True

# Compile with a lower learning rate for fine-tuning
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss=loss_fn,
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)

In [None]:
history_phase2 = model.fit(
    train_ds,
    epochs=10,s
    validation_data=val_ds,
    callbacks=[checkpoint_cb, earlystop_cb, reduce_lr]
)

Epoch 1/10
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 804ms/step - accuracy: 0.9974 - auc: 0.9999 - loss: 0.2138
Epoch 1: val_auc did not improve from 0.99543
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m489s[0m 894ms/step - accuracy: 0.9974 - auc: 0.9999 - loss: 0.2138 - val_accuracy: 0.9707 - val_auc: 0.9952 - val_loss: 0.2549 - learning_rate: 2.5000e-06
Epoch 2/10
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 789ms/step - accuracy: 0.9968 - auc: 0.9998 - loss: 0.2140
Epoch 2: val_auc did not improve from 0.99543
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m481s[0m 879ms/step - accuracy: 0.9968 - auc: 0.9998 - loss: 0.2140 - val_accuracy: 0.9713 - val_auc: 0.9953 - val_loss: 0.2542 - learning_rate: 2.5000e-06
Epoch 3/10
[1m357/547[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2:30[0m 792ms/step - accuracy: 0.9971 - auc: 0.9999 - loss: 0.2135

In [13]:
# Evaluate Phase 2
model.load_weights("best_model.keras")
test_loss, test_acc, test_auc = model.evaluate(test_ds)
print(f"Phase 2 - Test Loss: {test_loss:.4f}")
print(f"Phase 2 - Test Accuracy: {test_acc:.4f}")
print(f"Phase 2 - Test AUC: {test_auc:.4f}")

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 386ms/step - accuracy: 0.9261 - auc: 0.4926 - loss: 0.3369
Phase 2 - Test Loss: 0.4846
Phase 2 - Test Accuracy: 0.8534
Phase 2 - Test AUC: 0.9215
