# Improved Deepfake Detector V4 - Large Dataset Training (40k)

**Based on successful model3.ipynb architecture (PROVEN TO WORK)**

**Dataset:** 48k images (40k train, 4k test, 4k validate) - 4x larger than original V4

**Previous Results with 20k train:**
- Test Accuracy: 92%
- Fake Detection: ~85-87%
- Confusion matrix showed excellent performance

**Target with 40k train:** >93% accuracy, >88% fake detection

**Strategy:**
1. **Binary crossentropy** (proven loss function)
2. **Class weights 2.0x** for fakes (penalize missing fakes)

3. **Extended epochs:** 30+40 (scaled for 4x data vs original)
4. **Proven architecture:** 1024→512→384→256→128 dense layers
5. **Clean epoch-by-epoch output**

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.applications import EfficientNetV2B0
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------------------------------------
# Configuration: input resolution, batch size and dataset root directory.
# ---------------------------------------------------------------------------
IMG_SIZE = (256, 256)
BATCH_SIZE = 16
data_dir = "/home/wizz/ML Project/Dataset"


def _load_split(split_name, shuffle=True):
    """Load one sub-directory of ``data_dir`` as a batched image dataset.

    Every split uses the same seed, image size, batch size and binary label
    mode; only the directory name and the shuffle flag differ.
    """
    return image_dataset_from_directory(
        os.path.join(data_dir, split_name),
        seed=42,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='binary',
        shuffle=shuffle,
    )


print("Loading datasets...")
train_ds = _load_split('train')
val_ds = _load_split('validate')
# The test split keeps its on-disk order (shuffle disabled).
test_ds = _load_split('test', shuffle=False)

# Random augmentations, applied to the training images only (see the
# train_ds.map call below).
_augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
    layers.RandomBrightness(0.1),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps, name='augmentation')


def preprocess(images, labels):
    """Run a batch of images through the augmentation pipeline.

    The augmentation layers are called with ``training=True`` so the random
    transforms are active; labels are passed through untouched.
    """
    return data_augmentation(images, training=True), labels


AUTOTUNE = tf.data.AUTOTUNE
# Augment only the training stream; every stream is prefetched.
train_ds = train_ds.map(preprocess, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)
test_ds = test_ds.prefetch(AUTOTUNE)

# Number of batches per split; multiplied by BATCH_SIZE below to report an
# image count (an upper bound whenever the last batch is not full).
train_batches, val_batches, test_batches = (
    tf.data.experimental.cardinality(split).numpy()
    for split in (train_ds, val_ds, test_ds)
)

print(f"\n‚úÖ Datasets loaded:")
print(f"   Train: {train_batches * BATCH_SIZE} images (40,000 expected)")
print(f"   Val:   {val_batches * BATCH_SIZE} images (4,000 expected)")
print(f"   Test:  {test_batches * BATCH_SIZE} images (4,000 expected)")
print(f"\nüìä Dataset is 4x larger than original - using extended epochs")

## Build Model - PROVEN ARCHITECTURE

**Same successful architecture from model3 + V4:**
- EfficientNetV2B0 backbone + 1024→512→384→256→128 dense layers
- Dropout: 0.5→0.4→0.35→0.3→0.2 (prevents overfitting)
- L2 regularization (0.001) on dense layers
- Binary crossentropy loss (PROVEN, not focal loss)

In [10]:
# Build model - based on model3 architecture.
#
# Layout: raw RGB input -> EfficientNetV2B0 backbone (frozen, ImageNet
# weights) -> global average pooling -> 1024/512/384/256/128 dense head
# -> single sigmoid output.
inputs = Input(shape=IMG_SIZE + (3,))

# Use EfficientNetV2 as backbone.
#
# FIX: the backbone is created with its own preprocessing layers
# (include_preprocessing=True is the Keras default), which expect raw pixel
# values in [0, 255]. The previous code put Rescaling(1./127.5, offset=-1)
# in front of it, so every image was normalised twice and the pretrained
# weights saw inputs far outside the range they were trained on. The raw
# input tensor is now fed straight to the backbone.
backbone = EfficientNetV2B0(
    include_top=False,
    weights='imagenet',
    input_tensor=inputs,
    include_preprocessing=True,
)
backbone.trainable = False  # Freeze initially

x = layers.GlobalAveragePooling2D()(backbone.output)

# Dense layers - PROVEN ARCHITECTURE FROM MODEL3 + V4.
# Each stage is Dense(relu, L2=0.001) -> BatchNormalization -> Dropout.
for units, drop_rate in ((1024, 0.5), (512, 0.4), (384, 0.35), (256, 0.3)):
    x = layers.Dense(
        units,
        activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(0.001),
    )(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(drop_rate)(x)

# Final hidden stage: no L2 penalty and no batch norm, as in the original.
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.2)(x)

# Output layer: one sigmoid unit (score for class index 1).
outputs = layers.Dense(1, activation='sigmoid', name='output')(x)

model = Model(inputs=inputs, outputs=outputs, name='DeepfakeDetector_V4')

# Compile with BINARY CROSSENTROPY (proven to work in model3 and V4)
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001),
    loss='binary_crossentropy',  # BACK TO PROVEN LOSS FUNCTION
    metrics=['accuracy',
             tf.keras.metrics.Precision(name='precision'),
             tf.keras.metrics.Recall(name='recall'),
             tf.keras.metrics.AUC(name='auc')]
)

print(f"‚úÖ Model built - Params: {model.count_params():,}")
print(f"üìå Using binary_crossentropy (proven) with class_weight=2.0x for fakes")
model.summary()

## Phase 1: Train Classifier - 40k Training Images

**Proven strategy scaled for 4x larger dataset:**
- Binary crossentropy loss
- Class weights: Real=1.0, Fake=2.0
- Monitor: val_loss (decreasing = improving)
- 30 epochs (scaled for 40k images), patience=8

In [None]:
# Phase 1: train only the classifier head while the backbone stays frozen.

# Class weights - proven approach from model V4.
# Errors on class 1 count twice as much in the loss as errors on class 0.
class_weight = {
    0: 1.0,   # Real images
    1: 2.0    # Fake images - 2x penalty
}

print("="*60)
print("PHASE 1: Training classifier (frozen backbone)")
print("="*60)
print(f"Loss: binary_crossentropy | Class weights: {class_weight}")
print(f"Training on 40,000 images (20k real + 20k fake)")
print(f"Epochs: 30 | Monitoring: val_loss")
print("="*60 + "\n")

# Callbacks - SIMPLE AND PROVEN
callbacks_phase1 = [
    # Stop once val_loss has not improved for 8 epochs and roll back to the
    # best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',  # Monitor LOSS, not AUC
        patience=8,
        restore_best_weights=True,
        mode='min',
        verbose=1
    ),
    # Halve the learning rate after 3 epochs without val_loss improvement.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',  # Monitor LOSS, not AUC
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        mode='min',
        verbose=1
    ),
    # Separately keep the checkpoint with the highest validation recall.
    tf.keras.callbacks.ModelCheckpoint(
        'best_recall_phase1.h5',
        monitor='val_recall',
        mode='max',
        save_best_only=True,
        verbose=1
    )
]

# FIX: the original call passed `epochs` twice (a SyntaxError) and never
# passed validation data, although every callback above monitors a val_*
# metric and the summary below reads history['val_*'].
history1 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,  # Increased for 40k training images
    class_weight=class_weight,
    callbacks=callbacks_phase1,
    verbose=1  # Clean epoch-by-epoch output
)

print(f"\n{'='*60}")
print(f"PHASE 1 COMPLETE")
print(f"{'='*60}")
print(f"Best Accuracy: {max(history1.history['val_accuracy']):.2%}")
print(f"Best Recall:   {max(history1.history['val_recall']):.2%}")
print(f"Best AUC:      {max(history1.history['val_auc']):.4f}")
print(f"{'='*60}\n")

## Phase 2: Fine-tune - 40k Training Images

Unfreeze backbone and fine-tune with 40k training images.
40 epochs (scaled for larger dataset), patience=10, monitor val_loss

In [None]:
# Phase 2: unfreeze the backbone and fine-tune the whole network at a much
# lower learning rate.
print("="*60)
print("PHASE 2: Fine-tuning entire model")
print("="*60)

# Unfreeze backbone
# NOTE(review): this also puts the backbone's BatchNormalization layers in
# training mode; the Keras transfer-learning guide recommends keeping them
# in inference mode while fine-tuning - confirm this is intended.
backbone.trainable = True
# FIX: model.count_params() returns the TOTAL parameter count; sum the
# trainable weights so the number matches the label.
trainable_params = sum(int(np.prod(tuple(w.shape))) for w in model.trainable_weights)
print(f"Trainable params: {trainable_params:,}")

# Recompile with lower learning rate (required for the trainable change to
# take effect in fit()).
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-5, weight_decay=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy',
             tf.keras.metrics.Precision(name='precision'),
             tf.keras.metrics.Recall(name='recall'),
             tf.keras.metrics.AUC(name='auc')]
)

print(f"Training on 40,000 images")
print(f"Epochs: 40 | Monitoring: val_loss")
print("="*60 + "\n")

# Callbacks - SIMPLE AND PROVEN
callbacks_phase2 = [
    # Stop once val_loss has not improved for 10 epochs and roll back to the
    # best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',  # Monitor LOSS, not AUC
        patience=10,
        restore_best_weights=True,
        mode='min',
        verbose=1
    ),
    # Cut the learning rate to 30% after 4 epochs without improvement.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',  # Monitor LOSS, not AUC
        factor=0.3,
        patience=4,
        min_lr=1e-8,
        mode='min',
        verbose=1
    ),
    # Separately keep the checkpoint with the highest validation recall.
    tf.keras.callbacks.ModelCheckpoint(
        'best_recall_phase2.h5',
        monitor='val_recall',
        mode='max',
        save_best_only=True,
        verbose=1
    )
]

# FIX: the original call passed `epochs` twice (a SyntaxError) and never
# passed validation data, although every callback above monitors a val_*
# metric and the summary below reads history['val_*'].
history2 = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=40,  # Increased for 40k training images
    class_weight=class_weight,
    callbacks=callbacks_phase2,
    verbose=1  # Clean epoch-by-epoch output
)

print(f"\n{'='*60}")
print(f"PHASE 2 COMPLETE")
print(f"{'='*60}")
# These are the best per-epoch validation values, not the last epoch's.
print(f"Final Accuracy: {max(history2.history['val_accuracy']):.2%}")
print(f"Final Recall:   {max(history2.history['val_recall']):.2%}")
print(f"Final AUC:      {max(history2.history['val_auc']):.4f}")
print(f"{'='*60}\n")

model.save('deepfake_detector_v4_40k.h5')
print("‚úÖ Model saved: deepfake_detector_v4_40k.h5")

# FIX: two print calls were fused on one line (a SyntaxError); the second
# repeated the "Model saved" message with a filename that is never written,
# so it is dropped.
print("üìä Trained on 40,000 images (20k real + 20k fake)")

## Training Visualization

In [None]:
# Plot training history: one panel per metric, Phase 1 in blue and Phase 2
# in red, with Phase 2 epochs numbered after the last Phase 1 epoch.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))

metrics = ['loss', 'accuracy', 'precision', 'recall', 'auc']
titles = ['Loss', 'Accuracy', 'Precision', 'Recall (Fake Detection)', 'AUC']

for idx, (metric, title) in enumerate(zip(metrics, titles)):
    # Fill the 2x3 grid row by row.
    ax = axes[idx // 3, idx % 3]

    # Phase 1
    phase1_train = history1.history[metric]
    phase1_val = history1.history[f'val_{metric}']
    epochs1 = range(1, len(phase1_train) + 1)

    # Phase 2 (x axis continues where Phase 1 stopped)
    phase2_train = history2.history[metric]
    phase2_val = history2.history[f'val_{metric}']
    epochs2 = range(len(phase1_train) + 1, len(phase1_train) + len(phase2_train) + 1)

    # Plot: solid = training, dashed = validation
    ax.plot(epochs1, phase1_train, 'b-', label='Phase 1 Train', linewidth=2)
    ax.plot(epochs1, phase1_val, 'b--', label='Phase 1 Val', linewidth=2)
    ax.plot(epochs2, phase2_train, 'r-', label='Phase 2 Train', linewidth=2)
    ax.plot(epochs2, phase2_val, 'r--', label='Phase 2 Val', linewidth=2)

    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=11)
    ax.set_ylabel(title, fontsize=11)
    ax.legend(fontsize=9)
    ax.grid(alpha=0.3)

# Five metrics on a six-panel grid: remove the unused bottom-right axes.
fig.delaxes(axes[1, 2])
plt.tight_layout()
plt.savefig('training_history_v4_large.png', dpi=300, bbox_inches='tight')
plt.show()

# Validation metrics of the LAST Phase 2 epoch.
print("\n" + "="*70)
print("VALIDATION METRICS - 40K TRAINING")
print("="*70)
print(f"Accuracy:  {history2.history['val_accuracy'][-1]:.2%} (Target: >93%)")
print(f"Precision: {history2.history['val_precision'][-1]:.2%}")
print(f"Recall:    {history2.history['val_recall'][-1]:.2%} (Target: >88%)")
print(f"AUC:       {history2.history['val_auc'][-1]:.4f}")
print("="*70)

# FIX: these two print calls were fused on one line (a SyntaxError).
print("\nüí° Previous 20k train: 92% accuracy, ~85-87% recall")
print("   Goal with 40k train: >93% accuracy, >88% recall")

## Test Set Evaluation

In [None]:
# Test-set evaluation: aggregate Keras metrics, per-class error counts at a
# 0.5 threshold, a comparison table against earlier runs, and a verdict.
print("="*70)
print("TEST SET EVALUATION")
print("="*70)

# Order of results follows compile(): [loss, accuracy, precision, recall, auc]
test_results = model.evaluate(test_ds, verbose=1)

print("\n" + "="*70)
print("MODEL V4 TEST RESULTS")
print("="*70)
print(f"Test Loss:      {test_results[0]:.4f}")
print(f"Test Accuracy:  {test_results[1]:.4f} ({test_results[1]*100:.2f}%)")
print(f"Test Precision: {test_results[2]:.4f}")
print(f"Test Recall:    {test_results[3]:.4f}")
print(f"Test AUC:       {test_results[4]:.4f}")
print("="*70)

# Get predictions for detailed analysis
print("\nGenerating predictions for detailed analysis...")
y_true = []
y_pred_probs = []

for images, labels in test_ds:
    # predict_on_batch handles a single batch without the per-call overhead
    # of model.predict().
    predictions = model.predict_on_batch(images)
    y_pred_probs.extend(np.asarray(predictions).ravel())
    # FIX: with label_mode='binary' each label batch has shape (batch, 1).
    # Without flattening, y_true became (N, 1) while y_pred was (N,), so the
    # masks below broadcast to (N, N) and the FN/FP counts were inflated.
    y_true.extend(labels.numpy().ravel())

y_true = np.array(y_true, dtype=np.float32)
y_pred_probs = np.array(y_pred_probs, dtype=np.float32)
y_pred = (y_pred_probs >= 0.5).astype(int)

# Calculate false negatives (label 1 predicted as 0)
false_negatives = int(np.sum((y_true == 1) & (y_pred == 0)))
total_fakes = int(np.sum(y_true == 1))
# Guard against a test set that contains no samples of this class.
fn_rate = false_negatives / total_fakes if total_fakes else 0.0

# Calculate false positives (label 0 predicted as 1)
false_positives = int(np.sum((y_true == 0) & (y_pred == 1)))
total_reals = int(np.sum(y_true == 0))
fp_rate = false_positives / total_reals if total_reals else 0.0

# NOTE(review): the baseline columns are hard-coded numbers from earlier
# runs; the absolute false-negative counts presumably come from a test set
# with 1000 fakes - confirm they are comparable with this test set.
print("\n" + "="*70)
print("COMPARISON - PROGRESSIVE IMPROVEMENT")
print("="*70)
print(f"{'Metric':<25} | {'Model 3':<12} | {'V4 (12k)':<12} | {'V4 (20k)':<12} | {'V4 (40k)':<12} | {'Change'}")
print("-"*70)
print(f"{'Overall Accuracy':<25} | {'87.25%':<12} | {'89.15%':<12} | {'~92.00%':<12} | {f'{test_results[1]*100:.2f}%':<12} | {(test_results[1]-0.92)*100:+.2f}%")
print(f"{'Fake Detection (Recall)':<25} | {'77.50%':<12} | {'81.10%':<12} | {'~87.00%':<12} | {f'{test_results[3]*100:.2f}%':<12} | {(test_results[3]-0.87)*100:+.2f}%")
print(f"{'False Negatives':<25} | {'225':<12} | {'189':<12} | {'~130':<12} | {f'{false_negatives}':<12} | {false_negatives-130:+.0f}")
print(f"{'False Negative Rate':<25} | {'22.50%':<12} | {'18.90%':<12} | {'~13.00%':<12} | {f'{fn_rate*100:.2f}%':<12} | {(fn_rate-0.13)*100:+.2f}%")
print(f"{'Real Detection':<25} | {'97.00%':<12} | {'97.20%':<12} | {'~97.50%':<12} | {f'{(1-fp_rate)*100:.2f}%':<12} | {((1-fp_rate)-0.975)*100:+.2f}%")
print(f"{'Precision':<25} | {'96.27%':<12} | {'96.87%':<12} | {'~97.00%':<12} | {f'{test_results[2]*100:.2f}%':<12} | {(test_results[2]-0.97)*100:+.2f}%")
print(f"{'AUC':<25} | {'95.93%':<12} | {'96.34%':<12} | {'~96.80%':<12} | {f'{test_results[4]*100:.2f}%':<12} | {(test_results[4]-0.968)*100:+.2f}%")
print("="*70)

# Success criteria for 40k training
target_recall = 0.88  # 88% fake detection = 12% FN rate
target_accuracy = 0.93  # 93% overall accuracy
target_fn = 120  # Out of 1000 fakes (improved from ~130 with 20k)

if test_results[3] >= target_recall and test_results[1] >= target_accuracy:
    print("\nüéØ EXCELLENT: Target achieved with 40k training!")
    print(f"   Accuracy: {test_results[1]*100:.2f}% (Target: ‚â•93%)")
    print(f"   Fake detection: {test_results[3]*100:.2f}% (Target: ‚â•88%)")
    print(f"   False negatives: {false_negatives} (Target: <{target_fn})")
    print(f"   Improvement from 20k: {int((0.87-test_results[3])*-1000)} fewer missed fakes")
elif test_results[3] >= 0.85 or test_results[1] >= 0.92:  # Close to target
    print("\n‚úÖ STRONG IMPROVEMENT: Close to target!")
    print(f"   Accuracy: {test_results[1]*100:.2f}% (Target: 93%)")
    print(f"   Fake detection: {test_results[3]*100:.2f}% (Target: 88%)")
    print(f"   False negatives: {false_negatives}")
    print("\nüí° Fine-tune: Try threshold=0.45-0.48 to optimize balance")
else:
    print("\n‚ö†Ô∏è  Below expected improvement")

# FIX: the tail of this cell was a garbled merge of two versions (a stray
# indented print, two print calls fused on one line, the header printed
# after its items, and a stale model filename). It is reduced to one
# consistent list that names the file actually written by model.save().
print("\nüí° Next steps:")
print(f"   1. Update predict_gui.py to use: deepfake_detector_v4_40k.h5")
print(f"   2. If recall <88%: Try threshold=0.45-0.48 for better fake detection")
print(f"   3. Test best_recall_phase2.h5 if you need maximum fake detection")
print(f"   4. Deploy model - 40k training should give production-ready results")