In [None]:
!pip install scikit-learn pandas datasets transformers torchvision evaluate accelerate

# Load Xception Model for Transfer Learning

In [1]:
!export TRANSFORMERS_VERBOSITY=error
import torch
from utils import xception, TransferModel, create_path_label_dataset, get_transforms, evaluate_model, DeepFakeDetector

In [2]:
old_model = xception()
transforms = get_transforms(old_model)
model = TransferModel(model=old_model,
                             num_out_classes=2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Utils for Creating Finetuning Dataset

In [3]:
transforms = get_transforms(old_model)
labels = ['Real', 'Fake']

def create_finetune_dataset(folder_true, folder_false):
    dataset = create_path_label_dataset(folder_true, folder_false)
    dataset = dataset.train_test_split(test_size=0.2)
    dataset = dataset.with_transform(transforms)
    return dataset

# Trainer API for Convenient Training

In [6]:
import evaluate
import numpy as np
from transformers import TrainingArguments, Trainer, DefaultDataCollator

accuracy = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

## Finetune Model Without DataAugmentation

In [9]:
# Prepare for Finetuning Dataset
folder_true = '/home/jovyan/shared/S25/nochange/true'
folder_false = '/home/jovyan/shared/S25/nochange/false'

dataset = create_finetune_dataset(folder_true, folder_false)

# Training Configurations
training_args = TrainingArguments(
    output_dir="deepfake-unchanged",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)

# Trainer Setup
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=DefaultDataCollator(),
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    compute_metrics=compute_metrics,
)

# Run Training
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy
0,0.1674,0.129588,0.973
1,0.1212,0.079329,0.987


TrainOutput(global_step=124, training_loss=0.2133740455873551, metrics={'train_runtime': 684.2005, 'train_samples_per_second': 11.692, 'train_steps_per_second': 0.181, 'total_flos': 0.0, 'train_loss': 0.2133740455873551, 'epoch': 1.98})

## Finetune Model With DataAugmentation

In [10]:
# Prepare for Finetuning Dataset
folder_true = '/home/jovyan/shared/S25/nochange/true'
folder_false = '/home/jovyan/shared/S25/nochange/false'

dataset = create_finetune_dataset(folder_true, folder_false)

# Training Configurations
training_args = TrainingArguments(
    output_dir="deepfake-changed",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)

# Trainer Setup
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=DefaultDataCollator(),
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    compute_metrics=compute_metrics,
)

# Run Training
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy
0,0.0353,0.049969,0.984
1,0.0621,0.030726,0.995


TrainOutput(global_step=124, training_loss=0.055367469787597656, metrics={'train_runtime': 649.4912, 'train_samples_per_second': 12.317, 'train_steps_per_second': 0.191, 'total_flos': 0.0, 'train_loss': 0.055367469787597656, 'epoch': 1.98})

# Evaluation 

In [4]:
import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)
from utils import xception_from_tensors

## Model With Data Augmentation

####  Original Finetuning Dataset

In [5]:
model = xception_from_tensors('checkpoints/changed-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/Training2500/With_Augmentation/TrainingReal2500'
folder_false = base_path + '/Training2500/With_Augmentation/TrainingFake2500'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 0.0817825123667717,
 'eval_accuracy': 0.98,
 'eval_runtime': 90.4352,
 'eval_samples_per_second': 2.212,
 'eval_steps_per_second': 0.276}

#### Celeb-DF Dataset

In [7]:
model = xception_from_tensors('checkpoints/changed-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/ValidationCeleb/Real1000'
folder_false = base_path + '/ValidationCeleb/Fake1000'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

{'eval_loss': 0.9286133050918579, 'eval_accuracy': 0.45, 'eval_runtime': 73.4797, 'eval_samples_per_second': 2.722, 'eval_steps_per_second': 0.34}


{'eval_loss': 0.9286133050918579,
 'eval_accuracy': 0.45,
 'eval_runtime': 73.4797,
 'eval_samples_per_second': 2.722,
 'eval_steps_per_second': 0.34}

#### DeepFakeMNIST+ Dataset

In [6]:
model = xception_from_tensors('checkpoints/changed-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/ValidationMNIST+/Real1000'
folder_false = base_path + '/ValidationMNIST+/Fake1000'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 0.7522754073143005,
 'eval_accuracy': 0.515,
 'eval_runtime': 59.6124,
 'eval_samples_per_second': 3.355,
 'eval_steps_per_second': 0.419}

## Model Without Data Augementation

####  Original Finetuning Dataset

In [6]:
model = xception_from_tensors('checkpoints/unchanged-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/Training2500/Without_Augmentation/TrainingReal2500'
folder_false = base_path + '/Training2500/Without_Augmentation/TrainingFake2500'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 0.026446111500263214,
 'eval_accuracy': 0.995,
 'eval_runtime': 136.366,
 'eval_samples_per_second': 1.467,
 'eval_steps_per_second': 0.183}

#### Celeb-DF Dataset

In [7]:
model = xception_from_tensors('checkpoints/unchanged-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/ValidationCeleb/Real1000'
folder_false = base_path + '/ValidationCeleb/Fake1000'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 1.980319857597351,
 'eval_accuracy': 0.56,
 'eval_runtime': 83.7656,
 'eval_samples_per_second': 2.388,
 'eval_steps_per_second': 0.298}

#### DeepFakeMNIST+ Dataset

In [12]:
model = xception_from_tensors('checkpoints/unchanged-model.safetensors')

base_path = '/mnt/c/Users/28379/OneDrive/Desktop/AI/proj/Datasets'
folder_true = base_path + '/ValidationMNIST+/Real1000'
folder_false = base_path + '/ValidationMNIST+/Fake1000'

test_ds = create_path_label_dataset(folder_true, folder_false, n=200).with_transform(transforms)    
evaluate_model(model, test_ds, compute_metrics)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 1.5682590007781982,
 'eval_accuracy': 0.565,
 'eval_runtime': 109.9382,
 'eval_samples_per_second': 1.819,
 'eval_steps_per_second': 0.227}