<a href="https://colab.research.google.com/github/zaidlameer/DeetectorPrototype/blob/main/preTrainedModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files

# Bind the result instead of leaving it as the cell's last expression:
# files.upload() returns a {filename: bytes} dict, and echoing it would write
# the full contents of kaggle.json (including the API key) into the saved
# notebook output.
uploaded_files = files.upload()
print("Uploaded:", ", ".join(uploaded_files))

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"zaidthemler","key":"<REDACTED - revoke and regenerate this Kaggle API token>"}'}

In [None]:
# Move the uploaded kaggle.json into ~/.kaggle/ (presumably where the kaggle
# CLI used later in this notebook looks for credentials - confirm) and
# restrict it to owner read/write, since the file holds an API key.
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!pip install timm



In [None]:
!pip install torch torchvision transformers pillow matplotlib opencv-python pandas



In [None]:
!pip install tqdm



In [None]:
# Download the DFD dataset archive with the kaggle CLI.
# The recorded output of this cell shows a 22.5G zip being written to /content
# and taking about 17 minutes, so avoid re-running it needlessly.
!kaggle datasets download -d sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset

Dataset URL: https://www.kaggle.com/datasets/sanikatiwarekar/deep-fake-detection-dfd-entire-original-dataset
License(s): MIT
Downloading deep-fake-detection-dfd-entire-original-dataset.zip to /content
100% 22.5G/22.5G [17:02<00:00, 23.9MB/s]
100% 22.5G/22.5G [17:02<00:00, 23.6MB/s]


In [None]:
import zipfile
import os

# Extract to an absolute path: the cells that read the videos use
# "/content/dataset-folder/...", so a relative target only worked when the
# working directory happened to be /content.
ARCHIVE_PATH = "/content/deep-fake-detection-dfd-entire-original-dataset.zip"
EXTRACT_DIR = "/content/dataset-folder"

# Marker file written only after a complete extraction, so re-running the cell
# skips the multi-gigabyte unzip but a half-finished extraction is retried.
extraction_marker = os.path.join(EXTRACT_DIR, ".extraction_complete")

if not os.path.exists(extraction_marker):
    with zipfile.ZipFile(ARCHIVE_PATH, 'r') as zip_ref:
        zip_ref.extractall(EXTRACT_DIR)
    with open(extraction_marker, "w") as marker_file:
        marker_file.write("ok\n")

In [None]:
import os
import cv2
import torch
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification


In [None]:
# Paths for original and manipulated videos
original_videos_dir = "/content/dataset-folder/DFD_original sequences"
manipulated_videos_dir = "/content/dataset-folder/DFD_manipulated_sequences/DFD_manipulated_sequences"

# Collect video paths and labels.
# os.listdir() returns entries in arbitrary order, so the listings are sorted:
# without a stable input order, the seeded train/validation split
# (random_state=42) would not select the same videos from run to run.
original_videos = [
    os.path.join(original_videos_dir, filename)
    for filename in sorted(os.listdir(original_videos_dir))
]
manipulated_videos = [
    os.path.join(manipulated_videos_dir, filename)
    for filename in sorted(os.listdir(manipulated_videos_dir))
]

# Fail early with a clear message instead of a confusing error downstream.
assert original_videos, f"no files found in {original_videos_dir}"
assert manipulated_videos, f"no files found in {manipulated_videos_dir}"

original_labels = [0] * len(original_videos)  # 0 for original videos
manipulated_labels = [1] * len(manipulated_videos)  # 1 for manipulated videos

all_videos = original_videos + manipulated_videos
labels = original_labels + manipulated_labels


In [None]:
class DeepfakeDataset(Dataset):
    """Video dataset that reduces each clip to one averaged frame.

    For every video, up to ``frame_count`` leading frames are decoded,
    transformed to tensors, averaged, and passed through ``processor`` to
    produce the model's ``pixel_values``.

    Args:
        videos: Sequence of video file paths.
        labels: Sequence of integer class labels, aligned with ``videos``.
        processor: Callable image processor invoked as
            ``processor(images=..., return_tensors="pt", do_rescale=True)``
            and returning a mapping with a ``'pixel_values'`` entry.
        frame_count: Maximum number of leading frames read per video.
        transform: Callable applied to each PIL frame. Defaults to a resize
            to 224x224 followed by ``ToTensor``. The aggregation below
            multiplies by 255, so it assumes values in [0, 1].

    Raises:
        ValueError: If ``videos`` and ``labels`` differ in length.
    """

    def __init__(self, videos, labels, processor, frame_count=5, transform=None):
        if len(videos) != len(labels):
            raise ValueError(
                f"videos and labels must have the same length "
                f"({len(videos)} != {len(labels)})"
            )
        self.videos = videos
        self.labels = labels
        self.processor = processor
        self.frame_count = frame_count
        self.transform = transform or transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.videos)

    def _read_frames(self, video_path):
        """Decode up to ``frame_count`` leading frames as transformed tensors.

        Returns an empty list when no frame can be read from the file.
        """
        frames = []
        cap = cv2.VideoCapture(video_path)
        try:
            for _ in range(self.frame_count):
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV yields BGR; PIL and the transform expect RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(self.transform(Image.fromarray(frame)))
        finally:
            # Release the capture even if decoding or the transform raises.
            cap.release()
        return frames

    def _frames_to_pixel_values(self, frames):
        """Average ``frames`` and run the result through the processor.

        Only frames that were actually decoded are averaged. Padding a short
        clip with black frames before taking the mean would darken the image
        in proportion to the number of missing frames.
        """
        if frames:
            mean_frame = torch.stack(frames).mean(dim=0)
        else:
            # Unreadable video: fall back to a blank RGB frame of size [3, 224, 224].
            mean_frame = torch.zeros(3, 224, 224)

        # Convert the [0, 1] float frame to a uint8 image in [0, 255].
        image_uint8 = (mean_frame * 255).clamp(0, 255).byte()

        # The image is in 0-255, so the processor must rescale it itself.
        # Passing do_rescale=False here would feed raw 0-255 values into the
        # processor's later steps.
        inputs = self.processor(images=image_uint8, return_tensors="pt", do_rescale=True)
        return inputs['pixel_values'].squeeze(0)

    def __getitem__(self, idx):
        """Return ``(pixel_values, label_tensor)`` for the video at ``idx``."""
        video_path = self.videos[idx]
        label = self.labels[idx]

        frames = self._read_frames(video_path)
        pixel_values = self._frames_to_pixel_values(frames)

        return pixel_values, torch.tensor(label)



# Seed the RNGs so that DataLoader shuffling is repeatable, in line with the
# seeded train/validation split below.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

MODEL_NAME = "Wvolf/ViT_Deepfake_Detection"
BATCH_SIZE = 64

# Initialize Dataset and DataLoader
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
train_videos, val_videos, train_labels, val_labels = train_test_split(
    all_videos, labels, test_size=0.2, random_state=SEED, stratify=labels
)
train_dataset = DeepfakeDataset(train_videos, train_labels, processor)
val_dataset = DeepfakeDataset(val_videos, val_labels, processor)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


# Model Setup
# Resolve the device once and reuse it for the model and for every batch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForImageClassification.from_pretrained(MODEL_NAME)
# NOTE(review): this only edits the config after the weights are loaded; it
# does not rebuild the classification head. Presumably the checkpoint already
# has two output classes - confirm.
model.config.num_labels = 2
model.to(device)

# Optimizer and loss used by the training loop
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=1e-4)
criterion = torch.nn.CrossEntropyLoss()


def evaluate(model, val_loader, criterion, device=None):
    """Compute validation loss and accuracy for ``model``.

    Args:
        model: Module called as ``model(pixel_values=...)`` whose output has
            a ``logits`` attribute.
        val_loader: Iterable of ``(pixel_values, labels)`` batches.
        criterion: Loss callable applied to ``(logits, labels)``.
        device: Device batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        ``(mean_batch_loss, accuracy)``. The loss is averaged over batches.
        The accuracy is correct predictions divided by samples seen, so a
        short final batch is not over-weighted. Both are ``nan`` when the
        loader yields no samples.

    Side effects:
        Leaves ``model`` in eval mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    loss_sum = 0.0
    batch_count = 0
    correct = 0
    seen = 0
    with torch.no_grad():
        for pixel_values, labels in val_loader:
            pixel_values, labels = pixel_values.to(device), labels.to(device)
            outputs = model(pixel_values=pixel_values)
            loss_sum += criterion(outputs.logits, labels).item()
            batch_count += 1
            _, predicted = torch.max(outputs.logits, 1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

    if batch_count == 0 or seen == 0:
        # Nothing to evaluate: report nan instead of dividing by zero.
        return float("nan"), float("nan")
    return loss_sum / batch_count, correct / seen

NUM_EPOCHS = 20
OUTPUT_DIR = "fine_tuned_deepfake_vit"

# Training does not modify the processor, so save it before the loop starts.
# An interrupted run then still leaves a loadable directory.
processor.save_pretrained(OUTPUT_DIR)

# Checkpoint whenever validation loss improves, rather than saving once after
# the final epoch. An interrupted run keeps its best weights, and a run that
# overfits in later epochs does not overwrite them.
best_val_loss = float("inf")

for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0.0
    total_correct = 0
    total_samples = 0

    for pixel_values, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        pixel_values, labels = pixel_values.to(device), labels.to(device)

        # Forward pass
        outputs = model(pixel_values=pixel_values)
        loss = criterion(outputs.logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update total loss
        total_loss += loss.item()

        # Accumulate correct predictions and sample count for epoch accuracy
        _, predicted = torch.max(outputs.logits, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

    # Calculate average training loss and accuracy
    avg_train_loss = total_loss / len(train_loader)
    train_accuracy = total_correct / total_samples

    # Evaluate on validation data
    val_loss, val_accuracy = evaluate(model, val_loader, criterion)

    # Print training and validation metrics
    print(f"Epoch {epoch+1}: "
          f"Train Loss = {avg_train_loss:.4f}, Train Acc = {train_accuracy:.4f}, "
          f"Val Loss = {val_loss:.4f}, Val Acc = {val_accuracy:.4f}")

    # Save the Model when it is the best seen so far on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        model.save_pretrained(OUTPUT_DIR)

# Fallback: if no epoch produced a comparable validation loss, still persist
# the final weights so the run leaves a model behind.
if best_val_loss == float("inf"):
    model.save_pretrained(OUTPUT_DIR)

config.json:   0%|          | 0.00/719 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/343M [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 43/43 [08:55<00:00, 12.44s/it]


Epoch 1: Train Loss = 0.3591, Train Acc = 0.8944, Val Loss = 0.3378, Val Acc = 0.8953


Epoch 2: 100%|██████████| 43/43 [08:28<00:00, 11.82s/it]


Epoch 2: Train Loss = 0.3398, Train Acc = 0.8944, Val Loss = 0.3372, Val Acc = 0.8953


Epoch 3: 100%|██████████| 43/43 [08:29<00:00, 11.85s/it]


Epoch 3: Train Loss = 0.3361, Train Acc = 0.8944, Val Loss = 0.3382, Val Acc = 0.8953


Epoch 4: 100%|██████████| 43/43 [08:41<00:00, 12.14s/it]


Epoch 4: Train Loss = 0.3344, Train Acc = 0.8944, Val Loss = 0.3387, Val Acc = 0.8953


Epoch 5: 100%|██████████| 43/43 [08:32<00:00, 11.91s/it]


Epoch 5: Train Loss = 0.3319, Train Acc = 0.8944, Val Loss = 0.3391, Val Acc = 0.8953


Epoch 6: 100%|██████████| 43/43 [08:30<00:00, 11.88s/it]


Epoch 6: Train Loss = 0.3317, Train Acc = 0.8944, Val Loss = 0.3407, Val Acc = 0.8953


Epoch 7: 100%|██████████| 43/43 [08:24<00:00, 11.74s/it]


Epoch 7: Train Loss = 0.3280, Train Acc = 0.8944, Val Loss = 0.3422, Val Acc = 0.8953


Epoch 8: 100%|██████████| 43/43 [08:26<00:00, 11.78s/it]


Epoch 8: Train Loss = 0.3242, Train Acc = 0.8944, Val Loss = 0.3411, Val Acc = 0.8953


Epoch 9: 100%|██████████| 43/43 [08:36<00:00, 12.01s/it]


Epoch 9: Train Loss = 0.3213, Train Acc = 0.8944, Val Loss = 0.3416, Val Acc = 0.8953


Epoch 10: 100%|██████████| 43/43 [08:32<00:00, 11.92s/it]


Epoch 10: Train Loss = 0.3143, Train Acc = 0.8944, Val Loss = 0.3427, Val Acc = 0.8953


Epoch 11: 100%|██████████| 43/43 [08:32<00:00, 11.93s/it]


Epoch 11: Train Loss = 0.3065, Train Acc = 0.8944, Val Loss = 0.3484, Val Acc = 0.8953


Epoch 12: 100%|██████████| 43/43 [08:33<00:00, 11.94s/it]


Epoch 12: Train Loss = 0.2979, Train Acc = 0.8944, Val Loss = 0.3518, Val Acc = 0.8953


Epoch 13:  42%|████▏     | 18/43 [03:34<04:48, 11.53s/it]