<a href="https://colab.research.google.com/github/zaidlameer/DeetectorPrototype/blob/main/transformerPrototypeWorking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
# Bind the result to a name: as the cell's last expression, files.upload()
# would echo the uploaded bytes (here the Kaggle API token) into the saved
# notebook output.
uploaded = files.upload()

Saving kaggle.json to kaggle (1).json


{'kaggle (1).json': b'{"username":"<REDACTED>","key":"<REDACTED>"}'}

In [None]:
# Create the Kaggle config directory in the home folder (no error if it already exists).
!mkdir -p ~/.kaggle
# NOTE(review): the upload output earlier in this notebook shows the file was saved as
# "kaggle (1).json", so this moves whatever kaggle.json was already present —
# confirm the intended token file is the one being installed.
!mv kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the manjilkarki/deepfake-and-real-images dataset archive with the Kaggle CLI
# (the logged output shows it lands in /content as deepfake-and-real-images.zip).
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
License(s): unknown
Downloading deepfake-and-real-images.zip to /content
100% 1.68G/1.68G [01:18<00:00, 23.9MB/s]
100% 1.68G/1.68G [01:18<00:00, 23.1MB/s]


In [None]:
import zipfile
import os

# Unpack the downloaded Kaggle archive into the "dataset-folder" directory
# (relative to the current working directory).
archive = zipfile.ZipFile("/content/deepfake-and-real-images.zip", mode="r")
with archive:
    archive.extractall(path="dataset-folder")


In [None]:
import os
import random
import shutil

def downsize_dataset(input_dir, output_dir, num_images_per_folder, seed=None):
    """Copy a random subset of files from every folder of a directory tree.

    The directory layout under ``input_dir`` is mirrored under ``output_dir``;
    from each folder at most ``num_images_per_folder`` regular files are copied.

    Args:
        input_dir: Root of the source directory tree.
        output_dir: Root of the destination tree (created if missing).
        num_images_per_folder: Maximum number of files copied per folder.
            Folders holding fewer files are copied in full.
        seed: Optional seed for a private random generator. With the default
            ``None`` the module-level ``random`` state is used, so a prior
            ``random.seed(...)`` call still controls the sample.

    Raises:
        ValueError: If ``num_images_per_folder`` is negative (raised by
            ``random.sample``).
    """
    # A private generator keeps a seeded run independent of other users of
    # the global random state.
    rng = random.Random(seed) if seed is not None else random
    output_root = os.path.abspath(output_dir)

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Iterate through each folder in the dataset
    for subdir, dirnames, filenames in os.walk(input_dir):
        # Visit sub-folders in a fixed order and never descend into the output
        # tree itself (relevant when output_dir lives inside input_dir),
        # otherwise copied files would be sampled and copied again.
        dirnames[:] = sorted(
            d for d in dirnames
            if os.path.abspath(os.path.join(subdir, d)) != output_root
        )

        relative_path = os.path.relpath(subdir, input_dir)
        target_dir = os.path.join(output_dir, relative_path)

        # Create corresponding output directory
        os.makedirs(target_dir, exist_ok=True)

        # Regular files only; sorted so a given seed always selects the same
        # files regardless of the order the filesystem lists them in.
        candidates = sorted(
            name for name in filenames
            if os.path.isfile(os.path.join(subdir, name))
        )

        # Randomly sample the desired number of images
        sampled_files = rng.sample(candidates, min(num_images_per_folder, len(candidates)))

        # Copy the sampled files to the output directory
        for name in sampled_files:
            shutil.copy(os.path.join(subdir, name), os.path.join(target_dir, name))

# Specify parameters
input_directory = "/content/dataset-folder/Dataset"  # Change to your dataset path
output_directory = "/content/dataset-folder/DownsizedDataset"  # Change to desired output directory
images_per_folder = 1000  # Change to the desired number of images per folder
sampling_seed = 42  # Change to draw a different (but still repeatable) subset

# Seed the global random state used for sampling so that re-running the
# notebook draws the same subset (given the same directory listing order).
random.seed(sampling_seed)

# Downsize dataset
downsize_dataset(input_directory, output_directory, images_per_folder)


In [None]:
import os
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

# Custom Dataset Class
class DeepfakeDataset(Dataset):
    """Image dataset read from ``<root_dir>/Real`` and ``<root_dir>/Fake``.

    Each item is an ``(image, label)`` pair where the label is ``0`` for files
    found in ``Real`` and ``1`` for files found in ``Fake``.

    Args:
        root_dir: Directory containing the ``Real`` and ``Fake`` sub-folders.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.

    Raises:
        FileNotFoundError: If either class folder is missing (raised by
            ``os.listdir``).
    """

    # Extensions (compared lower-cased) that identify an image file.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []

        # Collect all image paths and labels
        for label, folder in enumerate(['Real', 'Fake']):
            folder_path = os.path.join(root_dir, folder)
            # Sorted so the sample order does not depend on filesystem listing order.
            for file in sorted(os.listdir(folder_path)):
                # Case-insensitive match on the real extension: accepts "x.JPG",
                # rejects names that merely end in the letters "png"/"jpg".
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.data.append((os.path.join(folder_path, file), label))

    def __len__(self):
        """Return the number of collected image files."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``."""
        img_path, label = self.data[idx]
        # Close the underlying file as soon as the pixel data has been converted.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Define transformations
# Per-channel statistics used to map [0, 1] pixel values to [-1, 1].
# NOTE(review): ImageNet-pretrained torchvision weights are commonly paired with
# ImageNet mean/std rather than 0.5 — confirm this choice is intentional.
channel_mean = [0.5, 0.5, 0.5]
channel_std = [0.5, 0.5, 0.5]

# Preprocessing pipeline shared by every split.
preprocessing_steps = [
    transforms.Resize(size=(224, 224)),                        # fixed 224x224 input
    transforms.ToTensor(),                                     # PIL image -> tensor
    transforms.Normalize(mean=channel_mean, std=channel_std),  # per-channel normalisation
]
transform = transforms.Compose(preprocessing_steps)

# Paths
# Root of the subsampled dataset; each split lives in its own sub-folder.
downsized_root = "/content/dataset-folder/DownsizedDataset"
train_dir = f"{downsized_root}/Train"
val_dir = f"{downsized_root}/Validation"
test_dir = f"{downsized_root}/Test"

# Datasets
# One dataset per split, all sharing the same preprocessing pipeline.
train_dataset = DeepfakeDataset(root_dir=train_dir, transform=transform)
val_dataset = DeepfakeDataset(root_dir=val_dir, transform=transform)
test_dataset = DeepfakeDataset(root_dir=test_dir, transform=transform)

# DataLoaders: only the training split is shuffled; validation and test keep
# the dataset order.
loader_batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=loader_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=loader_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=loader_batch_size, shuffle=False)


In [None]:
import torch
import torch.nn as nn
from torchvision.models import vit_b_16

# Load Pretrained Vision Transformer
class DeepfakeModel(nn.Module):
    """ViT-B/16 backbone whose final classification layer is replaced.

    Args:
        num_classes: Number of output logits of the new classification layer.
        pretrained: When True (default) the backbone starts from torchvision's
            ``IMAGENET1K_V1`` weights; pass False to skip loading them, e.g.
            when a saved ``state_dict`` is loaded right afterwards.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the `weights`
        # argument selects the same checkpoint explicitly.
        backbone_weights = "IMAGENET1K_V1" if pretrained else None
        self.vit = vit_b_16(weights=backbone_weights)
        # Access the classifier (Linear layer) inside the heads module
        classifier = self.vit.heads[-1]
        # Replace the classifier with a new, freshly initialised one that
        # emits `num_classes` logits.
        self.vit.heads[-1] = nn.Linear(classifier.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for the image batch ``x``."""
        return self.vit(x)

# Instantiate Model
# Use CUDA when PyTorch reports it as available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Build the two-class model and move its parameters to the chosen device.
model = DeepfakeModel(num_classes=2)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 167MB/s]


In [None]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from tqdm import tqdm

# Optimizer and Loss Function
# Loss: cross-entropy over the model's class logits.
criterion = CrossEntropyLoss()

# Optimizer: Adam over every model parameter.
learning_rate = 1e-4
optimizer = Adam(params=model.parameters(), lr=learning_rate)

# Training Function
def train(model, loader, optimizer, criterion, device):
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Args:
        model: Module mapping an image batch to per-class scores.
        loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean over the batch (the ``CrossEntropyLoss`` default).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correctly
        classified samples. Both are ``nan`` when the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)

        # Forward Pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics: weight the batch-mean loss by the batch size so a shorter
        # final batch does not count as much as a full one.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += batch_size

    # An empty loader has no defined metrics; report NaN instead of dividing by zero.
    if total == 0:
        return float("nan"), float("nan")
    return loss_sum / total, correct / total

# Validation Function
def validate(model, loader, criterion, device):
    """Evaluate the model without gradient tracking; return ``(mean_loss, accuracy)``.

    Args:
        model: Module mapping an image batch to per-class scores.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean over the batch (the ``CrossEntropyLoss`` default).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correctly
        classified samples. Both are ``nan`` when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            # Forward Pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Metrics: weight the batch-mean loss by the batch size so a
            # shorter final batch does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += batch_size

    # An empty loader has no defined metrics; report NaN instead of dividing by zero.
    if total == 0:
        return float("nan"), float("nan")
    return loss_sum / total, correct / total


In [None]:
# Train the model
num_epochs = 10

# Validation accuracy moves up and down between epochs, so the final epoch is
# not necessarily the best one. Keep a CPU copy of the weights from the epoch
# with the highest validation accuracy and restore it once training is done.
best_val_acc = float("-inf")
best_state = None

for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Clone to CPU so later optimizer steps cannot modify the snapshot.
        best_state = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

# Leave `model` holding the best-performing weights for whatever uses it next.
if best_state is not None:
    model.load_state_dict(best_state)
    print(f"Restored best weights (Val Accuracy: {best_val_acc:.4f})")


100%|██████████| 63/63 [01:11<00:00,  1.14s/it]


Epoch 1/10
Train Loss: 0.4190, Train Accuracy: 0.7995
Val Loss: 0.2786, Val Accuracy: 0.8805


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 2/10
Train Loss: 0.1491, Train Accuracy: 0.9385
Val Loss: 0.2789, Val Accuracy: 0.8910


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 3/10
Train Loss: 0.0933, Train Accuracy: 0.9625
Val Loss: 0.6481, Val Accuracy: 0.8290


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 4/10
Train Loss: 0.0649, Train Accuracy: 0.9760
Val Loss: 0.3967, Val Accuracy: 0.8855


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 5/10
Train Loss: 0.0785, Train Accuracy: 0.9735
Val Loss: 0.3157, Val Accuracy: 0.8925


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 6/10
Train Loss: 0.0296, Train Accuracy: 0.9890
Val Loss: 0.3563, Val Accuracy: 0.9015


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 7/10
Train Loss: 0.0485, Train Accuracy: 0.9830
Val Loss: 0.3833, Val Accuracy: 0.8720


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 8/10
Train Loss: 0.0319, Train Accuracy: 0.9870
Val Loss: 0.3528, Val Accuracy: 0.9060


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 9/10
Train Loss: 0.0103, Train Accuracy: 0.9960
Val Loss: 0.4542, Val Accuracy: 0.8900


100%|██████████| 63/63 [01:13<00:00,  1.17s/it]


Epoch 10/10
Train Loss: 0.0520, Train Accuracy: 0.9790
Val Loss: 0.3389, Val Accuracy: 0.8615


In [None]:
# Persist only the model's parameters (its state_dict) to a file in the
# current working directory.
checkpoint_path = "deepfake_model.pth"
model_weights = model.state_dict()
torch.save(model_weights, checkpoint_path)
