<a href="https://colab.research.google.com/github/zaidlameer/DeetectorPrototype/blob/main/transformerPrototypeWorking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files

# Bind the result instead of leaving `files.upload()` as the cell's last
# expression: it returns a dict of {filename: file bytes}, and echoing that
# dict writes the Kaggle API key into the saved notebook output.
uploaded = files.upload()
print(f"Uploaded: {', '.join(uploaded)}")

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"zaidthemler","key":"<REDACTED>"}'}

In [None]:
# Put the uploaded kaggle.json under ~/.kaggle (presumably where the `kaggle`
# command used in the next cell looks for credentials).
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the deepfake-and-real-images dataset archive with the Kaggle CLI
# (the recorded output shows it saved as a .zip in /content).
!kaggle datasets download -d manjilkarki/deepfake-and-real-images

Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images
License(s): unknown
Downloading deepfake-and-real-images.zip to /content
100% 1.68G/1.68G [00:45<00:00, 42.5MB/s]
100% 1.68G/1.68G [00:46<00:00, 39.2MB/s]


In [None]:
import zipfile
import os

# Extract to an absolute path: the later cells read from
# /content/dataset-folder/..., so a destination relative to the current
# working directory only lines up when the kernel happens to run in /content.
with zipfile.ZipFile("/content/deepfake-and-real-images.zip", "r") as zip_ref:
    zip_ref.extractall("/content/dataset-folder")


In [None]:
import os
import random
import shutil

def downsize_dataset(input_dir, output_dir, num_images_per_folder, seed=None):
    """Copy a random subset of the files in every folder of a directory tree.

    The folder structure under ``input_dir`` is recreated under ``output_dir``
    and, for each folder, at most ``num_images_per_folder`` of the files that
    sit directly in it are copied across.

    Args:
        input_dir: Root of the tree to sample from.
        output_dir: Root of the tree to create; missing folders are created.
        num_images_per_folder: Maximum number of files copied per folder.
            Folders holding fewer files are copied completely.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used, so repeated calls pick the same files. When ``None`` the
            module-level ``random`` generator is used, as before.
    """
    rng = random.Random(seed) if seed is not None else random
    output_root = os.path.abspath(output_dir)
    os.makedirs(output_root, exist_ok=True)

    for subdir, dirnames, filenames in os.walk(input_dir):
        # Prune in place: visit sub-folders in a stable order and never descend
        # into the output tree when it lives inside input_dir, otherwise the
        # copies made so far would be sampled again.
        dirnames[:] = sorted(
            d for d in dirnames
            if os.path.abspath(os.path.join(subdir, d)) != output_root
        )

        relative_path = os.path.relpath(subdir, input_dir)
        target_dir = os.path.join(output_dir, relative_path)
        os.makedirs(target_dir, exist_ok=True)

        # Sort so that a given seed selects the same files regardless of the
        # order in which the filesystem lists the directory.
        candidates = sorted(
            f for f in filenames if os.path.isfile(os.path.join(subdir, f))
        )

        sample_size = min(num_images_per_folder, len(candidates))
        for name in rng.sample(candidates, sample_size):
            shutil.copy(os.path.join(subdir, name), os.path.join(target_dir, name))

# Specify parameters
input_directory = "/content/dataset-folder/Dataset"  # Change to your dataset path
output_directory = "/content/dataset-folder/DownsizedDataset"  # Change to desired output directory
images_per_folder = 1000  # Change to the desired number of images per folder

# Seed the module-level RNG used for sampling so that re-running the notebook
# selects the same subset (for an unchanged directory listing order).
random.seed(42)

# Downsize dataset
downsize_dataset(input_directory, output_directory, images_per_folder)


In [None]:
import os
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image

# Custom Dataset Class
class DeepfakeDataset(Dataset):
    """Image dataset read from ``root_dir/Real`` and ``root_dir/Fake``.

    Every image file found directly inside the two class folders becomes one
    sample. Labels follow the folder order: ``Real`` -> 0, ``Fake`` -> 1.

    Args:
        root_dir: Directory containing the ``Real`` and ``Fake`` sub-folders.
        transform: Optional callable applied to each loaded RGB PIL image.

    Raises:
        FileNotFoundError: If one of the class folders does not exist.
    """

    # Folder name -> label is given by the position in this tuple.
    CLASS_FOLDERS = ('Real', 'Fake')
    # Compared against the lower-cased file name, so ".JPG" is accepted too.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = []

        # Collect all image paths and labels
        for label, folder in enumerate(self.CLASS_FOLDERS):
            folder_path = os.path.join(root_dir, folder)
            # Sorted so the sample order does not depend on the order in which
            # the filesystem lists the directory.
            for file in sorted(os.listdir(folder_path)):
                if file.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.data.append((os.path.join(folder_path, file), label))

    def __len__(self):
        """Return the number of collected (path, label) samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        img_path, label = self.data[idx]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Define transformations
# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# then per-channel normalisation with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Root folder of each split inside the downsized dataset
train_dir = "/content/dataset-folder/DownsizedDataset/Train"
val_dir = "/content/dataset-folder/DownsizedDataset/Validation"
test_dir = "/content/dataset-folder/DownsizedDataset/Test"

# One dataset per split, all sharing the same preprocessing pipeline
train_dataset = DeepfakeDataset(root_dir=train_dir, transform=transform)
val_dataset = DeepfakeDataset(root_dir=val_dir, transform=transform)
test_dataset = DeepfakeDataset(root_dir=test_dir, transform=transform)

# Batches of 32; only the training split is shuffled
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [None]:
import torch
import torch.nn as nn
from torchvision.models import vit_b_16

# Load Pretrained Vision Transformer
class DeepfakeModel(nn.Module):
    """ViT-B/16 backbone whose final linear layer is replaced by a new head.

    Args:
        num_classes: Number of output logits of the replacement head
            (2 for the Real/Fake task).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision's multi-weight API;
        # request the ImageNet-1k checkpoint explicitly instead.
        self.vit = vit_b_16(weights="IMAGENET1K_V1")
        # The last module of `heads` is the Linear classifier; swap it for one
        # with the same input width and `num_classes` outputs.
        classifier = self.vit.heads[-1]
        self.vit.heads[-1] = nn.Linear(classifier.in_features, num_classes)

    def forward(self, x):
        """Return the logits the wrapped ViT produces for the batch ``x``."""
        return self.vit(x)

# Instantiate Model
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the two-class model, then move its parameters to the chosen device
model = DeepfakeModel(num_classes=2)
model = model.to(device)

Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 144MB/s]


In [None]:
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from tqdm import tqdm

# Optimizer and Loss Function
# Cross-entropy loss over the class logits
criterion = CrossEntropyLoss()
# Adam over every model parameter with a learning rate of 1e-4
optimizer = Adam(params=model.parameters(), lr=0.0001)

# Training Function
def train(model, loader, optimizer, criterion, device):
    """Run one training epoch and return ``(mean_loss, accuracy)``.

    Args:
        model: Module mapping an image batch to class logits.
        loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``. Assumed to
            return the mean over the batch (the ``CrossEntropyLoss`` default)
            -- confirm if a different reduction is used.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over samples.
        ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)

        # Forward Pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics: weight each batch loss by its size so a shorter final
        # batch does not count as much as a full one.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    # An empty loader would otherwise divide by zero.
    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, correct / total

# Validation Function
def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Module mapping an image batch to class logits.
        loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. Assumed to
            return the mean over the batch (the ``CrossEntropyLoss`` default)
            -- confirm if a different reduction is used.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over samples.
        ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            # Forward Pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Metrics: weight each batch loss by its size so a shorter final
            # batch does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    # An empty loader would otherwise divide by zero.
    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, correct / total


In [None]:
# Fit the model for a fixed number of epochs, reporting metrics after each one
num_epochs = 5

for epoch in range(num_epochs):
    # One optimisation pass over the training split, then an evaluation pass
    # over the validation split
    train_loss, train_acc = train(
        model=model,
        loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
    )
    val_loss, val_acc = validate(
        model=model,
        loader=val_loader,
        criterion=criterion,
        device=device,
    )

    print(
        f"Epoch {epoch+1}/{num_epochs}",
        f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}",
        sep="\n",
    )


100%|██████████| 63/63 [01:11<00:00,  1.14s/it]


Epoch 1/5
Train Loss: 0.4532, Train Accuracy: 0.7860
Val Loss: 0.3365, Val Accuracy: 0.8690


100%|██████████| 63/63 [01:13<00:00,  1.16s/it]


Epoch 2/5
Train Loss: 0.1881, Train Accuracy: 0.9260
Val Loss: 0.2080, Val Accuracy: 0.9140


100%|██████████| 63/63 [01:13<00:00,  1.16s/it]


Epoch 3/5
Train Loss: 0.0654, Train Accuracy: 0.9765
Val Loss: 0.2611, Val Accuracy: 0.8955


100%|██████████| 63/63 [01:13<00:00,  1.16s/it]


Epoch 4/5
Train Loss: 0.0520, Train Accuracy: 0.9800
Val Loss: 0.3145, Val Accuracy: 0.8950


100%|██████████| 63/63 [01:13<00:00,  1.16s/it]


Epoch 5/5
Train Loss: 0.0786, Train Accuracy: 0.9670
Val Loss: 0.3624, Val Accuracy: 0.8635


In [None]:
# Save only the model's state dictionary.
# torch.save does not create missing parent folders, and /content/models does
# not exist on a fresh runtime, so create it first.
os.makedirs("/content/models", exist_ok=True)
torch.save(model.state_dict(), "/content/models/deepfake_model.pth")



In [None]:
from google.colab import files

# Send the saved state-dict file to the browser through Colab's `files` helper
files.download("/content/models/deepfake_model.pth")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
!pip install vit_pytorch

Collecting vit_pytorch
  Downloading vit_pytorch-1.9.2-py3-none-any.whl.metadata (69 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 69.4/69.4 kB 5.3 MB/s eta 0:00:00
Downloading vit_pytorch-1.9.2-py3-none-any.whl (138 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 138.6/138.6 kB 8.3 MB/s eta 0:00:00
Installing collected packages: vit_pytorch
Successfully installed vit_pytorch-1.9.2


In [None]:
import torch
from vit_pytorch import ViT  # Ensure you have the correct package installed
from PIL import Image
from torchvision import transforms

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# The checkpoint was saved from DeepfakeModel, i.e. torchvision's ViT-B/16 with
# a 2-class head. The weights only fit that same architecture, so rebuild it
# here rather than a differently-shaped ViT.
import torch.nn as nn
from torchvision.models import vit_b_16

model = vit_b_16()  # no pretrained download; every weight comes from the checkpoint
model.heads[-1] = nn.Linear(model.heads[-1].in_features, 2)

# Load the saved model weights (weights_only=True restricts unpickling to
# tensors and plain containers)
state_dict = torch.load(
    '/content/models/deepfake_model.pth',
    map_location=device,
    weights_only=True,
)

# DeepfakeModel stores the backbone under `self.vit`, so every saved key
# carries a leading "vit." that the bare backbone does not expect.
prefix = "vit."
new_state_dict = {
    (key[len(prefix):] if key.startswith(prefix) else key): value
    for key, value in state_dict.items()
}

# Strict loading (the default): a missing or unexpected key raises an error
# instead of silently leaving layers randomly initialised.
model.load_state_dict(new_state_dict)

# Move model to GPU/CPU and switch to evaluation mode; assigning the result
# keeps the cell from printing the full module repr.
model = model.to(device).eval()



Using device: cuda


  state_dict = torch.load('/content/models/deepfake_model.pth', map_location=device)
  model.load_state_dict(torch.load('/content/models/deepfake_model.pth', map_location=device), strict=False)


ViT(
  (to_patch_embedding): Sequential(
    (0): Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=16, p2=16)
    (1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (2): Linear(in_features=768, out_features=512, bias=True)
    (3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (transformer): Transformer(
    (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (layers): ModuleList(
      (0-5): 6 x ModuleList(
        (0): Attention(
          (norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (attend): Softmax(dim=-1)
          (dropout): Dropout(p=0.1, inplace=False)
          (to_qkv): Linear(in_features=512, out_features=1536, bias=False)
          (to_out): Sequential(
            (0): Linear(in_features=512, out_features=512, bias=True)
            (1): Dropout(p=0.1, inplace=False)
          )
        )
        (1): FeedForward(
          (net): Sequential(
            (0

In [None]:
# Preprocessing pipeline for the input image
# Inference must use the same preprocessing as training. The training
# `transform` normalises with mean 0.5 / std 0.5 per channel, so the ImageNet
# statistics previously used here fed the model differently scaled inputs.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),          # Resize to match model input size
    transforms.ToTensor(),                  # Convert image to tensor
    transforms.Normalize([0.5]*3, [0.5]*3)  # Same normalisation as training
])

# Function to predict the class of an image
def predict_image(image_path):
    """
    Classify one image file with the module-level ``model``.

    The image is opened as RGB, passed through ``preprocess``, batched and
    moved to ``device``. Returns 'Real' or 'Fake' according to the largest
    logit, or a string starting with "Error during prediction: " if any step
    raises an exception.
    """
    try:
        # Open as RGB, preprocess, add a batch dimension and move to the device
        rgb_image = Image.open(image_path).convert('RGB')
        batch = preprocess(rgb_image).unsqueeze(0).to(device)

        # Forward pass without gradient tracking; keep only the arg-max index
        with torch.no_grad():
            logits = model(batch)
            class_index = torch.max(logits, 1)[1].item()

        # Index 0 / 1 correspond to these class names
        class_names = ['Real', 'Fake']
        return class_names[class_index]

    except Exception as e:
        return f"Error during prediction: {str(e)}"

# Test the prediction function
# Run the prediction function on one image taken from the Train/Fake folder
# and print the returned label (or the returned error text).
test_image_path = '/content/dataset-folder/DownsizedDataset/Train/Fake/fake_10119.jpg'  # Replace with your test image path
prediction = predict_image(test_image_path)
print(f"Prediction: {prediction}")


Prediction: Real
