In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from sklearn.metrics import classification_report

# --- Configuration ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
EPOCHS = 3
IMG_SIZE = 128
NUM_CLASSES = 2
EPSILON = 0.03
PGD_STEPS = 7

In [3]:
# --- Adversarial Attack Functions ---
def fgsm_attack(model, images, labels, epsilon=0.05):
    images.requires_grad = True
    outputs = model(images)
    loss = F.cross_entropy(outputs, labels)
    model.zero_grad()
    loss.backward()
    perturbed = images + epsilon * images.grad.sign()
    return torch.clamp(perturbed, 0, 1).detach()

def pgd_attack(model, images, labels, epsilon=EPSILON, alpha=0.01, iters=PGD_STEPS):
    perturbed = images.clone().detach()
    for _ in range(iters):
        perturbed.requires_grad = True
        outputs = model(perturbed)
        loss = F.cross_entropy(outputs, labels)
        model.zero_grad()
        loss.backward()
        with torch.no_grad():
            perturbed += alpha * perturbed.grad.sign()
            perturbed = torch.max(torch.min(perturbed, images + epsilon), images - epsilon)
            perturbed = torch.clamp(perturbed, 0, 1)
    return perturbed.detach()

def one_pixel_attack(images, pixel_count=1):
    perturbed = images.clone()
    batch_size, _, h, w = images.shape
    for i in range(batch_size):
        for _ in range(pixel_count):
            x, y = np.random.randint(0, h), np.random.randint(0, w)
            perturbed[i, :, x, y] = torch.rand(3)
    return perturbed

In [4]:
!pip install timm



In [5]:
import torch.nn as nn
from timm import create_model  # requires timm: pip install timm

class RobustHybridModel(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()
        self.model = create_model(
            'deit_small_patch16_224',   # Vision Transformer Small
            pretrained=True,
            num_classes=num_classes,
            img_size=128                # Adjust for 128x128 input images
        )

    def forward(self, x):
        return self.model(x)

# Without Defense

In [6]:
def normal_train(model, loader, optimizer, criterion):
    model.train()
    running_loss, correct = 0, 0
    for x, y in tqdm(loader, desc="Normal Training"):
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        correct += (outputs.argmax(1) == y).sum().item()
    
    acc = correct / len(loader.dataset)
    return running_loss / len(loader), acc

In [8]:
# --- Evaluation with Attacks ---
def adversarial_test(model, loader):
    model.eval()
    results = {'clean': {'correct': 0, 'total': 0},
               'fgsm': {'correct': 0, 'total': 0},
               'pgd': {'correct': 0, 'total': 0},
               'pixel': {'correct': 0, 'total': 0}}
    
    for x, y in tqdm(loader, desc="Testing"):
        x, y = x.to(device), y.to(device)
        
        # Clean samples
        with torch.no_grad():
            out_clean = model(x)
        results['clean']['correct'] += (out_clean.argmax(1) == y).sum().item()
        results['clean']['total'] += y.size(0)
        
        # Generate attacks
        with torch.enable_grad():
            x_fgsm = fgsm_attack(model, x, y)
            x_pgd = pgd_attack(model, x, y)
        x_pixel = one_pixel_attack(x)
        
        # Test attacks
        with torch.no_grad():
            for name, data in [('fgsm', x_fgsm), ('pgd', x_pgd), ('pixel', x_pixel)]:
                out = model(data)
                results[name]['correct'] += (out.argmax(1) == y).sum().item()
                results[name]['total'] += y.size(0)
    
    # Calculate accuracies
    metrics = {}
    for key in results:
        metrics[key] = results[key]['correct'] / results[key]['total']
    return metrics

In [9]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/cifake-real-and-ai-generated-synthetic-images


In [10]:
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

train_dataset = datasets.ImageFolder('/kaggle/input/cifake-real-and-ai-generated-synthetic-images/train', transform=transform)
test_dataset = datasets.ImageFolder('/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test', transform=transform)

# Remap targets: FAKE=1, REAL=0
for dataset in [train_dataset, test_dataset]:
    dataset.targets = [1 if x == dataset.class_to_idx['FAKE'] else 0 for x in dataset.targets]

In [12]:
train_loader = DataLoader(train_dataset, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, BATCH_SIZE, shuffle=False)

In [13]:
# --- Initialize Models ---
normal_model = RobustHybridModel().to(device)

optimizer_normal = torch.optim.AdamW(normal_model.parameters(), lr=1e-4, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

# --- Training Loop ---
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    
    # Normal Training (Clean Data)
    normal_loss, normal_acc = normal_train(normal_model, train_loader, optimizer_normal, criterion)
    print(f"Normal Train Acc: {normal_acc:.4f}")
    
    
    # Periodic Evaluation
    if (epoch+1) % 2 == 0:
        print("\nEvaluating Normal Model:")
        normal_metrics = adversarial_test(normal_model, test_loader)
        for attack, acc in normal_metrics.items():
            print(f"Normal {attack.upper():<6} Acc: {acc:.2%}")
        
       


Epoch 1/3


Normal Training: 100%|██████████| 1563/1563 [10:31<00:00,  2.47it/s]


Normal Train Acc: 0.9563

Epoch 2/3


Normal Training: 100%|██████████| 1563/1563 [05:19<00:00,  4.88it/s]


Normal Train Acc: 0.9811

Evaluating Normal Model:


Testing: 100%|██████████| 313/313 [07:07<00:00,  1.37s/it]


Normal CLEAN  Acc: 98.06%
Normal FGSM   Acc: 7.47%
Normal PGD    Acc: 0.01%
Normal PIXEL  Acc: 98.06%

Epoch 3/3


Normal Training: 100%|██████████| 1563/1563 [05:37<00:00,  4.64it/s]

Normal Train Acc: 0.9872





In [14]:
# --- Final Test ---
print("\n=== Final Results ===")

# Normal Model Performance
print("\n[ Normal Model ]")
normal_metrics = adversarial_test(normal_model, test_loader)
for attack, acc in normal_metrics.items():
    print(f"{attack.upper():<6} Accuracy: {acc:.2%}")

# Save Models
torch.save(normal_model.state_dict(), "ViT_cifake_baseline_nodefense.pth")



=== Final Results ===

[ Normal Model ]


Testing: 100%|██████████| 313/313 [05:59<00:00,  1.15s/it]

CLEAN  Accuracy: 97.98%
FGSM   Accuracy: 11.22%
PGD    Accuracy: 0.01%
PIXEL  Accuracy: 97.97%



