In [1]:
!pip install scikit-learn



In [2]:
import warnings 
import sklearn.exceptions 

import glob 

from PIL import Image 

import torch
import timm
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

from sklearn.metrics import classification_report

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
torch.cuda.is_available()

True

In [5]:
!pip install torchattacks 
import torchattacks
import cv2
import numpy as np
import os
import torch
import torchvision.transforms as transforms
from shutil import copyfile
from torchvision import datasets, models
from torch.utils.data import DataLoader

# Define paths
INPUT_PATH = "/kaggle/input/cifake-real-and-ai-generated-synthetic-images/train"  # Set the correct path
OUTPUT_PATH = "/kaggle/working/train"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load CIFAKE Dataset
transform = transforms.Compose([transforms.ToTensor()])
dataset = datasets.ImageFolder(root=INPUT_PATH, transform=transform)

# Select 4000 images
subset_indices = np.random.choice(len(dataset), 4000, replace=False)
subset = torch.utils.data.Subset(dataset, subset_indices)

# Create DataLoader
dataloader = DataLoader(subset, batch_size=1, shuffle=False)

# Debugging: Ensure cuDNN errors are visible
torch.backends.cudnn.enabled = False

# ✅ Load a Pretrained Model for Binary Classification
model = models.resnet18(pretrained=True)
model.fc = torch.nn.Linear(512, 2)  # Adjust output for 2 classes (real/fake)
model = model.to(device)
model.eval()  # Set model to evaluation mode

# Initialize PGD Attack
attack_pgd = torchattacks.PGD(model, eps=8/255, alpha=2/255, steps=10)

# Ensure output folders exist
os.makedirs(os.path.join(OUTPUT_PATH, "real"), exist_ok=True)
os.makedirs(os.path.join(OUTPUT_PATH, "fake"), exist_ok=True)

# Process images
for i, (img_tensor, label) in enumerate(dataloader):
    img_tensor = img_tensor.to(device)
    label = label.to(device).long().squeeze()  # Ensure labels are long & scalar

    # Ensure labels are valid
    if label.item() not in [0, 1]:
        print(f"⚠️ Invalid label at index {i}: {label.item()} (Skipping)")
        continue

    category = "real" if label.item() == 0 else "fake"
    filename = f"image_{i}.jpg"

    if i < 2000:
        # Apply PGD Attack
        try:
            pgd_perturbed = attack_pgd(img_tensor, label.unsqueeze(0).to(device))  # Fix label shape
            pgd_path = os.path.join(OUTPUT_PATH, category, f"image_{i}_PGD.jpg")
            
            # Ensure image is converted properly
            pgd_perturbed = torch.clamp(pgd_perturbed.cpu().squeeze(), 0, 1)  # Clamp to valid range
            transforms.ToPILImage()(pgd_perturbed).convert("RGB").save(pgd_path)

            print(f"Processed {i+1}/4000: {filename} → {category} (Perturbed & Saved)")
        except RuntimeError as e:
            print(f"⚠️ PGD Attack Failed for {filename}: {e}")
    else:
        # Save original image without attack
        original_path = os.path.join(OUTPUT_PATH, category, filename)
        img_tensor = torch.clamp(img_tensor.cpu().squeeze(), 0, 1)  # Clamp to valid range
        transforms.ToPILImage()(img_tensor).convert("RGB").save(original_path)

        print(f"Processed {i+1}/4000: {filename} → {category} (Copied without attack)")


Collecting torchattacks
  Downloading torchattacks-3.5.1-py3-none-any.whl.metadata (927 bytes)
Collecting requests~=2.25.1 (from torchattacks)
  Downloading requests-2.25.1-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting chardet<5,>=3.0.2 (from requests~=2.25.1->torchattacks)
  Downloading chardet-4.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting idna<3,>=2.5 (from requests~=2.25.1->torchattacks)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting urllib3<1.27,>=1.21.1 (from requests~=2.25.1->torchattacks)
  Downloading urllib3-1.26.20-py2.py3-none-any.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.1/50.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Downloading torchattacks-3.5.1-py3-none-any.whl (142 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.0/142.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading requests-2.25.1-py2.py3-none-any.whl (61 kB)
[2K   [90m━━━━

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 200MB/s]


Processed 1/4000: image_0.jpg → real (Perturbed & Saved)
Processed 2/4000: image_1.jpg → fake (Perturbed & Saved)
Processed 3/4000: image_2.jpg → fake (Perturbed & Saved)
Processed 4/4000: image_3.jpg → fake (Perturbed & Saved)
Processed 5/4000: image_4.jpg → real (Perturbed & Saved)
Processed 6/4000: image_5.jpg → fake (Perturbed & Saved)
Processed 7/4000: image_6.jpg → fake (Perturbed & Saved)
Processed 8/4000: image_7.jpg → fake (Perturbed & Saved)
Processed 9/4000: image_8.jpg → fake (Perturbed & Saved)
Processed 10/4000: image_9.jpg → real (Perturbed & Saved)
Processed 11/4000: image_10.jpg → fake (Perturbed & Saved)
Processed 12/4000: image_11.jpg → fake (Perturbed & Saved)
Processed 13/4000: image_12.jpg → fake (Perturbed & Saved)
Processed 14/4000: image_13.jpg → fake (Perturbed & Saved)
Processed 15/4000: image_14.jpg → fake (Perturbed & Saved)
Processed 16/4000: image_15.jpg → fake (Perturbed & Saved)
Processed 17/4000: image_16.jpg → real (Perturbed & Saved)
Processed 18/400

In [6]:
data_config = timm.data.resolve_model_data_config(model)
train_transforms = timm.data.create_transform(**data_config, is_training=True)
test_transforms = timm.data.create_transform(**data_config, is_training=False)


In [7]:
import timm
import torch.nn as nn
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load TinyViT model
model = timm.create_model("tiny_vit_21m_224.dist_in22k", pretrained=True)

# 🔍 Check model architecture
print("Original Model Structure:\n", model)

# 🔹 Identify correct classifier layer and modify it
if hasattr(model, "head"):
    if isinstance(model.head, nn.Linear):
        model.head = nn.Linear(model.head.in_features, 1)  # Direct Linear layer case
    elif hasattr(model.head, "fc"):
        model.head.fc = nn.Linear(model.head.fc.in_features, 1)  # If head has fc layer
    elif isinstance(model.head, nn.Sequential):
        model.head[-1] = nn.Linear(model.head[-1].in_features, 1)  # If Sequential
    else:
        raise ValueError("Unknown model head structure! Modify manually.")
else:
    raise ValueError("Model has no head attribute!")

# ✅ Move model to GPU
model = model.to(device)

# 🛠 Debugging: Test a forward pass
with torch.no_grad():
    dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Create a fake image batch
    output = model(dummy_input)
    print("\n🟢 Model Output (Raw):", output)
    print("🟢 Output Shape:", output.shape)  # Expected shape: [1, 1]


model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

Original Model Structure:
 TinyVit(
  (patch_embed): PatchEmbed(
    (conv1): ConvNorm(
      (conv): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (act): GELU(approximate='none')
    (conv2): ConvNorm(
      (conv): Conv2d(48, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (stages): Sequential(
    (0): ConvLayer(
      (blocks): Sequential(
        (0): MBConv(
          (conv1): ConvNorm(
            (conv): Conv2d(96, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (bn): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (act1): GELU(approximate='none')
          (conv2): ConvNorm(
            (conv): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1

In [8]:
DATASET_PATH =' /kaggle/working/train ' # Update if needed
REAL_PATH = os.path.join(DATASET_PATH, "real")
FAKE_PATH = os.path.join(DATASET_PATH, "fake")

In [9]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.images = glob.glob(f"{root_dir}/real/*.jpg") + glob.glob(
            f"{root_dir}/fake/*.jpg"
        )
        self.labels = [0] * len(glob.glob(f"{root_dir}/real/*.jpg")) + [1] * len(
            glob.glob(f"{root_dir}/fake/*.jpg")
        )
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        img = Image.open(img_path)
        if self.transform:
            img = self.transform(img)
        label = self.labels[idx]
        return img, torch.tensor(label, dtype=torch.float32)

In [10]:
train_ds = CustomDataset(root_dir="/kaggle/working/train", transform=train_transforms)
#test_ds = CustomDataset(root_dir="/kaggle/wroking/cifake-real-and-ai-generated-synthetic-images/test", transform=test_transforms)

In [11]:
len(train_ds)

4000

In [12]:
train_loader = DataLoader(
    train_ds, batch_size=32, shuffle=True, num_workers=4, pin_memory=True
)
'''test_loader = DataLoader(
    test_ds, batch_size=32, shuffle=False, num_workers=4, pin_memory=True
)'''

'test_loader = DataLoader(\n    test_ds, batch_size=32, shuffle=False, num_workers=4, pin_memory=True\n)'

In [13]:
epochs =5
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

for epoch in range(epochs):
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        labels = labels.view(-1, 1).float()  # Ensure correct shape

        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images).view(-1, 1)  # Ensure model outputs the correct shape
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        if i % 1000 == 0:
            print(f"Epoch: {epoch}, Iteration: {i}, Loss: {loss.item()}")

    scheduler.step()
    print(f"Epoch: {epoch}, Loss: {loss.item()}, LR: {scheduler.get_last_lr()}")


Epoch: 0, Iteration: 0, Loss: 0.7222620248794556
Epoch: 0, Loss: 0.4538324177265167, LR: [9.045084971874738e-05]
Epoch: 1, Iteration: 0, Loss: 0.625525951385498
Epoch: 1, Loss: 0.40043485164642334, LR: [6.545084971874737e-05]
Epoch: 2, Iteration: 0, Loss: 0.4730417728424072
Epoch: 2, Loss: 0.534018874168396, LR: [3.454915028125263e-05]
Epoch: 3, Iteration: 0, Loss: 0.33211445808410645
Epoch: 3, Loss: 0.39600473642349243, LR: [9.549150281252631e-06]
Epoch: 4, Iteration: 0, Loss: 0.28870677947998047
Epoch: 4, Loss: 0.20295949280261993, LR: [0.0]


In [14]:
save_dir = "/kaggle/working/model"
os.makedirs(save_dir, exist_ok=True)  # Ensure the directory exists

# Define model save path
model_path = os.path.join(save_dir, "tiny_vit_modified.pth")

# Save the model
torch.save(model.state_dict(), model_path)

print(f"✅ Model saved at: {model_path}")

✅ Model saved at: /kaggle/working/model/tiny_vit_modified.pth
