In [None]:
import zipfile

# Unpack the uploaded archive into the working data folder.
with zipfile.ZipFile(file='Dataset.zip', mode='r') as archive:
    archive.extractall(path='/content/data')

In [None]:
# Root folder of the extracted dataset. The configuration cell that follows
# assigns this same path to `data_dir` again.
data_dir = '/content/data/Dataset'
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm

In [None]:
# --- Data configuration -------------------------------------------------
data_dir = '/content/data/Dataset'
image_size = 224
batch_size = 32


def _build_transform(augment):
    """Assemble the preprocessing pipeline; `augment` adds the random horizontal flip."""
    steps = [transforms.Resize((image_size, image_size))]
    if augment:
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    return transforms.Compose(steps)


# Training images get the flip augmentation; validation/test images do not.
train_transform = _build_transform(augment=True)
test_transform = _build_transform(augment=False)

# One ImageFolder per split; class labels come from the sub-folder names.
train_dataset = datasets.ImageFolder(f'{data_dir}/Train', transform=train_transform)
val_dataset = datasets.ImageFolder(f'{data_dir}/Validation', transform=test_transform)
test_dataset = datasets.ImageFolder(f'{data_dir}/Test', transform=test_transform)

# Only the training loader shuffles; every loader shares batch size and worker count.
_loader_options = dict(batch_size=batch_size, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)


In [None]:
def train_model(model, train_loader, val_loader, epochs=5, lr=1e-4):
    """Train `model` with Adam and cross-entropy, printing validation accuracy per epoch.

    Args:
        model: A ``torch.nn.Module`` whose output is fed to ``nn.CrossEntropyLoss``.
        train_loader: Iterable yielding ``(imgs, labels)`` batches used for optimisation.
        val_loader: Iterable yielding ``(imgs, labels)`` batches used for the
            per-epoch accuracy report. May be empty, in which case the report
            says so instead of dividing by zero.
        epochs: Number of full passes over ``train_loader``.
        lr: Learning rate handed to ``torch.optim.Adam``.

    Returns:
        The model after it has been moved to the selected device (CUDA when
        available, otherwise CPU) and trained.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        # --- optimisation pass ---
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # --- validation pass (no gradients, eval-mode layers) ---
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                preds = torch.argmax(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

        if total:
            print(f"Epoch {epoch+1}/{epochs}, Validation Accuracy: {correct/total:.4f}")
        else:
            # An empty validation loader previously crashed with ZeroDivisionError
            # after the (expensive) training pass had already completed.
            print(f"Epoch {epoch+1}/{epochs}, Validation Accuracy: n/a (validation loader yielded no samples)")
    return model

def evaluate_model(model, test_loader):
    """Print the classification accuracy of `model` over `test_loader`.

    Args:
        model: A ``torch.nn.Module`` producing per-class scores; the predicted
            class is the argmax along dimension 1.
        test_loader: Iterable yielding ``(imgs, labels)`` batches. May be empty,
            in which case a notice is printed instead of an accuracy.

    The model is moved to the same device the batches are sent to (CUDA when
    available, otherwise CPU) and switched to eval mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Batches are moved to `device` below, so the model has to live there too;
    # without this a CPU-resident model fails on a CUDA machine.
    model = model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            preds = torch.argmax(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Avoid ZeroDivisionError when the loader yields nothing.
        print("Test Accuracy: n/a (test loader yielded no samples)")
        return
    print(f"Test Accuracy: {correct/total:.4f}")


In [None]:
# EfficientNet-B0: load pretrained weights, swap the classifier for a 2-class layer,
# train, save the weights, then report test accuracy.
effnet_model = timm.create_model('efficientnet_b0', pretrained=True)
effnet_head_inputs = effnet_model.classifier.in_features
effnet_model.classifier = nn.Linear(in_features=effnet_head_inputs, out_features=2)
effnet_model = train_model(model=effnet_model, train_loader=train_loader, val_loader=val_loader)
effnet_weights = effnet_model.state_dict()
torch.save(effnet_weights, '/content/efficientnet_model.pt')
evaluate_model(model=effnet_model, test_loader=test_loader)

In [None]:
# DeiT-Small (patch 16, 224 px): load pretrained weights, swap the head for a
# 2-class layer, train, save the weights, then report test accuracy.
deit_model = timm.create_model('deit_small_patch16_224', pretrained=True)
deit_head_inputs = deit_model.head.in_features
deit_model.head = nn.Linear(in_features=deit_head_inputs, out_features=2)
deit_model = train_model(model=deit_model, train_loader=train_loader, val_loader=val_loader)
deit_weights = deit_model.state_dict()
torch.save(deit_weights, '/content/deit_model.pt')
evaluate_model(model=deit_model, test_loader=test_loader)

In [None]:
!pip install matplotlib seaborn pandas
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset root directory (the statistics cell that follows assigns this same
# path to `dataset_dir` again).
dataset_dir = '/content/data/Dataset'

In [None]:
# Install OpenCV for sharpness calculation
!pip install opencv-python-headless

import os
from PIL import Image
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2

# For inline plots in Colab notebooks
%matplotlib inline

# Dataset path and classes
dataset_dir = '/content/data/Dataset'  # <-- Change this path if needed
splits = ['Train', 'Validation', 'Test']
labels = ['Real', 'Fake']


def _image_stats(img_path):
    """Compute size, colour and sharpness statistics for one image file.

    Returns a dict with keys ``width``, ``height``, ``mean_r/g/b``,
    ``std_r/g/b``, ``mean_gray``, ``std_gray`` and ``sharpness`` (variance of
    the Laplacian of the grayscale image). Any error raised while opening or
    decoding the file propagates to the caller.
    """
    # The context manager releases the file handle even if decoding fails part-way.
    with Image.open(img_path) as raw:
        img = raw.convert('RGB')
    img_np = np.array(img)
    width, height = img.size

    record = {'width': width, 'height': height}

    # Per-channel mean and std (channel index 0/1/2 -> r/g/b)
    planes = {name: img_np[:, :, idx] for idx, name in enumerate('rgb')}
    for name, plane in planes.items():
        record[f'mean_{name}'] = plane.mean()
    for name, plane in planes.items():
        record[f'std_{name}'] = plane.std()

    # Grayscale mean and std
    img_gray_np = np.array(img.convert('L'))
    record['mean_gray'] = img_gray_np.mean()
    record['std_gray'] = img_gray_np.std()

    # Sharpness estimate (variance of Laplacian)
    record['sharpness'] = cv2.Laplacian(img_gray_np, cv2.CV_64F).var()
    return record


# Collect image statistics
stats = []

for split in splits:
    for label in labels:
        folder = os.path.join(dataset_dir, split, label)
        if not os.path.isdir(folder):
            # Report instead of skipping silently so a wrong dataset_dir is noticed.
            print(f"Skipping missing folder: {folder}")
            continue
        # sorted() keeps row order reproducible; os.listdir order depends on the filesystem.
        for img_file in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_file)
            try:
                stats.append({
                    'split': split,
                    'label': label,
                    'filename': img_file,
                    **_image_stats(img_path),
                })
            except Exception as e:
                # Best effort: unreadable or non-image files are reported and skipped.
                print(f"Error processing {img_path}: {e}")

# Turn the per-image records into a table
df_stats = pd.DataFrame(data=stats)

# Quick look: first rows, then numeric summary (each preceded by its heading)
for heading, preview in (
    ("Sample data:", df_stats.head),
    ("\nSummary statistics:", df_stats.describe),
):
    print(heading)
    print(preview())

# Global seaborn look applied to the figures drawn after this point
sns.set(style='whitegrid', palette='pastel', font_scale=1.2)

# 1) Histogram - Sharpness Distribution by Class
fig_hist, ax_hist = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=df_stats,
    x='sharpness',
    hue='label',
    bins=30,
    palette='Set2',
    kde=False,
    ax=ax_hist,
)
ax_hist.set_title('Histogram of Image Sharpness by Class')
ax_hist.set_xlabel('Sharpness (Variance of Laplacian)')
ax_hist.set_ylabel('Count')
plt.show()

# 2) Scatter Plot - Width vs Height by Class
marker_style = dict(hue='label', palette='Set2', alpha=0.7, edgecolor='w', s=80)

fig_dims, ax_dims = plt.subplots(figsize=(10, 7))
sns.scatterplot(data=df_stats, x='width', y='height', ax=ax_dims, **marker_style)

# Titles and axis labels
ax_dims.set_title('Image Width vs Height by Class', fontsize=16, weight='bold')
ax_dims.set_xlabel('Width (pixels)', fontsize=14)
ax_dims.set_ylabel('Height (pixels)', fontsize=14)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Dashed grid on major and minor ticks, then legend and layout
ax_dims.grid(True, which='both', linestyle='--', linewidth=0.5)
ax_dims.minorticks_on()
ax_dims.legend(title='Class', fontsize=12, title_fontsize=14)
fig_dims.tight_layout()
plt.show()

# 3) Bar Chart - Average RGB Means by Class
fig_rgb, ax_rgb = plt.subplots(figsize=(8, 5))

# Mean of the per-image channel means, one row per class, reshaped to long form
df_rgb_mean = df_stats.groupby('label')[['mean_r', 'mean_g', 'mean_b']].mean().reset_index()
df_rgb_mean_melted = df_rgb_mean.melt(id_vars='label', var_name='Channel', value_name='Mean Intensity')

# groupby() sorts classes alphabetically; hue_order pins the class -> colour
# mapping to the order of `labels` so it matches the other figures.
sns.barplot(
    x='Channel',
    y='Mean Intensity',
    hue='label',
    hue_order=labels,
    data=df_rgb_mean_melted,
    palette='Set2',
    ax=ax_rgb,
)
ax_rgb.set_title('Average RGB Means by Class')
ax_rgb.set_xlabel('Color Channel')
ax_rgb.set_ylabel('Mean Intensity')
plt.show()

# 4) Pie Chart - Proportion of Real vs Fake Images (overall dataset)
fig_pie, ax_pie = plt.subplots(figsize=(6, 6))

# value_counts() orders by frequency, which would let wedge order and colours
# swap between runs/datasets. Re-order by `labels` and give each class a fixed
# palette slot so colours agree with the other figures.
label_counts = df_stats['label'].value_counts()
present_labels = [name for name in labels if name in label_counts.index]
class_counts = label_counts.loc[present_labels]
class_colors = dict(zip(labels, sns.color_palette('Set2', n_colors=len(labels))))

ax_pie.pie(
    class_counts,
    labels=class_counts.index,
    autopct='%1.1f%%',
    colors=[class_colors[name] for name in present_labels],
    startangle=140,
    wedgeprops={'edgecolor': 'black'},
)
ax_pie.set_title('Proportion of Real vs Fake Images')
plt.show()

# 5) Boxplot - Sharpness Distribution by Class with jittered point overlay
# A swarm overlay was replaced by a strip (jitter) overlay: swarmplot packs
# points using the axis transform at draw time, so switching to a log scale
# afterwards breaks the packing, and it scales poorly to many observations.
MAX_OVERLAY_POINTS = 2000  # cap on overlay points so the figure stays fast and readable

fig_box, ax_box = plt.subplots(figsize=(8, 6))
sns.boxplot(x='label', y='sharpness', data=df_stats, order=labels, palette='Set2', ax=ax_box)

# The boxes always use the full data; only the overlay is subsampled (fixed seed).
if len(df_stats) > MAX_OVERLAY_POINTS:
    overlay_points = df_stats.sample(n=MAX_OVERLAY_POINTS, random_state=0)
else:
    overlay_points = df_stats

# The explicit `order` keeps overlay columns aligned with the boxes regardless
# of the row order in the (possibly sampled) overlay frame.
sns.stripplot(
    x='label',
    y='sharpness',
    data=overlay_points,
    order=labels,
    color='k',
    alpha=0.5,
    jitter=True,
    ax=ax_box,
)
ax_box.set_title('Image Sharpness Distribution by Class')
ax_box.set_xlabel('Class')
ax_box.set_ylabel('Sharpness (Variance of Laplacian)')
ax_box.set_yscale('log')  # Log scale helps visualize skewed sharpness values
plt.show()

# 6) Line Chart - Average Sharpness Trend Across Dataset Splits
fig_trend, ax_trend = plt.subplots(figsize=(8, 5))

# groupby() returns splits alphabetically (Test, Train, Validation). Re-order
# the rows to follow `splits` so the x-axis reads in pipeline order.
split_rank = {name: position for position, name in enumerate(splits)}
df_split_sharpness = (
    df_stats.groupby(['split', 'label'])['sharpness'].mean().reset_index()
    .assign(split_rank=lambda frame: frame['split'].map(split_rank))
    .sort_values(['split_rank', 'label'])
    .drop(columns='split_rank')
    .reset_index(drop=True)
)

# sort=False keeps the row order established above; hue_order pins the
# class -> colour mapping to the order of `labels`, matching the other figures.
sns.lineplot(
    data=df_split_sharpness,
    x='split',
    y='sharpness',
    hue='label',
    hue_order=labels,
    marker='o',
    palette='Set2',
    sort=False,
    ax=ax_trend,
)
ax_trend.set_title('Average Image Sharpness Trend Across Dataset Splits')
ax_trend.set_xlabel('Dataset Split')
ax_trend.set_ylabel('Mean Sharpness')
plt.show()
