In [None]:
import zipfile

# Unpack the dataset archive into the Colab working directory.
archive_path = 'Dataset.zip'
extract_root = '/content/data'

with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path=extract_root)

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm
import copy
from tqdm.notebook import tqdm

In [None]:
# --- Paths and device -------------------------------------------------------
data_dir = '/content/data/Dataset'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed PyTorch's random number generators so that batch shuffling, the random
# horizontal flips and the initialisation of newly created layers are
# repeatable across "Restart & Run All" (GPU kernels may still introduce
# small non-deterministic differences).
seed = 42
torch.manual_seed(seed)

# Parameters
image_size = 224
batch_size = 32
num_epochs = 10
learning_rate = 1e-4

# Per-channel normalisation constants shared by every transform below.
# These are the widely used ImageNet mean / std values.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training images get a random horizontal flip as augmentation; validation
# and test images are only resized and normalised.
train_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
test_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Datasets & loaders (one sub-folder per class under each split directory)
train_dataset = datasets.ImageFolder(f'{data_dir}/Train', transform=train_transform)
val_dataset = datasets.ImageFolder(f'{data_dir}/Validation', transform=test_transform)
test_dataset = datasets.ImageFolder(f'{data_dir}/Test', transform=test_transform)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
def train_model(model, train_loader, val_loader, epochs=None, lr=None,
                checkpoint_path='/content/best_model.pth'):
    """Fine-tune ``model`` and return it with its best-validation weights loaded.

    Every epoch performs one optimisation pass over ``train_loader`` (Adam
    with cross-entropy loss) followed by an accuracy measurement on
    ``val_loader``. Whenever the validation accuracy strictly improves, a copy
    of the weights is kept in memory and, if ``checkpoint_path`` is set,
    written to disk.

    Args:
        model: ``nn.Module`` producing one logit per class; it is moved to the
            notebook-level ``device``.
        train_loader: iterable of ``(images, labels)`` batches for training.
        val_loader: iterable of ``(images, labels)`` batches for validation.
        epochs: number of epochs. ``None`` (default) means "use the notebook's
            ``num_epochs``", looked up at call time.
        lr: Adam learning rate. ``None`` (default) means "use the notebook's
            ``learning_rate``", looked up at call time.
        checkpoint_path: file the best weights are saved to. Pass a distinct
            path per model to keep separate checkpoints, or ``None`` to skip
            writing to disk.

    Returns:
        The same model object, on ``device``, with the weights from the epoch
        that achieved the highest validation accuracy. If validation accuracy
        never rises above 0, the weights the model had on entry are restored.
    """
    # Resolve the notebook-level defaults when the function is *called*, not
    # when it is defined, so editing the config cell takes effect without
    # having to re-run this cell as well.
    if epochs is None:
        epochs = num_epochs
    if lr is None:
        lr = learning_rate

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_acc = 0.0

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        samples_seen = 0
        train_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} - Training", leave=False)
        for imgs, labels in train_iter:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            batch_count = imgs.size(0)
            # criterion returns the batch mean, so weight it by the batch size
            # to accumulate a per-sample sum.
            running_loss += batch_loss * batch_count
            samples_seen += batch_count
            train_iter.set_postfix(loss=batch_loss)

        # Normalise by the samples actually processed; this stays correct when
        # the loader drops the last batch or draws from a sampler.
        epoch_loss = running_loss / samples_seen

        # ---- validation pass ----
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            val_iter = tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} - Validation", leave=False)
            for imgs, labels in val_iter:
                imgs, labels = imgs.to(device), labels.to(device)
                outputs = model(imgs)
                preds = torch.argmax(outputs, dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
            val_acc = correct / total

        print(f"Epoch [{epoch+1}/{epochs}] Loss: {epoch_loss:.4f} Val Acc: {val_acc:.4f}")

        # Strict '>' keeps the earliest epoch when several tie for best.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # deepcopy: state_dict() returns references to the live tensors,
            # which later optimisation steps would otherwise overwrite.
            best_model_wts = copy.deepcopy(model.state_dict())
            if checkpoint_path is not None:
                torch.save(best_model_wts, checkpoint_path)
                print("Best model saved.")

    print(f"Training complete. Best Val Acc: {best_val_acc:.4f}")
    model.load_state_dict(best_model_wts)
    return model

# Evaluation function
def evaluate_model(model, test_loader):
    """Print and return the top-1 accuracy of ``model`` over ``test_loader``.

    The model is moved to the notebook-level ``device`` and switched to eval
    mode; gradients are disabled for the whole pass.

    Args:
        model: module returning one logit per class for each input.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.
    """
    model = model.to(device)
    model.eval()

    n_hits, n_seen = 0, 0
    with torch.no_grad():
        progress = tqdm(test_loader, desc="Testing", leave=False)
        for batch_imgs, batch_labels in progress:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            predicted = model(batch_imgs).argmax(dim=1)
            n_hits += int(predicted.eq(batch_labels).sum())
            n_seen += batch_labels.shape[0]

    acc = n_hits / n_seen
    print(f"Test Accuracy: {acc:.4f}")
    return acc

In [None]:
# --- EfficientNet-B0 ----------------------------------------------------------
# Size the new classification head from the training set rather than
# hard-coding the number of classes, so the cell keeps working if class
# folders are added or removed.
num_classes = len(train_dataset.classes)

effnet_model = timm.create_model('efficientnet_b0', pretrained=True)
# Swap the pretrained classifier for a fresh linear layer with one output per class.
effnet_model.classifier = nn.Linear(effnet_model.classifier.in_features, num_classes)
print("Training EfficientNet...")
effnet_model = train_model(effnet_model, train_loader, val_loader)
# train_model returns the model with its best-validation weights loaded.
torch.save(effnet_model.state_dict(), '/content/efficientnet_model_final.pth')
evaluate_model(effnet_model, test_loader)

In [None]:
# --- DeiT-Small (patch 16, 224 px) ---------------------------------------------
# Size the new classification head from the training set rather than
# hard-coding the number of classes, so the cell keeps working if class
# folders are added or removed.
num_classes = len(train_dataset.classes)

deit_model = timm.create_model('deit_small_patch16_224', pretrained=True)
# Swap the pretrained head for a fresh linear layer with one output per class.
deit_model.head = nn.Linear(deit_model.head.in_features, num_classes)
print("Training DeiT...")
deit_model = train_model(deit_model, train_loader, val_loader)
# train_model returns the model with its best-validation weights loaded.
torch.save(deit_model.state_dict(), '/content/deit_model_final.pth')
evaluate_model(deit_model, test_loader)

In [None]:
!pip install matplotlib seaborn pandas
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset root directory
dataset_dir = '/content/data/Dataset'

In [None]:
# !pip install opencv-python-headless

import os
from PIL import Image
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
from scipy import stats

# For inline plots
%matplotlib inline

# Dataset path and classes
dataset_dir = '/content/data/Dataset'
splits = ['Train', 'Validation', 'Test']
labels = ['Real', 'Fake']


def _image_stats(img_path):
    """Return size, colour, brightness and sharpness statistics for one image.

    The image is decoded as RGB. Brightness and ``std_gray`` are the mean and
    standard deviation of its grayscale ('L') conversion; sharpness is the
    variance of the Laplacian of that grayscale image.

    Raises whatever PIL / OpenCV raise for unreadable or non-image files.
    """
    # The context manager closes the file handle as soon as the pixels have
    # been decoded by convert(), instead of leaving that to garbage collection.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert('RGB')

    img_np = np.array(img)
    width, height = img.size
    gray_np = np.array(img.convert('L'))

    return {
        'width': width,
        'height': height,
        # Channel-wise stats (last axis is R, G, B after convert('RGB'))
        'mean_r': img_np[:, :, 0].mean(),
        'mean_g': img_np[:, :, 1].mean(),
        'mean_b': img_np[:, :, 2].mean(),
        'std_r': img_np[:, :, 0].std(),
        'std_g': img_np[:, :, 1].std(),
        'std_b': img_np[:, :, 2].std(),
        # Grayscale stats
        'brightness': gray_np.mean(),
        'std_gray': gray_np.std(),
        # Sharpness
        'sharpness': cv2.Laplacian(gray_np, cv2.CV_64F).var(),
    }


# Collect image statistics
stats_list = []

for split in splits:
    for label in labels:
        folder = os.path.join(dataset_dir, split, label)
        if not os.path.isdir(folder):
            # Report the gap instead of skipping silently, so an incomplete
            # dataset is visible in the notebook output.
            print(f"Skipping missing folder: {folder}")
            continue

        # sorted(): os.listdir order is arbitrary; sorting keeps the row
        # order of the resulting table stable between runs.
        for img_file in sorted(os.listdir(folder)):
            img_path = os.path.join(folder, img_file)
            try:
                img_stats = _image_stats(img_path)
            except Exception as e:
                # Best-effort scan: report unreadable / non-image entries and
                # carry on with the remaining files.
                print(f"Error processing {img_path}: {e}")
                continue

            stats_list.append({
                'split': split,
                'label': label,
                'filename': img_file,
                **img_stats,
            })

# One row per successfully processed image
df_stats = pd.DataFrame(stats_list)

# Statistical summary
def display_enhanced_stats(df):
    """Print a class-balance overview and descriptive statistics for ``df``.

    Args:
        df: DataFrame with a ``label`` column plus the numeric columns
            ``sharpness``, ``brightness``, ``width``, ``height``, ``mean_r``,
            ``mean_g`` and ``mean_b``. An empty frame is accepted: only the
            overview header is printed in that case.

    Returns:
        None. Everything is written to stdout.
    """
    numeric_cols = ['sharpness', 'brightness', 'width', 'height',
                    'mean_r', 'mean_g', 'mean_b']

    print("=== Dataset Overview ===")
    total_images = len(df)
    print(f"Total Images: {total_images}")
    if total_images == 0:
        # A frame built from zero records has no columns at all, so the
        # lookups below would raise KeyError; stop with a clear message.
        print("No images were found - nothing to summarise.")
        return

    class_counts = df['label'].value_counts()
    for cls, cnt in class_counts.items():
        print(f"Class '{cls}': {cnt} images ({cnt/total_images*100:.2f}%)")

    print("\n--- Numeric Column Summary ---")
    for col in numeric_cols:
        series = df[col]
        # mode() returns an empty Series when the column holds only NaN.
        modes = series.mode()
        mode_text = f"{modes.iloc[0]:.2f}" if not modes.empty else "n/a"

        print(f"\nColumn: {col}")
        print(f"Count: {series.count()}")
        print(f"Mean: {series.mean():.2f}")
        print(f"Median: {series.median():.2f}")
        print(f"Mode: {mode_text}")
        print(f"25th Percentile (Q1): {series.quantile(0.25):.2f}")
        print(f"75th Percentile (Q3): {series.quantile(0.75):.2f}")
        print(f"Min: {series.min():.2f}")
        print(f"Max: {series.max():.2f}")
        print(f"Standard Deviation: {series.std():.2f}")

# Print the textual summary of the collected statistics
display_enhanced_stats(df_stats)

# --------------------- VISUALIZATIONS ---------------------
sns.set(style='whitegrid', palette='Set2', font_scale=1.1)

# Histogram: distribution of image sharpness, one colour per class
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=df_stats, x='sharpness', hue='label', bins=40, kde=True, ax=ax)
ax.set_title('Histogram: Image Sharpness by Class')
ax.set_xlabel('Sharpness (Laplacian Variance)')
ax.set_ylabel('Image Count')
plt.show()

# Bar chart: average of the per-image R/G/B channel means, per class
df_rgb_mean = df_stats.groupby('label')[['mean_r', 'mean_g', 'mean_b']].mean().reset_index()
# Long format (label, Channel, Mean) so seaborn can group the bars by class.
df_rgb_mean = df_rgb_mean.melt(id_vars='label', var_name='Channel', value_name='Mean')

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df_rgb_mean, x='Channel', y='Mean', hue='label', ax=ax)
ax.set_title('Average RGB Channel Mean by Class')
ax.set_xlabel('Channel')
ax.set_ylabel('Mean Intensity')
plt.show()

# Pie chart: share of Real vs Fake images across all splits
fig, ax = plt.subplots(figsize=(6, 6))
class_counts = df_stats['label'].value_counts()
ax.pie(class_counts, labels=class_counts.index, autopct='%1.1f%%', startangle=140,
       colors=sns.color_palette('Set2'), wedgeprops={'edgecolor': 'black'})
ax.set_title('Real vs Fake Image Proportion')
plt.show()

# Line chart: mean sharpness per class across the dataset splits
df_line = df_stats.groupby(['split', 'label'])['sharpness'].mean().reset_index()
# groupby sorts the split names alphabetically (Test, Train, Validation).
# Restore the pipeline order given by `splits` so the x-axis reads
# Train -> Validation -> Test; only splits that actually have data are kept.
present_splits = set(df_line['split'])
split_order = [name for name in splits if name in present_splits]
df_line['split'] = pd.Categorical(df_line['split'], categories=split_order, ordered=True)
df_line = df_line.sort_values(['split', 'label'])

fig, ax = plt.subplots(figsize=(8, 5))
sns.lineplot(data=df_line, x='split', y='sharpness', hue='label', marker='o', ax=ax)
ax.set_title('Line Chart: Avg Sharpness across Splits')
ax.set_xlabel('Dataset Split')
ax.set_ylabel('Mean Sharpness')
plt.show()
