In [46]:
import torch 
import os
import numpy as np 
from torchvision.models.vision_transformer import VisionTransformer
from Dataset_Loader import ImageDataset
from torch import Tensor
from torch import nn,optim
from torch.utils.data import DataLoader,TensorDataset
import torch
from CAE import ConvAutoencoder
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torch.optim import Adam
from skimage.metrics import structural_similarity as ssim
import cv2
# Pick the compute device once for the whole notebook: use the GPU when CUDA is
# available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


In [5]:
# --- Data locations -------------------------------------------------------
TRAIN_PATH = "Dataset/train/"
VAL_PATH = "Dataset/val1/"

# --- Data loading (passed to ImageDataset in the next cell) ----------------
IMG_SIZE = 224
BATCH_SIZE = 32
SHUFFLE = True

# --- Model architecture ----------------------------------------------------
# NOTE(review): these look like VisionTransformer hyper-parameters; the model
# that consumes them is not built in this part of the notebook - confirm.
PATCH_SIZE = 16
NUM_LAYERS = 2
NUM_HEADS = 2
HIDDEN_DIM = 128
MLP_DIM = 256
DROPOUT = 0.2

# --- Optimisation ----------------------------------------------------------
EPOCHS = 50
LEARNING_RATE = 1e-4

In [7]:
# Training split: images under TRAIN_PATH, configured from the constants cell.
train_image_dataset = ImageDataset(
    path=TRAIN_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=SHUFFLE,
)

# Validation split: same settings as training, only the source folder differs.
val_image_dataset = ImageDataset(
    path=VAL_PATH,
    img_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    shuffle=SHUFFLE,
)

In [9]:
# Obtain one batch iterator per split from the dataset wrappers.
# NOTE(review): presumably torch DataLoaders yielding (images, labels) batches,
# which is what train_autoencoder unpacks - confirm in Dataset_Loader.
train_loader = train_image_dataset.get_dataloader()
val_loader = val_image_dataset.get_dataloader()

In [11]:
# Show the class-name -> index mapping of the training split as the cell output.
train_image_dataset.get_labels()

{'Safe': 0, 'notSafe': 1}

# Autoencoder Model Training

In [19]:
# Build the convolutional autoencoder with its default constructor arguments.
CAE = ConvAutoencoder()
# Bare expression on the last line: the notebook prints the module's layer summary.
CAE

ConvAutoencoder(
  (encoder): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(64, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (1): ReLU()
    (2): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (3): ReLU()
    (4): ConvTranspose2d(16, 3, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1))
    (5): Sigmoid()
  )
)

In [58]:
def calculate_psnr(original, reconstructed, max_pixel=1.0):
    """Peak signal-to-noise ratio (in dB) between an image and its reconstruction.

    Args:
        original: Reference image as an array-like of any shape.
        reconstructed: Reconstructed image, broadcast-compatible with ``original``.
        max_pixel: Largest value a pixel can take. The default of 1.0 matches
            images normalised to [0, 1]; pass 255 for 8-bit images.

    Returns:
        The PSNR in decibels as a Python float, or ``float('inf')`` when the
        two inputs are identical (zero mean squared error).
    """
    # Promote to float64 before subtracting: with unsigned-integer inputs the
    # difference and its square wrap around silently (e.g. uint8 0 - 16 -> 240,
    # 240**2 -> 0), which would report a perfect reconstruction.
    original = np.asarray(original, dtype=np.float64)
    reconstructed = np.asarray(reconstructed, dtype=np.float64)

    mse = np.mean((original - reconstructed) ** 2)
    if mse == 0:
        return float('inf')
    return float(20 * np.log10(max_pixel / np.sqrt(mse)))

def calculate_ssim(original, reconstructed):
    """Structural similarity (SSIM) between two channel-last images scaled to [0, 1].

    Both images are quantised to 8-bit before comparison and SSIM is computed
    with ``data_range=255`` over the last (channel) axis.

    Args:
        original: Reference image, array of shape (height, width, channels)
            with values expected in [0, 1].
        reconstructed: Reconstructed image with the same shape and value range.

    Returns:
        The SSIM score returned by ``skimage.metrics.structural_similarity``.

    Raises:
        ValueError: If the image is smaller than 3 pixels along height or
            width, since no valid (odd, >1) SSIM window fits.
    """
    def _to_uint8(img):
        # Round instead of truncating so float round-trips such as
        # 127.99999 map back to 128, and clip first so values slightly
        # outside [0, 1] saturate instead of wrapping around in uint8.
        scaled = np.rint(np.asarray(img, dtype=np.float64) * 255.0)
        return np.clip(scaled, 0, 255).astype(np.uint8)

    original = _to_uint8(original)
    reconstructed = _to_uint8(reconstructed)

    smallest_side = min(original.shape[0], original.shape[1])
    if smallest_side < 3:
        raise ValueError(
            f"SSIM needs images of at least 3x3 pixels, got {original.shape[0]}x{original.shape[1]}"
        )

    # The SSIM window must be odd and fit inside the image: use the usual 7,
    # or the largest odd size that fits for small images.
    win_size = min(smallest_side, 7)
    if win_size % 2 == 0:
        win_size -= 1

    return ssim(original, reconstructed, channel_axis=-1, data_range=255, win_size=win_size)


In [60]:
def train_autoencoder(model, train_loader, val_loader, epochs=10, learning_rate=0.001, device=None):
    """Train a reconstruction model with MSE loss and evaluate it after every epoch.

    Args:
        model: ``nn.Module`` that maps an image batch to a reconstruction of
            the same shape.
        train_loader: Iterable of ``(images, target)`` batches used for
            optimisation; the second item of each batch is ignored.
        val_loader: Iterable of ``(images, target)`` batches used for
            evaluation; the second item of each batch is ignored.
        epochs: Number of passes over ``train_loader``.
        learning_rate: Learning rate for the Adam optimiser.
        device: Device to run on. When falsy, CUDA is used if available,
            otherwise the CPU.

    Returns:
        A dict with the keys ``"train_loss"``, ``"val_loss"``, ``"val_psnr"``
        and ``"val_ssim"``, each holding one value per epoch. Losses are
        averaged over batches; PSNR and SSIM are averaged over the validation
        images actually processed. An average over zero items is ``nan``.
        ``val_psnr`` becomes ``inf`` if any image is reconstructed exactly,
        because ``calculate_psnr`` returns ``inf`` in that case.
    """
    device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    history = {"train_loss": [], "val_loss": [], "val_psnr": [], "val_ssim": []}

    def _mean(total, count):
        # Averages are taken over what was actually iterated; an empty loader
        # yields nan instead of raising ZeroDivisionError.
        return total / count if count else float('nan')

    for epoch in range(epochs):
        # ---- Training phase ----
        model.train()
        train_loss = 0.0
        train_batches = 0

        for images, _ in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} [Train]"):
            images = images.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            # Autoencoder objective: the input is its own target.
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_batches += 1

        train_loss = _mean(train_loss, train_batches)
        history["train_loss"].append(train_loss)

        # ---- Validation phase ----
        model.eval()
        val_loss = 0.0
        val_batches = 0
        val_samples = 0
        psnr_total = 0.0
        ssim_total = 0.0
        with torch.no_grad():
            for images, _ in tqdm(val_loader, desc=f"Epoch {epoch+1}/{epochs} [Val]"):
                images = images.to(device)
                outputs = model(images)
                loss = criterion(outputs, images)
                val_loss += loss.item()
                val_batches += 1

                # The image metrics work on NumPy arrays in channel-last layout:
                # (batch, channels, height, width) -> (batch, height, width, channels).
                images_np = images.cpu().numpy().transpose(0, 2, 3, 1)
                outputs_np = outputs.cpu().numpy().transpose(0, 2, 3, 1)

                for target_img, recon_img in zip(images_np, outputs_np):
                    psnr_total += calculate_psnr(target_img, recon_img)
                    ssim_total += calculate_ssim(target_img, recon_img)
                # Count the images actually seen rather than trusting
                # len(val_loader.dataset), which is wrong with samplers,
                # drop_last, or datasets without a length.
                val_samples += images_np.shape[0]

        val_loss = _mean(val_loss, val_batches)
        avg_psnr = _mean(psnr_total, val_samples)
        avg_ssim = _mean(ssim_total, val_samples)
        history["val_loss"].append(val_loss)
        history["val_psnr"].append(avg_psnr)
        history["val_ssim"].append(avg_ssim)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, PSNR: {avg_psnr:.2f}, SSIM: {avg_ssim:.4f}")

    return history