In [1]:
!nvidia-smi

Sat Mar  2 09:12:29 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.14              Driver Version: 550.54.14      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 2070 ...    Off |   00000000:0A:00.0  On |                  N/A |
| 49%   51C    P5             37W /  215W |     616MiB /   8192MiB |      6%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
import math
import cv2
from collections import defaultdict


import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms
from torchvision.transforms import Normalize, ToTensor, Compose
from torchvision.models import densenet121, densenet169, densenet201

import segmentation_models_pytorch as smp
import timm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
if torch.cuda.is_available():
    device = torch.device(type="cuda", index=0)
else:
    device = torch.device(type="cpu", index=0)
    
print(device)

cuda:0


In [4]:
manual_seed = random.randint(1, 10000)
print(f"Random Seed: {manual_seed}")
random.seed(manual_seed)
torch.manual_seed(manual_seed)
torch.use_deterministic_algorithms(True)

Random Seed: 1385


In [5]:
torch.cuda.empty_cache()

# Data Processing and Dataset Augumentation

In [6]:
# def process_image(input_path, output_path):
#     try:
#     
#         image = Image.open(input_path)

#         
#         if image.mode != 'RGBA':
#             image = image.convert('RGBA')

#         
#         image = image.resize((256, 256))

#        
#         image.save(output_path, 'PNG')
#     except Exception as e:
#         print(f"Error processing image {input_path}: {e}")

# 
# root_folder = "class_dirs_orignal"
# output_folder = "class_dirs_final"

# 
# os.makedirs(output_folder, exist_ok=True)

# 
# for class_name in os.listdir(root_folder):
#     class_path = os.path.join(root_folder, class_name)
#     output_class_path = os.path.join(output_folder, class_name)

#     
#     if not os.path.isdir(class_path):
#         continue

#     # Create output class folder if it doesn't exist
#     os.makedirs(output_class_path, exist_ok=True)

#     # Iterate through images in the sub-folder
#     for filename in tqdm(os.listdir(class_path)):
#         input_image_path = os.path.join(class_path, filename)
#         output_image_path = os.path.join(output_class_path, filename.split('.')[0] + '.png')

#         # Process and save the image
#         process_image(input_image_path, output_image_path)
#         tqdm.write(f"Processed: {input_image_path}")

# print("All images processed and saved.")

In [7]:
def create_splice_manipulation(img, augumentation_list):
    # gt_mask = np.zeros_like(img[:,:,0]) #ground_truth_mask
    
    h, w, _ = img.shape
    size_patch_pcent = 0.3
    
    r1,c1 = random.randint(0,int(np.floor((1-size_patch_pcent)*h))), random.randint(0,int(np.floor((1-size_patch_pcent)*w)))
    r2,c2 = random.randint(r1+int(np.floor(size_patch_pcent*h)),h), random.randint(c1+int(np.floor(size_patch_pcent*w)),w)
    
    patch = img[r1:r2,c1:c2,:]
    # gt_mask[r1:r2,c1:c2,:] = 1
    
    augmentation = random.choice(augumentation_list)
    if augmentation == 'H-Flip':
        patch = np.fliplr(patch)
    elif augmentation == 'V-Flip':
        patch = np.flipud(patch)
    elif augmentation == '90':
        patch = np.rot90(patch, 1)
    elif augmentation == '180':
        patch = np.rot90(patch, 2)
    elif augmentation == '270':
        patch = np.rot90(patch, 3)
    elif augmentation == 'Shear':
        shear_factor = random.uniform(-0.2, 0.2)
        shear_matrix = np.array([[1, shear_factor, 0], [0, 1, 0]])
        patch = cv2.warpAffine(patch, shear_matrix, (patch.shape[1], patch.shape[0]), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
        
    ph, pw, _ = patch.shape
    
    start_r, start_c = random.randint(0, h-ph), random.randint(0, w-pw)
    img_spliced = np.copy(img)
    img_spliced[start_r:start_r+ph, start_c:start_c+pw, :] = patch
    # gt_mask[start_r:start_r+ph, start_c:start_c+pw] = 1
    
    return img_spliced

In [8]:
# for class_name in os.listdir(root_folder):
#     class_path = os.path.join(root_folder, class_name)
    
#     if not os.path.isdir(class_path):
#         continue
    
#     augmented_images_dir = os.path.join(class_path, "augmented_images")
#     os.makedirs(augmented_images_dir, exist_ok=True)
    
#     image_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
#     num_images = len(image_files)
    
#     for image_file in tqdm(image_files, desc=f"Processing images in {class_name}"):
#         img = cv2.imread(image_file)
#         if img is None:
#             print(f"Error: Unable to load image {image_file}")
#             continue
#         num_augmentations = random.randint(1, 6)
        
#         for _ in range(num_augmentations):
#             augmentation_list = ['H-Flip', 'V-Flip', '90', '180', '270', 'Shear']
#             img_spliced = create_splice_manipulation(img, augmentation_list)
#             augmented_image_filename = f"spliced_augmented_{os.path.basename(image_file).split('.')[0]}_{_}.jpg"
#             cv2.imwrite(os.path.join(augmented_images_dir, augmented_image_filename), img_spliced)

# Creating an iterable DataLoader

In [9]:
root_folder = "DatasetLoad/class_dirs_final"
# class_dirs = [os.path.join(root_folder, d) for d in os.listdir(root_folder) if os.path.isdir(os.path.join(root_folder, d))]

workers = 2

batch_size = 16

image_size = 256

EPOCHS = 20

lr = 0.001

beta1 = 0.5

ENCODER = 'densenet201'
ENCODER_WT = 'imagenet'
CLASSES = ["Blot", "FACS", "Macroscopy", "Microscopy", "None"]
AUXPARAMS = dict(pooling='max',dropout=0.5,activation='softmax',classes=CLASSES)
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WT)
# nGPU = 1

In [10]:
class ImageSet(Dataset):
    def __init__(self, root_path, num_samples_per_class = 7996, transform=None):
        super().__init__()
        self.root_path = root_path
        self.transform = transform
        self.classes = self.classes = sorted([d for d in os.listdir(root_path) if os.path.isdir(os.path.join(root_path, d))])
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.num_samples_per_class = num_samples_per_class
        self.images = self._load_images()
        self._print_class_distribution()
        # print(self.class_to_idx)
        
        
    def _load_images(self):
        images = []
        for cls_idx, cls_name in enumerate(self.classes):
            cls_dir = os.path.join(self.root_path, cls_name)
            img_list = [f for f in os.listdir(cls_dir) if not f.startswith('.') and os.path.isfile(os.path.join(cls_dir, f))]
            # img_list = [f for f in os.listdir(cls_dir) if f.endswith('.png') and f.startswith('.') and os.path.isfile(os.path.join(cls_dir, f))]
            random.shuffle(img_list)
            img_list = img_list[:self.num_samples_per_class]
            for img_name in img_list:
                img_path = os.path.join(cls_dir, img_name)
                images.append((img_path, cls_idx))
                # print(cls_idx)
        return images
    
    def _print_class_distribution(self):
        print("Class distribution:")
        temp = []
        for cls_name in self.classes:
            count = sum(1 for _, label in self.images if self.classes[label] == cls_name)
            print(f"{cls_name}: {count} images")
            temp.append(count)
        self.num_samples_per_class = min(temp)
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)
        return image, label

In [11]:
transform = transforms.Compose([
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    # preprocessing_fn
])
dataset = ImageSet(root_path=root_folder, transform=transform)
lenDataset = dataset.__len__()
print(lenDataset)

trainSiz = int(0.75 * len(dataset))
valSiz = int(0.10 * len(dataset))
testSiz = len(dataset) - trainSiz - valSiz

trainSet, valSet, testSet = random_split(dataset, [trainSiz, valSiz, testSiz])
print(f"train: {trainSet.__len__()}, val: {valSet.__len__()}, test: {testSet.__len__()}")

Class distribution:
Blot: 7996 images
FACS: 7996 images
Macroscopy: 7996 images
Microscopy: 7996 images
negetive: 7989 images
39973
train: 29979, val: 3997, test: 5997


In [12]:
train_dataloader=DataLoader(dataset=trainSet,batch_size=batch_size,shuffle=True, num_workers=12)

val_dataloader=DataLoader(dataset=valSet,batch_size=batch_size, num_workers=4)

# Creating a Neural Network Class

In [13]:
# class UDenseNet():
#     def __init__(self, encoderName = 'densenet201', encoder_weights = 'imagenet', in_channels = 3, classes = 5):    
#         self.encoderName = encoderName
#         self.encoderWeights = encoder_weights
#         self.inCh = 3
#         self.classess = classes
#         self.auxParams = dict(pooling='max',dropout=0.5,activation='softmax',classes=classes)
                
#     def get_Unet(self): 
        
#         model = smp.Unet(
#             encoder_name=self.encoderName,
#             encoder_weights=self.encoderWeights,
#             decoder_use_batchnorm= True,
#             in_channels= self.inCh,
#             classes=self.classess,
#             aux_params=self.auxParams)
        
#         # class CustomHead(nn.Module):
#         #     def __init__(self, base_model, classes):
#         #         super(CustomHead, self).__init__()
#         #         self.baseModel = base_model
                
#         #         self.Flatten = nn.Flatten()
#         #         self.relu = nn.ReLU()
                
#         #         # self.fc1_in_features = self._get_fc1_in_features()
#         #         # print(self.fc1_in_features)
#         #         self.fc1 = nn.Linear(in_features=327680, out_features=384)
#         #         self.bn1 = nn.BatchNorm1d(num_features=384)
                
#         #         self.dropout = nn.Dropout(0.5)
                
#         #         self.out = nn.Linear(384, classes)
                
#         #     # def _get_fc1_in_features(self):
#         #     #     # Forward pass a dummy tensor to get the output size
#         #     #     dummy_input = torch.randn(1, 3, 256, 256)
#         #     #     with torch.no_grad():
#         #     #         x = self.baseModel.encoder(dummy_input)
                
#         #     #     # If the encoder returns a list of tensors, concatenate them along spatial dimensions
#         #     #     return x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
            
#         #     def forward(self, x):
#         #         x = self.baseModel(x)
#         #         x = self.Flatten(x)
#         #         # print(x.shape)
#         #         # x = x.view(x.size(0), -1)
#         #         x = self.fc1(x)
#         #         x = self.bn1(x)
#         #         x = self.relu(x)
#         #         x = self.dropout(x)
#         #         x = self.out(x)
#         #         return F.softmax(x, dim=1)
            
#         # model = CustomHead(base_model, self.classess)    
        
#         return model



class DenseUNet(nn.Module):
    def __init__(self, encoder_name, encoder_weights, classes):
        super(DenseUNet, self).__init__()
        self.Unet = smp.Unet(
            encoder_name=encoder_name,
            encoder_weights=encoder_weights,
            in_channels=3,
            classes=len(classes)
        )
        # self.Unet.segmentation_head = None
        self.Unet.classification_head = nn.Sequential(
            nn.Conv2d(1920, 1280, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1),
        )
        self.classifier = nn.Linear(1280,len(classes))
        # Random wt for classifers as pretrained uses pretrained wts.
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.constant_(self.classifier.bias, 0)
        
    def forward(self, x):
        _, features = self.Unet(x)
        features = features.view(features.size(0), -1)
        x = self.classifier(features)
        return F.softmax(x, dim=1)
            



In [14]:
model = DenseUNet(ENCODER, ENCODER_WT, CLASSES).to(device)
print(model)

DenseUNet(
  (Unet): Unet(
    (encoder): DenseNetEncoder(
      (features): Sequential(
        (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu0): ReLU(inplace=True)
        (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (denseblock1): _DenseBlock(
          (denselayer1): _DenseLayer(
            (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu1): ReLU(inplace=True)
            (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu2): ReLU(inplace=True)
            (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          )
          (denselayer2): _DenseLayer(
 

In [15]:
def train_one_epoch(dataloader, model, loss_fn, optimizer, batch_size, device):
    model.train()
    track_loss = 0
    correct_predictions = 0

    for i, (imgs, labels) in enumerate(dataloader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        pred = model(imgs)

        loss = loss_fn(pred, labels)
        track_loss += loss.item()
        correct_predictions += torch.sum(torch.argmax(pred, dim=1) == labels).item()

        running_loss = round(track_loss / (i + 1), 2)
        running_acc = round((correct_predictions / ((i + 1) * batch_size)) * 100, 2)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if i % 100 == 0:
            print("Batch:", i + 1, "/", len(dataloader), "Running Loss:", running_loss, "Running Accuracy:", running_acc)

    epoch_loss = running_loss
    epoch_acc = running_acc
    return epoch_loss, epoch_acc

def validate_one_epoch(dataloader, model, loss_fn, batch_size, device):
    model.eval()
    track_loss = 0
    correct_predictions = 0

    with torch.no_grad():
        for i, (imgs, labels) in enumerate(dataloader):
            imgs = imgs.to(device)
            labels = labels.to(device)

            pred = model(imgs)

            loss = loss_fn(pred, labels)
            track_loss += loss.item()
            correct_predictions += torch.sum(torch.argmax(pred, dim=1) == labels).item()

            running_loss = round(track_loss / (i + 1), 2)
            running_acc = round((correct_predictions / ((i + 1) * batch_size)) * 100, 2)

            if i % 100 == 0:
                print("Validation Batch:", i + 1, "/", len(dataloader), "Running Loss:", running_loss, "Running Accuracy:", running_acc)

        epoch_loss = running_loss
        epoch_acc = running_acc
    return epoch_loss, epoch_acc

In [16]:
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

In [17]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
loss_fn=nn.CrossEntropyLoss()

for epoch in range(EPOCHS):
    print("------------------------------------------------")
    print(f"EPOCHS: {epoch}")
    print("------------------------------------------------")
    train_loss, train_acc = train_one_epoch(train_dataloader, model, loss_fn, optimizer, batch_size, device)

    val_loss, val_acc = validate_one_epoch(val_dataloader, model, loss_fn, batch_size, device)

    print(f"Epoch {epoch + 1}/{EPOCHS} - Training Loss: {train_loss}, Training Accuracy: {train_acc}% - Validation Loss: {val_loss}, Validation Accuracy: {val_acc}%")
    if val_loss > train_loss:
        print("Warning: Potential overfitting!")


------------------------------------------------
EPOCHS: 0
------------------------------------------------
Batch: 1 / 1874 Running Loss: 1.64 Running Accuracy: 6.25
Batch: 101 / 1874 Running Loss: 1.59 Running Accuracy: 31.62
Batch: 201 / 1874 Running Loss: 1.61 Running Accuracy: 28.92
Batch: 301 / 1874 Running Loss: 1.6 Running Accuracy: 30.5
Batch: 401 / 1874 Running Loss: 1.59 Running Accuracy: 31.34
Batch: 501 / 1874 Running Loss: 1.59 Running Accuracy: 31.0
Batch: 601 / 1874 Running Loss: 1.57 Running Accuracy: 32.98
Batch: 701 / 1874 Running Loss: 1.56 Running Accuracy: 34.64
Batch: 801 / 1874 Running Loss: 1.54 Running Accuracy: 36.45
Batch: 901 / 1874 Running Loss: 1.53 Running Accuracy: 37.72
Batch: 1001 / 1874 Running Loss: 1.51 Running Accuracy: 38.83
Batch: 1101 / 1874 Running Loss: 1.5 Running Accuracy: 39.84
Batch: 1201 / 1874 Running Loss: 1.5 Running Accuracy: 39.77
Batch: 1301 / 1874 Running Loss: 1.5 Running Accuracy: 40.3
Batch: 1401 / 1874 Running Loss: 1.5 Running

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7392df7cd450>>
Traceback (most recent call last):
  File "/home/joy/pyenv/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 770, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Batch: 501 / 1874 Running Loss: 1.31 Running Accuracy: 59.23
