### Functions
---

In [43]:
def printer(iterable:list|dict):
    """
    Displays Lists and Dictionaries nicely
    """
    if isinstance(iterable,list):
        print("List".center(16,"-"))
        for item in iterable:
            print(f" - {item}")
        print()
    elif isinstance(iterable,dict):
        print("Dictionary".center(16,"-"))
        
        for key in list(iterable.keys()):
            print(f" {key} :",iterable[key])
        print()
    else:
        print(iterable)


In [44]:
def giveMeImageSize(modelVersion = "B4") -> tuple:
    """
    Returns the resolution for the respective EfficientNet model verisons:

    "B0", "B1", "B2", "B3", "B4", "B"5, "B6", "B7"

    """
    if modelVersion == "B0":
        return (224,224)
    elif modelVersion == "B1":
        return (240,240)
    elif modelVersion == "B2":
        return (260,260)
    elif modelVersion == "B3":
        return (300,300)
    elif modelVersion == "B4":
        return (380,380)
    elif modelVersion == "B5":
        return (456,456)
    elif modelVersion == "B6":
        return (528,528)
    elif modelVersion == "B7":
        return (600,600)


In [45]:
from torchvision import transforms
#? function for image transformation
def pad_to_Square(image):
    width, height = image.size
    max_side = max(width,height)
    left_pad = (max_side-width)//2
    right_pad = (max_side-width-left_pad)
    top_pad = (max_side-height)//2
    bot_pad = (max_side-height - top_pad)

    padded_image = transforms.functional.pad(image, (left_pad, top_pad, right_pad, bot_pad), padding_mode='constant', fill=0)
    return padded_image


In [46]:
import os
from sklearn.model_selection import train_test_split
from PIL import Image
# Function to create the necessary directories
def create_dirs(base_dir, classes):
    os.makedirs(base_dir, exist_ok=True)
    for subset in ['train', 'valid', 'test']:
        subset_path = os.path.join(base_dir, subset)
        os.makedirs(subset_path, exist_ok=True)
        for class_name in classes:
            os.makedirs(os.path.join(subset_path, class_name), exist_ok=True)

# Function to split and save images into train and test directories
def split_and_process_images(raw_data_dir:str, processed_data_dir:str, randomState=42):
    # Get all class names (subfolder names)
    classes = os.listdir(raw_data_dir)
    print("Subfolders:",classes)
    create_dirs(processed_data_dir, classes)
    
    # Iterate through each class folder
    for class_name in classes:
        class_folder = os.path.join(raw_data_dir, class_name)
        if os.path.isdir(class_folder):
            # Get all image filenames
            image_filenames = os.listdir(class_folder)
            print(f"{class_name} : {len(image_filenames)} files.")
            # Split into train and test sets
            train_files, testval_files = train_test_split(image_filenames, test_size=0.2, random_state=randomState)
            print(f" Train: {len(train_files)}, Test&Val: {len(testval_files)}")
            # Process and save training images
            for filename in train_files:
                image_path = os.path.join(class_folder, filename)
                img = Image.open(image_path)
                # Save the training image
                train_save_path = os.path.join(processed_data_dir, 'train', class_name, filename)
                img.save(train_save_path)

            # Split into validation and test sets
            val_files, test_files = train_test_split(testval_files, test_size=0.5, random_state=randomState)

            # Process and save validation images
            for filename in val_files:
                image_path = os.path.join(class_folder, filename)
                img = Image.open(image_path)
                # Save the validation image
                val_save_path = os.path.join(processed_data_dir, 'valid', class_name, filename)
                img.save(val_save_path)

            # Process and save test images
            for filename in test_files:
                image_path = os.path.join(class_folder, filename)
                img = Image.open(image_path)
                # Save the testing image
                test_save_path = os.path.join(processed_data_dir, 'test', class_name, filename)
                img.save(test_save_path)

        else:
            print(f"Error on {class_name}")


In [47]:

def freeze_model(model):
    for params in model.parameters():
        params.requires_grad=False
    
def unfreeze_last_n_blocks(model,n):
    lastblock = len(model.features)-1
    blocknames = ["classifier"] + [f"features.{lastblock-i}" for i in range(n)]
    for name,params in model.named_parameters():
        if any(substring in name for substring in blocknames): 
            params.requires_grad = True

    print(f"Layers unfrozen: {blocknames}")

# def unfreeze_last_two_blocks(model):
#     lastblock = str(len(model.features)-1)
#     blocknames = [f"features.{lastblock-i}" for i in range(2)]
#     for name, params in model.named_parameters():
#         if f"features.{lastblock}" in name or f"features.{lastblock-1}": 
#             params.requires_grad = True
#     print("Phase 3 - Training last 2 blocks and classifier")

# def unfreeze_last_four_blocks(model):
#     lastblock = str(len(model.features)-1)
#     blocknames = [f"features.{lastblock-i}" for i in range(4)]
#     for name, params in model.named_parameters():
#         if any(substring in name for substring in blocknames): 
#             params.requires_grad = True
#     print("Phase 3 - Training last 4 blocks and classifier")

def unfreeze_whole_model(model):
    for params in model.parameters():
        params.requires_grad = True 
    print("phase 4 - whole model training")

### Data Splitting
---

In [9]:
# Split and process the images
split_and_process_images(raw_data_dir='data',
                         processed_data_dir='processedData',
                        )


Subfolders: ['1. Eczema 1677', '10. Warts Molluscum and other Viral Infections - 2103', '2. Melanoma 15.75k', '3. Atopic Dermatitis - 1.25k', '4. Basal Cell Carcinoma (BCC) 3323', '5. Melanocytic Nevi (NV) - 7970', '6. Benign Keratosis-like Lesions (BKL) 2624', '7. Psoriasis pictures Lichen Planus and related diseases - 2k', '8. Seborrheic Keratoses and other Benign Tumors - 1.8k', '9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k']
1. Eczema 1677 : 1677 files.
 Train: 1341, Test&Val: 336
10. Warts Molluscum and other Viral Infections - 2103 : 2103 files.
 Train: 1682, Test&Val: 421
2. Melanoma 15.75k : 3140 files.
 Train: 2512, Test&Val: 628
3. Atopic Dermatitis - 1.25k : 1257 files.
 Train: 1005, Test&Val: 252
4. Basal Cell Carcinoma (BCC) 3323 : 3323 files.
 Train: 2658, Test&Val: 665
5. Melanocytic Nevi (NV) - 7970 : 7970 files.
 Train: 6376, Test&Val: 1594
6. Benign Keratosis-like Lesions (BKL) 2624 : 2079 files.
 Train: 1663, Test&Val: 416
7. Psoriasis pictures Li

### Data Loaders and Processing
---

In [48]:
imgSize = giveMeImageSize()
print(imgSize)

(380, 380)


#### Cleaning folder names into class names

In [49]:
 #? raw dataset File path
raw_path = './data'

folderNames = [folder for folder in os.listdir(raw_path) if os.path.isdir(os.path.join(raw_path, folder))]

cleanedNames = []
for idx in range(len(folderNames)):
    words = folderNames[idx].split()
    words.pop(0)
    words.pop()
    if "-" in words:
        words.pop(words.index("-"))

    out = " ".join(words)
    cleanedNames.append(out)

printer(cleanedNames)

classMapping = {}
for index, value in enumerate(cleanedNames):
    classMapping[index] = value

printer(classMapping)
print(len(classMapping))

------List------
 - Eczema
 - Warts Molluscum and other Viral Infections
 - Melanoma
 - Atopic Dermatitis
 - Basal Cell Carcinoma (BCC)
 - Melanocytic Nevi (NV)
 - Benign Keratosis-like Lesions (BKL)
 - Psoriasis pictures Lichen Planus and related diseases
 - Seborrheic Keratoses and other Benign Tumors
 - Tinea Ringworm Candidiasis and other Fungal Infections

---Dictionary---
 0 : Eczema
 1 : Warts Molluscum and other Viral Infections
 2 : Melanoma
 3 : Atopic Dermatitis
 4 : Basal Cell Carcinoma (BCC)
 5 : Melanocytic Nevi (NV)
 6 : Benign Keratosis-like Lesions (BKL)
 7 : Psoriasis pictures Lichen Planus and related diseases
 8 : Seborrheic Keratoses and other Benign Tumors
 9 : Tinea Ringworm Candidiasis and other Fungal Infections

10


#### Getting Mean and STD for image normalization

In [50]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
# import torch
# from torchvision import datasets, transforms
# from torch.utils.data import DataLoader

#? Getting the mean and std of the raw dataset
initial_Transform = transforms.Compose([
    transforms.Lambda(pad_to_Square),
    transforms.Resize(224),  # Resize images
    transforms.ToTensor(),  # Convert images to tensor
])

#? raw dataset File path
raw_path = './data'

# Load raw dataset
dataset = datasets.ImageFolder(root=raw_path, transform=initial_Transform)
loader = DataLoader(dataset, batch_size=16, shuffle=False)       #? num_workers indicate the number of parallel processes

# Initialize sums
mean = 0.
std = 0.
total_images = 0

print("dataset:", dataset)
batch = 0
print(f"Total Batches: {len(loader)}")
for images, _ in loader:
    batch_samples = images.size(0)  # batch size
    images = images.view(batch_samples, images.size(1), -1)  # flatten H and W
    mean += images.mean(2).sum(0)
    std += images.std(2).sum(0)
    total_images += batch_samples
    batch += 1
    print(f"Batch [{batch}/{len(loader)}]".ljust(60), end="\r")
print()

mean /= total_images
std /= total_images

print(f"Mean: {mean}")
print(f"Std: {std}")

dataset: Dataset ImageFolder
    Number of datapoints: 27153
    Root location: ./data
    StandardTransform
Transform: Compose(
               Lambda()
               Resize(size=224, interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           )
Total Batches: 1698
Batch [1698/1698]                                           
Mean: tensor([0.5246, 0.3975, 0.3844])
Std: tensor([0.2744, 0.2188, 0.2165])


#### Load Datasets:
- Training DataSet with augmentation.
- Testing DataSet without augmentation.


In [51]:
mean = torch.tensor([0.5246, 0.3975, 0.3844])
std = torch.tensor([0.2744, 0.2188, 0.2165])

transform_train = transforms.Compose([
    transforms.Lambda(pad_to_Square),
    transforms.Resize(imgSize),  # Resize images
    transforms.ToTensor(),  # Convert images to tensor
    transforms.Normalize(mean=mean, std=std),  # Normalize images
])

transform_test = transforms.Compose([
    transforms.Lambda(pad_to_Square),
    transforms.Resize(imgSize),  # Resize images
    transforms.ToTensor(),  # Convert images to tensor
])


In [52]:

#? training dataset directory
train_path = "processedData/train"
#? validation dataset directory
valid_path = "processedData/valid"
#? testing dataset directory
# test_path = "processedData/test"

batchSize = 64

#? Load TRAIN dataset from directory
train_dataset = datasets.ImageFolder(root=train_path, transform=transform_train)
#? Create a DataLoader
train_dataloader = DataLoader(train_dataset, batch_size=batchSize, shuffle=True)
print(train_dataset, f"Batches: {len(train_dataloader)}","\n".ljust(50,"-"))

#? Load VALID dataset from directory
valid_dataset = datasets.ImageFolder(root=valid_path, transform=transform_test)
#? Create a DataLoader
valid_dataloader = DataLoader(valid_dataset, batch_size=batchSize, shuffle=False)
print(valid_dataset, f"Batches: {len(valid_dataloader)}","\n".ljust(50,"-"))

# #? Load TEST dataset from directory
# test_dataset = datasets.ImageFolder(root=test_path, transform=transform_test)
# #? Create a DataLoader
# test_dataloader = DataLoader(test_dataset, batch_size=batchSize, shuffle=False)
# print(test_dataset, f"Batches: {len(test_dataloader)}","\n".ljust(50,"-"))

Dataset ImageFolder
    Number of datapoints: 21719
    Root location: processedData/train
    StandardTransform
Transform: Compose(
               Lambda()
               Resize(size=(380, 380), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=tensor([0.5246, 0.3975, 0.3844]), std=tensor([0.2744, 0.2188, 0.2165]))
           ) Batches: 340 
-------------------------------------------------
Dataset ImageFolder
    Number of datapoints: 2715
    Root location: processedData/valid
    StandardTransform
Transform: Compose(
               Lambda()
               Resize(size=(380, 380), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
           ) Batches: 43 
-------------------------------------------------


#### Model Training
---

##### Model

In [53]:
import torch
from torchvision import models
import torch.nn as nn 
efficientnetmodel = models.efficientnet_b4(pretrained=True)
no_features = efficientnetmodel.classifier[1].in_features  
efficientnetmodel.classifier[1] = nn.Linear(no_features, 10) 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
efficientnetmodel = efficientnetmodel.to(device)
print(efficientnetmodel)



EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
            (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [54]:
def showGrads(model):
    for name, param in model.named_parameters():
        print(f"Layer: {name}, requires_grad: {param.requires_grad}")

showGrads(efficientnetmodel)

Layer: features.0.0.weight, requires_grad: True
Layer: features.0.1.weight, requires_grad: True
Layer: features.0.1.bias, requires_grad: True
Layer: features.1.0.block.0.0.weight, requires_grad: True
Layer: features.1.0.block.0.1.weight, requires_grad: True
Layer: features.1.0.block.0.1.bias, requires_grad: True
Layer: features.1.0.block.1.fc1.weight, requires_grad: True
Layer: features.1.0.block.1.fc1.bias, requires_grad: True
Layer: features.1.0.block.1.fc2.weight, requires_grad: True
Layer: features.1.0.block.1.fc2.bias, requires_grad: True
Layer: features.1.0.block.2.0.weight, requires_grad: True
Layer: features.1.0.block.2.1.weight, requires_grad: True
Layer: features.1.0.block.2.1.bias, requires_grad: True
Layer: features.1.1.block.0.0.weight, requires_grad: True
Layer: features.1.1.block.0.1.weight, requires_grad: True
Layer: features.1.1.block.0.1.bias, requires_grad: True
Layer: features.1.1.block.1.fc1.weight, requires_grad: True
Layer: features.1.1.block.1.fc1.bias, requires

In [63]:
import torch.nn as nn
import torch.optim as optim
from torchvision import models

efficientnetmodel = models.efficientnet_b4(pretrained=True)

no_features = efficientnetmodel.classifier[1].in_features  
efficientnetmodel.classifier[1] = nn.Linear(no_features, 10) 

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
efficientnetmodel = efficientnetmodel.to(device)

criterion = nn.CrossEntropyLoss()
freeze_model(efficientnetmodel)
showGrads(efficientnetmodel)



Layer: features.0.0.weight, requires_grad: False
Layer: features.0.1.weight, requires_grad: False
Layer: features.0.1.bias, requires_grad: False
Layer: features.1.0.block.0.0.weight, requires_grad: False
Layer: features.1.0.block.0.1.weight, requires_grad: False
Layer: features.1.0.block.0.1.bias, requires_grad: False
Layer: features.1.0.block.1.fc1.weight, requires_grad: False
Layer: features.1.0.block.1.fc1.bias, requires_grad: False
Layer: features.1.0.block.1.fc2.weight, requires_grad: False
Layer: features.1.0.block.1.fc2.bias, requires_grad: False
Layer: features.1.0.block.2.0.weight, requires_grad: False
Layer: features.1.0.block.2.1.weight, requires_grad: False
Layer: features.1.0.block.2.1.bias, requires_grad: False
Layer: features.1.1.block.0.0.weight, requires_grad: False
Layer: features.1.1.block.0.1.weight, requires_grad: False
Layer: features.1.1.block.0.1.bias, requires_grad: False
Layer: features.1.1.block.1.fc1.weight, requires_grad: False
Layer: features.1.1.block.1.f

In [64]:
# import pickle
from datetime import datetime
modelSaver_path = f"modelLogs/{datetime.today().strftime('%Y-%m-%d %H-%M')}"
os.makedirs(modelSaver_path,exist_ok=True)
print("Save path:", modelSaver_path)

epochsPerPhase = 5
phases = 5
modelVariation = "type2Phases"  # <--- Change this each time you run a new Model


optimiser = optim.Adam(efficientnetmodel.parameters(), lr=0.005)

for phase in range(phases):

    #? progressive unfreezing of layers
    unfreeze_last_n_blocks(efficientnetmodel, phase)

    efficientnetmodel.train()
    for epoch in range(epochsPerPhase):

        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for batch, (inputs, labels) in enumerate(train_dataloader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimiser.zero_grad()
            outputs = efficientnetmodel(inputs).squeeze()
        
            loss = criterion(outputs, labels)
            loss.backward()
            optimiser.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs,1)
            correct_train += (predicted == labels).sum().item()
            total_train += labels.size(0)

            print(f"Epoch: {epoch+1}".ljust(10), f"Batch: {batch+1}".ljust(12), f"Accuracy: {((correct_train / total_train)*100):.2f}%",end="\r")
            if batch > 400:
                break
        print()
        
        train_accuracy = (correct_train / total_train)*100
        avg_loss = running_loss / len(train_dataloader)

        print(f"phase {phase+1} | epoch [{epoch+1}/{epochsPerPhase}]",
              f"loss = {avg_loss:.4f}, train acc = {train_accuracy:.2f}%")

        checkpoint_filename = modelSaver_path+f"/{modelVariation}_phase{phase+1}_epoch{epoch+1}.pth"
        torch.save(efficientnetmodel.state_dict(), checkpoint_filename)
        print(f"model saved to {checkpoint_filename}")

        # checkpoint_filename2 = f"/efficientnet_phase{phase_idx}_epoch{epoch+1}.pkl"
        # with open(modelSaver_path+checkpoint_filename2,'wb') as file:
        #     pickle.dump(efficientnetmodel.state_dict(),file)

        break
                
    print("-- Evaluation Mode --")
    efficientnetmodel.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for vBatch, (inputs, labels) in enumerate(valid_dataloader):
            inputs, labels = inputs.to(device), labels.float().to(device)
            outputs = efficientnetmodel(inputs).squeeze()
            _, predicted = torch.max(outputs,1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            print(f"Epoch: {epoch+1}".ljust(10), f"Batch: {vBatch+1}".ljust(12), f"Accuracy: {((correct/total)*100):.2f}%",end="\r")
            if vBatch > 40:
                break


    val_accuracy = (correct / total)*100
    print(f"phase {phase+1} val acc: {val_accuracy:.2f}%\n")


Save path: modelLogs/2025-03-27 18-12
Layers unfrozen: ['classifier']
Epoch: 1   Batch: 48    Accuracy: 51.99%

KeyboardInterrupt: 