# SuperAI Season 4 - Level 2 Hackathon - Liver_Ultrasound_Detection_Classification_Lightning

## Set Annotation files for classification

In [1]:
%pwd
%cd ./SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection

/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection


In [2]:
import glob
import pandas as pd
import numpy as np
import os
from PIL import Image
from tqdm.notebook import tqdm

In [None]:
annotation_for_train = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}
annotation_for_validation = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}
annotation_for_test = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}

In [None]:
train_labels = os.listdir('./datasets/train/labels')
validation_labels = os.listdir('./datasets/validation/labels')


In [None]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/train/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_train['file_name'].append(file_name)
    annotation_for_train['width'].append(image_shape[0])
    annotation_for_train['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 1 if f'{name}.txt' in train_labels else 0
    
    annotation_for_train['class'].append(class_)

In [None]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/validation/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_validation['file_name'].append(file_name)
    annotation_for_validation['width'].append(image_shape[0])
    annotation_for_validation['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 1 if f'{name}.txt' in validation_labels else 0
    
    annotation_for_validation['class'].append(class_)

In [None]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/test/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_test['file_name'].append(file_name)
    annotation_for_test['width'].append(image_shape[0])
    annotation_for_test['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 0
    
    annotation_for_test['class'].append(class_)

In [None]:
annotation_for_train_df = pd.DataFrame.from_dict(annotation_for_train)
annotation_for_validation_df = pd.DataFrame.from_dict(annotation_for_validation)
annotation_for_test_df = pd.DataFrame.from_dict(annotation_for_test)

In [None]:
annotation_for_train_df

In [None]:
annotation_for_train_df['class'].value_counts()

In [None]:
annotation_for_validation_df

In [None]:
annotation_for_validation_df['class'].value_counts()

In [None]:
annotation_for_test_df

In [None]:
annotation_for_train_df.to_csv('./datasets/annotations/annotation_for_train_datasets.csv' , index = False)
annotation_for_validation_df.to_csv('./datasets/annotations/annotation_for_validation_datasets.csv' , index = False)
annotation_for_test_df.to_csv('./datasets/annotations/annotation_for_test_datasets.csv' , index = False)

## Dataloader for lesion (1) , non lesion (0) dataset

In [3]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Lambda
from torch.utils.data import DataLoader


In [4]:
import timm
import timm.optim
import timm.scheduler
from PIL import Image
import pandas as pd

In [82]:
class lesion_nonlesion_dataset (Dataset) :
    
    def __init__ (self , dataset_path, annotation_path , transform = None , target_transform = None) :
        
        super().__init__() 
        
        self.dataset_path = dataset_path
        self.annotation_path = annotation_path
        self.annotation_df = pd.read_csv(annotation_path)
        
        self.transform = transform
        self.target_transform = target_transform
        
    def __len__ (self)  :
        
        return len(self.annotation_df)
    
    def __getitem__ (self , idx) :
        
        file_name = self.annotation_df.iloc[idx]['file_name']
        
        image = Image.open( f'{self.dataset_path}/{file_name}' )
        image = image.convert('L')
        image = np.array(image)
        image = np.stack([image , image , image] , axis = 2)
        image = Image.fromarray(image)
        
        label = self.annotation_df.iloc[idx]['class']

        if self.transform :
            
            image = self.transform(image)
        
        if self.target_transform :
            
            label = self.target_transform(label)
        
        
        
        return image , label

In [83]:
train_data_path = './datasets/train/images'
validation_data_path = './datasets/validation/images'
test_data_path = './datasets/test/images'

In [84]:
train_annotation_path = './datasets/annotations/annotation_for_train_datasets.csv'
validation_annotation_path = './datasets/annotations/annotation_for_validation_datasets.csv'
test_annotation_path = './datasets/annotations/annotation_for_test_datasets.csv'

In [85]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "timm/maxvit_tiny_tf_224.in1k"
pretrained_model = timm.create_model(model_name, pretrained = False , num_classes = 1 , checkpoint_path='./models/base_maxvit_224.pth').to(device)

In [86]:
pretrained_model

MaxxVit(
  (stem): Stem(
    (conv1): Conv2dSame(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (norm1): BatchNormAct2d(
      64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): GELUTanh()
    )
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (stages): Sequential(
    (0): MaxxVitStage(
      (blocks): Sequential(
        (0): MaxxVitBlock(
          (conv): MbConvBlock(
            (shortcut): Downsample2d(
              (pool): AvgPool2dSame(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
              (expand): Identity()
            )
            (pre_norm): BatchNormAct2d(
              64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
              (drop): Identity()
              (act): Identity()
            )
            (down): Identity()
            (conv1_1x1): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (norm1): BatchNormAct2d(
      

In [87]:
device

device(type='cuda')

In [88]:
data_config = timm.data.resolve_model_data_config(pretrained_model)
transform = timm.data.create_transform(** data_config , is_training = True)
target_transform = Lambda(lambda y: torch.tensor(y, dtype=torch.float32))

In [89]:
train_dataset = lesion_nonlesion_dataset(train_data_path , train_annotation_path , transform = transform , target_transform = target_transform)
validation_dataset = lesion_nonlesion_dataset(validation_data_path , validation_annotation_path , transform = transform , target_transform = target_transform)
test_dataset = lesion_nonlesion_dataset(test_data_path , test_annotation_path , transform = transform , target_transform = target_transform)

In [90]:
train_dataloader = DataLoader(train_dataset , batch_size = 16 , shuffle = True , num_workers = 16 , persistent_workers=True)
validation_dataloader = DataLoader(validation_dataset , batch_size = 16 , shuffle = False, num_workers = 16, persistent_workers=True)
test_dataloader = DataLoader(test_dataset , batch_size = 16 , shuffle = False, num_workers = 16, persistent_workers=True)

## ⚡ MaxVit - Torch Lightning ⚡

In [91]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader , random_split
import lightning as L
from torch.utils.data import Dataset
import pandas as pd
import torchmetrics

In [92]:
class lesion_nonlesion_classifier(L.LightningModule):
    
    def __init__(self , pretrained_model):
        
        super().__init__()
        
        self.pretrained_model  = pretrained_model
        
        self.train_accuracy = torchmetrics.Accuracy(task='binary')
        self.val_accuracy = torchmetrics.Accuracy(task='binary')
        self.test_accuracy = torchmetrics.Accuracy(task='binary')
        

    def forward(self , x) :
        
        y_hat = self.pretrained_model(x)
        
        return  y_hat
    
    def training_step(self, batch, batch_idx):
        
        x , y = batch
        
        y_hat = self(x.to(device))
        
        loss = F.binary_cross_entropy_with_logits(y_hat.squeeze() , y.to(device))
        acc = self.train_accuracy(y_hat.squeeze(), y)
        
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        
        x , y = batch
        
        y_hat = self(x)
        
        val_loss = F.binary_cross_entropy_with_logits(y_hat.squeeze(), y.to(device))
        val_acc = self.val_accuracy(y_hat.squeeze(), y.to(device))
        
        self.log('val_loss', val_loss, on_step = True, on_epoch = True, prog_bar = True)
        self.log('val_acc', val_acc, on_step = True, on_epoch = True, prog_bar = True)
        
    def test_step(self, batch, batch_idx):
        
        x , y = batch
        y_hat = self(x)
        
        test_loss = F.binary_cross_entropy_with_logits(y_hat.squeeze() , y)
        test_acc = self.test_accuracy(y_hat.squeeze(), y.to(device))
        
        self.log('test_loss', test_loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('test_acc', test_acc, on_step=True, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        
        optimizer = torch.optim.AdamW(self.parameters(), lr = 1e-4)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 10, gamma=0.1)
        
        return [optimizer], [scheduler]

In [93]:
lesion_nonlesion_classifier_model = lesion_nonlesion_classifier(pretrained_model)

In [94]:
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint

In [95]:
checkpoint_callback = ModelCheckpoint(dirpath='./models'  , filename='maxvit_224_{epoch}-{val_acc:.2f}')

In [96]:
trainer = L.Trainer(max_epochs = 50 , callbacks=[checkpoint_callback])
trainer.fit(model = lesion_nonlesion_classifier_model, train_dataloaders = train_dataloader, val_dataloaders = validation_dataloader )

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/myenv/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:653: Checkpoint directory /lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection/models exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type           | Params
----------------------------------------------------
0 | pretrained_model | MaxxVit        | 30.4 M
1 | train_accuracy   | BinaryAccuracy | 0     
2 | val_accuracy     | BinaryAccuracy | 0     
3 | test_accuracy    | BinaryAccuracy | 0     
----------------------------------------------------
30.4 M    Trainable params
0         Non-trainable params
30.4 M    Total params
121.616   Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal han

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


In [97]:
model = lesion_nonlesion_classifier.load_from_checkpoint("./models/maxvit_epoch=24-val_acc=0.81.ckpt" , pretrained_model = pretrained_model )

FileNotFoundError: [Errno 2] No such file or directory: '/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection/models/maxvit_epoch=24-val_acc=0.81.ckpt'

In [None]:
model.to(device)