# SuperAI Season 4 - Level 2 Hackathon - Liver_Ultrasound_Detection_Classification_Lightning

## Set Annotation files for classification

In [1]:
%cd SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection

/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/SuperAI_SS4_Level_2/Hack_6_Liver_Ultrasound_Detection


In [7]:
import glob
import pandas as pd
import numpy as np
import os
from PIL import Image
from tqdm.notebook import tqdm

In [8]:
annotation_for_train = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}
annotation_for_validation = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}
annotation_for_test = {
    'file_name' : [] ,
    'width' : [] ,
    'height' : [] ,
    'class' : []
}

In [12]:
train_labels = os.listdir('./datasets/train/labels')
validation_labels = os.listdir('./datasets/validation/labels')


In [13]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/train/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_train['file_name'].append(file_name)
    annotation_for_train['width'].append(image_shape[0])
    annotation_for_train['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 1 if f'{name}.txt' in train_labels else 0
    
    annotation_for_train['class'].append(class_)

  0%|          | 0/14448 [00:00<?, ?it/s]

In [14]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/validation/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_validation['file_name'].append(file_name)
    annotation_for_validation['width'].append(image_shape[0])
    annotation_for_validation['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 1 if f'{name}.txt' in validation_labels else 0
    
    annotation_for_validation['class'].append(class_)

  0%|          | 0/2812 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
for idx , file_path in enumerate(tqdm(glob.glob('./datasets/test/images/*.jpg'))) :
    
    image = Image.open(file_path)
    image = np.array(image)
    image_shape = image.shape
    
    file_name = file_path.split('\\')[-1]
    
    annotation_for_test['file_name'].append(file_name)
    annotation_for_test['width'].append(image_shape[0])
    annotation_for_test['height'].append(image_shape[1])
    
    name = file_name.split('.')[0]
    class_ = 0
    
    annotation_for_test['class'].append(class_)

  0%|          | 0/5153 [00:00<?, ?it/s]

In [None]:
annotation_for_train_df = pd.DataFrame.from_dict(annotation_for_train)
annotation_for_validation_df = pd.DataFrame.from_dict(annotation_for_validation)
annotation_for_test_df = pd.DataFrame.from_dict(annotation_for_test)

In [None]:
annotation_for_train_df

Unnamed: 0,file_name,width,height,class
0,100010.jpg,810,1080,1
1,100022.jpg,810,1080,1
2,100024.jpg,540,720,1
3,100034.jpg,810,1080,1
4,100047.jpg,405,540,0
...,...,...,...,...
15198,99975.jpg,540,720,0
15199,99977.jpg,810,1080,1
15200,99989.jpg,720,960,0
15201,99991.jpg,810,1080,0


In [None]:
annotation_for_train_df['class'].value_counts()

class
0    7607
1    7596
Name: count, dtype: int64

In [None]:
annotation_for_validation_df

Unnamed: 0,file_name,width,height,class
0,100.jpg,3024,4032,1
1,100006.jpg,810,1080,0
2,1001.jpg,3024,4032,1
3,100103.jpg,405,540,1
4,100171.jpg,810,1080,0
...,...,...,...,...
2807,40654.jpg,810,1080,1
2808,40655.jpg,540,720,0
2809,4067.jpg,3024,4032,0
2810,4068.jpg,3024,4032,0


In [None]:
annotation_for_validation_df['class'].value_counts()

class
0    1457
1    1355
Name: count, dtype: int64

In [None]:
annotation_for_test_df

Unnamed: 0,file_name,width,height,class
0,1.jpg,3024,4032,0
1,10.jpg,3024,4032,0
2,1000.jpg,3024,4032,0
3,10001.jpg,810,1080,0
4,100077.jpg,540,720,0
...,...,...,...,...
5148,998.jpg,3024,4032,0
5149,99805.jpg,3024,4032,0
5150,9982.jpg,3024,4032,0
5151,999.jpg,3024,4032,0


In [None]:
annotation_for_train_df.to_csv('./datasets/annotations/annotation_for_train_datasets.csv' , index = False)
annotation_for_validation_df.to_csv('./datasets/annotations/annotation_for_validation_datasets.csv' , index = False)
annotation_for_test_df.to_csv('./datasets/annotations/annotation_for_test_datasets.csv' , index = False)

## Dataloader for lesion (1) , non lesion (0) dataset

In [2]:
import torch
from torch.utils.data import Dataset
from torchvision.transforms import Lambda
from torch.utils.data import DataLoader


In [3]:
import timm
import timm.optim
import timm.scheduler
from PIL import Image
import pandas as pd

In [4]:
class lesion_nonlesion_dataset (Dataset) :
    
    def __init__ (self , dataset_path, annotation_path , transform = None , target_transform = None) :
        
        super().__init__() 
        
        self.dataset_path = dataset_path
        self.annotation_path = annotation_path
        self.annotation_df = pd.read_csv(annotation_path)
        
        self.transform = transform
        self.target_transform = target_transform
        
    def __len__ (self)  :
        
        return len(self.annotation_df)
    
    def __getitem__ (self , idx) :
        
        file_name = self.annotation_df.iloc[idx]['file_name']
        
        image = Image.open( f'{self.dataset_path}/{file_name}' )
        image = image.convert('RGB')

        label = self.annotation_df.iloc[idx]['class']
        

        if self.transform :
            
            image = self.transform(image)
        
        if self.target_transform :
            
            label = self.target_transform(label)
            
        return image , label

In [5]:
train_data_path = './datasets/train/images'
validation_data_path = './datasets/validation/images'
test_data_path = './datasets/test/images'

In [6]:
train_annotation_path = './datasets/annotations/annotation_for_train_datasets.csv'
validation_annotation_path = './datasets/annotations/annotation_for_validation_datasets.csv'
test_annotation_path = './datasets/annotations/annotation_for_test_datasets.csv'

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "timm/maxvit_tiny_tf_224.in1k"
model = timm.create_model(model_name, pretrained = False, num_classes = 1 , checkpoint_path= './models/base_maxvit.pth')

In [8]:
device

device(type='cuda')

In [9]:
data_config = timm.data.resolve_model_data_config(model)
transform = timm.data.create_transform(** data_config , is_training = True)
target_transform = Lambda(lambda y: torch.tensor(y, dtype=torch.float32))

In [10]:
train_dataset = lesion_nonlesion_dataset(train_data_path , train_annotation_path , transform = transform , target_transform = target_transform)
validation_dataset = lesion_nonlesion_dataset(validation_data_path , validation_annotation_path , transform = transform , target_transform = target_transform)
test_dataset = lesion_nonlesion_dataset(test_data_path , test_annotation_path , transform = transform , target_transform = target_transform)

In [11]:
train_dataloader = DataLoader(train_dataset , batch_size = 16 , shuffle = True , num_workers = 16 , persistent_workers=True)
validation_dataloader = DataLoader(validation_dataset , batch_size = 16 , shuffle = False, num_workers = 16, persistent_workers=True)
test_dataloader = DataLoader(test_dataset , batch_size = 16 , shuffle = False, num_workers = 16, persistent_workers=True)

In [12]:
# for images , labels in train_dataloader :
    
#     print(images.shape, labels.shape)
    
#     break

## ⚡ MaxVit - Torch Lightning ⚡

In [13]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader , random_split
import lightning as L
from torch.utils.data import Dataset
import pandas as pd
import torchmetrics

In [14]:
class lesion_nonlesion_classifier(L.LightningModule):
    
    def __init__(self , pretrained_model):
        
        super().__init__()
        
        self.pretrained_model  = pretrained_model
        
        self.train_accuracy = torchmetrics.Accuracy(task='binary')
        self.val_accuracy = torchmetrics.Accuracy(task='binary')
        self.test_accuracy = torchmetrics.Accuracy(task='binary')
        

    def forward(self , x) :
        
        y_hat = self.pretrained_model(x)
        
        return  y_hat
    
    def training_step(self, batch, batch_idx):
        
        x , y = batch
        
        y_hat = self(x.to(device))
        
        loss = F.binary_cross_entropy_with_logits(y_hat.squeeze() , y.to(device))
        acc = self.train_accuracy(y_hat.squeeze(), y)
        
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        
        x , y = batch
        
        y_hat = self(x)
        
        val_loss = F.binary_cross_entropy_with_logits(y_hat.squeeze(), y.to(device))
        val_acc = self.val_accuracy(y_hat.squeeze(), y.to(device))
        
        self.log('val_loss', val_loss, on_step = True, on_epoch = True, prog_bar = True)
        self.log('val_acc', val_acc, on_step = True, on_epoch = True, prog_bar = True)
        
    def test_step(self, batch, batch_idx):
        
        x , y = batch
        y_hat = self(x)
        
        test_loss = F.binary_cross_entropy_with_logits(y_hat.squeeze() , y)
        test_acc = self.test_accuracy(y_hat.squeeze(), y.to(device))
        
        self.log('test_loss', test_loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('test_acc', test_acc, on_step=True, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        
        optimizer = torch.optim.AdamW(self.parameters(), lr = 1e-4)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 10, gamma=0.1)
        
        return [optimizer], [scheduler]

In [15]:
lesion_nonlesion_classifier_model = lesion_nonlesion_classifier(model)

In [16]:
trainer = L.Trainer(max_epochs = 25)
trainer.fit(model = lesion_nonlesion_classifier_model, train_dataloaders = train_dataloader, val_dataloaders = validation_dataloader )

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/env/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

/lustrefs/disk/home/ai4122/SuperAI_SS4_Recap/env/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
