In [20]:
import os
import glob
import cv2
import numpy as np
from tqdm import tqdm
import pandas as pd
import yaml
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms as T




In [21]:
# check torch cuda is available
print("Torch CUDA available:", torch.cuda.is_available())

Torch CUDA available: True


In [22]:
!ls /mnt/d/object_detect_tracking/camera-viewer/data/brain_tumor_copy


README.MD  axial_t1wce_2_class	coronal_t1wce_2_class  sagittal_t1wce_2_class


In [23]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms as T

class YoloDataset(Dataset):
    def __init__(self, data_dir, img_size=320, transform=None, mode='train'):
        self.data_dir = data_dir
        self.img_size = img_size
        self.transform = transform
        self.mode = mode

        self.images = []
        self.labels = []

        self._prepare_dataset()

    def _check_and_clean(self, img_dir, label_dir):
        for img_name in os.listdir(img_dir):
            if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                label_name = os.path.splitext(img_name)[0] + '.txt'
                if not os.path.exists(os.path.join(label_dir, label_name)):
                    os.remove(os.path.join(img_dir, img_name))

    def _load_images_and_labels(self, img_dir, label_dir):
        for img_name in os.listdir(img_dir):
            if img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                img_path = os.path.join(img_dir, img_name)
                label_path = os.path.join(label_dir, os.path.splitext(img_name)[0] + '.txt')

                img = Image.open(img_path).convert('RGB')
                if self.transform:
                    img = self.transform(img)

                if os.path.exists(label_path):
                    with open(label_path, 'r') as f:
                        labels = [list(map(float, line.strip().split())) for line in f if line.strip()]
                else:
                    labels = []

                self.images.append(img)
                self.labels.append(torch.tensor(labels, dtype=torch.float32))

    def _prepare_dataset(self):
        image_dir = os.path.join(self.data_dir, 'images', self.mode)
        label_dir = os.path.join(self.data_dir, 'labels', self.mode)

        self._check_and_clean(image_dir, label_dir)
        self._load_images_and_labels(image_dir, label_dir)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

# Example usage
transform = T.Compose([
    T.Resize((320, 320)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

data_dir = "/mnt/d/object_detect_tracking/camera-viewer/data/brain_tumor_copy/axial_t1wce_2_class"
train_dataset = YoloDataset(data_dir, img_size=320, transform=transform, mode='train')
test_dataset = YoloDataset(data_dir, img_size=320, transform=transform, mode='test')

# DataLoader ready for training:
from torch.utils.data import DataLoader
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=4)




In [16]:
print("Number of training samples:", len(train_dataset))
print("Number of testing samples:", len(test_dataset))


Number of training samples: 296
Number of testing samples: 75


# SIoU Loss

In [24]:

# import torch
import torch.nn as nn
# import numpy as np
 
 
class SIoU(nn.Module):
        # SIoU Loss https://arxiv.org/pdf/2205.12740.pdf
    def __init__(self, x1y1x2y2=True, eps=1e-7):
        super(SIoU, self).__init__()
        self.x1y1x2y2 = x1y1x2y2
        self.eps = eps
    
    
            
    def forward(self, box1, box2):
    
        # Get the coordinates of bounding boxes
        if self.x1y1x2y2:  # x1, y1, x2, y2 = box1
            b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
            b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
        else:  # transform from xywh to xyxy
            b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
            b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
            b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
            b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2
    
    
        # Intersection area
        inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
                (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
    
    
        # Union Area
        w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + self.eps
        w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + self.eps
        union = w1 * h1 + w2 * h2 - inter + self.eps
    
        # IoU value of the bounding boxes
        iou = inter / union
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
        s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
        sigma = torch.pow(s_cw ** 2 + s_ch ** 2, 0.5) + self.eps
        sin_alpha_1 = torch.abs(s_cw) / sigma
        sin_alpha_2 = torch.abs(s_ch) / sigma
        threshold = pow(2, 0.5) / 2
        sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)
            
        # Angle Cost
        angle_cost = 1 - 2 * torch.pow( torch.sin(torch.arcsin(sin_alpha) - np.pi/4), 2)
            
        # Distance Cost
        rho_x = (s_cw / (cw + self.eps)) ** 2
        rho_y = (s_ch / (ch + self.eps)) ** 2
        gamma = 2 - angle_cost
        distance_cost = 2 - torch.exp(gamma * rho_x) - torch.exp(gamma * rho_y)
            
        # Shape Cost
        omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
        omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)


        print("omiga_w:", omiga_w)
        print("omiga_h:", omiga_h)
        print("distance_cost:", distance_cost)
        print("angle_cost:", angle_cost)
        shape_cost = torch.pow(1 - torch.exp(-1 * omiga_w), 4) + torch.pow(1 - torch.exp(-1 * omiga_h), 4)
        print("shape_cost:", shape_cost)

        return 1 - (iou + 0.5 * (distance_cost + shape_cost))

In [19]:
siou = SIoU(x1y1x2y2=True)

box1 = torch.tensor([50, 50, 150, 150], dtype=torch.float32)  # x1, y1, x2, y2
box2 = torch.tensor([60, 60, 140, 140], dtype=torch.float32)

loss = siou(box1, box2)
print(loss)
print("SIoU Loss:", loss.item())


omiga_w: tensor(0.2000)
omiga_h: tensor(0.2000)
distance_cost: tensor(0.)
angle_cost: tensor(5.9605e-08)
shape_cost: tensor(0.0022)
tensor(0.3589)
SIoU Loss: 0.3589203357696533


# Show  model infomation


In [26]:

from torchvision import transforms
from model.yolo.yolo_net import YOLONet

model = YOLONet(num_classes=2, num_anchors=9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

for param in model.parameters():
    param.requires_grad = True

# show size of the model
print("Model size:", sum(p.numel() for p in model.parameters() if p.requires_grad))
print("Model Memory Size (MB):", sum(p.numel() for p in model.parameters() if p.requires_grad) * 4 / (1024 * 1024))





Model size: 25786365
Model Memory Size (MB): 98.3671760559082


In [None]:
# train model

