In [2]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from PIL import Image
import torch.nn as nn
import numpy as np



# Check CUDA: report whether this PyTorch build can see a usable CUDA device.
print("Torch CUDA available:", torch.cuda.is_available())


Torch CUDA available: True


In [3]:
def get_labels(label_path):
    """Parse a YOLO-format label file.

    Each kept line holds five whitespace-separated numbers:
    ``class_id cx cy w h``. Lines with any other number of fields
    (including blank lines) are ignored.

    Args:
        label_path: Path of the text file to read.

    Returns:
        A list of ``[class_id, cx, cy, w, h]`` rows, where ``class_id`` is an
        ``int`` and the remaining four values are ``float``.

    Raises:
        ValueError: If a five-field line contains a non-numeric field.
    """
    with open(label_path, 'r') as handle:
        rows = [raw.strip().split() for raw in handle]

    boxes = []
    for fields in rows:
        if len(fields) != 5:
            continue
        values = [float(field) for field in fields]
        # The class id is written as a number in the file but used as an integer index.
        boxes.append([int(values[0])] + values[1:])
    return boxes

# Sanity check: parse one label file from the test split and print the parsed rows.
# NOTE(review): absolute, machine-specific Windows path - this cell only runs where that file exists.
path = r"D:\object_detect_tracking\data\brain_tumor_copy\axial_t1wce_2_class\labels\test\00018_109.txt"
labels = get_labels(path)
print(labels)

[[0, 0.611502, 0.534624, 0.15493, 0.078638], [0, 0.602113, 0.671362, 0.178404, 0.161972]]


# ---------------------- Dataset ----------------------------------

In [4]:
import os
from torch.utils.data import Dataset
from PIL import Image

class YoloDataset(Dataset):
    """In-memory dataset of ``(image, labels)`` pairs read from a YOLO-style tree.

    ``_prepare_dataset`` reads the following layout under ``data_dir``::

        images/train/   images/test/
        labels/train/   labels/test/

    Every image is loaded (and transformed) eagerly in the constructor. The
    ``train`` folder becomes the training split; the ``test`` folder is divided
    into a validation part and a test part.

    WARNING: the constructor deletes files on disk. Images without a label file
    and label files without an image are removed from ``data_dir``, so point it
    at a copy of the data.

    Args:
        data_dir: Root folder of the dataset.
        img_size: Stored on the instance but not used by this class; resizing is
            left to ``transform``. Note that this is the second positional
            parameter, so ``transform`` should be passed by keyword.
        transform: Optional callable applied to every loaded PIL image.
        mode: Which split ``__len__`` / ``__getitem__`` expose:
            ``'train'``, ``'val'`` or ``'test'``.
        split_ratio_test: Fraction of the ``test`` folder that goes to the
            *validation* split; the remainder becomes the test split.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, data_dir, img_size=320, transform=None, mode='train', split_ratio_test=0.8):
        self.data_dir = data_dir
        self.img_size = img_size
        self.transform = transform
        self.mode = mode
        self.split_ratio_test = split_ratio_test

        self.train_df = []
        self.val_df = []
        self.test_df = []

        self._filter_and_clean(data_dir)
        self._prepare_dataset()

    def _clean_images_without_labels(self, images_path, labels_path):
        """Delete every image in ``images_path`` that has no ``<stem>.txt`` in ``labels_path``."""
        label_files = set(os.listdir(labels_path))
        for img_file in os.listdir(images_path):
            if not img_file.lower().endswith(self.IMAGE_EXTENSIONS):
                continue
            label_name = os.path.splitext(img_file)[0] + '.txt'
            if label_name not in label_files:
                os.remove(os.path.join(images_path, img_file))

    def _clean_labels_without_images(self, labels_path, images_path):
        """Delete every ``.txt`` label in ``labels_path`` that has no matching image.

        Image extensions are matched case-insensitively, the same way the image
        side does it, so the label of ``scan.JPG`` is kept rather than deleted.
        """
        image_stems = {
            os.path.splitext(name)[0]
            for name in os.listdir(images_path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        }
        for label_file in os.listdir(labels_path):
            if not label_file.endswith('.txt'):
                continue
            if os.path.splitext(label_file)[0] not in image_stems:
                os.remove(os.path.join(labels_path, label_file))

    def _filter_and_clean(self, dir_path):
        """Remove unpaired images/labels in every ``*images*`` / ``*labels*`` folder of ``dir_path``.

        For each sub-folder (e.g. ``train``) of an ``images`` folder, the sibling
        ``labels`` folder with the same sub-folder name is looked up, and vice
        versa. Pairs where either side is not a directory are skipped.
        """
        for folder_name in os.listdir(dir_path):
            folder_path = os.path.join(dir_path, folder_name)
            if not os.path.isdir(folder_path):
                continue

            if "images" in folder_name:
                for subset in os.listdir(folder_path):
                    images_path = os.path.join(folder_path, subset)
                    labels_path = os.path.join(dir_path, folder_name.replace('images', 'labels'), subset)
                    if os.path.isdir(images_path) and os.path.isdir(labels_path):
                        self._clean_images_without_labels(images_path, labels_path)

            elif "labels" in folder_name:
                for subset in os.listdir(folder_path):
                    labels_path = os.path.join(folder_path, subset)
                    images_path = os.path.join(dir_path, folder_name.replace('labels', 'images'), subset)
                    if os.path.isdir(labels_path) and os.path.isdir(images_path):
                        self._clean_labels_without_images(labels_path, images_path)

    def _transform_image(self, image_path):
        """Load ``image_path`` as RGB and apply ``self.transform`` if one was given."""
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by ``convert``; otherwise one handle per image
        # would stay open until garbage collection.
        with Image.open(image_path) as raw:
            image = raw.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

    def _get_labels(self, label_path):
        """Parse a YOLO label file into ``[class_id, cx, cy, w, h]`` rows.

        Lines that do not have exactly five whitespace-separated fields are skipped.
        """
        labels = []
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 5:
                    class_id, cx, cy, w, h = map(float, parts)
                    labels.append([int(class_id), cx, cy, w, h])
        return labels

    def _prepare_dataset(self):
        """Load every labelled image and fill ``train_df``, ``val_df`` and ``test_df``.

        File names are sorted so the validation/test split is the same on every
        run and platform (``os.listdir`` returns entries in arbitrary order).
        The first ``int(n * split_ratio_test)`` files of the ``test`` folder go
        to validation, the rest to test.
        """
        image_base = os.path.join(self.data_dir, 'images')
        label_base = os.path.join(self.data_dir, 'labels')

        for subset in ['train', 'test']:
            img_folder = os.path.join(image_base, subset)
            lbl_folder = os.path.join(label_base, subset)

            image_files = sorted(
                f for f in os.listdir(img_folder)
                if f.lower().endswith(self.IMAGE_EXTENSIONS)
            )
            if subset == 'test':
                val_len = int(len(image_files) * self.split_ratio_test)
                val_files = set(image_files[:val_len])  # set: O(1) membership test below
            else:
                val_files = set()

            for img_file in image_files:
                img_path = os.path.join(img_folder, img_file)
                lbl_path = os.path.join(lbl_folder, os.path.splitext(img_file)[0] + '.txt')
                if not os.path.exists(lbl_path):
                    continue
                sample = (self._transform_image(img_path), self._get_labels(lbl_path))

                if subset == 'train':
                    self.train_df.append(sample)
                elif img_file in val_files:
                    self.val_df.append(sample)
                else:
                    self.test_df.append(sample)

    def _current_split(self):
        """Return the sample list selected by ``self.mode``, or ``None`` for an unknown mode."""
        splits = {'train': self.train_df, 'val': self.val_df, 'test': self.test_df}
        return splits.get(self.mode)

    def __len__(self):
        """Number of samples in the active split (0 for an unknown mode)."""
        split = self._current_split()
        return 0 if split is None else len(split)

    def __getitem__(self, idx):
        """Return the ``(image, labels)`` pair at ``idx`` in the active split.

        Raises:
            IndexError: If ``idx`` is out of range, or the mode is unknown (such a
                dataset has length 0, so every index is out of range). Raising
                instead of returning ``None`` lets plain iteration terminate.
        """
        split = self._current_split()
        if split is None:
            raise IndexError(f"unknown mode {self.mode!r}: dataset is empty")
        return split[idx]

    def get_df(self):
        """Return the three sample lists as ``(train, val, test)``."""
        return self.train_df, self.val_df, self.test_df


In [5]:

# Transform and dataset setup
# Resize every image to 320x320, convert it to a float tensor, and normalise each
# channel with the mean/std values commonly used for ImageNet-pretrained models.
transform = T.Compose([
    T.Resize((320, 320)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

data_dir = r"D:/object_detect_tracking/data/brain_tumor_copy/axial_t1wce_2_class"
# `transform` has to be passed by keyword: the second positional parameter of
# YoloDataset is `img_size`, so a positional `transform` would be stored as the
# image size and the images would be left as untransformed PIL objects.
load_yolo_data = YoloDataset(data_dir, transform=transform, split_ratio_test=0.2)
train_df, val_df, test_df = load_yolo_data.get_df()






In [9]:

# Report the size of the train / val / test splits, then show the label rows
# ([class_id, cx, cy, w, h]) of the first training sample.
print(len(train_df), len(val_df), len(test_df))
image , label = train_df[0]
print (label)




296 15 60
[[0, 0.428991, 0.361502, 0.144366, 0.150235]]


# ---------------------- SIoU Loss --------------------------

In [None]:



import torch
import torch.nn as nn

class SIoU(nn.Module):
    """SIoU bounding-box loss for boxes given as ``[cx, cy, w, h]``.

    The loss is ``1 - mean(IoU - 0.5 * (distance_cost + shape_cost))`` where the
    distance cost is modulated by the angle between the two box centres.

    Args:
        eps: Small constant that keeps divisions and the square root well defined.
    """

    def __init__(self, eps=1e-7):
        super().__init__()
        self.eps = eps

    @staticmethod
    def _corners(box):
        """Convert ``[N, 4]`` centre-format boxes to ``(x1, y1, x2, y2)`` tensors of shape ``[N]``."""
        half_w = box[:, 2] / 2
        half_h = box[:, 3] / 2
        return box[:, 0] - half_w, box[:, 1] - half_h, box[:, 0] + half_w, box[:, 1] + half_h

    def forward(self, box1, box2):
        """Compute the mean SIoU loss between two sets of boxes.

        Args:
            box1: Tensor of shape ``[N, 4]`` or ``[4]`` in ``[cx, cy, w, h]`` order
                (YOLO normalised coordinates).
            box2: Tensor of shape ``[N, 4]`` or ``[4]``; a single box is broadcast
                against all boxes of the other argument.

        Returns:
            Scalar tensor ``1 - mean(siou)``.
        """
        if box1.ndim == 1:
            box1 = box1.unsqueeze(0)
        if box2.ndim == 1:
            box2 = box2.unsqueeze(0)

        b1_x1, b1_y1, b1_x2, b1_y2 = self._corners(box1)
        b2_x1, b2_y1, b2_x2, b2_y2 = self._corners(box2)

        # Intersection area (zero when the boxes do not overlap).
        inter_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
        inter_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
        inter = inter_w * inter_h

        w1 = b1_x2 - b1_x1 + self.eps
        h1 = b1_y2 - b1_y1 + self.eps
        w2 = b2_x2 - b2_x1 + self.eps
        h2 = b2_y2 - b2_y1 + self.eps

        union = w1 * h1 + w2 * h2 - inter + self.eps
        iou = inter / union

        # Width/height of the smallest box enclosing both boxes.
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) + self.eps
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) + self.eps
        # Offset between the two box centres along x and y.
        s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
        s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
        # eps**2 sits inside the root: the derivative of sqrt at exactly 0 is
        # infinite, which would turn the gradient into NaN whenever the two
        # centres coincide. The value at zero distance is still eps.
        sigma = torch.sqrt(s_cw ** 2 + s_ch ** 2 + self.eps ** 2)

        sin_alpha_1 = torch.abs(s_cw) / sigma
        sin_alpha_2 = torch.abs(s_ch) / sigma
        threshold = 2 ** 0.5 / 2  # sin(45 degrees)
        # Always use the smaller of the two angles to the axes.
        sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)

        angle_cost = 1 - 2 * torch.pow(torch.sin(torch.arcsin(sin_alpha) - torch.pi / 4), 2)
        rho_x = (s_cw / cw) ** 2
        rho_y = (s_ch / ch) ** 2
        gamma = 2 - angle_cost
        distance_cost = 2 - torch.exp(-gamma * rho_x) - torch.exp(-gamma * rho_y)

        omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
        omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
        shape_cost = torch.pow(1 - torch.exp(-omiga_w), 4) + torch.pow(1 - torch.exp(-omiga_h), 4)

        siou = iou - 0.5 * (distance_cost + shape_cost)

        return 1 - siou.mean()



# Example SIoU loss usage

In [None]:
# Build the loss module with its default eps.
siou = SIoU()

# Example (YOLO normalized):
# box1 = [cx, cy, w, h]
# box2 = [cx, cy, w, h]
# box2 is box1 shifted by 0.02 in both directions and enlarged by 0.02 per side length.
box1 = torch.tensor([0.5, 0.5, 0.2, 0.2])
box2 = torch.tensor([0.52, 0.52, 0.22, 0.22])

# 1-D inputs are treated as a single box each; the result is a scalar tensor.
loss = siou(box1, box2)
print(loss.item())





0.3173433542251587


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from model.yolo.yolo_net import YOLONet

# SIoU loss implementation
class SIoU(nn.Module):
    """SIoU bounding-box loss for boxes given as ``[cx, cy, w, h]``.

    The loss is ``1 - mean(IoU - 0.5 * (distance_cost + shape_cost))`` where the
    distance cost is modulated by the angle between the two box centres.

    Args:
        eps: Small constant that keeps divisions and the square root well defined.
    """

    def __init__(self, eps=1e-7):
        super().__init__()
        self.eps = eps

    @staticmethod
    def _corners(box: torch.Tensor):
        """Convert ``[N, 4]`` centre-format boxes to ``(x1, y1, x2, y2)`` tensors of shape ``[N]``."""
        half_w = box[:, 2] / 2
        half_h = box[:, 3] / 2
        return box[:, 0] - half_w, box[:, 1] - half_h, box[:, 0] + half_w, box[:, 1] + half_h

    def forward(self, box1: torch.Tensor, box2: torch.Tensor) -> torch.Tensor:
        """Compute the mean SIoU loss between two sets of boxes.

        Args:
            box1: Tensor of shape ``[N, 4]`` or ``[4]`` in ``[cx, cy, w, h]`` order.
            box2: Tensor of shape ``[N, 4]`` or ``[4]``; a single box is broadcast
                against all boxes of the other argument.

        Returns:
            Scalar tensor ``1 - mean(siou)`` (differentiable, not a Python float).
        """
        if box1.ndim == 1:
            box1 = box1.unsqueeze(0)
        if box2.ndim == 1:
            box2 = box2.unsqueeze(0)

        b1_x1, b1_y1, b1_x2, b1_y2 = self._corners(box1)
        b2_x1, b2_y1, b2_x2, b2_y2 = self._corners(box2)

        # Intersection area (zero when the boxes do not overlap).
        inter_w = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0)
        inter_h = (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
        inter = inter_w * inter_h

        w1 = b1_x2 - b1_x1 + self.eps
        h1 = b1_y2 - b1_y1 + self.eps
        w2 = b2_x2 - b2_x1 + self.eps
        h2 = b2_y2 - b2_y1 + self.eps

        union = w1 * h1 + w2 * h2 - inter + self.eps
        iou = inter / union

        # Width/height of the smallest box enclosing both boxes.
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) + self.eps
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) + self.eps
        # Offset between the two box centres along x and y.
        s_cw = (b2_x1 + b2_x2 - b1_x1 - b1_x2) * 0.5
        s_ch = (b2_y1 + b2_y2 - b1_y1 - b1_y2) * 0.5
        # eps**2 sits inside the root: the derivative of sqrt at exactly 0 is
        # infinite, which would turn the gradient into NaN whenever the two
        # centres coincide. The value at zero distance is still eps.
        sigma = torch.sqrt(s_cw ** 2 + s_ch ** 2 + self.eps ** 2)

        sin_alpha_1 = torch.abs(s_cw) / sigma
        sin_alpha_2 = torch.abs(s_ch) / sigma
        threshold = 2 ** 0.5 / 2  # sin(45 degrees)
        # Always use the smaller of the two angles to the axes.
        sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1)

        angle_cost = 1 - 2 * torch.pow(torch.sin(torch.arcsin(sin_alpha) - torch.pi / 4), 2)
        rho_x = (s_cw / cw) ** 2
        rho_y = (s_ch / ch) ** 2
        gamma = 2 - angle_cost
        distance_cost = 2 - torch.exp(-gamma * rho_x) - torch.exp(-gamma * rho_y)

        omiga_w = torch.abs(w1 - w2) / torch.max(w1, w2)
        omiga_h = torch.abs(h1 - h2) / torch.max(h1, h2)
        shape_cost = torch.pow(1 - torch.exp(-omiga_w), 4) + torch.pow(1 - torch.exp(-omiga_h), 4)

        siou = iou - 0.5 * (distance_cost + shape_cost)
        return 1 - siou.mean()

# Process YOLO output
def process_output(output: torch.Tensor, num_anchors=3, num_classes=2) -> torch.Tensor:
    """Split the channel axis of a detection-head map into per-anchor vectors.

    Args:
        output: Tensor of shape ``(B, C, H, W)`` with
            ``C == num_anchors * (5 + num_classes)``. The grid does not have to
            be square.
        num_anchors: Number of anchors predicted per grid cell.
        num_classes: Number of class scores per anchor.

    Returns:
        Tensor of shape ``(B, H, W, num_anchors, 5 + num_classes)`` where
        ``result[b, i, j, a, k] == output[b, a * (5 + num_classes) + k, i, j]``.

    Raises:
        RuntimeError: If ``C`` does not match ``num_anchors * (5 + num_classes)``.
    """
    batch, _, height, width = output.shape
    # Move channels last so that each grid cell's values are contiguous before
    # they are split into (anchor, value) pairs.
    channels_last = output.permute(0, 2, 3, 1).contiguous()
    return channels_last.view(batch, height, width, num_anchors, 5 + num_classes)

# YOLO + SIoU loss
def calculate_loss(output: torch.Tensor, target: list) -> torch.Tensor:
    """Average SIoU loss of every predicted box against every target box.

    For each grid cell / anchor, the first four prediction values are compared
    with each target box; the per-cell losses are summed over the targets and
    averaged over the cells.

    Args:
        output: Predictions of shape ``(B, S1, S2, A, D)`` with ``D >= 4``; the
            first four values of the last axis are used as ``[cx, cy, w, h]``.
        target: Sequence of boxes. Each box is either ``[cx, cy, w, h]`` or
            ``[class_id, cx, cy, w, h]``; only the last four values are used.

    Returns:
        Scalar tensor on ``output``'s device. For an empty ``target`` this is a
        zero that is still connected to ``output``, so ``backward()`` works.
    """
    siou_loss = SIoU()
    # All cells / anchors as one [N, 4] batch, so SIoU runs once per target
    # instead of once per cell.
    preds = output[..., :4].reshape(-1, 4)

    # Graph-connected zero: keeps the result differentiable with no targets.
    total_loss = preds.sum() * 0
    for target_bbox in target:
        box = torch.as_tensor(target_bbox, dtype=output.dtype, device=output.device)
        # Drop a leading class id if present; SIoU expects [cx, cy, w, h].
        box = box[-4:]
        # SIoU already averages over the N predictions, which equals summing the
        # per-cell losses and dividing by the number of cells.
        total_loss = total_loss + siou_loss(preds, box)
    return total_loss

# Training loop
num_anchors = 3
num_classes = 2
epochs = 3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = YOLONet(num_classes=num_classes, num_anchors=num_anchors)
model = model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

model.train()  # make sure layers such as dropout / batch-norm are in training mode
for epoch in range(epochs):
    total_loss = 0
    num_steps = 0
    for images, targets in train_df:
        # Labels are rows of [class_id, cx, cy, w, h] held in a plain list (not a
        # tensor); the SIoU loss only compares the [cx, cy, w, h] part.
        targets = [label[1:] for label in targets]
        if not targets:
            # Nothing to regress against for an image without boxes.
            continue

        # Each sample is one image; process_output unpacks a 4-D (B, C, S, S)
        # shape, so a batch dimension of 1 is added here.
        # NOTE(review): assumes samples are (C, H, W) tensors - confirm the
        # dataset was built with a tensor-producing transform.
        images = images.unsqueeze(0).to(device)
        optimizer.zero_grad()

        out_13, out_26, out_52 = model(images)
        out_13_proc = process_output(out_13, num_anchors, num_classes)
        out_26_proc = process_output(out_26, num_anchors, num_classes)
        out_52_proc = process_output(out_52, num_anchors, num_classes)

        loss13 = calculate_loss(out_13_proc, targets)
        loss26 = calculate_loss(out_26_proc, targets)
        loss52 = calculate_loss(out_52_proc, targets)

        # Equal weight for the three detection scales.
        mean_loss = (loss13 + loss26 + loss52) / 3

        mean_loss.backward()
        optimizer.step()
        total_loss += mean_loss.item()
        num_steps += 1

    print(f"Epoch [{epoch+1}/{epochs}] Loss: {total_loss / max(num_steps, 1):.4f}")


# ---------------------- Training model ----------------------------------

In [19]:
# Sanity check of the loss aggregation on random "predictions" (no model involved):
# every predicted box is compared with every target box, the losses are summed
# and the total is divided by the number of predicted boxes.
output = torch.rand(1, 2, 2, 1, 4)

target = [
    [0.5, 0.5, 0.2, 0.2],
    [0.52, 0.52, 0.22, 0.22]
]

siou_loss = SIoU()
total_loss = 0
count = 0
# Flattening to [N, 4] visits the (batch, row, col, anchor) cells in the same
# order as four nested index loops would.
for pred in output[..., :4].reshape(-1, 4):
    # type of target is list
    for target_bbox in target:
        loss = siou_loss(pred, torch.tensor(target_bbox))
        total_loss += loss
    count += 1
print(total_loss / count)


tensor(2.3734)
