In [1]:
import sys
sys.path.insert(1, "../../")
sys.path.insert(1, "../../Models/")
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
import albumentations as A
from albumentations.pytorch import ToTensorV2
from yolov2 import YOLOv2D19 as YOLOv2
from detection_datasets import VOCDatasetV2
from torch import optim
from train import *
import torch.optim.lr_scheduler as lr_scheduler
from data_preprocessing import get_norms
import pickle
from eval import evaluate, get_pred_boxes, get_gt_boxes, NMS
# Load anchor data that was saved earlier as a pickle next to the model code.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only open anchor files that were produced locally.
# NOTE(review): `anchors` is not referenced by any later cell visible here
# (the dataset and loss cells use ANCHORS[0]) — confirm it is still needed.
with open('../../Models/anchors_VOC0712trainval.pickle', 'rb') as handle:
    anchors = pickle.load(handle)

  check_for_updates()


In [2]:
import torch
import torch.nn as nn
import datetime

class YOLOv2Loss(nn.Module):
    """Weighted sum of MSE terms for a single-scale YOLOv2 prediction grid.

    Both the prediction and the target are indexed as 4-D tensors
    ``(batch, num_anchors * (5 + num_classes), S, S)``. Every anchor owns a
    contiguous run of ``5 + num_classes`` channels ordered as
    ``[conf, xc, yc, w, h, class_0, ..., class_{C-1}]``.

    Args:
        anchors: Sequence of ``(width, height)`` anchor priors. They are
            multiplied by the grid size ``S`` before being compared with the
            target widths/heights (presumably anchors are image fractions and
            target sizes are in grid cells — confirm against the dataset).
        lambda_noobj: Weight of the confidence loss on cells without an object.
        lambda_coord: Weight of the box-centre and box-size losses.
        lambda_isobj: Weight of the confidence loss on cells with an object.
        lambda_class: Weight of the class-probability loss.
        num_classes: Number of class channels per anchor.
    """

    def __init__(self, anchors, lambda_noobj=0.5, lambda_coord=5.0, lambda_isobj=1.0, lambda_class=1.0, num_classes=20):
        super().__init__()
        self.mse = torch.nn.MSELoss(reduction='mean')
        self.softmax = torch.nn.Softmax(dim=1)
        self.lambda_noobj = lambda_noobj
        self.lambda_coord = lambda_coord
        self.lambda_isobj = lambda_isobj
        self.lambda_class = lambda_class
        self.num_classes = num_classes
        self.anchors = anchors
        # Kept for backward compatibility with code that reads this attribute.
        self.sigmoid = nn.Sigmoid()

    @staticmethod
    def _masked(values, mask):
        """Return ``values`` with every position where ``mask`` is False set to 0."""
        # torch.where (rather than ``mask * values``) keeps a non-finite value in a
        # masked-out cell from turning the whole mean into NaN.
        return torch.where(mask, values, torch.zeros_like(values))

    def _masked_mse(self, pred, target, mask):
        """Mean squared error over all elements, with masked-out cells zeroed on both sides."""
        return self.mse(self._masked(pred, mask), self._masked(target, mask))

    def forward(self, out, gt_out):
        """Compute the scalar loss for one batch.

        Args:
            out: Raw network output, ``(batch, A * (5 + C), S, S)``.
            gt_out: Target tensor with the same layout. It is only read;
                the log-space width/height targets are built in new tensors.

        Returns:
            Scalar tensor: the weighted sum of the box, confidence and class losses.

        Raises:
            ValueError: If the channel dimension of ``out`` or ``gt_out`` is not
                ``len(anchors) * (5 + num_classes)``.
        """
        num_anchors = len(self.anchors)
        stride = 5 + self.num_classes  # channels owned by one anchor
        expected_channels = num_anchors * stride
        if out.shape[1] != expected_channels or gt_out.shape[1] != expected_channels:
            raise ValueError(
                f"expected {expected_channels} channels "
                f"({num_anchors} anchors x {stride}), got out={out.shape[1]} "
                f"and gt_out={gt_out.shape[1]}"
            )

        # [conf, obj_xc, obj_yc, obj_w, obj_h]
        conf_true = gt_out[:, 0::stride, ...]
        is_obj = conf_true == 1.0
        no_obj = conf_true == 0.0

        # CONFIDENCE LOSS ===========
        conf_pred = out[:, 0::stride, ...].sigmoid()
        is_obj_conf_loss = self._masked_mse(conf_pred, conf_true, is_obj)
        no_obj_conf_loss = self._masked_mse(conf_pred, conf_true, no_obj)

        # BOX CENTRE LOSS ===========
        xc_loss = self._masked_mse(out[:, 1::stride, ...].sigmoid(), gt_out[:, 1::stride, ...], is_obj)
        yc_loss = self._masked_mse(out[:, 2::stride, ...].sigmoid(), gt_out[:, 2::stride, ...], is_obj)

        # BOX SIZE LOSS =============
        # Anchors are rescaled by the grid size taken from the last target dimension.
        grid_size = gt_out.shape[-1]
        priors = torch.as_tensor(self.anchors, dtype=gt_out.dtype, device=gt_out.device) * grid_size
        pw = priors[:, 0][None, :, None, None]
        ph = priors[:, 1][None, :, None, None]

        # Log-space targets are computed out of place so the caller's gt_out is
        # left intact and the loss can be evaluated repeatedly on the same targets.
        w_true = torch.log(1e-16 + gt_out[:, 3::stride, ...] / pw)
        h_true = torch.log(1e-16 + gt_out[:, 4::stride, ...] / ph)

        w_loss = self._masked_mse(out[:, 3::stride, ...], w_true, is_obj)
        h_loss = self._masked_mse(out[:, 4::stride, ...], h_true, is_obj)

        # CLASS LOSS ================
        class_loss = 0
        for i in range(num_anchors):
            first_idx = 5 + i * stride
            last_idx = first_idx + self.num_classes
            # Keep the anchor axis (size 1) so the mask broadcasts over the class channels.
            anchor_mask = is_obj[:, i:i + 1, ...]
            class_loss = class_loss + self._masked_mse(
                self.softmax(out[:, first_idx:last_idx, ...]),
                gt_out[:, first_idx:last_idx, ...],
                anchor_mask,
            )

        loss = (
            self.lambda_coord * (w_loss + h_loss)
            + self.lambda_coord * (xc_loss + yc_loss)
            + self.lambda_isobj * is_obj_conf_loss
            + self.lambda_noobj * no_obj_conf_loss
            + self.lambda_class * class_loss
        )
        return loss

In [3]:
# Anchor priors: three groups of three pairs. The loss reads column 0 of a
# group as the width and column 1 as the height.
# Only ANCHORS[0] is used by the dataset and loss cells visible here.
# NOTE(review): the values look like fractions of the image size (the loss
# multiplies them by the grid size) — confirm against the dataset encoding.
ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]

In [4]:
# Compute target and numeric precision handed to the datasets and the model below.
device = torch.device('cuda:0')  # first CUDA device
dtype = torch.float32

In [5]:
# Normalisation statistics read from the trainval JSON; the 'means' and 'stds'
# entries are fed to A.Normalize in the transform pipeline.
norms = get_norms('../../../datasets/VOCdevkit/trainval_norms.json')
means, stds = norms['means'], norms['stds']

In [6]:
# Preprocessing shared by both splits: resize to 416x416, normalise with the
# statistics loaded above, convert to a tensor. Boxes are declared as pascal_voc.
transforms = A.Compose(
    [
        A.Resize(width=416, height=416),
        A.Normalize(mean=means, std=stds),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='pascal_voc'),
)

# Training data: VOC2012 trainval, encoded on a single 13x13 grid with the first anchor group.
train_set = VOCDatasetV2(
    devkit_path='../../../datasets/VOCdevkit/',
    subsets=[('VOC2012', 'trainval')],
    scales=[13],
    anchors=ANCHORS[0],
    transforms=transforms,
    dtype=dtype,
    device=device,
)

# Validation data: VOC2007 val, with the same encoding and preprocessing.
val_set = VOCDatasetV2(
    devkit_path='../../../datasets/VOCdevkit/',
    subsets=[('VOC2007', 'val')],
    scales=[13],
    anchors=ANCHORS[0],
    transforms=transforms,
    dtype=dtype,
    device=device,
)

True ../../../datasets/VOCdevkit/VOC2012\ImageSets\Main\trainval.txt
True ../../../datasets/VOCdevkit/VOC2007\ImageSets\Main\val.txt


In [7]:
# Trim both splits to a handful of samples for a quick smoke / overfitting run.
# The isinstance guards make this cell safe to re-run: wrapping an already
# trimmed Subset a second time would make val_set index position 9 of a
# 9-element subset and raise IndexError once a loader iterates over it.
if not isinstance(train_set, Subset):
    train_set = Subset(train_set, list(range(0, 100)))
if not isinstance(val_set, Subset):
    # NOTE(review): starts at index 1, so validation sample 0 is skipped — confirm this is intentional.
    val_set = Subset(val_set, list(range(1, 10)))

In [8]:
# Batch the trimmed datasets; only the training loader reshuffles every epoch.
train_loader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=32, shuffle=False)

In [9]:
# Build the detector from the state dict at the given path; num_anchors matches
# the three priors in ANCHORS[0] used by the dataset and the loss.
model = YOLOv2(
    state_dict_path='../../Models/darknet19_72.96.pth',
    num_anchors=3,
    device=device,
    dtype=dtype,
)

  state_dict = torch.load(state_dict_path, map_location=self.device)


In [10]:
scaler = torch.amp.GradScaler("cuda")

In [11]:
# Loss for the single 13x13 scale. Box and no-object terms are weighted 10x
# relative to the object-confidence and class terms.
loss_fn = YOLOv2Loss(
    anchors=ANCHORS[0],
    lambda_noobj=10,
    lambda_coord=10.0,
    lambda_isobj=1.0,
    lambda_class=1.0,
)

In [12]:
# A single pass over the (trimmed) training data.
epochs = 1

# SGD with momentum and weight decay at a very small learning rate.
optimizer = optim.SGD(
    model.parameters(),
    lr=1e-5,
    momentum=0.9,
    weight_decay=1e-4,
)

# gamma=1.0 multiplies the learning rate by one each epoch, i.e. keeps it constant.
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=1.0)

In [13]:
# Debugging aid: makes autograd report the forward operation behind a failing
# or NaN-producing backward step.
# NOTE(review): this slows down every backward pass — presumably left on while
# diagnosing the loss; consider turning it off for real training runs.
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x1ec0b9b3520>

In [14]:
# Run the training loop and keep its two return values.
# NOTE(review): train_loader is passed in both loader positions, so val_loader
# is never used here. If the third positional argument is the validation
# loader, the reported "[Val]" loss is computed on training data — confirm
# this is the intended overfitting check.
history, gradient_stats = train(
    epochs,
    train_loader,
    train_loader,
    model,
    optimizer,
    loss_fn,
    scheduler,
    scaler,
    save_grad=False,
    outputs_path='../log/YOLOv2/training/',
)

2025-03-23 19:38:00.049528 Epoch 1 
[Train] Loss per batch: 3.0123
[Val] loss per batch: 2.4987
Epoch 1: SGD lr 0.0000 -> 0.0000


In [15]:
from _utils import (
    mean_average_precision,
    cells_to_bboxes,
    get_evaluation_bboxes,
    save_checkpoint,
    load_checkpoint,
    check_class_accuracy,
    plot_couple_examples
)
import config

In [16]:
# Collect predicted and ground-truth boxes over train_loader, then score them
# with mean average precision and print the result.
# NOTE(review): the recorded output of this cell ends in
#   RuntimeError: The size of tensor a (75) must match the size of tensor b (13)
#   at non-singleton dimension 2
# so this call does not currently run with this model. Presumably
# get_evaluation_bboxes / config.ANCHORS expect a different output layout than
# the model produces — TODO confirm and adapt before relying on the mAP value.
pred_boxes, true_boxes = get_evaluation_bboxes(
                train_loader,
                model,
                iou_threshold=config.NMS_IOU_THRESH,
                anchors=config.ANCHORS,
                threshold=config.CONF_THRESHOLD,
            )
mapval = mean_average_precision(
    pred_boxes,
    true_boxes,
    iou_threshold=config.MAP_IOU_THRESH,
    box_format="midpoint",
    num_classes=config.NUM_CLASSES,
)
print(f"MAP: {mapval.item()}")

  0%|                                                                                            | 0/4 [00:00<?, ?it/s]

torch.Size([32, 3, 13, 13, 4]) torch.Size([32, 3, 13, 13, 1])
torch.Size([32, 3, 13, 13, 1])
torch.Size([75, 13, 4]) torch.Size([75, 3, 13, 13, 1])
torch.Size([75, 13, 1])





RuntimeError: The size of tensor a (75) must match the size of tensor b (13) at non-singleton dimension 2

In [17]:
# Display the module tree of the detector (the cell's last expression is rendered as output).
model

YOLOv2D19(
  (backbone): DarkNet_19(
    (conv_1): Sequential(
      (0): Conv_BN_LeakyReLU(
        (convs): Sequential(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.1, inplace=True)
        )
      )
      (1): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (conv_2): Sequential(
      (0): Conv_BN_LeakyReLU(
        (convs): Sequential(
          (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): LeakyReLU(negative_slope=0.1, inplace=True)
        )
      )
      (1): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (conv_3): Sequential(
      (0): Conv_BN_LeakyReLU(
        (convs): Sequential(
         