# **Import Libraries**

In [13]:
import torch
import torch.nn as nn
import pandas as pd
import os, csv
import PIL
import skimage
from skimage import io
import numpy as np
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.transforms as transforms
from torchvision.tv_tensors import BoundingBoxes, Mask

import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch.optim as optim
import torchvision.transforms.functional as FT
from torch.utils.data import DataLoader
from tqdm import tqdm
import cv2
from collections import Counter

torch.manual_seed(1337)

<torch._C.Generator at 0x79d0e92d3430>

# **Model Architecture**

In [15]:
architecture_config = [
    #Tuple: (kernel_size, number of filters, strides, padding)
    (7, 64, 2, 3),
    #"M" = Max Pool Layer
    "M",
    (3, 192, 1, 1),
    "M",
    (1, 128, 1, 0),
    (3, 256, 1, 1),
    (1, 256, 1, 0),
    (3, 512, 1, 1),
    "M",
    #List: [(tuple), (tuple), how many times to repeat]
    [(1, 256, 1, 0), (3, 512, 1, 1), 4],
    (1, 512, 1, 0),
    (3, 1024, 1, 1),
    "M",
    [(1, 512, 1, 0), (3, 1024, 1, 1), 2],
    (3, 1024, 1, 1),
    (3, 1024, 2, 1),
    (3, 1024, 1, 1),
    (3, 1024, 1, 1),
    #Doesnt include fc layers
]

class Config:
    S = 12
    C = 7
    B = 2


In [16]:
class CNNBlock(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(CNNBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.leakyrelu = nn.LeakyReLU(0.1)
        
    def forward(self, x):
        return self.leakyrelu(self.batchnorm(self.conv(x)))
    
class YoloV1(nn.Module):
    def __init__(self, in_channels=3, **kwargs):
        super(YoloV1, self).__init__()
        self.architecture = architecture_config
        self.in_channels = in_channels
        self.darknet = self._create_conv_layers(self.architecture)
        self.fcs = self._create_fcs(**kwargs)
        
    def forward(self, x):
        x = self.darknet(x)
        return self.fcs(torch.flatten(x, start_dim=1))
    
    def _create_conv_layers(self, architecture):
        layers = []
        in_channels = self.in_channels
        
        for x in architecture:
            if type(x) == tuple:
                layers += [CNNBlock(in_channels, x[1], kernel_size=x[0], stride=x[2], padding=x[3])]
                in_channels = x[1]
            elif type(x) == str:
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            elif type(x) == list:
                conv1 = x[0] #Tuple
                conv2 = x[1] #Tuple
                repeats = x[2] #Int
                
                for _ in range(repeats):
                    layers += [CNNBlock(in_channels, conv1[1], kernel_size=conv1[0], stride=conv1[2], padding=conv1[3])]
                    layers += [CNNBlock(conv1[1], conv2[1], kernel_size=conv2[0], stride=conv2[2], padding=conv2[3])]
                    in_channels = conv2[1]
                    
        return nn.Sequential(*layers)
    
    def _create_fcs(self):
        S, B, C = Config.S, Config.B, Config.C
        # original code 496 (instead of currently 4096); but they mentioned that 4096 was used in the paper
        
        return nn.Sequential(nn.Flatten(), nn.Linear(288 * S * S, 496), nn.Dropout(0.0), nn.LeakyReLU(0.1), nn.Linear(496, S * S * (C + B * 5)))#Original paper uses nn.Linear(1024 * S * S, 4096) not 496. Also the last layer will be reshaped to (S, S, 13) where C+B*5 = 13
    
model = YoloV1()#.to(DEVICE)
total_params = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {total_params}")

Number of parameters: 34774976


In [17]:
def intersection_over_union(boxes_preds, boxes_labels, box_format='midpoint'):
    """
    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): midpoint/corners, if boxes are (x,y,w,h) or (x1,y1,x2,y2) respectively.
    
    Returns:
        tensor: Intersection over union for all examples
    """
    
    if box_format == 'midpoint':
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
        
    if box_format == 'corners':
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4] # Output tensor should be (N, 1). If we only use 3, we go to (N)
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]
    
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)
    
    #.clamp(0) is for the case when they don't intersect. Since when they don't intersect, one of these will be negative so that should become 0
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
    
    return intersection / (box1_area + box2_area - intersection + 1e-6)

# **Non-Max Supression**

In [18]:
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Parameters:
        bboxes (list): list of lists containing all bboxes with each bboxes
        specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): threshold where predicted bboxes is correct
        threshold (float): threshold to remove predicted bboxes (independent of IoU) 
        box_format (str): "midpoint" or "corners" used to specify bboxes
    Returns:
        list: bboxes after performing NMS given a specific IoU threshold
    """

    assert type(bboxes) == list

    bboxes = [box for box in bboxes if box[1] > threshold]
    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
    bboxes_after_nms = []

    while bboxes:
        chosen_box = bboxes.pop(0)

        bboxes = [
            box
            for box in bboxes
            if box[0] != chosen_box[0]
            or intersection_over_union(
                torch.tensor(chosen_box[2:]),
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms

# **Mean Average Precision**

In [19]:
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint"
):
    """
    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar as pred_boxes except all the correct ones 
        iou_threshold (float): threshold where predicted bboxes is correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes
    Returns:
        float: mAP value across all classes given a specific IoU threshold 
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(Config.C):
        detections = []
        ground_truths = []

        # Go through all predictions and targets,
        # and only add the ones that belong to the
        # current class c
        for detection in pred_boxes:
            if detection[1] == c:
                detections.append(detection)

        for true_box in true_boxes:
            if true_box[1] == c:
                ground_truths.append(true_box)

        # find the amount of bboxes for each training example
        # Counter here finds how many ground truth bboxes we get
        # for each training example, so let's say img 0 has 3,
        # img 1 has 5 then we will obtain a dictionary with:
        # amount_bboxes = {0:3, 1:5}
        amount_bboxes = Counter([gt[0] for gt in ground_truths])

        # We then go through each key, val in this dictionary
        # and convert to the following (w.r.t same example):
        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
        for key, val in amount_bboxes.items():
            amount_bboxes[key] = torch.zeros(val)

        # sort by box probabilities which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)
        
        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        for detection_idx, detection in enumerate(detections):
            # Only take out the ground_truths that have the same
            # training idx as detection
            ground_truth_img = [
                bbox for bbox in ground_truths if bbox[0] == detection[0]
            ]

            num_gts = len(ground_truth_img)
            best_iou = 0

            for idx, gt in enumerate(ground_truth_img):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # only detect ground truth detection once
                if amount_bboxes[detection[0]][best_gt_idx] == 0:
                    # true positive and add this bounding box to seen
                    TP[detection_idx] = 1
                    amount_bboxes[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1

            # if IOU is lower then the detection is a false positive
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.divide(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    return sum(average_precisions) / len(average_precisions)

In [20]:
def get_bboxes(loader, model, iou_threshold, threshold, pred_format="cells", 
               box_format="midpoint", device="cuda", output_x = False):
    all_pred_boxes = []
    all_true_boxes = []

    model.eval()
    train_idx = 0
    
    if output_x:
        x_out = []

    for batch_idx, (x, labels) in enumerate(loader):
        x = x.to(device)
        labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            for nms_box in nms_boxes:
                all_pred_boxes.append([train_idx] + nms_box)
                
                if output_x:
                    x_out.append([train_idx, x[idx], labels[idx], nms_box])

            for box in true_bboxes[idx]:
                if box[1] > threshold:
                    all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    if output_x:
        return all_pred_boxes, all_true_boxes, x_out
    return all_pred_boxes, all_true_boxes

def convert_cellboxes(predictions):
    S = Config.S
    C = Config.C
    """
    Converts bounding boxes output from Yolo with
    an image split size of S into entire image ratios
    rather than relative to cell ratios. Tried to do this
    vectorized, but this resulted in quite difficult to read
    code... Use as a black box? Or implement a more intuitive,
    using 2 for loops iterating range(S) and convert them one
    by one, resulting in a slower but more readable implementation.
    """

    predictions = predictions.to("cpu")
    batch_size = predictions.shape[0]
    predictions = predictions.reshape(batch_size, Config.S, Config.S, C + 10)
    bboxes1 = predictions[..., C + 1:C + 5]
    bboxes2 = predictions[..., C + 6:C + 10]
    scores = torch.cat(
        (predictions[..., C].unsqueeze(0), predictions[..., C + 5].unsqueeze(0)), dim=0
    )
    best_box = scores.argmax(0).unsqueeze(-1)
    best_boxes = bboxes1 * (1 - best_box) + best_box * bboxes2
    cell_indices = torch.arange(Config.S).repeat(batch_size, Config.S, 1).unsqueeze(-1)
    x = 1 / S * (best_boxes[..., :1] + cell_indices)
    y = 1 / S * (best_boxes[..., 1:2] + cell_indices.permute(0, 2, 1, 3))
    w_y = 1 / S * best_boxes[..., 2:4]
    converted_bboxes = torch.cat((x, y, w_y), dim=-1)
    predicted_class = predictions[..., :C].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(predictions[..., C], predictions[..., C + 5]).unsqueeze(
        -1
    )
    converted_preds = torch.cat(
        (predicted_class, best_confidence, converted_bboxes), dim=-1
    )

    return converted_preds


def cellboxes_to_boxes(out):
    S = Config.S
    converted_pred = convert_cellboxes(out).reshape(out.shape[0], S * S, -1)
    converted_pred[..., 0] = converted_pred[..., 0].long()
    all_bboxes = []

    for ex_idx in range(out.shape[0]):
        bboxes = []

        for bbox_idx in range(S * S):
            bboxes.append([x.item() for x in converted_pred[ex_idx, bbox_idx, :]])
        all_bboxes.append(bboxes)

    return all_bboxes

def save_checkpoint(state, filename="my_checkpoint.pth"):
    print("=> Saving checkpoint")
    torch.save(state, filename)
    
def load_checkpoint(checkpoint, model, optimizer):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])

# **Dataset Preprocessing**

In [21]:
files_dir = '../input/sy32-panneaux-2/train'
val_dir = '../input/sy32-panneaux-2/val'
test_dir = '../input/sy32-panneaux-2/test'

excluded = ['0716', '0265', '0546']

images = [image for image in sorted(os.listdir(files_dir+'/images'))
                        if image[-4:]=='.jpg' and not image[-8:-4] in excluded]
annots = []
for image in images:
    annot = image.replace('images/','labels/').replace('.jpg','.csv')
    annots.append(annot)
    
images = pd.Series(images, name='images')
annots = pd.Series(annots, name='annots')
df = pd.concat([images, annots], axis=1)
df = pd.DataFrame(df)

test_images = [image for image in sorted(os.listdir(test_dir))
                        if image[-4:]=='.jpg']

test_annots = []
for image in test_images:
    test_annots.append(None)

test_images = pd.Series(test_images, name='test_images')
test_annots = pd.Series(test_annots, name='test_annots')
test_df = pd.concat([test_images, test_annots], axis=1)
test_df = pd.DataFrame(test_df)

##############

val_images = [image for image in sorted(os.listdir(val_dir+'/images'))
                        if image[-4:]=='.jpg']

val_annots = []
for image in val_images:
    annot = image.replace('images/','labels/').replace('.jpg','.csv')
    val_annots.append(annot)

val_images = pd.Series(val_images, name='val_images')
val_annots = pd.Series(val_annots, name='val_images')
val_df = pd.concat([val_images, val_annots], axis=1)
val_df = pd.DataFrame(val_df)

In [22]:
class_dictionary = ['danger', 'interdiction', 'stop', 'ceder', 'frouge', 'forange', 'fvert']
    
new_transform = A.Compose([
    A.Rotate(limit=15, p=0.6, border_mode=cv2.BORDER_CONSTANT),
    A.HorizontalFlip(p=0.3),
    A.RandomCropFromBorders(crop_left=0.15, crop_right=0.15, crop_top=0.15, crop_bottom=0.15, p=0.5),
    A.Resize(576, 576),
    ToTensorV2(), 
    
], bbox_params=A.BboxParams(format='yolo'))

    
new_transform_val = A.Compose([
    A.Resize(576, 576),
    #A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225))
    ToTensorV2(),
    
], bbox_params=A.BboxParams(format='yolo'))



In [23]:
class TrafficSignTestImagesDataset(torch.utils.data.Dataset):
    def __init__(self, df, files_dir, transform=None):
        self.annotations = df
        self.files_dir = files_dir
        self.transform = transform
        self.S = Config.S
        self.B = Config.B
        self.C = Config.C
        
    def __len__(self):
        return len(self.annotations) 
    
    def __getitem__(self, index):
        dataset_row = self.annotations.iloc[index]
        image_path = os.path.join(self.files_dir, dataset_row[0])
        #label_path = os.path.join(self.files_dir, 'labels', dataset_row[1])
        
        boxes = []
        labels = []
        
        img = Image.open(image_path)
        img_width, img_height = img.size
        
        image = img.convert("RGB")
        image = np.array(image)
        
        if self.transform:
            transformed = self.transform(image=image, bboxes=[])
            image = transformed['image']
            
        image = torch.tensor(image, dtype=torch.float32) / 255.0
        
        y = [dataset_row[0], img_width, img_height]
        
        return image, y
    
  

In [24]:
class TrafficSignImagesDataset(torch.utils.data.Dataset):
    def __init__(self, df, files_dir, transform=None):
        self.annotations = df
        self.files_dir = files_dir
        self.transform = transform
        self.S = Config.S
        self.B = Config.B
        self.C = Config.C
        
        self.cache = {}
        self.use_cache = True
        self.cache_hits = 0
        self.repetitions = 3

    def clear_cache(self):
        print("Cleared {} cached entries (hits: {})".format(len(self.cache), self.cache_hits))
        self.cache_hits = 0
        self.cache = {}
        
    def __len__(self):
        return len(self.annotations) * self.repetitions
    
    def __getitem__(self, index):
        if not self.use_cache:
            return self.load_index(index)
        
        if not index in self.cache:
            self.cache[index] = self.load_index(index)
        else:
            self.cache_hits += 1
        return self.cache[index]

    def load_index(self, virtual_index):
        index = virtual_index % len(self.annotations)
        dataset_row = self.annotations.iloc[index]
        image_path = os.path.join(self.files_dir, 'images', dataset_row[0])
        label_path = os.path.join(self.files_dir, 'labels', dataset_row[1])
        
        boxes = []
        labels = []
        
        img = Image.open(image_path)
        img_width, img_height = img.size
        
        with open(label_path, 'r') as csvfile:
            labelreader = csv.reader(csvfile, delimiter=',')
            for row in labelreader:
                if len(row) == 5:
                    [xmin,ymin, xmax, ymax] = [int(x) for x in row[:4]]
                    
                    centerx = ((xmax + xmin) / 2) / img_width
                    centery = ((ymax + ymin) / 2) / img_height
                    boxwidth = (xmax - xmin) / img_width
                    boxheight = (ymax - ymin) / img_height
                    
                    if row[4] in class_dictionary:
                        class_num = class_dictionary.index(row[4])
                        boxes.append([centerx, centery, boxwidth, boxheight, class_num])
                    else:
                        continue
                
        #boxes = torch.tensor(boxes)
        image = img.convert("RGB")
        image = np.array(image)
        
        if self.transform:
            transformed = self.transform(image=image, bboxes=boxes)
            image = transformed['image']
            boxes = transformed['bboxes']
        image = torch.tensor(image, dtype=torch.float32) / 255.0
            
        label_matrix = torch.zeros((self.S, self.S, self.C + 5 * self.B)) 
        for box in boxes:
            x, y, width, height, class_label = box
            class_label = int(class_label)

            # i,j represents the cell row and cell column
            i, j = int(self.S * y), int(self.S * x)
            x_cell, y_cell = self.S * x - j, self.S * y - i

            width_cell, height_cell = ( width * self.S, height * self.S )

            # If no object already found for specific cell i,j
            # Note: This means we restrict to ONE object
            # per cell!
            if label_matrix[i, j, self.C] == 0:
                # Set that there exists an object
                label_matrix[i, j, self.C] = 1

                # Box coordinates
                box_coordinates = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                label_matrix[i, j, 8:12] = box_coordinates
                

                # Set one hot encoding for class_label
                label_matrix[i, j, class_label] = 1

        return image, label_matrix
    
    
'''

for ridx, ax in enumerate(axs.ravel()):
    idx = ridx + 560
    im = torch.moveaxis(train_dataset[idx][0], 0, 2).cpu().numpy()
    
    image = Image.fromarray((im * 255).astype(np.uint8))
    draw = ImageDraw.Draw(image)
    
    
    labels = train_dataset[idx][1]
    S_SIZE = im.shape[1] / Config.S
    
    for i in range(Config.S):
        for j in range(Config.S):
            if labels[i, j, Config.C] == 1.0:
                [x, y, lwidth, lheight] = labels[i, j, 8:12] * S_SIZE
                lwidth /= 2
                lheight /= 2
                
                x += j * S_SIZE
                y += i * S_SIZE
                
                xy = [x - lwidth, y - lheight, x + lwidth, y + lheight]
        
                label = int( np.argmax(labels[i, j, :7]))
                print("label", class_dictionary[label])
                draw.rectangle(xy=xy, outline='green', width=5)
                
    
    #for label in train_dataset.real_labels[idx]:
    #    draw.rectangle(xy=label[:4], outline='red', width=5)
    
    ax.imshow(image)
'''

'\n\nfor ridx, ax in enumerate(axs.ravel()):\n    idx = ridx + 560\n    im = torch.moveaxis(train_dataset[idx][0], 0, 2).cpu().numpy()\n    \n    image = Image.fromarray((im * 255).astype(np.uint8))\n    draw = ImageDraw.Draw(image)\n    \n    \n    labels = train_dataset[idx][1]\n    S_SIZE = im.shape[1] / Config.S\n    \n    for i in range(Config.S):\n        for j in range(Config.S):\n            if labels[i, j, Config.C] == 1.0:\n                [x, y, lwidth, lheight] = labels[i, j, 8:12] * S_SIZE\n                lwidth /= 2\n                lheight /= 2\n                \n                x += j * S_SIZE\n                y += i * S_SIZE\n                \n                xy = [x - lwidth, y - lheight, x + lwidth, y + lheight]\n        \n                label = int( np.argmax(labels[i, j, :7]))\n                print("label", class_dictionary[label])\n                draw.rectangle(xy=xy, outline=\'green\', width=5)\n                \n    \n    #for label in train_dataset.real_lab

# **Model Loss**

In [25]:
class YoloLoss(nn.Module):

    def __init__(self):
        super(YoloLoss, self).__init__()
        self.mse = nn.MSELoss(reduction="sum")
        
        self.S = Config.S
        self.B = Config.B
        self.C = Config.C

        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        # predictions are shaped (BATCH_SIZE, S*S(C+B*5) when inputted
        predictions = predictions.reshape(-1, self.S, self.S, self.C + self.B * 5)

        # Calculate IoU for the two predicted bounding boxes with target bbox
        iou_b1 = intersection_over_union(predictions[..., self.C + 1:self.C + 5], target[..., self.C + 1:self.C + 5])
        iou_b2 = intersection_over_union(predictions[..., self.C + 6:self.C + 10], target[..., self.C + 1:self.C + 5])
        ious = torch.cat([iou_b1.unsqueeze(0), iou_b2.unsqueeze(0)], dim=0)

        # Take the box with highest IoU out of the two prediction
        # Note that bestbox will be indices of 0, 1 for which bbox was best
        iou_maxes, bestbox = torch.max(ious, dim=0)
        exists_box = target[..., self.C].unsqueeze(3)  # in paper this is Iobj_i

        # ======================== #
        #   FOR BOX COORDINATES    #
        # ======================== #

        # Set boxes with no object in them to 0. We only take out one of the two 
        # predictions, which is the one with highest Iou calculated previously.
        box_predictions = exists_box * (
            (
                bestbox * predictions[..., self.C + 6:self.C + 10]
                + (1 - bestbox) * predictions[..., self.C + 1:self.C + 5]
            )
        )

        box_targets = exists_box * target[..., self.C + 1:self.C + 5]

        # Take sqrt of width, height of boxes to ensure that
        box_predictions[..., 2:4] = torch.sign(box_predictions[..., 2:4]) * torch.sqrt(
            torch.abs(box_predictions[..., 2:4] + 1e-6)
        )
        box_targets[..., 2:4] = torch.sqrt(box_targets[..., 2:4])

        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # ==================== #
        #   FOR OBJECT LOSS    #
        # ==================== #

        # pred_box is the confidence score for the bbox with highest IoU
        pred_box = (
            bestbox * predictions[..., self.C + 5:self.C + 6] + (1 - bestbox) * predictions[..., self.C:self.C + 1]
        )

        object_loss = self.mse(
            torch.flatten(exists_box * pred_box),
            torch.flatten(exists_box * target[..., self.C:self.C + 1]),
        )

        # ======================= #
        #   FOR NO OBJECT LOSS    #
        # ======================= #

        #max_no_obj = torch.max(predictions[..., 20:21], predictions[..., 25:26])
        #no_object_loss = self.mse(
        #    torch.flatten((1 - exists_box) * max_no_obj, start_dim=1),
        #    torch.flatten((1 - exists_box) * target[..., 20:21], start_dim=1),
        #)

        no_object_loss = self.mse(
            torch.flatten((1 - exists_box) * predictions[..., self.C:self.C + 1], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., self.C:self.C + 1], start_dim=1),
        )

        no_object_loss += self.mse(
            torch.flatten((1 - exists_box) * predictions[..., self.C + 5:self.C + 6], start_dim=1),
            torch.flatten((1 - exists_box) * target[..., self.C:self.C + 1], start_dim=1)
        )

        # ================== #
        #   FOR CLASS LOSS   #
        # ================== #

        class_loss = self.mse(
            torch.flatten(exists_box * predictions[..., :self.C], end_dim=-2,),
            torch.flatten(exists_box * target[..., :self.C], end_dim=-2,),
        )

        loss = (
            self.lambda_coord * box_loss  # first two rows in paper
            + object_loss  # third row in paper
            + self.lambda_noobj * no_object_loss  # forth row
            + class_loss  # fifth row
        )

        return loss

# **Model Training**

In [65]:
LEARNING_RATE = 2e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
WEIGHT_DECAY = 0
EPOCHS = 100
LOAD_MODEL = False
LOAD_MODEL_FILE = "model-large-7.3-576-4x.pth"
TMP_MODEL_FILE = "model-tmp.pth"

In [30]:
def train_fn(train_loader, model, optimizer, loss_fn):
    loop = tqdm(train_loader, leave=True)
    mean_loss = []
    
    for batch_idx, (x, y) in enumerate(loop):
        x, y = x.to(DEVICE), y.to(DEVICE)
        out = model(x)
        loss = loss_fn(out, y)
        mean_loss.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loop.set_postfix(loss = loss.item())
        
    print(f"Mean loss was {sum(mean_loss) / len(mean_loss)}")
    return sum(mean_loss) / len(mean_loss)

In [31]:
def calc_map(data_loader, model):
    pred_boxes, target_boxes = get_bboxes(
        data_loader, model, iou_threshold=0.5, threshold=0.4
    )

    mean_avg_prec = mean_average_precision(
        pred_boxes, target_boxes, iou_threshold=0.5, 
        box_format="midpoint"
    )
    
    
    return mean_avg_prec

In [32]:
def main():
    model = YoloV1().to(DEVICE)

    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=2, mode='min', verbose=True)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)

    train_dataset = TrafficSignImagesDataset( df=df, transform=new_transform, files_dir=files_dir )

    val_dataset = TrafficSignImagesDataset(
        df=val_df,
        transform=new_transform_val, 
        files_dir=val_dir
    )

    train_loader = DataLoader(
        dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=False
    )

    val_loader = DataLoader(
        dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False,drop_last=False,
    )

    for epoch in range(EPOCHS):
        loss = train_fn(train_loader, model, optimizer, loss_fn)
        
        mean_avg_prec_train = calc_map(train_loader, model)
        print(f"Training data mAP: {mean_avg_prec_train}")
        
        scheduler.step(loss)
        
        if (epoch + 1) % 10 == 0:
            #train_dataset.clear_cache()
            mean_avg_prec = calc_map(val_loader, model)
            print(f"Validation mAP: {mean_avg_prec}")
        
            print(epoch+1, str(mean_avg_prec).replace(".",","), str(loss).replace(".",","))
            print("Saving tmp file")
            checkpoint = {
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict(),
            }
            save_checkpoint(checkpoint, filename=TMP_MODEL_FILE)
    
    checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
    }
    save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)
    
if __name__ == "__main__":
    main()

=> Loading checkpoint


  image_path = os.path.join(self.files_dir, 'images', dataset_row[0])
  label_path = os.path.join(self.files_dir, 'labels', dataset_row[1])
  image = torch.tensor(image, dtype=torch.float32) / 255.0
100%|██████████| 132/132 [01:01<00:00,  2.15it/s, loss=17]  


Mean loss was 33.172677632534146
Training data mAP: 0.6267604827880859


100%|██████████| 132/132 [00:26<00:00,  4.98it/s, loss=2.93]


Mean loss was 23.357967922181793
Training data mAP: 0.6929622888565063


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=10.9]


Mean loss was 19.97017453656052
Training data mAP: 0.7395100593566895


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.7] 


Mean loss was 17.763590879512556
Training data mAP: 0.7728337645530701


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.82]


Mean loss was 16.33938034556129
Training data mAP: 0.8100749850273132


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.96]


Mean loss was 15.049295815554531
Training data mAP: 0.8255395889282227


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=9.94]


Mean loss was 14.099350055058798
Training data mAP: 0.8391776084899902


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.63]


Mean loss was 13.23802204023708
Training data mAP: 0.8596371412277222


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.4] 


Mean loss was 12.538257639516484
Training data mAP: 0.8662630915641785


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=3.89]


Mean loss was 11.968930630972892
Training data mAP: 0.8911338448524475
Validation mAP: 0.28446826338768005
10 tensor(0,2845) 11,968930630972892
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=8.89]


Mean loss was 11.456422368685404
Training data mAP: 0.9065121412277222


100%|██████████| 132/132 [00:26<00:00,  4.91it/s, loss=5.93]


Mean loss was 10.857257875529202
Training data mAP: 0.9033292531967163


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=2.75]


Mean loss was 10.432272519126084
Training data mAP: 0.915012776851654


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.97]


Mean loss was 10.051290488604343
Training data mAP: 0.92353355884552


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.07]


Mean loss was 9.625406915491277
Training data mAP: 0.9166001677513123


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.43]


Mean loss was 9.328946940826647
Training data mAP: 0.936197817325592


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=3.16]


Mean loss was 8.935835616155105
Training data mAP: 0.9433971047401428


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=3.34]


Mean loss was 8.84988889910958
Training data mAP: 0.9388279318809509


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=3.28]


Mean loss was 8.376352373397712
Training data mAP: 0.9409889578819275


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=3.88]


Mean loss was 8.234320463556232
Training data mAP: 0.9450309872627258
Validation mAP: 0.28968167304992676
20 tensor(0,2897) 8,234320463556232
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.27]


Mean loss was 8.069273843909755
Training data mAP: 0.9483684301376343


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=3.92]


Mean loss was 7.7487638556596
Training data mAP: 0.9399523138999939


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.5] 


Mean loss was 7.514674706892534
Training data mAP: 0.9442297220230103


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=2.13]


Mean loss was 7.340929826100667
Training data mAP: 0.9484343528747559


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=2.69]


Mean loss was 7.217334248802879
Training data mAP: 0.9412404298782349


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.33]


Mean loss was 7.00378713463292
Training data mAP: 0.9527220129966736


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=2.09]


Mean loss was 6.76416394927285
Training data mAP: 0.9610352516174316


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=7.38]


Mean loss was 6.675326394312309
Training data mAP: 0.9612723588943481


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.61]


Mean loss was 6.484269315546209
Training data mAP: 0.9636077880859375


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=2.23]


Mean loss was 6.448035552646175
Training data mAP: 0.9606267809867859
Validation mAP: 0.2246176153421402
30 tensor(0,2246) 6,448035552646175
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.63]


Mean loss was 6.246390880960407
Training data mAP: 0.9534136056900024


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.52]


Mean loss was 6.038932919502258
Training data mAP: 0.9474702477455139


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.42]


Mean loss was 5.9536329489765745
Training data mAP: 0.9495641589164734


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.75]


Mean loss was 5.833123124007023
Training data mAP: 0.956572413444519


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.6] 


Mean loss was 5.728970690207048
Training data mAP: 0.9709315896034241


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.39]


Mean loss was 5.632698414000598
Training data mAP: 0.9665804505348206


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.47]


Mean loss was 5.607647052316954
Training data mAP: 0.9598610997200012


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.26]


Mean loss was 5.449889429590919
Training data mAP: 0.9580075144767761


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.14]


Mean loss was 5.470724261168278
Training data mAP: 0.9538341164588928


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.53]


Mean loss was 5.327625408317104
Training data mAP: 0.9646824598312378
Validation mAP: 0.24432624876499176
40 tensor(0,2443) 5,327625408317104
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.97]


Mean loss was 5.134750413172172
Training data mAP: 0.9744866490364075


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=3.57]


Mean loss was 5.172526258410829
Training data mAP: 0.9588943123817444


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.21]


Mean loss was 5.628469555666952
Training data mAP: 0.9427345991134644


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.12]


Mean loss was 5.574991367983095
Training data mAP: 0.9660683274269104
Epoch 00044: reducing learning rate of group 0 to 1.2500e-06.


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.33]


Mean loss was 4.905621234214667
Training data mAP: 0.9817270040512085


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.25]


Mean loss was 4.691975949388562
Training data mAP: 0.9844270348548889


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=3.82]


Mean loss was 4.634768719022924
Training data mAP: 0.9865558743476868


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.33]


Mean loss was 4.565675695737203
Training data mAP: 0.9811937212944031


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.5] 


Mean loss was 4.4890476790341465
Training data mAP: 0.9842526316642761


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=2.32]


Mean loss was 4.4692605119763
Training data mAP: 0.9806360006332397
Validation mAP: 0.21734336018562317
50 tensor(0,2173) 4,4692605119763
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=2.83]


Mean loss was 4.4590585574959265
Training data mAP: 0.9758864045143127


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.62]


Mean loss was 4.311225367314888
Training data mAP: 0.9807964563369751


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.47]


Mean loss was 4.257392270998522
Training data mAP: 0.9806380271911621


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=3.06]


Mean loss was 4.26825683405905
Training data mAP: 0.9860311150550842


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=2.39]


Mean loss was 4.262662602193428
Training data mAP: 0.9786384701728821


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=4.46]


Mean loss was 4.248703514084672
Training data mAP: 0.9816211462020874


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.28]


Mean loss was 4.148741760037162
Training data mAP: 0.9821364283561707


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.41]


Mean loss was 4.136509719220075
Training data mAP: 0.9781137704849243


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.29]


Mean loss was 4.055737137794495
Training data mAP: 0.980681836605072


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=2.13]


Mean loss was 4.006193233258797
Training data mAP: 0.9838514924049377
Validation mAP: 0.22656093537807465
60 tensor(0,2266) 4,006193233258797
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=4.32]


Mean loss was 4.0803317608255325
Training data mAP: 0.9825819134712219


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.13]


Mean loss was 3.9740207556522256
Training data mAP: 0.9781715273857117


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.77]


Mean loss was 3.88758863102306
Training data mAP: 0.9872912764549255


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.23]


Mean loss was 3.8914437510750512
Training data mAP: 0.981358528137207


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=0.803]


Mean loss was 3.7947290472008963
Training data mAP: 0.9777730107307434


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.12]


Mean loss was 3.8799536426862082
Training data mAP: 0.9868890047073364


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.28]


Mean loss was 3.7704467032894944
Training data mAP: 0.9851322174072266


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=0.895]


Mean loss was 3.7308621605237327
Training data mAP: 0.9810949563980103


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.81]


Mean loss was 3.7457990781827406
Training data mAP: 0.981486976146698


100%|██████████| 132/132 [00:26<00:00,  4.92it/s, loss=1.95]


Mean loss was 3.7217285497622057
Training data mAP: 0.9806021451950073
Validation mAP: 0.22555139660835266
70 tensor(0,2256) 3,7217285497622057
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.16]


Mean loss was 3.7434026264783107
Training data mAP: 0.9869184494018555


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=3.35]


Mean loss was 3.7020203733082973
Training data mAP: 0.983610987663269


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.68]


Mean loss was 3.695634151950027
Training data mAP: 0.9823024868965149


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=2.74]


Mean loss was 3.5893570690444023
Training data mAP: 0.9816433191299438


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.45]


Mean loss was 3.591013156103365
Training data mAP: 0.9795967936515808


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.36]


Mean loss was 3.570087803132606
Training data mAP: 0.9858319163322449


100%|██████████| 132/132 [00:26<00:00,  4.98it/s, loss=2.41]


Mean loss was 3.5949751788919624
Training data mAP: 0.9895927309989929


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.33]


Mean loss was 3.523792696721626
Training data mAP: 0.9867472052574158


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=1.25]


Mean loss was 3.4987469550335044
Training data mAP: 0.977078378200531


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.15]


Mean loss was 3.541467263843074
Training data mAP: 0.9840460419654846
Validation mAP: 0.2239328771829605
80 tensor(0,2239) 3,541467263843074
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=1.17]


Mean loss was 3.41714989326217
Training data mAP: 0.984122097492218


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=0.889]


Mean loss was 3.399669640895092
Training data mAP: 0.9870612025260925


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=0.83]


Mean loss was 3.4212664636698635
Training data mAP: 0.987418532371521


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=1.67]


Mean loss was 3.4042405504168887
Training data mAP: 0.9864639639854431


100%|██████████| 132/132 [00:26<00:00,  4.92it/s, loss=3.94]


Mean loss was 3.421518378185503
Training data mAP: 0.9809283018112183
Epoch 00085: reducing learning rate of group 0 to 6.2500e-07.


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=0.991]


Mean loss was 3.2917303000435685
Training data mAP: 0.9886088371276855


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=2.9] 


Mean loss was 3.2436910578698823
Training data mAP: 0.9835720062255859


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.1] 


Mean loss was 3.1430421327099656
Training data mAP: 0.9881632924079895


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=2.24]


Mean loss was 3.2024785352475718
Training data mAP: 0.9850059151649475


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=1.63]


Mean loss was 3.059813346826669
Training data mAP: 0.9920949935913086
Validation mAP: 0.23132732510566711
90 tensor(0,2313) 3,059813346826669
Saving tmp file
=> Saving checkpoint


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=0.918]


Mean loss was 3.1363345673589995
Training data mAP: 0.9896483421325684


100%|██████████| 132/132 [00:26<00:00,  4.96it/s, loss=1.05]


Mean loss was 3.045069436232249
Training data mAP: 0.9894833564758301


100%|██████████| 132/132 [00:26<00:00,  4.94it/s, loss=5.62]


Mean loss was 3.1356243843382057
Training data mAP: 0.9887120127677917


100%|██████████| 132/132 [00:26<00:00,  4.93it/s, loss=0.928]


Mean loss was 3.0929408208890394
Training data mAP: 0.9880391359329224


100%|██████████| 132/132 [00:26<00:00,  4.95it/s, loss=1.05]


Mean loss was 3.091881803490899
Training data mAP: 0.9885891675949097
Epoch 00095: reducing learning rate of group 0 to 3.1250e-07.


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.05]


Mean loss was 3.000576329050642
Training data mAP: 0.9922992587089539


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=0.865]


Mean loss was 3.0249083358230013
Training data mAP: 0.9893946647644043


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.36]


Mean loss was 2.9892692493669912
Training data mAP: 0.9909056425094604


100%|██████████| 132/132 [00:26<00:00,  4.97it/s, loss=1.97]


Mean loss was 2.941788133346673
Training data mAP: 0.9939131140708923


100%|██████████| 132/132 [00:26<00:00,  4.98it/s, loss=1.37]


Mean loss was 2.96002814896179
Training data mAP: 0.9902182221412659
Validation mAP: 0.22721931338310242
100 tensor(0,2272) 2,96002814896179
Saving tmp file
=> Saving checkpoint
=> Saving checkpoint


# **Predictions**

In [46]:
PRED_BOXES = []

def predictions():
    global PRED_BOXES
    model = YoloV1().to(DEVICE)
    
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)
    
    val_dataset = TrafficSignImagesDataset(
        transform=new_transform_val, 
        #df = df,
        #files_dir=files_dir
        df=val_df,
        files_dir=val_dir
    )

    val_loader = DataLoader(
        dataset=val_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_last=False,
    )
        
    model.eval()
    train_fn(val_loader, model, optimizer, loss_fn)

    pred_boxes, target_boxes, x_values = get_bboxes(
        val_loader, model, iou_threshold=0.5, threshold=0.4, output_x = True,
    )
    PRED_BOXES = [pred_boxes, x_values]

    mean_avg_prec = mean_average_precision(
        pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint"
    )
    print(f"Test mAP: {mean_avg_prec}")


predictions()

=> Loading checkpoint


  image_path = os.path.join(self.files_dir, 'images', dataset_row[0])
  label_path = os.path.join(self.files_dir, 'labels', dataset_row[1])
  image = torch.tensor(image, dtype=torch.float32) / 255.0
100%|██████████| 17/17 [00:05<00:00,  2.98it/s, loss=10.2]


Mean loss was 39.140269335578466
Test mAP: 0.6176915764808655


# Make predictions and write them to file (validation)

In [None]:
PRED_BOXES = []

def get_pred_bboxes(loader, model, iou_threshold, threshold, pred_format="cells", 
               box_format="midpoint", device="cuda"):
    all_pred_boxes = []
    all_true_boxes = []

    model.eval()
    train_idx = 0

    for batch_idx, (x,y) in enumerate(loader):
        x = x.to(device)
        [file_names, widths, heights] = y
        #labels = labels.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        #true_bboxes = cellboxes_to_boxes(labels)
        bboxes = cellboxes_to_boxes(predictions)

        for idx in range(batch_size):
            
            width = widths[idx].item()
            height = heights[idx].item()
            
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            for nms_box in nms_boxes:
                [class_id, prob, cx, cy, w, h] = nms_box
                
                cx *= width
                cy *= height

                w *= width / 2.0
                h *= height / 2.0

                x1 = int(cx - w)
                x2 = int(cx + w)

                y1 = int(cy - h)
                y2 = int(cy + h)
                
                all_pred_boxes.append([
                    file_names[idx].replace('.jpg', ''),
                    x1, y1,
                    x2, y2,
                    prob,
                    class_dictionary[int(class_id)]
                ])
                #all_pred_boxes.append([train_idx, file_names[idx], width, height] + nms_box)

            #for box in true_bboxes[idx]:
            #    if box[1] > threshold:
            #        all_true_boxes.append([train_idx] + box)

            train_idx += 1

    model.train()
    return all_pred_boxes


def prediction_test_dataset():
    model = YoloV1().to(DEVICE)
    
    optimizer = optim.Adam(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    loss_fn = YoloLoss()

    load_checkpoint(torch.load(LOAD_MODEL_FILE), model, optimizer)
    
    test_dataset = TrafficSignTestImagesDataset(
        transform=new_transform_val, 
        df=test_df,
        files_dir=test_dir
    )

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        drop_last=False,
    )
        
    model.eval()

    pred_boxes = get_pred_bboxes(
        test_loader, model, iou_threshold=0.5, threshold=0.4, output_x = False,
    )
    
    with open('predictions.csv', 'w', newline='\n') as f:
        writer = csv.writer(f)
        writer.writerows(pred_boxes)


prediction_test_dataset()

# Predictions (test dataset)

In [None]:
def draw_image(x, y, y_pred=None):
    img = x.cpu() 
    X = img.shape[1]
    
    labels = y.cpu().numpy()

    img_np = np.moveaxis((img.numpy() * 255.0).astype(np.uint8),0,2)
    img = Image.fromarray(img_np)
    
    [width, height,_] = img_np.shape
    
    draw = ImageDraw.Draw(img)

    S = Config.S
    
    S_SIZE = X / S
    
    if y_pred is not None:
        for [L, C, x, y, lwidth, lheight] in y_pred:
            print("FOUND", class_dictionary[int(L)], "p=", C, x, y, lwidth, lheight)
            x *= width
            y *= height
            
            lwidth *= width / 2.0
            lheight *= height / 2.0
            
            x1 = int(x - lwidth)
            x2 = int(x + lwidth)

            y1 = int(y - lheight)
            y2 = int(y + lheight)
            try:
                draw.rectangle(xy=[x1,y1,x2,y2], outline='red', width=5)
            except:
                pass
                #print(x1,y1,x2,y2)
            
    for i in range(Config.S):
        for j in range(Config.S):
            if labels[i, j, Config.S] == 1.0:
                [x, y, lwidth, lheight] = labels[i, j, 8:12] * S_SIZE
                
                
                lwidth /= 2
                lheight /= 2
                
                x += j * S_SIZE
                y += i * S_SIZE
                
                xy = [x - lwidth, y - lheight, x + lwidth, y + lheight]
                label = int( np.argmax(labels[i, j, :7]))
                print("label", class_dictionary[label], "", labels[i, j, :7])
                draw.rectangle(xy=xy, outline='green', width=5)
                
    print("*******")
                
    
    return np.array(img)

fig, axs = plt.subplots(2, 3, figsize=(15, 6))

XY = PRED_BOXES[1]

for idx, ax in enumerate(axs.ravel()):
    IDX = idx + 6 * 14
    
    y_preds = []
    
    idx_id, x, y = None, None, None
    
    for row in XY:
        if row[0] == IDX:
            if len(y_preds) == 0:
                [idx_id, x, y, y_pred] = row
            
            y_preds.append(row[3])
    
    if x is None or y is None:
        continue
    img = draw_image(x, y, y_preds)
    
    ax.imshow(img)
    ax.axis('off')