In [2]:
! nvidia-smi

Sun Nov 28 17:01:09 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# MODEL


-   main.py – Program execution file
-   model.py – Model class file
-   option.py – Argument file
-   utils – Folder keeping other Python files


In [None]:
import random
import torch
import torch.nn as nn
import numpy as np

SEED = 18

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x14bf3642d50>

In [None]:
def make_conv(in_channels, out_channels, kernel_size, stride=1, padding=1):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.LeakyReLU()
    )


class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.block = nn.Sequential(
            make_conv(channels, channels // 2, kernel_size=1, padding=0),
            make_conv(channels// 2, channels , kernel_size=3)
        )
    
    def forward(self, x):
        return x + self.block(x)


class Darknet53(nn.Module):
    def __init__(self):
        super(Darknet53, self).__init__()
        self.darknet53 = nn.Sequential(
            make_conv(3, 32, kernel_size=3),
            make_conv(32, 64, kernel_size=3, stride=2),
            ResidualBlock(channels=64),
            make_conv(64, 128, kernel_size=3, stride=2),
            ResidualBlock(channels=128),
            ResidualBlock(channels=128),
            make_conv(128, 256, kernel_size=3, stride=2),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            ResidualBlock(channels=256),
            make_conv(256, 512, kernel_size=3, stride=2),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            ResidualBlock(channels=512),
            make_conv(512, 1024, kernel_size=3, stride=2),
            ResidualBlock(channels=1024),
            ResidualBlock(channels=1024),
            ResidualBlock(channels=1024),
            ResidualBlock(channels=1024),
        )
    
    def forward(self, x):
        return self.darknet53(x)

In [None]:
import utils.iou as iou

class YOLODetection(nn.Module):
    def __init__(self, anchors, img_size, num_classes):
        super(YOLODetection, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.img_size = img_size
        self.num_classes = num_classes
        self.mse_loss = nn.MSELoss()
        self.bce_loss = nn.BCELoss()
        self.threshold = 0.5
        self.obj_scale = 1
        self.no_obj_scale = 100

    def forward(self, x, t):
        device = torch.device('cuda' if x.is_cuda else 'cpu')
        num_batches = x.size(0)
        grid_size = x.size(2)
        
        # x = [batch, num_anchors * (num_class + 5), grid, grid]
        #   --> [batch, num_anchors,  grid, grid, num_class + 5]
        
        pred = x.view(num_batches, self.num_anchors, self.num_classes + 5, grid_size, grid_size)\
             .permute(0, 1, 3, 4, 2).contiguous()
        
        # predicted values
        pred_cx = torch.sigmoid(pred[..., 0])
        pred_cy = torch.sigmoid(pred[..., 1])
        pred_w = pred[..., 2]
        pred_h = pred[..., 3]
        pred_conf = torch.sigmoid(pred[..., 4])
        pred_class = torch.sigmoid(pred[..., 5:])
        
        # offsef of grid
        stride = self.img_size / grid_size
        grid_x = torch.arange(grid_size, dtype=torch.float, device=device)\
                      .repeat(grid_size, 1).view(1, 1, grid_size, grid_size)
        grid_y = torch.arange(grid_size, dtype=torch.float, device=device)\
                      .repeat(grid_size, 1).t().view(1, 1, grid_size, grid_size)
        scaled_anchors = torch.as_tensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors],
                                         dtype=torch.float, device=device)
        anchor_w = scaled_anchors[:, 0].view(1, self.num_anchors, 1, 1)
        anchor_h = scaled_anchors[:, 1].view(1, self.num_anchors, 1, 1)
        
        # calculate output
        pred_box = torch.zeros_like(pred[..., :4], device=device)
        pred_box[..., 0] = pred_cx + grid_x
        pred_box[..., 1] = pred_cy + grid_y
        pred_box[..., 2] = torch.exp(pred_w) * anchor_w
        pred_box[..., 3] = torch.exp(pred_h) * anchor_h
        
        output = (pred_box.view(num_batches, -1, 4) * stride,
                  pred_conf.view(num_batches, -1, 1),
                  pred_class.view(num_batches, -1, self.num_classes))
        output = torch.cat(output, -1)
        
        # test phase -> return output
        if t is None:
            return output, 0
        
        
        ############################################################################
        #여기서부터 모르겟아
        # train phase -> get loss
        obj_mask = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.bool, device=device)
        no_obj_mask = torch.ones(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.bool, device=device)
        class_mask = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        iou_scores = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        t_cx = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        t_cy = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        t_w = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        t_h = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, dtype=torch.float, device=device)
        t_class = torch.zeros(num_batches, self.num_anchors, grid_size, grid_size, self.num_classes, dtype=torch.float, device=device)
        
        
        # target boxes -> 0xywh
        t_boxes = t[:, 2:6] * grid_size
        gxy = t[:, 1:3]
        gwh = t[:, 3:5]
        
        # get best anchor
        ious = torch.stack([iou.box_wh_iou(anchor, gwh) for anchor in self.anchors])
        _, best_iou_idx = ious.max(0)
        
        b, target_labels = t[:, :2].long().t()
        gx, gy = gxy.t()
        gw, gh = gwh.t()
        gi, gj = gxy.long().t()
        
        obj_mask[b, best_iou_idx, gj, gi] = 1
        no_obj_mask[b, best_iou_idx, gj, gi] = 0
        
        # Set noobj mask to zero where iou exceeds ignore threshold
        for i, anchor_ious in enumerate(ious.t()):
            no_obj_mask[b[i], anchor_ious > self.threshold, gj[i], gi[i]] = 0

        # Coordinates
        t_cx[b, best_iou_idx, gj, gi] = gx - gx.floor()
        t_cy[b, best_iou_idx, gj, gi] = gy - gy.floor()

        # Width and height
        t_w[b, best_iou_idx, gj, gi] = torch.log(gw / self.anchors[best_iou_idx][:, 0] + 1e-16)
        t_h[b, best_iou_idx, gj, gi] = torch.log(gh / self.anchors[best_iou_idx][:, 1] + 1e-16)

        # One-hot encoding of label
        t_class[b, best_iou_idx, gj, gi, target_labels] = 1

        # Compute label correctness and iou at best anchor
        class_mask[b, best_iou_idx, gj, gi] = (pred_class[b, best_iou_idx, gj, gi].argmax(-1) == target_labels).float()
        iou_scores[b, best_iou_idx, gj, gi] = iou.box_iou(pred_box[b, best_iou_idx, gj, gi], t_boxes, x1y1x2y2=False)

        t_conf = obj_mask.float()
        
        for i, anchor_ious in enumerate(ious.t()):
            no_obj_mask[b[i], anchor_ious > self.threshold, gj[i], gi[i]] = 0
        
        loss_x = self.mse_loss(pred_cx[obj_mask], t_cx[obj_mask])
        loss_y = self.mse_loss(pred_cy[obj_mask], t_cy[obj_mask])
        loss_w = self.mse_loss(pred_w[obj_mask], t_w[obj_mask])
        loss_h = self.mse_loss(pred_h[obj_mask], t_h[obj_mask])
        loss_box = loss_x + loss_y + loss_w + loss_h
        
        loss_conf_obj = self.bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
        loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask], t_conf[no_obj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_no_obj
        
        loss_class = self.bce_loss(pred_class[obj_mask], t_class[obj_mask])
        
        loss = loss_box + loss_conf + loss_class
        return x, loss

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
test = torch.rand([2, 210, 13, 13], device=device)
a = YOLODetection(torch.tensor([[142,211], [212,2]], device=device), 416, 100).to(device)
ttt = torch.tensor([[0, 1, 0.4, 0.1, 0.1, 0.1],
                    [0, 2, 0.3, 0.2, 0.4, 0.5]], device=device)
c = a(test, ttt)
print(c[0].shape)
print(c[1])

cuda:0
tensor([1., 2.], device='cuda:0')
torch.Size([2, 210, 13, 13])
tensor(166.6534, device='cuda:0')


In [None]:
class YOLOv3(nn.Module):
    def __init__(self, anchors, img_size=416, num_classes=1 + 67):
        super(YOLOv3, self).__init__()
        self.anchors = anchors
        
        last_out_channels = len(anchors) * (4 + 1 + num_classes)
        
        self.darknet53 = Darknet53()
        self.detection_block = self.make_detection_block(1024, 512, last_out_channels)
        self.yolo_layer = YOLODetection(anchors, img_size, num_classes)
    
    def forward(self, x, y=None):
        x = self.darknet53(x)
        x = self.detection_block(x)
        x_yolo, loss = self.yolo_layer(x, y)
        
        return x_yolo, loss
    
    def make_conv(self, in_channels, out_channels, kernel_size, stride=1, padding=1):       
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels, eps=1e-5, momentum=0.9),
            nn.LeakyReLU(negative_slope=0.1))
    
    def make_detection_block(self, in_channels, out_channels, last_out_channels):
        return nn.Sequential(
            self.make_conv(in_channels, out_channels, kernel_size=1, padding=0),
            self.make_conv(out_channels, out_channels * 2, kernel_size=3),
            self.make_conv(out_channels * 2, out_channels, kernel_size=1, padding=0),
            self.make_conv(out_channels, out_channels * 2, kernel_size=3),
            self.make_conv(out_channels * 2, out_channels, kernel_size=1, padding=0),
            self.make_conv(out_channels, out_channels * 2, kernel_size=3),
            nn.Conv2d(out_channels * 2, last_out_channels, kernel_size=1, stride=1, padding=0, bias=True)
        )

In [None]:
anchors = [1, 2]
model = YOLOv3(anchors, 416, 100)
test = torch.rand([1, 3, 416, 416])

y = model(test)

torch.Size([1, 210, 13, 13])
