# yolo-v1 code

## yolo-v1 网络结构
输入：Batch x C x H x W  
输出：Batch x GridCell x GridCell x (BBox x (x, y, w, h, c) + 类别数)，例如 2 x 5 + 20 = 30


In [1]:
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F


__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with one pixel of padding.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (defaults to 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions (no channel expansion).

    Args:
        inplanes: Number of channels of the incoming tensor.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input so that it can be
            added to the main branch; when ``None`` the input is added as-is.
    """
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Only the first convolution carries the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        branch = self.relu(self.bn1(self.conv1(x)))
        branch = self.bn2(self.conv2(branch))

        # Use the projection when one was supplied, otherwise the raw input.
        shortcut = x if self.downsample is None else self.downsample(x)

        branch += shortcut
        return self.relu(branch)


class Bottleneck(nn.Module):
    """Residual 1x1 -> 3x3 -> 1x1 block whose output has ``planes * 4`` channels.

    Args:
        inplanes: Number of channels of the incoming tensor.
        planes: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input so that it can be
            added to the main branch; when ``None`` the input is added as-is.
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        widened = planes * 4  # channel count after the final 1x1 convolution
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        out = x
        # The first two stages are conv -> batch-norm -> ReLU.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            out = self.relu(norm(conv(out)))

        # The last stage has no activation before the residual addition.
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)

class detnet_bottleneck(nn.Module):
    """Bottleneck block with a dilated (dilation=2) 3x3 convolution and no expansion.

    The shortcut is a 1x1 convolution + batch-norm projection when the block
    is of type ``'B'``, when ``stride != 1`` or when the channel count changes;
    otherwise it is an empty ``nn.Sequential`` (identity).

    Args:
        in_planes: Number of channels of the incoming tensor.
        planes: Number of channels inside the block and at its output.
        stride: Stride of the dilated 3x3 convolution and of the projection.
        block_type: ``'A'`` (identity shortcut when shapes allow) or ``'B'``
            (always use the 1x1 projection).
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, block_type='A'):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # padding=2 together with dilation=2 keeps the spatial size at stride 1.
        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3, stride=stride,
            padding=2, bias=False, dilation=2,
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        needs_projection = (
            stride != 1 or in_planes != out_planes or block_type == 'B'
        )
        if needs_projection:
            shortcut_layers = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        else:
            shortcut_layers = []
        self.downsample = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Return ``relu(main_branch(x) + downsample(x))``."""
        hidden = self.bn1(self.conv1(x))
        hidden = F.relu(hidden)
        hidden = self.bn2(self.conv2(hidden))
        hidden = F.relu(hidden)
        hidden = self.bn3(self.conv3(hidden))
        hidden += self.downsample(x)
        return F.relu(hidden)

class ResNet(nn.Module):
    """ResNet backbone followed by a dilated DetNet stage and a conv detection head.

    The classification head of the stock ResNet (average pool + fully
    connected layer) is replaced by ``layer5`` (three ``detnet_bottleneck``
    blocks at stride 1) and a 3x3 convolution that emits 30 channels per
    spatial location. The stem, the max-pool and layers 2-4 each use stride 2,
    so the output grid is roughly ``H/32 x W/32``.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``); must
            expose an ``expansion`` class attribute.
        layers: Number of blocks in each of the four ResNet stages.
        num_classes: Unused. Kept so existing callers that pass it keep
            working; the head always emits 30 channels.
    """

    def __init__(self, block, layers, num_classes=1470):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # After layer4, self.inplanes is 512 * block.expansion (512 for
        # BasicBlock, 2048 for Bottleneck). Using it instead of a hard-coded
        # 2048 makes the BasicBlock variants (resnet18/34) usable as well.
        self.layer5 = self._make_detnet_layer(in_channels=self.inplanes)
        self.conv_end = nn.Conv2d(256, 30, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn_end = nn.BatchNorm2d(30)

        # He-style initialisation for convolutions, identity for batch-norm.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one may change shape.

        Updates ``self.inplanes`` to the channel count produced by the stage.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut so it matches the main branch's shape.
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _make_detnet_layer(self, in_channels):
        """Build the stride-1 dilated stage: one type-B block then two type-A blocks."""
        layers = [
            detnet_bottleneck(in_planes=in_channels, planes=256, block_type='B'),
            detnet_bottleneck(in_planes=256, planes=256, block_type='A'),
            detnet_bottleneck(in_planes=256, planes=256, block_type='A'),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map an image batch ``(B, 3, H, W)`` to predictions ``(B, H', W', 30)``.

        Every output value is squashed into (0, 1) by a sigmoid.
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)

        x = self.conv_end(x)
        x = self.bn_end(x)
        x = x.sigmoid()  # normalise to the 0-1 range
        # Channels-last layout: (B, 30, H', W') -> (B, H', W', 30).
        x = x.permute(0, 2, 3, 1)

        return x


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 based detection model.

    Args:
        pretrained (bool): If True, initialises the layers shared with the
            ImageNet ResNet-18 checkpoint from that checkpoint.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        # strict=False is required: the checkpoint contains fc.* weights this
        # model does not have, and this model has layer5/conv_end/bn_end
        # weights the checkpoint lacks. A strict load always raises.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']),
                              strict=False)
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 based detection model.

    Args:
        pretrained (bool): If True, initialises the layers shared with the
            ImageNet ResNet-34 checkpoint from that checkpoint.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # strict=False is required: the checkpoint contains fc.* weights this
        # model does not have, and this model has layer5/conv_end/bn_end
        # weights the checkpoint lacks. A strict load always raises.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']),
                              strict=False)
    return model


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 based detection model.

    Args:
        pretrained (bool): If True, initialises the layers shared with the
            ImageNet ResNet-50 checkpoint from that checkpoint.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        # strict=False is required: the checkpoint contains fc.* weights this
        # model does not have, and this model has layer5/conv_end/bn_end
        # weights the checkpoint lacks. A strict load always raises.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']),
                              strict=False)
    return model


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 based detection model.

    Args:
        pretrained (bool): If True, initialises the layers shared with the
            ImageNet ResNet-101 checkpoint from that checkpoint.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        # strict=False is required: the checkpoint contains fc.* weights this
        # model does not have, and this model has layer5/conv_end/bn_end
        # weights the checkpoint lacks. A strict load always raises.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']),
                              strict=False)
    return model


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 based detection model.

    Args:
        pretrained (bool): If True, initialises the layers shared with the
            ImageNet ResNet-152 checkpoint from that checkpoint.
        **kwargs: Forwarded to ``ResNet``.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        # strict=False is required: the checkpoint contains fc.* weights this
        # model does not have, and this model has layer5/conv_end/bn_end
        # weights the checkpoint lacks. A strict load always raises.
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']),
                              strict=False)
    return model


if __name__ == "__main__":
    # Smoke test: build the detector, seed its backbone from torchvision's
    # pretrained ResNet-50 and print the shape of the prediction tensor.

    import torch
    import torchvision

    def copy_params(model1, model2, restrict=False):
        """Copy parameters/buffers that both models share from model2 into model1.

        Args:
            model1: Destination model (updated in place and returned).
            model2: Source model.
            restrict: When True nothing is copied (the branch is a
                placeholder); when False every state-dict entry of ``model2``
                whose key also exists in ``model1`` with the same shape is
                copied.

        Returns:
            ``model1`` after loading the merged state dict.
        """
        model1_state_dict = model1.state_dict()
        model2_state_dict = model2.state_dict()
        if not restrict:
            # Skip same-named tensors of a different shape: loading them
            # would make load_state_dict raise a size-mismatch error.
            collected_dict = {
                k: v for k, v in model2_state_dict.items()
                if k in model1_state_dict and v.shape == model1_state_dict[k].shape
            }
            model1_state_dict.update(collected_dict)

        model1.load_state_dict(model1_state_dict)

        return model1

    model = resnet50()
    # NOTE(review): newer torchvision releases prefer the `weights=` argument
    # over `pretrained=True`; kept as-is for compatibility with older versions.
    pre_model = torchvision.models.resnet50(pretrained=True)

    model = copy_params(model, pre_model, restrict=False)

    H, W = 416, 416
    B = 3
    # Image batches are laid out as (batch, channels, height, width).
    data = torch.rand(B, 3, H, W)

    # No gradients are needed for a shape check.
    with torch.no_grad():
        preds = model(data)

    print(f'preds: {preds.shape}')


preds: torch.Size([3, 13, 13, 30])




## yolo-v1 损失函数

In [None]:
#encoding:utf-8
#
#created by xiongzihua 2017.12.26
#
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class yoloLoss(nn.Module):
    """Sum-of-squared-errors detection loss in the style of YOLO-v1.

    Both the prediction and the target are laid out per grid cell as
    ``B`` boxes of ``[x, y, w, h, c]`` followed by the class scores, i.e. the
    last dimension has ``5 * B + num_classes`` entries. A cell "contains an
    object" when the confidence of its first target box is > 0 and "contains
    no object" when it is exactly 0.

    The loss runs on whatever device the input tensors live on.

    Args:
        S: Grid size given by the caller. It is stored for reference only;
            ``forward`` reads the actual grid size from the tensors.
        B: Number of boxes predicted per grid cell.
        l_coord: Weight of the localisation (xy and sqrt-wh) term.
        l_noobj: Weight of the confidence term for cells without an object.
    """

    def __init__(self, S, B, l_coord, l_noobj):
        super(yoloLoss, self).__init__()
        self.S = S
        self.B = B
        self.l_coord = l_coord
        self.l_noobj = l_noobj

    def compute_iou(self, box1, box2):
        """Compute the pairwise intersection over union of two sets of boxes.

        Each box is ``[x1, y1, x2, y2]``. Instead of two nested loops, the
        boxes are broadcast against each other: ``box1`` gets a new axis 1
        (``[N, 1, ...]``) and ``box2`` a new axis 0 (``[1, M, ...]``), so every
        element-wise operation yields an ``[N, M, ...]`` result covering all
        pairs.

        Args:
            box1: (tensor) bounding boxes, sized [N, 4].
            box2: (tensor) bounding boxes, sized [M, 4].

        Returns:
            (tensor) iou, sized [N, M].
        """
        # Intersection corners: max of the top-left, min of the bottom-right.
        top_left = torch.max(box1[:, None, :2], box2[None, :, :2])          # [N,M,2]
        bottom_right = torch.min(box1[:, None, 2:4], box2[None, :, 2:4])    # [N,M,2]

        # Disjoint boxes give a negative extent; clip it to an empty overlap.
        wh = (bottom_right - top_left).clamp(min=0)                         # [N,M,2]
        inter = wh[..., 0] * wh[..., 1]                                     # [N,M]

        area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])       # [N]
        area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])       # [M]

        # Union = sum of both areas minus the doubly counted intersection.
        return inter / (area1[:, None] + area2[None, :] - inter)

    @staticmethod
    def _to_corners(boxes, grid_size):
        """Convert ``[..., (x, y, w, h)]`` boxes to ``[..., (x1, y1, x2, y2)]``.

        The centre is divided by the grid size before the half extents are
        applied, which puts x/y on the same scale as w/h. The cell origin is
        not added; the boxes compared here always belong to the same cell.
        """
        centre = boxes[..., :2] / float(grid_size)
        half_extent = 0.5 * boxes[..., 2:4]
        return torch.cat((centre - half_extent, centre + half_extent), dim=-1)

    @staticmethod
    def _paired_iou(boxes_a, boxes_b):
        """Element-wise (broadcast) IoU of corner boxes ``[..., 4]``."""
        top_left = torch.max(boxes_a[..., :2], boxes_b[..., :2])
        bottom_right = torch.min(boxes_a[..., 2:4], boxes_b[..., 2:4])
        wh = (bottom_right - top_left).clamp(min=0)
        inter = wh[..., 0] * wh[..., 1]
        area_a = (boxes_a[..., 2] - boxes_a[..., 0]) * (boxes_a[..., 3] - boxes_a[..., 1])
        area_b = (boxes_b[..., 2] - boxes_b[..., 0]) * (boxes_b[..., 3] - boxes_b[..., 1])
        return inter / (area_a + area_b - inter)

    def forward(self, pred_tensor, target_tensor):
        """Compute the detection loss, averaged over the batch.

        Args:
            pred_tensor: (tensor) size (batch, S, S, 5 * B + num_classes),
                each box laid out as [x, y, w, h, c].
            target_tensor: (tensor) same size and layout as ``pred_tensor``.

        Returns:
            Scalar tensor
            ``(l_coord * loc + 2 * contain + not_contain + l_noobj * noobj + cls) / batch``
            where
              * loc: xy and sqrt(wh) error of the responsible box,
              * contain: confidence error of the responsible box, whose
                target is its IoU with the ground truth,
              * not_contain: confidence error (target 0) of the other boxes
                in cells that contain an object,
              * noobj: confidence error of all boxes in cells without object,
              * cls: class-score error in cells that contain an object.
        """
        batch_size = pred_tensor.size(0)
        # Cells per side, taken from the tensor itself so that any grid works.
        grid_size = target_tensor.size(1)
        box_width = 5 * self.B

        target_conf = target_tensor[..., 4]   # (batch, S, S)
        obj_cells = target_conf > 0
        noobj_cells = target_conf == 0

        # ---- cells without an object: only the B confidences are penalised.
        noobj_pred_conf = pred_tensor[noobj_cells][:, 4:box_width:5]
        noobj_target_conf = target_tensor[noobj_cells][:, 4:box_width:5]
        nooobj_loss = F.mse_loss(noobj_pred_conf, noobj_target_conf, reduction='sum')

        # ---- cells with an object. Boolean indexing with the (batch, S, S)
        # mask keeps whole rows, giving (K, depth) for K object cells. Explicit
        # sizes (no -1) keep the reshape valid when K == 0.
        obj_pred = pred_tensor[obj_cells]
        obj_target = target_tensor[obj_cells]
        num_obj = obj_pred.size(0)
        box_pred = obj_pred[:, :box_width].reshape(num_obj, self.B, 5)      # (K, B, 5)
        box_target = obj_target[:, :box_width].reshape(num_obj, self.B, 5)  # (K, B, 5)
        class_pred = obj_pred[:, box_width:]
        class_target = obj_target[:, box_width:]

        # Pick, per cell, the predicted box with the highest IoU against the
        # cell's first target box. The IoU is a regression target only, so no
        # gradient is tracked through it.
        with torch.no_grad():
            pred_corners = self._to_corners(box_pred[..., :4], grid_size)      # (K, B, 4)
            gt_corners = self._to_corners(box_target[:, :1, :4], grid_size)    # (K, 1, 4)
            iou = self._paired_iou(pred_corners, gt_corners)                   # (K, B)
            best_iou, best_idx = iou.max(dim=1)                                # (K,), (K,)
            slots = torch.arange(self.B, device=iou.device)
            responsible = slots.unsqueeze(0) == best_idx.unsqueeze(1)          # (K, B)

        # 1. responsible boxes: confidence should equal the IoU with the
        #    ground truth, and the geometry should match the target.
        resp_pred = box_pred[responsible]       # (K, 5)
        resp_target = box_target[responsible]   # (K, 5)
        contain_loss = F.mse_loss(resp_pred[:, 4], best_iou, reduction='sum')
        loc_loss = (
            F.mse_loss(resp_pred[:, :2], resp_target[:, :2], reduction='sum')
            + F.mse_loss(torch.sqrt(resp_pred[:, 2:4]),
                         torch.sqrt(resp_target[:, 2:4]), reduction='sum')
        )

        # 2. remaining boxes of object cells: confidence is pushed towards 0.
        other_conf = box_pred[~responsible][:, 4]
        not_contain_loss = F.mse_loss(other_conf, torch.zeros_like(other_conf),
                                      reduction='sum')

        # 3. class scores of object cells.
        class_loss = F.mse_loss(class_pred, class_target, reduction='sum')

        total = (self.l_coord * loc_loss
                 + 2 * contain_loss
                 + not_contain_loss
                 + self.l_noobj * nooobj_loss
                 + class_loss)
        return total / batch_size
