# 模型定义及损失函数实现原理 

1. 双线性插值方法定义
2. FCN模型搭建
3. 损失函数原理补充
4. 损失函数计算过程

In [None]:
# encoding: utf-8
import torch 
from torchvision import models
from torch import nn
import torch.nn.functional as F
import numpy as np

In [None]:
def bilinear_kernel(in_channels, out_channels, kernel_size):
    """Build a weight tensor holding a 2-D bilinear interpolation filter.

    The filter is the outer product of a 1-D triangular profile that peaks
    at the kernel centre. It is written into the (i, i) channel pairs of an
    otherwise all-zero weight array.

    Args:
        in_channels: size of the first weight dimension.
        out_channels: size of the second weight dimension.
        kernel_size: side length of the square kernel.

    Returns:
        A float32 torch tensor of shape
        (in_channels, out_channels, kernel_size, kernel_size).

    Note:
        The paired index assignment is meant for in_channels == out_channels;
        for other combinations it relies on NumPy index broadcasting.
    """
    factor = (kernel_size + 1) // 2
    # Odd kernels are centred on a pixel, even kernels between two pixels.
    center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5

    rows, cols = np.ogrid[:kernel_size, :kernel_size]
    row_profile = 1 - abs(rows - center) / factor
    col_profile = 1 - abs(cols - center) / factor
    bilinear_filter = row_profile * col_profile

    weight = np.zeros(
        (in_channels, out_channels, kernel_size, kernel_size),
        dtype=np.float32,
    )
    # Paired fancy indexing: channel i maps only onto channel i.
    weight[range(in_channels), range(out_channels)] = bilinear_filter
    return torch.from_numpy(weight)

In [3]:
# Build a VGG16 network (batch-norm variant) with pretrained=False,
# i.e. without loading pretrained weights.
pretrained_net = models.vgg16_bn(pretrained=False)
# Display the `features` sub-module so its layer indices can be inspected.
pretrained_net.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU(inplace=True)
  (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU(inplace=True)
  (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 

In [None]:
pretrained_net.features[0]  # first layer of the VGG16 network

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [None]:
pretrained_net.features[0].weight.shape # 64 convolution kernels, each of size 3*3*3

torch.Size([64, 3, 3, 3])

In [None]:
pretrained_net.features[:7]  # modules 0-6 of the VGG16 feature stack (first conv block through its max-pool)

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU(inplace=True)
  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [5]:
%%html
<img src="FCN.png" width="800" height="800">

In [None]:

class FCN(nn.Module):
    """Fully convolutional segmentation network on top of VGG16-BN feature stages.

    The backbone is split into five stages taken from the module-level
    ``pretrained_net.features``. The deepest feature map is upsampled 2x and
    added to the stage-4 output, reduced to 256 channels, upsampled 2x again
    and added to the stage-3 output, projected to ``num_classes`` channels and
    finally upsampled 8x by a transposed convolution.

    All transposed convolutions are bias-free and start from bilinear
    interpolation weights produced by ``bilinear_kernel``.

    Args:
        num_classes: number of output channels (one per segmentation class).

    Note:
        The stages are slices of the shared ``pretrained_net.features``
        container, so every ``FCN`` instance reuses the same backbone modules
        (and therefore the same backbone parameters).
    """

    def __init__(self, num_classes):
        super().__init__()

        # Backbone stages; the slice boundaries follow the layer indices of
        # ``pretrained_net.features``.
        self.stage1 = pretrained_net.features[:7]
        self.stage2 = pretrained_net.features[7:14]
        self.stage3 = pretrained_net.features[14:24]
        self.stage4 = pretrained_net.features[24:34]
        self.stage5 = pretrained_net.features[34:]

        # 1x1 scoring layers. They do not contribute to the value returned by
        # ``forward``; they are kept so that parameter names (state_dict keys)
        # and the order of random initialisation stay unchanged.
        self.scores1 = nn.Conv2d(512, num_classes, 1)
        self.scores2 = nn.Conv2d(512, num_classes, 1)
        self.scores3 = nn.Conv2d(128, num_classes, 1)

        # 1x1 projections: 512 -> 256 channels, then 256 -> num_classes.
        self.conv_trans1 = nn.Conv2d(512, 256, 1)
        self.conv_trans2 = nn.Conv2d(256, num_classes, 1)

        # ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0, **args)
        # kernel 16 / stride 8 / padding 4 enlarges the spatial size 8x.
        self.upsample_8x = nn.ConvTranspose2d(num_classes, num_classes, 16, 8, 4, bias=False)
        self.upsample_8x.weight.data = bilinear_kernel(num_classes, num_classes, 16)

        # kernel 4 / stride 2 / padding 1 enlarges the spatial size 2x.
        self.upsample_2x_1 = nn.ConvTranspose2d(512, 512, 4, 2, 1, bias=False)
        self.upsample_2x_1.weight.data = bilinear_kernel(512, 512, 4)

        self.upsample_2x_2 = nn.ConvTranspose2d(256, 256, 4, 2, 1, bias=False)
        self.upsample_2x_2.weight.data = bilinear_kernel(256, 256, 4)

    def forward(self, x):
        """Return per-pixel class scores for the input batch ``x``.

        Args:
            x: input tensor fed to the first backbone stage.

        Returns:
            Tensor with ``num_classes`` channels, produced by the final 8x
            transposed convolution.
        """
        s1 = self.stage1(x)
        s2 = self.stage2(s1)
        s3 = self.stage3(s2)
        s4 = self.stage4(s3)
        s5 = self.stage5(s4)

        # Skip connection 1: bring the deepest map up 2x and merge with stage 4.
        # (The previously computed-but-discarded scores1/scores2 convolutions
        # are no longer evaluated; they never influenced the output.)
        s5 = self.upsample_2x_1(s5)
        add1 = s5 + s4

        # Skip connection 2: reduce to 256 channels, up 2x, merge with stage 3.
        add1 = self.conv_trans1(add1)
        add1 = self.upsample_2x_2(add1)
        add2 = add1 + s3

        # Project to class scores and upsample 8x.
        output = self.conv_trans2(add2)
        output = self.upsample_8x(output)
        return output


In [24]:
# fake label data: one integer class index in [0, 12) per pixel of a 352x480 map
gt = np.random.rand(1, 352, 480)*12
gt = gt.astype(np.int64)
gt = torch.from_numpy(gt)
print(gt)
# fake input batch; the library is imported as `torch` (no `t` alias exists),
# so `t.randn` would raise NameError
x = torch.randn(1, 3, 352, 480)
print(x)

tensor([[[ 9,  3,  7,  ...,  9,  1,  3],
         [ 2, 10, 11,  ...,  5, 10,  7],
         [ 5,  3,  1,  ...,  6,  6,  1],
         ...,
         [ 7, 11, 10,  ...,  4,  7,  0],
         [ 2,  6, 11,  ...,  3,  3,  5],
         [ 1,  2, 11,  ..., 10,  5,  7]]])
tensor([[[[-0.1696, -0.0032, -0.7078,  ...,  0.5222, -0.7567, -0.5255],
          [-0.1247,  0.3469, -0.0461,  ...,  0.1995, -1.8711, -1.2577],
          [ 0.8483,  0.0107,  0.1716,  ..., -0.8260,  0.7499, -2.0751],
          ...,
          [ 1.1645, -0.3760,  0.4654,  ...,  0.6341, -0.9732,  0.3170],
          [ 0.6558, -1.7625, -0.0942,  ...,  1.1686,  2.0618, -1.5564],
          [-0.0292, -0.0116,  0.2264,  ...,  0.1721,  0.1979,  0.1158]],

         [[ 1.0275, -0.8438,  0.9060,  ..., -1.8316,  1.0276, -0.1227],
          [ 0.2348, -0.0512,  0.7146,  ...,  1.1838,  0.0182, -0.3521],
          [-0.8555,  0.5010, -0.3183,  ..., -1.7400, -1.7042, -0.9826],
          ...,
          [-0.7848,  0.3533,  0.0935,  ..., -1.0900,  0.46

In [19]:
# Instantiate the FCN with 12 output classes and run the fake input through it
net = FCN(12)
y = net(x)
print(y.shape)

# Log-softmax over dim=1, the class/channel axis of y
out = F.log_softmax(y, dim=1)
print(out.shape)

# NLLLoss consumes the log-probabilities `out` and the integer label map `gt`;
# with default arguments it reduces to a single scalar
criterion = nn.NLLLoss()
print(gt.shape)
loss = criterion(out, gt)
loss

torch.Size([1, 12, 352, 480])
torch.Size([1, 12, 352, 480])
torch.Size([1, 352, 480])


tensor(2.7314, grad_fn=<NllLoss2DBackward>)

In [20]:
# Convert the scalar loss tensor to a plain Python number
loss.item()

2.7313966751098633

**损失函数**

　　损失的回传最好精细到每个像素上，下面具体地呈现一下损失数值是怎么计算出来的。

In [23]:
%%html
<img src="15.png" width="500" height="500">
<img src="16.png" width="500" height="500">
<img src="17.png" width="500" height="500">
<img src="18.png" width="500" height="500">
<img src="19.png" width="500" height="500">
<img src="20.png" width="500" height="500">
<img src="21.png" width="500" height="500">
<img src="OneHot2.png" width="500" height="500">
<img src="OneHot.png" width="600" height="600">
<img src="22.png" width="500" height="500">

**NLLLoss**

　　这个损失函数的计算可以表达为：`loss(input, class) = -input[class]`。举例说明：三分类任务，输入`input=[-1.233, 2.657, 0.534]`，真实标签类别`class=2`（下标从0开始），则`loss=-0.534`，就是在对应类别的输出上取一个负号。实际应用：常用于多分类任务，但是input在输入NLLLoss()之前，需要先经过log_softmax函数激活，即先将input转换成概率分布的形式，再取对数。

In [54]:
# Fake label map: one class index (0 or 1) per pixel, shape (1, 2, 3)
gt = torch.from_numpy((np.random.rand(1, 2, 3) * 2).astype(np.int64))

# Random scores of shape (1, 2, 2, 3), turned into log-probabilities over dim=1
x = torch.randn(1, 2, 2, 3)
out = F.log_softmax(x, dim=1)

# Show labels, raw scores and log-probabilities, separated by ruler lines
print(gt, '=' * 40, x, '-' * 40, out, sep='\n')

tensor([[[0, 1, 1],
         [0, 0, 0]]])
tensor([[[[ 0.0655, -0.4263,  0.2459],
          [ 0.8114,  0.3539,  0.0906]],

         [[-1.4042,  0.2177,  0.2541],
          [-0.9366,  0.1628, -0.4252]]]])
----------------------------------------
tensor([[[[-0.2070, -1.0661, -0.6972],
          [-0.1605, -0.6022, -0.4681]],

         [[-1.6767, -0.4221, -0.6891],
          [-1.9085, -0.7933, -0.9839]]]])


　　直白而言，就是按标签给出的每个像素点的类别，到该像素点对应的通道上取出相应的值（取负号后）算到损失里。为什么可以这样呢？因为如果该点被完全正确地分类，即该点经softmax后的概率向量在对应类别的位置为1、其他位置为0，那么取对数后该位置的值为log 1 = 0，即正确分类的损失为0；预测概率越小，取对数再取负号后的损失就越大。

In [55]:
# gt = tensor([[[0, 1, 1],
#         [0, 0, 0]]])
# gt dims: (batch, w, h) in the author's labelling

# out = tensor([[[[-0.2070, -1.0661, -0.6972],
#          [-0.1605, -0.6022, -0.4681]],
#
#          [[-1.6767, -0.4221, -0.6891],
#          [-1.9085, -0.7933, -0.9839]]]])
# out dims: (batch, channel, w, h) in the author's labelling

# reduction='none' keeps one loss value per pixel instead of averaging
criterion = nn.NLLLoss(reduction='none') # default reduction='mean'
loss = criterion(out, gt)
loss

# loss = tensor([[[0.2070, 0.4221, 0.6891],
#          [0.1605, 0.6022, 0.4681]]])
# loss dims: (batch, w, h) in the author's labelling

# loss[0][0][0] = 0.2070 comes from -out[0][i][0][0] with i = gt[0][0][0] = 0
# loss[0][1][0] = 0.1605 comes from -out[0][i][1][0] with i = gt[0][1][0] = 0
# loss[0][0][1] = 0.4221 comes from -out[0][i][0][1] with i = gt[0][0][1] = 1

# (0.2070 + 0.4221 + 0.6891 + 0.1605 + 0.6022 + 0.4681) / 6 = 0.4248

# criterion = nn.NLLLoss() # default reduction='mean'
# loss = criterion(out, gt) = 0.4248


tensor([[[0.2070, 0.4221, 0.6891],
         [0.1605, 0.6022, 0.4681]]])

In [69]:
# Average the six per-pixel losses by hand to compare with the default 'mean' reduction
(0.2070 + 0.4221 + 0.6891 + 0.1605 + 0.6022 + 0.4681) / 6

0.4248333333333334

In [70]:
# Default NLLLoss (reduction='mean') collapses the per-pixel losses into one scalar
criterion = nn.NLLLoss()
loss = criterion(out, gt)
loss

tensor(0.4248)