## 理论问题回答
### 1. fcn中最开始为什么使用padding=100？
#### 使用padding=100是为了在降采样的特征提取过程中保证能够适应任意尺寸的输入图像
### 2. 上采样过程中为什么使用裁剪？
#### 裁剪是因为上采样后的尺寸与之前特征提取后的特征图尺寸不一致，通过裁剪到相同的大小，才能进行特征融合（相加）

## 代码整体介绍
### 重构了fcn的参考代码，可以支持所有类型的VGG结构作为骨干网络，代码可以跑通。  
#### 由于预训练模型的tensor名称与自己模型中的定义不一致，需要更改名称后再读入模型，因此加载模型阶段进行了适配，可以正常加载预训练模型  


## 疑问  
### 模型参数加载进来后，对同一张图片多次推理预测，结果都不一样，哪个环节存在随机？

In [1]:
import cv2
import numpy as np
import torch
import torch.nn as nn
from fcn8s import FCN8s

In [2]:
# 上采样权重参数生成
# https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
def get_upsampling_weight(in_channels, out_channels, kernel_size):
    """Make a 2D bilinear kernel suitable for upsampling"""
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype=np.float64)
    weight[range(in_channels), range(out_channels), :, :] = filt
    return torch.from_numpy(weight).float()


### 支持所有的vgg结构

In [3]:
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# 对官方的配置方式进行了修改，适配当前的fcn
vgg_cfgs = {
    'vgg_11': [[64, 'M'], [128, 'M'], [256, 256, 'M'], [512, 512, 'M'], [512, 512, 'M']],
    'vgg_13': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 'M'], [512, 512, 'M'], [512, 512, 'M']],
    'vgg_16': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 256, 'M'], [512, 512, 512, 'M'], [512, 512, 512, 'M']],
    'vgg_19': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 256, 256, 'M'], [512, 512, 512, 512, 'M'], [512, 512, 512, 512, 'M']],
}

### FCN网络结构定义

In [4]:
class FCN(nn.Module):   
    def __init__(self, vgg_cfgs, mode=8, n_class=21):
        super(FCN, self).__init__()

        self.mode = mode

        self.in_channels = 3  # 输入通道
        self.padding = 100    # 首次卷积padding

        self.vgg_stage1 = self.make_stage_layers(vgg_cfgs[0])    # 1/2
        self.vgg_stage2 = self.make_stage_layers(vgg_cfgs[1])    # 1/4
        self.vgg_stage3 = self.make_stage_layers(vgg_cfgs[2])    # 1/8
        self.vgg_stage4 = self.make_stage_layers(vgg_cfgs[3])    # 1/16
        self.vgg_stage5 = self.make_stage_layers(vgg_cfgs[4])    # 1/32

        self.fc6 = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
        )
        self.fc7 = nn.Sequential(
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
        )

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        #self.upscore = self.make_upscore_layers()   # upsample
        self.score_pool3 = nn.Conv2d(256, n_class, 1)
        self.score_pool4 = nn.Conv2d(512, n_class, 1)

        self.upscore = nn.ConvTranspose2d(
            n_class, n_class, 64, stride=32, bias=False)
        self.upscore2 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(
            n_class, n_class, 16, stride=8, bias=False)
        self.upscore16 = nn.ConvTranspose2d(
            n_class, n_class, 32, stride=16, bias=False)

        self.upscore_pool4 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)
        
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                #m.weight.data.zero_()
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                #    m.bias.data.zero_()
                    nn.init.constant_(m.bias, 0)
                #print("{}初始化OK!".format(m))
            if isinstance(m, nn.ConvTranspose2d):
                assert m.kernel_size[0] == m.kernel_size[1]
                initial_weight = get_upsampling_weight(
                    m.in_channels, m.out_channels, m.kernel_size[0])
                m.weight.data.copy_(initial_weight)
                #print("{}初始化OK!".format(m))        
    
    def make_stage_layers(self, cfg, batch_norm=False):
        layers = []          
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(self.in_channels, v, kernel_size=3, padding=self.padding)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                self.in_channels = v
                self.padding = 1 

        return nn.Sequential(*layers)

    def upscores(self, x, h, mode=8):
        if mode == 8:            
            h = self.upscore2(h)
            upscore2 = h  # 1/16

            h = self.score_pool4(self.pool4)
            h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
            score_pool4c = h  # 1/16

            h = upscore2 + score_pool4c  # 1/16
            h = self.upscore_pool4(h)
            upscore_pool4 = h  # 1/8

            h = self.score_pool3(self.pool3)
            h = h[:, :,
                9:9 + upscore_pool4.size()[2],
                9:9 + upscore_pool4.size()[3]]
            score_pool3c = h  # 1/8

            h = upscore_pool4 + score_pool3c  # 1/8

            h = self.upscore8(h)
            h = h[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()
            return h
        
        if mode == 16:
            h = self.upscore2(h)
            upscore2 = h  # 1/16

            h = self.score_pool4(self.pool4)
            h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
            score_pool4c = h  # 1/16

            h = upscore2 + score_pool4c

            h = self.upscore16(h)
            h = h[:, :, 27:27 + x.size()[2], 27:27 + x.size()[3]]
            return h
        
        if mode == 32:
            h = self.upscore(h)
            h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]]
            return h

        pass

    def forward(self, x):
        h = x
        h = self.vgg_stage1(h)

        h = self.vgg_stage2(h)

        h = self.vgg_stage3(h)
        self.pool3 = h

        h = self.vgg_stage4(h)
        self.pool4 = h

        h = self.vgg_stage5(h)
        self.pool5 = h

        h = self.fc6(h)

        h = self.fc7(h)
        
        h = self.score_fr(h)

        #h = self.upscores(x, h, mode=self.mode)
        
        h = self.upscore2(h)
        upscore2 = h  # 1/16

        h = self.score_pool4(self.pool4)
        h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
        score_pool4c = h  # 1/16

        h = upscore2 + score_pool4c  # 1/16
        h = self.upscore_pool4(h)
        upscore_pool4 = h  # 1/8

        h = self.score_pool3(self.pool3)
        h = h[:, :,
              9:9 + upscore_pool4.size()[2],
              9:9 + upscore_pool4.size()[3]]
        score_pool3c = h  # 1/8

        h = upscore_pool4 + score_pool3c  # 1/8

        h = self.upscore8(h)
        h = h[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()

        return h

### 调用接口

In [9]:
def get_state_dict(my_model_path, pretrained_path):
    state_dict1 = torch.load(my_model_path) # 读取自己定义的模型，以获取参数名称
    valid_name_list = list(state_dict1.keys())    
    ext_name_list = ['upscore.weight', 'upscore16.weight']
    valid_name_list.remove(ext_name_list[0])
    valid_name_list.remove(ext_name_list[1])
    state_dict2 = torch.load(pretrained_path)
    i = 0
    for k,v in state_dict2.items():
        value = v.clone()
        if state_dict1[valid_name_list[i]].shape == value.shape:
            state_dict1[valid_name_list[i]] = value
            i += 1
    state_dict1[ext_name_list[0]] = torch.zeros([21, 21, 64, 64])
    state_dict1[ext_name_list[1]] = torch.zeros([21, 21, 32, 32])
    return state_dict1

def get_myfcn(vgg_name, fcn_mode, pretrained_path):
    """
    根据配置产生fcn网络结构
    input:
        vgg_name:  vgg_cfgs中的可选配置
        fcn_mode:  fcn可选模式, 8; 16, 32
        pretrained_path: True or False
    output:
        fcn_net:   fcn模型
    """
    fcn_net = FCN(vgg_cfgs=vgg_name, mode=fcn_mode)
    if pretrained_path:
        print("开始加载预训练模型")
        #state_dict = torch.load(pretrained_path)
        my_model_path = 'my_params.pth'
        state_dict = get_state_dict(my_model_path, pretrained_path)
        fcn_net.load_state_dict(state_dict)
    return fcn_net

def get_fcn8s(pretrained_path):
    """
    加载开源的模型
    """
    fcn_net = FCN8s()
    state_dict = torch.load(pretrained_path)
    fcn_net.load_state_dict(state_dict)
    return fcn_net


def preprocess(img):
    """
    预处理
    """
    mean_bgr = np.array([104.00698793, 116.66876762, 122.67891434])
    img = img.astype(np.float64)
    img -= mean_bgr
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).float()
    img = img.unsqueeze(0)
    return img

def postprocess(pred):
    """
    后处理
    """
    pred = pred.detach().numpy()
    pred = pred.transpose(0, 2, 3, 1)  # NCHW NHWC
    pred = np.asarray(np.argmax(pred, axis=3), dtype=np.uint8)
    return pred

def get_palette(num_cls):
    """ Returns the color map for visualizing the segmentation mask.
    Args:
        num_cls: Number of classes
    Returns:
        The color map
    """

    n = num_cls
    palette = [0] * (n * 3)
    for j in range(0, n):
        lab = j
        palette[j * 3 + 0] = 0
        palette[j * 3 + 1] = 0
        palette[j * 3 + 2] = 0
        i = 0
        while lab:
            palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
            palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
            palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
            i += 1
            lab >>= 3
    return palette

### 一个简单的模型初始化及模型参数恢复实例

In [6]:
vgg_name = 'vgg_16'
fcn_mode = 8
pretrained_path = 'fcn8s_from_caffe.pth'   
myfcn_net = get_myfcn(vgg_cfgs[vgg_name], fcn_mode, pretrained_path)
print("创建模型对象成功")

fcn8s_net = get_fcn8s(pretrained_path)
print("创建模型对象成功")

img = cv2.imread('dog.jpg')
img_input = preprocess(img)

print(img.shape)

my_pred = myfcn_net(img_input)
my_pred = postprocess(my_pred)
#print(my_pred)
fcn8s_pred = fcn8s_net(img_input)
fcn8s_pred = postprocess(fcn8s_pred)
#print(fcn8s_pred)
#isEqual = torch.equal(my_pred, fcn8s_pred)
#print("两种实现结果是否一致:{}".format(isEqual))

# # 保存模型
# torch.save(fcn_net.state_dict(), 'my_params.pth')
# print("模型保存完毕！！！")
# pretrained_path = 'my_params.pth'
# fcn_test = get_fcn(vgg_cfgs[vgg_name], fcn_mode, pretrained_path)
# print("模型加载完毕！！！")


开始加载预训练模型
创建模型对象成功
创建模型对象成功
(121, 154, 3)


In [7]:
print(my_pred.shape)
print(my_pred)

(1, 121, 154)
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [11]:
print(fcn8s_pred.shape)
print(fcn8s_pred)

(1, 121, 154)
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


## 两个模型的参数也验证过，各层都一致，结果不一样，暂未找到原因

In [10]:
idx = np.argwhere(my_pred!=fcn8s_pred)
print(idx.shape)

(2560, 3)
