## 问题回答
### 1. 为什么模型的最开始要加入padding=100进行填充？
#### 通过padding=100，可以保证能够接受任意尺寸的输入图像
### 2. 在上采样阶段，为什么需要裁剪？
#### 由于上采样后的尺寸与下采样阶段的特征图尺寸不一致，通过裁减到一样的尺寸，拼接起来再做后续的操作，直到最后得到与输入相同的尺寸

### 重构了fcn的参考代码，并对各个模型结构进行了对比验证，可以跑通，但是由于预训练模型的tensor名称与自己模型中的定义不一致，需要更改名称后再读入模型，因此暂时没有与原版的fcn8s进行对比验证

In [1]:
import cv2
import numpy as np
import torch
import torch.nn as nn
from fcn8s import FCN8s

In [2]:
# 上采样权重参数生成
# https://github.com/shelhamer/fcn.berkeleyvision.org/blob/master/surgery.py
def get_upsampling_weight(in_channels, out_channels, kernel_size):
    """Make a 2D bilinear kernel suitable for upsampling"""
    factor = (kernel_size + 1) // 2
    if kernel_size % 2 == 1:
        center = factor - 1
    else:
        center = factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    filt = (1 - abs(og[0] - center) / factor) * \
           (1 - abs(og[1] - center) / factor)
    weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size),
                      dtype=np.float64)
    weight[range(in_channels), range(out_channels), :, :] = filt
    return torch.from_numpy(weight).float()


### 支持所有的vgg结构

In [3]:
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# 对官方的配置方式进行了修改，适配当前的fcn
vgg_cfgs = {
    'vgg_11': [[64, 'M'], [128, 'M'], [256, 256, 'M'], [512, 512, 'M'], [512, 512, 'M']],
    'vgg_13': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 'M'], [512, 512, 'M'], [512, 512, 'M']],
    'vgg_16': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 256, 'M'], [512, 512, 512, 'M'], [512, 512, 512, 'M']],
    'vgg_19': [[64, 64, 'M'], [128, 128, 'M'], [256, 256, 256, 256, 'M'], [512, 512, 512, 512, 'M'], [512, 512, 512, 512, 'M']],
}

### FCN网络结构定义

In [4]:
class FCN(nn.Module):   
    def __init__(self, vgg_cfgs, mode=8, n_class=21):
        super(FCN, self).__init__()

        self.mode = mode

        self.in_channels = 3  # 输入通道
        self.padding = 100    # 首次卷积padding

        self.vgg_stage1 = self.make_stage_layers(vgg_cfgs[0])    # 1/2
        self.vgg_stage2 = self.make_stage_layers(vgg_cfgs[1])    # 1/4
        self.vgg_stage3 = self.make_stage_layers(vgg_cfgs[2])    # 1/8
        self.vgg_stage4 = self.make_stage_layers(vgg_cfgs[3])    # 1/16
        self.vgg_stage5 = self.make_stage_layers(vgg_cfgs[4])    # 1/32

        self.fc6 = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
        )
        self.fc7 = nn.Sequential(
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout2d(),
        )

        self.score_fr = nn.Conv2d(4096, n_class, 1)
        #self.upscore = self.make_upscore_layers()   # upsample
        self.score_pool3 = nn.Conv2d(256, n_class, 1)
        self.score_pool4 = nn.Conv2d(512, n_class, 1)

        self.upscore = nn.ConvTranspose2d(
            n_class, n_class, 64, stride=32, bias=False)
        self.upscore2 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(
            n_class, n_class, 16, stride=8, bias=False)
        self.upscore16 = nn.ConvTranspose2d(
            n_class, n_class, 32, stride=16, bias=False)

        self.upscore_pool4 = nn.ConvTranspose2d(
            n_class, n_class, 4, stride=2, bias=False)
        
        #self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                #m.weight.data.zero_()
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                #    m.bias.data.zero_()
                    nn.init.constant_(m.bias, 0)
                #print("{}初始化OK!".format(m))
            if isinstance(m, nn.ConvTranspose2d):
                assert m.kernel_size[0] == m.kernel_size[1]
                initial_weight = get_upsampling_weight(
                    m.in_channels, m.out_channels, m.kernel_size[0])
                m.weight.data.copy_(initial_weight)
                #print("{}初始化OK!".format(m))        
    
    def make_stage_layers(self, cfg, batch_norm=False):
        layers = []          
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(self.in_channels, v, kernel_size=3, padding=self.padding)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                self.in_channels = v
                self.padding = 1 

        return nn.Sequential(*layers)

    def upscores(self, x, h, mode=8):
        if mode == 8:            
            h = self.upscore2(h)
            upscore2 = h  # 1/16

            h = self.score_pool4(self.pool4)
            h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
            score_pool4c = h  # 1/16

            h = upscore2 + score_pool4c  # 1/16
            h = self.upscore_pool4(h)
            upscore_pool4 = h  # 1/8

            h = self.score_pool3(self.pool3)
            h = h[:, :,
                9:9 + upscore_pool4.size()[2],
                9:9 + upscore_pool4.size()[3]]
            score_pool3c = h  # 1/8

            h = upscore_pool4 + score_pool3c  # 1/8

            h = self.upscore8(h)
            h = h[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous()
            return h
        if mode == 16:
            h = self.upscore2(h)
            upscore2 = h  # 1/16

            h = self.score_pool4(self.pool4)
            h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]]
            score_pool4c = h  # 1/16

            h = upscore2 + score_pool4c

            h = self.upscore16(h)
            h = h[:, :, 27:27 + x.size()[2], 27:27 + x.size()[3]]
            return h
        if mode == 32:
            h = self.upscore(h)
            h = h[:, :, 19:19 + x.size()[2], 19:19 + x.size()[3]]
            return h


    def forward(self, x):
        h = x
        h = self.vgg_stage1(h)

        h = self.vgg_stage2(h)

        h = self.vgg_stage3(h)
        self.pool3 = h

        h = self.vgg_stage4(h)
        self.pool4 = h

        h = self.vgg_stage5(h)
        self.pool5 = h

        h = self.fc6(h)

        h = self.fc7(h)
        
        h = self.score_fr(h)

        h = self.upscores(x, h, mode=self.mode)

        return h

### 调用接口

In [5]:
def get_state_dict(my_model_path, pretrained_path):
    state_dict1 = torch.load(my_model_path) # 读取自己定义的模型，以获取参数名称
    valid_name_list = list(state_dict1.keys())    
    ext_name_list = ['upscore.weight', 'upscore16.weight']
    valid_name_list.remove(ext_name_list[0])
    valid_name_list.remove(ext_name_list[1])
    state_dict2 = torch.load(pretrained_path)
    i = 0
    for k,v in state_dict2.items():
        value = v.clone()
        if state_dict1[valid_name_list[i]].shape == value.shape:
            state_dict1[valid_name_list[i]] = value
            i += 1
    state_dict1[ext_name_list[0]] = torch.zeros([21, 21, 64, 64])
    state_dict1[ext_name_list[1]] = torch.zeros([21, 21, 32, 32])
    return state_dict1

def get_myfcn(vgg_name, fcn_mode, pretrained_path):
    """
    根据配置产生fcn网络结构
    input:
        vgg_name:  vgg_cfgs中的可选配置
        fcn_mode:  fcn可选模式, 8; 16, 32
        pretrained_path: True or False
    output:
        fcn_net:   fcn模型
    """
    fcn_net = FCN(vgg_cfgs=vgg_name, mode=fcn_mode)
    if pretrained_path:
        print("开始加载预训练模型")
        #state_dict = torch.load(pretrained_path)
        my_model_path = 'my_params.pth'
        state_dict = get_state_dict(my_model_path, pretrained_path)
        fcn_net.load_state_dict(state_dict)
    return fcn_net

def get_fcn8s(pretrained_path):
    fcn_net = FCN8s()
    state_dict = torch.load(pretrained_path)
    fcn_net.load_state_dict(state_dict)
    return fcn_net
    

In [16]:
my_dict_name = list(myfcn_net.state_dict().keys())
fcn8s_dict_name = list(fcn8s_net.state_dict().keys())
print(my_dict_name)
print(fcn8s_dict_name)
#print(myfcn_net.state_dict()['upscore2.weight'])
#print(fcn8s_net.state_dict()['upscore2.weight'])
print(torch.equal(myfcn_net.state_dict()['upscore_pool4.weight'], fcn8s_net.state_dict()['upscore_pool4.weight']))
# for i in range(len(fcn8s_dict_name)):
#     my_tensor = myfcn_net.state_dict()[my_dict_name[i]]
#     fcn8s_tensor = fcn8s_net.state_dict()[fcn8s_dict_name[i]]
#     isEqual = torch.equal(my_tensor, fcn8s_tensor)
#     print("{},{}".format(my_dict_name[i], fcn8s_dict_name[i]))
#     print("两种实现结果是否一致:{}".format(isEqual))

['vgg_stage1.0.weight', 'vgg_stage1.0.bias', 'vgg_stage1.2.weight', 'vgg_stage1.2.bias', 'vgg_stage2.0.weight', 'vgg_stage2.0.bias', 'vgg_stage2.2.weight', 'vgg_stage2.2.bias', 'vgg_stage3.0.weight', 'vgg_stage3.0.bias', 'vgg_stage3.2.weight', 'vgg_stage3.2.bias', 'vgg_stage3.4.weight', 'vgg_stage3.4.bias', 'vgg_stage4.0.weight', 'vgg_stage4.0.bias', 'vgg_stage4.2.weight', 'vgg_stage4.2.bias', 'vgg_stage4.4.weight', 'vgg_stage4.4.bias', 'vgg_stage5.0.weight', 'vgg_stage5.0.bias', 'vgg_stage5.2.weight', 'vgg_stage5.2.bias', 'vgg_stage5.4.weight', 'vgg_stage5.4.bias', 'fc6.0.weight', 'fc6.0.bias', 'fc7.0.weight', 'fc7.0.bias', 'score_fr.weight', 'score_fr.bias', 'score_pool3.weight', 'score_pool3.bias', 'score_pool4.weight', 'score_pool4.bias', 'upscore.weight', 'upscore2.weight', 'upscore8.weight', 'upscore16.weight', 'upscore_pool4.weight']
['conv1_1.weight', 'conv1_1.bias', 'conv1_2.weight', 'conv1_2.bias', 'conv2_1.weight', 'conv2_1.bias', 'conv2_2.weight', 'conv2_2.bias', 'conv3_1.w

### 一个简单的模型初始化及模型参数恢复实例

In [6]:
vgg_name = 'vgg_16'
fcn_mode = 8
pretrained_path = 'fcn8s_from_caffe.pth'   
myfcn_net = get_myfcn(vgg_cfgs[vgg_name], fcn_mode, pretrained_path)
#print("创建模型对象成功")

fcn8s_net = get_fcn8s(pretrained_path)
#print("创建模型对象成功")

img = cv2.imread('dog.jpg')
img = img/255
img = np.transpose(img, (2,0,1))

input = torch.from_numpy(img)
input = input.unsqueeze(0)
input = torch.tensor(input,dtype=torch.float32)

print(input.shape)

my_pred = myfcn_net(input)
print(my_pred)
#fcn8s_pred = fcn8s_net(input)
#print(fcn8s_pred)
#isEqual = torch.equal(my_pred, fcn8s_pred)
#print("两种实现结果是否一致:{}".format(isEqual))

# # 保存模型
# torch.save(fcn_net.state_dict(), 'my_params.pth')
# print("模型保存完毕！！！")
# pretrained_path = 'my_params.pth'
# fcn_test = get_fcn(vgg_cfgs[vgg_name], fcn_mode, pretrained_path)
# print("模型加载完毕！！！")


开始加载预训练模型
创建模型对象成功
创建模型对象成功
torch.Size([1, 3, 1213, 1546])
tensor([[[[ 5.1597e+00,  5.3911e+00,  5.5899e+00,  ...,  8.0725e+00,
            8.0070e+00,  7.9085e+00],
          [ 5.3908e+00,  5.6326e+00,  5.8403e+00,  ...,  8.4336e+00,
            8.3651e+00,  8.2622e+00],
          [ 5.5884e+00,  5.8391e+00,  6.0543e+00,  ...,  8.7422e+00,
            8.6714e+00,  8.5648e+00],
          ...,
          [ 8.7452e+00,  9.1371e+00,  9.4732e+00,  ...,  1.4179e+01,
            1.4064e+01,  1.3891e+01],
          [ 8.7551e+00,  9.1474e+00,  9.4837e+00,  ...,  1.4185e+01,
            1.4070e+01,  1.3897e+01],
          [ 8.7650e+00,  9.1577e+00,  9.4943e+00,  ...,  1.4192e+01,
            1.4076e+01,  1.3903e+01]],

         [[-5.8455e-01, -6.1039e-01, -6.3246e-01,  ..., -9.6674e-01,
           -9.5810e-01, -9.4597e-01],
          [-6.1081e-01, -6.3782e-01, -6.6088e-01,  ..., -1.0099e+00,
           -1.0008e+00, -9.8811e-01],
          [-6.3401e-01, -6.6205e-01, -6.8597e-01,  ..., -1.0474e+00,

In [None]:
print(fcn8s_pred)