### 定义Paddle模型
这里以刚才定义的DecoderNet模型为例，Encoder权重转换同理。

In [1]:
import paddle
import paddle.nn as nn

class DecoderNet(nn.Layer):
    """Decoder that merges AdaIN-processed features at three levels.

    The network goes from 512 channels down to 3 output channels
    (512 -> 256 -> 128 -> 64 -> 3) and upsamples by a factor of 2 three
    times using nearest-neighbour interpolation.

    Sub-layers are registered in a fixed order; the order (and the
    attribute names) determine the keys of ``state_dict()``, which the
    weight-conversion code relies on.
    """

    def __init__(self):
        super().__init__()

        # 'r41' level: 512 channels in, 256 out.
        self.resblock_41 = ResnetBlock(512)
        self.convblock_41 = ConvBlock(512, 256)
        # 'r31' level: 256 channels in, 128 out.
        self.resblock_31 = ResnetBlock(256)
        self.convblock_31 = ConvBlock(256, 128)

        # 'r21' level: 128 channels in, 64 out.
        self.convblock_21 = ConvBlock(128, 128)
        self.convblock_22 = ConvBlock(128, 64)

        self.convblock_11 = ConvBlock(64, 64)
        # A single parameter-free upsampling layer shared by all stages.
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        # Output head: reflect-pad by 1 then 3x3 conv, no activation.
        reflect_pad = nn.Pad2D([1, 1, 1, 1], mode='reflect')
        out_conv = nn.Conv2D(64, 3, (3, 3))
        self.final_conv = nn.Sequential(reflect_pad, out_conv)

    def forward(self, cF, sF):
        """Decode the feature mappings ``cF`` and ``sF`` into a 3-channel tensor.

        Args:
            cF: mapping with keys 'r41', 'r31' and 'r21'; its values are
                passed as ``content_feat`` to adaptive_instance_normalization.
            sF: mapping with the same keys; its values are passed as
                ``style_feat``.

        Returns:
            The output of ``final_conv`` after the last upsampling stage.
        """
        # Deepest level.
        fused = adaptive_instance_normalization(cF['r41'], sF['r41'])
        fused = self.convblock_41(self.resblock_41(fused))

        # Upsample, then add the AdaIN result of the next level in place.
        fused = self.upsample(fused)
        fused += adaptive_instance_normalization(cF['r31'], sF['r31'])
        fused = self.convblock_31(self.resblock_31(fused))

        fused = self.upsample(fused)
        fused += adaptive_instance_normalization(cF['r21'], sF['r21'])
        fused = self.convblock_22(self.convblock_21(fused))

        fused = self.upsample(fused)
        return self.final_conv(self.convblock_11(fused))

class ResnetBlock(nn.Layer):
    """Residual block that keeps the channel count at ``dim``.

    The residual branch is: reflect-pad(1) -> 3x3 conv -> ReLU ->
    reflect-pad(1) -> 3x3 conv. The two convolutions sit at indices 1 and 4
    of ``conv_block``, which is what the parameter keys refer to.
    """

    def __init__(self, dim):
        super().__init__()
        branch = [
            nn.Pad2D([1, 1, 1, 1], mode='reflect'),
            nn.Conv2D(dim, dim, (3, 3)),
            nn.ReLU(),
            nn.Pad2D([1, 1, 1, 1], mode='reflect'),
            nn.Conv2D(dim, dim, (3, 3)),
        ]
        self.conv_block = nn.Sequential(*branch)

    def forward(self, x):
        """Return ``x`` plus the output of the residual branch applied to ``x``."""
        return x + self.conv_block(x)

class ConvBlock(nn.Layer):
    """Reflect-pad(1) -> 3x3 conv from ``dim1`` to ``dim2`` channels -> ReLU."""

    def __init__(self, dim1, dim2):
        super().__init__()
        stages = [
            nn.Pad2D([1, 1, 1, 1], mode='reflect'),
            nn.Conv2D(dim1, dim2, (3, 3)),
            nn.ReLU(),
        ]
        self.conv_block = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the pad/conv/ReLU stack to ``x`` and return the result."""
        return self.conv_block(x)

def calc_mean_std(feat, eps=1e-5):
    """Return the per-sample, per-channel mean and standard deviation of ``feat``.

    Args:
        feat: 4-D tensor; the first two dimensions are kept and all
            remaining dimensions are flattened before reducing.
        eps: value added to the variance before the square root so the
            standard deviation is never exactly zero.

    Returns:
        ``(mean, std)``, each reshaped to ``[N, C, 1, 1]`` where ``N, C`` are
        the first two dimensions of ``feat``.
    """
    shape = feat.shape
    assert (len(shape) == 4)
    batch, channels = shape[:2]
    stat_shape = [batch, channels, 1, 1]

    # Flatten everything after the channel axis once; both statistics reduce over it.
    flat = feat.reshape([batch, channels, -1])
    std = paddle.sqrt(paddle.var(flat, axis=2) + eps).reshape(stat_shape)
    mean = paddle.mean(flat, axis=2).reshape(stat_shape)
    return mean, std

def adaptive_instance_normalization(content_feat, style_feat):
    """Re-normalize ``content_feat`` to the channel statistics of ``style_feat``.

    The content features are shifted and scaled by their own per-channel
    mean/std (from calc_mean_std), then scaled and shifted by the style
    features' std and mean.

    Args:
        content_feat: tensor whose statistics are replaced.
        style_feat: tensor supplying the target statistics; its first two
            dimensions must equal those of ``content_feat``.

    Returns:
        A tensor with the shape of ``content_feat``.
    """
    assert (content_feat.shape[:2] == style_feat.shape[:2])
    target_shape = content_feat.shape
    s_mean, s_std = calc_mean_std(style_feat)
    c_mean, c_std = calc_mean_std(content_feat)

    # The [N, C, 1, 1] statistics are expanded to the full content shape.
    centered = content_feat - c_mean.expand(target_shape)
    standardized = centered / c_std.expand(target_shape)
    return standardized * s_std.expand(target_shape) + s_mean.expand(target_shape)


#### 看一下paddle模型的参数

In [2]:
# Instantiate the Paddle decoder and grab its state dict; `pw` is reused by
# the later cells to compare against the PyTorch checkpoint.
model_p = DecoderNet()
pw = model_p.state_dict()
# Show how many parameter tensors there are and their keys (in registration order).
print('paddle num_params:', len(pw), 'paddle params:', pw.keys())

  and should_run_async(code)


paddle num_params: 20 paddle params: odict_keys(['resblock_41.conv_block.1.weight', 'resblock_41.conv_block.1.bias', 'resblock_41.conv_block.4.weight', 'resblock_41.conv_block.4.bias', 'convblock_41.conv_block.1.weight', 'convblock_41.conv_block.1.bias', 'resblock_31.conv_block.1.weight', 'resblock_31.conv_block.1.bias', 'resblock_31.conv_block.4.weight', 'resblock_31.conv_block.4.bias', 'convblock_31.conv_block.1.weight', 'convblock_31.conv_block.1.bias', 'convblock_21.conv_block.1.weight', 'convblock_21.conv_block.1.bias', 'convblock_22.conv_block.1.weight', 'convblock_22.conv_block.1.bias', 'convblock_11.conv_block.1.weight', 'convblock_11.conv_block.1.bias', 'final_conv.1.weight', 'final_conv.1.bias'])


#### 加载torch模型参数

In [3]:
import torch
# Load the PyTorch checkpoint; `tw` is reused by the later cells.
# map_location='cpu' lets a checkpoint that was saved from GPU be loaded on a
# CPU-only machine and keeps every tensor convertible with .numpy().
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
tw = torch.load('/workspace/visCVPR2021/ZBK/pre_trained/decoder_iter_10000.pth',
                map_location='cpu')
print('torch num_params:', len(tw), 'torch params:', tw.keys())

torch num_params: 20 torch params: odict_keys(['resblock_41.conv_block.1.weight', 'resblock_41.conv_block.1.bias', 'resblock_41.conv_block.4.weight', 'resblock_41.conv_block.4.bias', 'convblock_41.conv_block.1.weight', 'convblock_41.conv_block.1.bias', 'resblock_31.conv_block.1.weight', 'resblock_31.conv_block.1.bias', 'resblock_31.conv_block.4.weight', 'resblock_31.conv_block.4.bias', 'convblock_31.conv_block.1.weight', 'convblock_31.conv_block.1.bias', 'convblock_21.conv_block.1.weight', 'convblock_21.conv_block.1.bias', 'convblock_22.conv_block.1.weight', 'convblock_22.conv_block.1.bias', 'convblock_11.conv_block.1.weight', 'convblock_11.conv_block.1.bias', 'final_conv.1.weight', 'final_conv.1.bias'])


#### 对比paddle和torch的模型参数

In [4]:
# Walk both state dicts in lockstep and print each pair of keys with their
# shapes so that name/shape mismatches are visible at a glance.
for kt, kp in zip(tw, pw):
    torch_shape, paddle_shape = tw[kt].shape, pw[kp].shape
    print(kt, torch_shape, '####', kp, paddle_shape)

resblock_41.conv_block.1.weight torch.Size([512, 512, 3, 3]) #### resblock_41.conv_block.1.weight [512, 512, 3, 3]
resblock_41.conv_block.1.bias torch.Size([512]) #### resblock_41.conv_block.1.bias [512]
resblock_41.conv_block.4.weight torch.Size([512, 512, 3, 3]) #### resblock_41.conv_block.4.weight [512, 512, 3, 3]
resblock_41.conv_block.4.bias torch.Size([512]) #### resblock_41.conv_block.4.bias [512]
convblock_41.conv_block.1.weight torch.Size([256, 512, 3, 3]) #### convblock_41.conv_block.1.weight [256, 512, 3, 3]
convblock_41.conv_block.1.bias torch.Size([256]) #### convblock_41.conv_block.1.bias [256]
resblock_31.conv_block.1.weight torch.Size([256, 256, 3, 3]) #### resblock_31.conv_block.1.weight [256, 256, 3, 3]
resblock_31.conv_block.1.bias torch.Size([256]) #### resblock_31.conv_block.1.bias [256]
resblock_31.conv_block.4.weight torch.Size([256, 256, 3, 3]) #### resblock_31.conv_block.4.weight [256, 256, 3, 3]
resblock_31.conv_block.4.bias torch.Size([256]) #### resblock_31.conv_block.4.bias [256]
...（其余参数的输出被截断）

  and should_run_async(code)


若paddle参数的顺序与torch不一致，则需要调整paddle参数key的顺序，使其与torch参数一一对应。

In [5]:
from collections import OrderedDict
import pickle

# adatpt_paddle_keys holds the Paddle parameter keys arranged so that the i-th
# entry corresponds to the i-th key of the PyTorch checkpoint `tw`.
adatpt_paddle_keys = ['resblock_41.conv_block.1.weight', 'resblock_41.conv_block.1.bias',
                      'resblock_41.conv_block.4.weight', 'resblock_41.conv_block.4.bias',
                      'convblock_41.conv_block.1.weight', 'convblock_41.conv_block.1.bias',
                      'resblock_31.conv_block.1.weight', 'resblock_31.conv_block.1.bias',
                      'resblock_31.conv_block.4.weight', 'resblock_31.conv_block.4.bias',
                      'convblock_31.conv_block.1.weight', 'convblock_31.conv_block.1.bias',
                      'convblock_21.conv_block.1.weight', 'convblock_21.conv_block.1.bias',
                      'convblock_22.conv_block.1.weight', 'convblock_22.conv_block.1.bias',
                      'convblock_11.conv_block.1.weight', 'convblock_11.conv_block.1.bias',
                      'final_conv.1.weight', 'final_conv.1.bias']

# zip() would silently drop the unmatched tail and write an incomplete
# checkpoint, so refuse to continue when the two key lists differ in length.
torch_keys = list(tw.keys())
if len(torch_keys) != len(adatpt_paddle_keys):
    raise ValueError(
        'torch checkpoint has %d parameters but %d paddle keys were given'
        % (len(torch_keys), len(adatpt_paddle_keys)))

# Build an ordered dict whose keys are the Paddle parameter names and whose
# values are the corresponding PyTorch tensors converted to numpy arrays.
paddle_new_dict = OrderedDict()
for torch_key, paddle_key in zip(torch_keys, adatpt_paddle_keys):
    # .cpu() makes the conversion work even if the tensor lives on a GPU.
    # Every parameter here belongs to a Conv2D layer and is copied as is;
    # a fully connected layer's weight would additionally need a transpose
    # (append .T to the numpy array).
    paddle_new_dict[paddle_key] = tw[torch_key].detach().cpu().numpy()

# The context manager closes the file even if pickling fails part-way.
with open('/workspace/visCVPR2021/ZBK/pre_trained/decoder_iter_10000_p.pdparams', 'wb') as f:
    pickle.dump(paddle_new_dict, f)

### 注意事项
1. 如果有Dropout层，需要注释掉，否则影响权重转换。
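2. FC层的权重需要转置：paddle的`nn.Linear`权重形状为`[in_features, out_features]`，而torch的`nn.Linear`权重形状为`[out_features, in_features]`。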