#### 定义pytorch模型

In [1]:
import torch
import torch.nn as nn

class DecoderNet(nn.Module):
    def __init__(self):
        super(DecoderNet, self).__init__()

        self.resblock_41 = ResnetBlock(512)
        self.convblock_41 = ConvBlock(512,256)
        self.resblock_31 = ResnetBlock(256)
        self.convblock_31 = ConvBlock(256,128)

        self.convblock_21 = ConvBlock(128,128)
        self.convblock_22 = ConvBlock(128,64)

        self.convblock_11 = ConvBlock(64,64)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        final_conv = [nn.ReflectionPad2d((1, 1, 1, 1)),
                      nn.Conv2d(64, 3, (3, 3))]
        self.final_conv = nn.Sequential(*final_conv)

    def forward(self, cF, sF):

        out = adaptive_instance_normalization(cF['r41'], sF['r41'])
        out = self.resblock_41(out)
        out = self.convblock_41(out)

        out = self.upsample(out)
        out += adaptive_instance_normalization(cF['r31'], sF['r31'])
        out = self.resblock_31(out)
        out = self.convblock_31(out)
        
        out = self.upsample(out)
        out += adaptive_instance_normalization(cF['r21'], sF['r21'])
        out = self.convblock_21(out)
        out = self.convblock_22(out)

        out = self.upsample(out)
        out = self.convblock_11(out)
        out = self.final_conv(out)
        return out


vgg = nn.Sequential(
    nn.Conv2d(3, 3, (1, 1)),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(3, 64, (3, 3)),
    nn.ReLU(),  # relu1-1
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(64, 64, (3, 3)),
    nn.ReLU(),  # relu1-2
    nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(64, 128, (3, 3)),
    nn.ReLU(),  # relu2-1
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(128, 128, (3, 3)),
    nn.ReLU(),  # relu2-2
    nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(128, 256, (3, 3)),
    nn.ReLU(),  # relu3-1
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-2
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-3
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-4
    nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(256, 512, (3, 3)),
    nn.ReLU(),  # relu4-1, this is the last layer used
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-2
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-3
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-4
    nn.MaxPool2d((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-1
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-2
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-3
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(512, 512, (3, 3)),
    nn.ReLU()  # relu5-4
)


class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg_net = vgg
        vgg_net.load_state_dict(torch.load('/workspace/visCVPR2021/ZBK/pre_trained/vgg_normalised.pth'))
        self.enc_1 = nn.Sequential(*list(
            vgg_net.children())[:4])  # input -> relu1_1
        self.enc_2 = nn.Sequential(*list(
            vgg_net.children())[4:11])  # relu1_1 -> relu2_1
        self.enc_3 = nn.Sequential(*list(
            vgg_net.children())[11:18])  # relu2_1 -> relu3_1
        self.enc_4 = nn.Sequential(*list(
            vgg_net.children())[18:31])  # relu3_1 -> relu4_1
        self.enc_5 = nn.Sequential(*list(
            vgg_net.children())[31:44])  # relu4_1 -> relu5_1

    def forward(self, x):
        out = {}
        x = self.enc_1(x)
        out['r11'] = x
        x = self.enc_2(x)
        out['r21'] = x
        x = self.enc_3(x)
        out['r31'] = x
        x = self.enc_4(x)
        out['r41'] = x
        x = self.enc_5(x)
        out['r51'] = x
        return out

class ResnetBlock(nn.Module):
    def __init__(self, dim):
        super(ResnetBlock, self).__init__()
        conv_block = [nn.ReflectionPad2d((1, 1, 1, 1)),
                      nn.Conv2d(dim, dim, (3, 3)),
                      nn.ReLU(),
                      nn.ReflectionPad2d((1, 1, 1, 1)),
                      nn.Conv2d(dim, dim, (3, 3))]
        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, x):
        out = x + self.conv_block(x)  
        return out

class ConvBlock(nn.Module):
    def __init__(self, dim1, dim2):
        super(ConvBlock, self).__init__()
        conv_block = [nn.ReflectionPad2d((1, 1, 1, 1)),
                      nn.Conv2d(dim1, dim2, (3, 3)),
                      nn.ReLU()]
        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, x):
        out = self.conv_block(x)  
        return out

def calc_mean_std(feat, eps=1e-5):
    size = feat.size()
    assert (len(size) == 4)
    N, C = size[:2]
    feat_var = feat.view(N, C, -1).var(dim=2) + eps
    feat_std = feat_var.sqrt().view(N, C, 1, 1)
    feat_mean = feat.view(N, C, -1).mean(dim=2).view(N, C, 1, 1)
    return feat_mean, feat_std

def adaptive_instance_normalization(content_feat, style_feat):
    assert (content_feat.size()[:2] == style_feat.size()[:2])
    size = content_feat.size()
    style_mean, style_std = calc_mean_std(style_feat)
    content_mean, content_std = calc_mean_std(content_feat)

    normalized_feat = (content_feat - content_mean.expand(
        size)) / content_std.expand(size)
    return normalized_feat * style_std.expand(size) + style_mean.expand(size)


#### 在pytorch中，生成全1的tensor，得到输出

In [2]:
import numpy as np

encode = Encoder()
decode = DecoderNet()
decode.load_state_dict(torch.load('/workspace/visCVPR2021/ZBK/pre_trained/decoder_iter_10000.pth'))
encode.eval()
decode.eval()

img = np.ones([1,3,224,224]).astype("float32")
img =torch.from_numpy(img)
out1_t = encode(img)
out2_t = decode(out1_t,out1_t)
out2_t = np.array(out2_t.data.cpu().numpy())
print (out2_t[0,0,0,:6])

[1.0242335 1.0242335 1.0242335 1.0242335 1.0242335 1.0242335]


#### 定义paddle模型

In [3]:
import paddle
import paddle.nn as nn

class DecoderNet(nn.Layer):
    def __init__(self):
        super(DecoderNet, self).__init__()

        self.resblock_41 = ResnetBlock(512)
        self.convblock_41 = ConvBlock(512, 256)
        self.resblock_31 = ResnetBlock(256)
        self.convblock_31 = ConvBlock(256, 128)

        self.convblock_21 = ConvBlock(128, 128)
        self.convblock_22 = ConvBlock(128, 64)

        self.convblock_11 = ConvBlock(64, 64)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        self.final_conv = nn.Sequential(nn.Pad2D([1, 1, 1, 1], 
                                                  mode='reflect'),
                                        nn.Conv2D(64, 3, (3, 3)))

    def forward(self, cF, sF):

        out = adaptive_instance_normalization(cF['r41'], sF['r41'])
        out = self.resblock_41(out)
        out = self.convblock_41(out)

        out = self.upsample(out)
        out += adaptive_instance_normalization(cF['r31'], sF['r31'])
        out = self.resblock_31(out)
        out = self.convblock_31(out)

        out = self.upsample(out)
        out += adaptive_instance_normalization(cF['r21'], sF['r21'])
        out = self.convblock_21(out)
        out = self.convblock_22(out)

        out = self.upsample(out)
        out = self.convblock_11(out)
        out = self.final_conv(out)
        return out
    
vgg = nn.Sequential(
    nn.Conv2D(3, 3, (1, 1)),
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(3, 64, (3, 3)),
    nn.ReLU(),  # relu1-1
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(64, 64, (3, 3)),
    nn.ReLU(),  # relu1-2
    nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(64, 128, (3, 3)),
    nn.ReLU(),  # relu2-1
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(128, 128, (3, 3)),
    nn.ReLU(),  # relu2-2
    nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(128, 256, (3, 3)),
    nn.ReLU(),  # relu3-1
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-2
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-3
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(256, 256, (3, 3)),
    nn.ReLU(),  # relu3-4
    nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(256, 512, (3, 3)),
    nn.ReLU(),  # relu4-1, this is the last layer used
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-2
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-3
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu4-4
    nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-1
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-2
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU(),  # relu5-3
    nn.Pad2D([1, 1, 1, 1], mode='reflect'),
    nn.Conv2D(512, 512, (3, 3)),
    nn.ReLU()  # relu5-4
)


class Encoder(nn.Layer):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg_net = vgg
        vgg_net.set_dict(paddle.load('/workspace/visCVPR2021/ZBK/pre_trained/vgg_normalised.pdparams'))
        self.enc_1 = nn.Sequential(*list(
            vgg_net.children())[:4])  # input -> relu1_1
        self.enc_2 = nn.Sequential(*list(
            vgg_net.children())[4:11])  # relu1_1 -> relu2_1
        self.enc_3 = nn.Sequential(*list(
            vgg_net.children())[11:18])  # relu2_1 -> relu3_1
        self.enc_4 = nn.Sequential(*list(
            vgg_net.children())[18:31])  # relu3_1 -> relu4_1
        self.enc_5 = nn.Sequential(*list(
            vgg_net.children())[31:44])  # relu4_1 -> relu5_1

    def forward(self, x):
        out = {}
        x = self.enc_1(x)
        out['r11'] = x
        x = self.enc_2(x)
        
        out['r21'] = x
        x = self.enc_3(x)
        out['r31'] = x
        x = self.enc_4(x)
        out['r41'] = x
        x = self.enc_5(x)
        out['r51'] = x
        return out

class ResnetBlock(nn.Layer):
    def __init__(self, dim):
        super(ResnetBlock, self).__init__()
        self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], 
                                                  mode='reflect'),
                                        nn.Conv2D(dim, dim, (3, 3)), 
                                        nn.ReLU(),
                                        nn.Pad2D([1, 1, 1, 1], 
                                                  mode='reflect'),
                                        nn.Conv2D(dim, dim, (3, 3)))

    def forward(self, x):
        out = x + self.conv_block(x)
        return out

class ConvBlock(nn.Layer):
    def __init__(self, dim1, dim2):
        super(ConvBlock, self).__init__()
        self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], 
                                                  mode='reflect'),
                                        nn.Conv2D(dim1, dim2, (3, 3)),
                                        nn.ReLU())

    def forward(self, x):
        out = self.conv_block(x)
        return out

def calc_mean_std(feat, eps=1e-5):
    size = feat.shape
    assert (len(size) == 4)
    N, C = size[:2]
    feat_var = feat.reshape([N, C, -1])
    feat_var = paddle.var(feat_var, axis=2) + eps
    feat_std = paddle.sqrt(feat_var)
    feat_std = feat_std.reshape([N, C, 1, 1])
    feat_mean = feat.reshape([N, C, -1])
    feat_mean = paddle.mean(feat_mean, axis=2)
    feat_mean = feat_mean.reshape([N, C, 1, 1])
    return feat_mean, feat_std

def adaptive_instance_normalization(content_feat, style_feat):
    assert (content_feat.shape[:2] == style_feat.shape[:2])
    size = content_feat.shape
    style_mean, style_std = calc_mean_std(style_feat)
    content_mean, content_std = calc_mean_std(content_feat)

    normalized_feat = (content_feat -
                       content_mean.expand(size)) / content_std.expand(size)
    return normalized_feat * style_std.expand(size) + style_mean.expand(size)

#### 在paddlepaddle中，生成全1的tensor，得到输出

In [4]:
import numpy as np

encode = Encoder()
decode = DecoderNet()
decode.set_dict(paddle.load('/workspace/visCVPR2021/ZBK/pre_trained/decoder_iter_10000.pdparams'))
encode.eval()
decode.eval()

img = np.ones([1,3,224,224]).astype("float32")
img =paddle.to_tensor(img)
out1_p = encode(img)
out2_p = decode(out1_p,out1_p)
out2_p = np.array(out2_p.numpy())
print (out2_p[0,0,0,:6])

  and should_run_async(code)


[1.0242336 1.0242336 1.0242336 1.0242336 1.0242336 1.0242336]


#### 比较paddle和torch的输出的差异

In [5]:
import numpy as np
np.testing.assert_allclose(out2_t, out2_p)
print('out2_t is equal to out2_p')

AssertionError: 
Not equal to tolerance rtol=1e-07, atol=0

Mismatched elements: 100352 / 150528 (66.7%)
Max absolute difference: 1.1920929e-07
Max relative difference: 1.16388776e-07
 x: array([[[[1.024233, 1.024233, 1.024233, ..., 1.024233, 1.024233,
          1.024233],
         [1.024233, 1.024233, 1.024233, ..., 1.024233, 1.024233,...
 y: array([[[[1.024234, 1.024234, 1.024234, ..., 1.024234, 1.024234,
          1.024234],
         [1.024234, 1.024234, 1.024234, ..., 1.024234, 1.024234,...

### 比较输出
若paddle和torch的输出数据差异很小（万分之一误差内），则视为前向对齐，继续下一步；

若差异较大，需要打印中间数据，输出并对比差异，定位差异点，并分析问题所在。