### 定义torch的FlatFolderDataset

注意将随机的部分固定住。这里随机的部分为RandomCrop：图像先被resize到128×128，而裁剪尺寸同为128，因此裁剪窗口唯一，输出是确定的。

In [3]:
import torch
from torchvision import transforms
from torch.utils.data import Dataset
import os
from PIL import Image

class FlatFolderDataset(Dataset):
    """Pair each entry of a flat content folder with one fixed style image.

    Every sample is a ``(content_img, style_img)`` tuple. Both images are
    opened with PIL, converted to RGB, resized to 128x128 with bilinear
    interpolation and passed through the transform from ``data_transform``.
    """

    def __init__(self, content_root, style_root):
        """Record the data locations and build the shared transform.

        Args:
            content_root: Directory whose entries are used as content images.
            style_root: Path of the single style image file.
        """
        super(FlatFolderDataset, self).__init__()
        self.content_root = content_root
        # Entries are kept in whatever order os.listdir returns them.
        self.paths = os.listdir(self.content_root)
        self.style_root = style_root
        self.transform = self.data_transform(128)

    def __getitem__(self, index):
        """Load and transform the content image at ``index`` plus the style image.

        Args:
            index: Position in ``self.paths`` of the content image to load.

        Returns:
            Tuple ``(content_img, style_img)`` of transformed images.
        """
        content_path = os.path.join(self.content_root, self.paths[index])
        # convert() returns an independent image, so the underlying file can
        # be closed right away instead of waiting for garbage collection.
        with Image.open(content_path) as content_file:
            content_img = content_file.convert('RGB')
        content_img = content_img.resize((128, 128), Image.BILINEAR)
        content_img = self.transform(content_img)

        with Image.open(self.style_root) as style_file:
            style_img = style_file.convert('RGB')
        style_img = style_img.resize((128, 128), Image.BILINEAR)
        # Keep at most the first three channels of the transformed style image.
        style_img = self.transform(style_img)[:3, :, :]
        return content_img, style_img

    def data_transform(self, crop_size=128):
        """Build the transform: ``RandomCrop(crop_size)`` then ``ToTensor``.

        Args:
            crop_size: Side length handed to ``RandomCrop``. ``__init__`` passes
                128, which equals the resized image size, so only one crop
                window is possible.

        Returns:
            A ``transforms.Compose`` wrapping the two steps.
        """
        transform_list = [
            transforms.RandomCrop(crop_size),
            transforms.ToTensor(),
        ]
        return transforms.Compose(transform_list)

    def __len__(self):
        """Return the number of entries found in ``content_root``."""
        return len(self.paths)

    def name(self):
        """Return the dataset's display name."""
        return 'FlatFolderDataset'

### torch读入一张图片
这里'/workspace/visCVPR2021/ZBK/data/coco/test1/'中只有一张图像。

In [4]:
from torch.utils.data import DataLoader
import numpy as np
# Build the torch dataset over the content folder and the single style image.
dataset = FlatFolderDataset(
    '/workspace/visCVPR2021/ZBK/data/coco/test1/',
    '/workspace/visCVPR2021/ZBK/data/starrynew.png',
)
data_iter = DataLoader(dataset, batch_size=1, num_workers=0)

# Only the first batch is needed for the comparison; stop once past it.
for batch_index, batch in enumerate(data_iter):
    if batch_index >= 1:
        break
    content_batch, style_batch = batch
    # Keep NumPy copies so the values can be compared with the paddle output.
    content_image_t = np.array(content_batch.data.cpu().numpy())
    print('content_image_t:', content_image_t[0, 0, 0, :6])
    style_image_t = np.array(style_batch.data.cpu().numpy())
    print('style_image_t:', style_image_t[0, 0, 0, :6])


content_image_t: [0.45882353 0.45490196 0.4509804  0.45490196 0.47058824 0.48235294]
style_image_t: [0.1254902  0.10196079 0.16078432 0.2901961  0.20784314 0.26666668]


### 定义paddle的LapStyleDataset

注意将随机的部分固定住。这里随机的部分为RandomCrop：图像先被resize到128×128，而裁剪尺寸同为128，因此裁剪窗口唯一，输出是确定的。

In [5]:
import paddle
from paddle.vision import transforms
from paddle.io import Dataset
import os
from PIL import Image

class LapStyleDataset(Dataset):
    """Pair each entry of a flat content folder with one fixed style image.

    Every sample is a ``(content_img, style_img)`` tuple. Both images are
    opened with PIL, converted to RGB, resized to 128x128 with bilinear
    interpolation and passed through the transform from ``data_transform``.
    """

    def __init__(self, content_root, style_root):
        """Record the data locations and build the shared transform.

        Args:
            content_root: Directory whose entries are used as content images.
            style_root: Path of the single style image file.
        """
        super(LapStyleDataset, self).__init__()
        self.content_root = content_root
        # Entries are kept in whatever order os.listdir returns them.
        self.paths = os.listdir(self.content_root)
        self.style_root = style_root
        self.transform = self.data_transform(128)

    def __getitem__(self, index):
        """Load and transform the content image at ``index`` plus the style image.

        Args:
            index: Position in ``self.paths`` of the content image to load.

        Returns:
            Tuple ``(content_img, style_img)`` of transformed images.
        """
        content_path = os.path.join(self.content_root, self.paths[index])
        # convert() returns an independent image, so the underlying file can
        # be closed right away instead of waiting for garbage collection.
        with Image.open(content_path) as content_file:
            content_img = content_file.convert('RGB')
        content_img = content_img.resize((128, 128), Image.BILINEAR)
        content_img = self.transform(content_img)

        with Image.open(self.style_root) as style_file:
            style_img = style_file.convert('RGB')
        style_img = style_img.resize((128, 128), Image.BILINEAR)
        # Keep at most the first three channels of the transformed style image.
        style_img = self.transform(style_img)[:3, :, :]
        return content_img, style_img

    def data_transform(self, crop_size=136):
        """Build the transform: ``RandomCrop(crop_size)`` then ``ToTensor``.

        Args:
            crop_size: Side length handed to ``RandomCrop``. ``__init__`` passes
                128, which equals the resized image size, so only one crop
                window is possible. NOTE(review): the default of 136 is never
                used in this class and is larger than the 128x128 images
                produced in ``__getitem__`` - confirm it is intentional.

        Returns:
            A ``transforms.Compose`` wrapping the two steps.
        """
        transform_list = [
            transforms.RandomCrop(crop_size),
            transforms.ToTensor(),
        ]
        return transforms.Compose(transform_list)

    def __len__(self):
        """Return the number of entries found in ``content_root``."""
        return len(self.paths)

    def name(self):
        """Return the dataset's display name."""
        return 'LapStyleDataset'

### paddle读入一张图片
这里'/workspace/visCVPR2021/ZBK/data/coco/test1/'中只有一张图像。

In [6]:
from paddle.io import DataLoader
# Build the paddle dataset over the same content folder and style image.
dataset = LapStyleDataset(
    '/workspace/visCVPR2021/ZBK/data/coco/test1/',
    '/workspace/visCVPR2021/ZBK/data/starrynew.png',
)
data_iter = DataLoader(dataset, batch_size=1, num_workers=0)

# Only the first batch is needed for the comparison; stop once past it.
for batch_index, batch in enumerate(data_iter):
    if batch_index >= 1:
        break
    content_batch, style_batch = batch
    # Keep NumPy copies so the values can be compared with the torch output.
    content_image_p = np.array(content_batch.numpy())
    print('content_image_p:', content_image_p[0, 0, 0, :6])
    style_image_p = np.array(style_batch.numpy())
    print('style_image_p:', style_image_p[0, 0, 0, :6])


  and should_run_async(code)


content_image_p: [0.45882356 0.454902   0.45098042 0.454902   0.47058827 0.48235297]
style_image_p: [0.1254902  0.10196079 0.16078432 0.2901961  0.20784315 0.26666668]


### 比较paddle和torch的输出数据

In [7]:
import numpy as np
# The alignment criterion for this check is an error within 1e-4. The default
# rtol of assert_allclose (1e-7) is far stricter and sits at the edge of
# float32 rounding noise, so the tolerance is stated explicitly.
np.testing.assert_allclose(content_image_t, content_image_p, rtol=1e-4)
print('content_image_t is equal to content_image_p')
np.testing.assert_allclose(style_image_t, style_image_p, rtol=1e-4)
print('style_image_t is equal to style_image_p')

content_image_t is equal to content_image_p
style_image_t is equal to style_image_p


### 比较输出
若paddle和torch的输出数据差异很小（万分之一误差内），则视为数据处理正确，继续下一步；

若差异较大，需要打印中间数据，输出并对比差异，定位差异点，并分析问题所在。

### 注意：
1. 在对齐数据处理时，要将随机的部分固定住，如随机剪裁等。
2. 确保torch与paddle读入的是同一张图像（可以建立一个只有一张图像的文件夹）