In [11]:
import os,sys,glob,shutil,json
import cv2
from PIL import Image
import numpy as np
import torch
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms

In [12]:
# 自定义数据集 Dataset：对数据集的封装，提供索引方式的对数据样本进行读取
class SVHNDataset(Dataset):  
    # 初始化
    def __init__(self,img_path,img_label,transform=None):  # 参数：（图像，图像的标签，可选参数：图像的预处理）
        self.img_path = img_path
        self.img_label = img_label
        if transform is not None:
            self.transform = transform
        else:
            self.transform = None
    # 获取特定索引的数据
    def __getitem__(self,index):
        img = Image.open(self.img_path[index]).convert('RGB')  # 将索引为index的图像转化为RGB格式的数据 

        if self.transform is not None:  # 如果需要对数据进行预处理
            img = self.transform(img)

        lbl = np.array(self.img_label[index],dtype=np.int32)  # 将标签数据加载为一个整数数组
        # lbl = list(lbl)  如果只是这样写，不会填充
        lbl = list(lbl) + (6-len(lbl)) * [10]  # 扩充这个数组  为了统一运算都变成定长的，不够6个的补10
        
        return img,torch.from_numpy(np.array(lbl[:6]))  # 返回的是索引为index的图像和标签

    # 数据集的长度
    def __len__(self):
        return len(self.img_path)



In [18]:
train_path = glob.glob('../data/mchar_train/*.png')  # mchar_train文件中的所有图片
train_path.sort()  # 按字母顺序排列
train_json = json.load(open('../data/train.json'))  # 载入json文件
train_label = [train_json[x]['label'] for x in train_json]  # 在标签的json文件中，将标签label的内容提取出来，存储为列表list[]

train_data = SVHNDataset(train_path,train_label,
                    transforms.Compose([
                        # 缩放到固定尺寸
                        transforms.Resize((64,128)),
                        # 随机颜色变换
                        transforms.ColorJitter(0.2,0.2,0.2),
                        # 加入随机旋转
                        transforms.RandomRotation(5),
                        # 将图片转换为tensor格式
                        transforms.ToTensor()
                        # 对图像像素进行归一化
                    ]))

In [19]:
data[29979]

(tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.5569, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.5569, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.5569, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.5608,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.5608,  ..., 0.0000, 0.0000, 0.0000]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.5490, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.5490, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.5490, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.5569,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.5569,  ..., 0.0000, 0.0000, 0.0000]],
 
         [[0.0000, 0.0000, 0.0000,  ..., 0.5333, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.5333, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ...,

In [15]:
data[1][0]  #取出图像的tensor表示，三维的[[[...]]]

tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.5373, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.5294, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.5333, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.4902,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.4902,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.4902,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.5725, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.5725, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.5765, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.5333,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5333,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.5333,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.4863, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.4902, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.4941, 0.0000, 0.

In [16]:
data[1][1]  # 取出该图像的标签，固定长度为6，不足的补10（一个特殊的符号，标识中不会含有的）

tensor([ 2,  3, 10, 10, 10, 10])

In [21]:
val_path = glob.glob('../data/mchar_val/*.png')  # mchar_train文件中的所有图片
val_path.sort()  # 按字母顺序排列
val_json = json.load(open('../data/val.json'))  # 载入json文件
val_label = [val_json[x]['label'] for x in val_json]  # 在标签的json文件中，将标签label的内容提取出来，存储为列表list[]

val_data = SVHNDataset(val_path,val_label,
                    transforms.Compose([
                        # 缩放到固定尺寸
                        transforms.Resize((64,128)),
                        # 随机颜色变换
                        transforms.ColorJitter(0.2,0.2,0.2),
                        # 加入随机旋转
                        transforms.RandomRotation(5),
                        # 将图片转换为tensor格式
                        transforms.ToTensor()
                        # 对图像像素进行归一化
                    ]))

In [23]:
val_data[7]

(tensor([[[0.1412, 0.1647, 0.1725,  ..., 0.2667, 0.2667, 0.2745],
          [0.1451, 0.1647, 0.1765,  ..., 0.2627, 0.2667, 0.2745],
          [0.1804, 0.1765, 0.1686,  ..., 0.2471, 0.2549, 0.2667],
          ...,
          [0.2039, 0.1961, 0.1922,  ..., 0.1804, 0.1804, 0.1804],
          [0.2039, 0.1961, 0.1922,  ..., 0.1804, 0.1804, 0.1804],
          [0.2078, 0.1961, 0.1922,  ..., 0.1882, 0.1843, 0.1804]],
 
         [[0.1647, 0.1922, 0.1882,  ..., 0.1804, 0.1882, 0.1922],
          [0.1569, 0.1804, 0.1882,  ..., 0.1882, 0.1961, 0.1961],
          [0.1843, 0.1843, 0.1725,  ..., 0.1804, 0.1882, 0.1961],
          ...,
          [0.1765, 0.1686, 0.1608,  ..., 0.1333, 0.1373, 0.1412],
          [0.1765, 0.1686, 0.1608,  ..., 0.1333, 0.1373, 0.1373],
          [0.1765, 0.1686, 0.1608,  ..., 0.1333, 0.1412, 0.1373]],
 
         [[0.1294, 0.1569, 0.1647,  ..., 0.1216, 0.1255, 0.1294],
          [0.1333, 0.1608, 0.1647,  ..., 0.1216, 0.1255, 0.1294],
          [0.1647, 0.1608, 0.1529,  ...,

In [17]:
# 加入DataLoader的数据读取函数  DataLoader：对Dataset进行封装，提供批量读取的迭代读取
train_loader = torch.utils.data.DataLoader(
    SVHNDataset(train_path,train_label,
                transforms.Compose([ # 数据扩增
                    transforms.Resize((64, 128)),
                    transforms.ColorJitter(0.3, 0.3, 0.2),
                    transforms.RandomRotation(5),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
    ),
    batch_size = 10, # 每批样本个数
    shuffle = False, # 是否打乱顺序
    num_workers = 10 # 读取的线程个数
)

# 加入dataloader后，数据data的格式为  torch.Size([10,3,64,128]),torch.Size([10,6])