In [None]:
!tree /F ./data/dogcat  

In [4]:
import torch as t
from torch.utils import data

In [6]:
import os
from PIL import Image
import numpy as np

- **自定义的数据集需要继承Dataset**  
- **并且实现**
    - __getitem__
    - __len__

In [16]:
root = './data/dogcat/'
imgs = os.listdir(root)
imgs = [os.path.join(root,img) for img in imgs]
imgs

['./data/dogcat/cat.12484.jpg',
 './data/dogcat/cat.12485.jpg',
 './data/dogcat/cat.12486.jpg',
 './data/dogcat/cat.12487.jpg',
 './data/dogcat/dog.12496.jpg',
 './data/dogcat/dog.12497.jpg',
 './data/dogcat/dog.12498.jpg',
 './data/dogcat/dog.12499.jpg']

In [35]:
t.Tensor()

tensor([])

In [5]:
class DogCat(data.Dataset):
    
    def __init__(self,root):
        imgs = os.listdir(root)
        # 不实际加载图片，只是指定路径，当调用__getitem__时才会真正读图片
        self.imgs = [os.path.join(root,img) for img in imgs] 
        
    def __getitem__(self,index):
        img_path = self.imgs[index]
        label = 1 if 'dog' in img_path.split('/')[-1] else 0
        pil = Image.open(img_path) # open
        array = np.asarray(pil) # array
        data = t.from_numpy(array) # tensor
        return data,label
    
    def __len__(self):
        return len(self.imgs)

In [6]:
dataset = DogCat('./data/dogcat/')
# img,label = dataset[0]
for data,label in dataset:
    print(data.size(),img.float().mean(),label)

torch.Size([500, 497, 3]) 0
torch.Size([499, 379, 3]) 0
torch.Size([236, 289, 3]) 0
torch.Size([374, 499, 3]) 0
torch.Size([375, 499, 3]) 1
torch.Size([375, 499, 3]) 1
torch.Size([377, 499, 3]) 1
torch.Size([400, 300, 3]) 1


  del sys.path[0]


**torchvision.transforms**

In [10]:
from torchvision import transforms as T

In [4]:
transfrom = T.Compose([
    T.Resize(224), # 缩放图片(Image)，保持长宽比不变，最短边为224像素
    T.CenterCrop(224), # 从图片中间切出224*224的图片
    T.ToTensor(), # 将图片(Image)转成Tensor，归一化至[0, 1]
    T.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5]) # 标准化至[-1, 1]，规定均值和标准差
])

In [8]:
class DogCat(data.Dataset):
    
    def __init__(self,root,transfrom=None):
        imgs = os.listdir(root)
        # 不实际加载图片，只是指定路径，当调用__getitem__时才会真正读图片
        self.imgs = [os.path.join(root,img) for img in imgs]
        self.transform = transfrom
        
    def __getitem__(self,index):
        img_path = self.imgs[index]
        label = 1 if 'dog' in img_path.split('/')[-1] else 0
        data = Image.open(img_path)
        if self.transform:
            data = self.transform(data)
        return data,label
    
    def __len__(self):
        return len(self.imgs)

dataset = DogCat('./data/dogcat/',transfrom=transfrom)
for img, label in dataset:
    print(img.size(), img.float().mean(),label)

torch.Size([3, 224, 224]) tensor(-0.1655) 0
torch.Size([3, 224, 224]) tensor(0.3892) 0
torch.Size([3, 224, 224]) tensor(0.0711) 0
torch.Size([3, 224, 224]) tensor(-0.0462) 0
torch.Size([3, 224, 224]) tensor(-0.0649) 1
torch.Size([3, 224, 224]) tensor(0.1176) 1
torch.Size([3, 224, 224]) tensor(0.2234) 1
torch.Size([3, 224, 224]) tensor(-0.0267) 1


**ImageFolder**  
假设所有的文件按文件夹保存，每个文件夹下存储同一个类别的图片，文件夹名为类名

In [None]:
!tree /F  ./data/dogcat_2/

In [8]:
from torchvision.datasets import ImageFolder

In [14]:
dataset = ImageFolder('./data/dogcat_2/')
dataset.class_to_idx

{'cat': 0, 'dog': 1}

In [None]:
dataset.imgs

In [None]:
print(dataset[0][1]) # 第一张label
dataset[0][0] # 第一张图（Image对象）

In [9]:
transform = T.Compose([
    T.RandomResizedCrop(224),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean=[0.4, 0.4, 0.4], std=[0.2, 0.2, 0.2])
])

In [20]:
dataset = ImageFolder('./data/dogcat_2/', transform=transform)
dataset[0][0].size()

torch.Size([3, 224, 224])

In [None]:
to_img = T.ToPILImage()
to_img(dataset[0][0]*0.2+0.4)

**DataLoader**  
- Dataset只负责数据的抽象，一次调用__getitem__只返回一个样本
- 在训练神经网络时，最好是对一个batch的数据进行操作，同时还需要对数据进行shuffle和并行加速等

In [7]:
from torch.utils.data import DataLoader

In [33]:
# 可迭代对象
dataloader = DataLoader(dataset,batch_size=3,shuffle=True,num_workers=0,drop_last=False)
dataiter = iter(dataloader)
imags,labels = next(dataiter)
imags.size()

torch.Size([3, 3, 224, 224])

**如果某张图片损坏无法读取**  
- 过滤：这种情况下dataloader返回的batch数目会少于batch_size

In [7]:
class DogCat(data.Dataset):
    
    def __init__(self,root,transfrom=None):
        imgs = os.listdir(root)
        # 不实际加载图片，只是指定路径，当调用__getitem__时才会真正读图片
        self.imgs = [os.path.join(root,img) for img in imgs]
        self.transform = transfrom
        
    def __getitem__(self,index):
        try:
            img_path = self.imgs[index]
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
            data = Image.open(img_path)
            if self.transform:
                data = self.transform(data)
            return data,label
        except:
            return None,None # 返回空对象
    
    def __len__(self):
        return len(self.imgs)

In [36]:
from torch.utils.data.dataloader import default_collate # 导入默认的拼接方式

def my_collate_fn(batch): # batch-->(data,label)
    
    batch = list(filter(lambda x:x[0] is not None,batch))
    if len(batch)==0:
        return t.Tensor() # tensor([])
    return default_collate(batch) 

In [42]:
dataset = DogCat('data/dogcat_wrong/', transfrom=transform)

In [None]:
to_img(dataset[0][0]*0.2+0.4)

In [47]:
dataloader = DataLoader(dataset,2,collate_fn=my_collate_fn,num_workers=0,shuffle=True,drop_last=True)
for data,label in dataloader:
    print(data.size())

torch.Size([2, 3, 224, 224])
torch.Size([2, 3, 224, 224])
torch.Size([1, 3, 224, 224])
torch.Size([2, 3, 224, 224])


- 随机取一张图片代替

In [None]:
class DogCat(data.Dataset):
    
    def __init__(self,root,transfrom=None):
        imgs = os.listdir(root)
        # 不实际加载图片，只是指定路径，当调用__getitem__时才会真正读图片
        self.imgs = [os.path.join(root,img) for img in imgs]
        self.transform = transfrom
        
    def __getitem__(self,index):
        try:
            img_path = self.imgs[index]
            label = 1 if 'dog' in img_path.split('/')[-1] else 0
            data = Image.open(img_path)
            if self.transform:
                data = self.transform(data)
            return data,label
        except:
            new_index = random.randint(0,len(self)-1)
            return self[new_index]
    
    def __len__(self):
        return len(self.imgs)

**sampler**  
用来对数据进行采样

In [None]:
dataset = DogCat('./data/dogcat/',transfrom=transform)

w = [2 if label==1 else 1 for data,label in dataset]

In [None]:
from torch.utils.data.sampler import  WeightedRandomSampler
sampler = WeightedRandomSampler(weights,num_samples=9,replacement=True)
dataloader = DataLoader(dataset,batch_size=3,sampler=sampler)

**torchvision**  
- models
- datasets
- transforms

预训练模型位置：  
- win:C:\Users\wensh\.cache\torch\hub\checkpoints
- linux:~/.torch/models/

In [1]:
from torchvision import models
from torch import nn

resnet34 = models.squeezenet1_1(pretrained=True)
resnet34.fc=nn.Linear(512,10) # 修改最后的全连接层为10分类问题（默认是ImageNet上的1000分类）

Downloading: "https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth" to C:\Users\wensh/.cache\torch\hub\checkpoints\squeezenet1_1-f364aa15.pth


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4966400.0), HTML(value='')))




In [13]:
from torchvision import datasets
# 通过train=False获取测试集
dataset = datasets.MNIST('data/', download=True, train=False,transform=transform)
len(dataset)

10000

In [17]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

to_pil = transforms.ToPILImage()
to_pil(t.randn(3, 64, 64))
to_img = transforms.ToPILImage()

In [19]:
dataloader = DataLoader(dataset,shuffle=True,batch_size=16)

from torchvision.utils import make_grid,save_image
dataiter = iter(dataloader)
img = make_grid(next(dataiter)[0], 4) # next(dataiter) --> (data,label)
to_img(img)

[tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.9882, 0.9882, 0.9882],
           [0.0000, 0.0000, 0.0000,  ..., 0.9882, 0.9882, 0.9882],
           [0.0000, 0.0000, 0.0000,  ..., 0.9882, 0.9882, 0.9882]]],
 
 
         [[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000