# 一、标签在文件夹上的数据加载

>数据集文件夹  
>>标签（文件夹）  
>>>图片.jpg  

In [None]:
import os

import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader #数据集打开加载工具

from torchvision import transforms,utils,datasets # 图片增强工具

from PIL import Image

## 1.定义transform增强数据

```python
from torchvision import transforms
```

**1.定义transform参数**
```python
trans变量 = transforms.Compose([
    # 缩放图片Image，保持长宽比不变，最短边为__像素
    transforms.Resize(缩放像素), 
    # 从图片中间切出__*__图片
    transforms.CenterCrop(像素), 
    # 将图片Image转成Tensor格式(通道,高,宽),并归一化至[0,1]
    transforms.ToTensor(),        
])
```

**2.用transform变量变换处理图片**
```python
图片img = trans变量(图片img)
```

In [None]:
# Preprocessing pipeline applied to each image as it is loaded.
data_transform = transforms.Compose(
    [
        # Resize so the shorter side is 32 px; the aspect ratio is preserved.
        transforms.Resize(32),
        # Cut a 32x32 patch out of the centre of the resized image.
        transforms.CenterCrop(32),
        # PIL image -> tensor laid out as (channels, height, width),
        # with values scaled into [0, 1].
        transforms.ToTensor(),
    ]
)

## 2.使用ImageFolder进行数据集导入(生成数据集dataset)

```python
from torchvision import datasets
```

**1.不使用transform增强导入生成数据集**
```python
数据集变量 = datasets.ImageFolder(
    root = './训练数据根文件夹路径',  
    transform = None          
)
```

**2.使用transform增强导入生成数据集**
```python
数据集变量 = datasets.ImageFolder(
    root = './训练数据根文件夹路径',  
    transform = trans变量          
)
```

In [None]:
# Dataset read from a folder tree (one sub-folder per label); every image is
# run through data_transform when it is fetched.
dataset = datasets.ImageFolder(root='./horses-or-human', transform=data_transform)

# Preview the first sample: its label, the tensor shape, then the tensor itself.
img, label = dataset[0]
print(label)
print(img.shape)
img

## 3.加载数据集（loader）

```python
from torch.utils.data import DataLoader 
```

```python
loader变量 = DataLoader(
    数据集, 
    batch_size = 一次训练图片数, 
    shuffle = True/False  # 是否随机打乱数据
)
```

In [None]:
# Serve the dataset in shuffled batches of 256 samples.
dataset_loader = DataLoader(
    dataset=dataset,
    batch_size=256,
    shuffle=True,
)

---
---
---
# 二、标签在图片命名上的数据加载

In [None]:
import os

import torch
from torch.utils.data import Dataset, TensorDataset, DataLoader #数据集打开加载工具

from torchvision import transforms,utils,datasets # 图片增强工具

from PIL import Image

## 1.定义transform增强数据

```python
from torchvision import transforms
```

**1.定义transform参数**
```python
trans变量 = transforms.Compose([
    # 缩放图片Image，保持长宽比不变，最短边为__像素
    transforms.Resize(缩放像素), 
    # 从图片中间切出__*__图片
    transforms.CenterCrop(像素), 
    # 将图片Image转成Tensor格式(通道,高,宽),并归一化至[0,1]
    transforms.ToTensor(),        
])
```

**2.用transform变量变换处理图片**
```python
图片img = trans变量(图片img)
```

In [None]:
# Image preprocessing: resize, centre-crop, then convert to a tensor.
data_transform = transforms.Compose(
    [
        # Scale the image so its shorter edge becomes 32 px (aspect ratio kept).
        transforms.Resize(32),
        # Keep only the central 32x32 region.
        transforms.CenterCrop(32),
        # Convert the PIL image to a (channels, height, width) tensor
        # whose values are normalised to [0, 1].
        transforms.ToTensor(),
    ]
)

## 2.利用Dataset类进行数据导入(重载Dataset)
```python
from torch.utils.data import Dataset, TensorDataset, DataLoader
from PIL import Image
```

**1.重载Dataset类**
```python
class Dataset自定义类名(Dataset):
```

**2.初始化属性**
```python
def __init__(self, path_dir, transform=None):
    self.path_dir = path_dir # 文件路径
    self.transform = transform #对图形预处理
    self.images = os.listdir(self.path_dir) # 把路径下的所有文件放入一个列表

def __len__(self):
    return len(self.images) # 返回数据集大小
```

**3.根据索引index返回图像及标签**
```python
def __getitem__(self, index):
    # 获取图片
    image_index = self.images[index] # 根据索引获取图像文件名
    img_path = os.path.join(self.path_dir, image_index)
    img = Image.open(img_path).convert('图像通道')  # 图像通道：RGB
    if self.transform is not None:
        img = self.transform(img)
            
    # 获取标签，并将标签用数字int编码
    label = os.path.basename(img_path).split('.')[0]  # 只取文件名，兼容 Windows/Linux 路径分隔符
    ...需要改写...
    if '图片标签' in label:
        label = 0
    elif '图片标签' in label:
        label = 1
    ...需要改写...
        
    return img,label
```

**4.调用重载类**
```python
dataset = Dataset自定义类名('文件夹路径',trans变量)
```


In [None]:
class MyDataset(Dataset):
    """Image dataset whose labels are encoded in the image file names.

    Every entry returned by ``os.listdir(path_dir)`` is treated as an image
    file. A file whose name contains ``'horse'`` is labelled 1 and one whose
    name contains ``'human'`` is labelled 0.
    """

    def __init__(self, path_dir, transform=None):
        """Store the folder, the optional transform and the folder listing.

        Args:
            path_dir: Folder that directly contains the image files.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.path_dir = path_dir
        self.transform = transform
        # Every entry of the folder becomes one sample.
        self.images = os.listdir(self.path_dir)

    def __len__(self):
        """Return the number of entries found in ``path_dir``."""
        return len(self.images)

    @staticmethod
    def _label_from_path(img_path):
        """Derive the label from the file name at the end of ``img_path``.

        Only the last path component is inspected, so directory names that
        happen to contain ``'horse'`` or ``'human'`` cannot influence the
        result. Both ``/`` and ``\\`` are accepted as separators.

        Returns:
            1 for a ``'horse'`` file, 0 for a ``'human'`` file; otherwise the
            file name up to its first dot is returned unchanged.
        """
        # Splitting on '\\' alone only works for Windows-style paths; normalise
        # the separators first so the file name is isolated on every platform.
        file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
        label = file_name.split('.')[0]
        if 'horse' in label:
            return 1
        if 'human' in label:
            return 0
        return label

    def __getitem__(self, index):
        """Return ``(img, label)`` for the sample at position ``index``.

        The image is opened with PIL, converted to RGB and passed through
        ``self.transform`` when one was supplied.
        """
        image_name = self.images[index]
        img_path = os.path.join(self.path_dir, image_name)
        img = Image.open(img_path).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        label = self._label_from_path(img_path)
        return img, label
        

In [None]:
# Raw string: the Windows path is full of backslashes that must not be parsed
# as escape sequences.
# data_transform is passed so that each sample is a tensor; without it the
# samples stay PIL images, which the DataLoader built from this dataset
# cannot stack into batches.
dataset = MyDataset(
    r'E:\code\jupyter\py-torch-learning-notes-master\实例2\horses-or-human\humans',
    data_transform,
)

# Inspect one sample: print its label, then display the image tensor.
img, label = dataset[5]
print(label)
img


## 3.加载数据集

```python
from torch.utils.data import DataLoader 
```

```python
loader变量 = DataLoader(
    数据集, 
    batch_size = 一次训练图片数, 
    shuffle = True/False  # 是否随机打乱数据
)
```

In [None]:
# Wrap the dataset in a loader that yields shuffled batches of 256 samples.
dataset_loader = DataLoader(
    dataset=dataset,
    batch_size=256,
    shuffle=True,
)

---
---
---
# 三、创建自己的数据集

In [None]:
from torch.utils.data import Dataset, TensorDataset ,DataLoader

## 1.将数据转为tensor格式
`数据 = torch.tensor(numpy数据)` 

In [None]:
# Convert the training and test images/labels into torch tensors.
train_images, train_labels = torch.tensor(train_images), torch.tensor(train_labels)
test_images, test_labels = torch.tensor(test_images), torch.tensor(test_labels)

## 2.数据处理

>图像数据处理：
>>**图像数据列表维度shape：[图像数量,通道维数,图像长像素,图像宽像素]**  
>>缺少通道维黑白图像处理:`图片样本data = torch.unsqueeze(图片样本data, dim=1).type(torch.FloatTensor)/255`（PyTorch 0.4 起 `Variable` 与 `volatile` 参数已弃用，无需再用 `Variable` 包装）  
>>数据类型转换:`数据变量 = 数据变量.type(torch.FloatTensor)`

>标签处理
>>转换为one-hot编码:`标签labels = torch.nn.functional.one_hot(标签labels.long())`（`utils.to_categorical` 是 Keras 的函数，torchvision 的 `utils` 中没有）  
>>标签转换成long数据格式：`标签labels = 标签labels.long()`

In [None]:
# Cast the labels to the long (int64) dtype.
train_labels = train_labels.long()
test_labels = test_labels.long()

# Insert a channel axis, [images, height, width] -> [images, channels, height, width],
# cast to float and divide by 255 (presumably 8-bit pixel values - confirm).
# The former Variable(..., volatile=True) wrapper is dropped: Variable was
# never imported here, and both Variable and `volatile` have been obsolete
# since PyTorch 0.4, where plain tensors took over that role.
train_images = torch.unsqueeze(train_images, dim=1).type(torch.FloatTensor) / 255
test_images = torch.unsqueeze(test_images, dim=1).type(torch.FloatTensor) / 255

## 3.生成数据集
`数据集 = TensorDataset(样本data, 标签labels)`

In [None]:
# Pair each image tensor with its label tensor, one dataset per split.
train_dataset, test_dataset = (
    TensorDataset(train_images, train_labels),
    TensorDataset(test_images, test_labels),
)

## 4.加载数据集
`train_loader = DataLoader(train_dataset, batch_size=120)`

In [None]:
# Training loader: shuffled batches of BATCH_SIZE samples, fetched by two workers.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
)
# Test loader: batches of 120 samples with no shuffling requested.
test_loader = DataLoader(dataset=test_dataset, batch_size=120)

---
---
---
# 四、加载CSV数据