# 数据集
    此次資料集為網路上蒐集到的食物照片，共有11類
    Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit.
    Training set: 9866張
    Validation set: 1451張
    Testing set: 3347張


In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import cv2
import numpy as np
import pandas
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
import time

**Note:** os.environ["CUDA_VISIBLE_DEVICES"] = "0"（使用多张显卡时可以写成 "0,1"）这一句必须加在import torch之前，紧跟import os之后。

## read image
    利用opencv(cv2)读入图片并放在numpy array中

In [5]:
def readfile(path, label, size=128):
    """Load every image file in ``path`` into one uint8 array.

    Args:
        path: Directory that holds the image files of one dataset split.
        label: Whether to also return the class labels. Pass a true value for
            splits whose file names start with the class index followed by
            ``_`` (training, validation) and a false value for the unlabeled
            testing split.
        size: Side length in pixels of the square every image is resized to.

    Returns:
        ``x`` when ``label`` is false, otherwise the tuple ``(x, y)``.
        ``x`` has shape ``(n_files, size, size, 3)`` and ``y`` has shape
        ``(n_files,)``; both are ``uint8`` and follow the sorted file-name
        order.

    Files that OpenCV cannot decode keep an all-zero image (and label 0).
    Their rows are kept so that row indices still match the sorted file
    list, and a warning reports them instead of skipping them silently.
    """
    import warnings

    file_names = sorted(os.listdir(path))  # sorted so row order is reproducible

    x = np.zeros((len(file_names), size, size, 3), dtype=np.uint8)
    y = np.zeros(len(file_names), dtype=np.uint8)
    unreadable = []
    for i, file_name in enumerate(file_names):
        image = cv2.imread(os.path.join(path, file_name))
        if image is None:
            # cv2.imread signals a decode failure by returning None.
            unreadable.append(file_name)
            continue
        x[i] = cv2.resize(image, (size, size))
        if label:
            # The class index is the text before the first underscore.
            y[i] = int(file_name.split('_')[0])

    if unreadable:
        warnings.warn(
            "{} file(s) in {!r} could not be read as images and were left as "
            "all-zero entries, e.g. {!r}".format(len(unreadable), path, unreadable[0])
        )

    if label:
        return x, y
    return x

In [6]:
# Load the training, validation and testing splits with readfile().
# Only the training and validation file names carry a label prefix.
workspace_dir = "./food-11"
print("Reading data: ")

train_x, train_y = readfile(os.path.join(workspace_dir, "training"), label=True)
print("Size of training data = {}".format(train_x.shape[0]))

val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), label=True)
print("Size of validation data = {}".format(val_x.shape[0]))

test_x = readfile(os.path.join(workspace_dir, "testing"), label=False)
print("Size of Testing data = {}".format(test_x.shape[0]))

Reading data: 
Size of training data = 9866
Size of validation data = 1451
Size of Testing data = 3347


# Dataset
数据增强，增加数据的数量，阻止神经网络学习不相关特征
    
在 PyTorch 中，我們可以利用 torch.utils.data 的 Dataset(数据集抽象类) 及 DataLoader（数据加载器） 來"包裝" data，使後續的 training 及 testing 更為方便。

Dataset 需要 overload 兩個函數：`__len__` 及 `__getitem__`

`__len__` 必須要回傳 dataset 的大小，而 `__getitem__` 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。

實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實做的話會在程式運行階段出現 error。

In [7]:
# Image transform pipelines.
# Both start by turning the (H, W, C) uint8 array into a PIL image and end
# with ToTensor(), which yields a (C, H, W) float tensor scaled to [0, 1].

# Random augmentation steps, applied to training images only.
_augmentations = [
    transforms.RandomHorizontalFlip(),  # random left-right mirror
    transforms.RandomRotation(15),  # random rotation within +/-15 degrees
]
train_transform = transforms.Compose(
    [transforms.ToPILImage(), *_augmentations, transforms.ToTensor()]
)

# Evaluation (validation / testing) images get no augmentation.
test_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.ToTensor()]
)
class ImgDataset(Dataset):
    """Dataset over an indexable collection of images with optional labels."""

    def __init__(self, x, y=None, transform=None):
        """Store the images, the labels (if any) and the per-sample transform.

        Args:
            x: Indexable collection of images.
            y: Optional labels; converted to a ``torch.LongTensor`` when given.
            transform: Optional callable applied to each image on access.
        """
        self.x = x
        self.y = torch.LongTensor(y) if y is not None else None
        self.transform = transform

    def __len__(self):
        """Return the number of images."""
        return len(self.x)

    def __getitem__(self, index):
        """Return the transformed image, paired with its label when available."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

In [8]:
batch_size = 128

# Training data goes through the augmenting pipeline; validation data uses
# test_transform because evaluation images only need the tensor conversion,
# not the random augmentation.
train_set = ImgDataset(train_x, y=train_y, transform=train_transform)
val_set = ImgDataset(val_x, y=val_y, transform=test_transform)

# readfile() returns samples in sorted file-name order, so the labels are
# ordered too; shuffle=True draws a new random order every epoch.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)

# Model

In [9]:
class Classifier(nn.Module):
    """CNN that maps a [3, 128, 128] image to 11 class scores.

    Five Conv -> BatchNorm -> ReLU -> MaxPool stages halve the spatial size
    each time (128 -> 64 -> 32 -> 16 -> 8 -> 4), followed by a three-layer
    fully connected classifier.
    """

    def __init__(self):
        """Build the convolutional feature extractor and the classifier head."""
        super().__init__()

        # (in_channels, out_channels) of each convolution stage.
        stage_channels = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]

        # Layers are kept in one flat Sequential so that parameter names
        # (cnn.0, cnn.1, ...) stay the same as a hand-written layer list.
        layers = []
        for in_channels, out_channels in stage_channels:
            layers += [
                # 3x3 convolution, stride 1, padding 1: spatial size unchanged.
                nn.Conv2d(in_channels, out_channels, 3, 1, 1),
                # Normalise activations per channel before the non-linearity.
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                # 2x2 max pooling, stride 2: halves height and width.
                nn.MaxPool2d(2, 2, 0),
            ]
        self.cnn = nn.Sequential(*layers)  # output: [512, 4, 4]

        # Fully connected head: flattened features -> 11 class scores.
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Return the class scores for a batch of images."""
        features = self.cnn(x)
        # Flatten each sample's feature maps into one vector.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

## Training
model.train()：让模型变成训练模式，此时 Dropout 会随机丢弃神经元，Batch Normalization 使用当前 batch 的均值和方差并更新滑动统计量，在训练的时候起到防止网络过拟合的作用

model.eval()：让模型变成评估模式，PyTorch 会关闭 Dropout，并把 BN 固定住：不再用当前 batch 计算均值和方差，而是使用训练过程中累积下来的统计量。

网上的一个问题：请问一下 pytorch 模型的eval模式比train模式的效果差很多 （bn层导致的） 应该怎么解决呢？

答案：说明过拟合了，测试的时候用eval模式，如果测试的时候开成train模式，dropout会起作用，bn参数会改变（根据测试数据），测试效果就会变差。

所以不开eval模式效果会变差，如果开了eval模式test效果很差，那就是过拟合了。

In [10]:
model = Classifier().cuda()
loss = nn.CrossEntropyLoss()  # classification task, so cross-entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    # Training pass: batch norm uses batch statistics and weights are updated.
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()  # clear gradients left from the previous step
        train_pred = model(images.cuda())  # calls Classifier.forward
        # Prediction and target must live on the same device.
        batch_loss = loss(train_pred, labels.cuda())
        batch_loss.backward()  # back-propagate to get every parameter's gradient
        optimizer.step()  # update the parameters from those gradients

        predicted = np.argmax(train_pred.cpu().data.numpy(), axis=1)
        train_acc += np.sum(predicted == labels.numpy())
        train_loss += batch_loss.item()

    # Validation pass: no gradient tracking, batch norm uses running statistics.
    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            val_pred = model(images.cuda())
            batch_loss = loss(val_pred, labels.cuda())

            predicted = np.argmax(val_pred.cpu().data.numpy(), axis=1)
            val_acc += np.sum(predicted == labels.numpy())
            val_loss += batch_loss.item()

    # Report the epoch summary.
    # NOTE(review): batch_loss is already a per-batch mean, so dividing the
    # summed value by the number of samples reports a figure roughly
    # batch_size times smaller than the mean loss. Kept unchanged so logged
    # values stay comparable.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % (
        epoch + 1,
        num_epoch,
        time.time() - epoch_start_time,
        train_acc / len(train_set),
        train_loss / len(train_set),
        val_acc / len(val_set),
        val_loss / len(val_set),
    ))

[001/030] 29.50 sec(s) Train Acc: 0.215690 Loss: 0.018544 | Val Acc: 0.115782 loss: 0.018633
[002/030] 23.52 sec(s) Train Acc: 0.322623 Loss: 0.015025 | Val Acc: 0.339766 loss: 0.016480
[003/030] 28.58 sec(s) Train Acc: 0.384857 Loss: 0.013807 | Val Acc: 0.360441 loss: 0.018917
[004/030] 23.76 sec(s) Train Acc: 0.424083 Loss: 0.013039 | Val Acc: 0.307374 loss: 0.016808
[005/030] 26.00 sec(s) Train Acc: 0.448814 Loss: 0.012522 | Val Acc: 0.366644 loss: 0.014795
[006/030] 22.70 sec(s) Train Acc: 0.490675 Loss: 0.011589 | Val Acc: 0.331496 loss: 0.016848
[007/030] 24.84 sec(s) Train Acc: 0.519765 Loss: 0.010964 | Val Acc: 0.421089 loss: 0.014235
[008/030] 26.08 sec(s) Train Acc: 0.535171 Loss: 0.010514 | Val Acc: 0.414197 loss: 0.013803
[009/030] 28.92 sec(s) Train Acc: 0.568620 Loss: 0.009839 | Val Acc: 0.534114 loss: 0.011428
[010/030] 21.16 sec(s) Train Acc: 0.591932 Loss: 0.009296 | Val Acc: 0.368711 loss: 0.016856
[011/030] 22.38 sec(s) Train Acc: 0.604399 Loss: 0.008960 | Val Acc: 0

train_set 和 val_set一起训练，训练数据越多，精确度越高

In [12]:
# Merge the training and validation splits (stacked along the sample axis)
# so the final model can be trained on all labeled images.
train_val_x = np.concatenate([train_x, val_x])
train_val_y = np.concatenate([train_y, val_y])
train_val_set = ImgDataset(train_val_x, y=train_val_y, transform=train_transform)
train_val_loader = DataLoader(dataset=train_val_set, batch_size=batch_size, shuffle=True)

In [1]:
# Train a fresh model on the merged training + validation data.
model_best = Classifier().cuda()
loss_func = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model_best.parameters(), lr=0.001)
epoches = 30

print('iter, \tduration, \ttrain_val_acc, \ttrain_val_loss')
for epoch in range(epoches):
    epoch_start_time = time.time()
    train_val_acc = 0.0
    train_val_loss = 0.0
    model_best.train()
    for i, data in enumerate(train_val_loader):
        optim.zero_grad()
        train_val_predict = model_best(data[0].cuda())
        batch_loss = loss_func(train_val_predict, data[1].cuda())
        # Without backward() no gradients exist and step() never changes
        # the weights.
        batch_loss.backward()
        optim.step()

        # A CUDA tensor must be copied to the CPU before .numpy(); compare
        # against the CPU labels so both sides are NumPy arrays.
        predicted = np.argmax(train_val_predict.cpu().data.numpy(), axis=1)
        train_val_acc += np.sum(predicted == data[1].numpy())
        train_val_loss += batch_loss.item()
    # Report per-sample values, using the same normalisation (division by
    # the dataset size) as the first training loop in this notebook.
    print('{}, \t{} sec(s), \t{:.4f}, \t{:.4f}'.format(
        epoch,
        time.time() - epoch_start_time,
        train_val_acc / len(train_val_set),
        train_val_loss / len(train_val_set),
    ))

NameError: name 'Classifier' is not defined

## test
利用刚刚train好的model进行prediction

In [20]:
# The testing split has no labels, so the dataset yields images only.
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

model.eval()
predict = []
with torch.no_grad():
    for batch in test_loader:
        test_predict = model(batch.cuda())
        # The model returns a tensor of class scores; .data is the same
        # tensor detached from autograd, copied to the CPU for NumPy.
        test_label = np.argmax(test_predict.cpu().data.numpy(), axis=1)
        predict.extend(test_label)

RuntimeError: CUDA out of memory. Tried to allocate 512.00 MiB (GPU 0; 10.76 GiB total capacity; 3.98 GiB already allocated; 201.56 MiB free; 262.33 MiB cached)

In [None]:
# Write the predictions to test_out.csv, one "Id,Category" row per test image.
# Fields are separated by a bare comma: a ", \t" separator would make the
# whitespace part of the header name and of every value when the file is
# parsed as CSV.
with open('test_out.csv', 'w') as out_file:
    out_file.write('Id,Category\n')
    for i, y in enumerate(predict):
        out_file.write('{},{}\n'.format(i, y))

**因为GPU显存不够的原因，在第一步训练的时候就占用了5277MB显存，以致于第二次的训练和test时显存不够用，大致就是这样啦。**

**会了第一次的训练，第二次训练和第一次差不多，测试的时候的原理也差不多**

**谢谢注释和文档，第三次作业就算是结束啦！！**