In [1]:
import os
import torch
import cv2
import time
import numpy as np
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

In [2]:
def read_file(path, flag):
    """
    读取文件目录里的内容
    :param path: 文件夹位置
    :param flag: 1训练集或验证集 0测试集
    """
    image_dir = os.listdir(path)
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros(len(image_dir))
    
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :, :] = cv2.resize(img, (128, 128)) # 将图片大小变为128*128
        if flag:
            y[i] = file.split('_')[0]
    
    if flag:
        return x, y
    else:
        return x

In [3]:
class ImgDataset(Dataset):
    """
    实现对数据的封装
    """
    def __init__(self, x, y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, index):
        res_x = self.x[index]
        if self.transform is not None:
            res_x = self.transform(res_x)
        if self.y is not None:
            res_y = self.y[index]
            return res_x, res_y
        else:
            return res_x
        

train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), # 随机水平翻转图片
    transforms.RandomRotation(15), # 随机旋转图片15度
    transforms.ToTensor() # 将图片变为Tensor [H, W, C]-->[C, H, W]
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

In [4]:
class Classifier1(nn.Module):
    """
    构建神经网络1：5层卷积+5层池化+3层全连接
    """
    def __init__(self):
        super(Classifier1, self).__init__()
        
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        self.cnn = nn.Sequential(           # input: 3 * 128 * 128
            # 卷积层1
            nn.Conv2d(3, 64, 3, 1, 1),       # output: 64 * 128 * 128
            nn.BatchNorm2d(64), # 归一化处理，可以使每一个batch的分布都在高斯分布附近，这样可以使用更大的学习率，加快训练速度
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 64 * 64 * 64
            
            # 卷积层2
            nn.Conv2d(64, 128, 3, 1, 1),     # output: 128 * 64 * 64
            nn.BatchNorm2d(128), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 128 * 32 * 32
            
             # 卷积层3
            nn.Conv2d(128, 256, 3, 1, 1),    # output: 256 * 32 * 32
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 256 * 16 * 16
            
            # 卷积层4
            nn.Conv2d(256, 512, 3, 1, 1),    # output: 512 * 16 * 16
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 512 * 8 * 8
            
            # 卷积层5
            nn.Conv2d(512, 512, 3, 1, 1),    # output: 512 * 8 * 8
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0)            # output: 512 * 4 * 4
        ) 
        
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024), # 全连接层
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )
        
    def forward(self, x):
        cnn_out = self.cnn(x)
        flatten = cnn_out.view(cnn_out.size()[0], -1) # 将Tensor展开
        return self.fc(flatten)
        

In [None]:
class Classifier2(nn.Module):
    """
    构建神经网络2：3层卷积+3层池化+3层全连接
    """
    def __init__(self):
        super(Classifier2, self).__init__()
        
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        self.cnn = nn.Sequential(           # input: 3 * 128 * 128
            # 卷积层1
            nn.Conv2d(3, 64, 3, 1, 1),       # output: 64 * 128 * 128
            nn.BatchNorm2d(64), # 归一化处理
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),           # output: 64 * 32 * 32
            
            # 卷积层2
            nn.Conv2d(64, 512, 3, 1, 1),     # output: 512 * 32 * 32
            nn.BatchNorm2d(512), 
            nn.ReLU(),
            nn.MaxPool2d(4, 4, 0),           # output: 512 * 8 * 8
            
             # 卷积层3
            nn.Conv2d(512, 512, 3, 1, 1),    # output: 512 * 8 * 8
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 512 * 4 * 4
        ) 
        
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024), # 全连接层
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )
        
    def forward(self, x):
        cnn_out = self.cnn(x)
        flatten = cnn_out.view(cnn_out.size()[0], -1) # 将Tensor展开
        return self.fc(flatten)
        

In [None]:
class Classifier3(nn.Module):
    """
    构建神经网络3：5层卷积+5层池化+2层全连接
    """
    def __init__(self):
        super(Classifier3, self).__init__()
        
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        self.cnn = nn.Sequential(           # input: 3 * 128 * 128
            # 卷积层1
            nn.Conv2d(3, 64, 3, 1, 1),       # output: 64 * 128 * 128
            nn.BatchNorm2d(64), # 归一化处理
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 64 * 64 * 64
            
            # 卷积层2
            nn.Conv2d(64, 128, 3, 1, 1),     # output: 128 * 64 * 64
            nn.BatchNorm2d(128), 
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 128 * 32 * 32
            
             # 卷积层3
            nn.Conv2d(128, 256, 3, 1, 1),    # output: 256 * 32 * 32
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 256 * 16 * 16
            
            # 卷积层4
            nn.Conv2d(256, 512, 3, 1, 1),    # output: 512 * 16 * 16
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),           # output: 512 * 8 * 8
            
            # 卷积层5
            nn.Conv2d(512, 512, 3, 1, 1),    # output: 512 * 8 * 8
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0)            # output: 512 * 4 * 4
        ) 
        
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024), # 全连接层
            nn.ReLU(),
            nn.Linear(1024, 11)
        )
        
    def forward(self, x):
        cnn_out = self.cnn(x)
        flatten = cnn_out.view(cnn_out.size()[0], -1) # 将Tensor展开
        return self.fc(flatten)
        

In [5]:
def train_model(train_loader, val_loader, train_len, val_len):
    """
    模型训练
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # 构建神经网络1：5层卷积+5层池化+3层全连接
    # model = Classifier1().to(device)
    
    # 构建神经网络2：3层卷积+3层池化+3层全连接
    model = Classifier2().to(device)
    
    # 构建神经网络3：5层卷积+5层池化+2层全连接
    # model = Classifier3().to(device)
    
    loss = nn.CrossEntropyLoss() # 使用交叉熵损失函数
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    epochs = 30
    
    for epoch in range(epochs):
        epoch_start_time = time.time()
        train_acc = 0.0
        train_loss = 0.0
        val_acc = 0.0
        val_loss = 0.0
        
        # 保证BN层(Batch Normalization)用每一批数据的均值和方差，而对于Dropout层，随机取一部分网络连接来训练更新参数
        model.train()
        for i, data in enumerate(train_loader):
            optimizer.zero_grad() # 清空梯度，否则会一直累加
            train_pred = model(data[0].to(device)) # data[0]：x data[1]：y
            batch_loss = loss(train_pred, data[1].to(device))
            batch_loss.backward()
            optimizer.step() # 更新参数
            
            # .data表示将Variable中的Tensor取出来
            # train_pred是(50，11)的数据，np.argmax()返回最大值的索引，axis=1则是对行进行，返回的索引正好就对应了标签，然后和y真实标签比较，则可得到分类正确的数量
            train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            train_loss += batch_loss.item()
         
        
        # 保证BN用全部训练数据的均值和方差，而对于Dropout层，利用到了所有网络连接
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                val_pred = model(data[0].to(device))
                batch_loss = loss(val_pred, data[1].to(device))
                
                val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                val_loss += batch_loss.item()
                
         
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
              (epoch + 1, epochs, time.time() - epoch_start_time, \
               train_acc / train_len, train_loss / train_len, val_acc / val_len,
               val_loss / val_len))
        
    return model
    

In [6]:
def predict_model(test_loader, model):
    """
    模型预测
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()
    result = []
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            test_pred = model(data.to(device))
            test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
            for y in test_label:
                result.append(y)
    return result

def write_file(result):
    with open('result.csv', mode='w') as f:
        f.write('Id,Category\n')
        for i, label in enumerate(result):
            f.write('{},{}\n'.format(i, label))

In [7]:
def params_count(model):
    """
    获得模型参数量
    """
    return np.sum([p.numel() for p in model.parameters()]).item()

In [8]:
def main():
    train_x, train_y = read_file('../data/food-11/training', True)
    val_x, val_y = read_file('../data/food-11/validation', True)
    test_x = read_file('../data/food-11/testing', False)
    
    batch_size = 50
    train_set = ImgDataset(train_x, train_y, train_transform)
    val_set = ImgDataset(val_x, val_y, test_transform)
    test_set = ImgDataset(x=test_x, transform=test_transform)
    
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    
    model = train_model(train_loader, val_loader, train_set.__len__(), val_set.__len__())
    print('The number of parameters is %d', params_count(model))
    result = predict_model(test_loader, model)
    write_file(result)

In [9]:
if __name__ == '__main__':
    main()

[001/030] 1623.54 sec(s) Train Acc: 0.249848 Loss: 0.043039 | Val Acc: 0.309038 loss: 0.038609


KeyboardInterrupt: 