<a href="https://colab.research.google.com/github/wey-code/code_practice/blob/master/cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

import os
os.chdir("/content/gdrive/My Drive/Colab Notebooks/Hashing")

#!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # 下載資料集
#!unzip food-11.zip # 解壓縮


In [3]:
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader,Dataset
import time

将图片读入

In [4]:
def readfile(path,label):
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir),128,128,3),dtype=np.uint8)
    y = np.zeros((len(image_dir)),dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path,file))
        x[i, :, :] = cv2.resize(img,(128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x,y
    else:
        return x

In [5]:
workspace_dir = './food-11'
print("reading data")
train_x,train_y = readfile(os.path.join(workspace_dir,"training"),True)
print("size of training data = {}".format(len(train_x)))
val_x,val_y = readfile(os.path.join(workspace_dir,"validation"),True)
print("size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir,"testing"),False)
print("size of testing data = {}".format(len(test_x)))

reading data
size of training data = 9866
size of validation data = 3430
size of testing data = 3347


使用dataset来打包数据

In [6]:
#training时候 通过翻转之类做数据扩增 data augmentation
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

class ImgDataset(Dataset):
    def __init__(self,x,y=None, transform=None):
        self.x = x
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self,index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X,Y
        else:
            return X

In [7]:
batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

开始建立模型

In [8]:
class Classifier(nn.Module):
    def __init__(self):
                super(Classifier,self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
                self.cnn = nn.Sequential(
                    nn.Conv2d(3,64,3,1,1), #[64,128,128]
                    nn.BatchNorm2d(64),
                    nn.ReLU(),
                    nn.MaxPool2d(2,2,0),#[64,64,64]

                    nn.Conv2d(64,128,3,1,1), #[128,64,64]
                    nn.BatchNorm2d(128),
                    nn.ReLU(),
                    nn.MaxPool2d(2,2,0),#[128,32,32]

                    nn.Conv2d(128,256,3,1,1), #[256,32,32]
                    nn.BatchNorm2d(256),
                    nn.ReLU(),
                    nn.MaxPool2d(2,2,0),#[256,16,16]

                    nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
                    nn.BatchNorm2d(512),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]

                    nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
                    nn.BatchNorm2d(512),
                    nn.ReLU(),
                    nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]   
            )
                self.fc =  nn.Sequential(
                    nn.Linear(512*4*4,1024),
                    nn.ReLU(),
                    nn.Linear(1024,512),
                    nn.ReLU(),
                    nn.Linear(512,11),
            )
            
    def forward(self,x):
            out = self.cnn(x)
            out = out.view(out.size()[0],-1)
            return self.fc(out)

training

In [10]:
model = Classifier().cuda()
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0
    
    model.train()
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()
        train_pred = model(data[0].cuda())
        batch_loss = loss(train_pred,data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
                val_pred = model(data[0].cuda())
                batch_loss = loss(val_pred, data[1].cuda())

                val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
                val_loss += batch_loss.item()

            #將結果 print 出來
        print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
                (epoch + 1, num_epoch, time.time()-epoch_start_time, \
                 train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))

[001/030] 17.25 sec(s) Train Acc: 0.253902 Loss: 0.033449 | Val Acc: 0.289796 loss: 0.038061
[002/030] 17.32 sec(s) Train Acc: 0.373100 Loss: 0.028208 | Val Acc: 0.314577 loss: 0.034175
[003/030] 17.37 sec(s) Train Acc: 0.430063 Loss: 0.025858 | Val Acc: 0.435860 loss: 0.025331
[004/030] 17.46 sec(s) Train Acc: 0.470809 Loss: 0.023840 | Val Acc: 0.438484 loss: 0.026522
[005/030] 17.40 sec(s) Train Acc: 0.499392 Loss: 0.022754 | Val Acc: 0.503499 loss: 0.022996
[006/030] 17.39 sec(s) Train Acc: 0.526150 Loss: 0.021296 | Val Acc: 0.385131 loss: 0.033345
[007/030] 17.29 sec(s) Train Acc: 0.559497 Loss: 0.020070 | Val Acc: 0.495918 loss: 0.023542
[008/030] 17.34 sec(s) Train Acc: 0.579161 Loss: 0.018958 | Val Acc: 0.559475 loss: 0.020949
[009/030] 17.38 sec(s) Train Acc: 0.609467 Loss: 0.017740 | Val Acc: 0.506706 loss: 0.023654
[010/030] 17.30 sec(s) Train Acc: 0.621123 Loss: 0.017067 | Val Acc: 0.427697 loss: 0.031007
[011/030] 17.37 sec(s) Train Acc: 0.644841 Loss: 0.016178 | Val Acc: 0

得到较好的超参数之后    
将所有数据混合在一起进行训练

In [16]:
train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss() # 因為是 classification task，所以 loss 使用 CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001) # optimizer 使用 Adam
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

        #將結果 print 出來
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))

[001/030] 20.60 sec(s) Train Acc: 0.291667 Loss: 0.031876
[002/030] 20.58 sec(s) Train Acc: 0.390193 Loss: 0.027089
[003/030] 20.61 sec(s) Train Acc: 0.455024 Loss: 0.024467
[004/030] 20.61 sec(s) Train Acc: 0.502031 Loss: 0.022296
[005/030] 20.56 sec(s) Train Acc: 0.538658 Loss: 0.020624
[006/030] 20.54 sec(s) Train Acc: 0.573105 Loss: 0.019109
[007/030] 20.63 sec(s) Train Acc: 0.604242 Loss: 0.017951
[008/030] 20.61 sec(s) Train Acc: 0.628084 Loss: 0.016788
[009/030] 20.59 sec(s) Train Acc: 0.647187 Loss: 0.015950
[010/030] 20.60 sec(s) Train Acc: 0.672232 Loss: 0.014615
[011/030] 20.65 sec(s) Train Acc: 0.696525 Loss: 0.013714
[012/030] 20.61 sec(s) Train Acc: 0.713372 Loss: 0.012836
[013/030] 20.61 sec(s) Train Acc: 0.734582 Loss: 0.011916
[014/030] 20.59 sec(s) Train Acc: 0.753535 Loss: 0.011320
[015/030] 20.57 sec(s) Train Acc: 0.756468 Loss: 0.010969
[016/030] 20.59 sec(s) Train Acc: 0.770532 Loss: 0.010264
[017/030] 20.56 sec(s) Train Acc: 0.786778 Loss: 0.009623
[018/030] 20.5

In [17]:
test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [20]:
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

In [21]:
#將結果寫入 csv 檔
with open("predict2.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in  enumerate(prediction):
        f.write('{},{}\n'.format(i, y))