In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms as tfs
from datetime import datetime

In [2]:
# Transform handed to both ImageFolder datasets below: converts each loaded
# image to a tensor. No resizing, augmentation or normalization is configured.
data_tf = tfs.ToTensor()

In [3]:
# Training and test datasets, one per dataset folder; both share the
# `data_tf` transform so images are preprocessed identically.
train_set = ImageFolder(
    './Fruit-Images-Dataset/Training',
    transform=data_tf,
)
test_set = ImageFolder(
    './Fruit-Images-Dataset/Test',
    transform=data_tf,
)

In [29]:
# Mini-batch loaders. Only the training loader shuffles: shuffling the
# evaluation set brings no benefit and makes the batch-averaged validation
# metrics depend on which samples happen to land in the final, smaller batch.
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

In [5]:
class conv_net1(nn.Module):
    """CNN classifier: four (conv -> batch norm -> 2x2 max pool) stages, then three linear layers.

    Channel widths grow 3 -> 32 -> 64 -> 128 -> 256. The first and last
    convolutions are unpadded 3x3, the two middle ones use ``padding=1``.
    ``fc1`` expects 6400 flattened features (256 channels * 5 * 5), which is
    what the stages produce for e.g. 100x100 inputs; other spatial sizes that
    do not reduce to 5x5 will fail in ``fc1``.

    NOTE(review): there is no activation between the convolutional stages;
    max pooling is the only non-linearity before the fully connected head.

    Args:
        num_classes: size of the output layer. Defaults to 101, the value
            that was previously hard-coded.
    """

    def __init__(self, num_classes=101):
        super(conv_net1, self).__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so parameter ordering and state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.batch_norm1 = nn.BatchNorm2d(32)
        self.max_pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(64)
        self.max_pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.batch_norm3 = nn.BatchNorm2d(128)
        self.max_pool3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(128, 256, 3)
        self.batch_norm4 = nn.BatchNorm2d(256)
        self.max_pool4 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(6400, 3200)
        self.fc2 = nn.Linear(3200, 1600)
        self.fc3 = nn.Linear(1600, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(x.shape[0], num_classes)``."""
        stages = (
            (self.conv1, self.batch_norm1, self.max_pool1),
            (self.conv2, self.batch_norm2, self.max_pool2),
            (self.conv3, self.batch_norm3, self.max_pool3),
            (self.conv4, self.batch_norm4, self.max_pool4),
        )
        for conv, norm, pool in stages:
            x = pool(norm(conv(x)))
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [6]:
# First experiment: conv_net1 trained with Adam (initial lr 0.01) on a
# cross-entropy objective.
net = conv_net1()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

In [7]:
def set_learning_rate(optimizer, lr):
    """Overwrite the ``'lr'`` entry of every parameter group of ``optimizer``.

    Args:
        optimizer: object exposing an iterable ``param_groups`` of dict-like groups.
        lr: learning rate to store in each group.
    """
    for group in optimizer.param_groups:
        group['lr'] = lr
        
def get_acc(output, label):
    """Return the fraction of correct top-1 predictions in a batch.

    Args:
        output: score tensor; the predicted class of each row is the index of
            its largest value along dimension 1.
        label: tensor of target class indices, compared element-wise with the
            predictions.

    Returns:
        Number of matching predictions divided by ``output.shape[0]``.
    """
    predictions = output.max(dim=1)[1]
    hits = predictions.eq(label).sum().item()
    return hits / output.shape[0]

def train(net, train_data, test_data, epoch, optimizer, criterion,
          lr_decay_epoch=15, lr_decay_value=0.001):
    """Train ``net`` for ``epoch`` epochs, evaluating on ``test_data`` after each one.

    After every epoch one summary line is printed with the mean per-batch
    train/validation loss and accuracy, plus the wall-clock time spent in that
    epoch's training phase (evaluation time is not included).

    Args:
        net: model to optimise; moved to the GPU when CUDA is available.
        train_data: iterable of ``(images, labels)`` batches supporting ``len()``.
        test_data: iterable of ``(images, labels)`` batches supporting ``len()``,
            used for evaluation only.
        epoch: number of epochs to run.
        optimizer: optimizer that updates the parameters of ``net``.
        criterion: callable ``criterion(output, labels)`` returning a scalar
            loss tensor.
        lr_decay_epoch: zero-based epoch index at which the learning rate is
            overwritten with ``lr_decay_value``; ``None`` disables the change.
        lr_decay_value: learning rate installed at ``lr_decay_epoch``.

    Returns:
        ``(train_losses, test_losses)``: two lists with one mean per-batch loss
        per epoch.
    """
    # Resolve the device once instead of querying CUDA for every batch.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = net.to(device)

    train_losses = []
    test_losses = []
    # `epoch` is the epoch count; a separate index avoids shadowing it.
    for epoch_idx in range(epoch):
        if epoch_idx == lr_decay_epoch:
            set_learning_rate(optimizer, lr_decay_value)

        # Start the clock per epoch so every reported time covers the same
        # phase (training only), rather than leaking the previous epoch's
        # evaluation time into the next measurement.
        epoch_start = datetime.now()
        train_loss = 0
        train_acc = 0
        net.train()
        for im, labels in train_data:
            im = im.to(device)
            labels = labels.to(device)

            output = net(im)
            loss = criterion(output, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_acc += get_acc(output, labels)
        # total_seconds() also accounts for the days component of the
        # timedelta, which `.seconds` silently drops.
        elapsed = int((datetime.now() - epoch_start).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = 'Time:%02d:%02d:%02d' % (h, m, s)

        test_loss = 0
        test_acc = 0
        net.eval()
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for im, labels in test_data:
                im = im.to(device)
                labels = labels.to(device)
                output = net(im)
                loss = criterion(output, labels)
                test_loss += loss.item()
                test_acc += get_acc(output, labels)

        mean_train_loss = train_loss / len(train_data)
        mean_test_loss = test_loss / len(test_data)
        epoch_str = (
                "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
                % (epoch_idx, mean_train_loss,
                   train_acc / len(train_data), mean_test_loss,
                   test_acc / len(test_data)))
        train_losses.append(mean_train_loss)
        test_losses.append(mean_test_loss)
        print(epoch_str + time_str)

    return train_losses, test_losses

In [8]:
# Run 25 epochs on whatever model/optimizer/criterion are currently bound to
# `net`, `optimizer` and `criterion` (conv_net1 when cells are run in order).
train(net, train_data, test_data, 25, optimizer, criterion)

Epoch 0. Train Loss: 3.175397, Train Acc: 0.726502, Valid Loss: 1.510375, Valid Acc: 0.713308, Time:00:01:22
Epoch 1. Train Loss: 0.330226, Train Acc: 0.911587, Valid Loss: 0.910819, Valid Acc: 0.810138, Time:00:01:44
Epoch 2. Train Loss: 0.308968, Train Acc: 0.927269, Valid Loss: 0.972013, Valid Acc: 0.861778, Time:00:01:41
Epoch 3. Train Loss: 0.278857, Train Acc: 0.938740, Valid Loss: 1.427092, Valid Acc: 0.823828, Time:00:01:42
Epoch 4. Train Loss: 0.170069, Train Acc: 0.963058, Valid Loss: 0.651804, Valid Acc: 0.907979, Time:00:01:44
Epoch 5. Train Loss: 0.143462, Train Acc: 0.972128, Valid Loss: 0.603411, Valid Acc: 0.917320, Time:00:01:42
Epoch 6. Train Loss: 0.251222, Train Acc: 0.952554, Valid Loss: 1.168884, Valid Acc: 0.831028, Time:00:01:44
Epoch 7. Train Loss: 0.087363, Train Acc: 0.981045, Valid Loss: 0.555847, Valid Acc: 0.917996, Time:00:01:44
Epoch 8. Train Loss: 0.104398, Train Acc: 0.977749, Valid Loss: 0.418779, Valid Acc: 0.942010, Time:00:01:38
Epoch 9. Train Loss

In [13]:
class conv_net2(nn.Module):
    """Wider CNN classifier: four (conv -> batch norm -> 2x2 max pool) stages, then four linear layers.

    Channel widths grow 3 -> 64 -> 128 -> 256 -> 512. The first and last
    convolutions are unpadded 3x3, the two middle ones use ``padding=1``.
    ``fc1`` expects 12800 flattened features (512 channels * 5 * 5), which is
    what the stages produce for e.g. 100x100 inputs; other spatial sizes that
    do not reduce to 5x5 will fail in ``fc1``.

    NOTE(review): there is no activation between the convolutional stages;
    max pooling is the only non-linearity before the fully connected head.

    Args:
        num_classes: size of the output layer. Defaults to 101, the value
            that was previously hard-coded.
    """

    def __init__(self, num_classes=101):
        super(conv_net2, self).__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so parameter ordering and state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(3, 64, 3)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.max_pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, 3, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(128)
        self.max_pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(128, 256, 3, padding=1)
        self.batch_norm3 = nn.BatchNorm2d(256)
        self.max_pool3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(256, 512, 3)
        self.batch_norm4 = nn.BatchNorm2d(512)
        self.max_pool4 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(12800, 6400)
        self.fc2 = nn.Linear(6400, 3200)
        self.fc3 = nn.Linear(3200, 1600)
        self.fc4 = nn.Linear(1600, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(x.shape[0], num_classes)``."""
        stages = (
            (self.conv1, self.batch_norm1, self.max_pool1),
            (self.conv2, self.batch_norm2, self.max_pool2),
            (self.conv3, self.batch_norm3, self.max_pool3),
            (self.conv4, self.batch_norm4, self.max_pool4),
        )
        for conv, norm, pool in stages:
            x = pool(norm(conv(x)))
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.shape[0], -1)
        # ReLU follows every hidden linear layer; the output layer stays linear.
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
        return self.fc4(x)

In [14]:
# Second experiment: conv_net2 with the same Adam / cross-entropy setup.
# Note that `net`, `optimizer` and `criterion` are rebound here, replacing
# the objects created for the first model.
net = conv_net2()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

In [15]:
# Run 25 epochs on the model currently bound to `net` (conv_net2 when cells
# are run in order), using the same data loaders as the first experiment.
train(net, train_data, test_data, 25, optimizer, criterion)

Epoch 0. Train Loss: 25.574792, Train Acc: 0.093941, Valid Loss: 4.352253, Valid Acc: 0.031126, Time:00:01:42
Epoch 1. Train Loss: 4.136753, Train Acc: 0.071922, Valid Loss: 4.032511, Valid Acc: 0.066206, Time:00:02:06
Epoch 2. Train Loss: 3.889701, Train Acc: 0.096841, Valid Loss: 3.765036, Valid Acc: 0.120186, Time:00:02:05
Epoch 3. Train Loss: 3.821041, Train Acc: 0.124271, Valid Loss: 3.495181, Valid Acc: 0.146182, Time:00:02:05
Epoch 4. Train Loss: 3.295712, Train Acc: 0.216328, Valid Loss: 3.012486, Valid Acc: 0.216269, Time:00:02:05
Epoch 5. Train Loss: 2.414540, Train Acc: 0.367779, Valid Loss: 2.283053, Valid Acc: 0.402435, Time:00:02:06
Epoch 6. Train Loss: 1.928114, Train Acc: 0.480827, Valid Loss: 2.192950, Valid Acc: 0.418152, Time:00:02:06
Epoch 7. Train Loss: 1.609250, Train Acc: 0.545399, Valid Loss: 1.555637, Valid Acc: 0.558167, Time:00:02:06
Epoch 8. Train Loss: 1.302749, Train Acc: 0.611489, Valid Loss: 1.778664, Valid Acc: 0.530773, Time:00:02:06
Epoch 9. Train Los

In [18]:
class conv_net3(nn.Module):
    """Shallower CNN classifier: three (conv -> batch norm -> 2x2 max pool) stages, then three linear layers.

    Channel widths grow 3 -> 64 -> 128 -> 256. All convolutions are unpadded
    3x3; the first one additionally uses ``stride=2``. ``fc1`` expects 4096
    flattened features (256 channels * 4 * 4), which is what the stages
    produce for e.g. 100x100 inputs; other spatial sizes that do not reduce
    to 4x4 will fail in ``fc1``.

    NOTE(review): there is no activation between the convolutional stages;
    max pooling is the only non-linearity before the fully connected head.

    Args:
        num_classes: size of the output layer. Defaults to 101, the value
            that was previously hard-coded.
    """

    def __init__(self, num_classes=101):
        super(conv_net3, self).__init__()
        # Layers are created in the same order and under the same attribute
        # names as before, so parameter ordering and state_dict keys are unchanged.
        self.conv1 = nn.Conv2d(3, 64, 3, stride=2)
        self.batch_norm1 = nn.BatchNorm2d(64)
        self.max_pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(64, 128, 3)
        self.batch_norm2 = nn.BatchNorm2d(128)
        self.max_pool2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(128, 256, 3)
        self.batch_norm3 = nn.BatchNorm2d(256)
        self.max_pool3 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(4096, 2000)
        self.fc2 = nn.Linear(2000, 1000)
        self.fc3 = nn.Linear(1000, num_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(x.shape[0], num_classes)``."""
        stages = (
            (self.conv1, self.batch_norm1, self.max_pool1),
            (self.conv2, self.batch_norm2, self.max_pool2),
            (self.conv3, self.batch_norm3, self.max_pool3),
        )
        for conv, norm, pool in stages:
            x = pool(norm(conv(x)))
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [19]:
# Third experiment: conv_net3, bound to its own name `net3`, with the same
# Adam / cross-entropy setup. `optimizer` and `criterion` are rebound here.
net3 = conv_net3()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net3.parameters(), lr=0.01)

In [20]:
# Run 25 epochs on `net3` with the optimizer/criterion currently bound to
# `optimizer` and `criterion`, using the same data loaders as before.
train(net3, train_data, test_data, 25, optimizer, criterion)

Epoch 0. Train Loss: 1.715153, Train Acc: 0.718586, Valid Loss: 0.668927, Valid Acc: 0.805212, Time:00:01:08
Epoch 1. Train Loss: 0.270468, Train Acc: 0.918375, Valid Loss: 0.914820, Valid Acc: 0.825802, Time:00:01:30
Epoch 2. Train Loss: 0.215664, Train Acc: 0.940155, Valid Loss: 0.592757, Valid Acc: 0.891869, Time:00:01:35
Epoch 3. Train Loss: 0.160126, Train Acc: 0.956714, Valid Loss: 0.656955, Valid Acc: 0.915912, Time:00:01:33
Epoch 4. Train Loss: 0.232850, Train Acc: 0.947376, Valid Loss: 1.734532, Valid Acc: 0.752714, Time:00:01:34
Epoch 5. Train Loss: 0.125665, Train Acc: 0.967359, Valid Loss: 0.500475, Valid Acc: 0.925076, Time:00:01:36
Epoch 6. Train Loss: 0.066225, Train Acc: 0.983065, Valid Loss: 0.345162, Valid Acc: 0.946731, Time:00:01:35
Epoch 7. Train Loss: 0.027291, Train Acc: 0.992226, Valid Loss: 0.293368, Valid Acc: 0.950814, Time:00:01:35
Epoch 8. Train Loss: 0.030351, Train Acc: 0.991645, Valid Loss: 0.571581, Valid Acc: 0.921619, Time:00:01:35
Epoch 9. Train Loss

# 总结

第一个模型和第三个模型的表现比较类似：第一个模型在第二和第三个 conv layer 上加了 padding = 1，而第三个模型在第一个 conv layer 上使用了 stride = 2。最终两个模型在训练集上都有 99.9% 的准确率，在测试集上大约为 96%。
但是第二个模型的表现就明显和第一个、第三个模型相差甚远。我其实并没有改动什么，只是在第一个模型的基础上将每层 filter 的数量都翻了一倍，相应地全连接层也多加了一层，但是模型训练却比第一个模型难多了。最终训练集上的准确率只有 92%，测试集上只有 87%。
第二个模型和第一个、第三个模型的表现差了那么多，我想是因为每层 filter 的数量都增加了一倍，导致卷积层和最后的全连接层要训练的参数比第一个模型多了很多，训练难度更大。