In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import random

In [105]:
# device
device = 'cuda'

In [15]:
# 設置seed
seed = 198964
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [106]:
# Import data
train_data = datasets.MNIST('./MNIST_data', train=True, download=True,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))
                            ]))
test_data = datasets.MNIST('./MNIST_data', train=False, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,),(0.3081,))
                           ]))
val_data, test_data = torch.utils.data.random_split(dataset=test_data, lengths=[5000,5000]) # 將test分成validation和test，validation用來驗證model，test最後用於測試

In [17]:
# Dataloader
train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True, num_workers=3)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=1000, shuffle=True, num_workers=3)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1000, shuffle=True, num_workers=3)

In [108]:
#training function
def train(model, loss_func, data_loader, optimizer):
    model.train()
    train_loss = 0
    correct = 0.
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_func(output, target)
        train_loss +=  loss.item()
        pred = output.argmax(dim=1) # dim 0 是一個batch內資料的數量，所以從dim 1取最大值的index
        correct += pred.eq(target).sum() # 若index跟target一樣會表示成true，sum會把true當作1並加起來
        loss.backward()
        optimizer.step()
    train_loss /= len(data_loader)
    correct /= len(data_loader.dataset)
    print(f'Training Loss: {train_loss:.4f}, Accuracy: {100. * correct:.2f}%')

In [109]:
#Testing function
def evaluate(model, loss_func, data_loader, val=True):
    model.eval()
    test_loss = 0
    correct = 0.
    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(device), target.to(device)
        with torch.no_grad():
            output = model(data)
            test_loss += loss_func(output, target).item()
            pred = output.argmax(dim=1) 
            correct += pred.eq(target).sum()

    test_loss /= len(data_loader)
    correct /= len(data_loader.dataset)
    if val: # 若val=False則改印test的訊息
        print(f'Validation Loss: {test_loss:.4f}, Accuracy: {100. * correct:.2f}%\n')
    else:
        print(f'Test Loss: {test_loss:.4f}, Accuracy: {100. * correct:.2f}%\n')

# Optimizer: SGD<br>
備註：如果訓練顯示訊息太長可以右鍵點選"Enable Scrolling for Output"

## Vanilla version

In [79]:
# module
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [78]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.4733, Accuracy: 85.20%
Validation Loss: 0.0691, Accuracy: 97.84%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.0775, Accuracy: 97.60%
Validation Loss: 0.0463, Accuracy: 98.68%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0566, Accuracy: 98.23%
Validation Loss: 0.0405, Accuracy: 98.84%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0447, Accuracy: 98.58%
Validation Loss: 0.0335, Accuracy: 98.88%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0367, Accuracy: 98.82%
Validation Loss: 0.0311, Accuracy: 99.06%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0319, Accuracy: 99.00%
Validation Loss: 0.0287, Accuracy: 99.12%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0275, Accuracy: 99.08%
Validation Loss: 0.0327, Accuracy: 98.84%

Adjust

## Insert two 3x3 convolution layers

In [85]:
# module with two more layers
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv3 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv4 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) # make neuron 2x wider
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv3(out))
        out = F.relu(self.conv4(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) #flatten
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [83]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.9278, Accuracy: 66.73%
Validation Loss: 0.1085, Accuracy: 96.38%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.1078, Accuracy: 96.68%
Validation Loss: 0.0928, Accuracy: 97.14%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0747, Accuracy: 97.68%
Validation Loss: 0.0618, Accuracy: 97.96%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0589, Accuracy: 98.18%
Validation Loss: 0.0517, Accuracy: 98.38%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0483, Accuracy: 98.48%
Validation Loss: 0.0441, Accuracy: 98.78%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0403, Accuracy: 98.74%
Validation Loss: 0.0446, Accuracy: 98.70%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0347, Accuracy: 98.91%
Validation Loss: 0.0468, Accuracy: 98.48%

Adjust

## 2x more neuron

In [86]:
# module with 2x more neuron
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 240) 
        self.fc2 = nn.Linear(240, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [83]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.01, momentum=0.9)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.9278, Accuracy: 66.73%
Validation Loss: 0.1085, Accuracy: 96.38%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.1078, Accuracy: 96.68%
Validation Loss: 0.0928, Accuracy: 97.14%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0747, Accuracy: 97.68%
Validation Loss: 0.0618, Accuracy: 97.96%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0589, Accuracy: 98.18%
Validation Loss: 0.0517, Accuracy: 98.38%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0483, Accuracy: 98.48%
Validation Loss: 0.0441, Accuracy: 98.78%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0403, Accuracy: 98.74%
Validation Loss: 0.0446, Accuracy: 98.70%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0347, Accuracy: 98.91%
Validation Loss: 0.0468, Accuracy: 98.48%

Adjust

# Optimizer: Adam

## Vanilla version

In [79]:
# module
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [87]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.01)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.1728, Accuracy: 94.70%
Validation Loss: 0.0811, Accuracy: 97.64%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.0789, Accuracy: 97.83%
Validation Loss: 0.0761, Accuracy: 97.66%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0687, Accuracy: 98.16%
Validation Loss: 0.0497, Accuracy: 98.54%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0669, Accuracy: 98.25%
Validation Loss: 0.0761, Accuracy: 97.92%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0610, Accuracy: 98.45%
Validation Loss: 0.0657, Accuracy: 97.90%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0682, Accuracy: 98.32%
Validation Loss: 0.0620, Accuracy: 98.44%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0578, Accuracy: 98.59%
Validation Loss: 0.0586, Accuracy: 98.70%

Adjust

## Insert two 3x3 convolution layers

In [94]:
# module with two more layers
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv3 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv4 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) # make neuron 2x wider
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv3(out))
        out = F.relu(self.conv4(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) #flatten
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [95]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.01)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.2347, Accuracy: 92.21%
Validation Loss: 0.0667, Accuracy: 97.78%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.0875, Accuracy: 97.40%
Validation Loss: 0.0635, Accuracy: 98.10%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0783, Accuracy: 97.76%
Validation Loss: 0.0563, Accuracy: 98.58%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0717, Accuracy: 97.92%
Validation Loss: 0.0996, Accuracy: 97.26%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0680, Accuracy: 98.16%
Validation Loss: 0.0766, Accuracy: 97.74%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0744, Accuracy: 97.99%
Validation Loss: 0.0925, Accuracy: 97.82%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0715, Accuracy: 98.10%
Validation Loss: 0.0546, Accuracy: 98.50%

Adjust

## 2x more neuron

In [96]:
# module with 2x more neuron
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 240) 
        self.fc2 = nn.Linear(240, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [97]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=0.01)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 1
Training Loss: 0.1654, Accuracy: 94.96%
Validation Loss: 0.0823, Accuracy: 97.42%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 2
Training Loss: 0.0777, Accuracy: 97.81%
Validation Loss: 0.0712, Accuracy: 98.14%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 3
Training Loss: 0.0643, Accuracy: 98.23%
Validation Loss: 0.0614, Accuracy: 98.46%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 4
Training Loss: 0.0686, Accuracy: 98.19%
Validation Loss: 0.0687, Accuracy: 98.12%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 5
Training Loss: 0.0669, Accuracy: 98.26%
Validation Loss: 0.0713, Accuracy: 98.28%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 6
Training Loss: 0.0597, Accuracy: 98.47%
Validation Loss: 0.0666, Accuracy: 98.50%

Adjusting learning rate of group 0 to 1.0000e-02.
Epoch: 7
Training Loss: 0.0579, Accuracy: 98.58%
Validation Loss: 0.0812, Accuracy: 98.20%

Adjust

# Optimizer: RMSprop

## Vanilla version

In [98]:
# module
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) 
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [100]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(),lr=0.001,alpha=0.5)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 1
Training Loss: 0.3627, Accuracy: 88.93%
Validation Loss: 0.0954, Accuracy: 96.90%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 2
Training Loss: 0.0847, Accuracy: 97.35%
Validation Loss: 0.0446, Accuracy: 98.62%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 3
Training Loss: 0.0600, Accuracy: 98.13%
Validation Loss: 0.0474, Accuracy: 98.32%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 4
Training Loss: 0.0486, Accuracy: 98.49%
Validation Loss: 0.0345, Accuracy: 98.90%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 5
Training Loss: 0.0416, Accuracy: 98.78%
Validation Loss: 0.0486, Accuracy: 98.62%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 6
Training Loss: 0.0375, Accuracy: 98.88%
Validation Loss: 0.0407, Accuracy: 98.92%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 7
Training Loss: 0.0345, Accuracy: 98.98%
Validation Loss: 0.0497, Accuracy: 98.72%

Adjust

## Insert two 3x3 convolution layers

In [101]:
# module with two more layers
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv3 = nn.Conv2d(6, 6, kernel_size=(3,3),stride=1, padding=1)
        self.conv4 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 120) # make neuron 2x wider
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv3(out))
        out = F.relu(self.conv4(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) #flatten
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [102]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(),lr=0.001,alpha=0.5)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 1
Training Loss: 0.4352, Accuracy: 85.56%
Validation Loss: 0.0829, Accuracy: 97.32%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 2
Training Loss: 0.0830, Accuracy: 97.39%
Validation Loss: 0.0636, Accuracy: 98.04%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 3
Training Loss: 0.0576, Accuracy: 98.26%
Validation Loss: 0.0487, Accuracy: 98.38%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 4
Training Loss: 0.0466, Accuracy: 98.58%
Validation Loss: 0.0370, Accuracy: 98.90%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 5
Training Loss: 0.0402, Accuracy: 98.78%
Validation Loss: 0.0372, Accuracy: 98.78%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 6
Training Loss: 0.0352, Accuracy: 98.98%
Validation Loss: 0.0410, Accuracy: 98.88%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 7
Training Loss: 0.0319, Accuracy: 99.10%
Validation Loss: 0.0324, Accuracy: 99.12%

Adjust

## 2x more neuron

In [103]:
# module with 2x more neuron
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, kernel_size=(5,5),stride=1, padding=0)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=(5,5),stride=1, padding=0)
        self.fc1 = nn.Linear(16*4*4, 240) 
        self.fc2 = nn.Linear(240, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out, 2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out, 2)
        out = torch.flatten(out, start_dim=1) 
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out

In [104]:
# Start training model
model = Net().to(device) # build model
epochs = 20
loss_func = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(),lr=0.001,alpha=0.5)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.1, verbose=True)
for epoch in range(epochs):
    print(f'Epoch: {epoch+1}')
    train(model, loss_func, train_loader, optimizer)
    evaluate(model, loss_func, val_loader, val=True)
    lr_scheduler.step()
    
evaluate(model, loss_func, test_loader, val=False)

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 1
Training Loss: 0.3235, Accuracy: 90.08%
Validation Loss: 0.0747, Accuracy: 97.48%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 2
Training Loss: 0.0734, Accuracy: 97.71%
Validation Loss: 0.0443, Accuracy: 98.68%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 3
Training Loss: 0.0526, Accuracy: 98.41%
Validation Loss: 0.0478, Accuracy: 98.58%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 4
Training Loss: 0.0443, Accuracy: 98.67%
Validation Loss: 0.0396, Accuracy: 98.90%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 5
Training Loss: 0.0384, Accuracy: 98.88%
Validation Loss: 0.0368, Accuracy: 99.00%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 6
Training Loss: 0.0337, Accuracy: 99.02%
Validation Loss: 0.0356, Accuracy: 99.14%

Adjusting learning rate of group 0 to 1.0000e-03.
Epoch: 7
Training Loss: 0.0305, Accuracy: 99.14%
Validation Loss: 0.0450, Accuracy: 98.92%

Adjust