# 0. Setting
<hr>

In [8]:
# import library
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import math
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

torch.__version__


'1.7.0+cu101'

# 1. Data
<hr>

In [32]:
from torchvision import transforms, datasets

data_path = './MNIST'

data_train  = datasets.MNIST(root = data_path, train= False, download=True)
data_test = datasets.MNIST(root = data_path, train= True, download=True)

data_train_mean = data_train.data.float().mean()/255
data_train_std = data_train.data.float().std()/255

data_test_mean = data_test.data.float().mean()/255
data_test_std = data_test.data.float().std()/255


print("train data mean = {}, std = {}".format(data_train_mean, data_train_std))
print("test data mean = {}, std = {}".format(data_test_mean, data_test_std))


train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_train_mean,),(data_train_std,)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_test_mean,),(data_test_std,)),
])

data_train  = datasets.MNIST(root = data_path, train= False, download=True, transform= test_transform)
data_test   = datasets.MNIST(root = data_path, train= True, download=True, transform= train_transform)

print("the number of your training data (must be 10,000) = ", data_train.__len__())
print("hte number of your testing data (must be 60,000) = ", data_test.__len__())


train data mean = 0.1325146108865738, std = 0.3104802668094635
test data mean = 0.13066047430038452, std = 0.30810779333114624
the number of your training data (must be 10,000) =  10000
hte number of your testing data (must be 60,000) =  60000


# 2.Model
<hr>

In [128]:
def init_weights(m):
  if type(m) == nn.Linear:
    nn.init.kaiming_normal_(m.weight.data)
    m.bias.data.fill_(0)

class classification(nn.Module):

    def __init__(self):
        super(classification, self).__init__()
        
        # construct layers for a neural network
        self.classifier1 = nn.Sequential(
            nn.Linear(in_features=28*28, out_features=20*20),
            nn.ReLU(),
        ) 
        self.classifier2 = nn.Sequential(
            nn.Linear(in_features=20*20, out_features=10*10),
            nn.ReLU(),
        ) 
        self.classifier3 = nn.Sequential(
            nn.Linear(in_features=10*10, out_features=10),
            nn.ReLU(),
        ) 

        self.classifier1.apply(init_weights)
        self.classifier2.apply(init_weights)
        self.classifier3.apply(init_weights)
        
    def forward(self, inputs):                 # [batchSize, 1, 28, 28]
        x = inputs.view(inputs.size(0), -1)    # [batchSize, 28*28]
        x = self.classifier1(x)                # [batchSize, 20*20]
        x = self.classifier2(x)                # [batchSize, 10*10]
        out = self.classifier3(x)              # [batchSize, 10]
        
        return out


# 3. Loss Function
<hr>

In [48]:
model = classification()
criterion = nn.CrossEntropyLoss()
train_y_pred = model(data_train.data.float())
train_y = data_train.targets
temp_loss = criterion(train_y_pred, train_y)
print(temp_loss.data.item())

9.917991638183594


#4. Optimization
<hr>

Define Train Function

In [60]:
def train(model, criterion, train_loader, optimizer, batch_size):
  
  model.train()
  loss_sum = 0
  acc_sum = 0
  iteration = 0
  for xs, ts in iter(train_loader):
    iteration = iteration + 1
    optimizer.zero_grad()
    y_pred = model(xs)
    loss = criterion(y_pred, ts)
    loss.backward()
    optimizer.step()
    prediction = y_pred.data.max(1)[1] # first column has actual prob
    loss_sum = loss_sum + loss.data.item()
    acc_sum = acc_sum + prediction.eq(ts.data).sum().item()/batch_size
  
  loss_avg = math.trunc((loss_sum/iteration) * 1000) / 1000
  acc_avg = math.trunc((acc_sum/iteration) * 1000) / 1000
  
  return loss_avg, acc_avg

Define Test Function

In [62]:
def test(model,criterion, test_loader, batch_size):
  model.eval()
  loss_sum = 0
  acc_sum = 0
  iteration = 0
  with torch.no_grad():
    for xs, ts in iter(test_loader):
      iteration = iteration + 1
      y_pred = model(xs)
      loss_sum = loss_sum + criterion(y_pred, ts).data.item()
      prediction = y_pred.argmax(dim = 1, keepdim = True)
      acc_sum = acc_sum + prediction.eq(ts.view_as(prediction)).sum().item()/batch_size
  
  loss_avg = math.trunc((loss_sum/iteration) * 1000) / 1000
  acc_avg = math.trunc((acc_sum/iteration) * 1000) / 1000
  
  return loss_avg, acc_avg

Define Gradient Descent Fucntion

In [71]:
def gradient_descent(model, optimizer, criterion, batch_size, num_epochs):

  # batching
  train_loader = torch.utils.data.DataLoader(
      data_train,
      batch_size=batch_size,
      shuffle=True)
  
  test_loader = torch.utils.data.DataLoader(
      data_test,
      batch_size=batch_size,
      shuffle=True)
  
  
  # return variables
  train_loss_list, train_acc_list = [], []
  test_loss_list, test_acc_list = [], []

  
  # run training & testing
  for epoch in range(num_epochs + 1):

    train_loss_avg, train_acc_avg = train(model, criterion, train_loader, optimizer, batch_size)
    test_loss_avg, test_acc_avg = test(model, criterion, test_loader, batch_size)
    
    # add loss and accuracy data
    train_loss_list.append(train_loss_avg)
    train_acc_list.append(train_acc_avg)
    test_loss_list.append(test_loss_avg)
    test_acc_list.append(test_acc_avg)

    # print
    if epoch % 10 != 0 :
      continue

    print("epoch : ", epoch, " -------------------------------------- ")
    print("train loss : {}      accuracy = {}".format(train_loss_avg, train_acc_avg))
    print("test loss : {}       accuracy = {}".format(test_loss_avg, test_acc_avg))


  return train_loss_list, train_acc_list, test_loss_list, test_acc_list


# 5. Select Hyperparameter & Modify/Test Model
<hr>

size of the mini-batch : 64
<br>optimization algorithm : SGD
<br>loss funtion : cross entropy
<br>regularization algorithm : -
<br>learning rate : constant

In [72]:
# model
model = classification()

# mini-batch size
batch_size = 64

# num of epochs
num_epochs = 30

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list1, train_acc_list1, test_loss_list1, test_acc_list1 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 2.062      accuracy = 0.452
test loss : 1.722       accuracy = 0.594
epoch :  10  -------------------------------------- 
train loss : 0.467      accuracy = 0.841
test loss : 0.524       accuracy = 0.829
epoch :  20  -------------------------------------- 
train loss : 0.382      accuracy = 0.86
test loss : 0.465       accuracy = 0.843
epoch :  30  -------------------------------------- 
train loss : 0.333      accuracy = 0.872
test loss : 0.432       accuracy = 0.85


size of the mini-batch : 64
<br>optimization algorithm : Adam
<br>loss funtion : cross entropy
<br>regularization algorithm : -
<br>learning rate : constant

In [88]:
# model
model = classification()

# mini-batch size
batch_size = 64

# num of epochs
num_epochs = 30

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list2, train_acc_list2, test_loss_list2, test_acc_list2 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 2.02      accuracy = 0.258
test loss : 1.944       accuracy = 0.284
epoch :  10  -------------------------------------- 
train loss : 0.381      accuracy = 0.852
test loss : 0.605       accuracy = 0.818
epoch :  20  -------------------------------------- 
train loss : 0.338      accuracy = 0.862
test loss : 0.633       accuracy = 0.827
epoch :  30  -------------------------------------- 
train loss : 0.303      accuracy = 0.87
test loss : 0.795       accuracy = 0.827


size of the mini-batch : 64
<br>optimization algorithm : SGD
<br>loss funtion : cross entropy
<br>regularization algorithm : wegiht decay
<br>learning rate : constant

In [94]:
# model
model = classification()

# mini-batch size
batch_size = 64

# num of epochs
num_epochs = 30

# learning rate
learning_rate = 0.05

# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, weight_decay=0.1)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list3, train_acc_list3, test_loss_list3, test_acc_list3 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 1.276      accuracy = 0.661
test loss : 0.707       accuracy = 0.791
epoch :  10  -------------------------------------- 
train loss : 0.527      accuracy = 0.875
test loss : 0.607       accuracy = 0.853
epoch :  20  -------------------------------------- 
train loss : 0.52      accuracy = 0.875
test loss : 0.637       accuracy = 0.831
epoch :  30  -------------------------------------- 
train loss : 0.522      accuracy = 0.873
test loss : 0.594       accuracy = 0.839


size of the mini-batch : 64
<br>optimization algorithm : Adam
<br>loss funtion : cross entropy
<br>regularization algorithm : wegiht decay
<br>learning rate : constant

In [129]:
# model
model = classification()

# mini-batch size
batch_size = 32

# num of epochs
num_epochs = 200

# learning rate
learning_rate = 0.001

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=0.1)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list4, train_acc_list4, test_loss_list4, test_acc_list4 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 0.933      accuracy = 0.736
test loss : 0.71       accuracy = 0.802


KeyboardInterrupt: ignored