# 0. Setting
<hr>

In [1]:
# import library
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import math
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

torch.__version__


'1.7.0+cu101'

# 1. Data
<hr>

In [2]:
from torchvision import transforms, datasets

data_path = './MNIST'

data_train  = datasets.MNIST(root = data_path, train= False, download=True)
data_test = datasets.MNIST(root = data_path, train= True, download=True)

data_train_mean = data_train.data.float().mean()/255
data_train_std = data_train.data.float().std()/255

data_test_mean = data_test.data.float().mean()/255
data_test_std = data_test.data.float().std()/255


print("train data mean = {}, std = {}".format(data_train_mean, data_train_std))
print("test data mean = {}, std = {}".format(data_test_mean, data_test_std))

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,),(0.3081,)),  # mean value = 0.1307, standard deviation value = 0.3081
])

'''
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_train_mean,),(data_train_std,)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((data_test_mean,),(data_test_std,)),
])
'''

data_train  = datasets.MNIST(root = data_path, train= False, download=True, transform= transform)
data_test   = datasets.MNIST(root = data_path, train= True, download=True, transform= transform)

print("the number of your training data (must be 10,000) = ", data_train.__len__())
print("hte number of your testing data (must be 60,000) = ", data_test.__len__())


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


train data mean = 0.1325146108865738, std = 0.3104802668094635
test data mean = 0.13066047430038452, std = 0.30810779333114624
the number of your training data (must be 10,000) =  10000
hte number of your testing data (must be 60,000) =  60000


# 2.Model
<hr>

In [3]:
def init_weights(m):
  if type(m) == nn.Linear:
    nn.init.xavier_normal_(m.weight.data)
    m.bias.data.fill_(0)

class classification(nn.Module):

    def __init__(self):
        super(classification, self).__init__()
        
        # construct layers for a neural network
        self.classifier1 = nn.Sequential(
            nn.Linear(in_features=28*28, out_features=20*20),
            #nn.ReLU(),
            nn.Sigmoid()
        ) 
        self.classifier2 = nn.Sequential(
            nn.Linear(in_features=20*20, out_features=10*10),
            #nn.ReLU(),
            nn.Sigmoid()
        ) 
        self.classifier3 = nn.Sequential(
            nn.Linear(in_features=10*10, out_features=10),
            #nn.ReLU(),
            nn.Sigmoid()
        ) 

        self.classifier1.apply(init_weights)
        self.classifier2.apply(init_weights)
        self.classifier3.apply(init_weights)
        
    def forward(self, inputs):                 # [batchSize, 1, 28, 28]
        x = inputs.view(inputs.size(0), -1)    # [batchSize, 28*28]
        x = self.classifier1(x)                # [batchSize, 20*20]
        x = self.classifier2(x)                # [batchSize, 10*10]
        out = self.classifier3(x)              # [batchSize, 10]
        
        return out


# 3. Loss Function
<hr>

In [4]:
model = classification()
criterion = nn.CrossEntropyLoss()
train_y_pred = model(data_train.data.float())
train_y = data_train.targets
temp_loss = criterion(train_y_pred, train_y)
print(temp_loss.data.item())



2.3066117763519287


#4. Optimization
<hr>

Define Train Function

In [5]:
def train(model, criterion, train_loader, optimizer, batch_size):
  
  model.train()
  loss_sum = 0
  acc_sum = 0
  iteration = 0
  for xs, ts in iter(train_loader):

    iteration = iteration + 1
    optimizer.zero_grad()
    y_pred = model(xs)
    loss = criterion(y_pred, ts)
    loss.backward()
    optimizer.step()
    
    loss_sum = loss_sum + float(loss)
    zs = y_pred.max(1, keepdim=True)[1] # first column has actual prob
    acc_sum = acc_sum + zs.eq(ts.view_as(zs)).sum().item()/batch_size
  
  loss_avg = math.trunc(loss_sum/iteration * 100) / 100
  acc_avg = math.trunc(acc_sum/iteration * 100) / 100
  
  return loss_avg, acc_avg

Define Test Function

In [6]:
def test(model,criterion, test_loader, batch_size):
  model.eval()
  loss_sum = 0
  acc_sum = 0
  iteration = 0
  with torch.no_grad():
    for xs, ts in iter(test_loader):
      iteration = iteration + 1
      xs = xs.view(-1, 28*28)
      y_pred = model(xs)
      loss_sum = loss_sum + criterion(y_pred, ts).data.item()
      zs = y_pred.max(1, keepdim=True)[1]
      acc_sum = acc_sum + zs.eq(ts.view_as(zs)).sum().item()/batch_size
  
  loss_avg = round(loss_sum/iteration, 2)
  acc_avg = round(acc_sum/iteration, 2)
  
  return loss_avg, acc_avg

Define Gradient Descent Fucntion

In [7]:
def gradient_descent(model, optimizer, criterion, batch_size, num_epochs):

  # batching
  train_loader = torch.utils.data.DataLoader(
      data_train,
      batch_size=batch_size,
      num_workers=2,
      shuffle=True,
      drop_last=True)
  
  test_loader = torch.utils.data.DataLoader(
      data_test,
      batch_size=batch_size,
      num_workers=2,
      shuffle=False,
      drop_last=True)
  
  
  # return variables
  train_loss_list, train_acc_list = [], []
  test_loss_list, test_acc_list = [], []

  
  # run training & testing
  for epoch in range(num_epochs + 1):

    train_loss_avg, train_acc_avg = train(model, criterion, train_loader, optimizer, batch_size)
    test_loss_avg, test_acc_avg = test(model, criterion, test_loader, batch_size)
    
    # add loss and accuracy data
    train_loss_list.append(train_loss_avg)
    train_acc_list.append(train_acc_avg)
    test_loss_list.append(test_loss_avg)
    test_acc_list.append(test_acc_avg)

    # print
    if epoch % 10 != 0 :
      continue

    print("epoch : ", epoch, " -------------------------------------- ")
    print("train loss : {}      accuracy = {}".format(train_loss_avg, train_acc_avg))
    print("test loss : {}       accuracy = {}".format(test_loss_avg, test_acc_avg))


  return train_loss_list, train_acc_list, test_loss_list, test_acc_list


# 5. Select Hyperparameter & Modify/Test Model
<hr>

size of the mini-batch : 64
<br>optimization algorithm : SGD
<br>loss funtion : cross entropy
<br>regularization algorithm : -
<br>learning rate : constant

In [None]:
# model
model = classification()

# mini-batch size
batch_size = 32

# num of epochs
num_epochs = 200

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list1, train_acc_list1, test_loss_list1, test_acc_list1 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 2.302      accuracy = 0.146
test loss : 2.288       accuracy = 0.136
epoch :  10  -------------------------------------- 
train loss : 2.023      accuracy = 0.761
test loss : 2.018       accuracy = 0.753
epoch :  20  -------------------------------------- 
train loss : 1.843      accuracy = 0.821
test loss : 1.846       accuracy = 0.808
epoch :  30  -------------------------------------- 
train loss : 1.761      accuracy = 0.845
test loss : 1.769       accuracy = 0.828
epoch :  40  -------------------------------------- 
train loss : 1.713      accuracy = 0.86
test loss : 1.722       accuracy = 0.845
epoch :  50  -------------------------------------- 
train loss : 1.681      accuracy = 0.872
test loss : 1.691       accuracy = 0.858
epoch :  60  -------------------------------------- 
train loss : 1.657      accuracy = 0.882
test loss : 1.669       accuracy = 0.866
epoch :  70  -------------------------------------- 
trai

size of the mini-batch : 32
<br>optimization algorithm : Adam
<br>loss funtion : cross entropy
<br>regularization algorithm : -
<br>learning rate : constant

In [None]:
# model
model = classification()

# mini-batch size
batch_size = 32

# num of epochs
num_epochs = 200

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list2, train_acc_list2, test_loss_list2, test_acc_list2 = gradient_descent(model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 1.68      accuracy = 0.728
test loss : 1.582       accuracy = 0.842
epoch :  10  -------------------------------------- 
train loss : 1.513      accuracy = 0.939
test loss : 1.532       accuracy = 0.918
epoch :  20  -------------------------------------- 
train loss : 1.503      accuracy = 0.95
test loss : 1.529       accuracy = 0.918
epoch :  30  -------------------------------------- 
train loss : 1.506      accuracy = 0.948
test loss : 1.532       accuracy = 0.917
epoch :  40  -------------------------------------- 
train loss : 1.501      accuracy = 0.953
test loss : 1.531       accuracy = 0.919
epoch :  50  -------------------------------------- 
train loss : 1.501      accuracy = 0.955
test loss : 1.524       accuracy = 0.925
epoch :  60  -------------------------------------- 
train loss : 1.502      accuracy = 0.952
test loss : 1.53       accuracy = 0.919
epoch :  70  -------------------------------------- 
train 

KeyboardInterrupt: ignored

In [13]:
def gradient_descent_with_scheduler(scheduler, model, optimizer, criterion, batch_size, num_epochs):

  # batching
  train_loader = torch.utils.data.DataLoader(
      data_train,
      batch_size=batch_size,
      num_workers=2,
      shuffle=True,
      drop_last=True)
  
  test_loader = torch.utils.data.DataLoader(
      data_test,
      batch_size=batch_size,
      num_workers=2,
      shuffle=False,
      drop_last=True)
  
  
  # return variables
  train_loss_list, train_acc_list = [], []
  test_loss_list, test_acc_list = [], []

  
  # run training & testing
  for epoch in range(num_epochs + 1):

    train_loss_avg, train_acc_avg = train(model, criterion, train_loader, optimizer, batch_size)
    test_loss_avg, test_acc_avg = test(model, criterion, test_loader, batch_size)
    scheduler.step(train_loss_avg)
        
    # add loss and accuracy data
    train_loss_list.append(train_loss_avg)
    train_acc_list.append(train_acc_avg)
    test_loss_list.append(test_loss_avg)
    test_acc_list.append(test_acc_avg)

    # print
    if epoch % 10 != 0 :
      continue

    print("epoch : ", epoch, " -------------------------------------- ")
    print("train loss : {}      accuracy = {}".format(train_loss_avg, train_acc_avg))
    print("test loss : {}       accuracy = {}".format(test_loss_avg, test_acc_avg))


  return train_loss_list, train_acc_list, test_loss_list, test_acc_list


size of the mini-batch : 32
<br>optimization algorithm : Adam
<br>loss funtion : cross entropy
<br>regularization algorithm : weight decay
<br>learning rate : ExponentialLR

In [None]:
# model
model = classification()

# mini-batch size
batch_size = 32

# num of epochs
num_epochs = 200

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma= 0.99)   

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list4, train_acc_list4, test_loss_list4, test_acc_list4 = gradient_descent_with_scheduler(scheduler, model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 1.66      accuracy = 0.78
test loss : 1.6       accuracy = 0.84


KeyboardInterrupt: ignored

scheduler 변경

In [16]:
# model
model = classification()

# mini-batch size
batch_size = 32

# num of epochs
num_epochs = 100

# learning rate
learning_rate = 0.01

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, verbose=True)   

# loss function
criterion = nn.CrossEntropyLoss()

# run
train_loss_list6, train_acc_list6, test_loss_list6, test_acc_list6 = gradient_descent_with_scheduler(scheduler, model, optimizer, criterion, batch_size, num_epochs)

epoch :  0  -------------------------------------- 
train loss : 1.65      accuracy = 0.77
test loss : 1.59       accuracy = 0.82
epoch :  10  -------------------------------------- 
train loss : 1.55      accuracy = 0.9
test loss : 1.57       accuracy = 0.88
epoch :  20  -------------------------------------- 
train loss : 1.54      accuracy = 0.9
test loss : 1.56       accuracy = 0.89
Epoch    26: reducing learning rate of group 0 to 5.0000e-03.
epoch :  30  -------------------------------------- 
train loss : 1.51      accuracy = 0.95
test loss : 1.54       accuracy = 0.93
epoch :  40  -------------------------------------- 
train loss : 1.5      accuracy = 0.96
test loss : 1.53       accuracy = 0.94
Epoch    43: reducing learning rate of group 0 to 2.5000e-03.
epoch :  50  -------------------------------------- 
train loss : 1.48      accuracy = 0.98
test loss : 1.52       accuracy = 0.95
Epoch    55: reducing learning rate of group 0 to 1.2500e-03.
epoch :  60  -------------------