<a href="https://colab.research.google.com/github/yesoly/MachineLearningProject/blob/master/Assignment_09.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification for Multiple Categories using PyTorch
Build a classifier for the digit classification task with 10 classes on the MNIST dataset.

In [None]:
import os

# load data
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# train
import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import numpy as np

# visualization
import matplotlib.pyplot as plt

## 1. Data

- apply normalization

In [None]:
# Preprocessing applied to every image: convert it to a tensor, then
# standardize it with mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

- load the MNIST dataset

In [None]:
# Directory that holds the MNIST files; download=True fetches them if absent.
data_path = './MNIST'

# Training split and test split, both passed through the same `transform`.
training_set = datasets.MNIST(
    root=data_path,
    train=True,
    download=True,
    transform=transform,
)
testing_set = datasets.MNIST(
    root=data_path,
    train=False,
    download=True,
    transform=transform,
)

## 2. Model
- design a neural network that consists of three fully connected layers with an activation function of Sigmoid
- the activation function for the output layer is LogSoftmax

In [None]:
class classification(nn.Module):
    """Fully connected classifier with three linear layers.

    Each sample is flattened to 28*28 features and passed through layers of
    width 20*20, 10*10 and 10. The first two layers are followed by a Sigmoid;
    the last one is followed by LogSoftmax over dimension 1, so the network
    outputs log-probabilities.
    """

    def __init__(self):
        super().__init__()

        # Attribute names and creation order are kept unchanged so that
        # state_dict keys and seeded weight initialization stay the same.
        self.classifier1 = nn.Sequential(nn.Linear(28 * 28, 20 * 20), nn.Sigmoid())
        self.classifier2 = nn.Sequential(nn.Linear(20 * 20, 10 * 10), nn.Sigmoid())
        self.classifier3 = nn.Sequential(nn.Linear(10 * 10, 10), nn.LogSoftmax(dim=1))

    def forward(self, inputs):
        """Map a batch of inputs to log-probabilities of shape [batchSize, 10].

        ``inputs`` must flatten to 28*28 values per sample, e.g.
        [batchSize, 1, 28, 28].
        """
        flattened = inputs.view(inputs.size(0), -1)              # [batchSize, 28*28]
        hidden = self.classifier2(self.classifier1(flattened))   # [batchSize, 10*10]
        return self.classifier3(hidden)                          # [batchSize, 10]

## 3. Loss function

- the model output is the log of softmax (LogSoftmax)
- the loss is the negative log likelihood (NLL) computed on those log-probabilities

In [None]:
# Negative log-likelihood loss. It expects log-probabilities as input, which
# is what the model's final LogSoftmax layer produces.
criterion = nn.NLLLoss()

## 4. Optimization

- use a stochastic gradient descent algorithm with different mini-batch sizes of 32, 64, 128
- use a constant learning rate for all the mini-batch sizes
- do not use any regularization algorithm such as dropout or weight decay
- compute the average loss and the average accuracy for all the mini-batches within each epoch

In [None]:
# Use the first CUDA GPU when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Build the network and move its parameters to the selected device.
classifier = classification().to(device)
# Constant learning rate passed to the optimizer below.
learning_rate_value = 1e-3
# Plain SGD over all model parameters; no momentum or weight decay is passed.
optimizer = torch.optim.SGD(classifier.parameters(), lr=learning_rate_value)

## 6. Train

In [None]:
# Number of train/test passes per run; run_epoch reads this module-level value.
epochs = 60

In [None]:
def accuracy(log_pred, y_true):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        log_pred: per-class scores with classes along dimension 1.
        y_true: ground-truth class indices, one per row of ``log_pred``.

    Returns:
        A 0-d float tensor holding the mean of the per-row matches.
    """
    matches = log_pred.argmax(dim=1).eq(y_true)
    return matches.float().mean()

In [None]:
def train(model, batch_size, optimizer, criterion):
    """Run one training epoch over ``training_set`` and report averaged metrics.

    Args:
        model: network to optimize; it is switched to training mode here.
        batch_size: mini-batch size handed to the DataLoader.
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss callable taking ``(model_output, labels)``.

    Returns:
        ``(average_loss, accuracy)`` as Python floats: the mean of the
        per-batch losses and the fraction of correctly classified samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    train_loader = torch.utils.data.DataLoader(dataset=training_set, batch_size=batch_size, shuffle=True)
    for train_img, train_label in train_loader:
        train_img, train_label = train_img.to(device), train_label.to(device)
        train_output = model(train_img)
        loss = criterion(train_output, train_label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() yields a plain float. Summing the loss tensor itself would
        # keep autograd history alive for the whole epoch and make the
        # returned value a requires-grad tensor that cannot be plotted.
        loss_sum += loss.item()
        correct += (train_output.argmax(dim=1) == train_label).sum().item()
        total += train_label.size(0)

    average_loss = loss_sum / len(train_loader)
    train_accuracy = correct / total
    print("Training Loss: {:.4f} ".format(average_loss),
          "Train Accuracy: {:.4f}".format(train_accuracy))

    return average_loss, train_accuracy


def test(model, batch_size, optimizer, criterion):
    """Evaluate ``model`` on ``testing_set`` without updating its parameters.

    Args:
        model: network to evaluate; it is switched to evaluation mode here.
        batch_size: mini-batch size handed to the DataLoader.
        optimizer: unused; accepted so the signature mirrors ``train``.
        criterion: loss callable taking ``(model_output, labels)``.

    Returns:
        ``(average_loss, accuracy)`` as Python floats: the mean of the
        per-batch losses and the fraction of correctly classified samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    test_loader = torch.utils.data.DataLoader(dataset=testing_set, batch_size=batch_size, shuffle=False)
    with torch.no_grad():
        for test_img, test_label in test_loader:
            test_img, test_label = test_img.to(device), test_label.to(device)
            test_output = model(test_img)

            # Convert to Python numbers so the results match what `train`
            # returns and never stay behind as tensors on the GPU.
            loss_sum += criterion(test_output, test_label).item()
            correct += (test_output.argmax(dim=1) == test_label).sum().item()
            total += test_label.size(0)

    average_loss = loss_sum / len(test_loader)
    test_accuracy = correct / total
    print("Test Loss: {:.4f} ".format(average_loss),
          "Test Accuracy: {:.4f}".format(test_accuracy))

    return average_loss, test_accuracy

In [None]:
def run_epoch(model, batch_size, optimizer, criterion):
    """Train and evaluate ``model`` for ``epochs`` epochs, recording metrics.

    Each epoch calls ``train`` and then ``test`` with the same arguments and
    stores what they return.

    Returns:
        Four lists with one entry per epoch, in this order: training losses,
        test losses, training accuracies, test accuracies.
    """
    history = {"train_loss": [], "test_loss": [], "train_acc": [], "test_acc": []}

    for epoch_number in range(1, epochs + 1):
        print("Epoch: {}/{} : ".format(epoch_number, epochs))
        epoch_train_loss, epoch_train_acc = train(model, batch_size, optimizer, criterion)
        epoch_test_loss, epoch_test_acc = test(model, batch_size, optimizer, criterion)

        history["train_loss"].append(epoch_train_loss)
        history["train_acc"].append(epoch_train_acc)
        history["test_loss"].append(epoch_test_loss)
        history["test_acc"].append(epoch_test_acc)

    return (
        history["train_loss"],
        history["test_loss"],
        history["train_acc"],
        history["test_acc"],
    )

In [None]:
# mini-batch size = 32
# run_epoch returns per-epoch lists in the order:
# training loss, test loss, training accuracy, test accuracy.
train_loss_list, test_loss_list, train_acc_list, test_acc_list = run_epoch(classifier, 32, optimizer, criterion)

In [None]:
# mini-batch size = 64
# Re-create the model and its optimizer so this run starts from freshly
# initialized weights rather than continuing from an earlier run's training;
# otherwise the batch sizes are not compared from the same starting point.
classifier = classification().to(device)
optimizer = torch.optim.SGD(classifier.parameters(), lr=learning_rate_value)
# `training_mnist` is not defined; `run_epoch` is the driver with this
# signature and return order.
train_loss, test_loss, train_acc, test_acc = run_epoch(classifier, 64, optimizer, criterion)


In [None]:
# mini-batch size = 128
# Re-create the model and its optimizer so this run starts from freshly
# initialized weights rather than continuing from an earlier run's training;
# otherwise the batch sizes are not compared from the same starting point.
classifier = classification().to(device)
optimizer = torch.optim.SGD(classifier.parameters(), lr=learning_rate_value)
# `training_mnist` is not defined; `run_epoch` is the driver with this
# signature and return order.
train_loss, test_loss, train_acc, test_acc = run_epoch(classifier, 128, optimizer, criterion)

## 5. Output

1. Plot the training and testing losses with a batch size of 32 [4pt]

2. Plot the training and testing accuracies with a batch size of 32 [4pt]

3. Plot the training and testing losses with a batch size of 64 [4pt]

4. Plot the training and testing accuracies with a batch size of 64 [4pt]

5. Plot the training and testing losses with a batch size of 128 [4pt]

6. Plot the training and testing accuracies with a batch size of 128 [4pt]

7. Print the loss at convergence with different mini-batch sizes [3pt]

8. Print the accuracy at convergence with different mini-batch sizes [3pt]