In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn 
from torchvision import datasets, transforms
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score  
torch.manual_seed(101)


In [None]:
# Every sample goes through the same ToTensor() transform.
Transform = transforms.ToTensor()

# Arguments shared by both splits. download=False means nothing is fetched
# here, so the MNIST files must already exist under ../DATA.
_mnist_kwargs = dict(root='../DATA', download=False, transform=Transform)

# NOTE: a later cell defines functions that are also called `train` and
# `test`; once that cell runs, these two names no longer refer to the datasets.
train = datasets.MNIST(train=True, **_mnist_kwargs)
test = datasets.MNIST(train=False, **_mnist_kwargs)

## 1) Multi-Layer Neural Network

In [None]:
class MultilayerNetwork_v1(nn.Module):
    '''Fully connected classifier with 1 hidden layer (100 neurons by default).

    Args:
        input_size: number of features per sample after flattening
            (784 = 28 * 28 by default).
        output_size: number of output classes / logits.
        layers: sequence whose first entry is the hidden-layer width.

    forward() returns raw, unnormalised scores of shape (batch, output_size).
    '''
    def __init__(self, input_size=784, output_size=10, layers=(100,)) -> None:
        # The default is a tuple rather than a list so that no mutable object
        # is shared between calls; any indexable sequence is still accepted.
        super().__init__()
        self.h1 = nn.Linear(input_size, layers[0])
        self.predict = nn.Linear(layers[0], output_size)

    def forward(self, X):
        # Flatten to the width the first layer was built with, so that a
        # non-default `input_size` works instead of being silently ignored.
        X = X.view(-1, self.h1.in_features)
        X = F.relu(self.h1(X))
        X = self.predict(X)
        return X

class MultilayerNetwork_v2(nn.Module):
    '''Fully connected classifier with 1 hidden layer (50 neurons by default).

    Args:
        input_size: number of features per sample after flattening
            (784 = 28 * 28 by default).
        output_size: number of output classes / logits.
        layers: sequence whose first entry is the hidden-layer width.

    forward() returns raw, unnormalised scores of shape (batch, output_size).
    '''
    def __init__(self, input_size=784, output_size=10, layers=(50,)) -> None:
        # The default is a tuple rather than a list so that no mutable object
        # is shared between calls; any indexable sequence is still accepted.
        super().__init__()
        self.h1 = nn.Linear(input_size, layers[0])
        self.predict = nn.Linear(layers[0], output_size)

    def forward(self, X):
        # Flatten to the width the first layer was built with, so that a
        # non-default `input_size` works instead of being silently ignored.
        X = X.view(-1, self.h1.in_features)
        X = F.relu(self.h1(X))
        X = self.predict(X)
        return X

class MultilayerNetwork_v3(nn.Module):
    '''Fully connected classifier with 2 hidden layers (100 neurons each by default).

    Args:
        input_size: number of features per sample after flattening
            (784 = 28 * 28 by default).
        output_size: number of output classes / logits.
        layers: sequence giving the widths of the first and second hidden layer.

    forward() returns raw, unnormalised scores of shape (batch, output_size).
    '''
    def __init__(self, input_size=784, output_size=10, layers=(100, 100)) -> None:
        # The default is a tuple rather than a list so that no mutable object
        # is shared between calls; any indexable sequence is still accepted.
        super().__init__()
        self.h1 = nn.Linear(input_size, layers[0])
        self.h2 = nn.Linear(layers[0], layers[1])
        self.predict = nn.Linear(layers[1], output_size)

    def forward(self, X):
        # Flatten to the width the first layer was built with, so that a
        # non-default `input_size` works instead of being silently ignored.
        X = X.view(-1, self.h1.in_features)
        X = F.relu(self.h1(X))
        X = F.relu(self.h2(X))
        X = self.predict(X)
        return X

class CNN_1(nn.Module):
    '''Two conv stages (Conv -> ReLU -> MaxPool, twice) followed by one linear layer.

    The classifier width of 32 * 7 * 7 implies single-channel 28x28 inputs:
    both convolutions preserve the spatial size (5x5 kernel, padding 2) and
    each 2x2 pooling halves it (28 -> 14 -> 7).
    forward() returns raw scores of shape (batch, 10).
    '''
    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 16 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 16 -> 32 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Linear read-out over the flattened feature maps.
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, X):
        feature_maps = self.conv2(self.conv1(X))
        # Collapse everything except the batch axis.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.out(flat)

class CNN_2(nn.Module):
    '''Two conv stages (Conv -> ReLU -> MaxPool, twice), then FC -> ReLU -> Dropout -> FC.

    The first linear layer's width of 24 * 7 * 7 implies single-channel 28x28
    inputs: both convolutions preserve the spatial size (3x3 kernel,
    padding 1) and each 2x2 pooling halves it (28 -> 14 -> 7).
    forward() returns raw scores of shape (batch, 10).
    '''
    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 12 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 12, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 12 -> 24 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head; dropout (p = 0.2) sits between the two linear
        # layers to limit overfitting.
        self.out_layers = nn.Sequential(
            nn.Linear(in_features=24 * 7 * 7, out_features=64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(in_features=64, out_features=10),
        )

    def forward(self, X):
        feature_maps = self.conv2(self.conv1(X))
        # Collapse everything except the batch axis.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.out_layers(flat)


class CNN_3(nn.Module):
    """Conv -> RELU -> Max_Pool -> Conv -> RELU -> Max_Pool -> FC -> RELU -> FC -> LOG-SOFTMAX

    Both convolutions are unpadded 5x5, so the hard-coded 4*4*50 flatten width
    corresponds to single-channel 28x28 inputs
    (28 -> 24 -> 12 after pooling -> 8 -> 4 after pooling).

    forward() returns log-probabilities of shape (batch, 10), not raw scores.
    """
    def __init__(self):
        super(CNN_3, self).__init__()

        # Stage 1: 1 -> 20 channels.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 20, 5, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Stage 2: 20 -> 50 channels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(20, 50, 5, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        self.fc1 = nn.Sequential(
            nn.Linear(4*4*50, 500),
            nn.ReLU(),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(500, 10),
            # Normalise over the class axis explicitly. Leaving `dim` unset
            # relies on deprecated implicit-dimension selection and emits a
            # warning on every forward pass; for the 2-D activations produced
            # here the implicit choice was dim=1, so results are unchanged.
            nn.LogSoftmax(dim=1)
        )

    def forward(self, X):
        X = self.conv1(X)
        X = self.conv2(X)
        # Flatten the 50 feature maps of size 4x4 into one vector per sample.
        X = X.view(-1, 4*4*50)
        X = self.fc1(X)
        X = self.fc2(X)
        return X



In [None]:
# Choose the architecture to train: leave exactly one of the assignments
# below uncommented.
# model = MultilayerNetwork_v1()
# model = MultilayerNetwork_v2()
# model = MultilayerNetwork_v3()
# model = CNN_1()
# model = CNN_2()
model = CNN_3()

print(model)

# Loss function and optimiser used by the training and evaluation cells.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
epochs = 10 # number of training epochs

# NOTE: `train` and `test` here must still be the MNIST datasets. A later cell
# defines functions with the same names, so re-running this cell after that
# one would hand a function to DataLoader.
trainloader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True)
# The reported evaluation metrics do not depend on sample order, so the test
# split is not shuffled: batches are reproducible and no random numbers are drawn.
testloader = torch.utils.data.DataLoader(test, batch_size=64, shuffle=False)

In [None]:
def train(model, n_epoch=10):
    """Fit `model` for `n_epoch` passes over the notebook-level `trainloader`.

    Relies on the module-level `trainloader`, `criterion` and `optimizer`.
    Prints the model once, then the mean per-sample training loss after every
    epoch, and finally plots the loss curve. Returns nothing.
    """
    print(model)
    model.train()
    loss_history = []
    for epoch_idx in range(n_epoch):
        epoch_loss_sum = 0.0
        for batch_images, batch_labels in trainloader:
            optimizer.zero_grad()
            batch_loss = criterion(model(batch_images), batch_labels)
            batch_loss.backward()
            optimizer.step()
            # Weight the batch loss by the batch size so that the division
            # below yields a per-sample mean (assuming `criterion` averages
            # over the batch, as nn.CrossEntropyLoss does by default).
            epoch_loss_sum += batch_loss.item() * batch_images.size(0)

        mean_loss = epoch_loss_sum / len(trainloader.dataset)
        loss_history.append(mean_loss)
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch_idx + 1, mean_loss))

    # Loss curve; the x axis is the zero-based epoch index.
    epoch_axis = range(len(loss_history))
    plt.plot(epoch_axis, loss_history)
    plt.xlabel('epoch')
    plt.ylabel('Loss')
    plt.title('Loss over training')
        

def test(model):
    """Evaluate `model` on the notebook-level `testloader` and print a report.

    Relies on the module-level `testloader` and `criterion`. Prints the mean
    per-sample test loss, per-class and overall accuracy, the confusion
    matrix (rows = true class, columns = predicted class), per-class
    precision / recall / F1 and the macro-averaged F1. Returns nothing.
    """
    n_classes = 10
    correct_counts = [0.0] * n_classes
    total_counts = [0.0] * n_classes
    model.eval()
    test_loss = 0.0
    predicted = []
    actual = []
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        for images, labels in testloader:
            output = model(images)
            loss = criterion(output, labels)
            test_loss += loss.item() * images.size(0)
            _, pred = torch.max(output, 1)
            hits = pred.eq(labels.view_as(pred))
            # Work with plain Python ints/bools: they index the count lists
            # directly, are what scikit-learn expects, and (unlike a squeezed
            # tensor) still iterate correctly when a batch holds one sample.
            for label, guess, hit in zip(labels.tolist(), pred.tolist(), hits.tolist()):
                actual.append(label)
                predicted.append(guess)
                correct_counts[label] += hit
                total_counts[label] += 1

    test_loss = test_loss/len(testloader.dataset)
    print('Test Loss: {:.6f}\n'.format(test_loss))

    for i in range(n_classes):
        if total_counts[i] > 0:
            print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
                str(i), 100 * correct_counts[i] / total_counts[i],
                correct_counts[i], total_counts[i]))
        else:
            # A class absent from the test data has no defined accuracy.
            print('Test Accuracy of %5s: N/A (no test examples)' % str(i))

    print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
        100. * np.sum(correct_counts) / np.sum(total_counts),
        np.sum(correct_counts), np.sum(total_counts)))

    # scikit-learn metrics take (y_true, y_pred) in that order. With the
    # arguments reversed the confusion matrix is transposed and the values
    # reported as precision and recall trade places.
    print('Confusion Matrix: \n')
    print(confusion_matrix(actual, predicted))
    precision_scores = precision_score(actual, predicted, average=None)
    recall_scores = recall_score(actual, predicted, average=None)
    f1_scores = f1_score(actual, predicted, average=None)
    print('Class \t Precision \t\t\t Recall \t\t\t\t\t F1')
    for i in range(len(precision_scores)):
        print(i, '\t', precision_scores[i], '\t', recall_scores[i], '\t', f1_scores[i])

    print('Macro-F1: ', f1_score(actual, predicted, average='macro'))
        

### Train and Test the network


In [None]:
# Pass the configured epoch count explicitly so that changing `epochs` in the
# setup cell actually changes how long the model is trained.
train(model, n_epoch=epochs)

In [None]:
# Evaluate the trained model on the test loader and print the metrics report.
test(model)
