In [1]:
# This time I would be training a deep neural network aiming for atleast 2 hidden layers
# with 32 and 64 layers
# The plan of action is as follows
# 1 - Download the Data
# 2 - Classify the data
# 3 - Dataloader
# 4 - Model

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import random_split

# Downloading the Data
dataset = MNIST(root="data/", download=True, train=True, transform=transforms.ToTensor())
train_ds, val_ds = random_split(dataset, [50000, 10000])

test_dataset = MNIST(root="data/", download=True, train=False, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [3]:
from torch.utils.data import DataLoader

BATCH_SIZE = 128

train_dl = DataLoader(train_ds, BATCH_SIZE, shuffle=True)
valid_dl = DataLoader(val_ds, BATCH_SIZE)

In [4]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [5]:
import torch.nn as nn
import torch.nn.functional as F

INPUT_FEATURES = 28*28
HIDDEN_SIZE_1 = 32
HIDDEN_SIZE_2 = 64
HIDDEN_SIZE_3 = 128
OUTPUT_FEATURES = 10

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(INPUT_FEATURES, HIDDEN_SIZE_1)
        self.linear2 = nn.Linear(HIDDEN_SIZE_1, HIDDEN_SIZE_2)
        self.linear3 = nn.Linear(HIDDEN_SIZE_2, HIDDEN_SIZE_3)
        self.linear4 = nn.Linear(HIDDEN_SIZE_3, OUTPUT_FEATURES)
        
    def forward(self, inputs):
        inputs = inputs.reshape(-1, 784)
        
        outputs1 = self.linear1(inputs)
        outputs1 = F.relu(outputs1)
        
        outputs2 = self.linear2(outputs1)
        outputs2 = F.relu(outputs2)
        
        outputs3 = self.linear3(outputs2)
        return outputs3
    
    def training_step(self, batch):
        inputs, labels = batch
        outputs = self(inputs)
        loss = F.cross_entropy(outputs, labels)
        return loss
    
    def validation_step(self, batch):
        inputs, labels = batch
        outputs = self(inputs)
        loss = F.cross_entropy(outputs, labels)
        acc = accuracy(outputs, labels)
        return {'val_loss' : loss, 'val_acc' : acc}
    
    def epoch_progress_report(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_losses = torch.stack(batch_losses).mean()
        batch_accuracy = [x['val_acc'] for x in outputs]
        epoch_accuracy = torch.stack(batch_accuracy).mean()
        return {'epoch_loss' : epoch_losses, 'epoch_accuracy' : epoch_accuracy}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['epoch_loss'], result['epoch_accuracy']))

In [6]:
def evaluate(val_dl, model):
    results = [model.validation_step(batch) for batch in val_dl]
    return model.epoch_progress_report(results)

def fit(num_of_epochs, model, lr, train_dl, val_dl, opt=torch.optim.SGD):
    optimizer = opt(model.parameters(), lr)
    history = []
    
    for epoch in range(num_of_epochs):
        
        for batch in train_dl:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
        result = evaluate(val_dl, model)
        model.epoch_end(epoch, result)
        history.append(result)
        
    return history

In [7]:
model = MnistModel()
history1 = fit(5, model, 0.5, train_dl, valid_dl)

Epoch [0], val_loss: 0.2700, val_acc: 0.9130
Epoch [1], val_loss: 0.1580, val_acc: 0.9507
Epoch [2], val_loss: 0.1317, val_acc: 0.9578
Epoch [3], val_loss: 0.1543, val_acc: 0.9540
Epoch [4], val_loss: 0.1092, val_acc: 0.9671


In [8]:
history2 = fit(5, model, 0.5, train_dl, valid_dl)

Epoch [0], val_loss: 0.1309, val_acc: 0.9598
Epoch [1], val_loss: 0.1209, val_acc: 0.9632
Epoch [2], val_loss: 0.1202, val_acc: 0.9645
Epoch [3], val_loss: 0.0981, val_acc: 0.9701
Epoch [4], val_loss: 0.1080, val_acc: 0.9683


In [9]:
history3 = fit(5, model, 0.5, train_dl, valid_dl)

Epoch [0], val_loss: 0.1162, val_acc: 0.9650
Epoch [1], val_loss: 0.1253, val_acc: 0.9644
Epoch [2], val_loss: 0.1070, val_acc: 0.9683
Epoch [3], val_loss: 0.1152, val_acc: 0.9677
Epoch [4], val_loss: 0.1078, val_acc: 0.9702


In [10]:
history4 = fit(15, model, 0.001, train_dl, valid_dl)

Epoch [0], val_loss: 0.1037, val_acc: 0.9721
Epoch [1], val_loss: 0.1014, val_acc: 0.9730
Epoch [2], val_loss: 0.0999, val_acc: 0.9728
Epoch [3], val_loss: 0.0988, val_acc: 0.9730
Epoch [4], val_loss: 0.0981, val_acc: 0.9730
Epoch [5], val_loss: 0.0976, val_acc: 0.9732
Epoch [6], val_loss: 0.0971, val_acc: 0.9733
Epoch [7], val_loss: 0.0967, val_acc: 0.9735
Epoch [8], val_loss: 0.0964, val_acc: 0.9733
Epoch [9], val_loss: 0.0961, val_acc: 0.9736
Epoch [10], val_loss: 0.0958, val_acc: 0.9735
Epoch [11], val_loss: 0.0956, val_acc: 0.9736
Epoch [12], val_loss: 0.0954, val_acc: 0.9736
Epoch [13], val_loss: 0.0952, val_acc: 0.9738
Epoch [14], val_loss: 0.0950, val_acc: 0.9736


In [11]:
test_loader = DataLoader(test_dataset, batch_size=256)
result = evaluate(test_loader, model)
result

{'epoch_loss': tensor(0.0950, grad_fn=<MeanBackward0>),
 'epoch_accuracy': tensor(0.9750)}