# Analysis
## Target
Hoping to have a more agressive reduction in learning rate so that towards the end the model stablizes faster. Hence reduced gamma to 0.2 and retained step size as 2 as in previous models. Also the training is consistently lower than the test, so probably the dropout of 15% is rather high and not allowing the model to achieve the desired accuracy so reducing the dropout to 10%

## Result
Best Training Accuracy achieved in Epoch 15 :99.01%          
Best Test Accuracy is achieved in Epoch 15:99.43%    

## Analysis
The accuracy had increased and had been 99.39%, 99.41% and 99.43% in the last three epochs, thereby suggesting that the dropout was intially very high, the addition of GAP and 1X1 kernel at the end and adjusting learning rates were important decisions


In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms

In [0]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3, padding=0)     #(k=3,p=0,s=1) input = 28, output = 26, rf = 3, jump = 1
        self.batchnorm1 = nn.BatchNorm2d(8)
        self.dropout1 = nn.Dropout(0.10)
        self.conv2 = nn.Conv2d(8, 8, 3, padding=0)     #(k=3,p=0,s=1) input = 26, output = 24, rf = 5, jump = 1
        self.batchnorm2 = nn.BatchNorm2d(8)
        self.dropout2 = nn.Dropout(0.10)
        self.conv3 = nn.Conv2d(8, 8, 3, padding=0)     #(k=3,p=0,s=1) input = 24, output = 22, rf = 7, jump = 1
        self.batchnorm3 = nn.BatchNorm2d(8)
        self.dropout3 = nn.Dropout(0.10)
        self.pool1 = nn.AvgPool2d(2, 2)                #(k=2,p=0,s=2) input = 22, output = 11, rf = 8, jump = 1
        self.conv4 = nn.Conv2d(8, 16, 3, padding=0)    #(k=3,p=0,s=1) input = 11, output = 9, rf = 12, jump = 2
        self.batchnorm4 = nn.BatchNorm2d(16)
        self.dropout4 = nn.Dropout(0.10)
        self.conv5 = nn.Conv2d(16, 16, 3, padding=0)   #(k=3,p=0,s=1) input = 9, output = 7, rf = 16, jump = 2
        self.batchnorm5 = nn.BatchNorm2d(16)
        self.dropout5 = nn.Dropout(0.10)
        self.conv6 = nn.Conv2d(16, 32, 3, padding=0)   #(k=3,p=0,s=1) input = 7, output = 5, rf = 20, jump = 2
        self.batchnorm6 = nn.BatchNorm2d(32)
        self.dropout6 = nn.Dropout(0.10)
        self.pool2 = nn.AvgPool2d(5, 5)                #(k=5,p=0,s=5) input = 5, output = 1, rf = 28, jump = 2
        self.conv8 = nn.Conv2d(32, 10, 1)


    def forward(self, x):
        x = self.pool1(self.dropout3(self.batchnorm3(F.relu(self.conv3(self.dropout2(self.batchnorm2(F.relu(self.conv2(self.dropout1(self.batchnorm1(F.relu(self.conv1(x)))))))))))))
        x = self.pool2(self.dropout6(self.batchnorm6(F.relu(self.conv6(self.dropout5(self.batchnorm5(F.relu(self.conv5(self.dropout4(self.batchnorm4(F.relu(self.conv4(x)))))))))))))
        x = self.conv8(x)
        x = x.view(-1, 10)
        return F.log_softmax(x)

In [3]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
       BatchNorm2d-2            [-1, 8, 26, 26]              16
           Dropout-3            [-1, 8, 26, 26]               0
            Conv2d-4            [-1, 8, 24, 24]             584
       BatchNorm2d-5            [-1, 8, 24, 24]              16
           Dropout-6            [-1, 8, 24, 24]               0
            Conv2d-7            [-1, 8, 22, 22]             584
       BatchNorm2d-8            [-1, 8, 22, 22]              16
           Dropout-9            [-1, 8, 22, 22]               0
        AvgPool2d-10            [-1, 8, 11, 11]               0
           Conv2d-11             [-1, 16, 9, 9]           1,168
      BatchNorm2d-12             [-1, 16, 9, 9]              32
          Dropout-13             [-1, 16, 9, 9]               0
           Conv2d-14             [-1, 1



In [0]:
torch.manual_seed(1)
batch_size = 64

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-5.0,5.0),fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


In [0]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

    print('\nTrain set: Accuracy: {}/{} ({:.2f}%)\n'.format(
        correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [6]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer,step_size=2,gamma=0.2)

for epoch in range(1, 16):
    print("\n EPOC NUMBER IS :", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    print("\n--------------------------------------------------------------------------- \n")

  0%|          | 0/938 [00:00<?, ?it/s]


 EPOC NUMBER IS : 1


loss=0.21448633074760437 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 53.22it/s]


Train set: Accuracy: 55684/60000 (92.81%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0562, Accuracy: 9827/10000 (98.27%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 2


loss=0.03152915835380554 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.55it/s]


Train set: Accuracy: 58644/60000 (97.74%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0389, Accuracy: 9880/10000 (98.80%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 3


loss=0.0411776527762413 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.24it/s]


Train set: Accuracy: 58940/60000 (98.23%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0300, Accuracy: 9910/10000 (99.10%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 4


loss=0.01629352569580078 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 55.04it/s]


Train set: Accuracy: 59049/60000 (98.42%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0265, Accuracy: 9919/10000 (99.19%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 5


loss=0.021258220076560974 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.20it/s]


Train set: Accuracy: 59168/60000 (98.61%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0246, Accuracy: 9925/10000 (99.25%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 6


loss=0.030743002891540527 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 53.75it/s]


Train set: Accuracy: 59214/60000 (98.69%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0249, Accuracy: 9923/10000 (99.23%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 7


loss=0.0024811923503875732 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.52it/s]


Train set: Accuracy: 59243/60000 (98.74%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0234, Accuracy: 9931/10000 (99.31%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 8


loss=0.0018496662378311157 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.53it/s]


Train set: Accuracy: 59305/60000 (98.84%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0214, Accuracy: 9934/10000 (99.34%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 9


loss=0.01108500361442566 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 53.91it/s]


Train set: Accuracy: 59296/60000 (98.83%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0213, Accuracy: 9928/10000 (99.28%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 10


loss=0.08165052533149719 batch_id=937: 100%|██████████| 938/938 [00:17<00:00, 54.26it/s]


Train set: Accuracy: 59348/60000 (98.91%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0209, Accuracy: 9933/10000 (99.33%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 11


loss=0.004422739148139954 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 55.67it/s]


Train set: Accuracy: 59394/60000 (98.99%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0217, Accuracy: 9926/10000 (99.26%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 12


loss=0.017362147569656372 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 55.63it/s]


Train set: Accuracy: 59376/60000 (98.96%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0193, Accuracy: 9936/10000 (99.36%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 13


loss=0.04646587371826172 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 55.27it/s]


Train set: Accuracy: 59397/60000 (99.00%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0192, Accuracy: 9939/10000 (99.39%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 14


loss=0.005466267466545105 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 55.39it/s]


Train set: Accuracy: 59405/60000 (99.01%)




  0%|          | 0/938 [00:00<?, ?it/s]


Test set: Average loss: 0.0179, Accuracy: 9941/10000 (99.41%)


--------------------------------------------------------------------------- 


 EPOC NUMBER IS : 15


loss=0.018329307436943054 batch_id=937: 100%|██████████| 938/938 [00:16<00:00, 55.47it/s]


Train set: Accuracy: 59404/60000 (99.01%)







Test set: Average loss: 0.0184, Accuracy: 9943/10000 (99.43%)


--------------------------------------------------------------------------- 

