Train a Pure CNN with less than 10000 trainable parameters using the MNIST
Dataset having minimum validation accuracy of 99.40%
Note -
1. Code comments should be given for proper code understanding.
2. Implement in both PyTorch and Tensorflow respectively

**Tensorflow**

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Dropout,Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [None]:
#Loading the dataset 
(X_train,y_train),(x_test,y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
#preprossing the data 
X_train = X_train/255
x_test = x_test/255

y_train = to_categorical(y_train,10)
y_test = to_categorical(y_test,10)

In [None]:
model = Sequential()
model.add(Conv2D(filters=8,  kernel_size=(3,3),activation= 'relu',input_shape = (28,28,1),name = 'Conv-1'))
model.add(Conv2D(filters=16, kernel_size=(3,3),activation = 'relu',padding='same',name = 'Conv-2'))
model.add(Conv2D(filters=32, kernel_size=(3,3),activation = 'relu',name = 'Conv-3'))

model.add(MaxPooling2D(pool_size = (3,3),name = 'Maxpull-1'))

model.add(Conv2D(filters=10,kernel_size =(1,1),activation = 'relu',name = 'Conv-4'))
model.add(Conv2D(filters=10,kernel_size =(2,2),activation = 'relu',padding='same',name = 'Conv-5'))

model.add(MaxPooling2D(pool_size = (2,2),name = 'Maxpull-2'))

#model.add(Conv2D(filters=16,kernel_size =(1,1),activation = 'relu',name = 'Conv-6'))
model.add(Conv2D(filters=16,kernel_size =(3,3),activation = 'relu',name = 'Conv-7'))
#model.add(Conv2D(filters=20,kernel_size =(2,2),activation = 'relu',name = 'Conv-8'))
model.add(Flatten())
model.add(Dense(10,activation= 'relu'))
model.add(Dense(10,activation= 'softmax'))


In [None]:
model.compile(optimizer = Adam(),loss = 'categorical_crossentropy',metrics=['accuracy'])
model.summary()

Model: "sequential_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Conv-1 (Conv2D)             (None, 26, 26, 8)         80        
                                                                 
 Conv-2 (Conv2D)             (None, 26, 26, 16)        1168      
                                                                 
 Conv-3 (Conv2D)             (None, 24, 24, 32)        4640      
                                                                 
 Maxpull-1 (MaxPooling2D)    (None, 8, 8, 32)          0         
                                                                 
 Conv-4 (Conv2D)             (None, 8, 8, 10)          330       
                                                                 
 Conv-5 (Conv2D)             (None, 8, 8, 10)          410       
                                                                 
 Maxpull-2 (MaxPooling2D)    (None, 4, 4, 10)        

In [None]:
history = model.fit(X_train,y_train,batch_size=128,epochs=20,validation_data=(x_test,y_test)) 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


**Pytorch**

In [3]:
#!pip install torchvision 

In [11]:
import torch 
import torchvision 
from torchvision import datasets, transforms
import torch.nn as nn
from torchsummary import summary 
from torch.optim import Adam
from torch.autograd import Variable 
import numpy as np


In [12]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [13]:
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)


In [14]:
batch_size = 64

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [15]:
#model Bulding

In [25]:
class CNN(nn.Module):
    def __init__(self,):
      super(CNN,self).__init__()
      #inputshape = (64,1,28,28)
      self.conv1 = nn.Conv2d(in_channels = 1,out_channels=8,kernel_size = 3)
      self.relu_1 = nn.ReLU()
      self.conv2 = nn.Conv2d(in_channels = 8,out_channels=16,kernel_size = 3,padding=1)
      self.relu_2 = nn.ReLU()
      self.conv3 = nn.Conv2d(in_channels = 16,out_channels=32,kernel_size = 3)
      self.relu_3 = nn.ReLU()

      self.maxpoll_1 = nn.MaxPool2d(kernel_size = 2)

      self.conv4 = nn.Conv2d(in_channels = 32,out_channels=10,kernel_size = 1)
      self.relu_4 = nn.ReLU()
      self.conv5 = nn.Conv2d(in_channels = 10,out_channels=10,kernel_size = 2,padding=1)
      self.relu_5 = nn.ReLU()

      self.maxpoll_2 = nn.MaxPool2d(kernel_size = 2)
      self.conv6 = nn.Conv2d(in_channels = 10,out_channels=16,kernel_size = 3,)
      self.relu_6 = nn.ReLU()

      self.fc = nn.Linear(4*4*16,out_features=10)
    
    def forward(self,x) :
        x = self.conv1(x)
        x = self.relu_1(x)
        x = self.conv2(x)
        x = self.relu_2(x)
        x = self.conv3(x)
        x = self.relu_3(x)
      
        x = self.maxpoll_1(x)

        x = self.conv4(x)
        x = self.relu_4(x)
        x = self.conv5(x)
        x = self.relu_5(x)

        x = self.maxpoll_2(x)

        x = self.conv6(x)
        x = self.relu_6(x)

        #flatten 
        x = x.view(-1,4*4*16)
        x = self.fc(x)
        return x


  


In [23]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device


device(type='cuda')

In [26]:
model=CNN().to(device) #insilizing the model 


In [27]:
summary(model,(1,28,28)) #geting model summery 

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
              ReLU-2            [-1, 8, 26, 26]               0
            Conv2d-3           [-1, 16, 26, 26]           1,168
              ReLU-4           [-1, 16, 26, 26]               0
            Conv2d-5           [-1, 32, 24, 24]           4,640
              ReLU-6           [-1, 32, 24, 24]               0
         MaxPool2d-7           [-1, 32, 12, 12]               0
            Conv2d-8           [-1, 10, 12, 12]             330
              ReLU-9           [-1, 10, 12, 12]               0
           Conv2d-10           [-1, 10, 13, 13]             410
             ReLU-11           [-1, 10, 13, 13]               0
        MaxPool2d-12             [-1, 10, 6, 6]               0
           Conv2d-13             [-1, 16, 4, 4]           1,456
             ReLU-14             [-1, 1

In [28]:
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001) #inislizing the optimizer
loss_function=nn.CrossEntropyLoss() # insilizing the loss_function 