<a href="https://colab.research.google.com/github/valren-enforcer/ML-Internship/blob/main/CNN_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.datasets import fetch_openml
# Fetch the 'mnist_784' dataset (version 1) from OpenML, reusing scikit-learn's
# local cache when available; as_frame=False asks for NumPy arrays rather than
# pandas objects.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Replace the string labels on the loaded dataset with 8-bit integers (in place on `mnist`).
mnist.target = mnist.target.astype(np.int8) # fetch_openml() returns targets as strings

In [None]:
# X: feature array, y: label array (already cast to int8 in the previous cell).
X, y = mnist["data"], mnist["target"]

In [None]:
# Hold out 20% of the samples for testing.
# random_state pins the shuffle so the same split is produced after a kernel
# restart; stratify=y keeps the class proportions equal in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

In [None]:
# Sanity check: the training labels should report an integer dtype after the cast above.
print(y_train.dtype)

int8


In [None]:
BATCH_SIZE = 32

# Features feed nn.Linear / nn.Conv2d layers, which need floating-point input,
# so store them as float32 once instead of as int64 that is re-cast per batch.
torch_X_train = torch.from_numpy(X_train).type(torch.FloatTensor)
# Targets are class indices; the loss functions used below expect int64 (long).
torch_y_train = torch.from_numpy(y_train).type(torch.LongTensor)

# Same conversion for the held-out test set.
torch_X_test = torch.from_numpy(X_test).type(torch.FloatTensor)
torch_y_test = torch.from_numpy(y_test).type(torch.LongTensor)

# Pair features with targets so a loader yields (X_batch, y_batch) tuples.
train = torch.utils.data.TensorDataset(torch_X_train, torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test, torch_y_test)

# Reshuffle the training samples every epoch so mini-batches differ between
# epochs; keep the test loader in a fixed order.
train_loader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class MLP(nn.Module):
    """Fully connected classifier: 784 -> 250 -> 100 -> 10 with ReLU activations."""

    def __init__(self):
        super().__init__()
        # Layers are registered in input-to-output order.
        self.linear1 = nn.Linear(in_features=784, out_features=250)
        self.linear2 = nn.Linear(in_features=250, out_features=100)
        self.linear3 = nn.Linear(in_features=100, out_features=10)

    def forward(self, X):
        """Map a batch of 784-feature rows to log-probabilities over 10 classes."""
        hidden = X
        # Both hidden layers are followed by a ReLU; the output layer is not.
        for layer in (self.linear1, self.linear2):
            hidden = F.relu(layer(hidden))
        scores = self.linear3(hidden)
        return F.log_softmax(scores, dim=1)


mlp = MLP()
print(mlp)

MLP(
  (linear1): Linear(in_features=784, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=100, bias=True)
  (linear3): Linear(in_features=100, out_features=10, bias=True)
)


In [None]:
def fit(model, train_loader, epochs=5):
    """Train ``model`` in place with Adam and cross-entropy loss, logging progress.

    Args:
        model: ``nn.Module`` that maps a float batch to per-class scores of
            shape (batch, n_classes).
        train_loader: iterable yielding ``(X_batch, y_batch)`` pairs, where
            ``y_batch`` holds integer class indices.
        epochs: number of full passes over ``train_loader`` (defaults to 5).

    Every 50 batches a line is printed with the epoch index, the loss of the
    current batch and the running training accuracy (in percent) for the
    current epoch.
    """
    optimizer = torch.optim.Adam(model.parameters())
    # CrossEntropyLoss applies log_softmax itself. The models in this notebook
    # already return log-probabilities, on which log_softmax is a no-op, so the
    # loss value is the same as NLL on their output.
    error = nn.CrossEntropyLoss()
    model.train()
    for epoch in range(epochs):
        correct = 0  # correctly classified samples so far in this epoch
        seen = 0     # samples processed so far in this epoch
        for batch_idx, (X_batch, y_batch) in enumerate(train_loader):
            X_batch = X_batch.float()
            optimizer.zero_grad()
            output = model(X_batch)
            loss = error(output, y_batch)
            loss.backward()
            optimizer.step()

            # Running accuracy: count real samples so a short final batch is
            # handled correctly and no global BATCH_SIZE is required.
            predicted = output.detach().argmax(dim=1)
            correct += (predicted == y_batch).sum().item()
            seen += y_batch.size(0)
            if batch_idx % 50 == 0:
                accuracy = 100.0 * correct / seen
                print(f'Epoch : {epoch} \tLoss: {loss.item():.6f}\t Accuracy:{accuracy:.3f}%')

In [None]:
# Train the MLP on the training loader using fit()'s built-in settings.
fit(mlp, train_loader)

Epoch : 0 	Loss: 4.528050	 Accuracy:32.000
Epoch : 0 	Loss: 0.763592	 Accuracy:1632.000
Epoch : 0 	Loss: 0.165692	 Accuracy:3232.000
Epoch : 0 	Loss: 0.419522	 Accuracy:4832.000
Epoch : 0 	Loss: 0.225305	 Accuracy:6432.000
Epoch : 0 	Loss: 0.310715	 Accuracy:8032.000
Epoch : 0 	Loss: 0.525159	 Accuracy:9632.000
Epoch : 0 	Loss: 0.209523	 Accuracy:11232.000
Epoch : 0 	Loss: 0.181472	 Accuracy:12832.000
Epoch : 0 	Loss: 0.215006	 Accuracy:14432.000
Epoch : 0 	Loss: 0.326348	 Accuracy:16032.000
Epoch : 0 	Loss: 0.101830	 Accuracy:17632.000
Epoch : 0 	Loss: 0.357150	 Accuracy:19232.000
Epoch : 0 	Loss: 0.315405	 Accuracy:20832.000
Epoch : 0 	Loss: 0.264099	 Accuracy:22432.000
Epoch : 0 	Loss: 0.330311	 Accuracy:24032.000
Epoch : 0 	Loss: 0.101897	 Accuracy:25632.000
Epoch : 0 	Loss: 0.034556	 Accuracy:27232.000
Epoch : 0 	Loss: 0.101703	 Accuracy:28832.000
Epoch : 0 	Loss: 0.104981	 Accuracy:30432.000
Epoch : 0 	Loss: 0.271280	 Accuracy:32032.000
Epoch : 0 	Loss: 0.094192	 Accuracy:33632.0

In [None]:
def evaluate(model, loader=None):
    """Print the classification accuracy (in percent) of ``model`` on a loader.

    Args:
        model: ``nn.Module`` returning per-class scores of shape
            (batch, n_classes).
        loader: iterable of ``(images, labels)`` batches. When omitted, the
            notebook-level ``test_loader`` is used (looked up at call time).

    Raises:
        ValueError: if the loader yields no samples.
    """
    if loader is None:
        loader = test_loader

    was_training = model.training
    # Switch to inference mode so dropout layers are disabled while scoring.
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():  # no gradients needed for evaluation
            for test_imgs, test_labels in loader:
                output = model(test_imgs.float())
                predicted = output.argmax(dim=1)
                correct += (predicted == test_labels).sum().item()
                # Count real samples: the last batch may be smaller than the rest.
                total += test_labels.size(0)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        raise ValueError("evaluate() received an empty loader")
    print("Test accuracy:{:.3f}% ".format(100.0 * correct / total))

# Report the trained MLP's accuracy on the current notebook-level test_loader.
evaluate(mlp)

Test accuracy:0.959% 



## Explanation

To better understand convolutional neural networks, I recommend the excellent section on them here: http://cs231n.github.io/convolutional-networks/

**Convolutional operation**: First, let's briefly clarify how the convolution operation is performed on an image. We need to define a kernel, which is a small matrix (of size 5 × 5, for example). To perform the convolution, we slide the kernel across the image horizontally and vertically and, at each position, compute the dot product between the kernel and the small portion of the image it covers.

**Pooling**: With suitable padding, the convolution operation gives an output of the same size as the input image. To reduce the size of the image, and thus the number of parameters in the model, we perform a pooling operation. The pooling operation needs a window size. By sliding the window along the image, we compute the mean or the max of the portion of the image inside the window, for MeanPooling or MaxPooling respectively.

**Stride** is the number of pixels the convolutional kernel moves at each step as it slides over the image.

**Padding**: to preserve exactly the size of the input image, it is useful to add zero padding around the border of the image.

To remember: what makes a CNN so interesting for images is that it is largely invariant to translation, and for each convolutional layer we only need to store the kernels. Thus we can stack many layers to learn deep features without having so many parameters that the model becomes untrainable.


In [None]:
# Conv2d layers take (batch, channels, height, width) input: reshape every
# flat 784-value row into a single-channel 28x28 image.
torch_X_train = torch_X_train.view(-1, 1, 28, 28).float()
torch_X_test = torch_X_test.view(-1, 1, 28, 28).float()
print(torch_X_train.shape)
print(torch_X_test.shape)

# NOTE: this rebinds train/test and both loaders. From here on they yield
# image-shaped batches, which the 784-feature MLP defined earlier cannot consume.
train = torch.utils.data.TensorDataset(torch_X_train, torch_y_train)
test = torch.utils.data.TensorDataset(torch_X_test, torch_y_test)

# Reshuffle the training samples every epoch; keep the test order fixed.
train_loader = torch.utils.data.DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=BATCH_SIZE, shuffle=False)

torch.Size([56000, 1, 28, 28])
torch.Size([14000, 1, 28, 28])


In [None]:
class CNN(nn.Module):
    """Three 5x5 convolutions followed by two fully connected layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.fc1 = nn.Linear(in_features=3 * 3 * 64, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return log-probabilities over 10 classes for a batch of 1-channel images.

        Dropout is applied only while the module is in training mode.
        """
        active = self.training

        x = F.relu(self.conv1(x))

        # conv -> 2x2 max-pool -> ReLU -> dropout, twice.
        pooled = F.max_pool2d(self.conv2(x), kernel_size=2)
        x = F.dropout(F.relu(pooled), p=0.5, training=active)
        pooled = F.max_pool2d(self.conv3(x), kernel_size=2)
        x = F.dropout(F.relu(pooled), p=0.5, training=active)

        # Flatten to rows of 3*3*64 = 576 features for the dense head
        # (a 28x28 input is 3x3 spatially at this point).
        x = x.view(-1, 3 * 3 * 64)
        x = F.dropout(F.relu(self.fc1(x)), training=active)
        return F.log_softmax(self.fc2(x), dim=1)

cnn = CNN()
print(cnn)

# Smoke test: push one training batch through the untrained network and check
# that the output has one row per sample and one column per class.
# Call the module itself (not .forward) so nn.Module's call machinery runs, and
# skip gradient tracking since only the shape is inspected.
X_batch, y_batch = next(iter(train_loader))
with torch.no_grad():
    print(cnn(X_batch).shape)

CNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
torch.Size([32, 10])


In [None]:
# Train the CNN on the image-shaped training loader.
# NOTE(review): the recorded output below ends in KeyboardInterrupt during
# epoch 0, so the saved run did not complete training.
fit(cnn,train_loader)

Epoch : 0 	Loss: 18.033966	 Accuracy:32.000
Epoch : 0 	Loss: 2.215352	 Accuracy:1632.000
Epoch : 0 	Loss: 1.279084	 Accuracy:3232.000
Epoch : 0 	Loss: 1.138644	 Accuracy:4832.000
Epoch : 0 	Loss: 0.552001	 Accuracy:6432.000
Epoch : 0 	Loss: 0.752180	 Accuracy:8032.000
Epoch : 0 	Loss: 0.441516	 Accuracy:9632.000
Epoch : 0 	Loss: 0.377081	 Accuracy:11232.000
Epoch : 0 	Loss: 0.235730	 Accuracy:12832.000
Epoch : 0 	Loss: 0.384197	 Accuracy:14432.000
Epoch : 0 	Loss: 0.566501	 Accuracy:16032.000
Epoch : 0 	Loss: 0.308301	 Accuracy:17632.000
Epoch : 0 	Loss: 0.235839	 Accuracy:19232.000
Epoch : 0 	Loss: 0.276869	 Accuracy:20832.000
Epoch : 0 	Loss: 0.828639	 Accuracy:22432.000
Epoch : 0 	Loss: 0.338092	 Accuracy:24032.000
Epoch : 0 	Loss: 0.380655	 Accuracy:25632.000
Epoch : 0 	Loss: 0.256919	 Accuracy:27232.000
Epoch : 0 	Loss: 0.099698	 Accuracy:28832.000


KeyboardInterrupt: 

In [None]:
# Report the CNN's accuracy on the current notebook-level test_loader.
evaluate(cnn)

Test accuracy:0.914% 
