<a href="https://colab.research.google.com/github/zhrsaghaie/DeepLearning/blob/main/Dense_NN_on_Mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
class Network(nn.Module):
    """Fully connected classifier: Flatten -> Linear(784, 256) -> ReLU -> Linear(256, 10).

    ``forward`` returns raw, unnormalised class scores (logits). The notebook
    trains this model with ``nn.CrossEntropyLoss``, which applies log-softmax
    internally; returning softmax probabilities from ``forward`` would squash
    the scores twice and flatten the gradients. Use ``predict_proba`` when
    normalised probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.dense_layers = nn.Sequential(
            nn.Linear(28 * 28, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )
        # Kept as a submodule (it has no parameters) so probabilities can
        # still be produced on demand via ``predict_proba``.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x_train_i):
        """Return logits of shape (batch, 10).

        Args:
            x_train_i: batch of inputs whose non-batch dimensions flatten to
                28 * 28 = 784 values per sample.
        """
        x = self.flatten(x_train_i)
        logits = self.dense_layers(x)

        # No softmax here: argmax over logits equals argmax over
        # probabilities, and the loss expects unnormalised scores.
        return logits

    def predict_proba(self, x_train_i):
        """Return class probabilities (each row sums to 1) for a batch of inputs."""
        return self.softmax(self.forward(x_train_i))


In [None]:
def one_hot(labels, num_classes):
    """One-hot encode ``labels`` by selecting rows of an identity matrix.

    Args:
        labels: integer class indices used to index the identity matrix.
        num_classes: size of the identity matrix, i.e. the width of each
            encoded row.

    Returns:
        A tensor with one row of length ``num_classes`` per label; the entry
        at the label's index is 1 and all others are 0.
    """
    identity = torch.eye(num_classes)
    encoded = identity[labels]
    return encoded

In [None]:
# Hyperparameters and run configuration
NUM_CLASSES = 10
BATCH_SIZE = 128
EPOCHS = 10
LEARNING_RATE = 0.001
SEED = 42

# Seed PyTorch's global RNG so that batch shuffling and any weights
# initialised after this cell are repeatable across "Restart & Run All".
torch.manual_seed(SEED)


# Download MNIST into ./data and build the train / test datasets.
# ToTensor() is applied to every image when it is accessed.
train_data = datasets.MNIST(root="data", train=True,  download=True, transform=ToTensor())
test_data =  datasets.MNIST(root="data", train=False, download=True, transform=ToTensor())
# train_data[0][0] -> the input: pixels of a handwritten 5 as a 28*28 array
# train_data[0][1] -> the label: 5


# Labels are left as integer class indices in the datasets; no dataset-level
# one-hot conversion is applied here.


# Shuffle only the training batches; keep the test order fixed.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# # Iterate over the dataloader
# for i, (images, labels) in enumerate(train_loader):
#   # Do something with the images and labels
#   print(images.shape)
#   print(labels.shape)
#   break


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 92215464.97it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 9480761.82it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 26457525.73it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 16888766.64it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [None]:
def train(model, data_loader, loss_fn, optimiser, epochs = 1):
    """Train ``model`` for ``epochs`` passes over ``data_loader`` and print per-epoch stats.

    Args:
        model: module called as ``model(inputs)``. Its output is scored with
            ``argmax(1)``, so it is expected to have one column per class.
        data_loader: iterable yielding ``(inputs, target)`` batches, where
            ``target`` holds integer class indices.
        loss_fn: callable invoked as ``loss_fn(prediction, one_hot_target)``
            that returns a scalar tensor supporting ``backward()``.
        optimiser: optimiser whose ``zero_grad()`` / ``step()`` update the
            model's parameters.
        epochs: number of full passes over ``data_loader``.

    Prints, for every epoch, the mean of the per-batch losses and the
    training accuracy, followed by a final "Training finished!" line.
    Returns ``None``.
    """
    for epoch in range(epochs):
        # Make sure the model is in training mode even if an earlier call
        # (e.g. predict) switched it to eval mode.
        model.train()

        samples_seen = 0
        batches_seen = 0
        correct = 0
        loss_sum = 0.0

        # Feed each training batch to the network
        for inputs, target in data_loader:
            # Forward pass and loss. The one-hot width follows the model's
            # output size instead of relying on a module-level constant.
            prediction = model(inputs)
            one_hot_target = one_hot(target, prediction.shape[1])
            train_loss = loss_fn(prediction, one_hot_target)

            # Backpropagate the error and update the weights
            optimiser.zero_grad()
            train_loss.backward()
            optimiser.step()

            # Accumulate statistics for this epoch
            correct += (prediction.argmax(1) == target).sum().item()
            samples_seen += len(inputs)
            batches_seen += 1
            loss_sum += train_loss.item()

        if samples_seen == 0:
            # Nothing was iterated: avoid dividing by zero below.
            print(f"epoch #{epoch + 1}\t no samples in data_loader; skipping statistics")
            continue

        # Report the epoch average rather than the last batch's loss, which
        # is noisy and can go up even while training improves.
        print(f"epoch #{epoch + 1}\t loss: {loss_sum / batches_seen:.4f}")
        print(f"Train Accuracy: {(100 * correct / samples_seen):>0.1f}%")

    print("Training finished!")

In [None]:

# Build model
feed_forward_net = Network()
print(feed_forward_net)


# Initialise loss function + optimiser
# NOTE(review): nn.CrossEntropyLoss applies log-softmax internally, so it is
# meant to receive raw logits - confirm that Network.forward does not apply
# its own softmax before the loss.
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(feed_forward_net.parameters(), lr=LEARNING_RATE)

# Train model for the configured number of epochs (EPOCHS is set in the
# configuration cell instead of repeating a literal here).
train(feed_forward_net, train_loader, loss_fn, optimiser, EPOCHS)


Network(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense_layers): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=10, bias=True)
  )
  (softmax): Softmax(dim=1)
)
epoch #1	 loss: 1.5903
Train Accuracy: 87.7%
epoch #2	 loss: 1.5205
Train Accuracy: 93.6%
epoch #3	 loss: 1.5057
Train Accuracy: 95.1%
epoch #4	 loss: 1.4887
Train Accuracy: 96.0%
epoch #5	 loss: 1.5103
Train Accuracy: 96.7%
epoch #6	 loss: 1.4724
Train Accuracy: 97.2%
epoch #7	 loss: 1.5032
Train Accuracy: 97.6%
epoch #8	 loss: 1.5021
Train Accuracy: 97.9%
epoch #9	 loss: 1.4945
Train Accuracy: 98.1%
epoch #10	 loss: 1.4741
Train Accuracy: 98.3%
Training finished!


In [None]:
def predict(model, input):
    """Return the predicted class index for every row of ``input``.

    Switches ``model`` to evaluation mode (it is left in that mode), runs the
    forward pass with gradient tracking disabled and takes the argmax along
    dimension 1 of the model's output.
    """
    model.eval()
    with torch.no_grad():
        class_scores = model(input)
        return class_scores.argmax(1)

In [None]:
# Evaluate the trained network on the held-out test set.
correct = 0  # running count of correctly classified test samples
size = 0     # running count of test samples seen

# The loop variable is deliberately not called `input`: at notebook top level
# that would shadow Python's builtin input() for the rest of the session.
for batch_inputs, target in test_loader:

  # make an inference
  predicted = predict(feed_forward_net, batch_inputs)

  correct += (predicted == target).sum().item()
  size += len(batch_inputs)

# Guard against an empty loader instead of raising ZeroDivisionError.
test_accuracy = correct / size if size else float("nan")

print(f"Test Accuracy: {(100*test_accuracy):>0.1f}%")

Test Accuracy: 97.5%
