# Digits classifier

Let's build a model that is able to recognize a single digit from 0 to 9 from an image.

In [21]:
# setting up dependencies and data

# loading and setting up dependencies
import fastbook

fastbook.setup_book()

from fastai.vision.all import *
from fastbook import *
from matplotlib import pyplot as plt
from torch.nn import functional as F, CrossEntropyLoss

# checking that we're using GPU for this one
print(torch.cuda.get_device_name())

# import MNIST dataset
path_to_mnist = untar_data(URLs.MNIST)
Path.BASE_PATH = path_to_mnist

# sorting files
mnist_training_files_0s = (path_to_mnist / 'training' / '0').ls().sorted()
mnist_training_files_1s = (path_to_mnist / 'training' / '1').ls().sorted()
mnist_training_files_2s = (path_to_mnist / 'training' / '2').ls().sorted()
mnist_training_files_3s = (path_to_mnist / 'training' / '3').ls().sorted()
mnist_training_files_4s = (path_to_mnist / 'training' / '4').ls().sorted()
mnist_training_files_5s = (path_to_mnist / 'training' / '5').ls().sorted()
mnist_training_files_6s = (path_to_mnist / 'training' / '6').ls().sorted()
mnist_training_files_7s = (path_to_mnist / 'training' / '7').ls().sorted()
mnist_training_files_8s = (path_to_mnist / 'training' / '8').ls().sorted()
mnist_training_files_9s = (path_to_mnist / 'training' / '9').ls().sorted()

# loading images
list_0s = [tensor(Image.open(digit)) for digit in mnist_training_files_0s]
list_1s = [tensor(Image.open(digit)) for digit in mnist_training_files_1s]
list_2s = [tensor(Image.open(digit)) for digit in mnist_training_files_2s]
list_3s = [tensor(Image.open(digit)) for digit in mnist_training_files_3s]
list_4s = [tensor(Image.open(digit)) for digit in mnist_training_files_4s]
list_5s = [tensor(Image.open(digit)) for digit in mnist_training_files_5s]
list_6s = [tensor(Image.open(digit)) for digit in mnist_training_files_6s]
list_7s = [tensor(Image.open(digit)) for digit in mnist_training_files_7s]
list_8s = [tensor(Image.open(digit)) for digit in mnist_training_files_8s]
list_9s = [tensor(Image.open(digit)) for digit in mnist_training_files_9s]

# stacking images into rank-3 tensors and normalizing them
stacked_0s = torch.stack(list_0s).float()/255
stacked_1s = torch.stack(list_1s).float()/255
stacked_2s = torch.stack(list_2s).float()/255
stacked_3s = torch.stack(list_3s).float()/255
stacked_4s = torch.stack(list_4s).float()/255
stacked_5s = torch.stack(list_5s).float()/255
stacked_6s = torch.stack(list_6s).float()/255
stacked_7s = torch.stack(list_7s).float()/255
stacked_8s = torch.stack(list_8s).float()/255
stacked_9s = torch.stack(list_9s).float()/255

# `torch.cat` concatenates tensors along the first dimension;
# then `view` reshapes the concatenated tensor into a rank-2 tensor,
# this new tensor has 28*28 columns and a number of rows equal to the number of images in the concatenated tensors;
# a 28*28 image is flattened into a 784 pixels vector
train_x = torch.cat([stacked_0s, stacked_1s, stacked_2s, stacked_3s, 
    stacked_4s, stacked_5s, stacked_6s, stacked_7s, 
    stacked_8s, stacked_9s]).view(-1, 28*28)
# let's label each image
train_y = tensor([0]*len(list_0s) + [1]*len(list_1s) + [2]*len(list_2s) + [3]*len(list_3s) + [4]*len(list_4s) + [5]*len(list_5s) + [6]*len(list_6s) + [7]*len(list_7s) + [8]*len(list_8s)+ [9]*len(list_9s)).unsqueeze(1)
# When indexed, a `Dataset` is required to return a tuple of `(x,y)`, where `x` is the input data and `y` is the label.
train_set = list(zip(train_x,train_y))

# let's prepare our valid set by using the MNIST testing set
mnist_valid_files_0s = (path_to_mnist / 'testing' / '0').ls().sorted()
mnist_valid_files_1s = (path_to_mnist / 'testing' / '1').ls().sorted()
mnist_valid_files_2s = (path_to_mnist / 'testing' / '2').ls().sorted()
mnist_valid_files_3s = (path_to_mnist / 'testing' / '3').ls().sorted()
mnist_valid_files_4s = (path_to_mnist / 'testing' / '4').ls().sorted()
mnist_valid_files_5s = (path_to_mnist / 'testing' / '5').ls().sorted()
mnist_valid_files_6s = (path_to_mnist / 'testing' / '6').ls().sorted()
mnist_valid_files_7s = (path_to_mnist / 'testing' / '7').ls().sorted()
mnist_valid_files_8s = (path_to_mnist / 'testing' / '8').ls().sorted()
mnist_valid_files_9s = (path_to_mnist / 'testing' / '9').ls().sorted()

list_0s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_0s]
list_1s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_1s]
list_2s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_2s]
list_3s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_3s]
list_4s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_4s]
list_5s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_5s]
list_6s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_6s]
list_7s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_7s]
list_8s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_8s]
list_9s_valid = [tensor(Image.open(digit)) for digit in mnist_valid_files_9s]

stacked_0s_valid = torch.stack(list_0s_valid).float()/255
stacked_1s_valid = torch.stack(list_1s_valid).float()/255
stacked_2s_valid = torch.stack(list_2s_valid).float()/255
stacked_3s_valid = torch.stack(list_3s_valid).float()/255
stacked_4s_valid = torch.stack(list_4s_valid).float()/255
stacked_5s_valid = torch.stack(list_5s_valid).float()/255
stacked_6s_valid = torch.stack(list_6s_valid).float()/255
stacked_7s_valid = torch.stack(list_7s_valid).float()/255
stacked_8s_valid = torch.stack(list_8s_valid).float()/255
stacked_9s_valid = torch.stack(list_9s_valid).float()/255

# now, let's put together our validation set
valid_x = torch.cat([stacked_0s_valid, stacked_1s_valid, stacked_2s_valid, stacked_3s_valid, 
    stacked_4s_valid, stacked_5s_valid, stacked_6s_valid, stacked_7s_valid, 
    stacked_8s_valid, stacked_9s_valid]).view(-1, 28*28)
valid_y = tensor([0]*len(list_0s_valid) + [1]*len(list_1s_valid) + [2]*len(list_2s_valid) + [3]*len(list_3s_valid) + [4]*len(list_4s_valid) + [5]*len(list_5s_valid) + [6]*len(list_6s_valid) + [7]*len(list_7s_valid) + [8]*len(list_8s_valid)+ [9]*len(list_9s_valid)).unsqueeze(1)
valid_set = list(zip(valid_x,valid_y))

NVIDIA GeForce RTX 3070 Laptop GPU


In [27]:
# let's define our linear model
# The equation `batch @ weights + bias` is a fundamental equation of any neural network.
def linear_sgd_model(independent_variables): return (independent_variables@weights + bias)

# let's define a metric to measure the accuracy of our model
def batch_accuracy(preds, targets):
    probs = torch.softmax(preds, dim=1)
    preds_labels = torch.argmax(probs, dim=1)
    true_labels = targets.view(preds_labels.shape)
    correct_count = (preds_labels == true_labels).sum().item()
    return tensor([correct_count / len(true_labels)])

# let's define our loss function
def mnist_loss(preds, targets):
    # flatten the targets tensor to make it one-dimensional
    targets = targets.flatten()
    # measures the dissimilarity between the predicted probability distribution and the true distribution, aiming to minimize this difference
    loss_function = CrossEntropyLoss()
    return loss_function(preds, targets.long())

#  function to calculate the gradient of the loss
def calc_grad(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

# defining training and validation functions
def train_epoch(model, lr, params):
    for xb,yb in training_data_loader:
        calc_grad(xb, yb, model)
        for p in params:
            p.data -= p.grad*lr
            p.grad.zero_()

def validate_epoch(model):
    accs = [batch_accuracy(model(xb), yb) for xb,yb in validation_data_loader]
    return round(torch.stack(accs).mean().item(), 4)

# let's initialize our parameters (weights) for every pixel with random values
# this function returns a tensor of size `size` filled with random values from a normal distribution with a standard deviation of `std`
def init_params(size, std=1.0): return (torch.randn(size)*std).requires_grad_()
# let's initialize our weights with 10 output nodes, one for each digit
weights = init_params((28*28,10))
# the bias as well will be subject to gradient descent
bias = init_params(1)

# let's create datas loaders for training and validation sets
training_data_loader = DataLoader(train_set, batch_size=64)
validation_data_loader = DataLoader(valid_set, batch_size=64)

for i in range(299):
    params = weights,bias
    train_epoch(linear_sgd_model, 0.001, params)
    # validate_epoch(linear_model)
    # print(validate_epoch(linear_sgd_model), end='\n')

# printing the last accuracy achieved
print(validate_epoch(linear_sgd_model), end=' ')


0.8538 

Reaching almost 86% accuracy with a simple linear SGD classifier for all digits is not bad at all!