## MNIST Classifier for all digits

In [1]:
#hide
from utils import *
from fastai2.vision.widgets import *

## MNIST NN For all Digits

As part of this exercise we will create a neural network that classifies all ten digits of the MNIST handwritten-digit dataset, training it with Stochastic Gradient Descent and following Jeremy's approach.

In [2]:
# Data set download: fetch the MNIST archive and keep the folder it was extracted to.
# Setting Path.BASE_PATH makes paths print relative to that folder (see the outputs below).
path = untar_data(URLs.MNIST)
Path.BASE_PATH = path

In [3]:
# Check data set: the training folder should contain one sub-folder per digit (0-9)
(path/'training').ls()

(#10) [Path('training/0'),Path('training/2'),Path('training/9'),Path('training/8'),Path('training/7'),Path('training/1'),Path('training/5'),Path('training/4'),Path('training/6'),Path('training/3')]

In [4]:
# Collect the per-digit folders of the training and the validation datasets.
# The validation data lives in the 'testing' folder; sorting gives the folders
# a stable 0..9 order instead of the arbitrary order returned by ls().
training = (path/'training').ls().sorted()
validation = (path/'testing').ls().sorted()
training

(#10) [Path('training/0'),Path('training/1'),Path('training/2'),Path('training/3'),Path('training/4'),Path('training/5'),Path('training/6'),Path('training/7'),Path('training/8'),Path('training/9')]

In [5]:
# For each digit folder of the training set, list its image files in sorted order.
# The result holds one list of image paths per digit folder.
training_paths = [digit_folder.ls().sorted() for digit_folder in training]

# Same listing for the digit folders of the validation set
validation_paths = [digit_folder.ls().sorted() for digit_folder in validation]

In [6]:
# Flatten the per-digit lists into one flat list of image paths for the transformations.
# Every per-digit list was already sorted when it was built, so nothing is re-sorted
# here: the flat list keeps all images of the first folder, then the second, and so on.
training_data = [image_file for digit_files in training_paths for image_file in digit_files]

# Do the same for validation dataset
validation_data = [image_file for digit_files in validation_paths for image_file in digit_files]

In [7]:
# Read every image file into a tensor, stack the images into one tensor
# (first axis = image index) and rescale the pixel values to floats by dividing by 255
train_tensors = [tensor(Image.open(image_file)) for image_file in training_data]
train_x_stacked = torch.stack(train_tensors).float().div(255)

# Identical pipeline for the validation images
valid_tensors = [tensor(Image.open(image_file)) for image_file in validation_data]
valid_x_stacked = torch.stack(valid_tensors).float().div(255)

In [8]:
### Image represented inside our stacked tensors
# Pick training image number 57000 and keep indices 2..23 of its last axis,
# which become the 22 columns of the table
df = pd.DataFrame(train_x_stacked[57000,:,2:24])
# Show the pixel values in a small font, shaded with a grey colour gradient
df.style.set_properties(**{'font-size':'6pt'}).background_gradient('Greys')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.184314,0.427451,0.427451,0.427451,0.431373,0.427451,0.427451,0.184314,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.207843,0.286275,0.529412,0.85098,0.85098,0.85098,0.913725,0.988235,0.988235,0.988235,0.992157,0.988235,0.988235,0.580392,0.0


In [9]:
# Flatten every image into a vector. The result is a rank 2 tensor:
# the 1st axis is the image number, the 2nd axis holds the 28*28 pixel values of that image
train_x = train_x_stacked.view(-1, 28*28)
train_x.shape  # not displayed: a cell only echoes its last expression

# Perform same preparation for validation data
valid_x = valid_x_stacked.view(-1, 28*28)
valid_x.shape

torch.Size([10000, 784])

In [10]:
def _digit_labels(paths_per_digit):
    """Return a float tensor with one label per image, in folder order.

    `paths_per_digit` is a sequence of per-folder lists of image paths. Each
    image is labelled with the digit that names its parent folder
    (e.g. training/3/<file> -> 3.0).

    The label is read from the folder *name* only. Searching the whole path
    string for the first digit would pick up digits from unrelated parent
    directories (user name, version folder, ...) and mislabel every image.

    Folders without images are skipped. A folder whose name is not an integer
    raises ValueError instead of silently producing a wrong label.
    """
    per_folder = []
    for image_paths in paths_per_digit:
        if len(image_paths) == 0:
            continue
        digit = float(int(image_paths[0].parent.name))
        per_folder.append(tensor([digit] * len(image_paths)))
    # torch.cat rejects an empty list, so fall back to an empty tensor
    return torch.cat(per_folder, 0) if per_folder else tensor([])

# Define the labels of the training set
train_y = _digit_labels(training_paths)

# Define the labels of the validation dataset
valid_y = _digit_labels(validation_paths)

In [11]:
# Convert the labels from floats to 64-bit integers (LongTensor); the loss below
# passes them to F.nll_loss as class-index targets
train_y = train_y.type(torch.LongTensor)
valid_y = valid_y.type(torch.LongTensor)

In [12]:
# Creating a tuple which will contain the image vector
# and the label assiged and will be matched based on the index of the
# original 2 tensors
train_dset = list(zip(train_x,train_y))

# Finalize validation dataset
valid_dset = list(zip(valid_x,valid_y))
len(train_dset)

60000

In [13]:
# Peek at one (image vector, label) tuple to sanity-check the dataset
train_dset[1000]

(tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0

In [14]:
## Loss function: cross-entropy between the model outputs and the true digits

# The model emits one raw score per digit class for every image.
# log_softmax turns each row of scores into log-probabilities (the probabilities
# of a row sum to 1), and nll_loss then picks the log-probability of the correct
# class, negates it and averages the result over the batch.

def mnist_loss(predictions, targets):
    """Return the mean negative log-likelihood of `targets` under `predictions`.

    predictions: raw class scores, with the classes along dimension 1.
    targets: integer class indices, one per row of `predictions`.
    """
    log_probs = F.log_softmax(predictions, dim=1)
    return F.nll_loss(log_probs, targets)

In [15]:
## Accuracy metric: fraction of images in the batch whose predicted digit is correct

def batch_accuracy(xb, yb):
    """Return the mean accuracy of a batch as a scalar tensor.

    xb: model outputs, one row per image and one score per class.
    yb: the true class index of every image.

    The predicted class is the index of the largest score in each row. The
    tensor's own argmax is used so the computation never leaves PyTorch;
    routing a tensor through NumPy's argmax depends on NumPy's dispatch rules
    and is fragile for tensors that are not plain CPU tensors.
    """
    predicted = xb.argmax(dim=1)
    correct = (predicted == yb)
    return correct.float().mean()

In [16]:
# To train more efficiently we use data loaders: they manage the training and
# validation data and hand it to the model in mini-batches.
# The training loader shuffles the data so that every epoch of SGD sees
# differently composed mini-batches.
dl = DataLoader(train_dset, batch_size=256, shuffle=True)

# The validation loader is only used to measure the model, so shuffling adds
# nothing; leaving it off keeps the evaluation pass in a fixed order.
valid_dl = DataLoader(valid_dset, batch_size=256)

# Next we will create a learner, which needs a DataLoaders object that
# holds the training and validation loaders used for our model
dls = DataLoaders(dl, valid_dl)

This is a very basic representation of a neural network:
we have 2 linear layers with a non-linearity (activation function) between them,
and this can be used to train our model.
Written out by hand, the whole network computes:
```
  res = xb@w1 + b1
  res = res.max(tensor(0.0))
  res = res@w2 + b2
```
Similarly, we can use PyTorch's `nn.Sequential` to build the same neural network:

In [17]:
# Layer sizes: one input per pixel of a 28x28 image, a small hidden layer,
# and one output score per digit class
n_inputs, n_hidden, n_classes = 28*28, 30, 10

# Two linear layers with a ReLU non-linearity in between
simple_net = nn.Sequential(
    nn.Linear(n_inputs, n_hidden),
    nn.ReLU(),
    nn.Linear(n_hidden, n_classes),
)

In [18]:
# Bundle the data loaders, the model, the SGD optimiser, our loss function and
# our accuracy metric into a fastai Learner, which runs the training loop
learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

In [19]:
# Train for 40 epochs with a learning rate of 0.1
learn.fit(40, 0.1)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.474929,0.40137,0.8876,00:05
1,0.345016,0.319639,0.9108,00:07
2,0.30273,0.289878,0.9182,00:09
3,0.286363,0.275512,0.9235,00:09
4,0.267609,0.259965,0.9269,00:10
5,0.248123,0.246209,0.9287,00:10
6,0.24112,0.229994,0.9352,00:08
7,0.224022,0.226563,0.9359,00:12
8,0.212422,0.214662,0.9372,00:11
9,0.202091,0.204211,0.9399,00:08
