# Lesson 40: Deep Learning - MNIST example

This is "Hello World!" of Deep Learning: image recognition and classification.

## Import packages

In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

## Data

### Loading, cleaning

In [2]:
# Datasets are registerd sets of data which are loaded by name. "as_supervised = True" means that the data 
# will be organized as two tuples: input and output.

mnist_dataset, mnist_info = tfds.load(name = "mnist", with_info = True, as_supervised = True)

mnist_train, mnist_test = mnist_dataset["train"], mnist_dataset["test"]

# By default, there is no validation set and we have to create it by ourselves.

num_validation_samples = 0.1 * mnist_info.splits["train"].num_examples

# At this point we cannot be sure if the validation sample is int type, so we convert it to int:

num_validation_samples = tf.cast(num_validation_samples, tf.int64)

# Let us create a variable dedicated to test samples:

num_test_samples = mnist_info.splits["test"].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_path='/home/alina/tensorflow_datasets/mnist/3.0.1',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=uint8),
        'label': ClassLabel(shape=(), dtype=int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
)

### Preprocessing (scaling, shuffling, batching)

In [3]:
# Now we scale the numerical variables so that they are between 0 and 1 (now they correspond to grey scale numbers
# which are between 0 and 255), to make them more stable.

# Transforming function is:

def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /=  255.
    return image, label
    
scaled_train_and_validation_data = mnist_train.map(scale)

scaled_test_data = mnist_test.map(scale)

# Shuffling:

# "buffer_size" - used for huge data, because we cannot shuffle all data at once, and we need to shuffle them
# in packages:

# 1 < buffer_sze < num_samples - optimal way to shuffle data

BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

# Extracting validation and train data:

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Preparing for batching:

# We will be using "mini-batch gradient descent" method as it is the train accuracy and speed is most optimal.
# batch size = 1 is the stochastic gradient descent
# batch size = num_samples is the sigle batch gradient descent
# 1 < batch size < num_samples: mini-batch GD

# A number of samples taken to each batch:

BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)

# Batching is needed for the train_data as it needs backpropagation. 
# Validation_data will need forward propagation so batching is not needed.

# Note that batching is useful for updating weights, but doing it once per a batch is enough.

# Whenever we validate or test we simply forward propagate once. 
# When we are batching during training we find an average LOSS

# HOWEVER, the model needs to specify butching for validation_data and test_data:

validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data))

# iter() makes the elements of validation_data iterable; next() - loads the next element of the iterable object.


## Model

### Outlining the model

In [4]:
input_size = 784
output_size = 10
hidden_layer_size = 300

# Our data is 28*28*1 each input.

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape = (28,28,1)),
    tf.keras.layers.Dense(hidden_layer_size, activation = "relu"),
    # tf.keras.layers.Dense(hidden_layer_size, activation = "sigmoid"),
    tf.keras.layers.Dense(hidden_layer_size, activation = "tanh"),
    tf.keras.layers.Dense(output_size, activation = "softmax")
])

### Optimizer and loss function

In [5]:
custom_optimizer = "adam"

model.compile(optimizer = custom_optimizer, loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

### Training 

In [6]:
NUM_EPOCHS = 10

EARLY_STOPPING = tf.keras.callbacks.EarlyStopping(monitor = "val_loss", min_delta = 0, patience = 1)

model.fit(train_data, 
          epochs = NUM_EPOCHS, 
          callbacks = [EARLY_STOPPING],
          validation_data = (validation_inputs, validation_targets), 
          verbose = 2)

Epoch 1/10
540/540 - 12s - loss: 0.2316 - accuracy: 0.9305 - val_loss: 0.1169 - val_accuracy: 0.9658 - 12s/epoch - 23ms/step
Epoch 2/10
540/540 - 10s - loss: 0.0902 - accuracy: 0.9717 - val_loss: 0.0812 - val_accuracy: 0.9765 - 10s/epoch - 18ms/step
Epoch 3/10
540/540 - 9s - loss: 0.0596 - accuracy: 0.9810 - val_loss: 0.0483 - val_accuracy: 0.9855 - 9s/epoch - 16ms/step
Epoch 4/10
540/540 - 11s - loss: 0.0440 - accuracy: 0.9857 - val_loss: 0.0418 - val_accuracy: 0.9870 - 11s/epoch - 19ms/step
Epoch 5/10
540/540 - 10s - loss: 0.0340 - accuracy: 0.9889 - val_loss: 0.0352 - val_accuracy: 0.9890 - 10s/epoch - 18ms/step
Epoch 6/10
540/540 - 10s - loss: 0.0255 - accuracy: 0.9918 - val_loss: 0.0270 - val_accuracy: 0.9915 - 10s/epoch - 18ms/step
Epoch 7/10
540/540 - 11s - loss: 0.0222 - accuracy: 0.9923 - val_loss: 0.0199 - val_accuracy: 0.9938 - 11s/epoch - 20ms/step
Epoch 8/10
540/540 - 10s - loss: 0.0223 - accuracy: 0.9925 - val_loss: 0.0228 - val_accuracy: 0.9937 - 10s/epoch - 19ms/step


<keras.callbacks.History at 0x7fe480382f10>

### Testing

In [7]:
test_loss, test_accuracy = model.evaluate(test_data)



In [8]:
print("Test loss: {:.2f}. Test accuracy: {:.2f}%.".format(test_loss, test_accuracy * 100.))

Test loss: 0.08. Test accuracy: 97.87%.
