<a href="https://colab.research.google.com/github/SimeonHristov99/ML_21-22/blob/main/ann_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Digit Classification Challenge
- Goal: Classify handwritten digits
- Type: Multiclass classification

## Imports and Constants

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from tensorflow.keras.datasets.mnist import load_data

from tensorflow.keras.layers import Dense, Input

from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

In [None]:
# Default size used for every figure drawn in this notebook.
FIGSIZE = (12, 10)

# Register the default figure size with matplotlib, then apply the seaborn grid theme.
plt.rcParams['figure.figsize'] = FIGSIZE
sns.set_style('whitegrid')

## Get the data

In [None]:
# Load the MNIST train/test split through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = load_data()

# Sanity checks: 60000 training and 10000 test images of 28x28 pixels,
# each with exactly one label.
assert (X_train.shape, y_train.shape) == ((60000, 28, 28), (60000,))
assert (X_test.shape, y_test.shape) == ((10000, 28, 28), (10000,))

In [None]:
# Echo the raw training images (shape checked above: 60000 x 28 x 28).
X_train

## Exploratory Data Analysis

In [None]:
# Count how many training samples carry each label (class-balance check).
pd.Series(y_train).value_counts()

In [None]:
# Render the first training image in grayscale with the axes hidden.
plt.imshow(X_train[0], cmap='gray')
plt.axis('off')
plt.show()

In [None]:
def plot_random(n):
  """Show `n` distinct, randomly chosen training digits, one figure per digit.

  Each figure is titled with the digit's label taken from `y_train`.

  Args:
    n: number of images to show. It must not exceed the number of training
      samples, because the indices are drawn without replacement.
  """
  indices = np.random.choice(X_train.shape[0], n, replace=False)

  for i in indices:
    plt.title(y_train[i])
    # `X_train` is flattened to 784-long rows by the preprocessing step further
    # down in the notebook. Reshaping keeps this function usable both before
    # and after that cell has run (it is a no-op on a 28x28 image).
    plt.imshow(X_train[i].reshape(28, 28), cmap='gray')
    plt.axis('off')
    plt.show()

plot_random(5)

## Preprocessing

In [None]:
def preprocess_inputs(data):
  """Turn a stack of 28x28 images into rows of 784 floats scaled by 1/255.

  Args:
    data: array whose total size is a multiple of 28 * 28, e.g. shape
      (n, 28, 28) or a single (28, 28) image.

  Returns:
    A float array of shape (n, 784). Pixel values in 0..255 end up in 0..1.
  """
  n_pixels = 28 * 28

  # A dense network consumes one vector per sample, so every image matrix
  # is unrolled into a single row.
  flattened = data.reshape((-1, n_pixels))

  # Convert to float before dividing so the pixels are rescaled to [0, 1].
  return flattened.astype(float) / 255.0

In [None]:
# Flatten and rescale both splits with the same transformation.
# NOTE: the result overwrites the raw arrays, so re-running this cell would
# divide the pixel values by 255 a second time.
X_train, X_test = map(preprocess_inputs, (X_train, X_test))

In [None]:
# Confirm that both splits are now 2-D: one 784-long row per image.
X_train.shape, X_test.shape

## Model building (using the [Sequential API](https://www.tensorflow.org/guide/keras/sequential_model))

In [None]:
# Fully connected classifier: one input per flattened pixel, two hidden ReLU
# layers and 10 output units (one per digit). The output layer has no
# activation, so the model emits raw scores (logits).
model = tf.keras.Sequential([
    # `shape` is given as a keyword tuple: the tuple form is the documented
    # one, whereas a bare int is not accepted by every Keras release.
    Input(shape=(X_train.shape[1],)),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(10)
])

# Another way of doing the same
# model = tf.keras.Sequential()
# model.add(Input(shape=(X_train.shape[1],)))
# model.add(Dense(512, activation='relu'))
# model.add(Dense(256, activation='relu'))
# model.add(Dense(10))

model.summary()

In [None]:
# The output layer applies no softmax, so the loss is told to expect logits.
model.compile(
    optimizer=Adam(),
    loss=SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# Train for 5 epochs, keeping 30% of the training data aside for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.30,
    verbose=2,
)

### Introducing the validation split! From now on, tune hyperparameters only on the validation set. Use the test set only once you have achieved maximum validation accuracy!

In [None]:
# Score the trained model on the held-out test split; with the compiled
# metrics this yields the loss followed by the accuracy.
loss, acc = model.evaluate(x=X_test, y=y_test, batch_size=32, verbose=2)

In [None]:
# Test-set loss returned by model.evaluate above.
loss

In [None]:
# Test-set accuracy returned by model.evaluate above.
acc

### But what are logits?

In [None]:
# Run the model on a batch holding only the first training image and take the
# single output row: 10 raw scores, one per digit.
logits = model(X_train[:1])[0]
logits

In [None]:
# Logits are unnormalised scores, not probabilities, so their sum is not
# constrained to be 1.
tf.reduce_sum(logits)

In [None]:
# Softmax is applied over a batch, so the single logits vector first gets a
# leading batch axis; the result is one row of 10 probabilities.
batch_one_image = tf.expand_dims(logits, axis=0)
batch_one_image_probs = tf.keras.activations.softmax(batch_one_image)
batch_one_image_probs

In [None]:
# Position of the largest probability, i.e. the predicted digit.
tf.argmax(batch_one_image_probs[0])

In [None]:
# Same prediction, converted from a tensor to a NumPy value.
tf.argmax(batch_one_image_probs[0]).numpy()

In [None]:
# Ground-truth label of the same image, to compare with the prediction.
y_train[0]

In [None]:
# Pair the model's raw output for the first training image with its label,
# so the loss can be computed by hand below.
y_pred = model(X_train[:1])[0]
y_true = y_train[0]

In [None]:
# True label of the first training image.
y_true

In [None]:
# Model output for the first training image.
y_pred

In [None]:
# Evaluate the loss by hand for this single sample; y_pred holds logits,
# hence from_logits=True.
scce = SparseCategoricalCrossentropy(from_logits=True)
scce(y_true, y_pred).numpy()

### Plot the training and validation accuracies

In [None]:
# Names of the per-epoch series recorded during fit.
history.history.keys()

#### Summarize history for accuracy

In [None]:
# Training vs. validation accuracy per epoch. `val_accuracy` is measured on
# the samples held out by `validation_split` in `fit`, not on the test set,
# so the legend says "validation".
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

#### Summarize history for loss

In [None]:
# Training vs. validation loss per epoch. `val_loss` is measured on the
# samples held out by `validation_split` in `fit`, not on the test set,
# so the legend says "validation".
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

From both plots we can see that we have overfitting after epoch 4. That means that we should only train for a maximum of three epochs.

## Model building (using the [Functional API](https://www.tensorflow.org/guide/keras/functional))

In [None]:
# Using the Functional API
inputs = Input(X_train.shape[1])

x = Dense(512, activation='relu', name='first_layer')(inputs)
x = Dense(256, activation='relu', name='second_layer')(x)
outputs = Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()

In [None]:
# The output layer already applies softmax, so the loss is left to expect
# probabilities (no from_logits here).
model.compile(
    optimizer=Adam(),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Train for 5 epochs, keeping 30% of the training data aside for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.3,
    verbose=2,
)

In [None]:
# Training vs. validation accuracy per epoch. `val_accuracy` is measured on
# the samples held out by `validation_split` in `fit`, not on the test set,
# so the legend says "validation".
plt.plot(history.history['accuracy'], label='train')
plt.plot(history.history['val_accuracy'], label='validation')

plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

In [None]:
# Training vs. validation loss per epoch. `val_loss` is measured on the
# samples held out by `validation_split` in `fit`, not on the test set,
# so the legend says "validation".
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')

plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')

plt.legend(loc='upper left')

plt.show()

In [None]:
# Score the Functional-API model on the held-out test split.
model.evaluate(x=X_test, y=y_test, batch_size=32, verbose=2)