The MNIST database of handwritten digits.

label
call_split
bar_chart
70,000 items
Value
arrow_drop_up
Count
0
6,903
1
7,877
2
6,990
3
7,141
4
6,824
5
6,313
6
6,876
7
7,293
8
6,825
9
6,958
split
call_split
bar_chart
70,000 items
Value
arrow_drop_up
Count
test
10,000
train


In [5]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import scipy.stats as stats
from tensorflow.keras.callbacks import EarlyStopping


# You cannot modify from here until it is indicated by a comment
(test_data),test_data_info=tfds.load('mnist',split='test',with_info=True,as_supervised=True)

(train_data),ds_info=tfds.load('mnist',split=['train[10000:45000]'],with_info=True,as_supervised=True)

def getnewtst():
  (new_test),new_test_info=tfds.load('mnist',split=['train[0:9999]'],with_info=True,as_supervised=True)
  new_test = new_test[0].map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
  new_test = new_test.batch(64)
  new_test = new_test.cache()
  new_test = new_test.prefetch(tf.data.AUTOTUNE)
  return new_test


# Can modify code now below this comment

def normalize_img(image, label):
  """Normalizes images: `uint8` -> `float32`.
  The model wants the float and tfds gives you 0-255."""
  return tf.cast(image, tf.float32) / 255., label


train_data = train_data[0].map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
train_data = train_data.cache()
train_data = train_data.shuffle(ds_info.splits['train'].num_examples)
train_data = train_data.batch(64)
train_data = train_data.prefetch(tf.data.AUTOTUNE)


test_data = test_data.map(normalize_img, num_parallel_calls=tf.data.AUTOTUNE)
test_data = test_data.batch(128)
test_data = test_data.cache()
test_data = test_data.prefetch(tf.data.AUTOTUNE)

# in order to improve the performance, I increased the complexity of the convolutional neural network. By adding more convolutional and dense layers, the network becomes deeper and can learn more patterns and features. With a more complex architecture, the network has the potential to achieve better accuracy by capturing more detailed information from the input images.

# I incorporated Dropout into the model. Which randomly sets a fraction rate of input units to 0 at each update during training time, this helps prevent overfitting. The fraction rate is defined as the parameter in Dropout().


model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), 
                           activation='relu'
                           ),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(128, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])


# the previous optimizer used was a stochastic gradient descent with a momentum of 0.3. I am trying Adam as an optimizer, since it often provides better convergence and performance. Adam optimizer uses adaptive learning rates and momentum, which can lead to faster convergence and potentially better accuracy.

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)


# the code previously trained the model for 7 epochs. I increased the number of epochs to allow the model more training iterations. Increasing the number of epochs allows the model to see the data multiple times, which can lead to improved accuracy as the model learns from the data more extensively.

# increasing the number of epochs gives the model with more opportunities to learn from the data, however it also increases the risk of overfitting. Overfitting happens when the model starts to memorize the training data, which leads to high accuracy on the training set but poor performance on unseen data. To mitigate this, I introduced an early stopping callback. Early stopping monitors, in this case 'val_loss', across epochs during training. If the model's performance on the validation set doesn't improve for a specified number of epochs, or 'patience', the training is halted. This prevents overfitting and reduces computational cost by avoiding unnecessary epochs of training.

early_stopping = EarlyStopping(monitor='val_loss', patience=3)

model.fit(
    train_data,
    epochs=15,
    validation_data=test_data,
    callbacks=[early_stopping]
)


results = model.evaluate(getnewtst())
print("test loss, test acc:", results)


# I introduced a function to calculate the confidence interval, which helps determine the bounds of accuracy the model can achieve. using the Z-score for 95% confidence interval
def compute_confidence_interval(accuracy, n):
    z = stats.norm.ppf(1 - (1 - 0.95) / 2)
    interval = z * np.sqrt((accuracy * (1 - accuracy)) / n)
    lower_bound = accuracy - interval
    upper_bound = accuracy + interval
    return lower_bound, upper_bound


# confidence interval
accuracy = results[1]  # Test accuracy
n = 10000  # Number of test examples
lower_bound, upper_bound = compute_confidence_interval(accuracy, n)
print(f"95% confidence interval for the accuracy: [{lower_bound:.4f}, {upper_bound:.4f}]")



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
test loss, test acc: [0.05394825339317322, 0.9885988831520081]
95% confidence interval for the accuracy: [0.9865, 0.9907]
