<a href="https://colab.research.google.com/github/wooihaw/ml_dl_comparison/blob/main/mnist_dl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Classification using LeNet-5 (Deep Learning)

In [None]:
# Initialization
# Notebook-wide setup: inline figure rendering and warning suppression.
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
from warnings import filterwarnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below; consider narrowing the filter if output noise is not a concern.
filterwarnings('ignore')

In [None]:
# Import libraries
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# ------------------------------------------------------------
# 1. Load and preprocess the MNIST dataset
# ------------------------------------------------------------
(X_train, y_train), (X_test, y_test) = datasets.mnist.load_data()

# Add a trailing channel axis -> (samples, 28, 28, 1), cast to float32 and
# rescale the pixel intensities from 0-255 down to 0-1 in a single expression.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode both label vectors over the 10 digit classes.
y_train, y_test = (to_categorical(labels, 10) for labels in (y_train, y_test))

In [None]:
# ------------------------------------------------------------
# 2. Define the LeNet-5 model
# ------------------------------------------------------------
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so that weight initialization (and the shuffling performed by
# the training that follows) is repeatable under Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic; bit-exact
# reruns may additionally need TensorFlow's op-determinism setting.
tf.keras.utils.set_random_seed(42)

# LeNet-5-style network. This variant uses ReLU activations and max pooling
# in the S2/S4 subsampling stages (the layers below are MaxPooling2D, not
# average pooling).
model = models.Sequential([
    # Input layer: 28x28 single-channel images
    layers.Input(shape=(28, 28, 1)),
    # C1: Convolutional layer ('same' padding keeps 28x28) -> 28x28x6
    layers.Conv2D(filters=6, kernel_size=(5, 5), activation='relu', padding='same'),
    # S2: Max pooling -> 14x14x6
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    # C3: Convolutional layer (default 'valid' padding) -> 10x10x16
    layers.Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
    # S4: Max pooling -> 5x5x16
    layers.MaxPooling2D(pool_size=(2, 2), strides=2),

    # Flatten before entering fully connected layers -> 400 features
    layers.Flatten(),

    # C5: Fully connected layer
    layers.Dense(120, activation='relu'),
    # F6: Fully connected layer
    layers.Dense(84, activation='relu'),
    # Output layer: one softmax probability per digit class
    layers.Dense(10, activation='softmax')
])

In [None]:
# ------------------------------------------------------------
# 3. Compile the model
# ------------------------------------------------------------
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# Adam is the optimizer and accuracy is the metric reported during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts
model.summary()

In [None]:
# ------------------------------------------------------------
# 4. Train the model
# ------------------------------------------------------------
# Stop once validation loss has gone 3 epochs without improving, then roll the
# model back to the weights from its best epoch (guards against overfitting).
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Hold out 10% of the training data for validation; train for at most 15 epochs.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=15,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=2,
)

In [None]:
# ------------------------------------------------------------
# 5. Evaluate the model
# ------------------------------------------------------------
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")

# ------------------------------------------------------------
# 6. Visualize training history
# ------------------------------------------------------------
# Two side-by-side panels (accuracy on the left, loss on the right), each
# overlaying the training curve with its validation counterpart.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]
for ax, (key, label) in zip(axes, panels):
    ax.plot(history.history[key], label=f'Train {label}')
    ax.plot(history.history[f'val_{key}'], label=f'Validation {label}')
    ax.set_title(f'Model {label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# ------------------------------------------------------------
# 7. Confusion Matrix for CNN Model
# ------------------------------------------------------------
# Convert probabilities to predicted classes (index of the largest softmax output)
y_pred = model.predict(X_test).argmax(axis=1)

# Convert one-hot test labels to integers
y_true = y_test.argmax(axis=1)

# Print classification report (per-class precision, recall and F1)
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Plot confusion matrix.
# The axes are created first and handed to scikit-learn via ax=. When ax is
# omitted, from_predictions opens a figure of its own, which would leave a
# separately created plt.figure(figsize=...) blank and its size unused.
fig, ax = plt.subplots(figsize=(10, 8))
ConfusionMatrixDisplay.from_predictions(y_true, y_pred, cmap='Blues', ax=ax)
ax.set_title("Confusion Matrix for LeNet-5 CNN on MNIST")
plt.show()