# MNIST Accuracy = 99.79%
It's amazing that convolutional neural networks can classify handwritten digits so accurately. In this notebook, we witness an ensemble of 15 CNNs classify MNIST's 10,000 test images after training on MNIST's 60,000 training images plus 25 million more images created by rotating, scaling, and shifting MNIST's training images. Learning from 25,060,000 images, this ensemble of CNNs achieves 99.79% classification accuracy (with average accuracy 99.745% and standard deviation of 0.020 as indicated by 100 trials). This accuracy revivals the best to date. This notebook uses ideas from the best published models found on the internet. Advanced techniques include data augmentation, nonlinear convolution layers, learnable pooling layers, ReLU activation, ensembling, bagging, decaying learning rates, dropout, batch normalization, and adam optimization.

More information about this ensemble of CNNs can be found [here][1].
[1]:https://www.kaggle.com/cdeotte/25-million-images-0-99757-mnist

In [13]:
import os
import json
import numpy as np
from skimage import io, color

from myutils import *

In [2]:
def images_to_numpy_array(folder_path):
    image_arrays = []
    labels = []

    # Iterate through each folder (assuming each folder represents a label)
    for label in os.listdir(folder_path):
        label_path = os.path.join(folder_path, label)
        if not os.path.isdir(label_path):
            continue

        # Read images from each folder
        for filename in os.listdir(label_path):
            image_path = os.path.join(label_path, filename)
            if os.path.isfile(image_path) and filename.endswith(('.png')):
                # Open image using scikit-image
                image = io.imread(image_path, as_gray=True)

                # Convert image to numpy array
                image_array = np.array(image)

                # Append to list
                image_arrays.append(image_array)
                labels.append(label.replace('_', ''))
                # print(f'Image label: {label.replace("_", "")}, image_file: {image_path}')

    # Convert lists to numpy arrays
    image_arrays = np.array(image_arrays)
    labels = np.array(labels)

    return image_arrays, labels

In [3]:
import os
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataloader import default_collate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torchvision import transforms

from myutils import *
from keras.preprocessing.image import ImageDataGenerator

In [4]:
# Load images and labels
folder_path = "dataset"
images, eng_labels = images_to_numpy_array(folder_path)

# Normalize images and encode labels
images = images.astype("float32") / 255.0
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(eng_labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Create a dictionary of labels and their inverse transformed values
label_dict = [label_encoder.inverse_transform([label])[0] for label in set(labels)]

# Write the dictionary to a file
with open('labels.list', 'w') as file:
    json.dump(label_dict, file)

In [5]:
# LOAD LIBRARIES
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# USE KERAS WITH DEFAULT TENSORFLOW BACKEND
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.datasets import mnist

# Load MNIST's 60,000 training images

In [6]:
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [7]:
# PREPARE DATA FOR NEURAL NETWORK
X_train = x_train / 255.0
X_test = x_test / 255.0
X_train = X_train.reshape(-1,32,32,1)
X_test = X_test.reshape(-1,32,32,1)
Y_train = to_categorical(y_train, num_classes = NUM_LABELS)

# Generate 25 million more images!!
by randomly rotating, scaling, and shifting MNIST's 60,000 training images.

In [8]:
# CREATE MORE IMAGES WITH DATA AUGMENTATION
datagen = ImageDataGenerator(
        rotation_range=15,
        zoom_range = 0.15,  
        width_shift_range=0.1, 
        height_shift_range=0.1)

In [9]:
# BUILD CONVOLUTIONAL NEURAL NETWORKS
nets = NUM_CNNS
model = [0] * nets
for j in range(nets):
    model[j] = create_TF_CNN()

2024-02-17 21:04:34.258296: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2024-02-17 21:04:34.258321: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 36.00 GB
2024-02-17 21:04:34.258324: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 13.50 GB
2024-02-17 21:04:34.258367: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-17 21:04:34.258387: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Train 15 CNN

In [10]:
# DECREASE LEARNING RATE BY 0.95 EACH EPOCH
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** x)

# TRAIN CNNs AND DISPLAY ACCURACIES
epochs = 30
history = [0] * nets
results = [0] * nets
for j in range(nets):
    X_train2, X_val2, Y_train2, Y_val2 = train_test_split(X_train, Y_train, test_size = 0.1)
    history[j] = model[j].fit(datagen.flow(X_train2,Y_train2, batch_size=64),
      epochs = epochs, steps_per_epoch = X_train2.shape[0]//64,
      validation_data = (X_val2,Y_val2), callbacks=[annealer], verbose=0)
    print(f'final epoch loss:',history[0].history['loss'][-1])
    print(f'final epoch accuracy:',history[0].history['accuracy'][-1])
  
    # predict
    results[j] = model[j].predict(X_test)
    results2 = np.argmax(results[j],axis = 1)

    # calc accuracy
    c=0
    for i in range(len(X_test)):
        if results2[i]!=y_test[i]:
            c +=1
    print("CNN %d: Test accuracy = %f" % (j+1,1-c/len(X_test)))

2024-02-17 21:04:35.412031: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


KeyboardInterrupt: 

In [20]:
X_test.shape

(2072, 32, 32, 1)

In [26]:
# predict
results2 = np.zeros((X_test.shape[0],NUM_LABELS))
for j in range(nets):
    results2 = results2 + results[j]
results2 = np.argmax(results2, axis = 1)

# calculate accuracy on test fold
c=0
for i in range(X_test.shape[0]):
    if results2[i]!=y_test[i]:
        c +=1
print("Ensemble Accuracy = %f" % (1-c/X_test.shape[0]))

Ensemble Accuracy = 0.013514


In [None]:
print(y_test.shape)

(2072,)
