<a href="https://colab.research.google.com/github/tungmqse/emnist-solution/blob/master/emnist_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install keras + idx2numpy
!pip install keras
!pip install idx2numpy

Collecting idx2numpy
  Downloading https://files.pythonhosted.org/packages/23/6b/abab4652eb249f432c62431907c8de32bdcedb5abdf869ff86653efff981/idx2numpy-1.2.2.tar.gz
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.2-cp36-none-any.whl size=8032 sha256=1e2e7722cde386a485351de98120274c217f825ff53a1251441b8894f501e92d
  Stored in directory: /root/.cache/pip/wheels/7a/b5/69/3e0757b3086607e95db70661798fdf98a77a0bb79c54e1f320
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.2


In [2]:
# Import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import idx2numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, BatchNormalization
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint

Using TensorFlow backend.


In [0]:
# Default root directory; the Google Drive cell below reassigns it to the project folder.
root_path = "/content"

In [4]:
# Use Google Drive to store data: mount it, then work inside a project folder.

from google.colab import drive
drive.mount('/content/gdrive')

project_name = 'EMNIST-Project'
root_path = '/content/gdrive/My Drive/{}'.format(project_name)

# Derive the folder from `project_name` so the name is defined in one place.
# exist_ok=True makes re-running this cell a no-op when the folder is already there.
os.makedirs(root_path, exist_ok=True)

# Later cells use paths relative to the project folder (e.g. "gzip.zip", "gzip").
os.chdir(root_path)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Check if there's file, we do not download anymore
if (not os.path.isfile("gzip.zip")):
  !wget http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip

# Check there's gzip folder, we do not unzip anymore
if (not os.path.isdir("gzip")):
  !unzip gzip.zip
  !gunzip "{root_path}/gzip/emnist-balanced-train-images-idx3-ubyte.gz"
  !gunzip "{root_path}/gzip/emnist-balanced-train-labels-idx1-ubyte.gz"
  !gunzip "{root_path}/gzip/emnist-balanced-test-images-idx3-ubyte.gz"
  !gunzip "{root_path}/gzip/emnist-balanced-test-labels-idx1-ubyte.gz"

In [0]:
# Locate the four extracted balanced-split IDX files and load each into a numpy array.
def _balanced_idx_path(stem):
  """Return the path of the extracted balanced-split IDX file `stem` under root_path/gzip."""
  return '{}/gzip/emnist-balanced-{}-ubyte'.format(root_path, stem)

train_image_file = _balanced_idx_path('train-images-idx3')
train_label_file = _balanced_idx_path('train-labels-idx1')
test_image_file = _balanced_idx_path('test-images-idx3')
test_label_file = _balanced_idx_path('test-labels-idx1')

# Convert in the same order as the paths above: train images/labels, then test images/labels.
train_images, train_labels, test_images, test_labels = (
    idx2numpy.convert_from_file(idx_path)
    for idx_path in (train_image_file, train_label_file, test_image_file, test_label_file)
)

In [0]:
# Scale the pixel values from [0, 255] down to [0, 1] and add a trailing channel axis.
def _prepare_images(images):
  """Return `images` scaled to [0, 1] and reshaped to (count, height, width, 1).

  Integer arrays are treated as raw 0-255 pixel data: they are converted to
  float32 (half the memory of the float64 that plain `/ 255` would produce)
  and divided by 255. Arrays that are already floating point are assumed to
  have been scaled by an earlier run of this cell and are not divided again,
  so re-running the cell does not shrink the values a second time.
  NOTE(review): assumes the IDX loader returns an integer dtype - confirm.
  """
  if np.issubdtype(images.dtype, np.integer):
    images = images.astype('float32') / 255
  # Height and width are read from the array itself; -1 keeps the sample count.
  return images.reshape((-1, images.shape[1], images.shape[2], 1))

train_images = _prepare_images(train_images)
test_images = _prepare_images(test_images)

In [8]:
# Print the shape of every data array as a quick sanity check.
for array_name, array in (
    ('train_images', train_images),
    ('train_labels', train_labels),
    ('test_images', test_images),
    ('test_labels', test_labels),
):
  print('{}.shape'.format(array_name), array.shape)

train_images.shape (112800, 28, 28, 1)
train_labels.shape (112800,)
test_images.shape (18800, 28, 28, 1)
test_labels.shape (18800,)


In [0]:
# Model structure (layer, output shape, parameter count) of the network built below
# Layer (type)                 Output Shape              Param #   
# =================================================================
# conv2d_1 (Conv2D)            (None, 28, 28, 32)        832       
# _________________________________________________________________
# conv2d_2 (Conv2D)            (None, 28, 28, 32)        25632     
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
# _________________________________________________________________
# dropout_1 (Dropout)          (None, 14, 14, 32)        0         
# _________________________________________________________________
# conv2d_3 (Conv2D)            (None, 14, 14, 64)        18496     
# _________________________________________________________________
# conv2d_4 (Conv2D)            (None, 14, 14, 64)        36928     
# _________________________________________________________________
# max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
# _________________________________________________________________
# dropout_2 (Dropout)          (None, 7, 7, 64)          0         
# _________________________________________________________________
# conv2d_5 (Conv2D)            (None, 7, 7, 128)         73856     
# _________________________________________________________________
# conv2d_6 (Conv2D)            (None, 7, 7, 256)         295168    
# _________________________________________________________________
# flatten_1 (Flatten)          (None, 12544)             0         
# _________________________________________________________________
# dense_1 (Dense)              (None, 256)               3211520   
# _________________________________________________________________
# dropout_3 (Dropout)          (None, 256)               0         
# _________________________________________________________________
# dense_2 (Dense)              (None, 47)                12079     
# =================================================================
# Total params: 3,674,511
# Trainable params: 3,674,511
# Non-trainable params: 0
# _________________________________________________________________

# Hyperparameters
num_classes = 47   # width of the final softmax layer
img_size = 28      # input images are img_size x img_size with one channel
epochs = 200
batch_size = 512

def _conv_relu(filters, kernel_size, **extra):
  """Return a Conv2D layer with 'Same' padding and ReLU activation.

  `extra` is forwarded to Conv2D unchanged (used for `input_shape` on the first layer).
  """
  return Conv2D(filters = filters, kernel_size = kernel_size, padding = 'Same',
                activation = 'relu', **extra)

# Build model: three convolutional stages followed by a dense classifier head.
model = Sequential([
    # Stage 1: two 5x5 convolutions, then 2x2 pooling and dropout
    _conv_relu(32, (5,5), input_shape = (img_size,img_size,1)),
    _conv_relu(32, (5,5)),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions, then 2x2 pooling and dropout
    _conv_relu(64, (3,3)),
    _conv_relu(64, (3,3)),
    MaxPooling2D(pool_size=(2,2), strides=(2,2)),
    Dropout(0.25),

    # Stage 3: two 3x3 convolutions, no pooling
    _conv_relu(128, (3,3)),
    _conv_relu(256, (3,3)),

    # Classifier head
    Flatten(),
    Dense(256, activation = "relu"),
    Dropout(0.5),
    Dense(num_classes, activation = 'softmax'),
])

# Use augmentation: every training batch is randomly zoomed and shifted on the fly.
datagen = ImageDataGenerator(
        zoom_range = 0.2, # Randomly zoom image 
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2)  # randomly shift images vertically (fraction of total height)

# datagen.fit(...) is intentionally not called. Fitting only computes dataset
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening; none of those is enabled above, so the call would just copy the
# whole training set without affecting the generated batches.

# Checkpoint callback: writes the model weights (weights only) to `checkpoint_path`,
# which is relative to the current working directory.
checkpoint_path = "emnist-cp.ckpt"
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              save_weights_only=True,
                              verbose=1)

# Compile model: Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Load weights from last checkpoint if any
# NOTE(review): training below still runs the full `epochs` count after a resume,
# because no initial epoch is passed - confirm this is intended.
if os.path.isfile(checkpoint_path):
  model.load_weights(checkpoint_path)

# One-hot encode with an explicit class count so the label matrices always have
# `num_classes` columns, matching the softmax layer even if the highest class
# index happens to be absent from a split.
train_targets = to_categorical(train_labels, num_classes=num_classes)
test_targets = to_categorical(test_labels, num_classes=num_classes)

# NOTE(review): the test split is used as validation data here, so the reported
# validation accuracy is not an independent hold-out estimate.
history = model.fit_generator(datagen.flow(train_images, train_targets, batch_size=batch_size),
                              epochs=epochs, validation_data=(test_images, test_targets),
                              callbacks=[cp_callback])

Epoch 1/200

Epoch 00001: saving model to emnist-cp.ckpt
Epoch 2/200

Epoch 00002: saving model to emnist-cp.ckpt
Epoch 3/200

Epoch 00003: saving model to emnist-cp.ckpt
Epoch 4/200

Epoch 00004: saving model to emnist-cp.ckpt
Epoch 5/200

Epoch 00005: saving model to emnist-cp.ckpt
Epoch 6/200

Epoch 00006: saving model to emnist-cp.ckpt
Epoch 7/200

In [0]:
# Save the trained weights (weights only, not the full model) to an HDF5 file.
# An existing file with this name is left untouched, so it is never overwritten.
saved_model_file_name = 'model-final.h5'
weights_file_present = os.path.isfile(saved_model_file_name)
if not weights_file_present:
  model.save_weights(saved_model_file_name)

In [0]:
# Display every misclassified test image among samples [start_index, end_index),
# printing the true and predicted class index under each one.
start_index = 0
end_index = 500
predictions = model.predict(test_images[start_index:end_index])

# Take the arg-max once for the whole batch rather than once per sample.
predicted_labels = np.argmax(predictions, axis = 1)
# Slice the labels to the number of predictions actually returned, so an
# end_index beyond the end of the test set cannot cause an out-of-range lookup.
expected_labels = test_labels[start_index:start_index + len(predicted_labels)]
wrong_offsets = np.flatnonzero(predicted_labels != expected_labels)

for offset in wrong_offsets:
  i = start_index + offset
  # Drop the channel axis so the sample can be drawn as a 2-D grayscale image.
  pixels = np.array(test_images[i], dtype='float').reshape((28,28))
  plt.imshow(pixels, cmap='gray')
  plt.show()
  print('label: ' + str(test_labels[i]) + ' - predict: ' + str(predicted_labels[offset]))

wrong_prediction_count = len(wrong_offsets)
print('Total wrong predictions: {}'.format(wrong_prediction_count))

In [0]:
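# Optional (disabled): evaluate the trained model on the unbalanced EMNIST "bymerge" test split.
# Uncomment the lines below to run it.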

# !gunzip "{root_path}/gzip/emnist-bymerge-test-images-idx3-ubyte.gz"
# !gunzip "{root_path}/gzip/emnist-bymerge-test-labels-idx1-ubyte.gz"
# unbalanced_image_file = '{}/gzip/emnist-bymerge-test-images-idx3-ubyte'.format(root_path)
# unbalanced_label_file = '{}/gzip/emnist-bymerge-test-labels-idx1-ubyte'.format(root_path)
# unbalanced_images = idx2numpy.convert_from_file(unbalanced_image_file)
# unbalanced_labels = idx2numpy.convert_from_file(unbalanced_label_file)

# unbalanced_images = unbalanced_images / 255
# unbalanced_images = unbalanced_images.reshape((-1, unbalanced_images.shape[1], unbalanced_images.shape[2], 1))
# model.evaluate(unbalanced_images, to_categorical(unbalanced_labels))