In [1]:
# import numpy and pandas for data manipulation
import numpy as np
import pandas as pd

# import os to read system directory
import os

# import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, BatchNormalization, MaxPooling2D
from keras.utils import to_categorical

In [2]:
# Use the directory the notebook was launched from as the data directory.
path = os.getcwd()

# Load the labelled training set (train.csv must sit in that directory).
train_data_full = pd.read_csv(f'{path}/train.csv')

In [3]:
# Print a concise summary of the training frame (index range, column count,
# dtypes and memory usage) as a quick sanity check on what was loaded.
train_data_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [4]:
# The CSV is parsed as int64; cast every column (the label column included)
# to float32 before the data is turned into arrays for the network.
train_data_full = train_data_full.astype(np.float32)

In [5]:
# Separate the frame into pixel features (every column after the first)
# and the target (the first column), each copied into a NumPy array.
pixel_columns = train_data_full.iloc[:, 1:]
label_column = train_data_full.iloc[:, 0]
X_train_full = np.array(pixel_columns)
y_train_full = np.array(label_column)

In [6]:
# Each row holds one flattened 28x28 image. Conv2D expects a trailing channel
# axis, so reshape every image into a 3-dimensional (28, 28, 1) array; the
# leading -1 lets NumPy infer the number of images.
X_train_full = X_train_full.reshape(-1, 28, 28, 1)
print(X_train_full.shape)

(42000, 28, 28, 1)


In [7]:
# Normalise the pixel intensities by dividing by 255
# (assumes 8-bit greyscale values in 0-255, giving inputs in [0, 1]).
X_train = np.divide(X_train_full, 255)

In [8]:
# One-hot encode the digit labels for categorical cross-entropy.
# num_classes is pinned to 10 so the target width always matches the 10-unit
# softmax output layer; without it to_categorical infers the width from
# max(label) + 1, which would be too narrow if the highest digit were absent.
y_train = to_categorical(y_train_full, num_classes=10)

In [9]:
# Build the image-classification CNN as a single layer list:
# two conv -> max-pool -> batch-norm -> dropout stages, then a dense head
# ending in a 10-way softmax.
model = Sequential([
    # stage 1: the input shape is taken from the training tensor (minus the batch axis)
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.3),
    # stage 2
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
    MaxPooling2D((2, 2)),
    BatchNormalization(),
    Dropout(0.3),
    # classifier head
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(10, activation='softmax'),
])

# Compile with Adam and categorical cross-entropy (targets are one-hot), tracking accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [10]:
# Print the layer-by-layer summary of the network (layer types, output shapes
# and parameter counts) to verify the architecture before training.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
batch_normalization (BatchNo (None, 13, 13, 32)        128       
_________________________________________________________________
dropout (Dropout)            (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 5, 5, 32)          1

In [11]:
# Train the network for 30 epochs in mini-batches of 64 samples, holding out
# 20% of the training data for validation.
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=30,
    validation_split=0.2,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1a3d0f48>

In [12]:
# Load the unlabelled test set from the same directory as the training data.
test_data_full = pd.read_csv(f'{path}/test.csv')

In [14]:
# Cast the test frame to float32, matching the cast applied to the training frame.
test_data_full = test_data_full.astype(np.float32)

In [15]:
# Copy the test frame into a NumPy array and divide by 255, the same
# normalisation that was applied to the training images.
test_pixels = np.array(test_data_full)
X_test_full = np.divide(test_pixels, 255)

In [16]:
# Reshape each flattened test image into a 3-dimensional (28, 28, 1) array;
# the leading -1 lets NumPy infer the number of images.
X_test = X_test_full.reshape(-1, 28, 28, 1)
print(X_test.shape)

(28000, 28, 28, 1)


In [18]:
# Predict the digit for every test image.
# Sequential.predict_classes() was deprecated and later removed from tf.keras,
# so take the argmax over the softmax probabilities from predict() instead;
# this yields the same integer class index per image.
y_test = np.argmax(model.predict(X_test), axis=-1)

In [19]:
# Wrap the predicted class indices in a one-column frame named 'label'.
res = pd.DataFrame({'label': y_test})

In [20]:
# Build the 1-based ImageId column, one id per predicted image.
image_ids = np.arange(1, y_test.shape[0] + 1)
Id = pd.DataFrame({'ImageId': image_ids})

In [21]:
# Put ImageId and label side by side and write them to predictions.csv
# without the DataFrame index column.
submission = pd.concat([Id, res], axis=1)
submission.to_csv('predictions.csv', index=False)