In [167]:
%matplotlib inline
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras import backend as K
import matplotlib.pyplot as plt

# --- Training hyper-parameters ---
batch_size = 32   # samples per gradient update passed to model.fit
num_classes = 10  # number of target classes
epochs = 10       # passes over the training set; use 30 or more for better accuracy

# --- Image geometry ---
img_rows = 28
img_cols = 28
input_shape = (img_rows, img_cols, 1)  # per-sample shape: rows, cols, 1

# Build the neural network model
def build_nn(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile the CNN classifier.

    Architecture:
        In -> [[Conv2D -> relu] * 2 -> MaxPool2D -> Dropout] * 2
           -> Flatten -> Dense -> Dropout -> Out

    Args:
        input_shape: Shape of one input sample, handed to the first Conv2D
            layer. Defaults to (28, 28, 1), the value that used to be
            hard-coded here.
        num_classes: Number of units in the final softmax layer. Defaults to
            10, the value that used to be hard-coded here.

    Returns:
        A ``Sequential`` model compiled with an RMSprop optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()

    # Stage 1: two 5x5 convolutions with 32 filters, then 2x2 pooling.
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                     activation='relu', input_shape=input_shape))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='same',
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Stage 2: two 3x3 convolutions with 64 filters, then 2x2 pooling.
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                     activation='relu'))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                     activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head: one softmax unit per class.
    model.add(Flatten())
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))

    # Define the optimizer
    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    # Compile the model
    model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model
    
def load_data(fpath='./data/train.csv', istest=False, num_classes=10):
    """Read a pixel CSV and return scaled features plus one-hot labels.

    Args:
        fpath: Path of the CSV file to read.
        istest: When False, the first column is treated as the label and the
            remaining columns as pixel values. When True, every column is
            treated as a pixel value and no labels are produced.
        num_classes: Width of the one-hot label frame; its columns are always
            0 .. num_classes - 1, even if some label never occurs in the file.

    Returns:
        A ``(trainX, trainY)`` tuple. ``trainX`` is a float32 array holding
        the pixel values divided by 255. ``trainY`` is a uint8 one-hot
        DataFrame with ``num_classes`` columns, or None when ``istest`` is
        True.

    Raises:
        ValueError: If a label lies outside ``0 .. num_classes - 1``.
    """
    raw = pd.read_csv(fpath).values
    if istest:
        labels = None
        pixels = raw.astype('float32')  # Pixel values
    else:
        digits = raw[:, 0].astype('int32')
        if len(digits) and (digits.min() < 0 or digits.max() >= num_classes):
            raise ValueError(
                'labels must lie in [0, %d); found range [%d, %d]'
                % (num_classes, digits.min(), digits.max())
            )
        # get_dummies only creates columns for labels that actually occur and
        # its dtype varies between pandas versions, so pin both explicitly.
        labels = (
            pd.get_dummies(digits)
            .reindex(columns=range(num_classes), fill_value=0)
            .astype('uint8')
        )
        pixels = raw[:, 1:].astype('float32')  # Pixel values
    pixels /= 255  # Normalize values for training
    return pixels, labels

#trainX, trainY = load_data()

In [61]:
# Load the training set here so this cell works on a fresh kernel (trainX and
# trainY are not assigned anywhere above), then reshape the flat pixel rows
# into a 4D tensor of shape (n_samples, img_rows, img_cols, 1).
trainX, trainY = load_data()
trainX = trainX.reshape(trainX.shape[0], img_rows, img_cols, 1)
trainX.shape

(42000, 28, 28, 1)

In [168]:
# Train a freshly built network on the full training set.
model = build_nn()
fit_options = dict(batch_size=batch_size, epochs=epochs, verbose=2)
model.fit(trainX, trainY, **fit_options)

# Evaluate on the same data the model was trained on; these figures are
# training metrics, not a held-out estimate.
score = model.evaluate(trainX, trainY, verbose=0)
train_loss, train_accuracy = score[0], score[1]
print('Train loss:', train_loss)
print('Train accuracy:', train_accuracy)

Epoch 1/10
 - 338s - loss: 0.1781 - acc: 0.9457
Epoch 2/10
 - 338s - loss: 0.0692 - acc: 0.9800
Epoch 3/10
 - 338s - loss: 0.0599 - acc: 0.9828
Epoch 4/10
 - 338s - loss: 0.0587 - acc: 0.9849
Epoch 6/10
 - 336s - loss: 0.0624 - acc: 0.9844
Epoch 7/10
 - 340s - loss: 0.0582 - acc: 0.9850
Epoch 8/10
 - 337s - loss: 0.0670 - acc: 0.9840
Epoch 9/10
 - 339s - loss: 0.0679 - acc: 0.9838
Epoch 10/10
 - 337s - loss: 0.0710 - acc: 0.9835
Train loss: 0.031175731078686937
Train accuracy: 0.9904285714285714


In [86]:
# Prediction
# Take only the features; the label slot is None when istest=True. Indexing
# instead of unpacking into `_` avoids shadowing IPython's last-output variable.
testX = load_data('./data/test.csv', istest=True)[0]
testX = testX.reshape(testX.shape[0], img_rows, img_cols, 1)
# Predict with the trained model and record the results in a CSV file.
# predict(...).argmax(axis=-1) yields the same class indices as the deprecated
# Sequential.predict_classes for a softmax output.
testY = model.predict(testX, verbose=2).argmax(axis=-1)
submission = pd.DataFrame({"ImageId": list(range(1, len(testY) + 1)),
                           "Label": testY})
submission.to_csv('digit_recognizer_result.csv', index=False, header=True)