## keras mnist手写识别 (18/06/08)

In [1]:
# Imports for the whole notebook, plus the NumPy seed.
from __future__ import print_function
import keras
# mnist: loader for the MNIST handwritten-digit dataset.
from keras.datasets import mnist
# Sequential: container that stacks layers (Dense, Dropout, Conv2D, ...) in order.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import numpy as np
# Seed NumPy's global RNG.
# NOTE(review): this only seeds NumPy; the backend's own RNG is not seeded
# here -- confirm whether fully repeatable training runs are required.
np.random.seed(1337)
# Re-imports Dense and Dropout (already imported above); Activation is the
# only new name this line brings in.
from keras.layers.core import Dense, Dropout, Activation


# Flatten, Conv2D and MaxPooling2D are imported but not used by the cells
# visible in this notebook.
# The model built below only stacks Dense / Activation / Dropout layers.


Using TensorFlow backend.


In [2]:
# Training hyper-parameters and dataset loading.
#
# batch_size / epochs are kept equal to the values the training cell passes
# literally to model.fit (batch_size=200, epochs=20). They used to read 128
# and 12, which contradicted the run whose output is recorded in this notebook.
# Author's rule of thumb: a very small batch trains slowly and may overfit,
# a very large one may underfit, so pick a moderate value.
batch_size = 200
# The digits 0-9 give ten target classes.
num_classes = 10
# Number of full passes over the training set.
epochs = 20

# Each input image is a 28x28 grayscale picture.
img_rows, img_cols = 28, 28


# Training split and test split, each as an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
# Keras backends store image tensors in one of two layouts: channel axis
# first, or channel axis last. Work out the per-image shape for the active
# layout once, then give both splits that shape with a leading sample axis.
# NOTE: the preprocessing cell flattens the images again for the dense model,
# and `input_shape` is not read by any other visible cell.
channels_first = K.image_data_format() == 'channels_first'
input_shape = (1, img_rows, img_cols) if channels_first else (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

In [4]:
# Preprocess the inputs and labels for the dense network.
# This cell is written to be safe to re-run: nothing is scaled or encoded twice.

# Cast pixels to float32 so that scaling produces fractional values.
# astype returns a new array, so repeating this line changes nothing.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Flatten each image to one row vector and scale pixels from [0, 255] to [0, 1].
# The sample count comes from the data instead of hard-coded 60000 / 10000.
# The division is out-of-place on purpose: reshape returns a view here, so an
# in-place `/=` would also rescale x_train and a second run would scale twice.
X_train = x_train.reshape(x_train.shape[0], -1) / 255
X_test = x_test.reshape(x_test.shape[0], -1) / 255
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# One-hot encode the class labels 0-9 for categorical cross-entropy.
# Only encode while the labels are still a 1-D vector of class ids, so that
# re-running the cell does not encode an already one-hot matrix again.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes)

(60000, 784) (10000, 784) (60000,) (10000,)
x_train shape: (60000, 784)
60000 train samples
10000 test samples


In [5]:
# Build the network as a Sequential model: layers are appended one after
# another and executed in that order.
model = Sequential()

# Only the first layer has to be told the input shape; every later layer
# infers its input shape from the layer before it.
#
# First hidden block: 784 input features -> 500 units, ReLU activation,
# then dropout with rate 0.2.
first_hidden_block = [
    Dense(500, input_shape=(784,)),
    Activation('relu'),
    Dropout(0.2),
]
for layer in first_hidden_block:
    model.add(layer)

In [6]:
# Second hidden block: 500 units, ReLU activation, then dropout with rate 0.2.
# No input shape is given; it is inferred from the previous layer.
second_hidden_block = [
    Dense(500),
    Activation('relu'),
    Dropout(0.2),
]
for layer in second_hidden_block:
    model.add(layer)


In [7]:
# Output layer: one unit per class, followed by softmax so the outputs form
# a probability distribution over the classes.
# The width comes from num_classes (10, set in the configuration cell), the
# same constant used to one-hot encode the labels, so the two cannot drift.
model.add(Dense(num_classes))
model.add(Activation('softmax'))

In [8]:
# Print the per-layer overview of the model (layer type, output shape, parameter count).
# Total parameter count: 784*500+500 + 500*500+500 + 500*10+10 = 648010
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 500)               392500    
_________________________________________________________________
activation_1 (Activation)    (None, 500)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 500)               250500    
_________________________________________________________________
activation_2 (Activation)    (None, 500)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5010      
__________

In [11]:
# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot labels, and accuracy reported as the metric.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train for 20 epochs in mini-batches of 200 with per-epoch progress output.
# The test split is passed as validation data, so it is scored after each epoch.
fit_options = dict(
    batch_size=200,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

# Final evaluation on the test split; `score` holds [loss, accuracy].
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score

# Report how the trained model performs on the test split.
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test score: 0.0672841819136
Test accuracy: 0.9846
