# Keras 快速开始
- 以 MNIST 数据集和 CNN 算法为例

In [1]:
# Optional GPU configuration (left disabled). Uncommenting the lines below
# restricts the process to GPU "0" and sets the per-process GPU memory
# fraction to 0.3 before handing the session to Keras.
# NOTE(review): tf.ConfigProto / tf.Session and
# keras.backend.tensorflow_backend look like TensorFlow 1.x-era APIs —
# confirm they exist in the installed versions before enabling.
# import os
# import tensorflow as tf
# from keras.backend.tensorflow_backend import set_session

# os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.3
# set_session(tf.Session(config=config))

In [1]:
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils

Using TensorFlow backend.


## 核心组件
- 神经网络的核心组件是层（layers），它是一种数据处理模块。

##  数据处理

In [2]:
# Load MNIST: each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [3]:
# Inspect the shape of the training images: (samples, rows, cols).
X_train.shape

(60000, 28, 28)

In [4]:
# Peek at row index 5 of the first training image (raw pixel values).
X_train[0, 5]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
        18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
         0,   0], dtype=uint8)

In [5]:
# Add a trailing single-channel axis so each image becomes
# (rows, cols, 1), the per-sample shape handed to the first conv layer.
img_rows, img_cols = X_train.shape[1:3]
input_shape = (img_rows, img_cols, 1)

# Keep the sample count of each split and reshape the rest to input_shape.
X_train = X_train.reshape((len(X_train),) + input_shape)
X_test = X_test.reshape((len(X_test),) + input_shape)

In [6]:
# Same row as before, now with an explicit channel axis of length 1.
X_train[0, 5]

array([[  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  0],
       [  3],
       [ 18],
       [ 18],
       [ 18],
       [126],
       [136],
       [175],
       [ 26],
       [166],
       [255],
       [247],
       [127],
       [  0],
       [  0],
       [  0],
       [  0]], dtype=uint8)

In [7]:
# Scale pixel values into floats in [0, 1] by dividing by 255.
# Note the naming: the scaled arrays are lowercase x_*, while the
# uppercase X_* arrays keep the original integer pixels.
x_train, x_test = X_train / 255., X_test / 255.

In [8]:
# Check the scaled values for row index 5 of the first training image.
x_train[0, 5]

array([[0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.01176471],
       [0.07058824],
       [0.07058824],
       [0.07058824],
       [0.49411765],
       [0.53333333],
       [0.68627451],
       [0.10196078],
       [0.65098039],
       [1.        ],
       [0.96862745],
       [0.49803922],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ]])

In [9]:
# Labels before encoding: one integer class id per sample.
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [10]:
# Convert integer labels to one-hot vectors.
# num_classes is passed explicitly: without it, to_categorical infers the
# width from the largest label present in each array, so the train and test
# encodings could end up with different widths (and disagree with the
# 10-unit softmax output layer) if a split lacked the highest class.
# NOTE: this cell overwrites y_train / y_test, so run it only once per
# fresh load of the data.
NUM_CLASSES = 10
y_train = np_utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = np_utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [11]:
# First label after one-hot encoding (the original label was 5).
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

## 构建网络（模型）
- Convolution1D主要用于nlp，Convolution2D主要用于cv
- filters               使用的卷积滤波器(卷积核)的数量（本例三个卷积层依次为 16、32、8）
- kernel_size = (5, 5)  卷积核的尺寸（本例使用 5×5）
- activation='relu'     激活函数：relu
- padding='same'        填充方式：'same' 表示对边缘补零，步长为 1 时输出与输入的空间尺寸相同
- strides=1             步长
- input_shape  = (img_rows, img_cols, 1)  输入数据的维度
- MaxPooling2D(2, 2)    最大池化层：第一个参数是池化窗口大小 pool_size，第二个参数是步长 strides（此处均为 2），起下采样（降维）的作用
- Flatten()             扁平层用来将输入“压平”，即把多维的输入一维化，常用在从卷积层到全连接层的过渡。Flatten不影响batch的大小。
- Dense()               密集连接，全连接层

In [12]:
# Small CNN: three conv + max-pool stages followed by a dense classifier.
network = Sequential()

# Stage 1: 16 filters of 5x5. The first layer also declares the per-sample
# input shape, reusing `input_shape` from the reshape cell instead of
# repeating the (img_rows, img_cols, 1) tuple.
network.add(Convolution2D(filters=16, kernel_size=(5, 5), strides=1, padding='same',
                          activation='relu', input_shape=input_shape))
# pool_size is spelled out as a keyword: the positional form
# MaxPooling2D(2, 2) means pool_size=2 and strides=2, not a (2, 2) window.
# strides defaults to pool_size, so the layer behaves the same as before.
network.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: 32 filters of 5x5.
network.add(Convolution2D(filters=32, kernel_size=(5, 5), strides=1, padding='same',
                          activation='relu'))
network.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 3: 8 filters of 5x5.
network.add(Convolution2D(filters=8, kernel_size=(5, 5), strides=1, padding='same',
                          activation='relu'))
network.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps into one vector per sample for the dense layers.
network.add(Flatten())
network.add(Dense(1000, activation='relu'))
# Output layer: 10 softmax units, one per class.
network.add(Dense(10, activation='softmax'))

## 查看模型概要

In [13]:
# Print each layer's output shape and parameter count.
network.summary()  

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 16)        416       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 32)        12832     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 8)           6408      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 3, 3, 8)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 72)                0         
__________

## 编译
- loss 损失函数
- optimizer 优化器，基于训练数据和损失函数来更新网络参数的机制
- metrics 在训练和测试过程中需要监控的指标：本例只关心精度，即正确分类的图像所占的比例(acc)。

In [14]:
# Configure training: categorical cross-entropy loss, the Adam optimizer,
# and accuracy as the monitored metric.
network.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

## 训练
- batch_size = 128  batch大小代表每次更新权重时候喂入的样本数
- epochs = 12       全部训练样本的循环轮次

In [15]:
# Train for 12 epochs with mini-batches of 128 samples.
# The test split is passed as validation data, so the per-epoch validation
# metrics are computed on the same data the notebook calls "test".
network.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=12,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0xb3daefd68>