# 核心作业6， 20201205， 石敏

以下使用的是第一次核心课给的Sklearn里的MINIST，但它只有8*8的图，没有老师课上讲的28×28

题目要求

- 使用LSTM 进行手写字体识别
- 数据切割出一份训练集，一份验证集。
- LSTM第一层接32个神经元
- 第一层lstm 后接一个dropout0.2
- LSTM第二层接32个神经元
- 第二层lstm 后接一个dropout0.3
- 需使用callbacks函数分别用到earlystop，ModelCheckpoint，ReduceLROnPlateau
- 使用load_weight的形式导入以上训练的模型，并对验证集进行预测


In [1]:
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error

import tensorflow as  tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.datasets import mnist
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import numpy as np

In [2]:
# 加载数据
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [3]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [4]:
x_train[0]

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [5]:
# 黑白色，数值范围0-255
# 归一化很简单， 除以255就行了。 
x_train = x_train / 255.0
x_test = x_test / 255.0

In [6]:
x_train[0]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.    

In [7]:
x_train.shape[1:]

(28, 28)

In [8]:
def build_lstm(shape):
    print("input shape" + str(shape))
    # bulid model
    model = Sequential()
    model.add(LSTM(32, input_shape=(shape), activation='relu', return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(32, activation='relu'))
    model.add(Dropout(0.3))

    model.add(Dense(15, activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(10, activation='softmax'))
    
    model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model

early_stopping = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=5,
                               verbose=0,
                               mode='auto',
                               baseline=None,
                               restore_best_weights=False)
model_checkpoint = ModelCheckpoint(filepath='./best_model.h5',
                                  monitor='val_loss',
                                  save_best_only=True)
reduce_lr_on_plateau = ReduceLROnPlateau(monitor='val_loss',
                                         factor=0.1,
                                         patience=5,
                                         verbose=0,
                                         mode='auto',
                                         epsilon=0.0001,
                                         cooldown=0,
                                         min_lr=0)
lstm_callbacks = [early_stopping, model_checkpoint, reduce_lr_on_plateau]



In [9]:
model = build_lstm(x_train.shape[1:])
model.summary()

input shape(28, 28)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 28, 32)            7808      
_________________________________________________________________
dropout (Dropout)            (None, 28, 32)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 15)                495       
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 

In [11]:
model.fit(x_train, y_train, 
          epochs=30, validation_data=(x_test, y_test), verbose=1,
          callbacks=lstm_callbacks)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30


<tensorflow.python.keras.callbacks.History at 0x7f3e7b1684c0>

### 保存模型全部

In [12]:
model.save('mnist_lstm_model')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: mnist_lstm_model/assets


### 保存模型的参数

In [13]:
model.save_weights('mnist_lstm_model_weight')

## 重新调入模型参数

In [15]:
model = build_lstm(x_train.shape[1:])
model.load_weights('mnist_lstm_model_weight')

input shape(28, 28)


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f3e580d8730>

In [16]:
test_predict = model.predict(x_test)
test_predict[0]

array([1.1844843e-23, 3.2706526e-12, 2.5013597e-10, 3.8799272e-14,
       3.3262788e-16, 4.1347934e-15, 3.1431172e-28, 1.0000000e+00,
       7.9682253e-16, 1.5682831e-08], dtype=float32)

### 以上表示对每个数字可能性的预测。哪个最大，即表示那个手写字体是某个数字的可能性最大。

In [17]:
np.argmax(test_predict[0]), y_test[0], np.argmax(test_predict[1]), y_test[1], np.argmax(test_predict[2]), y_test[2]

(7, 7, 2, 2, 1, 1)

In [18]:
y_predict = [np.argmax(l) for l in test_predict]
y_predict[1:10]

[2, 1, 0, 4, 1, 4, 9, 5, 9]

In [19]:
rmse = mean_squared_error(y_test, y_predict, squared=False)
rmse

0.5394441583704471

# 结论：运行效率低，效果差