## 題目

1. Use LSTM and CNN models to classify the MNIST dataset with at least 90% test accuracy.

### 執行

所有檔案: mnist_train_all.py

#### 1. Use LSTM & CNN models to classify MNIST
* mnist_train_all.py

In [None]:
from sklearn.metrics import confusion_matrix
import keras
from keras.layers import LSTM
from keras.layers import Dense, Activation, Conv2D, MaxPool2D, Dropout, Flatten
from keras.datasets import mnist
from keras.models import Sequential
from keras.optimizers import Adam


def lstm_preprocess(x_train, x_test, y_train, y_test, n_step, n_input, n_classes):
    """Prepare image and label arrays for the LSTM classifier.

    Both image arrays are reshaped to ``(-1, n_step, n_input)``, cast to
    float32 and divided by 255. Both label arrays are one-hot encoded
    into ``n_classes`` columns with ``keras.utils.to_categorical``.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)`` of converted arrays.
    """
    sequence_shape = (-1, n_step, n_input)

    train_sequences = x_train.reshape(sequence_shape).astype('float32') / 255
    test_sequences = x_test.reshape(sequence_shape).astype('float32') / 255

    train_onehot = keras.utils.to_categorical(y_train, n_classes)
    test_onehot = keras.utils.to_categorical(y_test, n_classes)

    return (train_sequences, test_sequences, train_onehot, test_onehot)

def cnn_preprocess(x_train, x_test, y_train, y_test):
    """Prepare image and label arrays for the CNN classifier.

    Both image arrays are reshaped to ``(-1, 28, 28, 1)`` (a trailing
    single-channel axis is added), cast to float32 and divided by 255.
    Both label arrays are one-hot encoded into 10 columns with
    ``keras.utils.to_categorical``.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)`` of converted arrays.
    """
    image_shape = (-1, 28, 28, 1)

    train_images = x_train.reshape(image_shape).astype('float32') / 255
    test_images = x_test.reshape(image_shape).astype('float32') / 255

    train_onehot = keras.utils.to_categorical(y_train, 10)
    test_onehot = keras.utils.to_categorical(y_test, 10)

    return (train_images, test_images, train_onehot, test_onehot)

def lstm_model(n_input, n_step, n_hidden, n_classes):
    """Build the (uncompiled) LSTM classifier.

    The network is a single unrolled LSTM layer with ``n_hidden`` units
    that takes batches of shape ``(None, n_step, n_input)``, followed by
    a ``Dense(n_classes)`` layer and a softmax activation.

    Returns:
        The assembled ``Sequential`` model.
    """
    recurrent_layer = LSTM(
        n_hidden,
        batch_input_shape=(None, n_step, n_input),
        unroll=True,
    )
    return Sequential([
        recurrent_layer,
        Dense(n_classes),
        Activation('softmax'),
    ])

def cnn_model():
    """Build the (uncompiled) CNN classifier for 28x28x1 inputs.

    Layer order: a 5x5 convolution with 32 filters ('same' padding), max
    pooling with stride 2, a 5x5 convolution with 48 filters ('valid'
    padding), max pooling with stride 2, flatten, then dense layers of
    256 and 84 ReLU units and a 10-way softmax output.

    Returns:
        The assembled ``Sequential`` model.
    """
    feature_extractor = [
        Conv2D(filters=32, kernel_size=(5,5), padding='same', activation='relu', input_shape=(28, 28, 1)),
        MaxPool2D(strides=2),
        Conv2D(filters=48, kernel_size=(5,5), padding='valid', activation='relu'),
        MaxPool2D(strides=2),
        Flatten(),
    ]
    classifier_head = [
        Dense(256, activation='relu'),
        Dense(84, activation='relu'),
        Dense(10, activation='softmax'),
    ]
    return Sequential(feature_extractor + classifier_head)

def trainning(model, x_train, y_train, x_test, y_test,
              learning_rate, training_iters, batch_size):
    """Compile ``model`` with Adam and fit it on the training data.

    Prints the model summary, compiles with categorical cross-entropy
    loss and an accuracy metric, then fits the model.

    Args:
        model: Keras model to compile and train (modified in place).
        x_train, y_train: Training inputs and one-hot targets.
        x_test, y_test: Held-out inputs and one-hot targets; passed to
            ``fit`` as ``validation_data``.
        learning_rate: Learning rate for the Adam optimizer.
        training_iters: Number of epochs to train for.
        batch_size: Mini-batch size used by ``fit``.
    """
    # `lr` is a deprecated alias that newer Keras releases reject;
    # `learning_rate` is the supported keyword.
    optimizer = Adam(learning_rate=learning_rate)
    model.summary()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(x_train, y_train,
              batch_size=batch_size, epochs=training_iters,
              verbose=1, validation_data=(x_test, y_test))

def print_confusion_result(x_train, x_test, y_train, y_test, model):
    """Print the train and test confusion matrices for ``model``.

    Args:
        x_train, x_test: Inputs in the format ``model`` expects.
        y_train, y_test: True labels as class indices (not one-hot),
            because they are compared against predicted class indices.
        model: Trained model whose ``predict`` returns one score per
            class for each sample.
    """
    # `Sequential.predict_classes` was removed from newer Keras/TensorFlow
    # releases. Taking the argmax over the per-class scores gives the same
    # class indices for a softmax output and works on old and new versions.
    train_pred = model.predict(x_train, verbose=0).argmax(axis=-1)
    test_pred = model.predict(x_test, verbose=0).argmax(axis=-1)

    # Rows are true labels, columns are predicted labels, for digits 0-9.
    train_result_cm = confusion_matrix(y_train, train_pred, labels=range(10))
    test_result_cm = confusion_matrix(y_test, test_pred, labels=range(10))
    print(train_result_cm, '\n'*2, test_result_cm)

def mnist_lstm_main():
    """Train and evaluate the LSTM classifier on MNIST.

    Loads MNIST, preprocesses it into 28 steps of 28 inputs per image,
    trains the LSTM for one epoch, then prints the test accuracy and the
    train/test confusion matrices.
    """
    # model parameters
    n_input, n_step = 28, 28
    n_hidden = 256
    n_classes = 10

    # training parameters
    learning_rate = 0.001
    training_iters = 1
    batch_size = 128

    (raw_train, y_train), (raw_test, y_test) = mnist.load_data()
    x_train, x_test, y_train_o, y_test_o = lstm_preprocess(
        raw_train, raw_test, y_train, y_test, n_step, n_input, n_classes)

    model = lstm_model(n_input, n_step, n_hidden, n_classes)
    trainning(model, x_train, y_train_o, x_test, y_test_o,
              learning_rate, training_iters, batch_size)

    # evaluate() returns [loss, accuracy] given the metrics compiled in trainning().
    test_accuracy = model.evaluate(x_test, y_test_o, verbose=0)[1]
    print('LSTM test accuracy:', test_accuracy)

    # The confusion matrices need the original integer labels, not the one-hot ones.
    print_confusion_result(x_train, x_test, y_train, y_test, model)

def mnist_cnn_main():
    """Train and evaluate the CNN classifier on MNIST.

    Loads MNIST, preprocesses it for the CNN, trains for one epoch, then
    prints the test accuracy and the train/test confusion matrices.
    """
    # training parameters
    learning_rate, training_iters, batch_size = 0.001, 1, 64

    (raw_train, y_train), (raw_test, y_test) = mnist.load_data()
    x_train, x_test, y_train_o, y_test_o = cnn_preprocess(
        raw_train, raw_test, y_train, y_test)

    model = cnn_model()
    trainning(model, x_train, y_train_o, x_test, y_test_o,
              learning_rate, training_iters, batch_size)

    # evaluate() returns [loss, accuracy] given the metrics compiled in trainning().
    test_accuracy = model.evaluate(x_test, y_test_o, verbose=0)[1]
    print('CNN test accuracy:', test_accuracy)

    # The confusion matrices need the original integer labels, not the one-hot ones.
    print_confusion_result(x_train, x_test, y_train, y_test, model)

- LSTM

In [None]:
# Run the LSTM pipeline: train the model, then print the model summary,
# test accuracy and train/test confusion matrices.
mnist_lstm_main()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 256)               291840    
_________________________________________________________________
dense (Dense)                (None, 10)                2570      
_________________________________________________________________
activation (Activation)      (None, 10)                0         
Total params: 294,410
Trainable params: 294,410
Non-trainable params: 0
_________________________________________________________________
LSTM test accuracy: 0.9485999941825867




[[5570    0   44   13   59   26   58    4   48  101]
 [   0 6511   54   21   24    5   20   69   18   20]
 [   1    5 5795   24   29    2    9   48   32   13]
 [   3    6   99 5752    3   22    1  115   49   81]
 [   2    2   15    0 5728    3   27   13    1   51]
 [  10    8   19   70   34 5030   27   12  117   94]
 [   5    2   12    1  122   51 5690    0   33    2]
 [   6    2   55    2   47    0    0 6059    3   91]
 [   4   11   25  183   44   70    7   16 5279  212]
 [   7    1   14   20  339    5    3   62   22 5476]] 

 [[ 927    0    3    1   16    6   10    1    6   10]
 [   0 1105    5    2    2    0    4    9    6    2]
 [   0    0 1005    6    1    1    3   10    4    2]
 [   0    0   14  962    0    0    0   18    7    9]
 [   0    0    2    0  966    0    6    0    1    7]
 [   4    0    4   19    4  824    6    1   19   11]
 [   5    2    1    0   19   11  913    0    4    3]
 [   0    1   16    1   10    0    0  987    0   13]
 [   3    0    4   34   11   11    0    8 

- CNN

In [None]:
# Run the CNN pipeline: train the model, then print the model summary,
# test accuracy and train/test confusion matrices.
mnist_cnn_main()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 10, 10, 48)        38448     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 48)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1200)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               307456    
_________________________________________________________________
dense_2 (Dense)              (None, 84)               



[[5895    1    1    0    1    1    8    0   15    1]
 [   0 6707   15    1    0    0    0    7   10    2]
 [   2    9 5902    7    0    2    1    9   24    2]
 [   2    1   14 6050    0   31    0    5   23    5]
 [   2   28    8    0 5741    2    7    3   23   28]
 [   7    1    4   12    0 5359    8    0   26    4]
 [  14   12    7    1    4    9 5844    0   27    0]
 [  14   17   30   13    9    4    0 6082   21   75]
 [   2   18   16    8    0   12    2    1 5789    3]
 [  21    4    2   14   18   24    3    6   73 5784]] 

 [[ 974    0    0    0    0    1    2    1    2    0]
 [   0 1131    1    0    0    1    0    1    1    0]
 [   1    1 1021    2    0    0    0    3    4    0]
 [   0    0    2 1000    0    4    0    0    4    0]
 [   1    5    2    0  962    0    1    0    5    6]
 [   2    0    0    4    0  884    1    0    1    0]
 [   7    3    0    1    1    3  938    0    5    0]
 [   1    5    5    2    0    1    0  998    3   13]
 [   3    1    2    1    0    1    0    0 

## Reference