# 用轉移學習的方式做NN圖形辨識模型

### NN 標準神經網路

先做一次一般的0-9手寫數字判讀模型，再用轉移學習的方式做一個只有0或1的手寫數字判讀模型。

In [1]:
# Initial setup: use the IPython %env magic to set the KERAS_BACKEND
# environment variable to "tensorflow" before the keras imports that follow
%env KERAS_BACKEND=tensorflow

env: KERAS_BACKEND=tensorflow


In [2]:
# KERAS function
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

# KERAS dataset - modified NIST
from keras.datasets import mnist

# KERAS utils function
from keras.utils import np_utils

In [3]:
# Load the MNIST train/test splits that ship with Keras
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Report the shape of each array, one line per array
for _label, _array in (('x_train:', x_train),
                       ('x_test:', x_test),
                       ('y_train:', y_train),
                       ('y_test:', y_test)):
    print(_label, _array.shape)


x_train: (60000, 28, 28)
x_test: (10000, 28, 28)
y_train: (60000,)
y_test: (10000,)


In [4]:
# Flatten every image into a single row vector. The sample count is read
# from the array itself and -1 lets NumPy infer the per-image pixel count,
# so nothing depends on the hard-coded 60000 / 10000 / 784 sizes.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)

# Boolean masks selecting only the samples whose label is 0 or 1;
# computed once and reused for both the images and the labels.
train_is_01 = y_train <= 1
test_is_01 = y_test <= 1

# separate the 0 and 1 images
x_train_01 = x_train[train_is_01]
x_test_01 = x_test[test_is_01]

# one-hot encode the full label set (10 classes)
y_train_10 = np_utils.to_categorical(y_train, 10)
y_test_10 = np_utils.to_categorical(y_test, 10)

# one-hot encode the 0/1 label subset (2 classes)
y_train_01 = np_utils.to_categorical(y_train[train_is_01], 2)
y_test_01 = np_utils.to_categorical(y_test[test_is_01], 2)

In [5]:
# Check the shapes of the one-hot encoded label arrays.
# The first two lines print the label arrays named in their captions
# (they previously printed the x_train / x_test image arrays instead).
print('y_train_10:',y_train_10.shape)
print('y_test_10:',y_test_10.shape)
print('y_train_01:',y_train_01.shape)
print('y_test_01:',y_test_01.shape)

y_train_10: (60000, 10)
y_test_10: (10000, 10)
y_train_01: (12665, 2)
y_test_01: (2115, 2)


In [6]:
# Layers shared by both models: two 500-unit sigmoid hidden layers fed by a
# 784-feature input. These layer objects are reused later for the 0/1 model.
# The activation is given by name through Dense's `activation` keyword
# rather than by passing an Activation layer instance positionally.
all_except_last = [Dense(500, input_dim=784),
                   Activation('sigmoid'),
                   Dense(500, activation='sigmoid')]

# 10-way softmax output head for the digits 0-9
output_layer = [Dense(10, activation='softmax')]

# assemble all layers and print the architecture
model_0_to_9 = Sequential(all_except_last + output_layer)
model_0_to_9.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 500)               392500    
_________________________________________________________________
activation (Activation)      (None, 500)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 500)               250500    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5010      
Total params: 648,010
Trainable params: 648,010
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Configure training: mean-squared-error loss, SGD with a step size of 0.1,
# and accuracy as the reported metric.
# `learning_rate` replaces the deprecated `lr` argument name.
model_0_to_9.compile(loss='mse',
                     optimizer=SGD(learning_rate=0.1),
                     metrics=['accuracy'])

In [8]:
# Train the 10-class model on the full training set
model_0_to_9.fit(
    x=x_train,
    y=y_train_10,
    batch_size=100,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fe730d11350>

In [9]:
# Score the 10-class model on the held-out test set
model_0_to_9.evaluate(x=x_test, y=y_test_10)



[0.014354252256453037, 0.9146000146865845]

In [10]:
# Replacement output head for the two-class (0 vs 1) problem
new_output_layer = [Dense(2), Activation('softmax')]

# Build the new model from the previously defined hidden layers
# followed by the new head, then print its architecture
model_0_to_1 = Sequential(layers=[*all_except_last, *new_output_layer])
model_0_to_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 500)               392500    
_________________________________________________________________
activation (Activation)      (None, 500)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 500)               250500    
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 1002      
_________________________________________________________________
activation_3 (Activation)    (None, 2)                 0         
Total params: 644,002
Trainable params: 644,002
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Mark every transferred layer as non-trainable so that only the new
# output layer remains trainable
for transferred_layer in all_except_last:
    transferred_layer.trainable = False

# Print the summary again to inspect the trainable / non-trainable counts
model_0_to_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 500)               392500    
_________________________________________________________________
activation (Activation)      (None, 500)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 500)               250500    
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 1002      
_________________________________________________________________
activation_3 (Activation)    (None, 2)                 0         
Total params: 644,002
Trainable params: 1,002
Non-trainable params: 643,000
_________________________________________________________________


In [12]:
# Configure training for the 0/1 model: mean-squared-error loss, SGD with a
# step size of 0.1, and accuracy as the reported metric.
# `learning_rate` replaces the deprecated `lr` argument name.
model_0_to_1.compile(loss='mse',
                     optimizer=SGD(learning_rate=0.1),
                     metrics=['accuracy'])

In [13]:
# The 0/1 subset is much smaller than the full dataset, so fewer epochs
# are used here to reduce the risk of over-fitting
model_0_to_1.fit(
    x=x_train_01,
    y=y_train_01,
    batch_size=100,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fe735e9ead0>

In [14]:
# Score the two-class model on the 0/1 test subset
model_0_to_1.evaluate(x=x_test_01, y=y_test_01)



[0.0016297576949000359, 0.9976359605789185]

首先利用MNIST資料庫與NN標準神經網路，以list的方式寫出各層layer，再利用Sequential函式建構0-9的手寫數字辨識模型；經過訓練與評分後可以發現準確度約有91%，若增加訓練次數可以再進一步改善準確率。

再來，我們想將這個神經網路的概念移植到0、1兩種數字的手寫判讀模型，首先抽取出0、1的資料，更改產出層(new_output_layer)將Dense層改成只有2種output，再來訓練與組裝都與之前相同，但要注意因為資料量減少所以訓練次數epochs不要太多，避免over-fitting。

In [15]:
# Save the model architecture as JSON.
# The JSON string is held in a plain variable instead of being attached to
# the model object, and the file is written inside a `with` block so the
# handle is closed once the write completes.
model_0_to_1_json = model_0_to_1.to_json()
with open('handwriting_model_nn_transferlearning.json', 'w') as json_file:
    json_file.write(model_0_to_1_json)

# save weights
model_0_to_1.save_weights('handwriting_weights_nn_transferlearning.h5')