In [None]:
# 從 TensorFlow CNN tutorials 改成可以參加 kaggle

In [None]:
# Score: 0.99160
# 很多前面排名的 有自己增加 dataset

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.utils import to_categorical

In [None]:
# Settings: filesystem locations used for checkpoints and the exported model.
checkpoint_dir = "./checkpoint"
# Weights file written by the checkpoint callback while training runs.
checkpoint_path = f"{checkpoint_dir}/train_checkpoint"
# Weights file written once, after training has finished.
final_checkpoint = f"{checkpoint_dir}/final_checkpoint"
# Destination of the full model export.
final_model = "./saved_model/final_model"

# Loading and reshaping data

In [None]:
# Read the training and test CSV files into DataFrames.
train_val_data, test_data = (
    pd.read_csv(csv_file)
    for csv_file in ('./input/train.csv', './input/test.csv')
)

In [None]:
# Labels are taken from the 'label' column of the training frame.
train_val_label = train_val_data['label']
# Training images: every column after the first (per the original note,
# column 0 is the label), reshaped to (samples, 28, 28, 1).
train_val_pic = train_val_data.iloc[:, 1:].values.reshape(-1, 28, 28, 1)
# Test images: the whole frame is reshaped, no column is skipped.
test_pic = test_data.values.reshape(-1, 28, 28, 1)

print(f'train_val_label shape: {train_val_label.shape}')
print(f'train_val_pic shape: {train_val_pic.shape}')
print(f'test_pic shape: {test_pic.shape}')

In [None]:
# Sanity check: draw the first 14 training images with their labels.
class_names = [str(digit) for digit in range(10)]
plt.figure(figsize=(7, 2))  # overall size of the figure
for idx in range(14):
    plt.subplot(2, 7, idx + 1)  # switch to the matching cell of the 2x7 grid
    plt.imshow(train_val_pic[idx], cmap='gray')
    plt.xlabel(class_names[train_val_label[idx]])
    # Hide ticks and grid so only the image and its label remain.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
plt.show()

In [None]:
# Sanity check: draw the first 14 test images.
# The test set has no labels, so no caption is drawn under the images.
plt.figure(figsize=(7, 2))  # overall size of the figure
for idx in range(14):
    plt.subplot(2, 7, idx + 1)  # switch to the matching cell of the 2x7 grid
    plt.imshow(test_pic[idx], cmap='gray')
    # Hide ticks and grid so only the image remains.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
plt.show()

In [None]:
# With the data verified, bring it to the model's input shape and scale it.
# Dividing by 255 maps the pixels to [0, 1] (assumes 8-bit pixel values).
# -1 lets NumPy infer the sample count, so this cell keeps working when the
# dataset size changes (e.g. extra training data is added) instead of relying
# on the hard-coded 42000 / 28000.
# NOTE: the arrays are rebound to the same names, so re-running this cell
# without re-running the extraction cell divides by 255 a second time.
train_val_pic = train_val_pic.reshape((-1, 28, 28, 1)).astype('float64') / 255.0
test_pic = test_pic.reshape((-1, 28, 28, 1)).astype('float64') / 255.0

print('train_val_pic shape:', train_val_pic.shape)
print('test_pic shape:', test_pic.shape)

In [None]:
# Normally the test data would double as validation data, but Kaggle ships
# the test set without labels, so a validation set is carved out of the
# training data instead.
# train_test_split is imported in the notebook's import cell.
train_pic, val_pic, train_label, val_label = train_test_split(
    train_val_pic,
    train_val_label,
    test_size=0.01,
    random_state=40,  # acts like a random seed: same number, same split
)

# The preview loops used earlier cannot be reused on these subsets: the split
# shuffles the samples and train_label / val_label keep their original index
# labels, so label[i] for i = 0, 1, 2, ... can hit a missing key and raise
# KeyError.

print('train_pic shape:', train_pic.shape)
print('val_pic shape:', val_pic.shape)
print('train_label shape:', train_label.shape)
print('val_label shape:', val_label.shape)

# create the model

In [None]:
# CNN classifier: three 3x3 convolutions (32, 64, 64 filters) with 2x2 max
# pooling after the first two, followed by a dense head. The last Dense layer
# has no activation, so the model outputs 10 raw scores (logits).
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

# loading checkpoint

In [None]:
# Resume from the most recent checkpoint found in checkpoint_dir, if any;
# otherwise training starts from the freshly initialised weights.
latest = tf.train.latest_checkpoint(checkpoint_dir)
# latest_checkpoint returns None when no checkpoint exists; compare by
# identity rather than with `!=`.
if latest is not None:
    print("using check points :", latest)
    model.load_weights(latest)
else:
    print("training from begin")
    

In [None]:
# Accumulators for the training history: per-epoch training accuracy and
# validation accuracy, appended to by the training loop.
his_acc, his_val_acc = [], []

# training

In [None]:
# Callback that saves the model's weights (not the full model) to
# checkpoint_path while training runs.
# Original note on verbose: it seems to have little effect in Jupyter;
# 0 = no output, 1 = progress bar, 2 = one line per epoch.
# NOTE(review): those three levels look like Model.fit's verbose option;
# confirm against the ModelCheckpoint documentation.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=0,
)

In [None]:
# Training schedule: one (number of epochs, learning rate) pair per stage,
# run in order by the training loop.
learning_rate = [
    (10, 1e-3),
    (5, 5e-4),
    (3, 2e-4),
]

In [None]:
# Run the training stages: each (epoch, lr) pair trains for `epoch` epochs
# with an Adam optimizer at learning rate `lr`.
for epoch, lr in learning_rate:
    # Re-compiling creates a fresh optimizer for every stage; the model
    # weights carry over. from_logits=True matches the activation-free
    # Dense(10) output layer.
    model.compile(optimizer=tf.keras.optimizers.Adam(lr),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    # cp_callback saves the weights to the checkpoint path during fit.
    history = model.fit(train_pic, train_label, epochs=epoch,
                        validation_data=(val_pic, val_label),
                        callbacks=[cp_callback])

    # The accuracy metric is logged as 'accuracy' on TF 2.x and as 'acc' on
    # older releases; accept either instead of raising KeyError.
    stage_metrics = history.history
    acc_key = 'accuracy' if 'accuracy' in stage_metrics else 'acc'
    his_acc += stage_metrics[acc_key]
    his_val_acc += stage_metrics['val_' + acc_key]

In [None]:
# Once training is done, store a final version:
# the weights alone go to final_checkpoint, the full model to final_model.
model.save_weights(final_checkpoint)
model.save(final_model)

# Evaluate the model

In [None]:
# Training history: his_acc / his_val_acc are extended after every training
# stage, so the curves cover all stages back to back (one point per epoch).
plt.plot(his_acc, label='accuracy')
plt.plot(his_val_acc, label='val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# plt.ylim([0.5, 1])  # enable to restrict the displayed accuracy range
plt.legend(loc='lower right')
# Render explicitly, like the other plotting cells, so the Legend object's
# repr is not echoed as the cell output.
plt.show()

In [None]:
# get identify output
# 這裡原本要算 test case 的辨識率 但因為 kaggle 沒有給
# 所以我要輸出到 csv 再上傳到 kaggle 進行評分
# test_loss, test_acc = model.evaluate(test_pic,  test_label, verbose=2)

In [None]:
# Classify the test images: run the model, then take the index of the
# highest score in each row as the predicted digit.
y_pred = model.predict(test_pic).argmax(axis=1)

In [None]:
# Write the predictions to submission.csv with columns ImageId and Label.
# ImageId is 1-based and derived from the number of predictions, so it always
# matches y_pred instead of relying on a hard-coded 28000 rows.
image_id = list(range(1, len(y_pred) + 1))
submission_df = pd.DataFrame({'ImageId': image_id, 'Label': y_pred})
submission_df.to_csv('submission.csv', index=False)