# README
このnotebookでは，実際にモデルを学習していきます．   
パラメータは，記事と同一ですが，初期値やAugmentationの関係で記事と同じにはなりません．   
GPU環境でも，一つのモデルの学習に1.5時間程度かかります．   

# 作業ディレクトリの指定
以下のcellで，data_dirに作業をするディレクトリを末尾の"/"まで含めて指定してください．   
data_dir 直下には，以下の5つのディレクトリをあらかじめ作成してください．
- comparisons (historyの比較結果の保存)
- histories (historyの保存)
- loss_and_acc (lossとaccの視覚化結果の保存)
- models (学習したmodelの保存)
- predictions (予測結果の保存)   

In [11]:
data_dir = "hoge/" # Working directory; keep the trailing "/" because output paths are built as data_dir + "<subdir>/..."

In [None]:
!pip3 install keras
import numpy as np
import keras
from keras.models import  Model,load_model
from keras.layers import Dense,Dropout,Conv2D,MaxPooling2D,Input,GlobalAveragePooling2D,BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.datasets import cifar10
from keras.utils.np_utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import pickle

# 準備

### データの評価と可視化に用いる関数

In [4]:
## Evaluate a model and print its loss and accuracy to standard output.
def my_eval(model,x,t):
    """Call ``model.evaluate(x, t)`` and print the first two returned values.

    model: the model to evaluate
    x: images to predict on, shape = (batch,32,32,3)
    t: labels in one-hot representation
    """
    scores = model.evaluate(x, t)
    # The first entry is printed under the "loss" label, the second under "acc".
    print("loss:", scores[0])
    print("acc: " + str(scores[1]))


## Visualize a history. When file_name is given, the history passed in is pickled under
## histories/ and the rendered figure is saved under loss_and_acc/.
def loss_and_acc(history,file_name = None):
    """Plot train/validation loss and accuracy curves side by side.

    history: object whose ``history`` attribute is a dict of per-epoch lists
        holding "loss", "val_loss" and the accuracy series, stored either as
        "acc"/"val_acc" or as "accuracy"/"val_accuracy".
    file_name: optional base name without extension. When given, ``history``
        is pickled to data_dir + "histories/" + file_name + ".binaryfile" and
        the figure is saved to data_dir + "loss_and_acc/" + file_name + "_acc".
    """
    logs = history.history
    # Keras stores the metric under the name used at compile time; older
    # releases shortened "accuracy" to "acc", newer ones keep "accuracy".
    acc_key = "acc" if "acc" in logs else "accuracy"
    epoch_axis = range(len(logs["loss"]))

    fig, ax = plt.subplots(1, 2, figsize = (10, 5))
    # (key in the history dict, label shown on the plot) for each panel.
    panels = (("loss", "loss"), (acc_key, "acc"))
    for axis, (key, label) in zip(ax, panels):
        axis.plot(epoch_axis, logs[key], label = "train_" + label, c = "red")
        axis.plot(epoch_axis, logs["val_" + key], label = "valid_" + label, c = "green")
        axis.set_xlabel("epochs", fontsize = 14)
        axis.set_ylabel(label, fontsize = 14)
        axis.legend(fontsize = 14)

    if file_name is not None:
        with open(data_dir + "histories/" + file_name + ".binaryfile", mode = "wb") as f:
            pickle.dump(history, f)
        fig.savefig(data_dir + "loss_and_acc/" + file_name + "_acc")


## Load a history. Pass the file_name that was given to loss_and_acc(); the history
## stored under that name is loaded and returned.
def load_history(file_name):
    """Return the object pickled at data_dir + "histories/" + file_name + ".binaryfile"."""
    history_path = data_dir + "histories/" + file_name + ".binaryfile"
    # NOTE(review): unpickling runs whatever the file encodes; only load files
    # that this notebook wrote itself.
    with open(history_path, mode = "rb") as stream:
        return pickle.load(stream)


## Compare two histories and optionally save the figure.
def compare(his1,his1_name,his2,his2_name, file_name = None):
    """Plot loss, val_loss, acc and val_acc of two histories on a 2x2 grid.

    his1: first history to compare (object with a ``history`` dict)
    his1_name: legend label for his1
    his2, his2_name: same for the second history
    file_name: when given, the figure is saved to
        data_dir + "comparisons/" + file_name + "_comp"

    Both curves are truncated to the shorter of the two runs.
    """
    def series(his, key):
        # Accept both spellings of the accuracy metric ("acc" / "accuracy"),
        # since the name stored by Keras depends on its version.
        logs = his.history
        if key not in logs:
            key = key.replace("acc", "accuracy")
        return logs[key]

    keys = ["loss","val_loss","acc","val_acc"]
    fig, ax = plt.subplots(2,2,figsize = (12,12))
    epochs = min(len(his1.history["loss"]), len(his2.history["loss"]))

    # ax.flat walks the grid row by row, pairing each panel with one key.
    for axis, key in zip(ax.flat, keys):
        axis.plot(range(epochs), series(his1, key)[:epochs], label = his1_name)
        axis.plot(range(epochs), series(his2, key)[:epochs], label = his2_name)
        axis.set_xlabel("epochs", fontsize = 14)
        axis.set_ylabel(key, fontsize = 14)
        axis.legend(fontsize = 14)

    if file_name is not None:
        fig.savefig(data_dir + "comparisons/" + file_name + "_comp")

### モデルを作成する関数
ベンチマークとして比較的小さい規模のモデルを返す`create_bench_model`と，   
より層の深いモデルを返す`create_deep_model`を定義します．

In [5]:
def create_bench_model():
    """Build the small benchmark CNN.

    Input is a (32,32,3) tensor. Two stages of two 3x3 convolutions (64, then
    128 filters) are each followed by Dropout(0.25) and max pooling; two
    256-filter convolutions feed global average pooling, a 1024-unit dense
    layer with Dropout(0.25), and a 10-way softmax output.

    Returns the uncompiled Model.
    """
    def conv(filters):
        # Every convolution in this model shares kernel size, padding and activation.
        return Conv2D(filters,(3,3),padding = "SAME",activation= "relu")

    inputs = Input(shape = (32,32,3))
    x = inputs
    for filters in (64, 128):
        x = conv(filters)(x)
        x = conv(filters)(x)
        # In this model dropout is applied before pooling.
        x = Dropout(0.25)(x)
        x = MaxPooling2D()(x)

    x = conv(256)(x)
    x = conv(256)(x)
    x = GlobalAveragePooling2D()(x)

    x = Dense(1024,activation = "relu")(x)
    x = Dropout(0.25)(x)
    y = Dense(10,activation = "softmax")(x)
    # Keras 2 names these arguments `inputs`/`outputs`; the singular `input`/`output`
    # spelling is the legacy Keras 1 form.
    return Model(inputs = inputs, outputs = y)


def create_deep_model():
    """Build the deeper CNN with batch normalization.

    Input is a (32,32,3) tensor; the output is a 10-way softmax. The layer
    sequence is spelled out stage by stage in the body. Returns the
    uncompiled Model.
    """
    def conv(filters, tensor):
        # 3x3 convolution with the padding/activation shared by all conv layers here.
        return Conv2D(filters,(3,3),padding = "SAME",activation= "relu")(tensor)

    inputs = Input(shape = (32,32,3))
    net = inputs

    # Stages 1 and 2: conv, conv, batch norm, conv, then pooling and dropout.
    for width in (64, 128):
        net = conv(width, net)
        net = conv(width, net)
        net = BatchNormalization()(net)
        net = conv(width, net)
        net = MaxPooling2D()(net)
        net = Dropout(0.25)(net)

    # Stage 3: no pooling between the convolutions; ends in global average pooling.
    net = conv(256, net)
    net = conv(256, net)
    net = BatchNormalization()(net)
    for _ in range(3):
        net = conv(256, net)
    net = BatchNormalization()(net)
    net = conv(512, net)
    net = conv(512, net)
    net = GlobalAveragePooling2D()(net)

    # Classifier head: two dense+dropout pairs, then the softmax layer.
    for _ in range(2):
        net = Dense(1024,activation = "relu")(net)
        net = Dropout(0.5)(net)
    y = Dense(10,activation = "softmax")(net)
    return Model(inputs,y)

### 各種手法で用いる関数
- Data Augmentation
- step decay
- TTA

In [6]:
def da_generator():
    """Return an augmenting batch iterator over the global (x_train, t_train).

    Reads the module-level x_train, t_train and batch_size at call time.
    """
    augmenter = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        zoom_range = 0.2,
        channel_shift_range = 0.2,
        horizontal_flip = True,
    )
    return augmenter.flow(x_train, t_train, batch_size)


def da_generator_strong():
    """Return a more strongly augmenting batch iterator over (x_train, t_train).

    Same as da_generator() except that the shift, zoom and channel-shift
    ranges are 0.3 instead of 0.2. Reads the module-level x_train, t_train
    and batch_size at call time.
    """
    augmenter = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.3,
        height_shift_range = 0.3,
        zoom_range = 0.3,
        channel_shift_range = 0.3,
        horizontal_flip = True,
    )
    return augmenter.flow(x_train, t_train, batch_size)

def step_decay(epoch):
    """Return the learning rate for the given epoch index.

    The base rate 0.001 is divided by 5 from epoch 100 on, and additionally
    by 2 from epoch 140 on.
    """
    base_lr = 0.001
    if epoch < 100:
        return base_lr
    if epoch < 140:
        return base_lr / 5
    return base_lr / 5 / 2


def tta(model,test_size,generator,batch_size ,epochs = 10, num_classes = 10):
    """Test-time augmentation: average predictions over several augmented passes.

    model: object with a ``predict(batch)`` method returning one row of
        ``num_classes`` scores per sample in the batch
    test_size: number of test samples
    generator: iterator yielding augmented input batches in a fixed sample
        order and starting over after each full pass. A trailing partial
        batch is supported when batch_size does not divide test_size.
        NOTE(review): assumes an unshuffled iterator such as the one returned
        by tta_generator() - confirm for other generators.
    batch_size: number of samples per full batch
    epochs: number of passes over the test set to average
    num_classes: number of columns in the prediction array

    Returns an array of shape (test_size, num_classes) holding the mean
    prediction per sample.

    Raises ValueError if epochs is smaller than 1.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    pred = np.zeros(shape = (test_size,num_classes), dtype = float)
    # Ceiling division: the trailing partial batch is consumed too, so every pass
    # covers all samples and the next pass starts again at the first sample.
    steps_per_pass = (test_size + batch_size - 1) // batch_size
    for _ in range(epochs):
        for step in range(steps_per_pass):
            sta = batch_size * step
            end = min(sta + batch_size, test_size)
            batch_pred = model.predict(next(generator))
            # Slice to the rows that belong to this window, which guards against
            # a generator that pads its final batch.
            pred[sta:end] += batch_pred[:end - sta]

    return pred / epochs


def tta_generator():
    """Return an augmenting, unshuffled batch iterator over the global x_test.

    Reads the module-level x_test and batch_size at call time. No labels are
    passed, so the iterator yields image batches only.
    """
    augmenter = ImageDataGenerator(
        rotation_range = 20,
        width_shift_range = 0.2,
        height_shift_range = 0.2,
        zoom_range = 0.2,
        channel_shift_range = 0.2,
        horizontal_flip = True,
    )
    # shuffle = False keeps the batches in dataset order, which tta() relies on
    # when it writes predictions back by position.
    return augmenter.flow(x_test, batch_size = batch_size, shuffle = False)

### データのロードと正規化

In [7]:
# Load the CIFAR-10 train/test split.
(x_train_raw, t_train_raw), (x_test_raw,t_test_raw) = cifar10.load_data()

# Normalize the images by dividing the raw pixel values by 255.
x_train = x_train_raw / 255
x_test  = x_test_raw / 255

# Convert the labels to one-hot representation.
t_train = to_categorical(t_train_raw)
t_test = to_categorical(t_test_raw)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [8]:
# Keep only the first 10000 training samples (images and labels stay aligned).
x_train, t_train = x_train[:10000], t_train[:10000]

### バッチサイズとエポックの決定
batch_sizeは，訓練データとテストデータの枚数の公約数にしてください．テストデータの枚数を割り切れない値にすると，TTAで予測とサンプルの対応がずれる恐れがあります．

In [19]:
# Mini-batch size used by the data generators, and the number of training epochs.
batch_size, epochs = 500, 20
# Number of whole batches per pass; floor division drops any remainder.
steps_per_epoch = len(x_train) // batch_size
validation_steps = len(x_test) // batch_size

# ベンチマーク
層の浅いモデルで学習・予測

In [None]:
# Train the benchmark model on the training data without augmentation.
model = create_bench_model()
model.compile(optimizer = Adam(), loss = "categorical_crossentropy", metrics = ["accuracy"])
train_gen = ImageDataGenerator().flow(x_train, t_train, batch_size = batch_size)
val_gen = ImageDataGenerator().flow(x_test, t_test, batch_size = batch_size)
history = model.fit_generator(
    train_gen,
    steps_per_epoch = steps_per_epoch,
    epochs = epochs,
    validation_data = val_gen,
    validation_steps = validation_steps,
)

### Evaluate and save
my_eval(model, x_test, t_test)
loss_and_acc(history, "bench")
model.save(data_dir + "models/bench.hdf5")

# Data Augmentation
Data Augmentationで汎化性能向上を目指す

In [None]:
# Train the benchmark architecture again, this time on augmented batches.
model = create_bench_model()
model.compile(optimizer = Adam(), loss = "categorical_crossentropy", metrics = ["accuracy"])
val_gen = ImageDataGenerator().flow(x_test, t_test, batch_size = batch_size)
history = model.fit_generator(
    da_generator(),
    steps_per_epoch = steps_per_epoch,
    epochs = epochs,
    validation_data = val_gen,
    validation_steps = validation_steps,
)

### Evaluate and save
my_eval(model, x_test, t_test)
loss_and_acc(history, "DA")
# Compare against the history saved under the name "bench".
bench_history = load_history("bench")
compare(bench_history, "no_DA", history, "DA", "no_DA_vs_DA")
model.save(data_dir + "models/DA.hdf5")

# 層を増やす
層を増やすことにより，モデルの表現力向上を目指す

In [None]:
# Train the deeper model on augmented batches.
model = create_deep_model()
model.compile(optimizer = Adam(), loss = "categorical_crossentropy", metrics = ["accuracy"])
val_gen = ImageDataGenerator().flow(x_test, t_test, batch_size = batch_size)
history = model.fit_generator(
    da_generator(),
    steps_per_epoch = steps_per_epoch,
    epochs = epochs,
    validation_data = val_gen,
    validation_steps = validation_steps,
)

### Evaluate and save
my_eval(model, x_test, t_test)
loss_and_acc(history, "deep")
# The baseline loaded here is the history saved as "DA"; it is labelled "bench" in the plot.
bench_history = load_history("DA")
compare(bench_history, "bench", history, "deep", "bench_vs_deep")
model.save(data_dir + "models/deep.hdf5")

# 学習率減衰
学習率減衰させることにより，パラメータを細かく調整する

In [None]:
# Train the deeper model with the step_decay learning-rate schedule.
model = create_deep_model()
model.compile(optimizer = Adam(), loss = "categorical_crossentropy", metrics = ["accuracy"])
val_gen = ImageDataGenerator().flow(x_test, t_test, batch_size = batch_size)
# NOTE(review): step_decay only lowers the rate from epoch 100 on, so it has no
# effect while `epochs` is below that - confirm the intended epoch count.
lr_decay = LearningRateScheduler(step_decay)
history = model.fit_generator(
    da_generator(),
    steps_per_epoch = steps_per_epoch,
    epochs = epochs,
    validation_data = val_gen,
    validation_steps = validation_steps,
    callbacks = [lr_decay],
)

### Evaluate and save
my_eval(model, x_test, t_test)
loss_and_acc(history, "lr_decay")
# Compare against the history saved under the name "deep".
no_decay_history = load_history("deep")
compare(no_decay_history, "no_decay", history, "decay", "no_decay_vs_decay")
model.save(data_dir + "models/lr_decay.hdf5")

# Test Time Augmentation
予測時にAugmentationを行うことで，精度の向上を目指す

In [None]:
# Number of augmented passes averaged by tta().
tta_epochs = 5
# Reload the model saved as models/lr_decay.hdf5 and predict with test-time augmentation.
model = load_model(data_dir + "models/lr_decay.hdf5")
tta_pred = tta(model, len(x_test), tta_generator(), batch_size, epochs = tta_epochs)

# Accuracy of the arg-max class of the averaged predictions against the arg-max of the one-hot labels.
print("tta_acc: " + str(accuracy_score(np.argmax(tta_pred, axis = 1), np.argmax(t_test, axis = 1))))

# アンサンブル学習

アンサンブル学習で精度の向上を目指す   
多様性を持たせるため半分のモデルはより強いdata augmentationで学習

In [21]:
ens_epochs = 2  # number of models trained by the loop below
tta_epochs = 5  # augmented passes averaged per model by tta()

for i in range(ens_epochs):
    model = create_deep_model()
    model.compile(optimizer = Adam(), loss = "categorical_crossentropy", metrics = ["accuracy"])
    val_gen = ImageDataGenerator().flow(x_test, t_test, batch_size = batch_size)
    lr_decay = LearningRateScheduler(step_decay)

    # The first half of the models uses the regular augmentation, the rest the stronger one.
    train_gen = da_generator() if i < ens_epochs/2 else da_generator_strong()
    his = model.fit_generator(
        train_gen,
        steps_per_epoch = steps_per_epoch,
        epochs = epochs,
        validation_data = val_gen,
        validation_steps = validation_steps,
        verbose = 0,
        callbacks = [lr_decay],
    )

    # Store each model's TTA prediction and the model itself, indexed by i.
    pred = tta(model, len(x_test), tta_generator(), batch_size, epochs = tta_epochs)
    np.save(data_dir + "predictions/" + "pred_" + str(i), pred)
    model.save(data_dir + "models/ensemble_" + str(i) + ".hdf5")

### 予測

acc_meanは，各モデル単体で予測した時のaccuracyの平均値   
final_accは，各モデルの予測の平均値のaccuracy

In [None]:
# Accuracy of each individual model, and the running sum of their predictions.
acc_list = []
final_pred = np.zeros_like(t_test)
for i in range(ens_epochs):
    pred = np.load(data_dir + "predictions/pred_" + str(i) + ".npy")
    acc_list.append(accuracy_score(np.argmax(pred, axis = 1), np.argmax(t_test, axis = 1)))
    final_pred += pred

# Turn the sum into the mean prediction over all models and store it.
final_pred /= ens_epochs
np.save(data_dir + "predictions/final_pred", final_pred)
print("acc_mean: " + str(np.mean(acc_list)))
print("final_acc: " + str(accuracy_score(np.argmax(final_pred, axis = 1), np.argmax(t_test, axis = 1))))