## ResNet50-fine-tuning-V1
整体说明：
- 使用keras的ResNet50预训练模型进行fine-tuning，分别进行下面四种情况的fine-tuning
- 1、只训练自定义的输出层
- 2、冻结前698层，训练后面的层
- 3、冻结前618层，训练后面的层
- 4、冻结前746层，训练后面的层  
看哪种情况效果最好



In [None]:
'''
数据目录结构：
data/
    train/    #原始数据，train.zip解压后生成
        dog.0.jpg
        cat.0.jpg
        ...
    train2/   #按标签分目录后的数据（连接文件）
        dog/
            dog.0.jpg
            dog.1.jpg
            ...
        cat/
            cat.0.jpg
            cat.1.jpg
            ...
    train3/   #去除异常图片后的训练数据（连接文件）
        dog/    #9983张图片
            dog.0.jpg
            dog.1.jpg
            ...
        cat/    #9961张图片
            cat.0.jpg
            cat.1.jpg
            ...
    validation/  #去除异常图片后的验证数据（连接文件）
        dog/   #2496张图片
            dog001.jpg
            dog002.jpg
            ...
        cat/   #2490张图片
            cat001.jpg
            cat002.jpg
            ...
    test/     
        test/  #测试集数据，12500张图片
            1.jpg
            2.jpg
            ...
'''
import cv2
import time
import pandas as pd
from tqdm import tqdm   #进度条
from PIL import Image
from helper import *

from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
from keras.callbacks import *
from keras.optimizers import *
from keras.utils import *
from sklearn.utils import shuffle

# Input image size (square) used for both training and test images.
img_width = img_height = 299

# NOTE(review): the directory notes at the top of this cell describe data/train3
# as the cleaned training set (9983 + 9961 = 19944 images, matching the
# arithmetic below), yet training reads from data/train2 - confirm which is intended.
train_data_dir = 'data/train2'
test_data_dir = 'data/test'

# 19944/72=277  4986/72=69.25
# NOTE(review): the model.fit calls in this file pass batch_size=128 literally,
# so this value is not used by them.
batch_size = 72
epochs = 20
VER = 1

# Weight files, one per training stage.
model_h5file_base = "ResNet50-base-tuning-v%s.h5" % VER
model_h5file_tunig1 = "ResNet50-fine-tuning-1-v%s.h5" % VER
model_h5file_tunig2 = "ResNet50-fine-tuning-2-v%s.h5" % VER
model_h5file_tunig3 = "ResNet50-fine-tuning-3-v%s.h5" % VER

# Prediction result files, one per training stage.
pred_file_base = "pred-ResNet50-base-tuning-v%s.csv" % VER
pred_file_tuning1 = "pred-ResNet50-fine-tuning-1-v%s.csv" % VER
pred_file_tuning2 = "pred-ResNet50-fine-tuning-2-v%s.csv" % VER
pred_file_tuning3 = "pred-ResNet50-fine-tuning-3-v%s.csv" % VER


In [None]:
# Build the ResNet50-based binary classifier: ImageNet-pretrained backbone
# (all layers frozen) followed by dropout and a single sigmoid output unit.
# time.perf_counter() is used for timing because time.clock() was removed in
# Python 3.8.
start = time.perf_counter()
x_input = Input((img_width, img_height, 3))
# Apply the ResNet50 input preprocessing inside the graph, so callers feed raw images.
x_input = Lambda(resnet50.preprocess_input)(x_input)

base_model = ResNet50(input_tensor=x_input, weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3), pooling = 'avg')
# Freeze every backbone layer so that only the new head below is trainable.
for layer in base_model.layers:
    layer.trainable = False

y = Dropout(0.5)(base_model.output)  # dropout rate raised from 0.25 to 0.5
y = Dense(1, activation='sigmoid',kernel_regularizer=regularizers.l2(0.001))(y)  # L2 regularization added
model = Model(inputs=base_model.input, outputs=y)

print("Load base model used time:", (time.perf_counter() - start))


In [None]:
# Adam with a small learning rate; binary cross-entropy pairs with the
# single sigmoid output unit of the model.
adam = optimizers.Adam(
    lr=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)

model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])


In [None]:
# 输出模型结构图
#plot_model(model, to_file='model_InceptionResNetV2.png')

In [None]:
# Load the data - everything is read into memory at once.
# (An earlier variant called read_images_to_memory(..., test=True); presumably a
# quick reduced run - confirm in helper.)

# Training data: read the images/labels pair, then shuffle both consistently.
X_train, Y_train = shuffle(
    *read_images_to_memory(train_data_dir, img_width, img_height)
)

# Test data: the 12500 test images.
# NOTE(review): height/width are passed in the opposite order to the call above;
# harmless while both are 299, but confirm against the helper signatures.
X_test = load_test_data(12500, img_height, img_width, test_data_dir)


In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()


In [None]:
# Train the model and keep only the weights with the lowest validation loss.
checkpoint = ModelCheckpoint(
    model_h5file_base,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# Stop training once val_loss has gone 3 epochs without improving.
stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3, verbose=1)
callbacks_list = [stopping, checkpoint]

history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks_list,
)

In [None]:
# Visualize the learning curves recorded in the training history.
show_learning_curve(history)

In [None]:
# Predict on the test set with the base (head-only) stage.
# Presumably the helper loads model_h5file_base into the model and writes the
# predictions to pred_file_base - confirm in helper.predict_on_model.
predict_on_model(X_test, model, model_h5file_base, pred_file_base)

## 冻结前698层，再训练
放开Inception-resnet-C第5个block之后的层，不含Inception-resnet-C_5  
698 block8_5_ac 

In [None]:
# Set the number of layers to freeze (the first 698); the remaining layers are
# presumably made trainable by the helper - confirm in helper.lock_layers.
# NOTE(review): 698 / "block8_5_ac" is an InceptionResNetV2 layer index, but this
# model is built on ResNet50, which has far fewer layers (roughly 175 in Keras).
# Verify that this cut-off leaves any layer trainable at all.
lock_layers(model, locked_layer_nums=698)

In [None]:
# Start again from the best weights of the base stage, then recompile with a
# fresh Adam optimizer so the new freeze setting is picked up by training.
model.load_weights(model_h5file_base)
adam = optimizers.Adam(
    lr=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

In [None]:
# Fine-tuning stage 1: train and keep only the weights with the lowest validation loss.
checkpoint = ModelCheckpoint(
    model_h5file_tunig1,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# Stop training once val_loss has gone 3 epochs without improving.
stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3, verbose=1)
callbacks_list = [stopping, checkpoint]

history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks_list,
)

In [None]:
# Visualize the learning curves recorded in the training history.
show_learning_curve(history)

In [None]:
# Predict on the test set with fine-tuning stage 1.
# Presumably the helper loads model_h5file_tunig1 into the model and writes the
# predictions to pred_file_tuning1 - confirm in helper.predict_on_model.
predict_on_model(X_test, model, model_h5file_tunig1, pred_file_tuning1)

## 冻结前618层，再训练
放开Reduction-B之后的层，不含Reduction-B  
618 mixed_7a

In [None]:
# Set the number of layers to freeze (the first 618); the remaining layers are
# presumably made trainable by the helper - confirm in helper.lock_layers.
# NOTE(review): 618 / "mixed_7a" is an InceptionResNetV2 layer index, but this
# model is built on ResNet50, which has far fewer layers (roughly 175 in Keras).
# Verify that this cut-off leaves any layer trainable at all.
lock_layers(model, locked_layer_nums=618)

In [None]:
# Start again from the best weights of the base stage, then recompile with a
# fresh Adam optimizer so the new freeze setting is picked up by training.
model.load_weights(model_h5file_base)
adam = optimizers.Adam(
    lr=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

In [None]:
# Fine-tuning stage 2: train and keep only the weights with the lowest validation loss.
checkpoint = ModelCheckpoint(
    model_h5file_tunig2,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# Stop training once val_loss has gone 3 epochs without improving.
stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3, verbose=1)
callbacks_list = [stopping, checkpoint]
history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks_list,
)

In [None]:
# Visualize the learning curves recorded in the training history.
show_learning_curve(history)

In [None]:
# Predict on the test set with fine-tuning stage 2.
# Presumably the helper loads model_h5file_tunig2 into the model and writes the
# predictions to pred_file_tuning2 - confirm in helper.predict_on_model.
predict_on_model(X_test, model, model_h5file_tunig2, pred_file_tuning2)

## 冻结前746层，再训练
放开Inception-resnet-C 第8个block之后的层，不含Inception-resnet-C_8  
746 block8_8_ac

In [None]:
# Set the number of layers to freeze (the first 746); the remaining layers are
# presumably made trainable by the helper - confirm in helper.lock_layers.
# NOTE(review): 746 / "block8_8_ac" is an InceptionResNetV2 layer index, but this
# model is built on ResNet50, which has far fewer layers (roughly 175 in Keras).
# Verify that this cut-off leaves any layer trainable at all.
lock_layers(model, locked_layer_nums=746)

In [None]:
# Start again from the best weights of the base stage, then recompile with a
# fresh Adam optimizer so the new freeze setting is picked up by training.
model.load_weights(model_h5file_base)
adam = optimizers.Adam(
    lr=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

In [None]:
# Fine-tuning stage 3: train and keep only the weights with the lowest validation loss.
checkpoint = ModelCheckpoint(
    model_h5file_tunig3,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
# Stop training once val_loss has gone 3 epochs without improving.
stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3, verbose=1)
callbacks_list = [stopping, checkpoint]

history = model.fit(
    X_train,
    Y_train,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2,
    shuffle=True,
    callbacks=callbacks_list,
)


In [None]:
# Visualize the learning curves recorded in the training history.
show_learning_curve(history)

In [None]:
# Predict on the test set with fine-tuning stage 3.
# Presumably the helper loads model_h5file_tunig3 into the model and writes the
# predictions to pred_file_tuning3 - confirm in helper.predict_on_model.
predict_on_model(X_test, model, model_h5file_tunig3, pred_file_tuning3)