# 模型训练

# 模型训练执行流程线索


In [1]:
#-----GPU configuration-----
#  Note: run this configuration before any other code

# Select which GPU(s) this process is allowed to see
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0" # -1: disable the GPU; 0..n: use that GPU index; separate several indices with commas

import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

# Build the session ConfigProto
config = tf.ConfigProto()

# Fraction of GPU memory for this process (author's note: the program may still exceed this threshold if it needs more)
config.gpu_options.per_process_gpu_memory_fraction = 0.8 # float between 0 and 1, the share of GPU memory to use
# Use GPU memory on demand instead of occupying all of it
config.gpu_options.allow_growth=True # True: allocate as needed, False: allocate the full amount at once

# Create the session and hand it to the Keras TensorFlow backend
sess = tf.Session(config=config)
KTF.set_session(sess)

Using TensorFlow backend.


In [1]:
"""
Retrain the YOLO model for your own dataset.
"""
import pdb
#pdb.set_trace()
import os
import numpy as np
import keras.backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data

# Root directory of the training data, located under the directory named by $HOME.
path='%s/work/data/yolo/mytrain_model_data'%(os.getenv('HOME')) # data root directory

def _main():
    """Run the two training stages back to back on the data under ``path``.

    Stage 1 keeps the freeze state chosen when the model is built
    (``start_layer=-1``) and trains up to epoch 50 with lr=1e-3.
    Stage 2 unfreezes every layer (``start_layer=0``) and continues from
    epoch 50 up to epoch 200 with lr=1e-4. Both stages use a batch size of 14.
    """
    stage_settings = (
        # stage 1: no unfreezing
        dict(start_layer=-1, lr=1e-3, batch_size=14, epochs=50, init_epochs=0),
        # stage 2: unfreeze from layer 0 onwards and resume at epoch 50
        dict(start_layer=0, lr=1e-4, batch_size=14, epochs=200, init_epochs=50),
    )
    for settings in stage_settings:
        train_model(path, **settings)
#模型训练
def train_model(
        path,           # data directory
        start_layer=-1, # index of the first layer to unfreeze; -1 = do not unfreeze anything
        lr=1e-3,        # learning rate
        batch_size=32,  # batch size
        epochs=20,      # epoch index at which training stops
        init_epochs=0,  # epoch index at which training starts (for resuming)
        use_plateau_callbacks=False  # also apply ReduceLROnPlateau / EarlyStopping
        ):
    """Train (or continue training) a YOLOv3 / Tiny YOLOv3 model on the data under ``path``.

    Files read from ``path``: ``train.txt`` (annotations), ``voc_classes.txt``
    (class names) and ``yolo_anchors.txt`` (anchors). TensorBoard logs go to
    ``path/log``. When the anchor file holds 6 anchors the tiny model is built,
    otherwise the full model.

    The weights file (``yolo_weights.h5`` or ``tiny_yolo_weights.h5``) is both the
    file passed to the model builder as pretrained weights and the file that is
    overwritten with the trained weights when training finishes. The best
    ``val_loss`` weights are additionally checkpointed to ``cp_file.h5`` /
    ``tiny_cp_file.h5``.

    @param path        data directory
    @param start_layer layers from this index to the last one are made trainable;
                       a negative value leaves the freeze state set by the builder
    @param lr          learning rate for the Adam optimizer
    @param batch_size  number of samples per batch
    @param epochs      passed to ``fit_generator`` as ``epochs``
    @param init_epochs passed to ``fit_generator`` as ``initial_epoch``
    @param use_plateau_callbacks when True, ReduceLROnPlateau and EarlyStopping
                       (both monitoring ``val_loss``) are added to the callbacks;
                       the default False keeps the previous behaviour
    @return the object returned by ``model.fit_generator``

    @raise KeyboardInterrupt re-raised after the current weights have been saved
                       to ``<weights file stem>_interrupted.h5``
    """
    print('===============train_model================')
    #===============training configuration==================
    # Annotation file (author's note: generated by voc_annotation.py), one sample per line:
    #   img_file 6,1,314,262,19 40,97,121,411,4 137,36,169,109,14 ...
    annotation_path = '%s/train.txt'%(path)
    log_dir = '%s/log'%(path)                    # log directory
    classes_path = '%s/voc_classes.txt'%(path)   # class-name file, one name per line
    anchors_path = '%s/yolo_anchors.txt'%(path)  # anchor file, e.g. 10,13,  16,30,  33,23, ...
    class_names = get_classes(classes_path)      # list of class names
    num_classes = len(class_names)               # number of classes
    anchors = get_anchors(anchors_path)          # anchor (w, h) pairs, shape (-1, 2)
    input_shape = (416,416)                      # model input size (h, w), multiple of 32

    #=================load the annotation lines================
    val_split = 0.1                     # keep 10% of the samples for validation
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)               # fixed seed so the train/val split is the same on every run
    np.random.shuffle(lines)
    np.random.seed(None)                # re-seed so later randomness is not fixed
    num_val = int(len(lines)*val_split) # number of validation samples
    num_train = len(lines) - num_val    # number of training samples

    is_tiny_version = len(anchors)==6 # default setting
    if is_tiny_version:
        model_file='%s/tiny_yolo_weights.h5'%path
        cp_file='%s/tiny_cp_file.h5'%path
    else:
        model_file='%s/yolo_weights.h5'%path
        cp_file='%s/cp_file.h5'%path

    print('--------params--------')
    print('path:',path)
    print('start_layer:',start_layer)
    print('lr:',lr)
    print('batch_size:',batch_size)
    print('epochs:',epochs)
    print('log_dir:',log_dir)
    print('annotation_path:',annotation_path)
    print('classes_path:',classes_path)
    print('anchors_path:',anchors_path)
    print('input_shape:',input_shape)
    print('num_classes:',num_classes)
    print('class_names:',class_names)
    print('anchors:',anchors)
    print('num_anchors:',len(anchors))
    print('\n')

    #===============build the training model=================
    # freeze_body=2 => freeze everything except the output layers
    if is_tiny_version:
        print('create_tiny_model!')
        model = create_tiny_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path=model_file)
    else:
        print('create_model!')
        model = create_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path=model_file) # make sure you know what you freeze
        print('model.input_shape:',model.input_shape)
        print('model.output_shape:',model.output_shape)

    #================training callbacks=================
    # TensorBoard logs
    logging = TensorBoard(log_dir=log_dir)
    # checkpoint of the best weights seen so far
    checkpoint = ModelCheckpoint(cp_file,
        monitor='val_loss', save_weights_only=True, save_best_only=True, period=1)
    callbacks = [logging, checkpoint]
    if use_plateau_callbacks:
        # learning-rate decay when val_loss stops improving
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
        # stop training when val_loss has not improved for 10 epochs
        early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
        callbacks += [reduce_lr, early_stopping]

    #=================unfreeze layers====================
    if start_layer>=0:
        print('unfreeze from start_layer:',start_layer)
        for i in range(start_layer,len(model.layers)):
            model.layers[i].trainable = True

    #=================compile===================
    # The 'yolo_loss' layer already outputs the loss, so the Keras loss just passes y_pred through.
    # NOTE(review): 'accuracy' is computed against the dummy zero targets the generator yields,
    # so it is probably not meaningful; kept so the logged/history keys do not change.
    model.compile(optimizer=Adam(lr=lr), loss={'yolo_loss': lambda y_true, y_pred: y_pred},metrics=['accuracy'])

    #=================data generators==========
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
    data_gen=data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes)
    print(type(data_gen))

    #=================training====================
    try:
        history=model.fit_generator(data_gen,
                steps_per_epoch=max(1, num_train//batch_size),
                validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
                validation_steps=max(1, num_val//batch_size),
                epochs=epochs,
                initial_epoch=init_epochs,
                callbacks=callbacks)
    except KeyboardInterrupt:
        # `model` is local to this function, so after an interrupt the caller has no way
        # to save it. Keep the current weights in a separate file (the regular weights
        # file is left untouched) and let the interrupt propagate.
        interrupted_file = os.path.splitext(model_file)[0] + '_interrupted.h5'
        print('training interrupted, save model to file:',interrupted_file)
        model.save_weights(interrupted_file)
        raise

    #=================save the trained weights================
    print('save model to file:',model_file)
    model.save_weights(model_file)
    print('\n\n')
    return history
    
def get_classes(classes_path):
    '''Load the detection class names.

    @param classes_path path of the class file; each line holds one class name, e.g.:
        dog
        cat
    @return class_names [list] class names in file order, with surrounding
        whitespace removed. Blank lines are skipped so that a stray empty line
        (for example at the end of the file) does not become an empty class
        name and inflate the class count.
    '''
    with open(classes_path) as f:
        stripped_lines = (line.strip() for line in f)
        class_names = [name for name in stripped_lines if name]
    return class_names

def get_anchors(anchors_path):
    '''Load the anchor array from a file.

    @param anchors_path path of the anchor file; only its first line is read.
        That line is a comma-separated list of numbers such as:
        10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
        Consecutive pairs form one anchor (box width and height).
    @return numpy array of floats reshaped to (-1, 2), one anchor per row
    '''
    with open(anchors_path) as anchor_file:
        first_line = anchor_file.readline()
    values = list(map(float, first_line.split(',')))
    anchor_array = np.array(values)
    return anchor_array.reshape(-1, 2)


def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/yolo_weights.h5'):
    '''Create the YOLOv3 training model.

    @param input_shape     [tuple] model input size (h, w), e.g. (416, 416)
    @param anchors         [array] anchors; len(anchors) must be a multiple of 3
                           in practice (9 anchors in the default setup)
    @param num_classes     [int  ] number of detection classes
    @param load_pretrained [bool ] load weights from weights_path if that file exists
    @param freeze_body     [int  ] 1 - freeze the first 185 layers (the darknet53 body),
                           2 - freeze all but the last 3 layers, anything else - freeze nothing.
                           Freezing is only applied when pretrained weights were loaded.
    @param weights_path    [str  ] path of the pretrained weights file

    @return model
        inputs : the image plus one ground-truth tensor per output scale
        output : the loss computed by yolo_loss
    '''
    K.clear_session()  # get a new session
    image_input = Input(shape=(None, None, 3))
    height, width = input_shape
    num_anchors = len(anchors)

    #============= ground-truth inputs, one per output scale =============
    # The scales divide the input size by 32, 16 and 8. With a 416x416 input,
    # 9 anchors and 20 classes the author recorded the shapes
    # (?, 13, 13, 3, 25), (?, 26, 26, 3, 25) and (?, 52, 52, 3, 25).
    y_true = [
        Input(shape=(height//stride, width//stride, num_anchors//3, num_classes+5))
        for stride in (32, 16, 8)
    ]

    #============= YOLO body =============
    model_body = yolo_body(image_input, num_anchors//3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    #============= pretrained weights and freezing =============
    if load_pretrained and os.path.exists(weights_path):
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            freeze_counts = (185, len(model_body.layers)-3)
            num_frozen = freeze_counts[freeze_body-1]
            for layer_index in range(num_frozen):
                model_body.layers[layer_index].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num_frozen, len(model_body.layers)))

    #============= loss layer =============
    loss_arguments = {'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5}
    loss_layer = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments=loss_arguments)
    model_loss = loss_layer([*model_body.output, *y_true])

    #============= final training model: inputs -> loss =============
    return Model([model_body.input, *y_true], model_loss)

def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/tiny_yolo_weights.h5'):
    '''Create the training model for Tiny YOLOv3.

    @param input_shape     [tuple] model input size (h, w)
    @param anchors         [array] anchors, split evenly over the 2 output scales
    @param num_classes     [int  ] number of detection classes
    @param load_pretrained [bool ] load weights from weights_path if that file exists
    @param freeze_body     [int  ] 1 - freeze the first 20 layers (the darknet body),
                           2 - freeze all but the last 2 layers, anything else - freeze nothing.
                           Freezing is only applied when pretrained weights were loaded.
    @param weights_path    [str  ] path of the pretrained weights file

    @return model taking the image plus one ground-truth tensor per scale
        and producing the loss computed by yolo_loss
    '''
    K.clear_session()  # get a new session
    image_input = Input(shape=(None, None, 3))
    height, width = input_shape
    num_anchors = len(anchors)

    # one ground-truth input per output scale (input size divided by 32 and 16)
    y_true = [
        Input(shape=(height//stride, width//stride, num_anchors//2, num_classes+5))
        for stride in (32, 16)
    ]

    model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)
    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained and os.path.exists(weights_path):
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze the darknet body or freeze all but 2 output layers.
            freeze_counts = (20, len(model_body.layers)-2)
            num_frozen = freeze_counts[freeze_body-1]
            for layer_index in range(num_frozen):
                model_body.layers[layer_index].trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num_frozen, len(model_body.layers)))

    loss_arguments = {'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7}
    loss_layer = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments=loss_arguments)
    model_loss = loss_layer([*model_body.output, *y_true])

    return Model([model_body.input, *y_true], model_loss)

def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Endless batch generator for fit_generator.

    @param annotation_lines list of samples, one string per sample, formatted like:
        img_file 6,1,314,262,19 40,97,121,411,4 137,36,169,109,14
        The list is shuffled in place each time the read position wraps to 0.
    @param batch_size  number of samples per batch
    @param input_shape model input size, e.g. (416, 416)
    @param anchors     anchor (w, h) pairs
    @param num_classes number of classes
    @return yields ([image_data, *y_true], np.zeros(batch_size)).
        The zeros are a placeholder target for the loss output. For batch size 32,
        a 416x416 input, 9 anchors and 20 classes the author recorded the shapes
        (32,416,416,3), (32,13,13,3,25), (32,26,26,3,25), (32,52,52,3,25) and (32,).
    '''
    total = len(annotation_lines)
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            if cursor == 0:
                # start of a pass over the data: reshuffle
                np.random.shuffle(annotation_lines)
            # one sample with random=True; per the author's notes the image is augmented
            # and normalised, and the boxes are transformed along with it
            image, box = get_random_data(annotation_lines[cursor], input_shape, random=True)
            images.append(image)
            boxes.append(box)
            cursor = (cursor + 1) % total
        image_data = np.array(images)
        box_data = np.array(boxes)
        # convert the raw boxes into the per-scale ground-truth tensors used for training
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)

def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    '''Return a data_generator for the given samples, or None when there is
    nothing to generate (no annotation lines, or a batch size of 0 or less).
    '''
    if len(annotation_lines) == 0:
        return None
    if batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)


_main() # entry point: start training

Using TensorFlow backend.


--------params--------
path: /home/hjw/work/data/yolo/mytrain_model_data
start_layer: -1
lr: 0.001
batch_size: 14
epochs: 50
log_dir: /home/hjw/work/data/yolo/mytrain_model_data/log
annotation_path: /home/hjw/work/data/yolo/mytrain_model_data/train.txt
classes_path: /home/hjw/work/data/yolo/mytrain_model_data/voc_classes.txt
anchors_path: /home/hjw/work/data/yolo/mytrain_model_data/yolo_anchors.txt
input_shape: (416, 416)
num_classes: 20
class_names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
anchors: [[ 22.  33.]
 [ 46.  52.]
 [ 58. 113.]
 [106. 186.]
 [107.  76.]
 [188. 274.]
 [202. 132.]
 [374. 195.]
 [374. 337.]]
num_anchors: 9


create_model!
Create YOLOv3 model with 9 anchors and 20 classes.
Load weights /home/hjw/work/data/yolo/mytrain_model_data/yolo_weights.h5.
Freeze the first 249 layers of total 252 layers.
mdoel.input_shape

Epoch 49/50
Epoch 50/50
save model to file: /home/hjw/work/data/yolo/mytrain_model_data/yolo_weights.h5



--------params--------
path: /home/hjw/work/data/yolo/mytrain_model_data
start_layer: 0
lr: 0.0001
batch_size: 14
epochs: 200
log_dir: /home/hjw/work/data/yolo/mytrain_model_data/log
annotation_path: /home/hjw/work/data/yolo/mytrain_model_data/train.txt
classes_path: /home/hjw/work/data/yolo/mytrain_model_data/voc_classes.txt
anchors_path: /home/hjw/work/data/yolo/mytrain_model_data/yolo_anchors.txt
input_shape: (416, 416)
num_classes: 20
class_names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
anchors: [[ 22.  33.]
 [ 46.  52.]
 [ 58. 113.]
 [106. 186.]
 [107.  76.]
 [188. 274.]
 [202. 132.]
 [374. 195.]
 [374. 337.]]
num_anchors: 9


create_model!
Create YOLOv3 model with 9 anchors and 20 classes.
Load weights /home/hjw/work/data

Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200

KeyboardInterrupt: 

In [2]:
# NOTE(review): `model` is only assigned inside train_model(), so the name does not
# exist at notebook scope and this call fails with the NameError shown below.
model.save_weights('model_file.h5')





NameError: name 'model' is not defined