# Main function
This file trains the model.    
<span style="color:red">Warning: our model was trained on a multi-GPU server. If your server does not have multiple GPUs, comment out the two lines 'model = multi_gpu_model(model, gpus=4)'.</span>

In [5]:
import import_ipynb
import keras
from keras.utils.np_utils import to_categorical
from keras.optimizers import Adam
from keras.models import load_model
import xml.etree.ElementTree as ET
from keras.preprocessing.image import load_img, img_to_array, array_to_img
# from keras.utils import multi_gpu_model
import numpy as np
#from IPython.display import SVG
#from keras.utils.vis_utils import model_to_dot
from datetime import datetime
import os
from settings import setting

## loading data
LOAD_IMAGE is a helper function that loads an image file into a numeric array.
readXML is a helper function that reads the label information belonging to an image.
load_DATA loads all images (and their labels) found in a directory.

In [6]:
def LOAD_IMAGE(path):
    """Load the image at `path` as an array with its axes reversed.

    The image is converted with
    `img_to_array(..., data_format='channels_first')` and then transposed.
    Under Keras' channels_first convention the converted array is
    (channels, height, width), so the result is (width, height, channels).

    Args:
        path: location of the image file, passed straight to `load_img`.

    Returns:
        The transposed NumPy array of pixel values.
    """
    image = load_img(path)
    pixels = img_to_array(image, data_format='channels_first')
    # A bare transpose() reverses the order of all axes.
    return pixels.transpose()

def readXML(f):
    """Build the label grid for one annotation file.

    Leaf elements found at most three levels below the XML root are collected
    into a tag -> text mapping (a later element with the same tag overwrites an
    earlier one). The 'xmax', 'xmin', 'ymax', 'ymin' and 'name' entries are
    then turned into a label vector.

    Args:
        f: file name or file object accepted by `ET.parse`.

    Returns:
        A float32 array of shape (19, 14, 7) in which every cell holds
        [xmax, xmin, ymax, ymin, 0, 0, class_index]. The class index is the
        second character of the 'name' text converted to int.

    Raises:
        KeyError: if one of the required tags is missing.
    """
    def _leaves(node, levels):
        # Pre-order walk that descends at most `levels` levels below `node`.
        for child in node:
            if len(child) == 0:
                yield child.tag, child.text
            elif levels > 1:
                yield from _leaves(child, levels - 1)

    fields = dict(_leaves(ET.parse(f).getroot(), 3))

    box = [int(fields[key]) for key in ('xmax', 'xmin', 'ymax', 'ymin')]
    class_index = int(fields['name'][1])

    cell = np.array(box + [0, 0, class_index], dtype='float32')
    # Repeat the same 7-vector over the whole 19 x 14 grid.
    return np.tile(cell, (19, 14, 1))

def load_DATA(srcDir):
    """Load every annotated image in `srcDir` together with its label grid.

    Each '*.xml' file in the directory is paired with the image whose path is
    obtained by replacing '.xml' with '.jpg'. Both paths are printed as they
    are processed.

    Args:
        srcDir: directory containing the '.xml' / '.jpg' pairs.

    Returns:
        A two-element list [train_data, train_label]:
        train_data is float32 with shape (n, 640, 480, 3) and train_label is
        float32 with shape (n, 19, 14, 7), where n is the number of '.xml'
        files. Label channels 4 and 5 are overwritten with the cell index
        along the 19-long and 14-long axes respectively.
    """
    xml_names = [entry for entry in os.listdir(srcDir) if entry.endswith(".xml")]
    total = len(xml_names)

    train_data = np.empty(shape=[total, 640, 480, 3], dtype='float32')
    train_label = np.empty(shape=[total, 19, 14, 7], dtype='float32')

    for index, xml_name in enumerate(xml_names):
        xmlFile = srcDir + "/" + xml_name
        print(xmlFile)
        imgFile = xmlFile.replace(".xml", ".jpg")
        print(imgFile)
        train_data[index] = LOAD_IMAGE(imgFile)
        train_label[index] = readXML(xmlFile)

    # Stamp each grid cell with its own position (broadcast over all samples).
    train_label[..., 4] = np.arange(19).reshape(19, 1)
    train_label[..., 5] = np.arange(14).reshape(1, 14)
    return [train_data, train_label]

## The main function loads the corresponding network with a specific loss function according to the setting parameters

A setting has the following format:  
    "setting1": {  
        "loss": "XXX",  
        "model": "DecayByBatch",    
        "weight_Classification_loss": X,  
        "weight_Object_loss": X,  
        "weight_Localization_loss": X,  
        "lr": XXX,  
        "decay": XXX,  
        "weight_file": "weight-settingX",  
        "loss_file": "losses-settingX.txt",  
        "batch_size": XXX,  
        "epochs": XXX  
    }  
Thus, given a setting, the main function automatically builds a new model or loads an existing one for that setting.

In [11]:
# iniModel indicates whether a new model must be built (True) or a trained model stored in the .h5 file is loaded (False).
def main(iniModel, setting):
    """Train the network described by `setting`, or continue training it.

    For every data folder the images are loaded, the class channel is expanded
    to a one-hot encoding, and the model is fitted with a 20% validation split.
    Side effects: a weights-only checkpoint
    ``<weight_file>-<epoch>.h5`` is written every epoch, the training history
    is appended to ``setting["loss_file"]``, and the full model is saved to
    ``<weight_file>.h5`` after each folder.

    Args:
        iniModel: True to build and compile a fresh network; False to resume
            from the model stored at ``setting["weight_file"] + '.h5'``.
        setting: dict of hyper-parameters and file names. Keys read here:
            "model", "loss", "lr", "weight_file", "loss_file", "batch_size"
            and "epochs".

    Raises:
        ValueError: if ``setting["model"]`` or ``setting["loss"]`` is not one
            of the supported names.
    """
    # Choosing different architecture
    import net_2 as yolo
    import Util_V2 as U

    # Reject unsupported names up front; otherwise the callback / loss would be
    # left undefined and the failure would only surface after the (slow) data
    # loading step.
    decay_mode = setting['model']
    if decay_mode not in ("DecayByBatch", "DecayByEpoch"):
        raise ValueError("unsupported setting['model']: %r" % (decay_mode,))
    loss_name = setting['loss']
    if loss_name not in ('Loss_v2', 'Loss_v3'):
        raise ValueError("unsupported setting['loss']: %r" % (loss_name,))

    # Choosing different decay mechanism (the optimizer is the same for both)
    optimizer = Adam(lr=setting["lr"])
    if decay_mode == "DecayByBatch":
        CallBackFun = U.lr_minimum()
    else:
        CallBackFun = U.DecayByEpoch()

    # Choosing different loss function
    if loss_name == 'Loss_v2':
        lossFunction = U.Loss_v2
    else:
        lossFunction = U.Loss_v3

    # folderCount is a variable whose value represents how many folders the model are loading
    # each folder has 1000 images with .xml files - deleted
    # folder 0 has 20,000 images with .xml files
    # folder 1 has 2,410 images with .xml files
    # isDone is a variable that represents if the model has been loaded, to avoid repeatedly loading
    folderCount = 1
    isDone = False
    for count in range(folderCount):
        srcDir = "group2/images/" + str(count)
        print("+++ run: "+ srcDir + " " + str(datetime.now()) + "+++")
        train_data, train_label = load_DATA(srcDir)

        # to transform the class representation into one-hot encoding for cross-entropy loss
        one_hot_encoding = to_categorical(y=train_label[:, :, :, 6], num_classes=10)
        train_label = np.concatenate((train_label, one_hot_encoding), axis = -1)

        if iniModel:
            model = yolo.network_architecture(input_data=[640, 480, 3])
#             model = multi_gpu_model(model, gpus=4)
            model.compile(optimizer=optimizer, loss=lossFunction)
            iniModel = False
        elif not isDone:
            # The custom loss must be registered under its name for deserialization.
            model = load_model(str(setting["weight_file"]+'.h5'), custom_objects={setting["loss"]: lossFunction})
#             model = multi_gpu_model(model, gpus=4)
        isDone = True

        checkpoint = keras.callbacks.ModelCheckpoint(setting["weight_file"]+'-{epoch:08d}.h5', save_weights_only=True, period=1)
        history = model.fit(x=train_data, y=train_label, validation_split=0.20, batch_size=setting["batch_size"],
                            epochs=setting["epochs"], callbacks=[CallBackFun, checkpoint])

        # saving a log in case the exception occurs
        description = "+++ run: "+ srcDir + " " + str(datetime.now()) + "+++"
        # The context manager closes the log even if a write fails.
        with open(setting["loss_file"], "a+") as f:
            f.write("\n")
            f.write(description)
            f.write("\n")
            f.write(str(history.history))
        print("+++ saved: " + str(datetime.now()) + "+++")
        model.save(str(setting["weight_file"]+'.h5'))

In [None]:
# Entry point of the notebook: train from scratch unless a saved model for
# this setting already exists on disk, in which case training resumes from it.
print("+++ start: " + str(datetime.now()) + "+++")
iniModel = not os.path.isfile(str(setting["weight_file"]+'.h5'))
main(iniModel, setting)
print("+++ finished: " + str(datetime.now()) + "+++")

+++ start: 2018-11-14 19:10:23.115700+++
+++ run: group2/images/0 2018-11-14 19:10:23.134488+++
group2/images/0/img_88100.xml
group2/images/0/img_88100.jpg
group2/images/0/img_34092.xml
group2/images/0/img_34092.jpg
group2/images/0/img_2786.xml
group2/images/0/img_2786.jpg
group2/images/0/img_41655.xml
group2/images/0/img_41655.jpg
group2/images/0/img_96723.xml
group2/images/0/img_96723.jpg
group2/images/0/img_12368.xml
group2/images/0/img_12368.jpg
group2/images/0/img_48255.xml
group2/images/0/img_48255.jpg
group2/images/0/img_46410.xml
group2/images/0/img_46410.jpg
group2/images/0/img_83354.xml
group2/images/0/img_83354.jpg
group2/images/0/img_11431.xml
group2/images/0/img_11431.jpg
group2/images/0/img_65563.xml
group2/images/0/img_65563.jpg
group2/images/0/img_7367.xml
group2/images/0/img_7367.jpg
group2/images/0/img_98954.xml
group2/images/0/img_98954.jpg
group2/images/0/img_28506.xml
group2/images/0/img_28506.jpg
group2/images/0/img_50626.xml
group2/images/0/img_50626.jpg
group2/i

group2/images/0/img_74903.xml
group2/images/0/img_74903.jpg
group2/images/0/img_89957.xml
group2/images/0/img_89957.jpg
group2/images/0/img_94752.xml
group2/images/0/img_94752.jpg
group2/images/0/img_73917.xml
group2/images/0/img_73917.jpg
group2/images/0/img_60543.xml
group2/images/0/img_60543.jpg
group2/images/0/img_74036.xml
group2/images/0/img_74036.jpg
group2/images/0/img_89731.xml
group2/images/0/img_89731.jpg
group2/images/0/img_100776.xml
group2/images/0/img_100776.jpg
group2/images/0/img_86053.xml
group2/images/0/img_86053.jpg
group2/images/0/img_38312.xml
group2/images/0/img_38312.jpg
group2/images/0/img_8191.xml
group2/images/0/img_8191.jpg
group2/images/0/img_10154.xml
group2/images/0/img_10154.jpg
group2/images/0/img_28054.xml
group2/images/0/img_28054.jpg
group2/images/0/img_51101.xml
group2/images/0/img_51101.jpg
group2/images/0/img_2205.xml
group2/images/0/img_2205.jpg
group2/images/0/img_20748.xml
group2/images/0/img_20748.jpg
group2/images/0/img_43668.xml
group2/image

group2/images/0/img_82718.xml
group2/images/0/img_82718.jpg
group2/images/0/img_34235.xml
group2/images/0/img_34235.jpg
group2/images/0/img_18242.xml
group2/images/0/img_18242.jpg
group2/images/0/img_80967.xml
group2/images/0/img_80967.jpg
group2/images/0/img_40014.xml
group2/images/0/img_40014.jpg
group2/images/0/img_65100.xml
group2/images/0/img_65100.jpg
group2/images/0/img_63239.xml
group2/images/0/img_63239.jpg
group2/images/0/img_51434.xml
group2/images/0/img_51434.jpg
group2/images/0/img_28308.xml
group2/images/0/img_28308.jpg
group2/images/0/img_97888.xml
group2/images/0/img_97888.jpg
group2/images/0/img_53555.xml
group2/images/0/img_53555.jpg
group2/images/0/img_6471.xml
group2/images/0/img_6471.jpg
group2/images/0/img_50540.xml
group2/images/0/img_50540.jpg
group2/images/0/img_56251.xml
group2/images/0/img_56251.jpg
group2/images/0/img_43834.xml
group2/images/0/img_43834.jpg
group2/images/0/img_98532.xml
group2/images/0/img_98532.jpg
group2/images/0/img_28996.xml
group2/image

group2/images/0/img_14205.xml
group2/images/0/img_14205.jpg
group2/images/0/img_33672.xml
group2/images/0/img_33672.jpg
group2/images/0/img_2795.xml
group2/images/0/img_2795.jpg
group2/images/0/img_93870.xml
group2/images/0/img_93870.jpg
group2/images/0/img_64198.xml
group2/images/0/img_64198.jpg
group2/images/0/img_91124.xml
group2/images/0/img_91124.jpg
group2/images/0/img_25050.xml
group2/images/0/img_25050.jpg
group2/images/0/img_59936.xml
group2/images/0/img_59936.jpg
group2/images/0/img_14641.xml
group2/images/0/img_14641.jpg
group2/images/0/img_8860.xml
group2/images/0/img_8860.jpg
group2/images/0/img_36872.xml
group2/images/0/img_36872.jpg
group2/images/0/img_100503.xml
group2/images/0/img_100503.jpg
group2/images/0/img_92009.xml
group2/images/0/img_92009.jpg
group2/images/0/img_94577.xml
group2/images/0/img_94577.jpg
group2/images/0/img_59937.xml
group2/images/0/img_59937.jpg
group2/images/0/img_38824.xml
group2/images/0/img_38824.jpg
group2/images/0/img_88749.xml
group2/image

group2/images/0/img_69668.xml
group2/images/0/img_69668.jpg
group2/images/0/img_74129.xml
group2/images/0/img_74129.jpg
group2/images/0/img_70727.xml
group2/images/0/img_70727.jpg
group2/images/0/img_58499.xml
group2/images/0/img_58499.jpg
group2/images/0/img_54016.xml
group2/images/0/img_54016.jpg
group2/images/0/img_98271.xml
group2/images/0/img_98271.jpg
group2/images/0/img_95577.xml
group2/images/0/img_95577.jpg
group2/images/0/img_33972.xml
group2/images/0/img_33972.jpg
group2/images/0/img_41812.xml
group2/images/0/img_41812.jpg
group2/images/0/img_82683.xml
group2/images/0/img_82683.jpg
group2/images/0/img_67160.xml
group2/images/0/img_67160.jpg
group2/images/0/img_6740.xml
group2/images/0/img_6740.jpg
group2/images/0/img_24598.xml
group2/images/0/img_24598.jpg
group2/images/0/img_51655.xml
group2/images/0/img_51655.jpg
group2/images/0/img_63863.xml
group2/images/0/img_63863.jpg
group2/images/0/img_14305.xml
group2/images/0/img_14305.jpg
group2/images/0/img_45217.xml
group2/image

group2/images/0/img_38982.xml
group2/images/0/img_38982.jpg
group2/images/0/img_85059.xml
group2/images/0/img_85059.jpg
group2/images/0/img_75893.xml
group2/images/0/img_75893.jpg
group2/images/0/img_12279.xml
group2/images/0/img_12279.jpg
group2/images/0/img_86103.xml
group2/images/0/img_86103.jpg
group2/images/0/img_96272.xml
group2/images/0/img_96272.jpg
group2/images/0/img_91256.xml
group2/images/0/img_91256.jpg
group2/images/0/img_12612.xml
group2/images/0/img_12612.jpg
group2/images/0/img_49041.xml
group2/images/0/img_49041.jpg
group2/images/0/img_29983.xml
group2/images/0/img_29983.jpg
group2/images/0/img_52640.xml
group2/images/0/img_52640.jpg
group2/images/0/img_73366.xml
group2/images/0/img_73366.jpg
group2/images/0/img_90321.xml
group2/images/0/img_90321.jpg
group2/images/0/img_18391.xml
group2/images/0/img_18391.jpg
group2/images/0/img_93416.xml
group2/images/0/img_93416.jpg
group2/images/0/img_96710.xml
group2/images/0/img_96710.jpg
group2/images/0/img_67273.xml
group2/ima

group2/images/0/img_23109.xml
group2/images/0/img_23109.jpg
group2/images/0/img_1191.xml
group2/images/0/img_1191.jpg
group2/images/0/img_13764.xml
group2/images/0/img_13764.jpg
group2/images/0/img_18367.xml
group2/images/0/img_18367.jpg
group2/images/0/img_23655.xml
group2/images/0/img_23655.jpg
group2/images/0/img_10812.xml
group2/images/0/img_10812.jpg
group2/images/0/img_60617.xml
group2/images/0/img_60617.jpg
group2/images/0/img_97950.xml
group2/images/0/img_97950.jpg
group2/images/0/img_4672.xml
group2/images/0/img_4672.jpg
group2/images/0/img_4765.xml
group2/images/0/img_4765.jpg
group2/images/0/img_85062.xml
group2/images/0/img_85062.jpg
group2/images/0/img_44586.xml
group2/images/0/img_44586.jpg
group2/images/0/img_21400.xml
group2/images/0/img_21400.jpg
group2/images/0/img_21286.xml
group2/images/0/img_21286.jpg
group2/images/0/img_93669.xml
group2/images/0/img_93669.jpg
group2/images/0/img_20583.xml
group2/images/0/img_20583.jpg
group2/images/0/img_18439.xml
group2/images/0/

group2/images/0/img_63963.xml
group2/images/0/img_63963.jpg
group2/images/0/img_64485.xml
group2/images/0/img_64485.jpg
group2/images/0/img_64666.xml
group2/images/0/img_64666.jpg
group2/images/0/img_93555.xml
group2/images/0/img_93555.jpg
group2/images/0/img_96912.xml
group2/images/0/img_96912.jpg
group2/images/0/img_56388.xml
group2/images/0/img_56388.jpg
group2/images/0/img_7006.xml
group2/images/0/img_7006.jpg
group2/images/0/img_88744.xml
group2/images/0/img_88744.jpg
group2/images/0/img_11553.xml
group2/images/0/img_11553.jpg
group2/images/0/img_55894.xml
group2/images/0/img_55894.jpg
group2/images/0/img_84765.xml
group2/images/0/img_84765.jpg
group2/images/0/img_101726.xml
group2/images/0/img_101726.jpg
group2/images/0/img_66709.xml
group2/images/0/img_66709.jpg
group2/images/0/img_89815.xml
group2/images/0/img_89815.jpg
group2/images/0/img_51970.xml
group2/images/0/img_51970.jpg
group2/images/0/img_50823.xml
group2/images/0/img_50823.jpg
group2/images/0/img_51827.xml
group2/ima

##  An example of training using 20,000 labelled images
After tuning the hyperparameters (loss weights, learning rate, decay rate) on a smaller data set (2,000 labelled images), we applied the same setting to the larger training set (20,000 labelled images). Overall, the network is designed well enough that the loss decreases to a low value (loss=6). In this figure, the validation loss starts to increase at epoch 44, which is where we consider the model to generalize best over the whole training history.

<img src="figures/loss_set7_final_onetime.png"
     alt="Training and validation loss over epochs"
     style="float: left; margin-right: 5px;" width="500" />