In [1]:
%%writefile yolov3/configs.py
#================================================================
#
#   File name   : configs.py
#   Author      : PyLessons
#   Created date: 2020-06-04
#   Website     : https://pylessons.com/
#   GitHub      : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
#   Description : yolov3 configuration file
#
#================================================================

# YOLO options
YOLO_DARKNET_WEIGHTS        = "model_data/yolov3.weights"
YOLO_DARKNET_TINY_WEIGHTS   = "model_data/yolov3-tiny.weights"
YOLO_COCO_CLASSES           = "model_data/coco.names"
YOLO_STRIDES                = [8, 16, 32]
YOLO_IOU_LOSS_THRESH        = 0.5
YOLO_ANCHOR_PER_SCALE       = 3
YOLO_MAX_BBOX_PER_SCALE     = 100
YOLO_INPUT_SIZE             = 416
#YOLO_INPUT_SIZE             = 800
YOLO_ANCHORS                = [[[10,  13], [16,   30], [33,   23]],
                               [[30,  61], [62,   45], [59,  119]],
                               [[116, 90], [156, 198], [373, 326]]]
# Train options
TRAIN_YOLO_TINY             = False
TRAIN_SAVE_BEST_ONLY        = True # saves only best model according validation loss (True recommended)
TRAIN_SAVE_CHECKPOINT       = False # saves all best validated checkpoints in training process (may require a lot disk space) (False recommended)
#TRAIN_CLASSES               = "mnist/mnist.names"
TRAIN_CLASSES               = "dataset/LEs/LEs_name.txt"
#TRAIN_ANNOT_PATH            = "mnist/mnist_train.txt"
TRAIN_ANNOT_PATH            = "dataset/LEs/LEs_train.txt"
TRAIN_LOGDIR                = "log"
TRAIN_CHECKPOINTS_FOLDER    = "checkpoints"
TRAIN_MODEL_NAME            = "yolov3_LE800"
TRAIN_LOAD_IMAGES_TO_RAM    = False # faster training, but need more RAM
#TRAIN_BATCH_SIZE            = 8
TRAIN_BATCH_SIZE            = 2 
TRAIN_INPUT_SIZE            = 416
TRAIN_DATA_AUG              = True
TRAIN_TRANSFER              = False
TRAIN_FROM_CHECKPOINT       = False # "checkpoints/yolov3_custom_2"
TRAIN_LR_INIT               = 1e-4
TRAIN_LR_END                = 1e-6
TRAIN_WARMUP_EPOCHS         = 2
TRAIN_EPOCHS                = 100

# TEST options
TEST_ANNOT_PATH             = "dataset/LEs/LEs_test.txt"
TEST_BATCH_SIZE             = 4
TEST_INPUT_SIZE             = 416
TEST_DATA_AUG               = False
TEST_DECTECTED_IMAGE_PATH   = ""
TEST_SCORE_THRESHOLD        = 0.3
TEST_IOU_THRESHOLD          = 0.45


#YOLOv3-TINY WORKAROUND
if TRAIN_YOLO_TINY:
    YOLO_STRIDES            = [16, 32, 64]    
    YOLO_ANCHORS            = [[[10,  14], [23,   27], [37,   58]],
                               [[81,  82], [135, 169], [344, 319]],
                               [[0,    0], [0,     0], [0,     0]]]


Overwriting yolov3/configs.py


In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import shutil
import numpy as np
import tensorflow as tf
#from tensorflow.keras.utils import plot_model
from yolov3.dataset import Dataset
from yolov3.yolov3 import Create_Yolov3, YOLOv3, decode, compute_loss
from yolov3.utils import load_yolo_weights#, load_tiny_yolo_weights
from yolov3.configs import *


In [4]:
TRAIN_CLASSES, TRAIN_INPUT_SIZE

('dataset/LEs/LEs_name.txt', 416)

In [9]:
# !python train.py

2020-07-10 08:05:50.633349: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2020-07-10 08:05:50.729147: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fb152cb8750 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-07-10 08:05:50.729175: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
Traceback (most recent call last):
  File "train.py", line 173, in <module>
    main()
  File "train.py", line 38, in main
    trainset = Dataset('train')
  File "/Users/home/Documents/Research/repos/TensorFlow-2.x-YOLOv3/yolov3/dataset.py", line 38, in __init__
    self.annotations = self.load_annotations(dataset_type)
  File "/Users/home/Documents/Research/repos/TensorFlow-2.x-YOLOv3/yolov3/dataset.py", line 63, in load_annotations
    raise KeyError("%s does not exist ... " %image_path)
KeyError:

In [5]:
Darknet_weights = YOLO_DARKNET_WEIGHTS
if TRAIN_YOLO_TINY:
    TRAIN_MODEL_NAME = TRAIN_MODEL_NAME+"_Tiny"
    Darknet_weights = YOLO_DARKNET_TINY_WEIGHTS

gpus = tf.config.experimental.list_physical_devices('GPU')
if len(gpus) > 0:
    try: tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError: pass

if os.path.exists(TRAIN_LOGDIR): shutil.rmtree(TRAIN_LOGDIR)


In [6]:
trainset = Dataset('train')
testset = Dataset('test')

steps_per_epoch = len(trainset)
global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
warmup_steps = TRAIN_WARMUP_EPOCHS * steps_per_epoch
total_steps = TRAIN_EPOCHS * steps_per_epoch


In [7]:
if TRAIN_TRANSFER:
    Darknet = Create_Yolov3(input_size=YOLO_INPUT_SIZE)
    load_yolo_weights(Darknet, Darknet_weights) # use darknet weights
    #load_tiny_yolo_weights(Darknet, Darknet_weights) # use darknet weights

yolo = Create_Yolov3(input_size=YOLO_INPUT_SIZE, training=True, CLASSES=TRAIN_CLASSES)
if TRAIN_FROM_CHECKPOINT:
    try:
        yolo.load_weights(TRAIN_FROM_CHECKPOINT)
    except ValueError:
        print("Shapes are incompatible, transfering Darknet weights")
        TRAIN_FROM_CHECKPOINT = False

if TRAIN_TRANSFER and not TRAIN_FROM_CHECKPOINT:
    for i, l in enumerate(Darknet.layers):
        layer_weights = l.get_weights()
        if layer_weights != []:
            try:
                yolo.layers[i].set_weights(layer_weights)
            except:
                print("skipping", yolo.layers[i].name)


In [8]:
optimizer = tf.keras.optimizers.Adam()


In [9]:
writer = tf.summary.create_file_writer(TRAIN_LOGDIR)

def train_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = yolo(image_data, training=True)
        giou_loss=conf_loss=prob_loss=0

        # optimizing process
        grid = 3 if not TRAIN_YOLO_TINY else 2
        for i in range(grid):
            conv, pred = pred_result[i*2], pred_result[i*2+1]
            loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss

        gradients = tape.gradient(total_loss, yolo.trainable_variables)
        optimizer.apply_gradients(zip(gradients, yolo.trainable_variables))

        # update learning rate
        # about warmup: https://arxiv.org/pdf/1812.01187.pdf&usg=ALkJrhglKOPDjNt6SHGbphTHyMcT0cuMJg
        global_steps.assign_add(1)
        if global_steps < warmup_steps:# and not TRAIN_TRANSFER:
            lr = global_steps / warmup_steps * TRAIN_LR_INIT
        else:
            lr = TRAIN_LR_END + 0.5 * (TRAIN_LR_INIT - TRAIN_LR_END)*(
                (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
        optimizer.lr.assign(lr.numpy())

        # writing summary data
        with writer.as_default():
            tf.summary.scalar("lr", optimizer.lr, step=global_steps)
            tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
            tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
            tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
            tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
        writer.flush()
        
    return global_steps.numpy(), optimizer.lr.numpy(), giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()


In [10]:
validate_writer = tf.summary.create_file_writer(TRAIN_LOGDIR)
def validate_step(image_data, target):
    with tf.GradientTape() as tape:
        pred_result = yolo(image_data, training=False)
        giou_loss=conf_loss=prob_loss=0

        # optimizing process
        grid = 3 if not TRAIN_YOLO_TINY else 2
        for i in range(grid):
            conv, pred = pred_result[i*2], pred_result[i*2+1]
            loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]

        total_loss = giou_loss + conf_loss + prob_loss
        
    return giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()

In [11]:
# main
best_val_loss = 1000 # should be large at start
for epoch in range(TRAIN_EPOCHS):
    for image_data, target in trainset:
        results = train_step(image_data, target)
        cur_step = results[0]%steps_per_epoch
        print("epoch:{:2.0f} step:{:5.0f}/{}, lr:{:.6f}, giou_loss:{:7.2f}, conf_loss:{:7.2f}, prob_loss:{:7.2f}, total_loss:{:7.2f}"
              .format(epoch, cur_step, steps_per_epoch, results[1], results[2], results[3], results[4], results[5]))

    if len(testset) == 0:
        print("configure TEST options to validate model")
        yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME))
        continue
    
    count, giou_val, conf_val, prob_val, total_val = 0., 0, 0, 0, 0
    for image_data, target in testset:
        results = validate_step(image_data, target)
        count += 1
        giou_val += results[0]
        conf_val += results[1]
        prob_val += results[2]
        total_val += results[3]
    # writing validate summary data
    with validate_writer.as_default():
        tf.summary.scalar("validate_loss/total_val", total_val/count, step=epoch)
        tf.summary.scalar("validate_loss/giou_val", giou_val/count, step=epoch)
        tf.summary.scalar("validate_loss/conf_val", conf_val/count, step=epoch)
        tf.summary.scalar("validate_loss/prob_val", prob_val/count, step=epoch)
    validate_writer.flush()
        
    print("\n\ngiou_val_loss:{:7.2f}, conf_val_loss:{:7.2f}, prob_val_loss:{:7.2f}, total_val_loss:{:7.2f}\n\n".
          format(giou_val/count, conf_val/count, prob_val/count, total_val/count))

    if TRAIN_SAVE_CHECKPOINT and not TRAIN_SAVE_BEST_ONLY:
        yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME+"_val_loss_{:7.2f}".format(total_val/count)))
    if TRAIN_SAVE_BEST_ONLY and best_val_loss>total_val/count:
        yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME))
        best_val_loss = total_val/count
    if not TRAIN_SAVE_BEST_ONLY and not TRAIN_SAVE_CHECKPOINT:
        yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME))


epoch: 0 step:    2/6, lr:0.000017, giou_loss:  50.20, conf_loss:1838.61, prob_loss:  58.16, total_loss:1946.98
epoch: 0 step:    3/6, lr:0.000025, giou_loss:  37.20, conf_loss:1716.71, prob_loss:  41.22, total_loss:1795.13
epoch: 0 step:    4/6, lr:0.000033, giou_loss:  48.72, conf_loss:1692.52, prob_loss:  58.57, total_loss:1799.81
epoch: 0 step:    5/6, lr:0.000042, giou_loss:  70.51, conf_loss:1601.04, prob_loss:  82.68, total_loss:1754.23
epoch: 0 step:    0/6, lr:0.000050, giou_loss:  48.39, conf_loss:1526.08, prob_loss:  53.62, total_loss:1628.08
epoch: 0 step:    1/6, lr:0.000058, giou_loss:  50.93, conf_loss:1493.49, prob_loss:  59.10, total_loss:1603.51


giou_val_loss:  27.11, conf_val_loss:1830.61, prob_val_loss:  32.78, total_val_loss:1890.50


epoch: 1 step:    2/6, lr:0.000067, giou_loss:  62.22, conf_loss:1476.86, prob_loss:  73.52, total_loss:1612.60
epoch: 1 step:    3/6, lr:0.000075, giou_loss:  54.02, conf_loss:1435.04, prob_loss:  65.05, total_loss:1554.11
epoch: 1

KeyboardInterrupt: 

In [36]:
image, target = next(trainset)  

In [38]:
image.shape

(4, 416, 416, 3)

In [44]:
target[0][0].shape

(4, 52, 52, 3, 7)