In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
def parse_model_config(path):
    """Parse a yolo-v3 layer configuration file into module definitions.

    Every ``[section]`` header starts a new dict whose ``'type'`` entry holds
    the section name. Each following ``key=value`` line is stored in that dict
    with both sides stripped; values stay strings. ``convolutional`` sections
    are pre-seeded with ``batch_normalize = 0`` (an int) so the key is always
    present, and a ``batch_normalize=...`` line in the file overrides it.

    Blank lines, whitespace-only lines and lines whose first non-blank
    character is ``#`` are ignored.

    Args:
        path: Path of the configuration file to read.

    Returns:
        list[dict]: One dict per section, in file order.

    Raises:
        ValueError: If a non-header line contains no ``=``.
        IndexError: If a ``key=value`` line appears before the first header.
    """
    # The context manager closes the handle even when parsing fails part-way.
    with open(path, 'r') as file:
        raw_lines = file.read().split('\n')

    module_defs = []
    for raw_line in raw_lines:
        # Strip BEFORE filtering so indented comments and whitespace-only
        # lines are skipped instead of reaching the key/value branch.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):  # This marks the start of a new block
            block = {'type': line[1:-1].rstrip()}
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
            module_defs.append(block)
        else:
            # Split on the first '=' only, so a value may itself contain '='.
            key, value = line.split('=', 1)
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs


def save_model_config(model_defs, path):
    """Write module definitions to ``path`` in yolo-v3 ``.cfg`` layout.

    Each block is emitted as a ``[type]`` header line, then one ``key=value``
    line for every entry other than ``'type'``, then an empty separator line.

    Args:
        model_defs: Iterable of dicts; each must carry a string ``'type'`` entry.
        path: Destination file, opened in write mode.

    Returns:
        The ``path`` argument, unchanged.
    """
    with open(path, 'w') as writer:
        for block in model_defs:
            header = '[' + block['type'] + ']' + '\n'
            writer.write(header)
            for key, value in block.items():
                if key == 'type':
                    # Already written as the section header.
                    continue
                writer.write(str(key) + '=' + str(value) + '\n')
            writer.write('\n')
    return path

            
def save_data_config(data_config, path):
    """Write a data configuration mapping to ``path`` as ``key=value`` lines.

    Args:
        data_config: Mapping whose items are written in iteration order; keys
            and values are converted with ``str``.
        path: Destination file, opened in write mode.

    Returns:
        The ``path`` argument, unchanged.
    """
    with open(path, 'w') as writer:
        for key, value in data_config.items():
            writer.write(''.join((str(key), '=', str(value), '\n')))
    return path


def inject_model_config(dataset, model_config, hyperparams):
    """Overwrite training and class-count settings in a parsed model config.

    ``model_config`` is modified in place and also returned.

    * ``net`` blocks receive ``learning_rate``, ``batch`` and ``subdivisions``
      from ``hyperparams``, plus ``burn_in`` and ``max_batches`` derived from
      the number of items in ``dataset._images``.
    * ``yolo`` blocks receive ``classes`` (the number of category names), and
      the block immediately before each one gets ``filters`` set to
      ``(classes + 5) * 3``.

    Args:
        dataset: Object exposing ``_images`` (with ``.items()``) and a sized
            ``category_names``.
        model_config: List of block dicts, each with a ``'type'`` entry.
        hyperparams: Mapping with ``'lr'``, ``'batch'``, ``'subdivisions'``,
            ``'gpus'`` and ``'epochs'``.

    Returns:
        The same ``model_config`` list that was passed in.

    Raises:
        ZeroDivisionError: If ``hyperparams['gpus'] * hyperparams['batch']``
            is zero and the config contains a ``net`` block.
    """
    for i, block in enumerate(model_config):
        if block['type'] == 'net':
            # Whole number of (gpus * batch)-sized groups in the image set;
            # computed once and reused for both derived settings.
            batches_per_epoch = len(dataset._images.items()) // (hyperparams['gpus'] * hyperparams['batch'])
            block['learning_rate'] = hyperparams['lr']
            block['batch'] = hyperparams['batch']
            block['subdivisions'] = hyperparams['subdivisions']
            block['burn_in'] = batches_per_epoch
            block['max_batches'] = batches_per_epoch * hyperparams['epochs']
        elif block['type'] == 'yolo':
            num_classes = len(dataset.category_names)
            block['classes'] = num_classes
            # A yolo block at index 0 has no predecessor; without this guard
            # index -1 would silently rewrite the LAST block of the config.
            if i > 0:
                # NOTE(review): the factor 3 is hard-coded; presumably the
                # number of boxes predicted per yolo layer -- confirm against
                # the cfg's mask entries.
                model_config[i - 1]['filters'] = (num_classes + 5) * 3
    return model_config


def inject_data_config(dataset, data_config):
    """Fill a data configuration mapping with values taken from ``dataset``.

    Sets ``train``, ``classes``, ``valid``, ``names``, ``backup`` and ``gpus``
    on ``data_config`` (in place) and creates the backup directory on disk.

    Args:
        dataset: Object exposing ``darknet_manifast``, ``category_names``,
            ``names_config``, ``output_path`` and ``parse_nvidia_smi()``; the
            latter must return a mapping with an ``'Attached GPUs'`` entry.
        data_config: Mapping to update.

    Returns:
        The same ``data_config`` mapping that was passed in.
    """
    data_config['train'] = dataset.darknet_manifast
    data_config['classes'] = len(dataset.category_names)
    # TODO: Add Validation Set -- until then validation reuses the training manifest.
    data_config['valid'] = dataset.darknet_manifast
    data_config['names'] = dataset.names_config

    # The backup directory is a sibling of the dataset's output directory.
    backup_dir = os.path.abspath(os.path.join(dataset.output_path, os.pardir, 'backup'))
    os.makedirs(backup_dir, exist_ok=True)
    data_config['backup'] = backup_dir

    # Comma-separated ids 0..N-1 for the N GPUs reported by the dataset helper.
    gpu_count = int(dataset.parse_nvidia_smi()['Attached GPUs'])
    data_config['gpus'] = ','.join(map(str, range(gpu_count)))

    return data_config
            

def parse_data_config(path):
    """Parse a data configuration file of ``key=value`` lines into a dict.

    Blank lines and lines starting with ``#`` (after stripping) are ignored.
    Keys and values are stripped and kept as strings. The result always has a
    ``'gpus'`` entry: it defaults to ``'0,1'`` and is replaced when the file
    contains its own ``gpus`` line.

    Args:
        path: Path of the configuration file to read.

    Returns:
        dict: Option name -> string value.

    Raises:
        ValueError: If a non-comment, non-blank line contains no ``=``.
    """
    options = {'gpus': '0,1'}
    with open(path, 'r') as fp:
        for raw_line in fp:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # Split on the first '=' only, so a value (for example a file
            # path) may itself contain '='.
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options



In [3]:
import os
import yaml
import urllib
from PIL import Image
from enum import Enum
from pycocotools.coco import COCO
import xml.etree.cElementTree as ET
import glob
import argparse
import numpy as np
import json
import numpy
import cv2
from collections import OrderedDict
import scipy.misc
from skimage import measure   
import random
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import shutil
import pickle
import pandas as pd
from subprocess import Popen,PIPE,STDOUT,call 


from utils import datasets

# Machine-specific roots. Each can be overridden through the named environment
# variable so the notebook runs on another machine without editing this cell;
# when the variable is unset the original hard-coded location is used.
BDD100K_DIRECTORY = os.environ.get(
    'BDD100K_DIRECTORY',
    os.path.join('/media/dean/datastore1/datasets/BerkeleyDeepDrive', 'bdd100k'))
WORKING_DIRECTORY = os.environ.get(
    'DARKNET_WORKING_DIRECTORY',
    '/media/dean/datastore1/datasets/darknet')

# Everything below is derived from the two roots above.
DATA_DIRECTORY = os.path.join(WORKING_DIRECTORY, 'data')
COCO_DIRECTORY = os.path.join(DATA_DIRECTORY, 'coco')

# One sub-directory per training run; the training cell iterates over it.
TRAINERS_DIRECTORY = os.path.join(WORKING_DIRECTORY, 'trainers')
ANNOTATIONS_FILE = os.path.join(BDD100K_DIRECTORY, 'labels/bdd100k_labels_images_val.json')

# Template configs that are parsed, updated and re-saved for each trainer.
BASE_DATA_CONFIG = os.path.join(WORKING_DIRECTORY, 'cfg', 'bdd100k.data')
BASE_MODEL_CONFIG = os.path.join(WORKING_DIRECTORY, 'cfg', 'yolov3-bdd100k.cfg')

In [None]:
# For Run in Training Runs
# Every sub-directory of TRAINERS_DIRECTORY is one training run. For each run:
# export the dataset, write per-run data/model configs, then launch darknet
# and log its stdout.
trainers = os.listdir(TRAINERS_DIRECTORY)
all_training_runs = []

for trainer in trainers:
    TRAINER_DIR = os.path.join(TRAINERS_DIRECTORY, trainer)
    if not os.path.isdir(TRAINER_DIR):
        # os.listdir also returns plain files; only directories are runs.
        continue

    ## Prepare Dataset ##
    bdd_set = datasets.DataFormatter(annotations_list = ANNOTATIONS_FILE, input_format = datasets.Format.bdd,
                                     #pickle_file = 'BerkeleyDeepDrive_bdd100k_labels_kache_bdd100k_labels_images_train.json.pickle',
                                     output_path = os.path.join(TRAINER_DIR, 'data'),
                                     trainer_prefix = 'COCO_val2014_0000',
                                     s3_bucket = 'kache-scalabel/bdd100k/images/100k/val/')
    bdd_set.export(datasets.Format.darknet)

    print('Initiating Trainer:', TRAINER_DIR)
    # Grab hyperparameters from the directory name, which is expected to look
    # like run6_1gpu_001lr_64bat_16sd_600ep (the lr digits follow "0.").
    tokens = trainer.split('_')
    hyperparams = {'name': tokens[0],
                   'gpus': int(tokens[1].replace('gpu','')),
                   'lr': float('0.'+tokens[2].replace('lr','')),
                   'batch': int(tokens[3].replace('bat','')),
                   'subdivisions': int(tokens[4].replace('sd','')),
                   'epochs': int(tokens[5].replace('ep',''))}

    # Both config files are written into <trainer>/cfg with open(path, 'w'),
    # which does not create missing parent directories, so create the
    # directories before the first write.
    os.makedirs(os.path.join(TRAINER_DIR, 'data'), exist_ok = True)
    os.makedirs(os.path.join(TRAINER_DIR, 'cfg'), exist_ok = True)

    # Update data config
    data_config = parse_data_config(BASE_DATA_CONFIG)
    print(data_config,'\n')
    data_config = inject_data_config(bdd_set, data_config)
    print(data_config,'\n')
    data_cfg_path = save_data_config(data_config, os.path.join(TRAINER_DIR, 'cfg', os.path.split(BASE_DATA_CONFIG)[-1]))

    # Update model config
    model_config = parse_model_config(BASE_MODEL_CONFIG)
    model_config = inject_model_config(bdd_set, model_config, hyperparams)
    model_cfg_path = save_model_config(model_config, os.path.join(TRAINER_DIR, 'cfg', os.path.split(BASE_MODEL_CONFIG)[-1]))

    ##TODO: Download Darknet 53 weights into backup folder

    ## Run Training ##
    train_results_file = os.path.join(TRAINER_DIR, 'training_results.txt')
    CURRENT_WEIGHT = os.path.join(TRAINER_DIR, 'darknet53.conv.74')
    num_gpus = data_config['gpus']  # comma-separated GPU id list, despite the name
    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim. The binary is still
    # started with WORKING_DIRECTORY as its current directory.
    darknet_train = [os.path.join(WORKING_DIRECTORY, 'darknet'), 'detector', 'train',
                     data_cfg_path, model_cfg_path, CURRENT_WEIGHT, '-gpus', num_gpus]
    print('Initializing Training with the following parameters:','\n', ' '.join(darknet_train))
    proc = Popen(darknet_train, cwd=WORKING_DIRECTORY, stdout=PIPE)

    # Stream stdout line by line instead of buffering the whole run in memory.
    # The results file is appended to (as `tee -a` did); the .backup copy is
    # rewritten for every run.
    with open(train_results_file, 'ab') as results_log, \
            open(train_results_file+'.backup', 'wb') as backup_log:
        for chunk in proc.stdout:
            results_log.write(chunk)
            results_log.flush()  # keep the log followable while training runs
            backup_log.write(chunk)

    return_code = proc.wait()
    if return_code != 0:
        print('darknet exited with status', return_code, 'for', TRAINER_DIR)

Length of COCO Images 10000
Length of Coco Annotations: 185526
Initiating Trainer: /media/dean/datastore1/datasets/darknet/trainers/run6_1gpu_001lr_64bat_16sd_600ep
{'gpus': '0,1', 'classes': '10', 'train': '/media/anthony/deans_data/darknet/data/coco/labels/train2014/manifast.txt', 'valid': '/media/anthony/deans_data/darknet/data/coco/labels/train2014/image_list.txt', 'names': '/media/anthony/deans_data/darknet/data/coco.bdd100k.names', 'backup': '/media/anthony/deans_data/darknet/backup', 'eval': 'coco'} 

{'gpus': '0', 'classes': 12, 'train': '/media/dean/datastore1/datasets/darknet/trainers/run6_1gpu_001lr_64bat_16sd_600ep/data/coco/labels/val2014/manifast.txt', 'valid': '/media/dean/datastore1/datasets/darknet/trainers/run6_1gpu_001lr_64bat_16sd_600ep/data/coco/labels/val2014/manifast.txt', 'names': '/media/dean/datastore1/datasets/darknet/trainers/run6_1gpu_001lr_64bat_16sd_600ep/cfg/COCO_val2014_0000.names', 'backup': '/media/dean/datastore1/datasets/darknet/trainers/run6_1gpu_0

In [None]:
# Calls export with Format.scalabel on `bdd_set`, which is only assigned inside
# the trainer loop of the previous cell: this therefore acts on the dataset of
# the last trainer processed and raises NameError if that loop never ran.
bdd_set.export(datasets.Format.scalabel)