### Imports / Constants

In [11]:
import os
import yaml
import urllib
from PIL import Image
from enum import Enum
from pycocotools.coco import COCO

import xml.etree.cElementTree as ET
import glob
import argparse
import numpy as np
import json
import numpy
import cv2
from collections import OrderedDict
import scipy.misc
from skimage import measure   
from shapely.geometry import Polygon, MultiPolygon, MultiPoint
import random
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import shutil
import pickle
import pandas as pd
import ast


# Root of the BerkeleyDeepDrive data on the local machine (absolute, machine-specific path).
BASE_DIR = '/media/dean/datastore1/datasets/BerkeleyDeepDrive/'
# Darknet working tree under the dataset root; default `output_path` of Dataset.
WORKING_DIR = os.path.join(BASE_DIR, 'scalabel/darknet/')
# NOTE: despite the *_DIR suffix this is the path of a YAML file, not a directory.
IMAGE_LIST_DIR = os.path.join(BASE_DIR, 'bdd100k/images/100k/val/image_list.yml')
LABEL_LIST_DIR = os.path.join(BASE_DIR, 'bdd100k/labels/100k/val/')
# NOTE: COCO_DIRECTORY is reassigned by the Darknet preparation cell at the end of the notebook.
COCO_DIRECTORY = os.path.join(WORKING_DIR, 'data/coco')
# Directory that maybe_download() copies/downloads images into.
DATACACHE = os.path.join(COCO_DIRECTORY, 'images/train2014')
# Prefix prepended to every cached image file name; also part of every Dataset key.
img_prefix = 'COCO_train2014_0000'
# Extension appended to BDD label names that do not already end with it.
DEFAULT_IMG_EXTENSION = '.jpg'

FIXED_COCO_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY,'annotations/fixed_instances_train2014.json')
BDD10K_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY,'annotations/bdd10k_instances_val2014.json')
# Output file that receives the image records dumped as JSON further down.
SCALABEL_FORMAT_ANNOTATIONS = os.path.join(COCO_DIRECTORY,'annotations/vgglabels_scalabel_format.json')

## VGG Labeler Dataset Extraction Parameters ##
VGG_ANNS_CSV = os.path.join(BASE_DIR,'data', 'night_detections.csv')
# Column names applied to the VGG CSV when it is read with skiprows=1.
HEADER_ROW=['filename', 'file_size', 'file_attributes', 'region_count', 'region_id', 'region_shape_attributes', 'region_attributes']


In [2]:
def maybe_download(source_url, filename):
    """Make sure an image is present in DATACACHE and return its path.

    If the target file already exists it is returned untouched. Otherwise the
    image is copied from ``source_url`` when that is an existing local path,
    or downloaded from it when it is not.

    Args:
        source_url: Local filesystem path or URL of the image.
        filename: Target name, joined onto DATACACHE with ``os.path.join``
            (so an absolute ``filename`` is used as-is).

    Returns:
        Path of the cached file.

    Raises:
        Whatever ``shutil.copyfile`` / ``urllib.request.urlretrieve`` raise
        when the copy or download fails.
    """
    # `import urllib` alone does not guarantee that the `request` submodule
    # is loaded, so import it explicitly where it is used.
    import urllib.request

    os.makedirs(DATACACHE, exist_ok=True)
    filepath = os.path.join(DATACACHE, filename)

    # Already cached: nothing to do.
    if os.path.exists(filepath):
        return filepath

    if os.path.exists(source_url):
        # Copy image into training directory
        print('Copying File', source_url, 'to file:', filepath)
        shutil.copyfile(source_url, filepath)
    else:
        try:
            filepath, _ = urllib.request.urlretrieve(source_url, filepath)
        except Exception:
            # Do not leave a truncated file behind: a later call would
            # mistake it for a valid cached image.
            if os.path.exists(filepath):
                os.remove(filepath)
            raise
    return filepath

In [3]:
# Preview the raw VGG annotation CSV: the file's own first row is skipped
# and the HEADER_ROW column names are applied instead.
vgg_annotations = pd.read_csv(VGG_ANNS_CSV, names=HEADER_ROW, skiprows=1)
vgg_annotations.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,http://ec2-18-236-156-72.us-west-2.compute.ama...,441189,{},5,0,"{""name"":""rect"",""x"":765,""y"":515,""width"":51,""hei...","{""type"":""car""}"
1,http://ec2-18-236-156-72.us-west-2.compute.ama...,441189,{},5,1,"{""name"":""rect"",""x"":590,""y"":512,""width"":72,""hei...","{""type"":""car""}"
2,http://ec2-18-236-156-72.us-west-2.compute.ama...,441189,{},5,2,"{""name"":""rect"",""x"":507,""y"":524,""width"":25,""hei...","{""type"":""car""}"
3,http://ec2-18-236-156-72.us-west-2.compute.ama...,441189,{},5,3,"{""name"":""rect"",""x"":683,""y"":521,""width"":19,""hei...","{""type"":""car""}"
4,http://ec2-18-236-156-72.us-west-2.compute.ama...,441189,{},5,4,"{""name"":""rect"",""x"":668,""y"":520,""width"":14,""hei...","{""type"":""car""}"


In [4]:
# CSV listing the BDD100k classes (absolute, machine-specific path).
BDD100K_LABELS_PATH = os.path.join('/media/dean/datastore1/datasets/Scripts/','BDD100k_Classes.csv')
# Column names applied to that CSV when it is read with skiprows=1.
BDD100K_HEADER_ROW = ['class', 'super-category', 'special', 'description']

In [5]:
# Get RoadCOCO Labels to Use as Ground Truth
# The CSV's own first row is skipped; BDD100K_HEADER_ROW supplies the column names.
gt_labels = pd.read_csv(BDD100K_LABELS_PATH, names=BDD100K_HEADER_ROW, skiprows=1)
gt_labels.head()

Unnamed: 0,class,super-category,special,description
0,person,person,,
1,rider,rider,,
2,car,car,,
3,truck,truck,,
4,bus,bus,,


In [6]:
# Represent Category IDs using RoadCOCO Labels
# cats2ids: lower-cased class name -> integer id (row position in the CSV).
# ids2cats: the inverse mapping.
cats2ids = {}
for i, label in enumerate(gt_labels['class'].tolist()):
    cats2ids[str(label).lower()] = i
ids2cats = {i: v for v, i in cats2ids.items()}

# Build Categories List in MS RoadCOCO Format
categories = []
# DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the same
# row arrays on both old and new pandas versions.
for label in gt_labels.values:
    category = str(label[0]).lower()
    cat_id = cats2ids[category]

    # The name -> id -> name round trip doubles as validation: it raises
    # KeyError when a super-category is not itself one of the known classes.
    sup_cat = ids2cats[cats2ids[str(label[1]).lower()]]

    categories.append({"id": cat_id, "name": category, "supercategory":sup_cat})
category_names = [category['name'] for category in categories]
print('Custom BDD100k categories:\n{}\n'.format('\n'.join(category_names)))

Custom BDD100k categories:
person
rider
car
truck
bus
train
motor
bike
traffic sign
traffic light





In [7]:
class Format(Enum):
    """Annotation formats selectable through ``Dataset(data_format=...)``."""
    scalabel = 0  # handled by Dataset
    coco = 1      # no handler in Dataset.__init__
    darknet = 2   # no handler in Dataset.__init__
    bdd = 3       # handled by Dataset
    vgg = 4       # handled by Dataset (CSV export)

In [8]:
class Dataset(object):
    """Image/annotation index built from Scalabel, BDD100K or VGG label exports.

    After construction:
      * ``self._images`` maps ``img_prefix + <file name>`` to a dict of image
        metadata.
      * ``self._annotations`` maps the same key to the list of labels of
        that image.

    When the data is parsed (rather than restored from ``pickle_file``), the
    two dicts are pickled to ``self._pickle_file``, a relative file name
    derived from the path components of ``image_list``.
    """

    # Column names applied to the VGG annotation CSV (its own header row is skipped).
    _VGG_HEADER_ROW = ['filename', 'file_size', 'file_attributes', 'region_count',
                       'region_id', 'region_shape_attributes', 'region_attributes']

    # Raw VGG label -> value stored under the 'Traffic Light Color' attribute.
    _VGG_TRAFFIC_LIGHT_COLORS = {
        'tlr': [2, 'R'],
        'tlg': [1, 'G'],
        'tla': [3, 'Y'],
        'tlna': [0, 'NA'],
        'traffic light': [0, 'NA'],
    }

    # Raw VGG labels that are folded into the 'traffic sign' category.
    _VGG_TRAFFIC_SIGN_ALIASES = ('speedlimitsign', 'stop sign', 'cone', 'clock')

    def __init__(self, annotations_list, image_list = None, data_format=Format.scalabel, output_path=WORKING_DIR, pickle_file = None):
        """Load a dataset from a pickle cache or parse it from its label files.

        Args:
            annotations_list: Scalabel JSON file, BDD100K directory of
                per-image ``*.json`` files, or VGG CSV file, depending on
                ``data_format``.
            image_list: YAML file listing image URLs. Read by the Scalabel
                and BDD handlers; for every format it is also the source of
                the pickle cache file name (``annotations_list`` is used for
                the name when it is None).
            data_format: ``Format`` member selecting the handler. Members
                without a handler produce an empty dataset.
            output_path: Currently unused; kept for interface compatibility.
            pickle_file: Optional cache file. If it exists, the dataset is
                restored from it and nothing else is read.
        """
        self._images = {}
        self._annotations = {}

        # Check if pickle_file is None or does not exist
        if pickle_file and os.path.exists(pickle_file):
            self._pickle_file = pickle_file
            # NOTE: pickle.load can execute arbitrary code; only pass cache
            # files that this notebook produced itself.
            with open(self._pickle_file, "rb") as pickle_in:
                pickle_dict = pickle.load(pickle_in)
            self._images = pickle_dict['images']
            self._annotations = pickle_dict['annotations']
        else:
            # NOTE(review): a non-existent `pickle_file` is not used as the
            # save target; the cache name is always derived from the path.
            name_source = image_list if image_list is not None else annotations_list
            path = os.path.normpath(name_source)
            self._pickle_file = "{}.pickle".format('_'.join(path.split(os.sep)[5:]))

            if data_format == Format.scalabel:
                self._load_scalabel(image_list, annotations_list)
            elif data_format == Format.bdd:
                self._load_bdd(image_list, annotations_list)
            elif data_format == Format.vgg:
                self._load_vgg(annotations_list)

            # Save object to picklefile
            pickle_dict = {'images':self._images,'annotations':self._annotations}
            with open(self._pickle_file,"wb") as pickle_out:
                pickle.dump(pickle_dict, pickle_out)

        print(len(self._images))

    def _load_image_list(self, image_list):
        """Fetch every image named in the YAML list and record its URL, path and size."""
        with open(image_list, 'r') as stream:
            # safe_load: the list is plain data, and yaml.load() without an
            # explicit Loader is rejected by PyYAML >= 6.
            image_data = yaml.safe_load(stream)

        for img in image_data or []:
            img_url = img['url']
            fname = os.path.split(img_url)[-1]
            key = img_prefix + fname
            full_path = maybe_download(img_url, key)
            # Context manager releases the file handle once the size is read.
            with Image.open(full_path) as im:
                width, height = im.size
            self._images[key] = {'url': img_url, 'coco_path': full_path,
                                 'width': width, 'height': height}

    def _load_scalabel(self, image_list, annotations_list):
        """Scalabel handler: images from the YAML list, labels from one JSON file."""
        self._load_image_list(image_list)

        # Import Labels
        with open(annotations_list, 'r') as f:
            data = json.load(f)

        for ann in data:
            fname = os.path.split(ann['url'])[-1]
            key = img_prefix + fname
            self._annotations[key] = ann['labels']
            # Raises KeyError when a label refers to an image missing from the list.
            img_data = self._images[key]
            img_data['attributes'] = ann['attributes']
            img_data['videoName'] = ann['videoName']
            img_data['timestamp'] = ann['timestamp']
            img_data['index'] = ann['index']

    def _load_bdd(self, image_list, annotations_list):
        """BDD100K handler: images from the YAML list, labels from per-image JSON files."""
        self._load_image_list(image_list)
        print('Image Length:', len(self._images))

        # Get labels
        for img_label in glob.glob(os.path.join(annotations_list, '*.json')):
            with open(img_label, 'r') as f:
                data = json.load(f)

            fname = data['name']
            if not fname.endswith(DEFAULT_IMG_EXTENSION):
                fname = fname + DEFAULT_IMG_EXTENSION
            key = img_prefix + fname

            # Flatten the objects of all frames into one label list per image.
            labels = []
            self._annotations[key] = labels
            for img_frame in data['frames']:
                labels.extend(img_frame['objects'])

            # Raises KeyError when a label file has no matching image in the list.
            self._images[key]['attributes'] = data['attributes']

    def _load_vgg(self, annotations_list):
        """VGG handler (legacy system): images and labels both come from one CSV."""
        vgg_annotations = pd.read_csv(annotations_list, names=self._VGG_HEADER_ROW, skiprows=1)

        # Group the rows by lower-cased file name once instead of re-scanning
        # the whole table for every image. `.values` replaces as_matrix(),
        # which was removed in pandas 1.0.
        rows_by_url = {}
        for row in vgg_annotations.values:
            rows_by_url.setdefault(row[0].lower(), []).append(row)

        ann_idx = int(5e6)
        img_paths = sorted(set(vgg_annotations['filename'].tolist()))

        # loop through each image
        for idx, img_url in enumerate(img_paths, start=int(1e6)):
            # Download Image if not exist
            fname = '_'.join(img_url.split('/')[-2:])
            key = img_prefix + fname
            full_path = maybe_download(img_url, key)

            img = {'name': key,
                   'url': img_url,
                   'videoName': '',
                   # Image size in bytes, measured on the cached file.
                   'file_size': os.stat(full_path).st_size,
                   'index': idx,
                   'timestamp': 10000,
                   'labels': [],
                   # Fixed attributes applied to every VGG image.
                   'attributes': {'weather': 'clear',
                                  'scene': 'highway',
                                  'timeofday': 'night'}}

            for annotation in rows_by_url.get(img_url.lower(), []):
                ann = self._vgg_label_from_row(annotation, ann_idx)
                if ann is None:
                    continue
                img['labels'].append(ann)
                ann_idx += 1

            self._images[key] = img
            # Separate list object holding the same label dicts.
            self._annotations[key] = list(img['labels'])

    def _vgg_label_from_row(self, annotation, ann_idx):
        """Convert one VGG CSV row into a label dict, or None if the row is skipped."""
        ann = {'id': ann_idx,
               'attributes': {'Occluded': False, 'Truncated': False},
               'manual': True,
               'poly2d': None,
               'box3d': None,
               'box2d': None}

        d = ast.literal_eval(annotation[5])
        if d:
            # Clamp the box origin into the image and force a positive size.
            if float(d['x']) < 0.0:
                d['x'] = 0.0
            if float(d['y']) < 0.0:
                d['y'] = 0.0
            if float(d['height']) <= 0.0:
                d['height'] = 1.0
            if float(d['width']) <= 0.0:
                d['width'] = 1.0

            ann['box2d'] = {'x1': d['x'],
                            'x2': d['x'] + d['width'],
                            'y1': d['y'],
                            'y2': d['y'] + d['height']}

        cls = ast.literal_eval(annotation[6])
        # A row without a 'type' attribute is treated like an empty label.
        cat = str(cls.get('type', '')).lower().strip()

        if not cat or cat == 'fire hydrant':
            return None
        if cat in self._VGG_TRAFFIC_LIGHT_COLORS:
            # Fresh list per label so labels never share one mutable value.
            ann['attributes']['Traffic Light Color'] = list(self._VGG_TRAFFIC_LIGHT_COLORS[cat])
            ann['category'] = 'traffic light'
        elif cat == 'motorbike':
            # 'motor' is the class name in the category table ('motor bike' is not).
            ann['category'] = 'motor'
        elif cat in self._VGG_TRAFFIC_SIGN_ALIASES:
            # Previously only the local name was reassigned, leaving the
            # label without any 'category' key.
            ann['category'] = 'traffic sign'
        elif cat not in category_names:
            return None
        else:
            ann['category'] = cat
        return ann


In [9]:
# Build the dataset from the VGG CSV. The VGG handler does not read
# `image_list`; here it only determines the name of the pickle cache file.
example_set = Dataset(image_list = IMAGE_LIST_DIR, annotations_list = VGG_ANNS_CSV, data_format = Format.vgg)



4056


In [12]:
# Dump every image record (including its 'labels' list) as one JSON array.
with open(SCALABEL_FORMAT_ANNOTATIONS, 'w') as output_json_file:
    imgs_list = list(example_set._images.values())
    json.dump(imgs_list, output_json_file)

In [None]:
# Keep the image records whose 'timeofday' attribute contains 'night'.
night_images = [
    record
    for record in example_set._images.values()
    if 'night' in record['attributes']['timeofday']
]
night_image_count = len(night_images)
print('There are {} night images in this dataset.'.format(night_image_count))
# Show only the first ten records to keep the cell output short.
print(night_images[:10])

In [None]:
# Disabled alternative: take the categories from the fixed COCO annotation file.
#fixed_coco = COCO(FIXED_COCO_ANNOTATIONS_FILE)
#categories = fixed_coco.loadCats(fixed_coco.getCatIds())

# Re-derive the list of category names and print one name per line.
category_names = []
for entry in categories:
    category_names.append(entry['name'])
joined_names = '\n'.join(category_names)
print('Custom BDD100k categories:\n{}\n'.format(joined_names))

In [None]:
# load and display instance annotations
# NOTE(review): `image_data`, `testing_coco` and `category_ids` are not defined
# in any cell visible above; presumably they come from removed or external
# cells - confirm, otherwise this cell fails on a fresh kernel.
image = io.imread(os.path.join(DATACACHE ,image_data['file_name']))
plt.imshow(image); plt.axis('off')
# NOTE(review): the figure size is set after imshow(); presumably it only
# affects figures created afterwards - confirm the intended order.
pylab.rcParams['figure.figsize'] = (128.0, 180.0)
annotation_ids = testing_coco.getAnnIds( catIds=category_ids, iscrowd=None)


annotations = testing_coco.loadAnns(annotation_ids)
print(len(annotations))


In [None]:
# Get Dataset Distribution
# NOTE(review): `testing_coco` and `category_ids` are not defined in any cell
# visible above - confirm where they are created.

# category id -> (number of annotation ids, number of image ids)
dataset = {}

for cat in category_ids:
    annotation_ids = testing_coco.getAnnIds(catIds=[cat])
    image_ids = testing_coco.getImgIds(catIds=[cat])
    cat_nm = testing_coco.loadCats(ids=[cat])[0]['name']
    dataset[cat] = (len(annotation_ids), len(image_ids))
    
    print(cat_nm.upper(), '| Annotations:', dataset[cat][0], ' | Images: ',  dataset[cat][1])

In [None]:
# Prepare Annotations for Darknet training
# NOTE(review): this root uses 'datastore' while BASE_DIR uses 'datastore1' -
# confirm that the difference is intentional.
WORKING_DIRECTORY ='/media/dean/datastore/datasets/darknet_evaluate'
# NOTE: reassigns COCO_DIRECTORY from the constants cell; constants derived
# from the earlier value (e.g. DATACACHE) are not recomputed.
COCO_DIRECTORY = os.path.join(WORKING_DIRECTORY, 'data/coco')
BDD10K_COCO_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY, 'annotations', 'bdd10k_instances_train2014.json')
IMAGES_DIRECTORY = os.path.join(COCO_DIRECTORY, 'images', 'train2014')
LABELS_DIRECTORY = os.path.join(COCO_DIRECTORY, 'labels','train2014')
CATEGORY_NAMES = os.path.join(WORKING_DIRECTORY, 'data', 'coco.bdd100k.names')


# Run the converter only when labels/train2014/manifast.txt does not exist yet
# (presumably the manifest written by convert2Yolo - confirm the file name).
if not os.path.exists(os.path.join(COCO_DIRECTORY, 'labels/train2014/manifast.txt')):
    # stdout and stderr of the converter are appended to this log file.
    yolo_convert_output = os.path.join(COCO_DIRECTORY, 'labels','convert2yolo_results.txt')
    !python3 $WORKING_DIRECTORY/convert2Yolo/example.py --datasets COCO --img_path "{IMAGES_DIRECTORY}" --label "{BDD10K_COCO_ANNOTATIONS_FILE}" --convert_output_path "{LABELS_DIRECTORY}" --img_type [".jpg"] --manipast_path $LABELS_DIRECTORY --cls_list_file $CATEGORY_NAMES &>> $yolo_convert_output
        