In [1]:
import os
import yaml
import urllib
from PIL import Image
from enum import Enum
from pycocotools.coco import COCO

import xml.etree.cElementTree as ET
import glob
import argparse
import numpy as np
import json
import numpy
import cv2
from collections import OrderedDict
import scipy.misc
from skimage import measure   
from shapely.geometry import Polygon, MultiPolygon, MultiPoint
import random
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
import shutil
import pickle
import pandas as pd

# --- File naming -----------------------------------------------------------
# Every image copied into the cache is renamed with this COCO-style prefix.
img_prefix = 'COCO_train2014_0000'
DEFAULT_IMG_EXTENSION = '.jpg'

# --- Source dataset (BerkeleyDeepDrive) -------------------------------------
BASE_DIR = '/media/dean/datastore1/datasets/BerkeleyDeepDrive/'
IMAGE_LIST_DIR = os.path.join(BASE_DIR, 'bdd100k/images/100k/val/image_list.yml')
LABEL_LIST_DIR = os.path.join(BASE_DIR, 'bdd100k/labels/bdd100k_labels_images_val.json')

# --- Working tree and COCO-formatted outputs --------------------------------
WORKING_DIR = os.path.join(BASE_DIR, 'scalabel/darknet/')
COCO_DIRECTORY = os.path.join(WORKING_DIR, 'data/coco')
FIXED_COCO_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY,'annotations/fixed_instances_train2014.json')
BDD10K_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY,'annotations/bdd10k_instances_val2014.json')

# --- Local image cache: maybe_download() copies/downloads images here -------
DATACACHE = '/media/dean/datastore1/datasets/darknet_evaluate/data/coco/images/train2014'

In [2]:
def maybe_download(source_url, filename, cache_dir=None):
    """Make sure `filename` exists in the image cache and return its path.

    If the file is already cached nothing is copied or downloaded. Otherwise
    `source_url` is copied when it names an existing local path, and fetched
    with `urllib.request.urlretrieve` when it does not.

    Args:
        source_url: Local filesystem path or URL of the source image.
        filename: Name to give the file inside the cache directory.
        cache_dir: Directory to cache into. Defaults to the module-level
            DATACACHE when omitted.

    Returns:
        Path of the cached file.

    Raises:
        OSError / urllib.error.URLError: if the copy or the download fails.
    """
    # `import urllib` alone does not guarantee that the `request` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    target_dir = DATACACHE if cache_dir is None else cache_dir
    os.makedirs(target_dir, exist_ok = True)
    filepath = os.path.join(target_dir, filename)

    # Already cached: never overwrite.
    if os.path.exists(filepath):
        return filepath

    if os.path.exists(source_url):
        # Copy image into training directory
        print('Copying File', source_url, 'to file:', filepath)
        shutil.copyfile(source_url, filepath)
    else:
        filepath, _ = urllib.request.urlretrieve(source_url, filepath)
    return filepath

In [3]:
class Format(Enum):
    """Identifier for the on-disk annotation format of a dataset.

    Dataset.__init__ in this notebook has loading branches for `scalabel`
    and `bdd` only; `coco` and `darknet` are declared here but have no
    loading branch in that constructor.
    """
    scalabel = 0
    coco = 1
    darknet = 2
    bdd = 3

In [4]:
class Dataset(object):
    """In-memory index of images and their labels, backed by a pickle cache.

    Attributes:
        _images: dict mapping ``img_prefix + file name`` to a dict with the
            keys 'url', 'coco_path', 'width', 'height' plus per-image
            metadata copied from the label file.
        _annotations: dict mapping the same key to that image's 'labels' list.
        _pickle_file: path of the pickle file used to load or store the index.
    """

    def __init__(self, image_list, label_list, data_format=Format.scalabel, output_path=WORKING_DIR, pickle_file = None):
        """Load the dataset from a pickle cache, or build it and write the cache.

        Args:
            image_list: YAML file listing the images; each entry has a 'url'.
            label_list: JSON file holding a list of per-image label records.
            data_format: Format.scalabel or Format.bdd. Any other value loads
                nothing, and an empty index is still pickled.
            output_path: Accepted for interface compatibility; not used here.
            pickle_file: Optional existing cache. When given and present on
                disk it is loaded instead of parsing the image/label lists.

        Raises:
            KeyError: if a label record refers to an image missing from
                `image_list`, or lacks one of the expected fields.
        """
        self._images = {}
        self._annotations = {}

        # Check if pickle_file is None or does not exist
        if pickle_file and os.path.exists(pickle_file):
            self._pickle_file = pickle_file
            # NOTE: pickle.load can execute arbitrary code; only point this at
            # cache files produced by this notebook.
            with open(self._pickle_file, "rb") as pickle_in:
                pickle_dict = pickle.load(pickle_in)
            self._images = pickle_dict['images']
            self._annotations = pickle_dict['annotations']
        else:
            # Cache name is built from the image-list path components after
            # the first five, joined with underscores.
            path = os.path.normpath(image_list)
            self._pickle_file = "{}.pickle".format('_'.join(path.split(os.sep)[5:]))

            if data_format == Format.scalabel:
                self._load_images(image_list)
                self._load_labels(
                    label_list,
                    name_of=lambda ann: os.path.split(ann['url'])[-1],
                    copied_keys=('attributes', 'videoName', 'timestamp', 'index'))
            elif data_format == Format.bdd:
                self._load_images(image_list)
                print('Image Length:', len(self._images))
                self._load_labels(
                    label_list,
                    name_of=lambda ann: ann['name'],
                    copied_keys=('attributes', 'timestamp'))

            # Save object to picklefile
            pickle_dict = {'images':self._images,'annotations':self._annotations}
            with open(self._pickle_file,"wb") as pickle_out:
                pickle.dump(pickle_dict, pickle_out)

        print(len(self._annotations))

    def _load_images(self, image_list):
        """Cache every image listed in the YAML file and record its path and size."""
        with open(image_list, 'r') as stream:
            # safe_load parses plain data only; yaml.load() without an explicit
            # Loader is unsafe and is rejected by PyYAML >= 6.
            image_data = yaml.safe_load(stream)

        for img in image_data or []:
            img_url = img['url']
            key = img_prefix + os.path.split(img_url)[-1]
            full_path = maybe_download(img_url, key)
            # Context manager closes the file handle once the size is read.
            with Image.open(full_path) as im:
                width, height = im.size
            self._images[key] = {'url': img_url, 'coco_path': full_path,
                                 'width': width, 'height': height}

    def _load_labels(self, label_list, name_of, copied_keys):
        """Attach labels and per-image metadata from the JSON label file.

        `name_of` maps a label record to its image file name; `copied_keys`
        are the record fields copied onto the matching `_images` entry.
        """
        with open(label_list, 'r') as f:
            data = json.load(f)

        for ann in data:
            key = img_prefix + name_of(ann)
            self._annotations[key] = ann['labels']
            img_data = self._images[key]
            for field in copied_keys:
                img_data[field] = ann[field]

In [5]:
# Build the validation-set index from the image list and label file using the
# BDD label layout. No pickle_file is passed, so the constructor parses the
# lists (copying/downloading images as needed) rather than loading a cache.
example_set = Dataset(image_list = IMAGE_LIST_DIR, label_list = LABEL_LIST_DIR, data_format = Format.bdd)

Image Length: 10000
10000


In [6]:
# Keep the metadata dict of every image whose 'timeofday' attribute contains 'night'.
night_images = [
    img_meta
    for img_meta in example_set._images.values()
    if 'night' in img_meta['attributes']['timeofday']
]
night_count = len(night_images)
print('There are {} night images in this dataset.'.format(night_count))
print(night_images[:10])

There are 3929 night images in this dataset.
[{'url': '/media/dean/datastore1/datasets/BerkeleyDeepDrive/bdd100k/images/100k/val/b1c81faa-3df17267.jpg', 'coco_path': '/media/dean/datastore1/datasets/darknet_evaluate/data/coco/images/train2014/COCO_train2014_0000b1c81faa-3df17267.jpg', 'width': 1280, 'height': 720, 'attributes': {'weather': 'clear', 'scene': 'highway', 'timeofday': 'night'}, 'timestamp': 10000}, {'url': '/media/dean/datastore1/datasets/BerkeleyDeepDrive/bdd100k/images/100k/val/b1c81faa-c80764c5.jpg', 'coco_path': '/media/dean/datastore1/datasets/darknet_evaluate/data/coco/images/train2014/COCO_train2014_0000b1c81faa-c80764c5.jpg', 'width': 1280, 'height': 720, 'attributes': {'weather': 'clear', 'scene': 'highway', 'timeofday': 'night'}, 'timestamp': 10000}, {'url': '/media/dean/datastore1/datasets/BerkeleyDeepDrive/bdd100k/images/100k/val/b1ca2e5d-84cf9134.jpg', 'coco_path': '/media/dean/datastore1/datasets/darknet_evaluate/data/coco/images/train2014/COCO_train2014_0000

In [7]:
# Column names applied to the class table when it is read with pandas.
BDD100K_HEADER_ROW = ['class', 'super-category', 'special', 'description']
# CSV file listing the BDD100k classes.
BDD100K_LABELS_PATH = os.path.join(
    '/media/dean/datastore1/datasets/Scripts/',
    'BDD100k_Classes.csv',
)

In [8]:
# Get RoadCOCO Labels to Use as Ground Truth
# The first line of the CSV is skipped and BDD100K_HEADER_ROW supplies the
# column names instead.
gt_labels = pd.read_csv(BDD100K_LABELS_PATH, names=BDD100K_HEADER_ROW, skiprows=1)
# Last expression of the cell: shows the first rows as a quick sanity check.
gt_labels.head()

Unnamed: 0,class,super-category,special,description
0,person,person,,
1,rider,rider,,
2,car,car,,
3,truck,truck,,
4,bus,bus,,


In [9]:
# Represent Category IDs using RoadCOCO Labels
# cats2ids maps lower-cased class name -> row position in the class table;
# ids2cats is the inverse mapping.
cats2ids = {}
for i, label in enumerate(gt_labels['class'].tolist()):
    cats2ids[str(label).lower()] = i
ids2cats = {i: v for v, i in cats2ids.items()}

# Build Categories List in MS RoadCOCO Format
# DataFrame.as_matrix() was removed in pandas 1.0; `.values` yields the same
# row arrays and exists in both old and new pandas releases.
categories = []
for label in gt_labels.values:
    category = str(label[0]).lower()
    cat_id = cats2ids[category]

    # Round-trip through both tables so that a super-category which is not
    # itself a listed class raises KeyError instead of passing silently.
    sup_cat = ids2cats[cats2ids[str(label[1]).lower()]]

    categories.append({"id": cat_id, "name": category, "supercategory":sup_cat})
print (categories)

[{'id': 0, 'name': 'person', 'supercategory': 'person'}, {'id': 1, 'name': 'rider', 'supercategory': 'rider'}, {'id': 2, 'name': 'car', 'supercategory': 'car'}, {'id': 3, 'name': 'truck', 'supercategory': 'truck'}, {'id': 4, 'name': 'bus', 'supercategory': 'bus'}, {'id': 5, 'name': 'train', 'supercategory': 'train'}, {'id': 6, 'name': 'motor', 'supercategory': 'motor'}, {'id': 7, 'name': 'bike', 'supercategory': 'bike'}, {'id': 8, 'name': 'traffic sign', 'supercategory': 'traffic sign'}, {'id': 9, 'name': 'traffic light', 'supercategory': 'traffic light'}]




In [10]:
#fixed_coco = COCO(FIXED_COCO_ANNOTATIONS_FILE)
#categories = fixed_coco.loadCats(fixed_coco.getCatIds())

# Category names in the same order as the `categories` list.
category_names = [entry['name'] for entry in categories]
names_listing = '\n'.join(category_names)
print('Custom BDD100k categories:\n{}\n'.format(names_listing))

Custom BDD100k categories:
person
rider
car
truck
bus
train
motor
bike
traffic sign
traffic light



In [11]:
# Convert the loaded labels into COCO-style `images` and `annotations` lists.
images, anns = [], []
# Starting values for the generated image ids and annotation ids.
img_offset, ann_index = 10000001, 100000000
num_imgs = len(example_set._annotations.keys())

for img_id, fname in enumerate(example_set._annotations.keys()):
    # Resolve both lookups with the key exactly as it is stored; only the
    # emitted file name gets the prefix added when it is missing.
    img_meta = example_set._images[fname]
    img_labels = example_set._annotations[fname]

    file_name = fname if fname.startswith(img_prefix) else img_prefix + fname
    coco_img_id = img_offset + img_id  # Don't want to conflict with existing dataset
    images.append({'file_name': file_name, 'id': coco_img_id,
                   'height': img_meta['height'], 'width': img_meta['width']})

    for label in img_labels:
        # Only labels whose category is one of the custom classes are exported.
        if label['category'] not in category_names:
            continue

        # Labels without a 2D box (key missing or falsy) are skipped.
        box2d = label.get('box2d')
        if not box2d:
            continue

        # xy coords: [xstart, ystart, xstop, ystop] -> bbox = [x,y,width,height]
        xstart, ystart = float(box2d['x1']), float(box2d['y1'])
        xstop, ystop = float(box2d['x2']), float(box2d['y2'])

        # Negative start coordinates are clamped to 0; non-positive stop
        # coordinates are replaced by 3.0 (fallback value kept from the
        # original notebook).
        if xstart < 0:
            xstart = 0.0
        if ystart < 0:
            ystart = 0.0
        if ystop <= 0:
            ystop = 3.0
        if xstop <= 0:
            xstop = 3.0

        # Separate names so the image's own width/height are not shadowed.
        box_width = xstop - xstart
        box_height = ystop - ystart

        anns.append({
            'segmentation': [[]],  # no polygon is exported, only the box
            'iscrowd': 0,
            'image_id': coco_img_id,
            'category_id': cats2ids[label['category']],
            'id': ann_index,
            'bbox': (xstart, ystart, box_width, box_height),
            'area': float(box_width * box_height)
        })
        ann_index += 1
        

In [12]:
# Number of COCO annotation records collected in `anns`.
print(len(anns))

185526


In [13]:
from datetime import datetime, timezone

# Dataset-level metadata written to the 'info' section of the COCO JSON file.
INFO = {
    "description": "Road Object-Detections Dataset based on MS COCO",
    "url": "https://kache.ai",
    "version": "0.0.1",
    "year": 2018,
    "contributor": "deanwebb",
    # Naive UTC timestamp; datetime.utcnow() is deprecated since Python 3.12,
    # and dropping tzinfo keeps the serialized format free of a UTC offset.
    "date_created": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(' ')
}

# License entries written to the 'licenses' section of the COCO JSON file.
LICENSES = [
    {
        "id": 1,
        "name": "The MIT License (MIT)",
        "url": "https://opensource.org/licenses/MIT",
        "description":  """
                        The MIT License (MIT)
                        Copyright (c) 2017 Matterport, Inc.

                        Permission is hereby granted, free of charge, to any person obtaining a copy
                        of this software and associated documentation files (the "Software"), to deal
                        in the Software without restriction, including without limitation the rights
                        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                        copies of the Software, and to permit persons to whom the Software is
                        furnished to do so, subject to the following conditions:

                        The above copyright notice and this permission notice shall be included in
                        all copies or substantial portions of the Software.

                        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
                        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                        THE SOFTWARE.
                        """
    }
]

# Assemble the full COCO document and write it out.
coco_output = {'info': INFO, 'licenses': LICENSES, 'images':images, 'annotations':anns, 'categories': categories}
# Make sure the annotations directory exists so open(..., 'w') cannot fail on
# a missing parent directory.
os.makedirs(os.path.dirname(BDD10K_ANNOTATIONS_FILE), exist_ok=True)
with open(BDD10K_ANNOTATIONS_FILE, 'w') as output_json_file:
    json.dump(coco_output, output_json_file)

In [14]:
# Reload the JSON file written to BDD10K_ANNOTATIONS_FILE through pycocotools
# as a sanity check.
testing_coco = COCO(BDD10K_ANNOTATIONS_FILE)
# Ids of the custom categories, looked up by name.
category_ids = testing_coco.getCatIds(catNms=list(category_names))
image_ids = testing_coco.getImgIds()
# Pick one image record at random; no seed is set in this cell, so the
# selection is not fixed by anything shown here.
image_data = testing_coco.loadImgs(image_ids[np.random.randint(0, len(image_ids))])[0]
print(image_data)

loading annotations into memory...
Done (t=1.35s)
creating index...
index created!
{'file_name': 'COCO_train2014_0000bf42f468-f2b3301f.jpg', 'id': 10005333, 'height': 720, 'width': 1280}


In [15]:
# load and display instance annotations
# Read the randomly chosen image from the local cache and show it without axes.
image = io.imread(os.path.join(DATACACHE ,image_data['file_name']))
plt.imshow(image); plt.axis('off')
# NOTE(review): figure.figsize is assigned after imshow() has been called;
# presumably it only affects figures created afterwards - confirm.
pylab.rcParams['figure.figsize'] = (128.0, 180.0)
# No imgIds filter is passed, so the ids are selected by category only and are
# not restricted to `image_data`.
annotation_ids = testing_coco.getAnnIds( catIds=category_ids, iscrowd=None)


annotations = testing_coco.loadAnns(annotation_ids)
# NOTE(review): the annotations are counted but never drawn on the image in
# this cell - confirm whether an overlay was intended.
print(len(annotations))


185526


In [16]:
# Get Dataset Distribution
# For each category id, record (number of annotations, number of images).

dataset = {}

for cat in category_ids:
    cat_nm = testing_coco.loadCats(ids=[cat])[0]['name']
    annotation_ids = testing_coco.getAnnIds(catIds=[cat])
    image_ids = testing_coco.getImgIds(catIds=[cat])

    counts = (len(annotation_ids), len(image_ids))
    dataset[cat] = counts

    print(cat_nm.upper(), '| Annotations:', counts[0], ' | Images: ',  counts[1])

PERSON | Annotations: 13262  | Images:  3220
RIDER | Annotations: 649  | Images:  515
CAR | Annotations: 102506  | Images:  9879
TRUCK | Annotations: 4245  | Images:  2689
BUS | Annotations: 1597  | Images:  1242
TRAIN | Annotations: 15  | Images:  14
MOTOR | Annotations: 452  | Images:  334
BIKE | Annotations: 1007  | Images:  578
TRAFFIC SIGN | Annotations: 34908  | Images:  8221
TRAFFIC LIGHT | Annotations: 26885  | Images:  5653


In [17]:
# Prepare Annotations for Darknet training
WORKING_DIRECTORY ='/media/dean/datastore1/datasets/darknet_evaluate'
# NOTE: this rebinds COCO_DIRECTORY, which the first cell set to a path under
# WORKING_DIR; re-running earlier cells after this one will see the new value.
COCO_DIRECTORY = os.path.join(WORKING_DIRECTORY, 'data/coco')
# NOTE(review): this file name ('bdd10k_instances_train2014.json') differs from
# BDD10K_ANNOTATIONS_FILE ('bdd10k_instances_val2014.json') written earlier in
# the notebook - confirm which annotations file the conversion should read.
BDD10K_COCO_ANNOTATIONS_FILE = os.path.join(COCO_DIRECTORY, 'annotations', 'bdd10k_instances_train2014.json')
IMAGES_DIRECTORY = os.path.join(COCO_DIRECTORY, 'images', 'train2014')
LABELS_DIRECTORY = os.path.join(COCO_DIRECTORY, 'labels','train2014')
CATEGORY_NAMES = os.path.join(WORKING_DIRECTORY, 'data', 'coco.bdd100k.names')


# Run the conversion only when labels/train2014/manifast.txt does not exist yet
# (presumably written by the converter on a previous run - confirm).
if not os.path.exists(os.path.join(COCO_DIRECTORY, 'labels/train2014/manifast.txt')):
    # Output of the converter command is appended to this log file.
    yolo_convert_output = os.path.join(COCO_DIRECTORY, 'labels','convert2yolo_results.txt')
    # IPython shell escape: the $NAME / {NAME} tokens are filled in from the
    # Python variables defined in this cell.
    !python3 $WORKING_DIRECTORY/convert2Yolo/example.py --datasets COCO --img_path "{IMAGES_DIRECTORY}" --label "{BDD10K_COCO_ANNOTATIONS_FILE}" --convert_output_path "{LABELS_DIRECTORY}" --img_type [".jpg"] --manipast_path $LABELS_DIRECTORY --cls_list_file $CATEGORY_NAMES &>> $yolo_convert_output
        