In [1]:
import sys
# Add the parent directory to the module search path, presumably so that the
# `dvalib` package imported below resolves when the notebook is run from its
# own folder. NOTE(review): the entry is relative, so it depends on the
# kernel's current working directory.
sys.path.append('../')

In [2]:
from dvalib import yolo
import argparse
import colorsys
import imghdr
import os
import random
import numpy as np
from keras import backend as K
from keras.models import load_model
from PIL import Image
from dvalib.yolo.yad2k.models.keras_yolo import yolo_eval, yolo_head

Using TensorFlow backend.


In [6]:
# Root directory holding the YOLO assets (weights, anchors, class names and
# the test images).
# NOTE(review): this is a machine-specific absolute path; change it to point
# at your own checkout before running the notebook.
path = '/Users/aub3/Dropbox/DeepVideoAnalytics/dvalib/yolo/'

# Run configuration consumed by the cells below.
# The paths are built with os.path.join rather than '{}/...'.format(path):
# `path` already ends with a separator, so string formatting produced doubled
# separators such as '.../yolo//model_data/yolo.h5'.
args = {
    'model_path': os.path.join(path, 'model_data', 'yolo.h5'),
    'anchors_path': os.path.join(path, 'model_data', 'yolo_anchors.txt'),
    'classes_path': os.path.join(path, 'model_data', 'coco_classes.txt'),
    'test_path': os.path.join(path, 'images'),
    'output_path': os.path.join(path, 'images', 'out'),
    # Both thresholds are forwarded to yolo_eval when the graph is built.
    'score_threshold': 0.3,
    'iou_threshold': 0.5,
}

In [7]:
# Expand a leading '~' in every configured path so the values in `args` may
# be given relative to the user's home directory.
model_path = os.path.expanduser(args['model_path'])
anchors_path = os.path.expanduser(args['anchors_path'])
classes_path = os.path.expanduser(args['classes_path'])
test_path = os.path.expanduser(args['test_path'])
output_path = os.path.expanduser(args['output_path'])

# Make sure the output directory exists before any image is written to it.
if not os.path.exists(output_path):
    print('Creating output path {}'.format(output_path))
    # makedirs, unlike mkdir, also creates any missing parent directories
    # instead of raising OSError.
    os.makedirs(output_path)
# Session used later to evaluate the detection graph.
sess = K.get_session()

# Class names: one entry per line of the classes file, surrounding
# whitespace removed.
with open(classes_path) as f:
    class_names = [c.strip() for c in f.readlines()]

# Anchors: only the first line of the file is read; it is parsed as a
# comma-separated list of floats and reshaped into rows of two values.
with open(anchors_path) as f:
    anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    anchors = np.array(anchors).reshape(-1, 2)

yolo_model = load_model(model_path)

# Verify that the model's final layer matches the anchors and classes given.
num_classes = len(class_names)
num_anchors = len(anchors)
# TODO: Assumes dim ordering is channel last
model_output_channels = yolo_model.layers[-1].output_shape[-1]
# The message refers to the notebook's `args` keys; the previous text pointed
# at command-line flags that do not exist in this notebook.
assert model_output_channels == num_anchors * (num_classes + 5), \
    'Mismatch between model and given anchor and class sizes. ' \
    "Specify matching anchors and classes with args['anchors_path'] and " \
    "args['classes_path']."
print('{} model, anchors, and classes loaded.'.format(model_path))

# Check if model is fully convolutional, assuming channel last order.
model_image_size = yolo_model.layers[0].input_shape[1:3]
is_fixed_size = model_image_size != (None, None)

# One colour per class, with hues spread evenly over [0, 1).
# The divisor is forced to float: with two ints, `x / len(class_names)` is
# integer division under Python 2, which made every hue 0 and therefore gave
# all classes the same colour.
hue_divisor = float(len(class_names))
hsv_tuples = [(x / hue_divisor, 1., 1.) for x in range(len(class_names))]
colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
colors = [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in colors]
random.seed(10101)  # Fixed seed for consistent colors across runs.
random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
random.seed(None)  # Reset seed to default.

# Build the evaluation tensors. `input_image_shape` is a two-element
# placeholder that is fed per image when the session is run.
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
input_image_shape = K.placeholder(shape=(2, ))
boxes, scores, classes = yolo_eval(
    yolo_outputs,
    input_image_shape,
    score_threshold=args['score_threshold'],
    iou_threshold=args['iou_threshold'])

/Users/aub3/Dropbox/DeepVideoAnalytics/dvalib/yolo//model_data/yolo.h5 model, anchors, and classes loaded.


In [8]:
# Run detection on every image file found in test_path, print the detected
# boxes, and write a copy of each image to output_path.
# The listing is sorted so the processing order (and printed output) is the
# same on every run; os.listdir returns entries in arbitrary order.
for image_file in sorted(os.listdir(test_path)):
    image_path = os.path.join(test_path, image_file)

    # Skip anything that is not a regular file, e.g. sub-directories. With
    # the notebook's default config the output directory sits inside
    # test_path and would otherwise only be skipped via an exception.
    if not os.path.isfile(image_path):
        continue
    try:
        image_type = imghdr.what(image_path)
    except (IOError, OSError):
        # Unreadable entry: skip it. Only I/O errors are swallowed so that
        # KeyboardInterrupt and genuine bugs still surface.
        continue
    if not image_type:
        continue

    image = Image.open(image_path)
    # The network input is built from an RGB copy so that grayscale, palette
    # or RGBA files still yield three channels. `image` itself is left
    # untouched and is what gets saved below.
    rgb_image = image.convert('RGB')
    if is_fixed_size:  # TODO: When resizing we can use minibatch input.
        resized_image = rgb_image.resize(
            tuple(reversed(model_image_size)), Image.BICUBIC)
        image_data = np.array(resized_image, dtype='float32')
    else:
        # Due to skip connection + max pooling in YOLO_v2, inputs must have
        # width and height as multiples of 32.
        new_image_size = (image.width - (image.width % 32),
                          image.height - (image.height % 32))
        resized_image = rgb_image.resize(new_image_size, Image.BICUBIC)
        image_data = np.array(resized_image, dtype='float32')
        print(image_data.shape)
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    # input_image_shape is fed as [height, width] of the original image
    # (PIL's `size` is (width, height)).
    out_boxes, out_scores, out_classes = sess.run(
        [boxes, scores, classes],
        feed_dict={
            yolo_model.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0,
        })
    print('Found {} boxes for {}'.format(len(out_boxes), image_file))

    # Report detections in reverse index order.
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        score = out_scores[i]
        label = '{} {:.2f}'.format(predicted_class, score)
        # Each box is unpacked as (top, left, bottom, right); the values are
        # rounded to the nearest integer and clamped to the image bounds.
        top, left, bottom, right = out_boxes[i]
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

    # Nothing is drawn onto `image` in this cell, so the saved file is a
    # re-encoded copy of the input.
    image.save(os.path.join(output_path, image_file), quality=90)

# NOTE(review): once the session is closed this cell presumably cannot be
# re-run without first re-running the setup cell that obtains `sess`.
sess.close()

Found 3 boxes for dog.jpg
('truck 0.79', (441, 80), (703, 171))
('dog 0.79', (123, 222), (327, 528))
('bicycle 0.83', (128, 127), (570, 435))
Found 1 boxes for eagle.jpg
('bird 0.90', (144, 89), (618, 445))
Found 2 boxes for giraffe.jpg
('zebra 0.80', (290, 199), (430, 451))
('giraffe 0.88', (172, 31), (452, 420))
Found 5 boxes for horses.jpg
('horse 0.42', (98, 176), (463, 349))
('horse 0.69', (237, 201), (426, 370))
('horse 0.71', (0, 197), (153, 384))
('horse 0.83', (436, 211), (603, 348))
('horse 0.90', (0, 191), (335, 414))
Found 3 boxes for person.jpg
('dog 0.82', (61, 264), (200, 351))
('horse 0.86', (404, 133), (602, 344))
('person 0.87', (190, 94), (276, 380))
Found 0 boxes for scream.jpg
