In [10]:
# ! pip install keras==2.0.7
# ! pip install tensorflow==1.2.1

In [46]:
import os
import matplotlib.pyplot as plt
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
import keras.layers as ly
from keras.models import Model, load_model
import colorsys
import imghdr
import random

In [6]:
def yolo_filter_boxes(box_conf, boxes, box_class_probs, threshold = 0.6):
    box_scores = np.multiply(box_conf, box_class_probs)
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)
    
    filtering_mask = box_class_scores >= threshold
    
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)
    
    return scores, boxes, classes

In [7]:
with tf.Session() as test_a:
    box_conf = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4)
    boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4)
    box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4)
    scores, boxes, classes = yolo_filter_boxes(box_conf, boxes, box_class_probs, threshold=0.5)
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.shape))
    print("boxes.shape = " + str(boxes.shape))
    print("classes.shape = " + str(classes.shape))

scores[2] = 59.888718
boxes[2] = [ 6.191773  -1.8889081  2.7372346  3.96097  ]
classes[2] = 7
scores.shape = (?,)
boxes.shape = (?, 4)
classes.shape = (?,)


In [8]:
def iou(box1, box2):
    (box1_x1, box1_y1, box1_x2, box1_y2) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2) = box2
    
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = max(box1[2], box2[2])
    yi2 = max(box1[3], box2[3])
    inter_width = max(xi2 - xi1, 0)
    inter_height = max(yi2 - yi1, 0)
    inter_area = inter_width * inter_height
    
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])    
    union_area = box1_area + box2_area - inter_area
    
    iou = inter_area / union_area
    
    return iou

In [9]:
box1 = (2, 1, 4, 3)
box2 = (1, 2, 3, 4) 
print("iou for intersecting boxes = " + str(iou(box1, box2)))

box1 = (1,1,3,3)
box2 = (2,3,3,4)
print("iou for boxes that only touch at edges = " + str(iou(box1,box2)))

iou for intersecting boxes = 1.0
iou for boxes that only touch at edges = 0.25


In [16]:
def yolo_boxes_to_corners(box_xy, box_wh):
    box_mins = box_xy - (box_wh / 2.)
    box_maxes = box_xy + (box_wh / 2.)

    return K.concatenate([
        box_mins[..., 1:2],  # y_min
        box_mins[..., 0:1],  # x_min
        box_maxes[..., 1:2],  # y_max
        box_maxes[..., 0:1]  # x_max
    ])

In [17]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    max_boxes_tensor = K.variable(max_boxes, dtype="int32")
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold)
    
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)
    
    return scores, boxes, classes

In [18]:
with tf.Session() as test_b:
    scores = tf.random_normal([54,], mean=1, stddev=4)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4)    
    classes = tf.random_normal([54,], mean=1, stddev=4)   
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print('classes.shape = ' + str(classes.eval().shape))

scores[2] = 8.5403595
boxes[2] = [6.99681   1.8585151 4.0391297 9.1974   ]
classes[2] = -0.42663074
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


In [26]:
def scale_boxes(boxes, img_shape):
  height = img_shape[0]
  width = img_shape[1]
  img_dims = K.stack([height, width, height, width])
  img_dims = K.reshape(img_dims, [1, 4])
  boxes = boxes * img_dims
  return boxes

In [35]:
def yolo_eval(yolo_outputs, image_shape=(720., 1280.), max_boxes=10, scores_threshold=0.6,
             iou_threshold=0.5):
    box_conf, box_xy, box_wh, box_class_probs = yolo_outputs
    
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    
    scores, boxes, classes = yolo_filter_boxes(box_conf, boxes, box_class_probs, scores_threshold)
    
    boxes = scale_boxes(boxes, image_shape)
    
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes=max_boxes,
                                                    iou_threshold=iou_threshold)
    return scores, boxes, classes

In [36]:
with tf.Session() as test_b:
    yolo_outputs = (tf.random_normal([19, 19, 5, 1], mean=1, stddev=4),
                    tf.random_normal([19, 19, 5, 2], mean=1, stddev=4),
                    tf.random_normal([19, 19, 5, 2], mean=1, stddev=4),
                    tf.random_normal([19, 19, 5, 80], mean=1, stddev=4))
    scores, boxes, classes = yolo_eval(yolo_outputs)
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))

scores[2] = 152.44345
boxes[2] = [ -260.47098  7199.534    1308.687   12129.585  ]
classes[2] = 3
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


In [37]:
sess = K.get_session()

In [38]:
def read_classes():
    with open("/content/coco_classes.txt") as f:
      class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names

In [39]:
def read_anchors():
  with open("/content/yolo_anchors.txt") as f:
    anchors = f.readline()
    anchors = [float(x) for x in anchors.split(",")]
    anchors = np.array(anchors).reshape(-1, 2)
  return anchors

In [41]:
class_names = read_classes()
anchors = read_anchors()
image_shape = (720., 1280.)

In [None]:
yolo_model = load_model("data/yolo.h5")

In [None]:
yolo_model.summary()

In [None]:
yolo_outputs = yolo_head(yolo_model.output, anchors len(class_names))

In [None]:
scores, boxes, classes = yolo_eval(yolo_outputs, img_shape)

In [43]:
def generate_colors(class_names):
    hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
    random.shuffle(colors)
    return colors

In [44]:
def preprocess_img(img_path, model_img_size):
  img_type = imghdr.what(img_path)
  img = Image.open(img_path)
  resized_img = img.resize(tuple(reversed(model_img_size)), img.BICUBIC)
  img_data = np.array(resized_img, dtype="float32")
  img_data = img_data / 255.
  img_data = np.expand_dims(img_data, 0)
  return img, img_data

In [45]:
def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors):
    
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf', size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)

        draw = ImageDraw.Draw(image)
        label_size = draw.textsize(label, font)

        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=colors[c])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

In [42]:
def predict(sess, img_file):
  img, img_data = preprocess_img("images_yolo/"+ img_file, model_img_size=(608, 608))
  out_scores, out_boxes, out_classes = sess.run(yolo_eval(yolo_outputs, img_shape),
                                                feed_dict={yolo_model.input: img_data, K.learning_phase(): 0})
  print("Found {} boxes for {}".format(len(out_boxes), img_file))
  colors = generate_colors(class_names)
  draw_boxes(img, out_scores, out_boxes, out_classes, class_names, colors)
  img.save(osath.join("out", img_file), quality=90)
  output_img = scipy.misc.imread(op.path.join("out", img_file))
  plt.imshow(output_img)

  return out_scores, out_boxes, out_classes

In [None]:
out_scores, out_boxes, out_lasses = predict(sess ,"test.jpg")