Add YOLOv2 object detection (TensorFlow)

xiaohu2015 · web-flow · commit 6ea1b86c38f9 · 2018-04-05T21:21:44.000+08:00
diff --git a/ObjectDetections/yolo2/config.py b/ObjectDetections/yolo2/config.py
@@ -0,0 +1,25 @@
+"""
+Yolov2 anchors and coco classes
+"""
+
+"""
+anchors = [[0.738768, 0.874946],
+           [2.42204, 2.65704],
+           [4.30971, 7.04493],
+           [10.246, 4.59428],
+           [12.6868, 11.8741]]
+"""
+# Active anchor boxes: five [width, height] pairs (decode() in detect_ops.py
+# reads column 0 as width and column 1 as height). decode() divides them by
+# the feature-map width/height, so they are presumably expressed in
+# feature-map cell units -- TODO confirm against the checkpoint in use.
+anchors = [[0.57273, 0.677385],
+           [1.87446, 2.06253],
+           [3.33843, 5.47434],
+           [7.88282, 3.52778],
+           [9.77052, 9.16828]]
+
+def read_coco_labels():
+    """Load the class names from ``./data/coco_classes.txt``.
+
+    The path is resolved relative to the current working directory. The file
+    is expected to hold one class name per line.
+
+    Returns:
+        A list with one string per line of the file, in file order, with the
+        line terminator removed. Blank lines are kept as empty strings so
+        that list indices keep matching line numbers.
+
+    Raises:
+        IOError / OSError: if the file cannot be opened.
+    """
+    # The context manager guarantees the handle is closed, even on error.
+    with open("./data/coco_classes.txt") as f:
+        # Strip only the line terminator: slicing off the last character
+        # would truncate the final name when the file lacks a trailing
+        # newline.
+        return [line.rstrip("\r\n") for line in f]
+
+# Evaluated at import time: importing this module reads
+# ./data/coco_classes.txt relative to the current working directory.
+class_names = read_coco_labels()
diff --git a/ObjectDetections/yolo2/demo.py b/ObjectDetections/yolo2/demo.py
@@ -0,0 +1,50 @@
+"""
+Demo for yolov2
+"""
+
+import numpy as np
+import tensorflow as tf
+import cv2
+from PIL import Image
+
+from model import darknet
+from detect_ops import decode
+from utils import preprocess_image, postprocess, draw_detection
+from config import anchors, class_names
+
+
+# Spatial size of the network input; used below as the two spatial
+# dimensions of the placeholder.
+input_size = (416, 416)
+image_file = "./images/car.jpg"
+image = cv2.imread(image_file)
+# cv2.imread reports a missing/unreadable file by returning None instead of
+# raising, which would otherwise surface as an AttributeError on `.shape`.
+if image is None:
+    raise IOError("Cannot read image file: %s" % image_file)
+# (height, width) of the original image, needed to map boxes back to it.
+image_shape = image.shape[:2]
+image_cp = preprocess_image(image, input_size)
+# Disabled alternative preprocessing path based on PIL, kept as a bare
+# string literal (it has no effect at runtime).
+"""
+image = Image.open(image_file)
+image_cp = image.resize(input_size, Image.BICUBIC)
+image_cp = np.array(image_cp, dtype=np.float32)/255.0
+image_cp = np.expand_dims(image_cp, 0)
+#print(image_cp)
+"""
+
+
+# Build the inference graph for a single image.
+images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3])
+detection_feat = darknet(images)
+# The detection feature map is 32x smaller than the input in each dimension.
+feat_sizes = input_size[0] // 32, input_size[1] // 32
+detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors)
+
+# Restore the pretrained weights and run one forward pass.
+checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt"
+saver = tf.train.Saver()
+with tf.Session() as sess:
+    saver.restore(sess, checkpoint_path)
+    bboxes, obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp})
+
+# Post-process the raw outputs, draw them on the original image, then save
+# and display the result.
+bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs,
+                                         image_shape=image_shape)
+img_detection = draw_detection(image, bboxes, scores, class_inds, class_names)
+cv2.imwrite("detection.jpg", img_detection)
+cv2.imshow("detection results", img_detection)
+
+# Block until a key is pressed so the window stays visible.
+cv2.waitKey(0)
+
+
+
diff --git a/ObjectDetections/yolo2/detect_ops.py b/ObjectDetections/yolo2/detect_ops.py
@@ -0,0 +1,39 @@
+"""
+Detection ops for Yolov2
+"""
+
+import tensorflow as tf
+import numpy as np
+
+
+def decode(detection_feat, feat_sizes=(13, 13), num_classes=80,
+           anchors=None):
+    """Decode the raw detection feature map into boxes and probabilities.
+
+    Args:
+        detection_feat: network output tensor; it is reshaped to
+            [-1, H * W, num_anchors, num_classes + 5].
+        feat_sizes: (H, W) of the detection feature map.
+        num_classes: number of object classes.
+        anchors: sequence of [width, height] anchor pairs. Required: the
+            default of None cannot be used (``len(None)`` raises TypeError).
+
+    Returns:
+        A tuple ``(bboxes, obj_probs, class_probs)``:
+        bboxes has shape [-1, H * W, num_anchors, 4] holding
+        (xmin, ymin, xmax, ymax) as fractions of the feature-map width and
+        height (corners may fall outside [0, 1]); obj_probs has shape
+        [-1, H * W, num_anchors]; class_probs has shape
+        [-1, H * W, num_anchors, num_classes].
+    """
+    H, W = feat_sizes
+    num_anchors = len(anchors)
+    detection_results = tf.reshape(detection_feat, [-1, H * W, num_anchors,
+                                                    num_classes + 5])
+
+    # Per-anchor layout of the last axis: x, y, w, h, objectness, classes.
+    bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2])
+    bbox_wh = tf.exp(detection_results[:, :, :, 2:4])
+    obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4])
+    class_probs = tf.nn.softmax(detection_results[:, :, :, 5:])
+
+    anchors = tf.constant(anchors, dtype=tf.float32)
+
+    height_ind = tf.range(H, dtype=tf.float32)
+    width_ind = tf.range(W, dtype=tf.float32)
+    # meshgrid(x, y) returns grids of shape [len(y), len(x)]. Passing the
+    # width range first yields [H, W] grids whose row-major flattening
+    # (index = h * W + w) matches the H * W reshape above, also when H != W.
+    x_offset, y_offset = tf.meshgrid(width_ind, height_ind)
+    x_offset = tf.reshape(x_offset, [1, -1, 1])
+    y_offset = tf.reshape(y_offset, [1, -1, 1])
+
+    # decode: centers are cell offset + sigmoid, normalised by the grid size;
+    # bbox_w / bbox_h are HALF extents (note the * 0.5).
+    bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W
+    bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H
+    bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5
+    bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5
+
+    bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h,
+                       bbox_x + bbox_w, bbox_y + bbox_h], axis=3)
+
+    return bboxes, obj_probs, class_probs
diff --git a/ObjectDetections/yolo2/loss.py b/ObjectDetections/yolo2/loss.py
@@ -0,0 +1,86 @@
+"""
+Loss function for YOLOv2
+"""
+
+import numpy as np
+import tensorflow as tf
+
+def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)):
+    """
+    Compute the loss of Yolov2 for training
+
+    Args:
+        predictions: raw network output; reshaped here to
+            [-1, H, W, B, 5 + C].
+        targets: dict with the keys
+            "coords": ground-truth boxes, [-1, H*W, B, 4]; columns 2:4 are
+                squared before use, so they are presumably stored as the
+                square roots of the normalised width/height -- TODO confirm
+                against the target generator;
+            "probs": one-hot class probabilities, [-1, H*W, B, C];
+            "confs": 1 for object, 0 for background, [-1, H*W, B].
+        anchors: sequence of B [width, height] anchor pairs.
+        scales: (sprob, sconf, snoob, scoor), the weights of the class,
+            object-confidence, no-object-confidence and coordinate terms.
+        num_classes: number of classes C.
+        feat_sizes: (H, W) of the detection feature map.
+
+    Returns:
+        Scalar tensor: 0.5 * mean over the batch of the weighted sum of
+        squared errors.
+    """
+    H, W = feat_sizes
+    C = num_classes
+    B = len(anchors)
+    anchors = tf.constant(anchors, dtype=tf.float32)
+    anchors = tf.reshape(anchors, [1, 1, B, 2])
+
+    sprob, sconf, snoob, scoor = scales  # the scales for different parts
+
+    _coords = targets["coords"]  # ground truth [-1, H*W, B, 4]
+    _probs = targets["probs"]    # class probability [-1, H*W, B, C] one hot
+    _confs = targets["confs"]    # 1 for object, 0 for background, [-1, H*W, B]
+
+    # decode the net output
+    predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)])
+    coords = predictions[:, :, :, :, 0:4]   # t_x, t_y, t_w, t_h
+    coords = tf.reshape(coords, [-1, H*W, B, 4])
+    coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2])  # (0, 1) relative cell top left
+    coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors /
+                        np.reshape([W, H], [1, 1, 1, 2])) # sqrt of w, h (0, 1)
+    coords = tf.concat([coords_xy, coords_wh], axis=3)  # [batch_size, H*W, B, 4]
+
+    confs = tf.nn.sigmoid(predictions[:, :, :, :, 4])  # object confidence
+    confs = tf.reshape(confs, [-1, H*W, B, 1])
+
+    probs = tf.nn.softmax(predictions[:, :, :, :, 5:])  # class probability
+    probs = tf.reshape(probs, [-1, H*W, B, C])
+
+    preds = tf.concat([coords, confs, probs], axis=3)  # [-1, H*W, B, (4+1+C)]
+
+    # match ground truths with anchors (predictions in fact)
+    # assign ground truths to the predictions with the best IOU (select 1 among 5 anchors)
+    # Squaring undoes the sqrt above; multiplying by [W, H] puts the sizes in
+    # grid-cell units, the same units as the cell-relative centers.
+    wh = tf.pow(coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
+    areas = wh[:, :, :, 0] * wh[:, :, :, 1]
+    centers = coords[:, :, :, 0:2]
+    up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5)
+
+    # the ground truth
+    _wh = tf.pow(_coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2])
+    _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1]
+    _centers = _coords[:, :, :, 0:2]
+    _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5)
+
+    # compute IOU
+    inter_upleft = tf.maximum(up_left, _up_left)
+    inter_downright = tf.minimum(down_right, _down_right)
+    inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0)
+    intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]
+    ious = tf.truediv(intersects, areas + _areas - intersects)
+
+    # Within each cell, keep only the anchor(s) whose prediction has the
+    # highest IOU, and only where the target marks an object.
+    best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True))
+    best_iou_mask = tf.cast(best_iou_mask, tf.float32)
+    mask = best_iou_mask * _confs  # [-1, H*W, B]
+    mask = tf.expand_dims(mask, -1)  # [-1, H*W, B, 1]
+
+    # compute weight terms
+    # The weights must line up column-for-column with preds/truths
+    # (4 coords + 1 conf + C probs). The mask is therefore tiled to 4 and C
+    # columns; concatenating three width-1 tensors would give a last
+    # dimension of 3 that cannot broadcast against 5 + C.
+    confs_w = snoob * (1 - mask) + sconf * mask           # [-1, H*W, B, 1]
+    coords_w = scoor * tf.tile(mask, [1, 1, 1, 4])        # [-1, H*W, B, 4]
+    probs_w = sprob * tf.tile(mask, [1, 1, 1, C])         # [-1, H*W, B, C]
+    weights = tf.concat([coords_w, confs_w, probs_w], axis=3)
+
+    truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3)
+
+    # Weighted sum of squared errors per sample, then averaged over the batch.
+    loss = tf.pow(preds - truths, 2) * weights
+    loss = tf.reduce_sum(loss, axis=[1, 2, 3])
+    loss = 0.5 * tf.reduce_mean(loss)
+    return loss
+
+
+
+
+
+
+
+
+
diff --git a/ObjectDetections/yolo2/model.png b/ObjectDetections/yolo2/model.png
diff --git a/ObjectDetections/yolo2/model.py b/ObjectDetections/yolo2/model.py
@@ -0,0 +1,89 @@
+"""
+YOLOv2 implemented by Tensorflow, only for predicting
+"""
+import os
+
+import numpy as np
+import tensorflow as tf
+
+
+
+######## basic layers #######
+
+def leaky_relu(x):
+    """Apply a leaky ReLU with a negative-side slope of 0.1 to ``x``."""
+    activated = tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")
+    return activated
+
+# Conv2d
+def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1,
+           activation=leaky_relu, use_bias=False, name="conv2d"):
+    """Convolution block: explicit zero padding, conv, optional BN, activation.
+
+    Args:
+        x: input tensor; the two middle dimensions are the ones padded.
+        filters: number of output channels.
+        size: convolution kernel size.
+        pad: zeros added on each side of both spatial dimensions; the
+            convolution itself always uses "VALID" padding.
+        stride: convolution stride.
+        batch_normalize: batch normalisation (inference mode) is applied
+            only when this equals 1.
+        activation: callable applied last; a falsy value disables it.
+        use_bias: whether the convolution has a bias term.
+        name: layer name; the batch-norm layer is named ``name + "_bn"``.
+
+    Returns:
+        The output tensor of the block.
+    """
+    inputs = x
+    if pad > 0:
+        spatial_pad = [pad, pad]
+        inputs = tf.pad(x, [[0, 0], spatial_pad, spatial_pad, [0, 0]])
+
+    net = tf.layers.conv2d(inputs, filters, size, strides=stride,
+                           padding="VALID", activation=None,
+                           use_bias=use_bias, name=name)
+
+    if batch_normalize == 1:
+        net = tf.layers.batch_normalization(net, axis=-1, momentum=0.9,
+                                            training=False, name=name + "_bn")
+
+    return activation(net) if activation else net
+
+# maxpool2d
+def maxpool(x, size=2, stride=2, name="maxpool"):
+    """Max-pool ``x`` with the given window ``size`` and ``stride``.
+
+    Args:
+        x: input tensor.
+        size: pooling window size.
+        stride: pooling stride.
+        name: name given to the pooling layer. It is now forwarded to the
+            layer instead of being silently ignored; pooling has no
+            variables, so variable names are unaffected.
+
+    Returns:
+        The pooled tensor.
+    """
+    return tf.layers.max_pooling2d(x, size, stride, name=name)
+
+# reorg layer
+def reorg(x, stride):
+    """Move each non-overlapping ``stride`` x ``stride`` patch into channels.
+
+    Patches are extracted with a window and step both equal to ``stride``
+    and "VALID" padding, so the spatial size shrinks by ``stride`` while the
+    channel count grows by ``stride * stride``.
+    """
+    window = [1, stride, stride, 1]
+    unit_rates = [1, 1, 1, 1]
+    return tf.extract_image_patches(x, window, window, unit_rates,
+                                    padding="VALID")
+
+
+def darknet(images, n_last_channels=425):
+    """Darknet19 for YOLOv2
+
+    Args:
+        images: input image batch tensor.
+        n_last_channels: channel count of the final detection layer. For use
+            with decode() it has to equal num_anchors * (num_classes + 5);
+            the default 425 corresponds to 5 * (80 + 5).
+
+    Returns:
+        The raw detection feature map produced by the "conv_dec" layer.
+    """
+    def apply_layers(net, specs):
+        # A plain string is a max-pool layer name; a tuple is
+        # (filters, kernel size, name) for a conv block. 3x3 convolutions
+        # are padded by 1, 1x1 convolutions are not padded.
+        for spec in specs:
+            if isinstance(spec, str):
+                net = maxpool(net, name=spec)
+            else:
+                filters, size, layer_name = spec
+                pad = 1 if size == 3 else 0
+                net = conv2d(net, filters, size, pad, name=layer_name)
+        return net
+
+    # Layers up to the point where the pass-through branch is taken.
+    stem = [
+        (32, 3, "conv1"), "pool1",
+        (64, 3, "conv2"), "pool2",
+        (128, 3, "conv3_1"), (64, 1, "conv3_2"), (128, 3, "conv3_3"), "pool3",
+        (256, 3, "conv4_1"), (128, 1, "conv4_2"), (256, 3, "conv4_3"), "pool4",
+        (512, 3, "conv5_1"), (256, 1, "conv5_2"), (512, 3, "conv5_3"),
+        (256, 1, "conv5_4"), (512, 3, "conv5_5"),
+    ]
+    # Remaining layers of the main branch.
+    head = [
+        "pool5",
+        (1024, 3, "conv6_1"), (512, 1, "conv6_2"), (1024, 3, "conv6_3"),
+        (512, 1, "conv6_4"), (1024, 3, "conv6_5"),
+        (1024, 3, "conv7_1"), (1024, 3, "conv7_2"),
+    ]
+
+    passthrough = apply_layers(images, stem)
+    main = apply_layers(passthrough, head)
+
+    # shortcut: squeeze the higher-resolution features to 64 channels, fold
+    # 2x2 spatial blocks into channels, then join them with the main branch.
+    passthrough = conv2d(passthrough, 64, 1, name="conv_shortcut")
+    passthrough = reorg(passthrough, 2)
+    merged = tf.concat([passthrough, main], axis=-1)
+    merged = conv2d(merged, 1024, 3, 1, name="conv8")
+
+    # detection layer: linear 1x1 convolution with bias and no batch norm
+    return conv2d(merged, n_last_channels, 1, batch_normalize=0,
+                  activation=None, use_bias=True, name="conv_dec")
+
+
+
+if __name__ == "__main__":
+    # Smoke test: build the network on a random 416x416 input, restore the
+    # pretrained checkpoint and print the shape of the output feature map.
+    dummy_images = tf.random_normal([1, 416, 416, 3])
+    detection_feat = darknet(dummy_images)
+
+    ckpt_saver = tf.train.Saver()
+    with tf.Session() as session:
+        ckpt_saver.restore(session, "./checkpoint_dir/yolo2_coco.ckpt")
+        output = session.run(detection_feat)
+        print(output.shape)
+
diff --git a/ObjectDetections/yolo2/utils.py b/ObjectDetections/yolo2/utils.py