Skip to content

Commit 6ea1b86

Browse files
authored
add yolo2
1 parent 02d7162 commit 6ea1b86

File tree

7 files changed

+452
-0
lines changed

7 files changed

+452
-0
lines changed

ObjectDetections/yolo2/config.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Yolov2 anchors and coco classes
3+
"""
4+
# Alternative anchor set, currently disabled and kept only for reference:
# anchors = [[0.738768, 0.874946],
#            [2.42204, 2.65704],
#            [4.30971, 7.04493],
#            [10.246, 4.59428],
#            [12.6868, 11.8741]]

# Active anchor boxes: five [width, height] pairs. The decoder divides them by
# the feature-map width/height, i.e. they are expressed in grid-cell units.
anchors = [
    [0.57273, 0.677385],
    [1.87446, 2.06253],
    [3.33843, 5.47434],
    [7.88282, 3.52778],
    [9.77052, 9.16828],
]
17+
18+
def read_coco_labels():
    """Read the COCO class names from ``./data/coco_classes.txt``.

    The path is resolved relative to the current working directory and the
    file is expected to hold one class name per line.

    Returns:
        list of str: the class names in file order with the line terminator
        removed. Blank lines are kept as empty strings so that list indices
        stay aligned with line numbers.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    # `with` closes the handle deterministically. Stripping only "\n" keeps a
    # final line without a trailing newline intact, instead of chopping off
    # its last character as a blind `line[:-1]` would.
    with open("./data/coco_classes.txt") as f:
        return [line.rstrip("\n") for line in f]
24+
25+
# Loaded once at import time, so importing this module reads
# ./data/coco_classes.txt relative to the current working directory.
class_names = read_coco_labels()

ObjectDetections/yolo2/demo.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Demo for yolov2
3+
"""
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
import cv2
8+
from PIL import Image
9+
10+
from model import darknet
11+
from detect_ops import decode
12+
from utils import preprocess_image, postprocess, draw_detection
13+
from config import anchors, class_names
14+
15+
16+
# Spatial size of the network input; both entries are used as-is for the
# placeholder's second and third dimensions below.
input_size = (416, 416)
image_file = "./images/car.jpg"

# cv2.imread reports failure by returning None instead of raising; without
# this check a missing or unreadable file only shows up as an opaque
# AttributeError on `.shape`.
image = cv2.imread(image_file)
if image is None:
    raise IOError("cannot read image file: " + image_file)
image_shape = image.shape[:2]  # first two array dims of the source image
image_cp = preprocess_image(image, input_size)

# Alternative PIL-based preprocessing, currently disabled:
# image = Image.open(image_file)
# image_cp = image.resize(input_size, Image.BICUBIC)
# image_cp = np.array(image_cp, dtype=np.float32)/255.0
# image_cp = np.expand_dims(image_cp, 0)
# #print(image_cp)

# Build the inference graph for a single image.
images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3])
detection_feat = darknet(images)
# The backbone applies five stride-2 max-pools, hence the factor of 32.
feat_sizes = input_size[0] // 32, input_size[1] // 32
detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors)

# Restore the pretrained weights and run one forward pass.
checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt"
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    bboxes, obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp})

# Post-process on the host, then draw, save and display the detections.
bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs,
                                         image_shape=image_shape)
img_detection = draw_detection(image, bboxes, scores, class_inds, class_names)
cv2.imwrite("detection.jpg", img_detection)
cv2.imshow("detection results", img_detection)

# Block until a key is pressed so the window stays visible.
cv2.waitKey(0)
48+
49+
50+
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""
2+
Detection ops for Yolov2
3+
"""
4+
5+
import tensorflow as tf
6+
import numpy as np
7+
8+
9+
def decode(detection_feat, feat_sizes=(13, 13), num_classes=80,
           anchors=None):
    """Decode the raw detection feature map into boxes and probabilities.

    Args:
        detection_feat: network output tensor. It is reshaped to
            [-1, H * W, num_anchors, num_classes + 5], so each image must
            provide H * W * num_anchors * (num_classes + 5) values.
        feat_sizes: (H, W) of the detection feature map.
        num_classes: number of object classes.
        anchors: sequence of [width, height] anchor priors in grid-cell
            units. Required; the ``None`` default only keeps the signature
            keyword-friendly.

    Returns:
        Tuple ``(bboxes, obj_probs, class_probs)``:
            bboxes: [-1, H * W, num_anchors, 4] as
                (x_min, y_min, x_max, y_max), normalised by the feature-map
                width and height.
            obj_probs: [-1, H * W, num_anchors] sigmoid objectness.
            class_probs: [-1, H * W, num_anchors, num_classes] softmax
                class distribution.

    Raises:
        TypeError: if ``anchors`` is not provided.
    """
    if anchors is None:
        # Fail with a clear message instead of an opaque `len(None)` error.
        raise TypeError("decode() requires `anchors` (a sequence of [w, h] pairs)")

    H, W = feat_sizes
    num_anchors = len(anchors)
    detection_results = tf.reshape(detection_feat, [-1, H * W, num_anchors,
                                                    num_classes + 5])

    bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2])
    bbox_wh = tf.exp(detection_results[:, :, :, 2:4])
    obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4])
    class_probs = tf.nn.softmax(detection_results[:, :, :, 5:])

    anchors = tf.constant(anchors, dtype=tf.float32)

    height_ind = tf.range(H, dtype=tf.float32)
    width_ind = tf.range(W, dtype=tf.float32)
    # tf.meshgrid uses Cartesian ("xy") indexing by default: the first
    # argument varies along columns and both outputs have shape
    # [len(second), len(first)]. Passing the width range first gives [H, W]
    # grids whose row-major flattening matches the h * W + w cell order of
    # the reshape above (assuming a [N, H, W, C] feature map), so x_offset
    # holds column indices and y_offset row indices even when H != W.
    x_offset, y_offset = tf.meshgrid(width_ind, height_ind)
    x_offset = tf.reshape(x_offset, [1, -1, 1])
    y_offset = tf.reshape(y_offset, [1, -1, 1])

    # Box centres: cell offset plus in-cell position, normalised to [0, 1].
    bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W
    bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H
    # Half extents: the anchor prior scaled by exp(t), normalised, halved.
    half_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5
    half_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5

    bboxes = tf.stack([bbox_x - half_w, bbox_y - half_h,
                       bbox_x + half_w, bbox_y + half_h], axis=3)

    return bboxes, obj_probs, class_probs

ObjectDetections/yolo2/loss.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
2+
Loss function for YOLOv2
3+
"""
4+
5+
import numpy as np
6+
import tensorflow as tf
7+
8+
def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)):
    """Compute the YOLOv2 training loss.

    Args:
        predictions: raw network output; reshaped to [-1, H, W, B, 5 + C].
        targets: dict of ground-truth tensors with keys
            "coords": [-1, H*W, B, 4], compared directly against the decoded
                predictions (x, y offsets within the cell and the square
                roots of the normalised width and height),
            "probs": [-1, H*W, B, C] one-hot class probabilities,
            "confs": [-1, H*W, B], 1 for object and 0 for background.
        anchors: sequence of B [width, height] anchor priors.
        scales: (sprob, sconf, snoob, scoor) weights for the class,
            object-confidence, no-object-confidence and coordinate terms.
        num_classes: number of classes C.
        feat_sizes: (H, W) of the detection feature map.

    Returns:
        Scalar tensor: half of the batch mean of the weighted squared error
        summed over cells, anchors and channels.
    """
    H, W = feat_sizes
    C = num_classes
    B = len(anchors)
    anchors = tf.constant(anchors, dtype=tf.float32)
    anchors = tf.reshape(anchors, [1, 1, B, 2])
    grid_wh = np.reshape([W, H], [1, 1, 1, 2])

    sprob, sconf, snoob, scoor = scales  # the scales for different parts

    _coords = targets["coords"]  # ground truth [-1, H*W, B, 4]
    _probs = targets["probs"]    # class probability [-1, H*W, B, C] one hot
    _confs = targets["confs"]    # 1 for object, 0 for background, [-1, H*W, B]

    # Decode the net output.
    predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)])
    coords = predictions[:, :, :, :, 0:4]  # t_x, t_y, t_w, t_h
    coords = tf.reshape(coords, [-1, H*W, B, 4])
    coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2])  # (0, 1) relative to cell top-left
    coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors / grid_wh)  # sqrt of w, h in (0, 1)
    coords = tf.concat([coords_xy, coords_wh], axis=3)  # [batch_size, H*W, B, 4]

    confs = tf.nn.sigmoid(predictions[:, :, :, :, 4])  # object confidence
    confs = tf.reshape(confs, [-1, H*W, B, 1])

    probs = tf.nn.softmax(predictions[:, :, :, :, 5:])  # class probability
    probs = tf.reshape(probs, [-1, H*W, B, C])

    preds = tf.concat([coords, confs, probs], axis=3)  # [-1, H*W, B, (4+1+C)]

    # Match ground truths with predictions: within each cell, the anchor
    # whose predicted box has the best IOU is made responsible.
    wh = tf.pow(coords[:, :, :, 2:4], 2) * grid_wh  # back to grid-cell units
    areas = wh[:, :, :, 0] * wh[:, :, :, 1]
    centers = coords[:, :, :, 0:2]
    up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5)

    # The same quantities for the ground truth.
    _wh = tf.pow(_coords[:, :, :, 2:4], 2) * grid_wh
    _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1]
    _centers = _coords[:, :, :, 0:2]
    _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5)

    # Compute IOU.
    inter_upleft = tf.maximum(up_left, _up_left)
    inter_downright = tf.minimum(down_right, _down_right)
    inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0)
    intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1]
    ious = tf.truediv(intersects, areas + _areas - intersects)

    # NOTE(review): `keep_dims` was renamed `keepdims` in later TF 1.x
    # releases; switch once the minimum supported TF version allows it.
    best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True))
    best_iou_mask = tf.cast(best_iou_mask, tf.float32)
    mask = best_iou_mask * _confs    # [-1, H*W, B]
    mask = tf.expand_dims(mask, -1)  # [-1, H*W, B, 1]

    # Per-channel weights. Each term must span as many channels as the slice
    # of `preds` it weights (4 coords, 1 confidence, C classes); concatenating
    # the three single-channel tensors directly would give a [..., 3] tensor
    # that cannot broadcast against the [..., 5 + C] squared error.
    confs_w = snoob * (1 - mask) + sconf * mask
    coords_w = tf.tile(scoor * mask, [1, 1, 1, 4])
    probs_w = tf.tile(sprob * mask, [1, 1, 1, C])
    weights = tf.concat([coords_w, confs_w, probs_w], axis=3)

    truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3)

    loss = tf.pow(preds - truths, 2) * weights
    loss = tf.reduce_sum(loss, axis=[1, 2, 3])
    loss = 0.5 * tf.reduce_mean(loss)
    return loss
78+
79+
80+
81+
82+
83+
84+
85+
86+

ObjectDetections/yolo2/model.png

645 KB
Loading

ObjectDetections/yolo2/model.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""
2+
YOLOv2 implemented by Tensorflow, only for predicting
3+
"""
4+
import os
5+
6+
import numpy as np
7+
import tensorflow as tf
8+
9+
10+
11+
######## basic layers #######
12+
13+
def leaky_relu(x):
    """Apply a leaky ReLU with a negative-side slope of 0.1."""
    activated = tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu")
    return activated
15+
16+
# Conv2d
17+
def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1,
           activation=leaky_relu, use_bias=False, name="conv2d"):
    """Convolution block: optional zero padding, conv, batch norm, activation.

    Args:
        x: 4-D input tensor; padding is applied to dimensions 1 and 2.
        filters: number of output channels.
        size: convolution kernel size.
        pad: zero padding added on each side of dimensions 1 and 2 before
            the "VALID" convolution; no padding op is added when 0.
        stride: convolution stride.
        batch_normalize: batch normalisation is applied only when this
            equals 1; it always runs in inference mode (training=False).
        activation: callable applied to the result, or a falsy value for a
            linear output.
        use_bias: whether the convolution has a bias term.
        name: layer name; the batch-norm layer is named ``name + "_bn"``.

    Returns:
        The output tensor of the block.
    """
    inputs = x
    if pad > 0:
        border = [pad, pad]
        inputs = tf.pad(inputs, [[0, 0], border, border, [0, 0]])

    net = tf.layers.conv2d(inputs, filters, size,
                           strides=stride,
                           padding="VALID",
                           activation=None,
                           use_bias=use_bias,
                           name=name)

    if batch_normalize == 1:
        net = tf.layers.batch_normalization(net, axis=-1, momentum=0.9,
                                            training=False,
                                            name=name + "_bn")

    return activation(net) if activation else net
29+
30+
# maxpool2d
31+
def maxpool(x, size=2, stride=2, name="maxpool"):
    """Max-pool ``x`` with a ``size`` x ``size`` window and the given stride.

    Args:
        x: 4-D input tensor.
        size: pooling window size.
        stride: pooling stride.
        name: name given to the pooling layer. It is forwarded to the layer
            so the graph ops carry the caller's name (e.g. "pool1"); pooling
            has no variables, so checkpoint contents are unaffected.

    Returns:
        The pooled tensor.
    """
    return tf.layers.max_pooling2d(x, size, stride, name=name)
33+
34+
# reorg layer
35+
def reorg(x, stride):
    """Rearrange non-overlapping ``stride`` x ``stride`` patches into channels.

    Every patch is extracted with a window and a step of ``stride`` (no
    dilation, "VALID" padding), so the two spatial dimensions shrink by
    ``stride`` while the patch contents are stacked along the last axis.
    """
    window = [1, stride, stride, 1]
    no_dilation = [1, 1, 1, 1]
    return tf.extract_image_patches(x, ksizes=window, strides=window,
                                    rates=no_dilation, padding="VALID")
38+
39+
40+
def darknet(images, n_last_channels=425):
    """Build the Darknet-19 based YOLOv2 network (inference only).

    Args:
        images: 4-D input image tensor.
        n_last_channels: number of output channels of the detection layer;
            the default 425 equals 5 * (80 + 5).

    Returns:
        The raw detection feature map produced by the final 1x1 convolution
        (no batch norm, no activation).
    """
    # Stage 1-2: single 3x3 convolutions, each followed by a pool.
    net = conv2d(images, 32, 3, pad=1, name="conv1")
    net = maxpool(net, name="pool1")
    net = conv2d(net, 64, 3, pad=1, name="conv2")
    net = maxpool(net, name="pool2")

    # Stage 3: 3x3 / 1x1 bottleneck / 3x3.
    net = conv2d(net, 128, 3, pad=1, name="conv3_1")
    net = conv2d(net, 64, 1, name="conv3_2")
    net = conv2d(net, 128, 3, pad=1, name="conv3_3")
    net = maxpool(net, name="pool3")

    # Stage 4.
    net = conv2d(net, 256, 3, pad=1, name="conv4_1")
    net = conv2d(net, 128, 1, name="conv4_2")
    net = conv2d(net, 256, 3, pad=1, name="conv4_3")
    net = maxpool(net, name="pool4")

    # Stage 5; its output also feeds the passthrough branch below.
    net = conv2d(net, 512, 3, pad=1, name="conv5_1")
    net = conv2d(net, 256, 1, name="conv5_2")
    net = conv2d(net, 512, 3, pad=1, name="conv5_3")
    net = conv2d(net, 256, 1, name="conv5_4")
    net = conv2d(net, 512, 3, pad=1, name="conv5_5")
    passthrough = net
    net = maxpool(net, name="pool5")

    # Stage 6.
    net = conv2d(net, 1024, 3, pad=1, name="conv6_1")
    net = conv2d(net, 512, 1, name="conv6_2")
    net = conv2d(net, 1024, 3, pad=1, name="conv6_3")
    net = conv2d(net, 512, 1, name="conv6_4")
    net = conv2d(net, 1024, 3, pad=1, name="conv6_5")

    # Extra 3x3 convolutions on top of the backbone.
    net = conv2d(net, 1024, 3, pad=1, name="conv7_1")
    net = conv2d(net, 1024, 3, pad=1, name="conv7_2")

    # Passthrough branch: squeeze channels, fold 2x2 patches into channels,
    # then concatenate with the main path along the channel axis.
    passthrough = conv2d(passthrough, 64, 1, name="conv_shortcut")
    passthrough = reorg(passthrough, 2)
    net = tf.concat([passthrough, net], axis=-1)
    net = conv2d(net, 1024, 3, pad=1, name="conv8")

    # Detection layer: linear 1x1 convolution with bias.
    net = conv2d(net, n_last_channels, 1, batch_normalize=0,
                 activation=None, use_bias=True, name="conv_dec")
    return net
78+
79+
80+
81+
if __name__ == "__main__":
    # Smoke test: build the network on random input, restore the pretrained
    # weights and print the shape of the detection output.
    dummy_images = tf.random_normal([1, 416, 416, 3])
    detection_output = darknet(dummy_images)

    restorer = tf.train.Saver()
    with tf.Session() as session:
        restorer.restore(session, "./checkpoint_dir/yolo2_coco.ckpt")
        result = session.run(detection_output)
        print(result.shape)
89+

0 commit comments

Comments
 (0)