In [43]:
def create_network(anchors, class_names, is_training=False, scope="yolo", input_shape=(416, 416, 3)):
    tf.reset_default_graph()
    no_b = len(anchors) // 2
    no_c = len(class_names)

    anchor_split = no_b // 3 * 2

    layers = []

    def _conv_shortcut(filter_size):
        layers.append(conv2d_bn_act(layers[-1].out, filter_size, 1))
        layers.append(conv2d_bn_act(layers[-1].out, filter_size * 2, 3))
        layers.append(shortcut(layers[-1].out, layers[-3].out))

    with tf.variable_scope(scope):
        layers.append(input_layer([None, input_shape[0], input_shape[1], input_shape[2]], "input"))

        # START darknet53
        layers.append(conv2d_bn_act(layers[-1].out, 32, 3, 1))
        layers.append(conv2d_bn_act(layers[-1].out, 64, 3, 2))
        _conv_shortcut(32)
        layers.append(conv2d_bn_act(layers[-1].out, 128, 3, 2))
        for _ in range(2):
            _conv_shortcut(64)
        layers.append(conv2d_bn_act(layers[-1].out, 256, 3, 2))
        for _ in range(8):
            _conv_shortcut(128)

        layers.append(conv2d_bn_act(layers[-1].out, 512, 3, 2))
        for _ in range(8):
            _conv_shortcut(256)

        layers.append(conv2d_bn_act(layers[-1].out, 1024, 3, 2))
        for _ in range(4):
            _conv_shortcut(512)
        # END darknet53

        # START yolo
        for _ in range(3):
            layers.append(conv2d_bn_act(layers[-1].out, 512, 1))
            layers.append(conv2d_bn_act(layers[-1].out, 1024, 3))
        sub_anchors = anchors[:anchor_split]
        sub_no_b = len(sub_anchors) // 2
        layers.append(conv2d_bn_act(layers[-1].out, sub_no_b * (5 + no_c), 1, 1,
                                    use_batch_normalization=False,
                                    activation_fn="linear",
                                    is_training=is_training))

        layers.append(yolo_layer(layers[-1].out, sub_anchors, no_c))
        yolo_1 = layers[-1]

        layers.append(route([layers[-4].out]))
        layers.append(conv2d_bn_act(layers[-1].out, 256, 1))
        layers.append(upsample(layers[-1].out, 2))
        layers.append(route([layers[-1].out, layers[61].out]))
        for _ in range(3):
            layers.append(conv2d_bn_act(layers[-1].out, 256, 1))
            layers.append(conv2d_bn_act(layers[-1].out, 512, 3))
        sub_anchors = anchors[anchor_split:2 * anchor_split]
        sub_no_b = len(sub_anchors) // 2
        layers.append(conv2d_bn_act(layers[-1].out, sub_no_b * (5 + no_c), 1, 1,
                                    use_batch_normalization=False,
                                    activation_fn="linear",
                                    is_training=is_training))

        layers.append(yolo_layer(layers[-1].out, sub_anchors, no_c))
        yolo_2 = layers[-1]

        layers.append(route([layers[-4].out]))
        layers.append(conv2d_bn_act(layers[-1].out, 128, 1))
        layers.append(upsample(layers[-1].out, 2))
        layers.append(route([layers[-1].out, layers[36].out]))
        for _ in range(3):
            layers.append(conv2d_bn_act(layers[-1].out, 128, 1))
            layers.append(conv2d_bn_act(layers[-1].out, 256, 3))
        sub_anchors = anchors[2 * anchor_split:]
        sub_no_b = len(sub_anchors) // 2
        layers.append(conv2d_bn_act(layers[-1].out, sub_no_b * (5 + no_c), 1, 1,
                                    use_batch_normalization=False,
                                    activation_fn="linear",
                                    is_training=is_training))

        layers.append(yolo_layer(layers[-1].out, sub_anchors, no_c))
        yolo_3 = layers[-1]
        # END yolo

        # combine all
        layers.append(detection_layer([yolo_1.out, yolo_2.out, yolo_3.out]))

    return layers, [yolo_1.yolo, yolo_2.yolo, yolo_3.yolo]


def load_weights(layers, weights_path):
    print("Reading pre-trained weights from {}".format(weights_path))

    # header
    with open(weights_path, "rb") as f:
        major, minor, revision, subversion, n = np.fromfile(f, count=5, dtype=np.int32)
        print("{} {} {} {} {}".format(major, minor, revision, subversion, n))
        weights = np.fromfile(f, dtype=np.float32)

    print("Found {} weight values.".format(len(weights)))

    return _load_weights(layers, weights)


def preprocess(img, size=(416, 416)):
    imsz = cv2.resize(img, size)
    imsz = imsz / 255.  # to make values lie between 0 and 1
    imsz = imsz[:, :, ::-1]  # BGR to RGB
    return imsz

In [47]:
with open("./resource/voc.names", "r") as f:
    v_class_names = [l.strip() for l in f.readlines()]
v_anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
layers, yolo_layers = create_network(v_anchors, v_class_names)

import cv2

img = cv2.imread("./img/person.jpg")
img = preprocess(img)
net_img = np.expand_dims(img, axis=0)
print(net_img.shape)
#ops = load_weights(layers, "./bin/yolov3.weights")
sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(layers[0].out.get_shape())
for y in yolo_layers:
    print("yolo layers: ", y)
net_outs = sess.run(yolo_layers, feed_dict={layers[0].out: net_img})

(1, 416, 416, 3)
(?, 416, 416, 3)
yolo layers:  Tensor("yolo/Reshape_1:0", shape=(?, 13, 13, 3, 25), dtype=float32)
yolo layers:  Tensor("yolo/Reshape_3:0", shape=(?, 26, 26, 3, 25), dtype=float32)
yolo layers:  Tensor("yolo/Reshape_5:0", shape=(?, 52, 52, 3, 25), dtype=float32)


In [49]:
print(type(net_outs))
print(net_outs[0].shape)

<class 'list'>
(1, 13, 13, 3, 25)


In [61]:

def sigmoid(x):
    return 1. / (1. + np.exp(-x))


def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()


def iou_score(box1, box2):
    box1_min, box1_max = box1.get_top_left(), box1.get_bottom_right()
    box1_area = box1.w * box1.h
    box2_min, box2_max = box2.get_top_left(), box2.get_bottom_right()
    box2_area = box2.w * box2.h

    intersect_min = np.maximum(box1_min, box2_min)
    intersect_max = np.minimum(box1_max, box2_max)
    intersect_wh = np.maximum(intersect_max - intersect_min, 0)
    intersect_area = intersect_wh[0] * intersect_wh[1]
    union_area = np.maximum(box1_area + box2_area - intersect_area, 1e-8)

    return intersect_area / union_area


def _find_bounding_boxes(result, anchors, threshold):
    no_b = len(anchors) // 2
    output_h, output_w = result.shape[1:3]
    bboxes = []
    for cy in range(output_h):
        for cw in range(output_w):
            for b in range(no_b):
                # calculate p(class|obj)
                prob_obj = sigmoid(result[cy, cw, b, 4])
                prob_classes = softmax(result[cy, cw, b, 5:])
                class_idx = np.argmax(prob_classes)
                class_prob = prob_classes[class_idx]
                p = prob_obj * class_prob
                if p < threshold:  # if lower than threshold, pass
                    continue

                coords = result[cy, cw, b, 0:4]
                bbox = BoundingBox()
                bbox.x = (sigmoid(coords[0]) + cw) / output_w
                bbox.y = (sigmoid(coords[1]) + cy) / output_h
                bbox.w = (anchors[2 * b] * np.exp(coords[2])) / output_w
                bbox.h = (anchors[2 * b + 1] * np.exp(coords[3])) / output_h
                bbox.class_idx = class_idx
                bbox.prob = p
                bboxes.append(bbox)
    return bboxes


class BoundingBox(object):
    def __init__(self, x=float(), y=float(), w=float(), h=float()):
        self.x = x
        self.y = y
        self.w = w
        self.h = h
        self.class_idx = -1
        self.prob = 0

    def get_top_left(self, h=1., w=1.):
        return np.maximum((self.x - self.w / 2.) * w, 0.), np.maximum((self.y - self.h / 2.) * h, 0.)

    def get_bottom_right(self, h=1., w=1.):
        return np.minimum((self.x + self.w / 2.) * w, w), np.minimum((self.y + self.h / 2.) * h, h)


In [66]:
def postprocess(net_outs, anchors, class_names, threshold, iou_threshold):
    results = []
    
    anchor_idx = (len(anchors) // 2) // len(net_outs)
    
    boxes = {}
    for net_out in net_outs: # [3, ?, H, W, B, 5 + C]
        for i, out in enumerate(net_out): # [?, H, W, B, 5 + C]
            if i in boxes:
                boxes[i].extend(_find_bounding_boxes(out, anchors[i*anchor_idx:(i+1)*anchor_idx], threshold))
            else:
                boxes[i] = _find_bounding_boxes(out, anchors[i*anchor_idx:(i+1)*anchor_idx], threshold)
    for box in boxes.values():
        print(box)
    return results
        
postprocess(net_outs, v_anchors, v_class_names, 0.5, 0.5)

[[[[-2.54141092e-02 -1.51251048e-01  1.79777265e-01 ...  2.61811346e-01
    -2.39476949e-01  2.19029412e-02]
   [ 6.01425841e-02  1.34186298e-01 -1.01775043e-01 ... -1.13960072e-01
    -6.79150969e-02 -1.55140497e-02]
   [ 1.81884423e-01  1.16728291e-01 -2.03111768e-02 ...  1.03151262e-01
    -2.08363086e-01 -6.05359562e-02]]

  [[-9.98181850e-02 -3.31552505e-01  2.68142402e-01 ...  2.84548908e-01
    -2.39075199e-01 -1.69615865e-01]
   [ 2.72351727e-02  9.94891673e-02 -1.10031471e-01 ... -1.62423268e-01
    -1.72672957e-01 -3.83877009e-03]
   [ 2.18386427e-01 -1.03365518e-02 -5.02472818e-02 ...  1.51534081e-01
    -3.41233730e-01  1.58384621e-01]]

  [[-1.26651153e-01 -4.95189726e-01  3.61937612e-01 ...  2.18169689e-01
    -2.15758920e-01 -1.83681250e-01]
   [ 6.70666695e-02  7.50083774e-02 -5.26051745e-02 ... -2.47048452e-01
    -1.29977703e-01 -1.22916184e-01]
   [ 1.91945985e-01 -9.66382176e-02 -1.06784314e-01 ...  2.01292560e-02
    -5.23408413e-01  2.25858659e-01]]

  ...

  [[-1

[]