In [1]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow

import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input,Lambda,Conv2D
from keras.models import load_model,Model

from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body
import yolo_utils

%matplotlib inline

Using TensorFlow backend.


In [19]:
# YOLO (you only look once)
def yolo_filter_boxes(box_confidence,boxes,box_class_probs,threshold=0.6):
    '''
    通过阈值来过滤对象和分类的置信度
    
    参数：
        box_confidence - tensor类型，维度为（19x19,5,1),包括19*19个单元格中预测的5个anchor box 中的p
        boxes - tensor类型，维度为（19x19,5,4)anchor box 的(bx,by,bw,bh)
        box_class_probs - tensor类型，维度为（19x19,5,80)，包括所有anchor box的所有类别（80种）的概率
        threshold - 实数，阈值，如果分类的概率高过他，则分类概率被保留
        
    返回：
        scores - tensor类型，维度为(None,),包含了保留的anchor box的分类概率
        boxes - tensor类型，维度为(None,4),包含了保留的anchor box的(bx,by,bw,bh)
        classes - tensor类型，维度为(None,),包含了保留的anchor box的索引
    
    注意："None"是因为你不知道所选框的确切数量，因为它取决于阈值。
    '''
    
    # 第一步，计算anchor box 的得分
    box_scores = box_confidence * box_class_probs
    
    # 第二步，找到最大值的anchor box 的索引，和对应的分数
    box_classes = K.argmax(box_scores,axis=-1)
    box_classes_score = K.max(box_scores,axis=-1)
    
    # 第三步，根据阈值创建掩码
    filtering_mask = (box_classes_score >= threshold)
    
    # 对score，boxes和classes分别使用掩码
    scores = tf.boolean_mask(box_classes_score,filtering_mask)
    boxes = tf.boolean_mask(boxes,filtering_mask)
    classes = tf.boolean_mask(box_classes,filtering_mask)
    
    return scores,boxes,classes

In [20]:
with tf.Session() as s:
    box_confidence = tf.random_normal([19,19,5,1],mean=1,stddev=4,seed=1)
    boxes = tf.random_normal([19,19,5,4],mean=1,stddev=4,seed=1)
    box_class_prob = tf.random_normal([19,19,5,8],mean=1,stddev=4,seed=1)
    scores,boxes,classes = yolo_filter_boxes(box_confidence,boxes,box_class_prob,threshold=0.5)
    
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.shape))
    print("boxes.shape = " + str(boxes.shape))
    print("classes.shape = " + str(classes.shape))
    
    s.close()

scores[2] = 8.994777
boxes[2] = [ 8.426533   3.2713668 -0.5313436 -4.9413733]
classes[2] = 6
scores.shape = (?,)
boxes.shape = (?, 4)
classes.shape = (?,)


In [21]:
def iou(box1,box2):
    '''
    实现两个anchor box的交并比
    
    参数：
        box1 - 元组类型，(x1,y1,x2,y2)
        box2 - 元组类型，(x1,y1,x2,y2)
        
    返回：
        交并比
    '''
    # 计算相交区域的面积
    xi1 = np.maximum(box1[0],box2[0])
    yi1 = np.maximum(box1[1],box2[1])
    xi2 = np.minimum(box1[2],box2[2])
    yi2 = np.minimum(box1[3],box2[3])
    inter_area = (xi1 - xi2) * (yi1 - yi2)
    
    # 计算并集，公式为 A + B - (A ∩ B)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])  
    union_area = box1_area + box2_area - inter_area
    
    # 计算交并比
    iou = inter_area / union_area
    return iou

In [22]:
box1 = (2,1,4,3)
box2 = (1,2,3,4)
iou(box1,box2)

0.14285714285714285

In [34]:
def yolo_non_max_suppression(scores,boxes,classes,max_boxes=10,iou_threshold=0.5):
    '''
    为anchor box 实现最大值抑制
    参数：
        scores - tensor类型，维度为(None,)，yolo_filter_boxes()的输出
        boxes - tensor类型，维度为(None,4)，yolo_filter_boxes()的输出，已缩放到图像大小（见下文）
        classes - tensor类型，维度为(None,)，yolo_filter_boxes()的输出
        max_boxes - 整数，预测的锚框数量的最大值
        iou_threshold - 实数，交并比阈值。
    
    返回:
        scores - tensor类型，维度为(,None)，每个锚框的预测的可能值
        boxes - tensor类型，维度为(4,None)，预测的锚框的坐标
        classes - tensor类型，维度为(,None)，每个锚框的预测的分类
    '''
    max_boxes_tensor = K.variable(max_boxes,dtype='float32') # 用于tf.image.non_max_suppression()
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # 初始化变量
    # 使用内置函数进行最大值抑制
    nms_indices = tf.image.non_max_suppression(boxes,scores,max_boxes,iou_threshold)
    
    scores = K.gather(scores,nms_indices)
    boxes = K.gather(boxes,nms_indices)
    classes = K.gather(classes,nms_indices)
    
    return scores,boxes,classes

In [35]:
with tf.Session() as t:
    scores = tf.random_normal([54,],mean=1,stddev=4,seed=0)
    boxes = tf.random_normal([54,4],mean=1,stddev=4,seed=0)
    classes = tf.random_normal([54,],mean=1,stddev=4,seed=0)
    scores,boxes,classes = yolo_non_max_suppression(scores,boxes,classes)
    
    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))
    
    t.close()

scores[2] = 9.417757
boxes[2] = [ 3.3530188   7.220135   -5.965314    0.83462703]
classes[2] = 2.0211177
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


In [36]:
def yolo_eval(yolo_outputs,image_shape=(720.,1080.),
                   max_boxes=10,scores_threshold=0.6,iou_threshold=0.5):
    '''
    将yolo编码的输出，转换为预测框以及它们的分数，框坐标和类
    参数：
        yolo_outputs - 编码模型的输出（对于维度为（608,608,3）的图片），包含4个tensors类型的变量：
                        box_confidence ： tensor类型，维度为(None, 19, 19, 5, 1)
                        box_xy         ： tensor类型，维度为(None, 19, 19, 5, 2)
                        box_wh         ： tensor类型，维度为(None, 19, 19, 5, 2)
                        box_class_probs： tensor类型，维度为(None, 19, 19, 5, 80)
        image_shape - tensor类型，维度为（2,），包含了输入的图像的维度，这里是(608.,608.)
        max_boxes - 整数，预测的锚框数量的最大值
        score_threshold - 实数，可能性阈值。
        iou_threshold - 实数，交并比阈值。
        返回：
        scores - tensor类型，维度为(,None)，每个锚框的预测的可能值
        boxes - tensor类型，维度为(4,None)，预测的锚框的坐标
        classes - tensor类型，维度为(,None)，每个锚框的预测的分类
    '''
    
    # 获取yolo模型的输出
    box_confidence,boxes_xy,box_wh,box_class_probs = yolo_outputs
    
    # 中心点转换为边角
    boxes = yolo_boxes_to_corners(boxes_xy,box_wh)
    
    # 可信度分值过滤
    scores,boxes,classes = yolo_filter_boxes(box_confidence,boxes,box_class_prob,scores_threshold)
    
    # 缩放anchor box 以适应原图
    boxes = yolo_utils.scale_boxes(boxes,image_shape)
    
    # 使用非最大值抑制
    scores,boxes,classes = yolo_non_max_suppression(scores,boxes,classes,max_boxes,iou_threshold)
    
    return scores,boxes,classes

In [37]:
with tf.Session() as test_c:
    yolo_outputs = (tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed = 1),
                    tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
                    tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed = 1),
                    tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed = 1))
    scores, boxes, classes = yolo_eval(yolo_outputs)

    print("scores[2] = " + str(scores[2].eval()))
    print("boxes[2] = " + str(boxes[2].eval()))
    print("classes[2] = " + str(classes[2].eval()))
    print("scores.shape = " + str(scores.eval().shape))
    print("boxes.shape = " + str(boxes.eval().shape))
    print("classes.shape = " + str(classes.eval().shape))

    test_c.close()

scores[2] = 119.651505
boxes[2] = [-1430.3906 -1518.1752 -4291.172  -4554.5254]
classes[2] = 2
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


In [46]:
sess = K.get_session()
class_names = yolo_utils.read_classes('model_data/coco_classes.txt')
anchors = yolo_utils.read_anchors('model_data/yolo_anchors.txt')
image_shape=(720.,1080.)

In [41]:
yolo_model  = load_model('model_data/yolov2.h5')



In [42]:
yolo_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 608, 608, 3)   0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 608, 608, 32)  864         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, 608, 608, 32)  128         conv2d_1[0][0]                   
____________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)        (None, 608, 608, 32)  0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [43]:
yolo_outputs = yolo_head(yolo_model.output,anchors,len(class_name))
scores,boxes,classes = yolo_eval(yolo_outputs,image_shape)

In [49]:
def predict(sess,image_file,is_show_info=True,is_plot=True):
    '''
    运行存储在sess的计算图以预测image_file的边界框，打印出预测的图和信息
    
    参数：
        sess - 包含了Yolo计算图的会话
        image_file - 图片文件名称
    返回：
        out_scores - tensor类型，维度为(None,)，锚框的预测的可能值。
        out_boxes - tensor类型，维度为(None,4)，包含了锚框位置信息。
        out_classes - tensor类型，维度为(None,)，锚框的预测的分类索引。
    '''
    
    # 图像预处理
    image,image_data = yolo_utils.preprocess_image('images/'+image_file,model_image_size=(608,608))
    
    # 运行会话，在feed_dict选择正确的占位符
    out_scores,out_boxes,out_classes = sess.run([scores,boxes,classes],feed_dict={yolo_model.input:image_data,K.learning_phase():0})
    
    # 打印预测信息
    if is_show_info:
        print('在'+str(image_file)+'中找到了'+str(len(out_boxes))+"个锚框")
    
    # 指定要绘制的边框颜色
    colors = yolo_utils.generate_colors(class_names)
    
    # 在图中绘制边界框
    yolo_utils.draw_boxes(image,out_scores,out_boxes,out_classes,class_names,colors)
    
    #保存已经绘制边界框的图
    image.save(os.path.join('out',image_file),quality=100)
    
    # 打印已经绘制边界的图
    if is_plot:
        output_image = scipy.misc.imread(os.path.join('out',image_file))
        plt.show(output_image)
        
    return out_scores,out_boxes,out_classes

In [50]:
out_scores,out_boxes,out_classes = predict(sess,'test.jpg')

在test.jpg中找到了10个锚框
bus 2.54 (358, 167) (370, 187)
bus 2.55 (0, 258) (176, 442)
person 2.67 (284, 296) (319, 335)
bicycle 2.99 (574, 195) (584, 214)
aeroplane 4.01 (780, 285) (882, 374)
truck 4.77 (596, 279) (663, 350)
car 5.00 (309, 300) (629, 648)
car 5.75 (642, 282) (795, 412)
truck 6.26 (134, 303) (292, 440)
bicycle 6.50 (799, 324) (1086, 705)


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [52]:
for i in range(1,121):
    num_fill = int(len('0000') - len(str(str(1)))) + 1
    file_name = str(i).zfill(num_fill)+'.jpg'
    print('当前文件：'+str(file_name))
    
    out_scores,out_boxes,out_classes = predict(sess,file_name,is_show_info=False,is_plot=False)

当前文件：0001.jpg
bicycle 0.93 (652, 290) (673, 305)
train 0.96 (533, 289) (557, 302)
bicycle 1.10 (473, 291) (498, 305)
person 1.61 (462, 296) (501, 316)
person 1.72 (281, 309) (323, 328)
aeroplane 2.29 (119, 308) (169, 334)
bicycle 2.30 (515, 296) (558, 318)
train 3.09 (244, 313) (284, 333)
motorbike 3.71 (537, 285) (612, 327)
car 4.01 (569, 296) (610, 323)
当前文件：0002.jpg
bus 1.90 (560, 203) (569, 224)
bicycle 2.00 (260, 298) (340, 336)
car 2.09 (673, 279) (712, 299)
bicycle 2.68 (225, 301) (269, 325)
car 3.17 (415, 295) (477, 320)
train 3.40 (984, 5) (1075, 231)
person 3.70 (270, 304) (325, 331)
car 3.81 (332, 295) (382, 318)
aeroplane 3.86 (347, 153) (365, 182)
bicycle 5.12 (646, 280) (698, 304)
当前文件：0003.jpg
bicycle 1.16 (185, 305) (225, 329)
motorbike 1.17 (480, 197) (490, 216)
aeroplane 1.41 (816, 276) (851, 301)
truck 1.86 (14, 305) (49, 330)
train 2.62 (102, 305) (142, 324)
car 2.80 (853, 279) (891, 303)
car 4.54 (71, 302) (132, 326)
car 5.46 (568, 284) (642, 312)
truck 5.64 (194, 

truck 0.82 (478, 291) (497, 303)
car 0.93 (529, 289) (549, 303)
bicycle 0.97 (304, 307) (330, 332)
bus 1.14 (206, 317) (232, 337)
bicycle 1.61 (471, 300) (492, 315)
person 1.97 (367, 303) (400, 331)
person 2.10 (540, 298) (565, 314)
bicycle 3.99 (404, 300) (435, 322)
bus 5.90 (581, 299) (632, 330)
motorbike 7.16 (300, 298) (365, 358)
当前文件：0028.jpg
bicycle 0.89 (479, 288) (497, 301)
bicycle 1.05 (529, 287) (549, 301)
train 1.14 (372, 303) (406, 332)
bus 1.23 (303, 310) (327, 335)
bicycle 1.27 (588, 301) (611, 323)
bus 1.29 (575, 289) (591, 305)
person 1.37 (537, 299) (560, 313)
aeroplane 1.90 (406, 298) (436, 320)
person 3.26 (598, 316) (671, 369)
train 5.10 (259, 302) (333, 369)
当前文件：0029.jpg
motorbike 1.61 (863, 192) (1030, 432)
aeroplane 1.80 (407, 297) (438, 321)
motorbike 1.87 (419, 289) (442, 306)
bus 2.00 (75, 318) (108, 340)
bus 2.46 (243, 331) (273, 379)
car 2.47 (174, 308) (257, 394)
motorbike 2.95 (136, 322) (164, 342)
aeroplane 3.97 (369, 301) (407, 331)
bus 4.21 (302, 297) 

bus 0.79 (968, 318) (991, 345)
bus 0.84 (1055, 430) (1081, 670)
train 0.92 (293, 305) (327, 324)
aeroplane 0.93 (774, 317) (808, 377)
aeroplane 1.19 (1000, 200) (1083, 281)
motorbike 1.33 (762, 243) (809, 328)
person 1.66 (634, 297) (662, 327)
bicycle 2.04 (1023, 396) (1077, 694)
motorbike 2.51 (572, 291) (595, 306)
bicycle 3.20 (595, 294) (634, 324)
当前文件：0053.jpg
bus 0.78 (191, 288) (206, 307)
bicycle 1.00 (401, 289) (416, 301)
person 1.04 (750, 278) (767, 295)
bicycle 1.35 (901, 295) (933, 346)
train 1.40 (611, 293) (687, 355)
bicycle 1.43 (484, 286) (499, 296)
bus 2.05 (902, 276) (949, 348)
bus 2.42 (588, 295) (619, 325)
person 2.93 (938, 360) (981, 458)
bus 4.10 (1025, 238) (1077, 354)
当前文件：0054.jpg
motorbike 0.87 (314, 288) (332, 309)
train 1.08 (5, 326) (30, 368)
aeroplane 1.22 (587, 288) (609, 304)
person 1.27 (821, 283) (836, 300)
bus 1.55 (482, 285) (496, 294)
bus 2.10 (671, 288) (783, 364)
person 3.18 (585, 292) (616, 324)
truck 3.82 (4, 304) (54, 364)
car 4.19 (657, 278) (80

car 1.22 (766, 269) (782, 287)
car 1.77 (343, 111) (379, 152)
aeroplane 1.99 (569, 276) (590, 290)
train 2.64 (324, 96) (378, 161)
bus 4.11 (453, 66) (482, 111)
car 4.12 (683, 181) (706, 220)
motorbike 4.74 (204, 295) (286, 367)
car 4.75 (320, 92) (343, 146)
train 6.82 (126, 310) (209, 369)
train 7.32 (306, 293) (370, 354)
当前文件：0079.jpg
train 2.28 (323, 97) (378, 162)
aeroplane 2.57 (345, 112) (379, 152)
truck 3.18 (297, 292) (360, 353)
bicycle 3.24 (402, 287) (430, 308)
car 3.44 (319, 92) (343, 148)
bicycle 4.05 (285, 304) (328, 343)
motorbike 4.24 (198, 298) (276, 367)
train 4.70 (127, 314) (205, 373)
bicycle 6.26 (452, 65) (482, 111)
truck 6.55 (682, 179) (708, 225)
当前文件：0080.jpg
person 2.45 (689, 235) (701, 254)
aeroplane 2.86 (0, 316) (14, 398)
person 2.92 (284, 294) (357, 369)
motorbike 3.03 (322, 95) (377, 160)
bicycle 3.11 (452, 66) (484, 112)
motorbike 3.21 (200, 298) (274, 366)
truck 3.28 (685, 181) (709, 223)
person 3.49 (595, 287) (757, 362)
bicycle 3.72 (320, 92) (343, 147

bicycle 1.30 (900, 283) (956, 304)
car 1.63 (889, 268) (999, 317)
car 1.73 (712, 262) (726, 280)
car 1.87 (453, 129) (472, 159)
bus 1.93 (926, 262) (1044, 324)
train 2.85 (651, 230) (663, 248)
car 3.83 (288, 276) (350, 369)
truck 4.28 (327, 137) (342, 179)
bicycle 5.01 (194, 338) (244, 381)
bicycle 5.43 (0, 286) (171, 420)
当前文件：0104.jpg
truck 1.50 (339, 10) (409, 87)
person 2.02 (237, 5) (271, 49)
aeroplane 2.41 (650, 301) (673, 329)
car 2.94 (203, 24) (230, 97)
car 3.30 (390, 1) (461, 63)
train 4.14 (722, 202) (738, 229)
person 4.16 (738, 335) (812, 385)
bicycle 4.48 (321, 36) (513, 222)
truck 4.54 (754, 343) (799, 381)
car 7.88 (0, 223) (246, 421)
当前文件：0105.jpg
bicycle 0.66 (419, 294) (434, 304)
bicycle 0.84 (127, 326) (154, 343)
train 1.02 (405, 303) (421, 315)
truck 1.15 (878, 189) (1035, 367)
car 1.35 (73, 336) (99, 358)
car 1.56 (169, 326) (196, 345)
motorbike 2.03 (483, 289) (502, 302)
truck 3.27 (376, 305) (401, 320)
person 3.33 (124, 333) (161, 358)
bicycle 3.74 (164, 332) (19