# Autonomous driving - Car detection（目标检测）

In [9]:
import argparse
import os
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import scipy.io
import scipy.misc
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Lambda, Conv2D
from keras.models import load_model, Model
from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes, scale_boxes
from yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners, preprocess_true_boxes, yolo_loss, yolo_body

%matplotlib inline

Using TensorFlow backend.


**Exercise**: Implement `yolo_filter_boxes()`.
1. Compute box scores by doing the elementwise product as described in Figure 4. The following code may help you choose the right operator: 
```python
a = np.random.randn(19*19, 5, 1)
b = np.random.randn(19*19, 5, 80)
c = a * b # shape of c will be (19*19, 5, 80)
```
2. For each box, find:
    - the index of the class with the maximum box score ([Hint](https://keras.io/backend/#argmax)) (Be careful with what axis you choose; consider using axis=-1)
    - the corresponding box score ([Hint](https://keras.io/backend/#max)) (Be careful with what axis you choose; consider using axis=-1)
3. Create a mask by using a threshold. As a reminder: `([0.9, 0.3, 0.4, 0.5, 0.1] < 0.4)` returns: `[False, True, False, False, True]`. The mask should be True for the boxes you want to keep. 
4. Use TensorFlow to apply the mask to box_class_scores, boxes and box_classes to filter out the boxes we don't want. You should be left with just the subset of boxes you want to keep. ([Hint](https://www.tensorflow.org/api_docs/python/tf/boolean_mask))

Reminder: to call a Keras function, you should use `K.function(...)`.

In [10]:
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Keep only the boxes whose best class score reaches `threshold`.

    Arguments:
    box_confidence -- tensor of per-box confidences; it is multiplied elementwise
                      with `box_class_probs`, so the two must broadcast together
                      (the notebook's check cell uses shape (19, 19, 5, 1))
    boxes -- tensor of box coordinates sharing the leading dimensions of the
             score tensors (shape (19, 19, 5, 4) in the check cell)
    box_class_probs -- tensor of per-class probabilities with the classes on the
                       last axis (shape (19, 19, 5, 80) in the check cell)
    threshold -- minimum class score a box needs in order to be kept. The default
                 is .6, the same default `yolo_eval` uses for its score threshold;
                 the previous default of 6 was a typo that would reject every box
                 whose score is a product of probabilities.

    Returns:
    scores -- 1-D tensor with the best class score of every kept box
    boxes -- tensor of shape (?, 4) with the coordinates of the kept boxes
    classes -- 1-D tensor with the index of the best class of every kept box
    """
    # Use the tensors' own `*` instead of a NumPy ufunc: the inputs are symbolic
    # tensors, and the operator keeps the product a regular broadcasted graph op.
    box_scores = box_confidence * box_class_probs

    # argmax returns the index of the largest score along the class axis.
    box_classes = K.argmax(box_scores, axis = -1)
    # max returns the largest score itself along the class axis.
    box_class_scores = K.max(box_scores, axis = -1)

    # Elementwise `score >= threshold`, producing a boolean tensor.
    filtering_mask = K.greater_equal(box_class_scores, threshold)

    # tf.boolean_mask keeps the entries whose mask value is True and flattens the
    # masked leading dimensions into a single one.
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)

    return scores, boxes, classes

# 此处关于scores[2]的索引问题不明白

In [11]:
# Sanity check for yolo_filter_boxes on seeded random inputs.
with tf.Session() as test_a:
    box_confidence = tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
    boxes = tf.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
    box_class_probs = tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
    scores, boxes, classes = yolo_filter_boxes(
        box_confidence, boxes, box_class_probs, threshold=0.5
    )
    # The list is built left to right, so the eval() calls happen in the same
    # order as before. Each eval() is a separate run of the random ops, hence the
    # order matters for the printed values.
    # NOTE: the ".shape" lines print the tensor repr (which includes the static
    # shape), not the `.shape` attribute.
    print("\n".join([
        "scores[2] = " + str(scores[2].eval()),
        "boxes[2] = " + str(boxes[2].eval()),
        "classes[2] = " + str(classes[2].eval()),
        "scores.shape = " + str(scores),
        "boxes.shape = " + str(boxes),
        "classes.shape = " + str(classes),
    ]))

scores[2] = 10.750582
boxes[2] = [ 8.426533   3.2713668 -0.5313436 -4.9413733]
classes[2] = 7
scores.shape = Tensor("boolean_mask/GatherV2:0", shape=(?,), dtype=float32)
boxes.shape = Tensor("boolean_mask_1/GatherV2:0", shape=(?, 4), dtype=float32)
classes.shape = Tensor("boolean_mask_2/GatherV2:0", shape=(?,), dtype=int64)


**tf.boolean_mask(a,b)**

    tensorflow 里的一个函数，在做目标检测（YOLO）时常常用到。

     其中b一般是bool型的n维向量，若a.shape=[3,3,3]    b.shape=[3,3]    

      则  tf.boolean_mask(a,b) 将使a (m维)矩阵仅保留与b中“True”元素同下标的部分，并将结果展开到m-n+1维（b为n维；本例中m=3、n=2，结果为2维，即m-1维）。

      例：应用在YOLO算法中返回所有检测到的各类目标（车辆、行人、交通标志等）的位置信息（bx,by,bh,bw）
```python
# Demo: keep the rows of `a` whose maximum along the last axis exceeds 0.5.
a = np.random.randn(3, 3, 3)
b = np.max(a, -1)   # per-row maximum, shape (3, 3)
c = b > 0.5         # boolean mask, shape (3, 3)
print("a="+str(a))
print("b="+str(b))
print("c="+str(c))
with tf.Session() as sess:
    d = tf.boolean_mask(a, c)
    # Evaluate inside the `with` block: the session is closed on exit, so
    # evaluating `d` afterwards would fail.
    print("d="+str(d.eval(session=sess)))
```

a=[[[-1.25508127  1.76972539  0.21302597]
  [-0.2757053  -0.28133549 -0.50394556]
  [-0.70784415  0.52658374 -3.04217963]]
 
 [[ 0.63942957 -0.76669861 -0.2002611 ]
  [-0.38026374  0.42007134 -1.08306957]
  [ 0.30786828  1.80906798 -0.44145949]]
 
 [[ 0.22965498 -0.23677034  0.24160667]
  [ 0.3967085   1.70004822 -0.19343556]
  [ 0.18405488 -0.95646895 -0.5863234 ]]] 
  
b=[[ 1.76972539 -0.2757053   0.52658374]
 [ 0.63942957  0.42007134  1.80906798]
 [ 0.24160667  1.70004822  0.18405488]]
 
c=[[ True False  True] 
 [ True False  True]
 [False  True False]]
 
d=[[-1.25508127  1.76972539  0.21302597] 
 [-0.70784415  0.52658374 -3.04217963]
 [ 0.63942957 -0.76669861 -0.2002611 ]
 [ 0.30786828  1.80906798 -0.44145949]
 [ 0.3967085   1.70004822 -0.19343556]]

tf.random_normal()函数用于从服从指定正态分布的数值中取出指定个数的值。

tf.random_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None)

      -  shape: 输出张量的形状，必选
      -  mean: 正态分布的均值，默认为0
      -  stddev: 正态分布的标准差，默认为1.0
      - dtype: 输出的类型，默认为tf.float32
      -  seed: 随机数种子，是一个整数，当设置之后，每次生成的随机数都一样
      -  name: 操作的名称

# "Intersection over Union", or IoU. 

In [12]:
def iou(box1, box2):
    """Compute the intersection over union (IoU) of two axis-aligned boxes.

    Arguments:
    box1 -- first box as a sequence (x1, y1, x2, y2) of two opposite corners,
            with x1 <= x2 and y1 <= y2
    box2 -- second box, same layout as box1

    Returns:
    The intersection area divided by the union area. The result is 0 when the
    boxes do not overlap (or only touch), and 0.0 when the union has no area.
    """
    # Corners of the candidate intersection rectangle.
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])

    # Clamp each side at 0. For disjoint boxes the raw differences are negative,
    # and without clamping their product would be a bogus positive or negative
    # "intersection" area.
    inter_width = max(xi2 - xi1, 0)
    inter_height = max(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    box1_area = (box1[3] - box1[1]) * (box1[2] - box1[0])
    box2_area = (box2[3] - box2[1]) * (box2[2] - box2[0])
    union_area = (box1_area + box2_area) - inter_area

    # Two degenerate (zero-area) boxes have no union; avoid dividing by zero.
    if union_area <= 0:
        return 0.0

    return inter_area / union_area

**此处iou计算好像只符合一部分情况**

In [13]:
# Two 2x2 boxes, each given as (x1, y1, x2, y2).
box1, box2 = (2, 1, 4, 3), (1, 2, 3, 4)
print("iou = " + str(iou(box1, box2)))

iou = 0.14285714285714285


## Exercise: Implement yolo_non_max_suppression() using TensorFlow. TensorFlow has two built-in functions that are used to implement non-max suppression (so you don't actually need to use your iou() implementation)

In [17]:
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """Apply non-max suppression (NMS) to a set of scored boxes.

    Arguments:
    scores -- tensor with one score per box
    boxes -- tensor with one row of box coordinates per score
    classes -- tensor with one class entry per box
    max_boxes -- upper bound on the number of boxes to keep
    iou_threshold -- IoU threshold passed to tf.image.non_max_suppression

    Returns:
    (scores, boxes, classes) restricted to the indices selected by NMS.
    """
    # The box limit is handed to TensorFlow as an int32 variable, which has to
    # be initialized in the backend session before any op can read it.
    box_limit = K.variable(max_boxes, dtype = 'int32')
    K.get_session().run(tf.variables_initializer([box_limit]))

    # Indices of the boxes that survive non-max suppression.
    selected = tf.image.non_max_suppression(boxes, scores, box_limit, iou_threshold = iou_threshold)

    kept_scores = K.gather(scores, selected)
    kept_boxes = K.gather(boxes, selected)
    kept_classes = K.gather(classes, selected)

    return kept_scores, kept_boxes, kept_classes

    #variable
    variable(value, dtype='float32', name=None)

实例化一个张量，返回之

参数：

    value：用来初始化张量的值
    dtype：张量数据类型
    name：张量的名字（可选）


In [18]:
# Sanity check for yolo_non_max_suppression on 54 seeded random boxes.
with tf.Session() as test_b:
    scores = tf.random_normal([54], mean=1, stddev=4, seed=1)
    boxes = tf.random_normal([54, 4], mean=1, stddev=4, seed=1)
    classes = tf.random_normal([54], mean=1, stddev=4, seed=1)

    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)
    # The list is built left to right, so the eval() calls run in the same
    # order as six consecutive print statements would.
    print("\n".join([
        "scores[2] = " + str(scores[2].eval()),
        "boxes[2] = " + str(boxes[2].eval()),
        "classes[2] = " + str(classes[2].eval()),
        "scores.shape = " + str(scores.eval().shape),
        "boxes.shape = " + str(boxes.eval().shape),
        "classes.shape = " + str(classes.eval().shape),
    ]))

scores[2] = 6.938395
boxes[2] = [-5.299932    3.1379814   4.450367    0.95942086]
classes[2] = -2.2452729
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


## Implement yolo_eval() which takes the output of the YOLO encoding and filters the boxes using score threshold and NMS

In [19]:
def yolo_eval(yolo_outputs, image_shape = (720. , 1280.), max_boxes = 10, score_threshold = .6, iou_threshold = .5):
    """Turn the YOLO head outputs into final detections.

    Arguments:
    yolo_outputs -- 4-tuple (box_confidence, box_xy, box_wh, box_class_probs)
    image_shape -- shape handed to scale_boxes to rescale the box coordinates
                   (presumably (height, width) of the original image -- confirm
                   against yolo_utils.scale_boxes)
    max_boxes -- maximum number of boxes kept by non-max suppression
    score_threshold -- minimum class score used by yolo_filter_boxes
    iou_threshold -- IoU threshold used by non-max suppression

    Returns:
    (scores, boxes, classes) for the boxes that pass both filtering steps.
    """
    confidence, xy, wh, class_probs = yolo_outputs

    # Step 1: convert the (xy, wh) box encoding to corner coordinates.
    corner_boxes = yolo_boxes_to_corners(xy, wh)

    # Step 2: drop boxes whose best class score is below the threshold.
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, threshold = score_threshold
    )

    # Step 3: rescale the surviving boxes to the given image shape.
    rescaled_boxes = scale_boxes(kept_boxes, image_shape)

    # Step 4: non-max suppression on the rescaled boxes.
    final_scores, final_boxes, final_classes = yolo_non_max_suppression(
        kept_scores, rescaled_boxes, kept_classes,
        max_boxes = max_boxes, iou_threshold = iou_threshold
    )

    return final_scores, final_boxes, final_classes

In [20]:
# Sanity check for yolo_eval on seeded random "YOLO outputs".
with tf.Session() as test_b:
    yolo_outputs = (
        tf.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1),
        tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
        tf.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
        tf.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1),
    )
    scores, boxes, classes = yolo_eval(yolo_outputs)
    # The list is built left to right, so the eval() calls run in the same
    # order as six consecutive print statements would.
    print("\n".join([
        "scores[2] = " + str(scores[2].eval()),
        "boxes[2] = " + str(boxes[2].eval()),
        "classes[2] = " + str(classes[2].eval()),
        "scores.shape = " + str(scores.eval().shape),
        "boxes.shape = " + str(boxes.eval().shape),
        "classes.shape = " + str(classes.eval().shape),
    ]))

scores[2] = 138.79124
boxes[2] = [1292.3297  -278.52167 3876.9893  -835.56494]
classes[2] = 54
scores.shape = (10,)
boxes.shape = (10, 4)
classes.shape = (10,)


## Test YOLO pretrained model on images

In [21]:
# Fetch the TensorFlow session managed by the Keras backend.
sess = K.get_session()


##  Defining classes, anchors and image shape

In [22]:
# Class names and anchors are read from text files under model_data/ using the
# helpers imported from yolo_utils.
class_names = read_classes("model_data/coco_classes.txt")
anchors = read_anchors("model_data/yolo_anchors.txt")
# Same value as the default `image_shape` of yolo_eval; presumably
# (height, width) of the input images -- TODO confirm.
image_shape = (720., 1280.)

## Loading a pretrained model

In [23]:
# Load the saved Keras model. The path is relative to the working directory;
# the recorded run raised OSError because model_data/yolo.h5 was not present.
yolo_model = load_model("model_data/yolo.h5")

OSError: Unable to open file (unable to open file: name = 'model_data/yolo.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)