# Object Detection API Test

In [1]:
import tensorflow as tf
import numpy as np
import PIL
import PIL.ImageDraw as ImageDraw
from IPython import display
import os

In [6]:
# Load the test image and build a one-image batch for the network:
# resized to 160x160 and rescaled with x/128 - 1.
testimage = PIL.Image.open('test_images/4.jpg')
pixels = np.array(testimage.resize((160, 160))).astype(np.float32)
imgs = np.array([pixels / 128 - 1])

# Intermediate tensor whose value range is inspected below.
tensorname = 'FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_11_pointwise/add_fold'

# Read the frozen graph through a context manager so the file handle is
# closed as soon as the bytes have been parsed.
with open("inference/tflite_graph.pb", 'rb') as graph_file:
    gd = tf.GraphDef.FromString(graph_file.read())
inp, x = tf.import_graph_def(
    gd, return_elements=['normalized_input_image_tensor:0', tensorname + ':0'])

# After this block `x` holds the evaluated activations (a NumPy array),
# no longer the graph tensor.
with tf.Session(graph=inp.graph) as sess:
    x = sess.run(x, feed_dict={inp: imgs})

print(np.min(x))
print(np.max(x))

-14.825256
1.4318237


In [7]:
# Disabled alternative: load every file in image/ as a batch instead of the
# single test image. NOTE(review): `ids` is not initialised in the visible
# cells; create it (and an empty `imgs`) before re-enabling this loop.
# for filename in os.listdir('image'):
#     imgs.append(np.array(PIL.Image.open('image/'+filename).resize((160,160))).astype(np.float32) / 128 - 1)
#     ids.append(int(filename.split('_')[2].split('.')[0]))
#
# imgs = np.array(imgs)

# Load the test image and build a one-image batch for the network:
# resized to 160x160 and rescaled with x/128 - 1.
testimage = PIL.Image.open('test_images/4.jpg')
pixels = np.array(testimage.resize((160, 160))).astype(np.float32)
imgs = np.array([pixels / 128 - 1])

# Read the frozen graph through a context manager so the file handle is
# closed as soon as the bytes have been parsed.
with open("inference/tflite_graph.pb", 'rb') as graph_file:
    gd = tf.GraphDef.FromString(graph_file.read())

# x -> 'convert_scores:0' (class scores), y -> 'concat:0' (encoded boxes).
inp, x, y = tf.import_graph_def(
    gd,
    return_elements=['normalized_input_image_tensor:0', 'convert_scores:0', 'concat:0'])

with tf.Session(graph=inp.graph) as sess:
    scores, box_preds = sess.run([x, y], feed_dict={inp: imgs})

In [10]:
# Index of the prediction with the highest score in class column 1
# for the first (only) image of the batch.
class1_scores = scores[0, :, 1]
pred_id = np.argmax(class1_scores)

# Raw regression output [ty, tx, th, tw] and the matching anchor
# [ycenter, xcenter, h, w] for that prediction.
encoded_box = box_preds[0, pred_id, :]
anchor_box = get_anchor_box(pred_id)

# Decode to [ymin, xmin, ymax, xmax], draw it on the test image, then print it.
box = decode(encoded_box, anchor_box)
draw_bbox(testimage, box)
print(box)

[0.14544197057407338, 0.25922437696264433, 0.6004415299370112, 0.8322575628533653]


### Order of Box/Class prediction tensor
#### 1. Class predictors
Before reshape & concat:

**H x W x Anchors x Classes**

After reshape & concat:

**\[H1xW1xAnchors + H2xW2xAnchors + ... + H5xW5xAnchors\] x Classes**, i.e. shape (1, 834, 2) for a single image, where 834 = (10x10 + 5x5 + 3x3 + 2x2 + 1x1) x 6

#### 2. Box predictors
Before reshape & concat:

**H x W x Anchors x 4**

After reshape & concat:

**\[H1xW1xAnchors + H2xW2xAnchors + ... + H5xW5xAnchors\] x 4**, i.e. shape (1, 834, 4) for a single image

#### 3. Anchors
- num_layers: 5
- min_scale: 0.2
- max_scale: 0.95
- aspect_ratios: 1.0
- aspect_ratios: 2.0
- aspect_ratios: 0.5
- aspect_ratios: 3.0
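- aspect_ratios: 0.3333
- in addition, every location gets one extra square anchor with scale sqrt(s_k * s_{k+1}), so there are 6 anchors per feature-map cell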


In [9]:
def get_fmap_size(fmap_id):
    """Return the ``[height, width]`` grid size of one SSD feature map.

    Args:
        fmap_id: Index into the five-entry size table (0 is the 10x10 grid,
            4 is the 1x1 grid).

    Returns:
        A new two-element list ``[height, width]``.

    Raises:
        IndexError: If ``fmap_id`` does not index the table.
    """
    fmap_size = [
        [10, 10],
        [5, 5],
        [3, 3],
        [2, 2],
        [1, 1],
    ]
    return fmap_size[fmap_id]


def get_anchor_box(pred_id):
    """Return the anchor box that belongs to one flattened prediction index.

    Predictions are laid out feature map by feature map (finest grid first),
    row-major inside each grid, with 6 consecutive anchors per grid cell:
    the five aspect ratios followed by one extra square anchor.

    Args:
        pred_id: Index of the prediction, ``0 <= pred_id < 834``.

    Returns:
        ``[ycenter, xcenter, height, width]`` of the anchor, expressed as
        fractions of the image size.

    Raises:
        ValueError: If ``pred_id`` is outside the valid range. Such indices
            would otherwise map to an anchor centred outside the image.
    """
    anchors_per_cell = 6
    num_layers = 5
    min_scale = 0.2
    max_scale = 0.95
    aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.3333]

    # Derive the layer boundaries from the size table instead of repeating
    # them as magic numbers, so the two can never disagree.
    fmap_sizes = [get_fmap_size(layer) for layer in range(num_layers)]
    num_anchors = anchors_per_cell * sum(rows * cols for rows, cols in fmap_sizes)
    if not 0 <= pred_id < num_anchors:
        raise ValueError(
            'pred_id must be in [0, %d), got %r' % (num_anchors, pred_id))

    # Integer arithmetic keeps the cell/anchor indices exact ints.
    pixel_id, anchor_box_id = divmod(int(pred_id), anchors_per_cell)

    # Find the feature map that owns this cell and make pixel_id relative
    # to it. The range check above guarantees the loop hits `break`.
    for fmap_id, (rows, cols) in enumerate(fmap_sizes):
        if pixel_id < rows * cols:
            break
        pixel_id -= rows * cols

    # Anchor centres sit in the middle of their grid cell.
    row, col = divmod(pixel_id, cols)
    ycenter_a = (row + 0.5) / rows
    xcenter_a = (col + 0.5) / cols

    # One scale per layer, linearly spaced between min_scale and max_scale,
    # plus a trailing 1.0 used as the "next" scale of the last layer.
    scales = list(
        min_scale + (max_scale - min_scale) / (num_layers - 1) * np.arange(num_layers)
    ) + [1.0]
    scale = scales[fmap_id]

    if anchor_box_id == anchors_per_cell - 1:
        # Extra square anchor: geometric mean of this scale and the next one.
        wa = ha = np.sqrt(scale * scales[fmap_id + 1])
    else:
        ratio = aspect_ratios[anchor_box_id]
        wa = scale * np.sqrt(ratio)
        ha = scale / np.sqrt(ratio)

    return [ycenter_a, xcenter_a, ha, wa]

def decode(encoded_box, anchor_box):
    """Decode a box regression output relative to its anchor.

    The inputs are never modified. Each component may be a scalar or a
    NumPy array, so a whole batch can be decoded by passing per-component
    arrays.

    Args:
        encoded_box: ``[ty, tx, th, tw]`` regression output.
        anchor_box: ``[ycenter_a, xcenter_a, ha, wa]`` of the matching anchor.

    Returns:
        ``[ymin, xmin, ymax, xmax]`` in the same coordinate system as the
        anchor.
    """
    # Divisors applied to (ty, tx, th, tw) before decoding.
    y_scale, x_scale, h_scale, w_scale = 10.0, 10.0, 5.0, 5.0

    # Plain division (not `/=`) so ndarray components owned by the caller
    # are left untouched.
    ty = encoded_box[0] / y_scale
    tx = encoded_box[1] / x_scale
    th = encoded_box[2] / h_scale
    tw = encoded_box[3] / w_scale

    ycenter_a = anchor_box[0]
    xcenter_a = anchor_box[1]
    ha = anchor_box[2]
    wa = anchor_box[3]

    # Sizes are predicted in log space, centres as offsets in anchor units.
    w = np.exp(tw) * wa
    h = np.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a

    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.

    return [ymin, xmin, ymax, xmax]

def draw_bbox(image, box):
    """Outline ``box`` in red on ``image`` and display the result.

    The outline is drawn directly onto ``image`` (the image is modified in
    place) and ``image.show()`` is then called.

    Args:
        image: Image object to draw on; must provide ``size`` and ``show()``.
        box: ``[ymin, xmin, ymax, xmax]`` as fractions of the image height
            and width.
    """
    pen = PIL.ImageDraw.Draw(image)
    img_w, img_h = image.size

    # Scale the fractional box to pixel coordinates.
    left = box[1] * img_w
    right = box[3] * img_w
    top = box[0] * img_h
    bottom = box[2] * img_h

    # Closed polyline: start at the top-left corner and return to it.
    outline = [
        (left, top),
        (left, bottom),
        (right, bottom),
        (right, top),
        (left, top),
    ]
    pen.line(outline, width=2, fill='red')
    image.show()