# Object Recognition


In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
%matplotlib inline
from PIL import Image

## Load files

In [2]:
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE_NAME = MODEL_NAME + '.tar.gz'
DESTINATION_FOLDER = 'datasets'
MODEL_FILE = os.path.join(DESTINATION_FOLDER, MODEL_FILE_NAME)

DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = os.path.join(DESTINATION_FOLDER, MODEL_NAME, 'frozen_inference_graph.pb')

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(DESTINATION_FOLDER, 'data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90

In [3]:
# download pre-trained file if not exit locally
if not os.path.isfile(MODEL_FILE) or \
    not os.path.isfile(os.path.join(DESTINATION_FOLDER, MODEL_NAME, 'frozen_inference_graph.pb')):
        
        opener = urllib.request.URLopener()
        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE_NAME, MODEL_FILE)
        tar_file = tarfile.open(MODEL_FILE)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, DESTINATION_FOLDER)

## Load Tensorflow trained model

In [4]:
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

## Load labels

In [5]:
from google.protobuf import text_format
from protos import string_int_label_map_pb2

label_map = string_int_label_map_pb2.StringIntLabelMap()
with tf.gfile.GFile(PATH_TO_LABELS, 'r') as fid:
    label_map_string = fid.read()
    text_format.Merge(label_map_string, label_map)
    

label_dict = { item.id: item.display_name for item in label_map.item }
# label_dict = {}
# for item in label_map.item:
#     label_dict[item.id] = item.display_name

# print('\n'.join(item[1] for item in sorted(label_dict.items(), key=lambda tup: tup[1])))
print('\n'.join(item.display_name for item in sorted(label_map.item, key=lambda k: k.display_name)))

airplane
apple
backpack
banana
baseball bat
baseball glove
bear
bed
bench
bicycle
bird
boat
book
bottle
bowl
broccoli
bus
cake
car
carrot
cat
cell phone
chair
clock
couch
cow
cup
dining table
dog
donut
elephant
fire hydrant
fork
frisbee
giraffe
hair drier
handbag
horse
hot dog
keyboard
kite
knife
laptop
microwave
motorcycle
mouse
orange
oven
parking meter
person
pizza
potted plant
refrigerator
remote
sandwich
scissors
sheep
sink
skateboard
skis
snowboard
spoon
sports ball
stop sign
suitcase
surfboard
teddy bear
tennis racket
tie
toaster
toilet
toothbrush
traffic light
train
truck
tv
umbrella
vase
wine glass
zebra


# Helpers

In [6]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Demo

In [9]:
img1 = './img/image1.jpg'
img2 = './img/image2.jpg'
img3 = './img/image3.jpg'

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represent how level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        image = Image.open(img3)
        image_np = load_image_into_numpy_array(image)
        image_np_expanded = np.expand_dims(image_np, axis=0)
        (boxes, scores, classes, num) = sess.run(
          [detection_boxes, detection_scores, detection_classes, num_detections],
          feed_dict={image_tensor: image_np_expanded})

In [10]:
scores = np.squeeze(scores)
classes = np.squeeze(classes).astype(np.int32)
boxes = np.squeeze(boxes)
print('Probablity = {0:.2f}% that this is a {1}'.format(scores[0] * 100, label_dict[classes[0]]))
print('Probablity = {0:.2f}% that this is a {1}'.format(scores[1] * 100, label_dict[classes[1]]))
print(boxes[0])
print(boxes[1])

Probablity = 98.58% that this is a train
Probablity = 15.29% that this is a person
[ 0.40593365  0.1493234   0.74471927  0.8904143 ]
[ 0.66618496  0.19311416  0.68574136  0.20529506]


In [142]:
labels = (np.vectorize(label_dict.get)(classes))
coord_labels = ['x', 'y', 'w', 'h']

def box_to_dict(box):
    return dict(zip(coord_labels, box))

boxes_dict = np.apply_along_axis(box_to_dict, 1, boxes)

result_labels = [ 'label', 'probability', 'box']

def result_to_dict(row):
    return dict(zip(result_labels, row))

result = np.apply_along_axis(result_to_dict, 1, np.c_[labels, scores, boxes_dict])
result[0]

{'box': {'h': 0.8904143, 'w': 0.74471927, 'x': 0.40593365, 'y': 0.1493234},
 'label': 'train',
 'probability': 0.985750675201416}