# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [5]:
from distutils.version import StrictVersion
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

import cv2


# Add the TensorFlow models `research` directory to sys.path so the `object_detection` package can be imported.
research_path = '/Users/Melody/OneDrive/MLprojects/models-master/research'
sys.path.append(research_path)
from object_detection.utils import ops as utils_ops
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Compare only the numeric major/minor components. StrictVersion raises
# ValueError on pre-release version strings such as '1.9.0-rc0', which would
# abort the notebook even though the installed version is recent enough.
if tuple(int(part) for part in tf.__version__.split('.')[:2]) < (1, 9):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')


# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

The upstream demo uses an "SSD with Mobilenet" model by default; this notebook instead loads a custom single-class model exported to `object_detection/rodent_dataset/model`. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [6]:
# Directory that holds the exported detection model.
MODEL_NAME = os.path.join(research_path, 'object_detection/rodent_dataset/model')

# Path to the frozen detection graph. This is the actual model that is used for the object detection.
# Built with os.path.join like the other paths in this cell.
PATH_TO_FROZEN_GRAPH = os.path.join(MODEL_NAME, 'frozen_inference_graph.pb')

# Label map file used to attach the correct label to each box.
PATH_TO_LABELS = os.path.join(research_path, 'object_detection/rodent_dataset/data/labelmap.pbtxt')

# Upper bound on the number of categories read from the label map.
NUM_CLASSES = 1

## Load a (frozen) Tensorflow model into memory.

In [7]:
detection_graph = tf.Graph()

# Read the serialized GraphDef bytes from disk first ...
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
  serialized_graph = fid.read()

# ... then decode them into a GraphDef protobuf ...
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

# ... and import the nodes into `detection_graph` without a name prefix.
with detection_graph.as_default():
  tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [8]:
# Parse the label map file, keep at most NUM_CLASSES categories (using their
# display names), and build the index that is looked up per detection.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper code

In [9]:
def load_image_into_numpy_array(image):
  """Convert a PIL-style image into a (height, width, 3) uint8 numpy array.

  Args:
    image: object exposing `size` as (width, height) and `getdata()`; when it
      has a `mode` attribute other than 'RGB' it must also provide
      `convert('RGB')` (as PIL images do).

  Returns:
    numpy uint8 array of shape (height, width, 3).
  """
  # Grayscale, palette, CMYK or alpha images do not carry exactly three values
  # per pixel, so the reshape below would fail; normalise them to RGB first.
  # Objects without a `mode` attribute are treated as RGB, as before.
  if getattr(image, 'mode', 'RGB') != 'RGB':
    image = image.convert('RGB')
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Detection

In [17]:
# Set this to the folder containing the images that need to be cropped.
PATH_TO_TEST_IMAGES_DIR = '/Users/Melody/OneDrive/lab_data/data/video/behavior/rat_side_20180918_2'
# Crops are written back into this same folder with a 'crop_' prefix, so skip
# those files; otherwise re-running the notebook would crop its own outputs.
# Sorting makes the processing order reproducible (os.listdir order is
# arbitrary), and the extension is matched case-insensitively.
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, x)
    for x in sorted(os.listdir(PATH_TO_TEST_IMAGES_DIR))
    if x.lower().endswith('.jpg') and not x.startswith('crop_')
]

# Size, in inches, of the output images.
#IMAGE_SIZE = (12, 8)

In [18]:
def run_inference_for_single_image(image, graph):
  """Run the detection graph on a single image and return its detections.

  Args:
    image: array fed to the `image_tensor:0` placeholder after a leading batch
      axis is added with `np.expand_dims(image, 0)`. `image.shape[0]` and
      `image.shape[1]` are passed to the mask reframing helper when the graph
      outputs masks.
    graph: graph that already contains the imported detection model.

  Returns:
    dict with `num_detections` converted to int, `detection_classes` as a
    uint8 array, `detection_boxes`, `detection_scores` and, only when the
    graph exposes that tensor, `detection_masks`. The leading batch axis is
    removed from every array.

  Note:
    `num_detections`, `detection_classes`, `detection_boxes` and
    `detection_scores` are read unconditionally after the run, so a graph
    lacking any of them raises KeyError.
  """
  with graph.as_default():
    # NOTE(review): a new Session is opened (and closed) on every call.
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      # Only fetch the outputs that this particular graph actually defines.
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # NOTE(review): the ops created in this branch appear to be added to
        # `graph` again on every call -- confirm whether that growth matters.
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        # Keep only the first `real_num_detection` boxes and masks.
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        # Binarize the reframed masks at 0.5.
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      # Index [0] drops the batch axis, since exactly one image was fed.
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict

# Crop the image

In [19]:
def crop_object_image(image_np_array, savepath, output_dict, normalized = True, save_cropped_img = True,
                      show_cropped_img = False, img_size = (12, 8), score_threshold = 0.5):
    """Crop every detection scoring above `score_threshold` out of an image.

    Args:
        image_np_array: image as a (height, width, channels) numpy array.
        savepath: file path for the first saved crop. Further crops from the
            same image are written next to it with `_1`, `_2`, ... inserted
            before the extension, so they no longer overwrite each other.
        output_dict: dict with numpy arrays `detection_scores` (N,) and
            `detection_boxes` (N, 4); each box is used as
            [row_min, col_min, row_max, col_max].
        normalized: if True, box values are fractions of the image height and
            width; if False, they are already pixel coordinates.
        save_cropped_img: write each crop to disk with PIL.
        show_cropped_img: display each crop in a new matplotlib figure.
        img_size: figure size in inches used when `show_cropped_img` is True.
        score_threshold: detections with a score at or below this are ignored.

    Returns:
        list of the cropped numpy arrays, in the order of the kept boxes.
    """
    # Select the boxes whose score exceeds the threshold.
    keep = output_dict['detection_scores'] > score_threshold
    boxes = output_dict['detection_boxes'][keep]

    height, width, _ = image_np_array.shape
    limits = np.array([height, width, height, width])
    root, ext = os.path.splitext(savepath)

    crops = []
    for box in boxes:
        newbox = np.array(box, dtype=float)
        if normalized:
            newbox = newbox * limits
        # Clamp to the image bounds (a negative index would wrap around when
        # slicing) and always convert to int: float coordinates, as passed
        # with normalized=False, cannot be used as slice indices.
        newbox = np.clip(newbox, 0, limits).astype(int)

        print(newbox)

        row_min, col_min, row_max, col_max = newbox
        if row_max <= row_min or col_max <= col_min:
            # Degenerate box: there is nothing to crop, save or show.
            continue

        image_crop = image_np_array[row_min:row_max, col_min:col_max, :]

        if save_cropped_img:
            # The first crop keeps the caller's path; later ones get a suffix.
            if crops:
                target = '{}_{}{}'.format(root, len(crops), ext)
            else:
                target = savepath
            im = Image.fromarray(image_crop)
            im.save(target)

        if show_cropped_img:
            plt.figure(figsize=img_size)
            plt.imshow(image_crop)

        crops.append(image_crop)

    return crops

In [20]:
# This cell has been tested and produces the cropped images.

for image_path in TEST_IMAGE_PATHS:
  # Decode the file into the array representation that both the detector and
  # the cropping step operate on.
  image = Image.open(image_path)
  image_np = load_image_into_numpy_array(image)

  # Actual detection; the batch dimension is added inside the helper.
  output_dict = run_inference_for_single_image(image_np, detection_graph)

  # Write the crop next to its source image, prefixed with 'crop_'.
  folderpath, filename = os.path.split(image_path)
  savepath = os.path.join(folderpath, 'crop_' + filename)
  crop_object_image(image_np, savepath, output_dict)




[ 41 819 205 959]
[161 811 491 946]
[ 74 791 271 942]
[379 761 718 943]
[ 30 798 390 945]
[  74 1082  266 1231]
[ 50 815 193 939]
[ 39 789 207 943]
[ 37 811 199 939]
[  73 1083  262 1237]
[375 805 646 932]
[ 40 817 200 961]
[ 50 811 187 967]
[ 59 793 210 960]
[ 40 820 198 953]
[116 753 297 939]
[ 33 781 229 932]
[ 63 776 243 943]
[ 67 801 207 935]
[ 41 815 196 953]
[ 62 822 266 955]
[441 770 671 933]
[ 39 822 199 956]
[ 50 645 254 945]
[  77 1086  263 1241]
[ 44 814 200 973]
[  71 1094  264 1245]
[ 72 803 261 938]
[ 57 808 256 948]
[  73 1094  262 1248]
[ 38 819 199 958]
[  73 1091  262 1238]
[ 62 808 188 945]
[  71 1091  266 1237]
[ 58 803 244 951]
[  70 1089  267 1234]
[ 91 828 456 945]
[  67 1088  265 1237]
[ 38 818 196 953]
[ 44 816 200 961]
[ 57 749 190 941]
[133 816 512 945]
[ 42 805 242 943]
[132 810 226 923]
[ 43 817 202 980]
[448 692 562 883]
[  70 1091  268 1235]
[ 72 806 232 954]
[ 63 824 265 950]
[ 57 801 192 934]
[361 827 578 926]
[ 47 819 201 965]
[ 49 803 192 937]
[  69 