# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start.

# Imports

In [32]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import pandas as pd
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
import scipy.misc
from time import time



## Env setup

In [33]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# The notebook is stored in the object_detection folder, so the parent directory
# is added to the module search path.
sys.path.append("..")

## Object detection imports
Here are the imports from the object detection module.

In [34]:
from utils import label_map_util

from utils import visualization_utils as vis_util

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [35]:
# Name of the pre-trained model archive to fetch, and where to fetch it from.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
MODEL_FILE = '{}.tar.gz'.format(MODEL_NAME)
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Frozen detection graph inside the extracted archive; this is the model that
# actually performs the object detection.
PATH_TO_CKPT = '/'.join((MODEL_NAME, 'frozen_inference_graph.pb'))

# Label map file used to attach the correct label string to each box.
# It is looked up relative to the current working directory.
PATH_TO_LABELS = 'mscoco_label_map.pbtxt'

# Upper bound on the number of label-map classes passed to the label-map utilities.
NUM_CLASSES = 90

## Download Model

In [36]:
# Download the model archive only when it is not already present next to the notebook.
if not os.path.exists(MODEL_FILE):
    print("start download model")
    # urlretrieve replaces the deprecated URLopener().retrieve() call.
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Extract just the frozen inference graph into the current working directory.
# The context manager guarantees the archive handle is closed, even on error.
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        if 'frozen_inference_graph.pb' in os.path.basename(member.name):
            tar_file.extract(member, os.getcwd())

## Load a (frozen) Tensorflow model into memory.

In [37]:
# Read the serialized frozen graph from disk and parse it into a GraphDef proto.
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
  serialized_graph = fid.read()
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)

# Import the parsed definition into a dedicated graph; name='' means the
# imported ops are not given an extra name prefix.
detection_graph = tf.Graph()
with detection_graph.as_default():
  tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [38]:
# Load the label map file, turn it into a list of categories (capped at
# NUM_CLASSES, using display names), then index those categories for lookup.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper code

In [39]:
def load_image_into_numpy_array(image):
  """Convert a PIL-style image into a (height, width, 3) uint8 array.

  The image is converted to RGB first, so grayscale, palette and RGBA inputs
  yield three channels instead of failing on a fixed 3-channel reshape.

  Args:
    image: object exposing ``convert(mode)`` whose result NumPy can turn into
      an array (e.g. a ``PIL.Image.Image``).

  Returns:
    A new, writable ``np.uint8`` array of shape (height, width, 3).
  """
  # np.array copies the pixel buffer, so callers may draw on the result in place.
  return np.array(image.convert('RGB'), dtype=np.uint8)

In [46]:
# Everything produced by the auto-boxing pass lives under this directory.
out_path = os.path.join("auto_box")
out_filename = os.path.join(out_path, "auto_box.csv")
multi_out_filename = os.path.join(out_path, "multi_box_filename.csv")
out_box_image_dir = os.path.join(out_path, "box_image")

if not os.path.exists(out_path):
    os.makedirs(out_path)

# Create each CSV with only its header row when it does not exist yet, so that
# later code can append rows without writing a header.
for csv_path, csv_columns in (
    (out_filename, ["filename", "width", "height", "class", "xmin", "ymin", "xmax", "ymax", "score"]),
    (multi_out_filename, ["filename"]),
):
    if os.path.exists(csv_path):
        continue
    df = pd.DataFrame(columns=csv_columns)
    df.to_csv(csv_path, index=False)
    print("no file, makefile => %s" % csv_path)

# Directory receiving the images with the detected boxes drawn on them.
if not os.path.exists(out_box_image_dir):
    os.makedirs(out_box_image_dir)
    print("no dir, makedir => %s" % out_box_image_dir)

no file, makefile => auto_box\auto_box.csv


# Detection

In [41]:
# Directory holding hand-picked sample images.
# NOTE(review): no live code visible in this notebook reads this constant.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
# To test the code with a fixed list of images, define TEST_IMAGE_PATHS, e.g.:
# TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 3) ]
# Prefer paths relative to the notebook over machine-specific absolute paths.

# Size, in inches, of the output images.
# NOTE(review): no live code visible in this notebook reads this constant.
IMAGE_SIZE = (12, 8)

# Iterate over the dataset and auto-generate a bounding box for each picture

In [49]:
# Dataset layout: <DATASET_ROOT>/<mode>/<label>/<image file>.
# The folder name is used as the class label written to the CSV.
DATASET_ROOT = "./dataset/self_divide"
DATASET_MODES = ["train", "eval"]
PET_LABELS = ["dog", "cat"]
BOX_COLUMNS = ["filename", "width", "height", "class", "xmin", "ymin", "xmax", "ymax", "score"]
# Minimum confidence (0-1) for a detection to count in the "more than one pet" check.
# NOTE(review): 0.5 is an assumed threshold; previously the multi-box branch was
# unreachable, so multi_box_filename.csv never received any rows.
MIN_MULTI_BOX_SCORE = 0.5

with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # The graph endpoints are the same for every image, so look them up once
        # instead of once per image.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        fetches = [
            # Each box represents a part of the image where an object was detected.
            detection_graph.get_tensor_by_name('detection_boxes:0'),
            # Each score is the level of confidence for the corresponding box.
            detection_graph.get_tensor_by_name('detection_scores:0'),
            detection_graph.get_tensor_by_name('detection_classes:0'),
            detection_graph.get_tensor_by_name('num_detections:0'),
        ]

        each_batch_time = time()
        for mode in DATASET_MODES:
            for label in PET_LABELS:
                class_path = DATASET_ROOT + "/" + mode + "/" + label + "/"
                for img_count, img_name in enumerate(os.listdir(class_path)):
                    if img_count % 100 == 0:
                        output_str = mode + " step -- " + str(img_count)
                        print(output_str, " compute 100 image_batch time = ", time() - each_batch_time)
                        each_batch_time = time()

                    # Close the file handle as soon as the pixels have been copied out.
                    with Image.open(os.path.join(class_path, img_name)) as image:
                        img_width, img_height = image.size
                        # The array is reused below to draw the boxes and labels on.
                        image_np = load_image_into_numpy_array(image)

                    # The model expects a batch dimension: [1, None, None, 3].
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    (boxes, scores, class_ids, num_detections) = sess.run(
                        fetches, feed_dict={image_tensor: image_np_expanded})

                    # Drop the batch axis (batch size is 1); distinct names keep the
                    # label list above from being overwritten by detector output.
                    boxes = boxes[0]
                    scores = scores[0]
                    class_ids = class_ids[0].astype(np.int32)

                    # Draws boxes and labels onto image_np in place.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        boxes,
                        class_ids,
                        scores,
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=8)

                    if boxes.shape[0] == 0:
                        # Nothing detected at all: no box to record for this image.
                        continue

                    # Record the first detection, labelled with the folder's class.
                    # Coordinates are written exactly as the model returns them.
                    ymin, xmin, ymax, xmax = boxes[0].tolist()
                    score = scores[0] * 100
                    df = pd.DataFrame(
                        [[img_name, img_width, img_height, label, xmin, ymin, xmax, ymax, score]],
                        columns=BOX_COLUMNS)
                    df.to_csv(out_filename, mode='a', header=False, index=False)

                    # Image.save replaces scipy.misc.imsave, which newer SciPy no longer ships.
                    out_image_name = os.path.join(out_box_image_dir, img_name)
                    Image.fromarray(image_np).save(out_image_name)

                    # Flag images in which the detector itself sees more than one pet.
                    detected = int(num_detections[0])
                    pet_count = sum(
                        1
                        for class_id, det_score in zip(class_ids[:detected], scores[:detected])
                        if det_score >= MIN_MULTI_BOX_SCORE
                        and category_index.get(int(class_id), {}).get('name') in PET_LABELS)
                    if pet_count > 1:
                        df = pd.DataFrame([[img_name]], columns=["filename"])
                        df.to_csv(multi_out_filename, mode='a', header=False, index=False)

train step -- 0  compute 100 image _ batch time =  0.01905083656311035
train step -- 100  compute 100 image _ batch time =  55.88991332054138
train step -- 200  compute 100 image _ batch time =  51.78047513961792
train step -- 300  compute 100 image _ batch time =  52.71752738952637
train step -- 400  compute 100 image _ batch time =  52.792128801345825
train step -- 500  compute 100 image _ batch time =  51.112850189208984
train step -- 600  compute 100 image _ batch time =  51.86382699012756
train step -- 700  compute 100 image _ batch time =  51.090667486190796
train step -- 800  compute 100 image _ batch time =  51.58588099479675
train step -- 900  compute 100 image _ batch time =  52.16192030906677
train step -- 1000  compute 100 image _ batch time =  51.74716925621033
train step -- 1100  compute 100 image _ batch time =  52.033764600753784
train step -- 1200  compute 100 image _ batch time =  50.92648506164551
train step -- 1300  compute 100 image _ batch time =  52.0119650363922

train step -- 1400  compute 100 image _ batch time =  51.15798568725586
train step -- 1500  compute 100 image _ batch time =  52.670005083084106
train step -- 1600  compute 100 image _ batch time =  52.19975519180298
train step -- 1700  compute 100 image _ batch time =  53.1813645362854
train step -- 1800  compute 100 image _ batch time =  53.50321960449219
train step -- 1900  compute 100 image _ batch time =  52.40429878234863
train step -- 2000  compute 100 image _ batch time =  53.25355625152588
train step -- 2100  compute 100 image _ batch time =  52.78631401062012
train step -- 2200  compute 100 image _ batch time =  53.39794039726257
train step -- 2300  compute 100 image _ batch time =  53.336777210235596
train step -- 2400  compute 100 image _ batch time =  52.22682762145996
train step -- 2500  compute 100 image _ batch time =  53.416990756988525
train step -- 2600  compute 100 image _ batch time =  53.2174608707428
train step -- 2700  compute 100 image _ batch time =  52.799348

eval step -- 400  compute 100 image _ batch time =  52.715670585632324
eval step -- 500  compute 100 image _ batch time =  53.211326122283936
eval step -- 600  compute 100 image _ batch time =  53.99156594276428
eval step -- 700  compute 100 image _ batch time =  52.36043572425842
eval step -- 800  compute 100 image _ batch time =  53.115938901901245
eval step -- 900  compute 100 image _ batch time =  53.94183826446533
eval step -- 1000  compute 100 image _ batch time =  52.87324666976929
eval step -- 1100  compute 100 image _ batch time =  52.54030799865723
eval step -- 1200  compute 100 image _ batch time =  53.087090730667114
eval step -- 1300  compute 100 image _ batch time =  53.523624658584595
eval step -- 1400  compute 100 image _ batch time =  53.07315516471863
eval step -- 1500  compute 100 image _ batch time =  53.28983235359192
eval step -- 1600  compute 100 image _ batch time =  52.47788381576538
eval step -- 1700  compute 100 image _ batch time =  53.74917936325073
eval st