In [1]:
import numpy as np
import os
import sys
import tensorflow as tf

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

sys.path.append("..")
from object_detection.utils import ops as utils_ops, label_map_util, visualization_utils as vis_util

if tf.__version__ < '1.4.0':
  raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

%matplotlib inline

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/jet/var/python/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/jet/var/python/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/jet/var/python/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/jet/var/python/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/jet/var/python/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/jet/var/python/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start


## Load a (frozen) Tensorflow model into memory.

In [2]:
# !!!!! MODIFY THIS !!!!!
model = 'russian_signs_ssd'

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(model+'/fine_tuned_model/frozen_inference_graph.pb', 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

### utils

In [3]:
# !!!!! MODIFY THIS !!!!!
label_map = label_map_util.load_labelmap('russian_signs_data/label_map.pbtxt')
NUM_CLASSES= 198 #20, 198
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

In [4]:
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)

In [5]:
def black_background_thumbnail(image, thumbnail_size):
    background = Image.new('RGB', thumbnail_size, "black")    
    source_image = image
    source_image.thumbnail(thumbnail_size)
    (w, h) = source_image.size
    background.paste(source_image, (int((thumbnail_size[0] -w) / 2), int((thumbnail_size[1] - h) / 2 )))
    return background

# Detection

In [6]:
import time

def run_inference_for_single_image(image):
    image_expanded = np.expand_dims(image, 0)

    # Run inference
    #start = time.time()
    (boxes, scores, classes, num) = \
        sess.run([detection_boxes, detection_scores, detection_classes, num_detections], 
                 feed_dict={image_tensor: image_expanded})
    
    #end = time.time()
    #print("only session {}".format(end-start))

    # all outputs are float32 numpy arrays, so convert types as appropriate
    #output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict = {}
    output_dict['detection_classes'] =  np.squeeze(classes).astype(np.uint8)
    output_dict['detection_boxes'] =  np.squeeze(boxes)
    output_dict['detection_scores'] =  np.squeeze(scores)
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
        
    return output_dict

# classification

In [7]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
 
def get_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(len(LABELS), activation='softmax'))
    return model

Using TensorFlow backend.


## get data

In [8]:
import os
os.chdir('../../../')

from russian_signs.get_data import get_data as get_data_russian
from swedish_signs.get_data import get_data as get_data_swedish

os.chdir('models/research/object_detection')

# !!!!! MODIFY THIS !!!!!
data = get_data_russian()
LABELS = data['LABELS']
all_images = data['images']
input_shape = data['input_shape']

In [9]:
images = np.random.choice(all_images, 2000)

# inspect ground truth and model predictions

In [9]:
from keras.preprocessing.image import img_to_array


with detection_graph.as_default():
    with tf.Session() as sess:
        
        class_model = get_model()
        # !!! MODIFY THIS !!!!
        class_model.load_weights('../../../classify_croped_weights_russian')
        
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represent how level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        
        for image in images:
            print(image['filename'])
            img = Image.open(image['filename'])
            image_np = load_image_into_numpy_array(img)

            copy_image = image_np.copy()
            copy_image_2 = image_np.copy()
            copy_img = img.copy()

            print("GROUND TRUTH")

            signs = image['signs']

            boxes = np.array([[sign['ymin'], sign['xmin'], sign['ymax'], sign['xmax']] for sign in signs])
            classes =  np.array([LABELS.index(sign['name'])+1 for sign in signs])
            scores = np.array([1.0 for sign in signs])

            vis_util.visualize_boxes_and_labels_on_image_array(
              image_np,
              boxes,
              classes,
              scores,
              category_index,
              use_normalized_coordinates=True,
              line_thickness=3)

            plt.figure(figsize=(20,20))
            plt.imshow(image_np)
            plt.show()

            print("MODEL PREDICTIONS RAW")
            output_dict = run_inference_for_single_image(copy_image)
            boxes = output_dict['detection_boxes']
            classes = output_dict['detection_classes']
            scores = output_dict['detection_scores']

            vis_util.visualize_boxes_and_labels_on_image_array(
              copy_image,
              boxes,
              classes,
              scores,
              category_index,
              use_normalized_coordinates=True,
              line_thickness=3)

            plt.figure(figsize=(20,20))
            plt.imshow(copy_image)
            plt.show()
            
            print("MODEL PREDICTIONS CLASS")
            output_dict = run_inference_for_single_image(copy_image_2)
            boxes = output_dict['detection_boxes']
            classes = output_dict['detection_classes']
            scores = output_dict['detection_scores']
            
            classes = []

            w, h = copy_img.size

            for i, box in enumerate(boxes):

                left = box[1] * w
                top = box[0] * h
                right = box[3] * w
                bottom = box[2] * h

                crop_img = copy_img.crop((left, top, right, bottom))
                crop_img = crop_img.resize((input_shape[0], input_shape[1]))
                crop_img = img_to_array(crop_img)
                pred = class_model.predict(np.expand_dims(crop_img, axis=0))

                classes.append(np.argmax(pred[0])+1)
            classes = np.array(classes)

            vis_util.visualize_boxes_and_labels_on_image_array(
              copy_image_2,
              boxes,
              classes,
              scores,
              category_index,
              use_normalized_coordinates=True,
              line_thickness=3)

            plt.figure(figsize=(20,20))
            plt.imshow(copy_image_2)
            plt.show()

NameError: name 'images' is not defined

# calculate mAP

In [None]:
len(images)

In [10]:
from keras.preprocessing.image import img_to_array

def get_preds(image_dict, use_classification_croped=False, box_scores_thres=0.6):
    
    image = Image.open(image_dict['filename'])
    #image = black_background_thumbnail(image, (1280, 960))
    image_np = load_image_into_numpy_array(image)
    image_np_expanded = np.expand_dims(image_np, axis=0)
    
    #start = time.time()
    output_dict = run_inference_for_single_image(image_np)
    #end = time.time()
    
    #print("all {}".format(end-start))
    
    inds = output_dict['detection_scores'] > box_scores_thres
    boxes = output_dict['detection_boxes'][inds]
    scores = output_dict['detection_scores'][inds]
    classes = output_dict['detection_classes'][inds]

    if use_classification_croped:
        classes = []

        w, h = image.size

        for i, box in enumerate(boxes):
            
            left = box[1] * w
            top = box[0] * h
            right = box[3] * w
            bottom = box[2] * h

            crop_img = image.crop((left, top, right, bottom))
            crop_img = crop_img.resize((input_shape[0], input_shape[1]))
            crop_img = img_to_array(crop_img)
            pred = class_model.predict(np.expand_dims(crop_img, axis=0))

            classes.append(np.argmax(pred[0])+1)
        classes = np.array(classes)
                
    return boxes, scores, classes     

Using TensorFlow backend.


In [28]:
all_detections     = [[None for i in range(len(LABELS))] for j in range(len(images))]
all_annotations    = [[None for i in range(len(LABELS))] for j in range(len(images))]

num_preds = 0

import time

start = time.time()
with detection_graph.as_default():
    with tf.Session() as sess:
        class_model = get_model()
        # !!! MODIFY THIS !!!!
        class_model.load_weights('../../../classify_croped_weights_russian')
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represent how level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        
        for i, image_dict in enumerate(images):
            boxes, scores, classes = get_preds(image_dict, use_classification_croped=True)
            score_sort = np.argsort(-scores)
            pred_labels = classes[score_sort]
            pred_boxes  = boxes[score_sort]
            scores = scores[score_sort]

            if not (len(boxes) == len(scores) == len(classes)):
                print("!!!!!!!!!!")

            if not (len(boxes) == len(pred_boxes)):
                print("??????")

            num_preds += len(scores)

            if i % 100 == 0:
                print(i, "{}".format(time.time() - start))
                start = time.time()

            new_pred_boxes = [None for i in range(len(pred_boxes))]

            for ind in range(len(pred_boxes)):
                new_pred_boxes[ind] = np.append(pred_boxes[ind], scores[ind])

            pred_boxes = np.array(new_pred_boxes).copy()

            #print("all preds cnt", len(scores))

            d1 = {}
            for label in range(len(LABELS)):
                d1[label] = pred_boxes[pred_labels == label+1]

            cn_ = 0
            for label in range(len(LABELS)):
                all_detections[i][label] = d1[label].copy()
                cn_ += len(all_detections[i][label])
            #print("have", cn_)

            #print(all_detections[i])

            annotations = [[sign['xmin'], sign['ymin'], sign['xmax'], sign['ymax']] for sign in image_dict['signs']]

            d2 = {}

            for label in range(len(LABELS)):
                d2[label] = []
                for ind,sign in enumerate(image_dict['signs']):
                    if LABELS.index(sign['name']) == label:
                        d2[label].append(annotations[ind])

            for label in range(len(LABELS)):
                all_annotations[i][label] = d2[label].copy()
        
print("all predictions cnt:", num_preds)

0 17.719529151916504
100 117.4406681060791
200 115.10358500480652
300 121.64576148986816
400 121.41639375686646
500 117.52361369132996
600 112.57298827171326
700 115.53066754341125
800 108.85897421836853
900 109.4376175403595
1000 115.0607647895813
1100 114.40808820724487
1200 115.617258310318
1300 114.59703183174133
1400 119.73213529586792
1500 120.45418095588684
1600 118.28280186653137
1700 115.30648946762085
1800 117.72275876998901
1900 113.3415675163269
all predictions cnt: 565


In [15]:
print(12)

12


In [29]:
cnt = 0
for i in range(len(images)):
    for j in range(len(LABELS)):
        cnt += len(all_detections[i][j])
cnt

565

In [178]:
with open("all_detections_swedish_with_class", "w") as f:
    f.write(str(all_detections))
    
with open("all_annotations_swedish_with_class", "w") as f:
    f.write(str(all_annotations))

In [20]:
def compute_overlap(a, b):
    a = np.array(a)
    b = np.array(b)
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])
        
    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih

    ua = np.maximum(ua, np.finfo(float).eps)
    
    
    intersection = iw * ih
    return intersection / ua  
    
def compute_ap(recall, precision):
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    i = np.where(mrec[1:] != mrec[:-1])[0]
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap

In [30]:
print("start computing precisions")
        
average_precisions = {}
    
iou_threshold = 0.2
    
for label in range(len(LABELS)):
    
    false_positives = np.zeros((0,))
    true_positives  = np.zeros((0,))
    scores          = np.zeros((0,))
    num_annotations = 0
    num_preds = 0

    for i, image_dict in enumerate(images):
        detections           = all_detections[i][label]
        annotations          = all_annotations[i][label]
        num_annotations     += len(annotations)
        detected_annotations = []
        num_preds += len(detections)
        
        #print(detections)
        
        for d in detections:
            scores = np.append(scores, d[4])

            if len(annotations) == 0:
                print("false prediction")
                false_positives = np.append(false_positives, 1)
                true_positives  = np.append(true_positives, 0)
                continue

            overlaps            = compute_overlap(np.expand_dims([d[1], d[0], d[3], d[2]], axis=0), annotations)
            #print(overlaps)
            assigned_annotation = np.argmax(overlaps, axis=1)
            max_overlap         = overlaps[0, assigned_annotation]
            if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                print("accepted prediction")
                false_positives = np.append(false_positives, 0)
                true_positives  = np.append(true_positives, 1)
                detected_annotations.append(assigned_annotation)
            else:
                print("rejected prediction")
                false_positives = np.append(false_positives, 1)
                true_positives  = np.append(true_positives, 0)
                
    #print("predictions cnt for", label, ":", num_preds)
    
    # no annotations -> AP for this class is 0 (is this correct?)
    if num_annotations == 0:
        average_precisions[label] = 0
        continue
    
    # sort by score
    indices         = np.argsort(-scores)
    false_positives = false_positives[indices]
    true_positives  = true_positives[indices]
    
    # compute false positives and true positives
    false_positives = np.cumsum(false_positives)
    true_positives  = np.cumsum(true_positives)

    # compute recall and precision
    recall    = true_positives / float(num_annotations)
    precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

    # compute average precision
    average_precision  = compute_ap(recall, precision)  
    average_precisions[label] = average_precision

start computing precisions
accepted prediction
false prediction
accepted prediction
false prediction
false prediction
false prediction
false prediction
false prediction
false prediction
false prediction
accepted prediction
false prediction
accepted prediction
accepted prediction
rejected prediction
false prediction
accepted prediction
false prediction
false prediction
accepted prediction
accepted prediction
accepted prediction
false prediction
false prediction
accepted prediction
rejected prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
false prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted predictio

false prediction
false prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
false prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
accepted prediction
false prediction
false prediction
false prediction
false prediction
false prediction
false prediction
false prediction
false prediction
accepted prediction
false prediction
false prediction
accepted prediction
accepted prediction
false prediction
accepted prediction


In [31]:
average_precisions

{0: 0.0,
 1: 0,
 2: 0.092592592592592587,
 3: 0.0,
 4: 0.0,
 5: 0.0,
 6: 0,
 7: 0.0,
 8: 0.0,
 9: 0.0,
 10: 0.0,
 11: 0.33333333333333331,
 12: 0.0,
 13: 0,
 14: 0.0,
 15: 0.18518518518518517,
 16: 0.0,
 17: 0,
 18: 0,
 19: 0.0,
 20: 0.0,
 21: 0.5643915939581573,
 22: 0.5,
 23: 0.0,
 24: 0,
 25: 0.45804195804195807,
 26: 0.5,
 27: 0,
 28: 0,
 29: 0.10000000000000001,
 30: 0,
 31: 0,
 32: 0.0,
 33: 0.0,
 34: 0,
 35: 0.0,
 36: 0.21464019851116625,
 37: 0.0,
 38: 0.0,
 39: 0.25,
 40: 0.0,
 41: 0.0,
 42: 0.0,
 43: 0,
 44: 0.5,
 45: 0.0,
 46: 0,
 47: 0,
 48: 0,
 49: 0.29166666666666669,
 50: 0,
 51: 0,
 52: 0.0,
 53: 0.0,
 54: 0,
 55: 0,
 56: 0,
 57: 0,
 58: 0.32954545454545459,
 59: 0.0,
 60: 0.0,
 61: 0.0,
 62: 0,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 0,
 68: 0.13333333333333333,
 69: 0.39305326969497473,
 70: 0,
 71: 0,
 72: 0.42291882291882293,
 73: 0.015151515151515152,
 74: 0.0,
 75: 0.0,
 76: 0,
 77: 0,
 78: 0.0,
 79: 0.0,
 80: 0.0,
 81: 0,
 82: 0.0,
 83: 0,
 84: 0.0,
 85: 0,
 86: 0.0