## Load necessary modules

In [1]:
# show images inline
%matplotlib inline

# automatically reload modules when they have changed
%load_ext autoreload
%autoreload 2

# import keras
import keras

# import keras_retinanet
from keras_retinanet.models.resnet import custom_objects
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image

# import miscellaneous modules
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time

# set tf backend to allow memory to grow, instead of claiming everything
import tensorflow as tf

def get_session():
    """Build a TensorFlow session that grows its GPU memory use on demand.

    Returns:
        A ``tf.Session`` created from a ``tf.ConfigProto`` whose
        ``gpu_options.allow_growth`` flag is enabled.
    """
    session_config = tf.ConfigProto()
    # allocate GPU memory incrementally instead of claiming everything up front
    session_config.gpu_options.allow_growth = True
    session = tf.Session(config=session_config)
    return session

# use this environment flag to change which GPU to use
# (left disabled; uncomment and edit the device index to pin a specific GPU)
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# set the modified tf session as backend in keras, so keras uses the
# session returned by get_session() (GPU memory growth enabled)
keras.backend.tensorflow_backend.set_session(get_session())

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Load RetinaNet model

In [2]:
# adjust this to point to your downloaded/trained model
# (resolved relative to the current working directory: ../snapshots/<snapshot>.h5)
model_path = os.path.join('..', 'snapshots', 'resnet50_coco_best_v1.2.2.h5')

# load retinanet model
# custom_objects comes from keras_retinanet.models.resnet; presumably it lets
# keras deserialize the RetinaNet-specific objects stored in the snapshot
model = keras.models.load_model(model_path, custom_objects=custom_objects)
#print(model.summary())

# load label to names mapping for visualization purposes
# keys are the integer class indices 0-79 produced by argmax over the class scores;
# values are the display names used in captions
# (presumably the COCO classes, matching the 'coco' snapshot name -- confirm)
labels_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}

  sample_weight_mode=sample_weight_mode)


## Run detection on example

In [3]:
# load image
# (presumably a BGR array, going by the helper's name and the BGR->RGB conversion below)
image = read_image_bgr('000000008021.jpg')

# copy to draw on
# the copy is taken before preprocessing, so boxes are drawn on the unmodified pixels;
# it is converted to RGB, the channel order matplotlib's imshow expects
draw = image.copy()
draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

# preprocess image for network
# resize_image also returns the scale factor it applied; it is used further down
# to map the predicted boxes back to the original image size
image = preprocess_image(image)
image, scale = resize_image(image)

# process image
# np.expand_dims adds a leading batch axis so the single image forms a batch of one;
# only the third model output (detections) is used, the first two are discarded
start = time.time()
_, _, detections = model.predict_on_batch(np.expand_dims(image, axis=0))
print("processing time: ", time.time() - start)

# compute predicted labels and scores
# for each detection, columns 0-3 hold the box and columns 4+ hold one score per class:
# the label is the best-scoring class and the score is that class's value
predicted_labels = np.argmax(detections[0, :, 4:], axis=1)
scores = detections[0, np.arange(detections.shape[1]), 4 + predicted_labels]

# correct for image scale
# (in-place: the box columns of `detections` are overwritten with rescaled coordinates)
detections[0, :, :4] /= scale

# visualize detections
# detections scoring below 0.5 are skipped; the rest are printed and drawn on `draw`
for idx, (label, score) in enumerate(zip(predicted_labels, scores)):
    if score < 0.5:
        continue
    # box corners as integers: (b[0], b[1]) and (b[2], b[3]) are opposite corners
    b = detections[0, idx, :4].astype(int)
    print(b, label, labels_to_names[label], score)
    cv2.rectangle(draw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 3)
    caption = "{} {:.3f}".format(labels_to_names[label], score)
    # caption is drawn twice: a thicker black pass, then a thinner white pass on top,
    # which leaves a dark outline around the white text
    cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 0, 0), 3)
    cv2.putText(draw, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1.5, (255, 255, 255), 2)
    
# display of the annotated image is currently disabled; uncomment to show it
# plt.figure(figsize=(15, 15))
# plt.axis('off')
# plt.imshow(draw)
# plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '../../1.jpeg'

## Automatic Generation of Bounding Boxes

In [None]:
import xml.etree.cElementTree as ET
import os.path as path
import glob

def generate_annotations(super_class='*', max_images=6):
    """Detect people in images under ``data/<super_class>/`` and save the boxes as XML.

    Files are discovered recursively with ``glob``. Each image is run through
    the notebook-level RetinaNet ``model``; every detection whose best class is
    label 0 (person) with a score of at least 0.5 is stored as a ``<person>``
    element (children ``xmin``, ``ymin``, ``xmax``, ``ymax``) under an
    ``<image filename="...">`` element of a single ``<root>`` document.

    Relies on names defined earlier in the notebook: ``model``,
    ``read_image_bgr``, ``preprocess_image``, ``resize_image``, ``np``, ``os``,
    ``glob``, ``time`` and ``ET``.

    Args:
        super_class: Sub-directory of ``data/`` to scan. The default ``'*'``
            is a glob wildcard matching every sub-directory.
        max_images: Maximum number of images to process. The default of 6
            matches the limit that used to be hard-coded; ``None`` removes it.

    Returns:
        None. Writes ``annotations_<super_class>.xml`` to the current working
        directory (``annotations_all.xml`` for the ``'*'`` wildcard) and prints
        the elapsed time.
    """
    start = time.time()

    # Collect the image files to annotate. Existing .xml files are not images,
    # and '*.*' can also match directories whose name contains a dot.
    image_paths = []
    pattern = 'data/{}/**/*.*'.format(super_class)
    for candidate in glob.iglob(pattern, recursive=True):
        if max_images is not None and len(image_paths) >= max_images:
            break
        if os.path.splitext(candidate)[1].lower() == '.xml':
            continue
        if not os.path.isfile(candidate):
            continue
        image_paths.append(candidate)

    # Setup XML
    root = ET.Element("root")

    for image_path in image_paths:
        # Load, preprocess and resize; `scale` maps network coordinates back
        # to the original image size.
        image = read_image_bgr(image_path)
        image = preprocess_image(image)
        image, scale = resize_image(image)

        # Only the third model output (detections) is needed.
        _, _, detections = model.predict_on_batch(np.expand_dims(image, axis=0))

        # Columns 0-3 hold the box, columns 4+ hold one score per class.
        predicted_labels = np.argmax(detections[0, :, 4:], axis=1)
        scores = detections[0, np.arange(detections.shape[1]), 4 + predicted_labels]

        # correct for image scale
        boxes = (detections[0, :, :4] / scale).astype(int)

        # Distinct name so the image array above is not shadowed by the XML node.
        image_node = ET.SubElement(root, "image")
        image_node.set('filename', image_path)

        for idx, (label, score) in enumerate(zip(predicted_labels, scores)):
            # Skip if not confident enough or label != 0 [Person]
            if score < 0.5 or label != 0:
                continue
            person = ET.SubElement(image_node, "person")
            for tag, value in zip(("xmin", "ymin", "xmax", "ymax"), boxes[idx]):
                ET.SubElement(person, tag).text = str(int(value))

    # Output XML file. The name is derived from `super_class`; the wildcard is
    # spelled out because '*' is not a valid filename character on Windows.
    suffix = 'all' if super_class == '*' else super_class
    annotations_filename = 'annotations_' + suffix + '.xml'
    ET.ElementTree(root).write(annotations_filename)

    end = time.time()
    print("Generation time for {} images: ".format(len(image_paths)), end - start)
    
generate_annotations('men')

In [None]:
def xml_to_dict(xml_file):
    """Read an annotation XML file into a ``{filename: [box, ...]}`` mapping.

    Every ``<image>`` child of the document root contributes one entry, keyed
    by its ``filename`` attribute. The value is a list with one
    ``(xmin, ymin, xmax, ymax)`` tuple of ints per ``<person>`` child, in
    document order; an image without persons maps to an empty list.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        dict mapping each image filename to its list of bounding-box tuples.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    document_root = ET.parse(xml_file).getroot()

    boxes_by_file = {}
    for image_node in document_root.findall('image'):
        # One integer 4-tuple per annotated person, corners read in tag order.
        boxes_by_file[image_node.get('filename')] = [
            tuple(int(person_node.find(tag).text) for tag in corner_tags)
            for person_node in image_node.findall('person')
        ]
    return boxes_by_file

xml_to_dict('annotations.xml')

In [7]:
def xml_to_crops(num_examples=5, annotations_file='annotations.xml'):
    """Display the annotated person crops for the first few images of an annotation file.

    The annotation file is parsed with ``xml_to_dict``; for each of the first
    ``num_examples`` filenames the image is read with OpenCV and every bounding
    box, clamped to the image borders, is shown in a window named "cropped".

    Args:
        num_examples: Number of images (not crops) to go through.
        annotations_file: Annotation XML to read. Defaults to the previously
            hard-coded ``'annotations.xml'``.

    Returns:
        None. Prints the selected filenames and opens an OpenCV window.
    """
    data = xml_to_dict(annotations_file)
    filenames = list(data)[:num_examples]
    print(filenames)

    for filename in filenames:
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising, so `img.shape` would fail with AttributeError.
            print("Skipping unreadable image: {}".format(filename))
            continue

        height, width = img.shape[:2]
        for xmin, ymin, xmax, ymax in data[filename]:
            # Clamp to the image: predicted boxes can extend past the borders
            # (negative coordinates would otherwise index from the far edge).
            cropped_img = img[max(0, ymin):min(height, ymax),
                              max(0, xmin):min(width, xmax)]  # rows (y), cols (x)
            if cropped_img.size == 0:
                # Box lies entirely outside the image; imshow rejects empty arrays.
                continue
            cv2.imshow("cropped", cropped_img)
            cv2.waitKey(10)
            # NOTE(review): the window is not serviced while sleeping; a single
            # longer cv2.waitKey() may be preferable -- confirm intended UX.
            time.sleep(10)
    
#     img = cv2.imread("lenna.png")
#     crop_img = img[y:y+h, x:x+w]
#     cv2.imshow("cropped", crop_img)
#     cv2.waitKey(0)
#     images = []
#     for filename in filenames:
        
        
#     images.append(cropped_image)
#     return images

xml_to_crops()

['data/women/women-jumpsuits/DU7RELCTafrikrea/2DU7RELCTafrikrea.jpeg', 'data/women/women-jumpsuits/DU7RELCTafrikrea/3DU7RELCTafrikrea.jpeg', 'data/women/women-jumpsuits/DU7RELCTafrikrea/0DU7RELCTafrikrea.jpeg', 'data/women/women-jumpsuits/DU7RELCTafrikrea/1DU7RELCTafrikrea.jpeg', 'data/women/women-jumpsuits/DU7RELCTafrikrea/4DU7RELCTafrikrea.jpeg']
(216, -11, 514, 1051)
514 1051 216 -11
(233, 97, 459, 1000)
459 1000 233 97
(115, 81, 551, 1013)
551 1013 115 81
(221, 5, 495, 972)
495 972 221 5
(120, 74, 522, 999)
522 999 120 74
