- `1_create_data` where we build a TFRecords dataset from the images with bounding box annotations,
- `2_training` where we perform the training, evaluation and model export,
- `3_prediction` where we evaluate the model on new data.

#### Template Structure

- `install/` contains the instructions to replicate the installation of the TensorFlow Object Detection API,
- `models/` contains the pretrained `ssdlite_mobilenet_v2` model on COCO dataset,
- `object_detection/` is the Tensorflow Object Detection framework,
- `slim/`, the TF Object Detection module is built upon [TF-SLIM](https://github.com/tensorflow/models/tree/master/research/slim), a high-level API of TensorFlow (tensorflow.contrib.slim) for defining, training and evaluating complex models (especially CNN models),
- `tfrecords_data/` will contain the dataset in TFRecord format after the execution of the `1_create_data` notebook,
- `trained_models` will contain the model trained for the task after the execution of the `2_training` notebook.

In [1]:
import sys
sys.path.append("slim")

from support import draw_outline, draw_rect, draw_text, bb_hw, show_img, open_image

import tensorflow as tf
import numpy as np
import pandas as pd

from PIL import Image
from collections import namedtuple, OrderedDict
from object_detection.utils import dataset_util
from tqdm import tqdm


import path
import cv2
import os
import io
import numpy as np

# Path to the CSV input: one row per bounding-box annotation, with the columns
# filename, width, height, class, xmin, ymin, xmax, ymax
CSV_INPUT = '/floyd/input/datos/annotations/20_labels.csv'

# Path to the image directory; the CSV `filename` values are joined onto this
# path when the images are read
IMAGE_DIR = '/floyd/input/datos/images/500_20/'

# Path to output TFRecord: directory that receives the train/eval .tfrecord files
OUTPUT = '/floyd/home/workspace-focas/tfrecords_data'

In [2]:
# Load every bounding-box annotation from the CSV into a DataFrame
data = pd.read_csv(CSV_INPUT)

# Preview the first 12 annotation rows
data.head(12)

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,0_500_DSC_1786.JPG,500,500,foca,286,8,331,24
1,0_500_DSC_3566.JPG,500,500,foca,63,484,137,500
2,0_500_DSC_3566.JPG,500,500,foca,227,405,348,471
3,0_500_DSC_3566.JPG,500,500,foca,144,456,214,500
4,0_500_DSC_3566.JPG,500,500,foca,224,476,332,500
5,0_500_DSC_3889.JPG,500,500,foca,36,400,342,500
6,0_500_DSC_3889.JPG,500,500,foca,275,500,442,500
7,0_1000_DSC_3034.JPG,500,500,foca,108,84,306,131
8,0_1000_DSC_3034.JPG,500,500,foca,412,239,500,304
9,0_1000_DSC_3034.JPG,500,500,foca,333,202,478,267


As you can see from the output above, we have only a few samples available: 10 images for training and 2 for the evaluation.

## Train / Val split

10 samples for training and 2 for evaluating.

In [3]:
#  Train / Val split
# Positional split on annotation rows: the first 10 rows of `data` go to
# training and every remaining row goes to evaluation.
# NOTE(review): rows are bounding boxes, not images (one filename can span
# several rows), so an image whose rows straddle index 10 would end up in both
# sets. The cell output further down reports 4 training groups vs. 501
# evaluation groups -- confirm that this ratio is intended.
train_df = data.iloc[:10]
eval_df = data.iloc[10:]

In [4]:
def class_text_to_int(row_label):
    """Map a class label string to its integer class id.

    Args:
        row_label: Class name, as stored in the ``class`` column of the CSV.

    Returns:
        ``1`` for ``'foca'``; ``None`` for any other label.
    """
    if row_label == 'foca':  # EDIT WITH YOUR LABELS
        return 1
    # Unknown label. The previous `else: None` was a no-op expression that only
    # returned None by falling off the end of the function; state it explicitly.
    return None
        
def split(df, group):
    """Group annotation rows by a column, yielding one entry per distinct value.

    Args:
        df: DataFrame of annotations (e.g. one row per bounding box).
        group: Name of the column to group by, e.g. ``'filename'``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per distinct
        value of ``group``. ``filename`` is the group key and ``object`` is the
        sub-DataFrame holding every row of ``df`` that has that key, e.g.::

            [data(filename='1.jpg',  object=<DataFrame with 1 row>),
             data(filename='10.jpg', object=<DataFrame with 2 rows>),
             ...]
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # `gb.groups` is keyed by group value; iterate those keys once instead of
    # zipping the key set with itself.
    return [data(filename, gb.get_group(filename)) for filename in gb.groups]

def create_tf_example(group, path):
    """
    Build a ``tf.train.Example`` for one image and all of its bounding boxes.

    ``group`` must expose ``filename`` (the image file name) and ``object``
    (a DataFrame whose rows carry ``xmin``, ``xmax``, ``ymin``, ``ymax`` and
    ``class``). ``path`` is the directory the image file is read from.

    Box coordinates are divided by the width/height of the decoded image, not
    by the ``width``/``height`` columns of the CSV.

    Note: we are handling JPG data format and bbox labels.
    If you need to work on PNG data with mask or polygon labels, you will have to edit the code a bit.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as image_file:
        encoded_jpg = image_file.read()

    # Decode the bytes only to learn the real image dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size
    filename = group.filename.encode('utf8')

    # One entry per bounding box; each per-box feature list is derived from it.
    boxes = [box for _, box in group.object.iterrows()]
    xmins = [box['xmin'] / width for box in boxes]
    xmaxs = [box['xmax'] / width for box in boxes]
    ymins = [box['ymin'] / height for box in boxes]
    ymaxs = [box['ymax'] / height for box in boxes]
    classes_text = [box['class'].encode('utf8') for box in boxes]
    classes = [class_text_to_int(box['class']) for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

In [5]:
# Prepare the TFRecord Output
TRAIN_RECORD_FILE = os.path.join(OUTPUT, 'train2.tfrecord')
EVAL_RECORD_FILE = os.path.join(OUTPUT, 'eval2.tfrecord')

# Image directory, resolved against the current working directory (an absolute
# IMAGE_DIR is returned unchanged by os.path.join).
path_to_images = os.path.join(os.getcwd(), IMAGE_DIR)

# From CSV to TFRecord: the TRAIN and EVAL files are produced the same way, so
# handle both in one loop instead of two copies of the same code.
for record_file, split_df in ((TRAIN_RECORD_FILE, train_df),
                              (EVAL_RECORD_FILE, eval_df)):
    writer = tf.python_io.TFRecordWriter(path=record_file)
    try:
        # One group per image, holding all of that image's bounding boxes.
        grouped = split(split_df, 'filename')
        for group in tqdm(grouped):
            tf_example = create_tf_example(group, path_to_images)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the writer even when an image fails to convert, so the file
        # handle is not leaked.
        writer.close()

print('Successfully created the TFRecords: {}'.format(OUTPUT))

100%|██████████| 4/4 [00:00<00:00, 187.77it/s]
100%|██████████| 501/501 [00:01<00:00, 356.28it/s]

Successfully created the TFRecords: /floyd/home/workspace-focas/tfrecords_data





Great, we have now converted the dataset into TFRecord format! In the next step we will train a pretrained model for finding Syd. Let's jump into the `2_training` notebook.