Training a custom object detector on the Taco dataset

In [13]:
import os
import io

In [14]:
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util, label_map_util

In [3]:
def bbox_taco_to_normal(taco_bbox):
  """
  Turn a Taco-style box (xmin, ymin, width, height) into
  corner format (xmin, ymin, xmax, ymax).

  Returns a new 4-element list; the input sequence is not modified.
  """
  x_min = taco_bbox[0]
  y_min = taco_bbox[1]
  # The far corner is the near corner shifted by the box width / height.
  x_max = x_min + taco_bbox[2]
  y_max = y_min + taco_bbox[3]
  return [x_min, y_min, x_max, y_max]

### 1. Creating TF records

Collect the annotations for all images into a dictionary keyed by image id, holding for each image its 'filename', 'width' and 'height', plus the 'class' and box ('xmin', 'ymin', 'xmax', 'ymax') of every annotation.

Load the annotations from the dataset

In [18]:
import json

In [19]:
taco_path = "/home/tensorflow/waste/datasets/taco_dataset/"

In [20]:
cd $taco_path

/home/tensorflow/waste/datasets/taco_dataset


In [21]:
# Load the raw annotations from the current directory.
# The file is opened in a `with` block so the handle is closed right after
# parsing, and decoded explicitly as UTF-8 instead of the locale default.
with open('annotations.json', 'r', encoding='utf-8') as ann_file:
    taco_ann_raw = json.load(ann_file)

Create a mapping between category id and category label

In [22]:
# Map each category id, shifted by one, to the category's name.
categories_dict = {
    category['id'] + 1: category['name']
    for category in taco_ann_raw['categories']
}

An image can have several annotations, one per bounding box, so the annotations are grouped by image id.

In [23]:
# Group the annotations by image: image_annotations maps an image id to that
# image's metadata plus the list of every box annotated on it.

# Look images up by their 'id' field rather than by list position, so the
# result stays correct even if 'images' is not ordered by id.
images_by_id = {img_desc['id']: img_desc for img_desc in taco_ann_raw['images']}

image_annotations = {}
# Only the first 1000 annotations are used; slicing clamps to the list length.
for img_ann in taco_ann_raw['annotations'][:1000]:
    img_id = img_ann['image_id']
    # Shift the category id by one so it matches the keys of categories_dict.
    img_category = img_ann['category_id']+1
    img_bbox = {'box': bbox_taco_to_normal(img_ann['bbox']),
                'category': img_category}
    if img_id in image_annotations:
        # Image already seen: just add this box to it.
        image_annotations[img_id]['bboxes'].append(img_bbox)
        continue

    img_desc = images_by_id[img_id]
    # The local file name is the last path component of the Flickr URL.
    img_name = img_desc['flickr_url'].split('/')[-1]
    # An image belongs to the training set if it is present in the 'train'
    # folder; everything else is treated as a test image.
    # NOTE(review): the record-writing cell reads from '<purpose>/small/';
    # confirm that both locations hold the same set of files.
    if os.path.exists(os.path.join(taco_path, 'train', img_name)):
        img_purpose = 'train'
    else:
        img_purpose = 'test'
    image_annotations[img_id] = {"filename": img_name,
                                 "purpose": img_purpose,
                                 "width": img_desc['width'],
                                 "height": img_desc['height'],
                                 "bboxes": [img_bbox]
                                 }

In [24]:
image_annotations[300]

{'filename': 'y5A3l6LUzhWEkRac2NfxIYAlhFnA8HSzDl8nyqsZ.jpeg',
 'purpose': 'test',
 'width': 2988,
 'height': 5312,
 'bboxes': [{'box': [1483.0, 2475.0, 1867.0, 2909.0], 'category': 6},
  {'box': [1484.0, 2835.0, 1558.0, 2908.0], 'category': 8}]}

For each image annotation (with multiple boxes) create a TF example.
This must contain:
* filename
* encoded image
* image format (JPG/PNG)
* bboxes
* category labels

In [2]:
# Serialize every annotated image into a tf.train.Example and append it to the
# train or test TFRecord file, depending on the image's 'purpose'.
train_record_path = os.path.join(taco_path, 'taco_dataset_train.record')
test_record_path = os.path.join(taco_path, 'taco_dataset_test.record')

# Opening the writers as context managers guarantees that both record files
# are flushed and closed even if reading an image or building an example fails.
with tf.io.TFRecordWriter(train_record_path) as writer_train, \
        tf.io.TFRecordWriter(test_record_path) as writer_test:
    image_format = b'jpg'
    for ann in image_annotations.values():
        # Read the encoded image bytes from '<purpose>/small/<filename>'.
        img_purpose = ann['purpose']
        img_path = os.path.join(taco_path, img_purpose, 'small', ann['filename'])
        with tf.io.gfile.GFile(img_path, 'rb') as fid:
            img_data = fid.read()
        # Width and height come from the annotation file, not from the image
        # that was just read.
        # NOTE(review): the bytes come from the 'small' folder, so these
        # dimensions may not match the encoded image; the normalized boxes
        # below are only valid if the aspect ratio is unchanged. Confirm, or
        # read the size from the image itself (e.g. with PIL).
        width = ann['width']
        height = ann['height']
        # Prepare the rest of the features
        filename = ann['filename'].encode('utf8')
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []  # Class labels (names), UTF-8 encoded
        classes = []  # Integer class ids
        # Box coordinates are stored as fractions of the image width / height.
        for bbox in ann['bboxes']:
            classes.append(bbox['category'])
            classes_text.append(categories_dict[bbox['category']].encode('utf8'))
            xmin, ymin, xmax, ymax = bbox['box']
            xmins.append(xmin/width)
            xmaxs.append(xmax/width)
            ymins.append(ymin/height)
            ymaxs.append(ymax/height)
        # Create the TF record entry
        tf_example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(img_data),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes),
        }))
        # Anything that is not a training image goes to the test record.
        writer = writer_train if img_purpose == 'train' else writer_test
        writer.write(tf_example.SerializeToString())

NameError: name 'tf' is not defined

Now we have training and test records.

Create the label map

In [27]:
# Write the label map: one `item { id, name }` entry per category.
label_map_path = os.path.join(taco_path, 'label_map.pbtxt')
with open(label_map_path, 'w') as lbl_map:
    lbl_map.writelines(
        f"item {{\n\tid: {cat_id}\n\tname: '{cat_label}'\n}}\n"
        for cat_id, cat_label in categories_dict.items()
    )

### 2. Training the model

We start from the pre-trained SSD MobileNet V2 model.

In [34]:
cd ~/waste/work/

/home/tensorflow/waste/work


In [6]:
cp /home/tensorflow/models/research/object_detection/model_main_tf2.py .

In [35]:
!python model_main_tf2.py --model_dir=../models/ssd_mobilenet_v2__taco --pipeline_config_path=../models/ssd_mobilenet_v2__taco/pipeline.config

W0623 09:37:01.953161 140561471125312 cross_device_ops.py:1387] There are non-GPU devices in `tf.distribute.Strategy`, not using nccl allreduce.
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
I0623 09:37:01.957745 140561471125312 mirrored_strategy.py:369] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
INFO:tensorflow:Maybe overwriting train_steps: None
I0623 09:37:01.962418 140561471125312 config_util.py:552] Maybe overwriting train_steps: None
INFO:tensorflow:Maybe overwriting use_bfloat16: False
I0623 09:37:01.962543 140561471125312 config_util.py:552] Maybe overwriting use_bfloat16: False
Instructions for updating:
rename to distribute_datasets_from_function
W0623 09:37:02.022024 140561471125312 deprecation.py:345] From /home/tensorflow/.local/lib/python3.6/site-packages/object_detection/model_lib_v2.py:564: StrategyBase.experimental_distribute_datasets_from_function (from tensorflow.python

INFO:tensorflow:Step 300 per-step time 5.279s
I0623 10:04:28.608558 140561471125312 model_lib_v2.py:707] Step 300 per-step time 5.279s
INFO:tensorflow:{'Loss/classification_loss': 0.53930664,
 'Loss/localization_loss': 0.24246801,
 'Loss/regularization_loss': 0.1531422,
 'Loss/total_loss': 0.93491685,
 'learning_rate': 0.0426662}
I0623 10:04:28.608823 140561471125312 model_lib_v2.py:708] {'Loss/classification_loss': 0.53930664,
 'Loss/localization_loss': 0.24246801,
 'Loss/regularization_loss': 0.1531422,
 'Loss/total_loss': 0.93491685,
 'learning_rate': 0.0426662}
INFO:tensorflow:Step 400 per-step time 5.258s
I0623 10:13:14.363360 140561471125312 model_lib_v2.py:707] Step 400 per-step time 5.258s
INFO:tensorflow:{'Loss/classification_loss': 0.51535994,
 'Loss/localization_loss': 0.2529274,
 'Loss/regularization_loss': 0.1531298,
 'Loss/total_loss': 0.9214171,
 'learning_rate': 0.047999598}
I0623 10:13:14.363627 140561471125312 model_lib_v2.py:708] {'Loss/classification_loss': 0.515359

INFO:tensorflow:Step 1800 per-step time 5.188s
I0623 12:17:57.983545 140561471125312 model_lib_v2.py:707] Step 1800 per-step time 5.188s
INFO:tensorflow:{'Loss/classification_loss': 0.13503303,
 'Loss/localization_loss': 0.060196254,
 'Loss/regularization_loss': 0.14863648,
 'Loss/total_loss': 0.34386575,
 'learning_rate': 0.0799474}
I0623 12:17:57.983904 140561471125312 model_lib_v2.py:708] {'Loss/classification_loss': 0.13503303,
 'Loss/localization_loss': 0.060196254,
 'Loss/regularization_loss': 0.14863648,
 'Loss/total_loss': 0.34386575,
 'learning_rate': 0.0799474}
INFO:tensorflow:Step 1900 per-step time 5.371s
I0623 12:26:55.117759 140561471125312 model_lib_v2.py:707] Step 1900 per-step time 5.371s
INFO:tensorflow:{'Loss/classification_loss': 0.13923706,
 'Loss/localization_loss': 0.07751569,
 'Loss/regularization_loss': 0.1480144,
 'Loss/total_loss': 0.36476713,
 'learning_rate': 0.07993342}
I0623 12:26:55.118026 140561471125312 model_lib_v2.py:708] {'Loss/classification_loss':

INFO:tensorflow:Step 3300 per-step time 5.442s
I0623 14:26:57.401328 140561471125312 model_lib_v2.py:707] Step 3300 per-step time 5.442s
INFO:tensorflow:{'Loss/classification_loss': 0.10590132,
 'Loss/localization_loss': 0.048713904,
 'Loss/regularization_loss': 0.13891402,
 'Loss/total_loss': 0.29352924,
 'learning_rate': 0.07956588}
I0623 14:26:57.401593 140561471125312 model_lib_v2.py:708] {'Loss/classification_loss': 0.10590132,
 'Loss/localization_loss': 0.048713904,
 'Loss/regularization_loss': 0.13891402,
 'Loss/total_loss': 0.29352924,
 'learning_rate': 0.07956588}
INFO:tensorflow:Step 3400 per-step time 5.453s
I0623 14:36:02.724116 140561471125312 model_lib_v2.py:707] Step 3400 per-step time 5.453s
INFO:tensorflow:{'Loss/classification_loss': 0.11729822,
 'Loss/localization_loss': 0.043495692,
 'Loss/regularization_loss': 0.13824984,
 'Loss/total_loss': 0.29904374,
 'learning_rate': 0.079527386}
I0623 14:36:02.724492 140561471125312 model_lib_v2.py:708] {'Loss/classification_l

### 3. Exporting the trained model

In [3]:
cp /home/tensorflow/models/research/object_detection/exporter_main_v2.py .

In [7]:
pwd

'/home/tensorflow/waste/work'

In [8]:
!python exporter_main_v2.py --input_type image_tensor --pipeline_config_path ../models/ssd_mobilenet_v2__taco/pipeline.config --trained_checkpoint_dir ../models/ssd_mobilenet_v2__taco/ --output_directory ../models/trained

Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))
W0624 12:20:05.140064 139780628993856 deprecation.py:616] From /home/tensorflow/.local/lib/python3.6/site-packages/tensorflow/python/autograph/impl/api.py:463: calling map_fn_v2 (from tensorflow.python.ops.map_fn) with back_prop=False is deprecated and will be removed in a future version.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.map_fn(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.map_fn(fn, elems))
W0624 12:20:26.325408 139780628993856 save_impl.py:72] Skipping full serialization of Keras layer <object_detection.meta_architectures.ssd_meta_arch.SSDMetaArch object at 0x7f20ac779320>, because it is not built.
W0624 12:20:26.456480 139

W0624 12:20:45.293895 139780628993856 save.py:254] Found untraced functions such as WeightSharedConvolutionalBoxPredictor_layer_call_fn, WeightSharedConvolutionalBoxPredictor_layer_call_and_return_conditional_losses, WeightSharedConvolutionalBoxHead_layer_call_fn, WeightSharedConvolutionalBoxHead_layer_call_and_return_conditional_losses, WeightSharedConvolutionalBoxPredictor_layer_call_fn while saving (showing 5 of 260). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ../models/trained/saved_model/assets
I0624 12:20:48.767588 139780628993856 builder_impl.py:781] Assets written to: ../models/trained/saved_model/assets
INFO:tensorflow:Writing pipeline config file to ../models/trained/pipeline.config
I0624 12:20:49.403180 139780628993856 config_util.py:254] Writing pipeline config file to ../models/trained/pipeline.config


Export for TFLite

!pip install tflite_support

In [11]:
!python /home/tensorflow/models/research/object_detection/export_tflite_graph_tf2.py --trained_checkpoint_dir '../models/ssd_mobilenet_v2__taco/' --output_directory '../models/tflite/' --pipeline_config_path '../models/ssd_mobilenet_v2__taco/pipeline.config'

W0627 09:20:29.119112 140472835090240 save_impl.py:72] Skipping full serialization of Keras layer <object_detection.meta_architectures.ssd_meta_arch.SSDMetaArch object at 0x7fc1e8368d30>, because it is not built.
W0627 09:20:29.262025 140472835090240 save_impl.py:72] Skipping full serialization of Keras layer <keras.layers.convolutional.SeparableConv2D object at 0x7fc1e8238208>, because it is not built.
W0627 09:20:29.262196 140472835090240 save_impl.py:72] Skipping full serialization of Keras layer <object_detection.core.freezable_batch_norm.FreezableBatchNorm object at 0x7fc1e01c8240>, because it is not built.
W0627 09:20:29.262248 140472835090240 save_impl.py:72] Skipping full serialization of Keras layer <keras.layers.core.Lambda object at 0x7fc1e01c8d68>, because it is not built.
W0627 09:20:29.262292 140472835090240 save_impl.py:72] Skipping full serialization of Keras layer <keras.layers.convolutional.SeparableConv2D object at 0x7fc1e01c8860>, because it is not built.
W0627 09:2

W0627 09:20:43.172046 140472835090240 save.py:254] Found untraced functions such as WeightSharedConvolutionalBoxPredictor_layer_call_and_return_conditional_losses, WeightSharedConvolutionalBoxPredictor_layer_call_fn, WeightSharedConvolutionalBoxHead_layer_call_and_return_conditional_losses, WeightSharedConvolutionalBoxHead_layer_call_fn, WeightSharedConvolutionalBoxPredictor_layer_call_fn while saving (showing 5 of 260). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ../models/tflite/saved_model/assets
I0627 09:20:46.072693 140472835090240 builder_impl.py:781] Assets written to: ../models/tflite/saved_model/assets


In [15]:
# Convert the SavedModel exported for TFLite into a TFLite model, with the
# converter's default optimizations enabled, and write it to disk.
saved_model_dir = '../models/tflite/saved_model/'
tflite_output_path = '../models/tflite/ssd_taco.tflite'

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# The converted model is written in binary mode.
with open(tflite_output_path, 'wb') as tflite_file:
    tflite_file.write(tflite_model)