References
1. https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md
2. https://www.kaggle.com/khanhlvg/cots-detection-w-tensorflow-object-detection-api/notebook#Import-dependencies

In [None]:
# Fetch the TensorFlow Models repository; the Object Detection API is installed from models/research below.
!git clone https://github.com/tensorflow/models
    
# Pin the checkout to a fixed commit so that future changes in the TF Object Detection API codebase cannot affect this notebook.
!cd models && git checkout ac8d06519

In [None]:
%%bash
cd models/research

# Compile the protobuf definitions into Python modules.
protoc object_detection/protos/*.proto --python_out=.

# Install the TensorFlow Object Detection API.
# Note: some dependency versions are pinned so that the install works on Kaggle notebooks. In particular:
# * scipy==1.6.3 to avoid the missing GLIBCXX_3.4.26 error
# * tensorflow==2.6.0 to stay compatible with the CUDA version preinstalled on Kaggle.
# Once Kaggle notebooks upgrade to TF 2.7, the default setup.py script can be used instead:
# cp object_detection/packages/tf2/setup.py .
wget https://storage.googleapis.com/odml-dataset/others/setup.py
pip install -q --user .

# Check that the Object Detection API is working correctly.
python object_detection/builders/model_builder_tf2_test.py

In [None]:
# Patch the user-site copy of httplib2 in place: every "pp.downcaseTokens"
# reference in auth.py is rewritten to "pp.pyparsing_common.downcase_tokens".
# NOTE(review): presumably needed because the installed pyparsing no longer
# provides the old name - fingers crossed.
HTTPLIB2_AUTH_PATH = "/root/.local/lib/python3.7/site-packages/httplib2/auth.py"

with open(HTTPLIB2_AUTH_PATH, 'r') as source_file:
    patched_source = source_file.read().replace(
        "pp.downcaseTokens", "pp.pyparsing_common.downcase_tokens")

with open(HTTPLIB2_AUTH_PATH, 'w') as target_file:
    target_file.write(patched_source)

In [None]:
import contextlib2
import io
import IPython
import json
import numpy as np
import os
import pathlib
import pandas as pd
import sys
import tensorflow as tf
import time

from PIL import Image, ImageDraw

# Import the library that is used to submit the prediction result.
# The module is loaded from the competition input directory, so that directory
# must be placed at the front of sys.path before the import statement runs.
INPUT_DIR = '/kaggle/input/tensorflow-great-barrier-reef/'
sys.path.insert(0, INPUT_DIR)
import greatbarrierreef

In [None]:
# The notebook is supposed to run with TF 2.6.0
print(tf.__version__)

# tf.test.is_gpu_available() is deprecated in TF 2.x; GPU availability is
# derived from the list of physical GPU devices instead.
gpu_devices = tf.config.list_physical_devices('GPU')
print(bool(gpu_devices))
print(gpu_devices)

In [None]:
from object_detection.utils import dataset_util
from object_detection.dataset_tools import tf_record_creation_util

In [None]:
class CONFIG:
    """Notebook-wide settings, read as class attributes."""

    # Path of the CSV that is loaded into `full_df` and then split into
    # training and validation frames by its 'k_fold' column.
    full_df_fp: str = "../input/cots-train-test-split/train_df.csv"

In [None]:
# Load the frame table; the first CSV column is used as the DataFrame index.
full_df = pd.read_csv(CONFIG.full_df_fp, index_col=0)
# Show the first two rows as a quick sanity check.
full_df.head(2)

In [None]:
# Fold 0 is held out for validation; every other fold is used for training.
fold_ids = full_df['k_fold']
valid_df = full_df.loc[fold_ids == 0]
train_df = full_df.loc[fold_ids != 0]
print("Validation df size: ", valid_df.shape[0])
print("Training df size: ", train_df.shape[0])

## Creating TF records
Following the official documentation, we need to build one `tf.train.Example` per image in the format shown below, and then use a TFRecord writer to write the examples into TFRecord files. Based on previous experience, training on TPU needs a reasonable number of TFRecord shards, e.g. at least 16, in multiples of 8.


Each example needs the following features:\
'image/height': dataset_util.int64_feature(height),\
'image/width': dataset_util.int64_feature(width),\
'image/filename': dataset_util.bytes_feature(filename),\
'image/source_id': dataset_util.bytes_feature(filename),\
'image/encoded': dataset_util.bytes_feature(encoded_jpg),\
'image/format': dataset_util.bytes_feature(image_format),\
'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),\
'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),\
'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),\
'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),\
'image/object/class/text': dataset_util.bytes_list_feature(classes_text),\
'image/object/class/label': dataset_util.int64_list_feature(classes)

In [None]:
def _clip_to_unit(value):
    """Clamp a normalized coordinate into the closed interval [0, 1]."""
    return min(max(value, 0.0), 1.0)


def create_tf_example(video_id, video_frame, annotations, image_path):
    """Build one `tf.train.Example` holding a single image and its boxes.

    Args:
        video_id: Video identifier. Combined with `video_frame` as
            "<video_id>:<video_frame>" for the filename and source_id
            features.
        video_frame: Frame identifier within the video.
        annotations: Boxes of the frame. Either a JSON-like string holding a
            list of dicts with 'x', 'y', 'width' and 'height' keys (single
            quotes are accepted), or the already-parsed list of such dicts.
            Values are taken to be pixels, since they are divided by the
            image size.
        image_path: Path of the image file, read through `tf.io.gfile`.

    Returns:
        A `tf.train.Example` containing the encoded image bytes, the image
        size, and one entry per box: corners normalized by the image size and
        clipped to [0, 1], class text "COTS" and class label 1.
    """
    with tf.io.gfile.GFile(image_path, 'rb') as image_file:
        encoded_img = image_file.read()

    # Take the size from the bytes already in memory instead of reading the
    # file a second time; the context manager releases the PIL image.
    with Image.open(io.BytesIO(encoded_img)) as pil_image:
        width, height = pil_image.size

    filename = f"{video_id}:{video_frame}".encode('utf8')
    # NOTE(review): the format is always recorded as JPEG, whatever the file
    # actually contains - confirm that every input image is a JPEG.
    image_format = 'jpeg'.encode('utf8')

    if isinstance(annotations, str):
        # The string uses single quotes, which JSON does not allow.
        annotations = json.loads(annotations.replace("'", '"'))

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    texts = []
    labels = []

    for annotation in annotations:
        left = annotation['x']
        top = annotation['y']
        right = left + annotation['width']
        bottom = top + annotation['height']

        # Boxes that overhang the image border would otherwise produce
        # normalized coordinates outside [0, 1].
        xmins.append(_clip_to_unit(left / width))
        xmaxs.append(_clip_to_unit(right / width))
        ymins.append(_clip_to_unit(top / height))
        ymaxs.append(_clip_to_unit(bottom / height))
        texts.append("COTS".encode('utf8'))
        labels.append(1)

    # One tf.train.Example holds exactly one image.
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_img),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(texts),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }))

    return tf_example
    
def convert_to_tfrecord(data_df, tfrecord_filebase, num_shards):
    """Write every row of `data_df` as a tf.train.Example into sharded TFRecords.

    Args:
        data_df: DataFrame with 'video_id', 'video_frame', 'annotations' and
            'filepath' columns; each row is converted by `create_tf_example`.
        tfrecord_filebase: Base path handed to
            `tf_record_creation_util.open_sharded_output_tfrecords`.
        num_shards: Number of output shards; rows are assigned round-robin.
    """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, tfrecord_filebase, num_shards)

        # Count rows by position, not by DataFrame index label: after
        # filtering, the labels are no longer 0..n-1 (and need not be
        # integers), which would skew both the shard assignment and the
        # progress count.
        for position, (_, row) in enumerate(data_df.iterrows()):
            tf_example = create_tf_example(row['video_id'], row['video_frame'],
                                           row['annotations'], row['filepath'])

            output_shard_index = position % num_shards
            output_tfrecords[output_shard_index].write(tf_example.SerializeToString())

            processed = position + 1
            if processed % 500 == 0:
                print(f"Processed {processed} images")

        print('Completed processing {0} images.'.format(len(data_df)))


In [None]:
# Create the output directory (relative to the working directory) if it does not exist yet.
os.makedirs("dataset", exist_ok=True)

## Creating TFRecords

In [None]:
# Write the training rows into 32 TFRecord shards under dataset/cots_train.
print('Converting TRAIN images...')
convert_to_tfrecord(train_df, 'dataset/cots_train', num_shards=32)

In [None]:
# Write the validation rows into 8 TFRecord shards under dataset/cots_valid.
print('Converting validation images...')
convert_to_tfrecord(valid_df, 'dataset/cots_valid', num_shards=8)

In [None]:

# Label map with a single class: id 1 is named 'COTS'.
label_map_str = "\n".join([
    "item {",
    "  id: 1",
    "  name: 'COTS'",
    "}",
])

pathlib.Path('dataset/label_map.pbtxt').write_text(label_map_str)

# Print the label map file back to check what was written.
!more dataset/label_map.pbtxt

## Zipping the TFRecords

In [None]:
# Recursively archive the whole dataset directory into a single zip file.
!zip -r COTS_tfrecords.zip ./dataset