In [1]:
import os 
import io 
import logging 
import PIL
import hashlib
import numpy as np 
import pandas as pd 
import tensorflow as tf 
from skimage.measure import label, regionprops
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from sklearn.model_selection import train_test_split
from __future__ import division


# Command-line flags read by main().
flags = tf.app.flags
flags.DEFINE_string('masks_csv', '', 'Path to the Mask CSV')
flags.DEFINE_string('train_proportion', '0.8', 'Proportion of dataset dedicated for training')
flags.DEFINE_string('image_directory', '', 'Input directory for raw images')
# The flag must be called 'output_path': main() reads FLAGS.output_path to
# build the train/validation record file names.
flags.DEFINE_string('output_path', '', 'Output directory for TFRecords')
flags.DEFINE_string('label_map_path', '', 'Path to label map proto')
FLAGS = flags.FLAGS


def main():
    """Split the annotated images into train/validation sets and write a TFRecord for each.

    Every input (mask CSV, split proportion, image directory, output
    directory and label map path) is read from the module-level FLAGS.
    """
    csv_path = FLAGS.masks_csv
    train_fraction = FLAGS.train_proportion
    images_root = FLAGS.image_directory
    record_paths = {
        'train': os.path.join(FLAGS.output_path, 'train.record'),
        'val': os.path.join(FLAGS.output_path, 'validation.record'),
    }
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    all_masks = create_masks_dict(csv_path)
    # The split is done on image ids; random_state=0 keeps it reproducible.
    train_ids, val_ids = train_test_split(list(all_masks),
                                          train_size=float(train_fraction),
                                          random_state=0)
    masks_by_split = {
        'train': {image_id: all_masks[image_id] for image_id in train_ids},
        'val': {image_id: all_masks[image_id] for image_id in val_ids},
    }

    for split in ('train', 'val'):
        create_tf_record(record_paths[split],
                         label_map_dict,
                         images_root,
                         masks_by_split[split])


def create_tf_record(output_path,
                     label_map_dict,
                     image_directory,
                     masks_dict):
    """Write one serialized tf.train.Example per image to a TFRecord file.

    Args:
        output_path: path of the TFRecord file to create.
        label_map_dict: mapping from class name to integer class id.
        image_directory: directory containing the image files.
        masks_dict: mapping from image file name to the list of
            run-length-encoded masks for that image.

    Images for which create_tf_example raises ValueError are logged and
    skipped; any other exception propagates to the caller.
    """
    writer = tf.python_io.TFRecordWriter(output_path)
    # try/finally guarantees the record file is closed even when an
    # unexpected (non-ValueError) exception aborts the loop.
    try:
        for index, (image_id, masks) in enumerate(masks_dict.items()):
            if index % 1000 == 0:
                logging.info('On image {0} of {1}'.format(index, len(masks_dict)))

            try:
                # Every mask in this dataset is labelled with the single class 'ship'.
                class_names = ['ship'] * len(masks)
                tf_example = create_tf_example(image_id, masks, class_names, label_map_dict,
                                               image_directory)
                writer.write(tf_example.SerializeToString())

            except ValueError:
                logging.error("Error while attempting to create a record for {}".format(image_id))
    finally:
        writer.close()
    


def create_tf_example(file_name,
                      masks,
                      class_names,
                      label_map_dict,
                      image_directory,
                      image_size=(768, 768)):
    """Build a tf.train.Example for one image and its instance masks.

    Args:
        file_name: image file name, joined onto `image_directory`.
        masks: run-length-encoded mask strings, one per object.
        class_names: class name for each entry of `masks`.
        label_map_dict: mapping from class name to integer class id.
        image_directory: directory containing the image file.
        image_size: (height, width) stored as the image dimensions and used
            to normalise the bounding-box coordinates. It is not read from
            the image file itself.

    Returns:
        A tf.train.Example holding the encoded image and, for every object
        whose bounding box could be computed, the normalised box, class
        text, class label and PNG-encoded mask.

    Raises:
        ValueError: if the image file is not a JPEG.
        KeyError: if a class name is missing from `label_map_dict`.
    """
    height = image_size[0]
    width = image_size[1]
    xmins = []
    ymins = []
    xmaxs = []
    ymaxs = []
    encoded_masks = []
    kept_class_names = []
    kept_class_ids = []

    # Read image
    img_path = os.path.join(image_directory, file_name)

    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()

    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)

    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()

    # Look up class ids up front so an unknown class name still fails fast.
    class_ids = [label_map_dict[class_name] for class_name in class_names]

    # Encode each mask as PNG and compute its bounding box. A mask whose box
    # cannot be computed is dropped together with its class text and label,
    # so all per-object feature lists keep the same length and order.
    for mask, class_name, class_id in zip(masks, class_names, class_ids):
        mask_array = convert_mask_rle_to_img_array(mask)

        try:
            xmin, xmax, ymin, ymax = get_bbox_coordinates(mask_array)
        except ValueError:
            print("Error while attempting to create a record for {}".format(file_name))
            continue

        xmins.append(xmin / width)
        xmaxs.append(xmax / width)
        ymins.append(ymin / height)
        ymaxs.append(ymax / height)
        encoded_masks.append(convert_img_array_to_png_str(mask_array))
        kept_class_names.append(class_name.encode('utf8'))
        kept_class_ids.append(class_id)

    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(file_name.encode('utf8')),
        'image/source_id': dataset_util.bytes_feature(file_name.encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(kept_class_names),
        'image/object/class/label': dataset_util.int64_list_feature(kept_class_ids),
        'image/object/mask': dataset_util.bytes_list_feature(encoded_masks),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))

    return example


def create_masks_dict(csv_file):
    """Group the encoded masks of a segmentation CSV by image.

    Reads `csv_file` with pandas, drops rows whose EncodedPixels value is
    null, and returns a dict mapping each ImageId to the list of its
    EncodedPixels values in file order.
    """
    annotations = pd.read_csv(csv_file)
    annotated = annotations[annotations.EncodedPixels.notnull()]

    grouped = {}
    for record in annotated.itertuples():
        grouped.setdefault(record.ImageId, []).append(record.EncodedPixels)

    return grouped



def convert_mask_rle_to_img_array(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask into a binary image array.

    mask_rle: whitespace-separated string of "start length" pairs, where
        start is a 1-based index into the flattened image
    shape: dimensions used to reshape the flat buffer before transposing
    Returns a uint8 numpy array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1

    # Transpose so the array lines up with the direction the RLE runs in.
    return flat.reshape(shape).T


def convert_img_array_to_png_str(img_array):
    """Encode an image array as PNG and return the encoded bytes."""
    buffer = io.BytesIO()
    PIL.Image.fromarray(img_array).save(buffer, format='PNG')
    return buffer.getvalue()


def get_bbox_coordinates(mask):
    """Return the bounding box of the single connected region in a mask.

    Args:
        mask: binary mask array, passed to skimage's `label`
            (assumed to be indexed [row, column] -- TODO confirm).

    Returns:
        (xmin, xmax, ymin, ymax) in pixels, where x runs along columns and
        y along rows.

    Raises:
        ValueError: if the mask does not contain exactly one region whose
            bounding-box area is greater than 1.
    """
    # Only keep regions that have a bounding box area of greater than 1
    regions = [prop for prop in regionprops(label(mask)) if prop.bbox_area > 1]
    if len(regions) != 1:
        raise ValueError("The mask had {} regions".format(len(regions)))

    # regionprops reports bbox as (min_row, min_col, max_row, max_col):
    # rows are the y axis and columns are the x axis.
    ymin, xmin, ymax, xmax = regions[0].bbox

    return xmin, xmax, ymin, ymax

In [60]:
from pycocotools import mask, _mask

In [None]:
mask.toBbox

In [3]:
# Dataset and label-map locations. The defaults are the original local
# paths; set SHIPS_DATA_PATH / SHIPS_LABEL_MAP_PATH in the environment to run
# the notebook on another machine without editing this cell.
DATA_PATH = os.environ.get('SHIPS_DATA_PATH',
                           '/home/wentao/Development/data/ships_detection')
TRAIN_DIR = os.path.join(DATA_PATH, 'train')
TEST_DIR = os.path.join(DATA_PATH, 'test')
SAMPLE_CSV = os.path.join(DATA_PATH, 'sample_submission.csv')
MASKS_CSV = os.path.join(DATA_PATH, 'train_ship_segmentations.csv')
LABEL_MAP_PATH = os.environ.get(
    'SHIPS_LABEL_MAP_PATH',
    '/home/wentao/Development/ml/models/research/object_detection/data/ship_detection.pbtxt')

In [4]:
masks_df = pd.read_csv(MASKS_CSV)

In [43]:
T1 = masks_df['EncodedPixels'].iloc[1]

In [44]:
T1 = map(int, T1.split())

In [45]:
T2 = list(T1)

In [56]:
test= [['2', '3', '1', '1']]

In [58]:
test= [[2, 3, 1, 1]]

In [62]:
test

[[2, 3, 1, 1]]

In [67]:
_mask.toBbox([[1, 2]])

TypeError: list indices must be integers or slices, not str

In [68]:
detection_binary_mask = np.array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                  [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                  [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                  [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                  [  0,   0,   0,   0,   0,   1,   1,   1,   0,   0],
                                  [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                  [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                  [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
                                  [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0]], dtype=np.uint8)

In [70]:
# mask.encode is given a Fortran-ordered copy of the array: calling it on the
# original array raises "ValueError: ndarray is not Fortran contiguous".
fortran_detection_binary_mask = np.asfortranarray(detection_binary_mask)
encoded_detection = mask.encode(fortran_detection_binary_mask)

In [75]:
detection_binary_mask

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

In [76]:
fortran_detection_binary_mask

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

In [77]:
print(type(detection_binary_mask))
print(type(fortran_detection_binary_mask))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [71]:
encoded_detection

{'size': [9, 10], 'counts': b'^145000a0'}

In [80]:
mask.encode(detection_binary_mask)

ValueError: ndarray is not Fortran contiguous

In [3]:
train_proportion=0.8

In [4]:
# Reproducible train/validation split over image ids (random_state is fixed).
masks_dict = create_masks_dict(MASKS_CSV)
image_ids = list(masks_dict.keys())
train_ids, val_ids = train_test_split(
    image_ids,
    train_size=float(train_proportion),
    random_state=0,
)
train_masks_dict = dict((key, masks_dict[key]) for key in train_ids)
val_masks_dict = dict((key, masks_dict[key]) for key in val_ids)




In [5]:
# Despite its name, `output_dir` is the path of the train TFRecord *file*;
# it is passed to create_tf_record as `output_path`.
output_dir = os.path.join(DATA_PATH, 'train.record')
label_map_dict = label_map_util.get_label_map_dict(LABEL_MAP_PATH)

In [None]:
create_tf_record(output_dir, label_map_dict, TRAIN_DIR, train_masks_dict)

In [6]:
# Write the validation TFRecord. `val_output_dir` is the record file path,
# and the images are read from TRAIN_DIR because the validation ids were
# split off from the same masks CSV as the training ids.
val_output_dir = os.path.join(DATA_PATH, 'val.record')
label_map_dict = label_map_util.get_label_map_dict(LABEL_MAP_PATH)
create_tf_record(val_output_dir, label_map_dict, TRAIN_DIR, val_masks_dict)

Error while attempting to create a record for 492301277.jpg
Error while attempting to create a record for 8432013c3.jpg
Error while attempting to create a record for 8432013c3.jpg
Error while attempting to create a record for 907193929.jpg
Error while attempting to create a record for 64c84253b.jpg
Error while attempting to create a record for 89e350a24.jpg
Error while attempting to create a record for 7052088f4.jpg
Error while attempting to create a record for 783ee2725.jpg
Error while attempting to create a record for 16d28c367.jpg
Error while attempting to create a record for feacf6719.jpg
Error while attempting to create a record for feacf6719.jpg
Error while attempting to create a record for 60ca4f877.jpg
Error while attempting to create a record for 14a1efc07.jpg
Error while attempting to create a record for 9b4eefbd7.jpg
Error while attempting to create a record for 5a8785be5.jpg
Error while attempting to create a record for 9a39363b0.jpg
Error while attempting to create a recor

In [None]:
masks_df = pd.read_csv(MASKS_CSV)

In [None]:
masks_df[masks_df['ImageId'] == '1b117d4d5.jpg']

In [None]:
masks_dict = create_masks_dict(MASKS_CSV)

In [None]:
print(props)

In [None]:

def get_bbox_coordinates(mask):
    """Debugging variant of get_bbox_coordinates without the area filter.

    NOTE: running this cell redefines (shadows) the definition from the
    first cell. Unlike that definition, regions with a bounding-box area of
    1 are NOT discarded here, so such masks raise ValueError.

    Args:
        mask: binary mask array, passed to skimage's `label`.

    Returns:
        (xmin, xmax, ymin, ymax) in pixels, where x runs along columns and
        y along rows.

    Raises:
        ValueError: if the mask does not contain exactly one region.
    """
    props = regionprops(label(mask))

    # The area filter is disabled in this variant:
    # props = [prop for prop in props if prop.bbox_area > 1]
    if len(props) != 1:
        raise ValueError("The mask had {} regions".format(len(props)))

    # regionprops reports bbox as (min_row, min_col, max_row, max_col):
    # rows are the y axis and columns are the x axis.
    ymin, xmin, ymax, xmax = props[0].bbox

    return xmin, xmax, ymin, ymax

In [None]:
# The loop variable is `mask_rle` rather than `mask` so that the pycocotools
# `mask` module imported earlier in the notebook is not rebound to a string.
for mask_rle in masks_dict['1b117d4d5.jpg']:
    print(mask_rle)
    mask_array = convert_mask_rle_to_img_array(mask_rle)
    get_bbox_coordinates(mask_array)

In [None]:
mask_array = convert_mask_rle_to_img_array('66124 2 66894 1 67662 1 68430 1 69198 1 69966 1 70734 1 73036 2 73804 2 74572 2')
lbl = label(mask_array)
props = regionprops(lbl)
len(props)

In [None]:
for prop in props:
    print(prop.bbox_area)

In [None]:
for prop in props:
    print(prop.bbox[0], prop.bbox[2])
    print(prop.bbox[1], prop.bbox[3])
    print("---")

In [None]:
import matplotlib.pyplot as plt
from skimage.io import imread, imshow
%matplotlib inline

imshow(os.path.join(TRAIN_DIR, '1b117d4d5.jpg'))

In [None]:

imshow(mask_array)

In [None]:
lbl = label(mask_array)
props = regionprops(lbl)

In [None]:
print(props[0].bbox[0])
print(props[0].bbox[2])
print(props[0].bbox[1])
print(props[0].bbox[3])

In [None]:
print(props[1].bbox[0])
print(props[1].bbox[2])
print(props[1].bbox[1])
print(props[1].bbox[3])

In [None]:
props[1].bbox_area

In [None]:
props[0].bbox_area

In [None]:
ships_df = masks_df[masks_df['EncodedPixels'].notnull()]

In [None]:
for row in ships_df.itertuples():
    print(row.ImageId)
    break

In [None]:
# Group the encoded masks by image id, reporting progress every 10000 rows.
masks_dict = {}
for index, row in enumerate(ships_df.itertuples()):
    if not index % 10000:
        print("Processed {} rows".format(index))

    image_id, encoded_pixels = row.ImageId, row.EncodedPixels
    masks_dict.setdefault(image_id, []).append(encoded_pixels)

In [None]:
image_ids = [key for key in masks_dict.keys()]

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
train, val = train_test_split(image_ids)

In [None]:
len(masks_dict.keys())

In [None]:
len(masks_dict['00021ddc3.jpg'])

In [None]:
['ship'.encode('utf8')] * 9

In [None]:
# Inputs for building a single example by hand.
file_name = '00021ddc3.jpg'
masks = masks_dict[file_name]
# One class name per mask, derived from the data rather than a hard-coded
# count, so the lists stay aligned if the image's mask count differs.
class_names = ['ship'] * len(masks)
label_map_dict = label_map_util.get_label_map_dict(LABEL_MAP_PATH)
image_directory = TRAIN_DIR

In [None]:
example = create_tf_example(file_name, masks, class_names, label_map_dict, image_directory)

In [None]:
example

In [None]:
masks[0]