In [2]:
import tensorflow as tf
import numpy as np
import os
import scipy.io as spio
from matplotlib import pyplot as plt
from imageio import imread

#### Note: if you only have one of the two datasets (it does not matter which one), just run all of the notebook's cells and it will work fine.

In [3]:
# Base paths for the original Pascal VOC dataset training images.
# The dataset root can be overridden with the VOC2012_DIR environment variable
# so the notebook does not depend on one machine's directory layout; the
# default below is the previously hard-coded location.
base_dataset_dir_voc = os.environ.get(
    'VOC2012_DIR',
    '/Users/udaysawhney/Desktop/segmentation/deeplab_v3-master/dataset/VOCdevkit/VOC2012/',
)
images_folder_name_voc = "JPEGImages/"
# NOTE(review): in the standard VOC2012 devkit layout "Annotations/" presumably
# holds XML files, while segmentation masks live in "SegmentationClass/" --
# confirm this is the intended annotation folder.
annotations_folder_name_voc = "Annotations/"
images_dir_voc = os.path.join(base_dataset_dir_voc, images_folder_name_voc)
print(images_dir_voc)
annotations_dir_voc = os.path.join(base_dataset_dir_voc, annotations_folder_name_voc)

/Users/udaysawhney/Desktop/segmentation/deeplab_v3-master/dataset/VOCdevkit/VOC2012/JPEGImages/


In [4]:
# Base paths for the augmented Pascal VOC images.
# Download: http://home.bharathh.info/pubs/codes/SBD/download.html
images_folder_name_aug_voc, annotations_folder_name_aug_voc = "img/", "cls/"
base_dataset_dir_aug_voc = '<pascal/augmented/VOC/images/path>/benchmark_RELEASE/dataset'
# Join the dataset root with each sub-folder name to get the full directories.
images_dir_aug_voc, annotations_dir_aug_voc = (
    os.path.join(base_dataset_dir_aug_voc, folder_name)
    for folder_name in (images_folder_name_aug_voc, annotations_folder_name_aug_voc)
)

In [5]:
def get_files_list(base_dataset_dir, images_folder_name, annotations_folder_name, filename):
    """Read the image identifiers listed, one per line, in ``filename``.

    Args:
        base_dataset_dir: unused; kept so existing call sites keep working.
        images_folder_name: unused; kept for the same reason.
        annotations_folder_name: unused; kept for the same reason.
        filename: path of a text file with one image identifier per line.

    Returns:
        list of str: the identifiers in file order, with surrounding
        whitespace (including the trailing newline) removed and blank
        lines dropped.

    Raises:
        OSError: if ``filename`` cannot be opened.
    """
    # The context manager guarantees the handle is closed even if reading fails.
    with open(filename, 'r') as names_file:
        stripped_lines = (line.strip() for line in names_file)
        # Blank lines would otherwise become empty identifiers downstream.
        return [name for name in stripped_lines if name]

In [6]:
# Load the image identifiers listed in the custom training split file.
images_filename_list = get_files_list(
    base_dataset_dir=base_dataset_dir_aug_voc,
    images_folder_name=images_folder_name_aug_voc,
    annotations_folder_name=annotations_folder_name_aug_voc,
    filename="custom_train.txt",
)
print("Total number of training images:", len(images_filename_list))

Total number of training images: 8252


In [7]:
# Shuffle the file list and hold out 10% of it for validation.
# A fixed seed makes the split reproducible across kernel restarts, and
# shuffling a copy (instead of images_filename_list itself) keeps this cell
# idempotent: re-running it yields the same train/validation split.
SPLIT_SEED = 42
VALIDATION_FRACTION = 0.10
shuffled_filename_list = list(images_filename_list)
np.random.RandomState(SPLIT_SEED).shuffle(shuffled_filename_list)
# Compute the boundary once so both slices are guaranteed to be complementary.
val_split_index = int(VALIDATION_FRACTION * len(shuffled_filename_list))
val_images_filename_list = shuffled_filename_list[:val_split_index]
train_images_filename_list = shuffled_filename_list[val_split_index:]

In [8]:
# Report how many file names ended up in each split.
for split_label, split_filenames in (
    ("train set size:", train_images_filename_list),
    ("val set size:", val_images_filename_list),
):
    print(split_label, len(split_filenames))

train set size: 7427
val set size: 825


In [9]:
# Output directory and file names for the generated TFRecord files.
TRAIN_DATASET_DIR="./tfrecords/"
# makedirs(exist_ok=True) creates the directory (and any missing parents)
# without the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(TRAIN_DATASET_DIR, exist_ok=True)

TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
# One writer per split; each is handed to create_tfrecord_dataset, which closes it.
train_writer = tf.python_io.TFRecordWriter(os.path.join(TRAIN_DATASET_DIR,TRAIN_FILE))
val_writer = tf.python_io.TFRecordWriter(os.path.join(TRAIN_DATASET_DIR,VALIDATION_FILE))

In [10]:
def _bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def _int64_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

In [11]:
def read_annotation_from_mat_file(annotations_dir, image_name):
    """Load the segmentation annotation stored in a MATLAB ``.mat`` file.

    Args:
        annotations_dir: directory containing the ``.mat`` annotation files.
        image_name: image identifier; surrounding whitespace is stripped
            before the file name is built.

    Returns:
        The value stored under ``GTcls`` -> ``Segmentation`` in the file.

    Raises:
        FileNotFoundError: if ``<image_name>.mat`` does not exist in
            ``annotations_dir``.
    """
    # scipy.io.loadmat only parses MAT-files, so the annotation must be looked
    # up with the ".mat" extension; the previous ".xml" suffix could never
    # point at a file loadmat is able to read.
    annotations_path = os.path.join(annotations_dir, image_name.strip() + ".mat")
    mat = spio.loadmat(annotations_path)
    # loadmat wraps the struct field in nested arrays; [0][0] unwraps it.
    return mat['GTcls']['Segmentation'][0][0]

In [12]:
# Decode every training JPEG from the original VOC image folder.
# NOTE(review): looks like a leftover sanity check -- only the last decoded
# array stays bound to image_np and nothing else is kept; confirm whether this
# cell is still needed.
for i, image_name in enumerate(train_images_filename_list):
    jpeg_filename = image_name.strip() + ".jpg"
    image_np = imread(os.path.join(images_dir_voc, jpeg_filename))

In [15]:
def create_tfrecord_dataset(filename_list, writer):
    """Serialize image/annotation pairs into a TFRecord file.

    For each identifier the image is read from the augmented VOC image folder
    first and from the original VOC image folder as a fallback. The annotation
    is read from the augmented ``.mat`` files first and from the original VOC
    annotation folder as a fallback. Entries whose image or annotation cannot
    be found in either place are reported on stdout and skipped.

    Args:
        filename_list: iterable of image identifiers (without extension);
            surrounding whitespace is ignored and blank entries are skipped.
        writer: TFRecord writer that receives the serialized examples. It is
            always closed before this function returns or raises.

    Returns:
        None. The number of written examples is printed on completion.

    Raises:
        Any exception other than ``FileNotFoundError`` raised while reading a
        file (for example an image that cannot be decoded) propagates to the
        caller; the writer is still closed in that case.
    """
    read_imgs_counter = 0
    try:
        for image_name in filename_list:
            # Strip once instead of at every use site.
            image_id = str(image_name).strip()
            if not image_id:
                # A blank line in the list file is not an image identifier.
                continue
            image_file = image_id + ".jpg"

            try:
                image_np = imread(os.path.join(images_dir_aug_voc, image_file))
            except FileNotFoundError:
                try:
                    # read from Pascal VOC path
                    image_np = imread(os.path.join(images_dir_voc, image_file))
                except FileNotFoundError:
                    print(os.path.join(images_dir_voc, image_file))
                    print("File:", image_id, "not found.")
                    continue

            try:
                annotation_np = read_annotation_from_mat_file(annotations_dir_aug_voc, image_id)
            except FileNotFoundError:
                try:
                    # read from Pascal VOC path
                    # NOTE(review): the annotation is looked up as a ".jpg"
                    # file; confirm that matches how the masks are stored.
                    annotation_np = imread(os.path.join(annotations_dir_voc, image_file))
                except FileNotFoundError:
                    print("File:", image_id, "not found.")
                    continue

            read_imgs_counter += 1
            image_h, image_w = image_np.shape[:2]

            # tobytes() is the supported spelling of the deprecated tostring()
            # and returns the same raw buffer.
            img_raw = image_np.tobytes()
            annotation_raw = annotation_np.tobytes()

            example = tf.train.Example(features=tf.train.Features(feature={
                    'height': _int64_feature(image_h),
                    'width': _int64_feature(image_w),
                    'image_raw': _bytes_feature(img_raw),
                    'annotation_raw': _bytes_feature(annotation_raw)}))

            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when a read/serialization error aborts the loop,
        # so the records written so far are not left in an open file handle.
        writer.close()

    print("End of TfRecord. Total of image written:", read_imgs_counter)

In [16]:
# create training dataset
create_tfrecord_dataset(train_images_filename_list, train_writer)

ValueError: Could not find a format to read the specified file in mode 'i'

In [27]:
# create validation dataset
create_tfrecord_dataset(val_images_filename_list, val_writer)

File: 2009_000303 not found.
File: 2008_005599 not found.
File: 2009_002685 not found.
File: 2008_006753 not found.
File: 2010_002856 not found.
File: 2011_002988 not found.
File: 2007_005227 not found.
File: 2009_004953 not found.
File: 2010_004874 not found.
File: 2008_001876 not found.
File: 2008_005379 not found.
File: 2008_006747 not found.
File: 2009_004374 not found.
File: 2008_007525 not found.
File: 2010_001044 not found.
File: 2010_003139 not found.
File: 2011_002571 not found.
File: 2007_001698 not found.
File: 2010_005937 not found.
File: 2010_000689 not found.
File: 2010_002552 not found.
File: 2009_001500 not found.
File: 2008_005354 not found.
File: 2009_001413 not found.
File: 2007_003000 not found.
File: 2009_003636 not found.
File: 2010_001625 not found.
File: 2008_002177 not found.
File: 2010_000485 not found.
File: 2008_005541 not found.
File: 2010_001277 not found.
File: 2008_003675 not found.
File: 2010_003672 not found.
File: 2011_001949 not found.
File: 2008_008

File: 2009_003039 not found.
File: 2008_008057 not found.
File: 2009_000157 not found.
File: 2010_004597 not found.
File: 2008_000142 not found.
File: 2009_000337 not found.
File: 2011_002236 not found.
File: 2008_007947 not found.
File: 2011_002598 not found.
File: 2008_007054 not found.
File: 2009_002465 not found.
File: 2010_000613 not found.
File: 2008_005685 not found.
File: 2008_005634 not found.
File: 2008_007287 not found.
File: 2009_001085 not found.
File: 2008_001257 not found.
File: 2008_008180 not found.
File: 2008_003534 not found.
File: 2008_006625 not found.
File: 2010_003801 not found.
File: 2011_001399 not found.
File: 2008_007599 not found.
File: 2008_004427 not found.
File: 2010_001119 not found.
File: 2009_004587 not found.
File: 2008_002562 not found.
File: 2011_003151 not found.
File: 2008_005309 not found.
File: 2008_006969 not found.
File: 2008_006808 not found.
File: 2010_004445 not found.
File: 2011_000765 not found.
File: 2010_002456 not found.
File: 2008_006

File: 2008_001691 not found.
File: 2008_003745 not found.
File: 2009_002519 not found.
File: 2010_003887 not found.
File: 2010_003651 not found.
File: 2009_003168 not found.
File: 2009_003668 not found.
File: 2011_001885 not found.
File: 2011_002291 not found.
File: 2008_003151 not found.
File: 2008_006211 not found.
File: 2008_005362 not found.
File: 2010_002457 not found.
File: 2011_002039 not found.
File: 2009_001395 not found.
File: 2010_000386 not found.
File: 2010_002185 not found.
File: 2007_004500 not found.
File: 2009_004096 not found.
File: 2008_006750 not found.
File: 2008_002930 not found.
File: 2009_003351 not found.
File: 2009_001635 not found.
File: 2008_005677 not found.
File: 2008_002961 not found.
File: 2010_001689 not found.
File: 2009_000486 not found.
File: 2008_003417 not found.
File: 2008_000723 not found.
File: 2008_004110 not found.
File: 2008_003449 not found.
File: 2010_004717 not found.
File: 2009_002879 not found.
File: 2008_005136 not found.
File: 2009_000