In [36]:
"""
Baseline for CIL project on road segmentation.
This simple baseline consits of a CNN with two convolutional+pooling layers with a soft-max loss
"""

import gzip
import os
import sys
import urllib
import matplotlib.image as mpimg
from PIL import Image
from random import shuffle
import time
import math
import code

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

import cil_helper as cil

from skimage.feature import greycomatrix, greycoprops

In [37]:
try:
    tf.app.flags.DEFINE_string('log_dir', '/tmp/tensorflow_lukas_balz',
                           """Directory where to write event logs """
                           """and checkpoint.""")
except: 
    print(tf.app.flags.FLAGS.log_dir)
    
FLAGS = tf.app.flags.FLAGS

/tmp/tensorflow_lukas_balz


In [38]:
TRAINING_SIZE = 20 # 100
TEST_SIZE = 50 # 50
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 32 # 64
NUM_EPOCHS = 20
RESTORE_MODEL = False # If True, restore existing model instead of training a new one
RECORDING_STEP = 200
ERROR_STEP = 500

# Set image patch size in pixels
# IMG_PATCH_SIZE should be a multiple of 4
# image size should be an integer multiple of this number!
IMG_PATCH_SIZE = 8
NUM_CHANNELS = 6
SAMPLING_STEP = 8 #math.ceil(math.sqrt(IMG_PATCH_SIZE))
NUM_LABELS = 2

# all prediction size
ALL_PER = 100

In [39]:
data_dir = 'training/'
train_data_filename = data_dir + 'images/'
train_labels_filename = data_dir + 'groundtruth/' 
test_dir = "test/"

In [40]:
def img_crop(im, size):
    list_patches = []
    imgwidth = im.shape[0]
    imgheight = im.shape[1]
    is_2d = len(im.shape) < 3
    step = SAMPLING_STEP
    for i in range(0,imgheight,step):
        if i+size <=imgheight:
            for j in range(0,imgwidth,step):
                if j+size <=imgwidth:
                    if is_2d:
                        im_patch = im[j:j+size, i:i+size]
                    else:
                        im_patch = im[j:j+size, i:i+size, :]
                    list_patches.append(im_patch)
                    list_patches.append(np.rot90(im_patch))
                    list_patches.append(np.fliplr(im_patch))
    return list_patches

In [41]:
# Extract label images
def extract_labels(filename, num_images):
    """Extract the labels into a 1-hot matrix [image index, label index]."""
    gt_imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            #print ('Loading ' + image_filename)
            img = mpimg.imread(image_filename)
            gt_imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    num_images = len(gt_imgs)
    gt_patches = [img_crop(gt_imgs[i], IMG_PATCH_SIZE) for i in range(num_images)]
    data = np.asarray([gt_patches[i][j] for i in range(len(gt_patches)) for j in range(len(gt_patches[i]))])
    labels = np.asarray([value_to_class(data[i]) for i in range(len(data))])

    # Convert to dense 1-hot representation.
    return labels.astype(np.float32)

In [42]:
def make_patches(imgs, TRAINING_SIZE):
    num_images = TRAINING_SIZE

    img_patches = [img_crop(imgs[i], IMG_PATCH_SIZE) for i in range(num_images)]
    data = [img_patches[i][j] for i in range(len(img_patches)) for j in range(len(img_patches[i]))]

    return np.asarray(data)

In [43]:
# Assign a label to a patch v
def value_to_class(v):
    df = np.sum(np.mean(v))
    if df < foreground_threshold:
       return [0,1]
    else:
       return [1,0]
    
    
    size = v.shape[0]
    step = int(size/2)
    labels = []
    for i in range(0,size,step):
            for j in range(0,size,step):
                labels.append(get_label(np.mean(v[j:j+step, i:i+step])))
    return labels

def get_label(v):
    foreground_threshold = 0.25 # percentage of pixels > 1 required to assign a foreground label to a patch
    df = np.sum(v)
    return df
    if df < foreground_threshold:
       return 0
    else:
       return 1

In [44]:
def load_preproc(save_dir):
    assert save_dir.endswith('/')
    return np.load(save_dir + 'preprocessed2.npy')

In [45]:
train_preproc = load_preproc(train_data_filename)
train_data = make_patches(train_preproc, TRAINING_SIZE)

train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

In [46]:
print(train_data.shape)
#NUM_CHANNELS = 3
#train_data = train_data[:,:,:,0:NUM_CHANNELS]
#print(train_data.shape)
print(train_labels.shape)

(144060, 16, 16, 6)
(144060, 4)


In [47]:
c0 = 0
c1 = 0
for i in range(len(train_labels)):
    if np.mean(train_labels[i]) > 0.5:
        c0 = c0 + 1 # foreground -> road
    else:
        c1 = c1 + 1 # background
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

Number of data points per class: c0 = 24834 c1 = 119226


In [48]:
sampling = False

In [49]:
print ('Balancing training data...')
min_c = min(c0, c1)
max_c = max(c0, c1)
idx0 = [i for i, j in enumerate(train_labels) if np.mean(j) > 0.5]
idx1 = [i for i, j in enumerate(train_labels) if np.mean(j) <= 0]

if sampling: #sample smaller class
    if c0 < c1:
        while len(idx0) < c1:
            print(len(idx0))
            idx0.extend(idx0)
            print(len(idx0))
    else:
        while len(idx1)<c0:
            idx1.extend(idx1)
    lim = max_c
else: 
    lim = min_c
    
new_indices = idx0[0:lim] + idx1[0:lim]
train_data = train_data[new_indices,:,:,:]
train_labels = train_labels[new_indices]
    
print(len(new_indices))
print(train_data.shape)

Balancing training data...
49668
(49668, 16, 16, 6)


In [50]:
train_size = train_labels.shape[0]

In [51]:
# This is where training samples and labels are fed to the graph.
# These placeholder nodes will be fed a batch of training data at each
# training step using the {feed_dict} argument to the Run() call below.
train_data_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS),
    name='train_data')
train_labels_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, NUM_LABELS),
    name='train_labels')
train_all_data_node = tf.constant(train_data)

In [52]:
# The variables below hold all the trainable weights. They are passed an
# initial value which will be assigned when when we call:
# {tf.initialize_all_variables().run()}
conv1_1_weights = cil.weight_variable([4, 4, NUM_CHANNELS, 20])
conv1_1_biases = cil.bias_variable([20])
conv1_2_weights = cil.weight_variable([4, 4, 20, 20])
conv1_2_biases = cil.bias_variable([20])

conv2_weights = cil.weight_variable([4, 4, 20, 40])
conv2_biases = cil.bias_variable([40])

conv3_weights = cil.weight_variable([4, 4, 40, 80])
conv3_biases = cil.bias_variable([80])

conv4_size = 256
conv4_weights = cil.weight_variable([2, 2, 160, conv4_size])
conv4_biases = cil.bias_variable([conv4_size])

fc1_weights = tf.Variable(  # fully connected, depth 512.
    tf.truncated_normal([640 , 512],
                        stddev=0.1,
                        seed=SEED))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
fc2_weights = tf.Variable(
    tf.truncated_normal([512, NUM_LABELS],
                        stddev=0.1,
                        seed=SEED))
fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

In [53]:
# We will replicate the model structure for the training subgraph, as well
# as the evaluation subgraphs, while sharing the trainable parameters.
def model(data, train=False):
    """The Model definition."""
    conv1_1 = cil.conv2d(data, conv1_1_weights)
    relu1_1 = tf.nn.relu(tf.nn.bias_add(conv1_1, conv1_1_biases))
    #conv1_2 = cil.conv2d(relu1_1, conv1_2_weights)
    #relu1_2 = tf.nn.relu(tf.nn.bias_add(conv1_2, conv1_2_biases))
    
    pool1 = cil.max_pool_2x2(relu1_1)

    conv2 = cil.conv2d(pool1, conv2_weights)
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    pool2 = cil.max_pool_2x2(relu2)
    
    conv3 = cil.conv2d(pool2, conv3_weights)
    relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_biases))
    maxpool = cil.max_pool_2x2(relu3)
    avgpool = cil.avg_pool_2x2(relu3)
    pool3 = tf.concat(3, [maxpool, avgpool])
    #print(maxpool.get_shape())
    #print(avgpool.get_shape())
    #print(pool3.get_shape())
    
    conv4 = cil.conv2d(pool3, conv4_weights)
    relu4 = tf.nn.relu(tf.nn.bias_add(conv4, conv4_biases))
    pool4 = cil.max_pool_2x2(relu4)

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool2.get_shape().as_list()
    reshape = tf.reshape(
        pool2,
        [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    
    #print(pool4.get_shape())
    #print(reshape.get_shape())
    #print(fc1_weights.get_shape())
    
    fc1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    if train:
        fc1 = tf.nn.dropout(fc1, 0.5, seed=SEED)
    fc2 = tf.matmul(fc1, fc2_weights) + fc2_biases

    return fc2

In [54]:
# Training computation: logits + cross-entropy loss.
logits = model(train_data_node, True) # BATCH_SIZE*NUM_LABELS
# print 'logits = ' + str(logits.get_shape()) + ' train_labels_node = ' + str(train_labels_node.get_shape())
#loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
 #   logits, train_labels_node))
loss = tf.reduce_mean(tf.square(logits - train_labels_node))

In [55]:
# L2 regularization for the fully connected parameters.
regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
# Add the regularization term to the loss.
loss += 5e-4 * regularizers

In [56]:
# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
batch = tf.Variable(0)
# Decay once per epoch, using an exponential schedule starting at 0.01.
learning_rate = tf.train.exponential_decay(
    0.01,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    train_size,          # Decay step.
    0.95,                # Decay rate.
    staircase=True)

In [57]:
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch)

# try adam
#optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=batch)

#optimizer = tf.train.AdadeltaOptimizer(learning_rate).minimize(loss)

In [58]:
# Predictions for the minibatch, validation set and test set.
train_prediction = logits #tf.nn.softmax(logits)
# We'll compute them only once in a while by calling their {eval()} method.
train_all_prediction = model(train_all_data_node[0:ALL_PER,:,:,:]) #tf.nn.softmax(model(train_all_data_node[0:ALL_PER,:,:,:]))

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

In [59]:
s = tf.InteractiveSession()

Exception ignored in: <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x7f2bff43c400>>
Traceback (most recent call last):
  File "/home/leo/.venvlis/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 171, in __del__
    self.close()
  File "/home/leo/.venvlis/lib/python3.4/site-packages/tensorflow/python/client/session.py", line 976, in close
    self._default_session.__exit__(None, None, None)
  File "/usr/lib/python3.4/contextlib.py", line 66, in __exit__
    next(self.gen)
  File "/home/leo/.venvlis/lib/python3.4/site-packages/tensorflow/python/framework/ops.py", line 3378, in get_controller
    % type(default))
AssertionError: Nesting violated for default stack of <class 'weakref'> objects


In [60]:
if RESTORE_MODEL:
    # Restore variables from disk.
    saver.restore(s,FLAGS.train_dir + "/model8.ckpt")
    print("Model restored.")

In [61]:
#saver.restore(s,FLAGS.train_dir + "/model8.ckpt")

In [62]:
# Run all the initializers to prepare the trainable parameters.
tf.initialize_all_variables().run()
print ('Initialized!')

Initialized!


In [63]:
# Loop through training steps.
print ('Total number of iterations = ' + str(int(NUM_EPOCHS * train_size / BATCH_SIZE)))

training_indices = range(train_size)

for iepoch in range(NUM_EPOCHS):

    # Permute training indices
    perm_indices = np.random.permutation(training_indices)

    for step in range (int(train_size / BATCH_SIZE)):

        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

        # Compute the offset of the current minibatch in the data.
        # Note that we could use better randomization across epochs.
        batch_data = train_data[batch_indices, :, :, :]
        batch_labels = train_labels[batch_indices]
        
        
        #print('batch_data.shape = ' + str(batch_data.shape))
        # This dictionary maps the batch data (as a numpy array) to the
        # node in the graph is should be fed to.
        feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}
        
        stepNr = step + iepoch*int(train_size / BATCH_SIZE)

        if stepNr % ERROR_STEP == 0:
            start_time = time.time()
            l, lr, all_predictions = s.run(
                        [loss, learning_rate, train_all_prediction],
                        feed_dict=feed_dict)
            
            duration = time.time() - start_time
            #print ('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            #print ('Minibatch error: %.1f%%' % cil.error_rate(predictions,
                                                              #   batch_labels))
            loss_value = cil.error_rate(all_predictions, train_labels[:ALL_PER,:])
            
            format_str = ('step %d, error: %.1f%% (%.3f sec/batch)')
            print (format_str % (stepNr, loss_value, duration)) 
        elif stepNr % RECORDING_STEP == 0:
            
            start_time = time.time()
            l, lr, all_predictions = s.run(
                        [loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)
            
            duration = time.time() - start_time
            #print ('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            #print ('Minibatch error: %.1f%%' % cil.error_rate(predictions,
                                                              #   batch_labels))
            loss_value = cil.error_rate(predictions, batch_labels)
            
            format_str = ('step %d, batch error: %.1f%% (%.3f sec/batch)')
            print (format_str % (stepNr, loss_value, duration)) 
        else:
            # Run the graph and fetch some of the nodes.
            _, l, lr, predictions = s.run(
                        [optimizer, loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)

Total number of iterations = 31042
step 0, error: 79.0% (0.122 sec/batch)
step 200, batch error: 81.2% (0.058 sec/batch)
step 400, batch error: 78.1% (0.010 sec/batch)
step 500, error: 42.0% (0.022 sec/batch)
step 600, batch error: 65.6% (0.010 sec/batch)
step 800, batch error: 81.2% (0.013 sec/batch)
step 1000, error: 62.0% (0.025 sec/batch)
step 1200, batch error: 84.4% (0.010 sec/batch)
step 1400, batch error: 81.2% (0.010 sec/batch)
step 1500, error: 63.0% (0.025 sec/batch)
step 1600, batch error: 81.2% (0.011 sec/batch)
step 1800, batch error: 68.8% (0.012 sec/batch)
step 2000, error: 66.0% (0.032 sec/batch)
step 2200, batch error: 81.2% (0.012 sec/batch)
step 2400, batch error: 81.2% (0.009 sec/batch)
step 2500, error: 79.0% (0.055 sec/batch)
step 2600, batch error: 87.5% (0.011 sec/batch)
step 2800, batch error: 75.0% (0.010 sec/batch)
step 3000, error: 70.0% (0.024 sec/batch)
step 3200, batch error: 65.6% (0.010 sec/batch)
step 3400, batch error: 75.0% (0.009 sec/batch)
step 35

In [64]:
save_path = saver.save(s, FLAGS.train_dir + "/model8.ckpt")

In [65]:
# Convert array of labels to an image
def label_to_img(imgsize, size, labels):
    array_labels = np.zeros([imgsize, imgsize])
    idx = 0
    step = size #SAMPLING_STEP
    for i in range(0,imgsize,step):
        if i+size <=imgsize:
            for j in range(0,imgsize,step):
                if j+size <=imgsize:
                    ## patch has 4 labels
                    patch = array_labels[j:j+size, i:i+size]
                    # get the 4 classes
                    array_labels[j:j+size, i:i+size] = (array_labels[j:j+size, i:i+size] + class_to_img(patch, labels[idx]))/2
                    idx = idx + 1
    return array_labels

# Assign a label to a patch v
def class_to_img(v, labels):
    size = v.shape[0]
    step = int(size/2)
    idx = 0
    for i in range(0,size,step):
            for j in range(0,size,step):
                if labels[idx] > 0.5:
                    l = 1
                else:
                    l = 0
                v[j:j+step, i:i+step] = l
                idx = idx + 1
    return v

In [66]:
# Get prediction overlaid on the original image for given input image
def get_prediction_with_overlay(img):
    
    img_prediction = get_prediction(img)
    oimg = cil.make_img_overlay(img[:,:,:3], img_prediction)

    return oimg

# Get a concatenation of the prediction and groundtruth for given input file
def get_prediction_with_groundtruth(img):
    
    img_prediction = get_prediction(img)
    cimg = cil.concatenate_images(img[:,:,:3], img_prediction)

    return cimg

# Get prediction for given input image 
def get_prediction(img):
    data = np.asarray(img_crop(img, IMG_PATCH_SIZE))
    data = data[:,:,:,0:NUM_CHANNELS]
    data_node = tf.constant(data)
    output = model(data_node) #tf.nn.softmax(model(data_node))
    output_prediction = s.run(output)
    img_prediction = label_to_img(img.shape[0], IMG_PATCH_SIZE, output_prediction)
    #img_prediction = np.rint(img_prediction/np.max(img_prediction))

    return img_prediction

In [None]:
print ("Running prediction on training set")
prediction_training_dir = "predictions_training/"
if not os.path.isdir(prediction_training_dir):
    os.mkdir(prediction_training_dir)
    
for i in range(TRAINING_SIZE):
    pimg = get_prediction_with_groundtruth(train_preproc[i])
    Image.fromarray(pimg).save(prediction_training_dir + "prediction8_" + str(i+1) + ".png")
    oimg = get_prediction_with_overlay(train_preproc[i])
    oimg.save(prediction_training_dir + "overlay8_" + str(i+1) + ".png")  

Running prediction on training set


In [None]:
print ("Running prediction on test set")
prediction_test_dir = "predictions_test/"
if not os.path.isdir(prediction_test_dir):
    os.mkdir(prediction_test_dir)

test_preproc = load_preproc(test_dir)
for i in range(TEST_SIZE):
    pred = get_prediction(test_preproc[i])
    pimg = cil.img_float_to_uint8(pred)
    Image.fromarray(pimg).save(prediction_test_dir + "prediction8_" + str(i+1) + ".png")
    oimg = get_prediction_with_overlay(test_preproc[i])
    oimg.save(prediction_test_dir + "overlay8_" + str(i+1) + ".png")

Running prediction on test set


In [None]:
import re
th = 0.5

In [None]:
# assign a label to a patch
def patch_to_label(patch):
    df = np.mean(patch)
    if df > th:
        return 1
    else:
        return 0


def mask_to_submission_strings(image_filename):
    """Reads a single image and outputs the strings that should go into the submission file"""
    reg = re.search(r"\_\d+", image_filename)
    group = reg.group(0)[1:]
    img_number = int(group)
    im = mpimg.imread(image_filename)
    patch_size = 16
    for j in range(0, im.shape[1], patch_size):
        for i in range(0, im.shape[0], patch_size):
            patch = im[i:i + patch_size, j:j + patch_size]
            label = patch_to_label(patch)
            yield("{:03d}_{}_{},{}".format(img_number, j, i, label))


def masks_to_submission(submission_filename, *image_filenames):
    """Converts images into a submission file"""
    with open(submission_filename, 'w') as f:
        f.write('id,prediction\n')
        for fn in image_filenames[0:]:
            f.writelines('{}\n'.format(s) for s in mask_to_submission_strings(fn))


def save(submission_filename):
    #subm#ission_filename = 'submission_07.csv'
    image_filenames = []
    for i in range(1, TEST_SIZE+1):
        imagename = 'prediction8_' + str(i)
        image_filename = 'predictions_test/' + imagename + '.png'
        print(image_filename)
        image_filenames.append(image_filename)
    masks_to_submission(submission_filename, *image_filenames)
    
save('submission_lukas_8x8.csv')