In [3]:
import gzip
import os
import sys
import urllib
import matplotlib.image as mpimg
from PIL import Image
import numpy as np

import code

import tensorflow.python.platform

import numpy
import tensorflow as tf

import cil_helper as cil

In [4]:
# Register the log directory flag. Re-running this cell in a live kernel tries
# to define the flag a second time, which raises; in that case just report the
# value that is already registered. Only ordinary exceptions are handled so
# that KeyboardInterrupt / SystemExit still propagate.
try:
    tf.app.flags.DEFINE_string('log_dir', '/tmp/lukas_self',
                           """Directory where to write event logs """
                           """and checkpoint.""")
except Exception:
    print(tf.app.flags.FLAGS.log_dir)

FLAGS = tf.app.flags.FLAGS
TRAINING_SIZE = 20  # Highest training image index that is loaded / predicted.
VALIDATION_SIZE = 5  # Size of the validation set (not referenced in the visible cells).
TEST_SIZE = 50  # Number of test images run through the prediction loop.
SEED = None  # Set to None for random seed (not referenced in the visible cells).
RESTORE_MODEL = False # If True, restore existing model instead of training a new one

In [5]:
# Number of patches per training step; also used as the fixed first dimension
# of the training placeholders.
BATCH_SIZE = 20 # 64
# Number of passes over the training patches.
NUM_EPOCHS = 4
# NOTE(review): only mentioned in a comment of the training loop, which
# hard-codes a reporting interval of 10 steps instead.
RECORDING_STEP = 100
# Side length (pixels) of the square patches the images are cropped into.
IMG_PATCH_SIZE = 16
# Channels per input pixel; used for the placeholder and first conv filter.
NUM_CHANNELS = 3
# Width of the one-hot label vectors and of the network output.
NUM_LABELS = 2
# Passed as the second argument of tf.nn.dropout in model() — presumably the
# keep probability; confirm against the TensorFlow version in use.
DROPOUT = 0.75

In [6]:
def extract_data(filename, num_images, extra_channels=0):
    """Load the training images and cut them into fixed-size patches.

    Args:
        filename: Path prefix to which ``satImage_NNN.png`` is appended
            directly (no separator is inserted) for NNN in 1..num_images.
        num_images: Highest image index to try. Missing files are reported
            on stdout and skipped.
        extra_channels: Unused; kept so existing callers keep working.

    Returns:
        A numpy array stacking, in image order, every patch that
        ``cil.img_crop`` yields for every loaded image. Pixel values are
        passed through exactly as ``mpimg.imread`` returned them; no
        rescaling is applied here.

    Raises:
        ValueError: if none of the requested image files exists.
    """
    imgs = []
    for i in range(1, num_images + 1):
        image_filename = filename + ("satImage_%.3d" % i) + ".png"
        if os.path.isfile(image_filename):
            img = mpimg.imread(image_filename)
            #img = preprocess(img)
            imgs.append(img)
        else:
            print ('File ' + image_filename + ' does not exist')

    # Fail early with an actionable message instead of an IndexError further
    # down when the data directory is wrong or empty.
    if not imgs:
        raise ValueError('No images found for prefix ' + repr(filename) +
                         ' (tried indices 1..' + str(num_images) + ')')

    data = [patch
            for img in imgs
            for patch in cil.img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE)]

    return numpy.asarray(data)

# Extract label images
def extract_labels(filename, num_images):
    """Build one label row per ground-truth patch.

    ``satImage_NNN.png`` is appended to ``filename`` for NNN in
    1..num_images; files that do not exist are reported on stdout and
    skipped. Each loaded image is cropped with ``cil.img_crop`` and every
    patch is mapped to a label by ``cil.value_to_class`` applied to the patch
    mean. The result is returned as a float32 numpy array.
    """
    gt_imgs = []
    for idx in range(1, num_images + 1):
        path = filename + ("satImage_%.3d" % idx) + ".png"
        if not os.path.isfile(path):
            print ('File ' + path + ' does not exist')
            continue
        gt_imgs.append(mpimg.imread(path))

    # Crop every image first, then flatten the per-image patch lists.
    per_image_patches = [cil.img_crop(gt, IMG_PATCH_SIZE, IMG_PATCH_SIZE)
                         for gt in gt_imgs]
    flat_patches = []
    for patch_list in per_image_patches:
        for k in range(len(patch_list)):
            flat_patches.append(patch_list[k])
    data = numpy.asarray(flat_patches)

    # One label per patch, decided from the patch's mean intensity.
    labels = numpy.asarray([cil.value_to_class(numpy.mean(patch))
                            for patch in data])

    return labels.astype(numpy.float32)

In [7]:
# returns a numpy array [Saturation, Lightness] of a pixel
def sat_light(rgb):
    """Compute shifted HSL-style saturation and lightness for one pixel.

    Args:
        rgb: numpy array holding the channel values of a single pixel. The
            ``+ 1`` in the lightness formula suggests channels are expected
            in [-0.5, 0.5] — TODO confirm against the caller.

    Returns:
        float32 numpy array ``[S, L - 0.5]`` where ``L`` is
        ``(max + min + 1) / 2`` and ``S`` is
        ``(max - min) / (1 - |2L - 1|) - 0.5``. When that ratio is 0/0
        (all channels equal and L is 0 or 1) the saturation is taken to be
        zero, i.e. ``S = -0.5`` after the shift, the same value every other
        grey pixel gets.
    """
    Cmax = rgb.max()
    Cmin = rgb.min()
    d = Cmax - Cmin
    L = (Cmax + Cmin + 1) / 2
    # 0/0 is expected for pure black / white; compute quietly and patch below.
    with np.errstate(divide='ignore', invalid='ignore'):
        S = d / (1 - (abs(2 * L - 1))) - .5
    if np.isnan(S):
        # Undefined saturation means "no saturation": 0 shifted by -0.5.
        S = -.5
    return np.asarray([S, L-.5], dtype=np.float32)

# Half-width of the square window texture_features() cuts around a pixel; the
# co-occurrence distances are range(1, max_offset), i.e. just [1] for 2.
max_offset = 2 #feel free to change
# Number of grey levels the lightness channel is quantised into.
LIGHTNESS_LEVELS = 8
# Channel index read as lightness by texture_features(); index 4 matches the
# position of L when [S, L] is appended to a 3-channel pixel in preprocess().
LIGHTNESS_IDX = 4
#returns [contrast, correlation, energy, homogeneity]
def texture_features(image, y, x):
    """Grey-level co-occurrence texture features around pixel (y, x).

    A window of up to ``2 * max_offset + 1`` pixels per side (clipped at the
    image border) is taken from channel ``LIGHTNESS_IDX``, shifted by +0.5,
    quantised into ``LIGHTNESS_LEVELS`` levels and fed to ``greycomatrix``.

    NOTE(review): ``greycomatrix`` / ``greycoprops`` are not imported in any
    visible cell; they look like the scikit-image functions of that name —
    confirm and import them before enabling ``preprocess``.

    Args:
        image: array indexed as ``image[row, col, channel]``.
        y: row of the centre pixel.
        x: column of the centre pixel.

    Returns:
        float32 numpy array
        ``[contrast, correlation * 0.5, energy - 0.5, homogeneity - 0.5]``,
        each property averaged over all distances and angles.
    """
    ylow = max(0, y-max_offset)
    yhi = min(image.shape[0], y+max_offset+1)
    xlow = max(0, x-max_offset)
    xhi = min(image.shape[1], x+max_offset+1)
    scaled = (image[ylow:yhi, xlow:xhi, LIGHTNESS_IDX] + .5) * LIGHTNESS_LEVELS
    # A lightness of exactly 1.0 scales to LIGHTNESS_LEVELS, one past the
    # highest valid level; clamp so every value lies in [0, LIGHTNESS_LEVELS-1].
    window = np.clip(scaled, 0, LIGHTNESS_LEVELS - 1).astype(np.uint8)
    distances = list(range(1, max_offset))
    glcm = greycomatrix(window, distances, [0, np.pi/2], levels=LIGHTNESS_LEVELS, normed=True)
    contrast = greycoprops(glcm, 'contrast').mean()
    correlation = greycoprops(glcm, 'correlation').mean()
    energy = greycoprops(glcm, 'energy').mean()
    homogeneity = greycoprops(glcm, 'homogeneity').mean()
    #dissimilarity = greycoprops(glcm, 'dissimilarity').mean()
    return np.asarray([contrast, correlation*.5, energy-.5, homogeneity-.5], dtype=np.float32)

In [8]:
#NUM_CHANNELS = 9 # RGB, Saturation, Lightness, Texture
def preprocess(img):
    """Append saturation/lightness and texture channels to every pixel.

    First pass: each pixel gets the two values returned by ``sat_light``
    appended. Second pass: each pixel of that intermediate image gets the
    four values returned by ``texture_features`` appended.

    Args:
        img: image iterable as rows of pixels, each pixel a numpy array.
            NOTE(review): ``sat_light`` appears to assume channel values in
            [-0.5, 0.5] — confirm the input is scaled accordingly.

    Returns:
        float32 numpy array with six more channels per pixel than ``img``.
    """
    with_sat_light = np.asarray([[
                    np.append(px, sat_light(px))
                    for px in row] for row in img], dtype=np.float32)
    return np.asarray([[
                    np.append(with_sat_light[y,x], texture_features(with_sat_light,y,x))
                    for x in range(len(with_sat_light[y]))]
                    for y in range(len(with_sat_light))], dtype=np.float32)

In [9]:
# Where the training images and their ground-truth masks live.
data_dir = 'training/'
train_labels_filename = data_dir + 'groundtruth/'
train_data_filename = data_dir + 'images/'

# Load image patches and the matching per-patch labels as numpy arrays,
# then show both shapes as a quick sanity check.
train_data = extract_data(train_data_filename, TRAINING_SIZE)
train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)
print(train_data.shape)
print(train_labels.shape)

(12500, 16, 16, 3)
(12500, 2)


In [10]:
# Tally the patches per class: a row whose first entry is 1 counts towards
# c0, every other row towards c1.
c0 = sum(1 for label_row in train_labels if label_row[0] == 1)
c1 = len(train_labels) - c0
print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

Number of data points per class: c0 = 9450 c1 = 3050


In [11]:
# Optional class balancing: keep the first min(c0, c1) patches of each class
# (class-0 rows first, then class-1 rows). Disabled by default.
balancing = False
if balancing:
    print ('Balancing training data...')
    min_c = min(c0, c1)
    idx0 = [pos for pos, onehot in enumerate(train_labels) if onehot[0] == 1]
    idx1 = [pos for pos, onehot in enumerate(train_labels) if onehot[1] == 1]
    new_indices = idx0[:min_c] + idx1[:min_c]
    print (len(new_indices))
    print (train_data.shape)
    train_data = train_data[new_indices]
    train_labels = train_labels[new_indices]

# Number of patches actually used for training, plus a shape report.
train_size = train_labels.shape[0]
print(train_data.shape)
print(train_labels.shape)

(12500, 16, 16, 3)
(12500, 2)


In [12]:
# This is where training samples and labels are fed to the graph.
# These placeholder nodes will be fed a batch of training data at each
# training step using the {feed_dict} argument to the Run() call below.
# The batch dimension is fixed to BATCH_SIZE, so only full batches can be fed.
# `x` and `y` are aliases of the two placeholders.
x = train_data_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS),
    name='train_data')
y = train_labels_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, NUM_LABELS),
    name='train_labels')
# Abandoned variant with a free batch dimension:
#x = train_data_node = tf.placeholder(tf.float32, [None, IMG_PATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE], name='train_data')
#y = train_labels_node = tf.placeholder(tf.float32, [None, NUM_LABELS], name='train_labels')
# NOTE(review): keep_prob is not referenced by any other visible cell; model()
# uses the DROPOUT constant instead.
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# The whole training set as a graph constant, used to evaluate the overall
# training error.
train_all_data_node = tf.constant(train_data)

In [13]:
# We will replicate the model structure for the training subgraph, as well
# as the evaluation subgraphs, while sharing the trainable parameters.

# Trainable variables created by the first model() call, keyed by name. Later
# calls look their variables up here so that every subgraph is built on the
# same parameters instead of getting a fresh set per call.
_MODEL_PARAMS = {}


def _shared_variable(name, factory, shape):
    """Return the cached variable `name`, creating it via factory(shape) once."""
    if name not in _MODEL_PARAMS:
        _MODEL_PARAMS[name] = factory(shape)
    return _MODEL_PARAMS[name]


def model(data, train=False):
    """Build the three-conv / two-fc network on top of `data`.

    The first call creates the trainable variables; every later call reuses
    them, so all subgraphs share one set of parameters. Because the first
    fully connected layer is sized from the first call's pooled feature map,
    later calls must use inputs with the same spatial size and channels.

    Args:
        data: 4-D tensor with a fully defined static shape (its first
            dimension is used literally when flattening the feature map).
        train: If True, dropout (``tf.nn.dropout(fc1, DROPOUT)``) is applied
            before the output layer of `logits`, and image summaries of the
            input and some feature maps are added to the graph.

    Returns:
        A tuple ``(y, logits, params)``: `y` is the output layer computed
        without dropout, `logits` is the output layer computed after the
        optional dropout, and `params` is the list
        ``[conv1_weights, conv2_weights, conv3_weights, conv1_biases,
        conv2_biases, conv3_biases, fc1_weights, fc1_biases, fc2_weights,
        fc2_biases]``.
    """
    conv1_weights = _shared_variable('conv1_weights', cil.weight_variable, [14, 14, NUM_CHANNELS, 40])
    conv1_biases = _shared_variable('conv1_biases', cil.bias_variable, [40])

    conv = cil.conv2d(data, conv1_weights)
    relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
    pool = cil.max_pool_2x2(relu)

    conv2_weights = _shared_variable('conv2_weights', cil.weight_variable, [8, 8, 40, 120])
    conv2_biases = _shared_variable('conv2_biases', cil.bias_variable, [120])

    conv2 = cil.conv2d(pool, conv2_weights)
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    pool2 = cil.max_pool_2x2(relu2)

    conv3_weights = _shared_variable('conv3_weights', cil.weight_variable, [4, 4, 120, 200])
    conv3_biases = _shared_variable('conv3_biases', cil.bias_variable, [200])

    conv3 = cil.conv2d(pool2, conv3_weights)
    relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_biases))
    pool3 = cil.max_pool_2x2(relu3)

    print('pool3 ' + str(pool3.get_shape()))

    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool3.get_shape().as_list()
    print('pool_shape ' + str(pool_shape))
    flat_size = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshape = tf.reshape(pool3, [pool_shape[0], flat_size])
    print(reshape.get_shape())

    fc1_weights = _shared_variable('fc1_weights', cil.weight_variable, [flat_size, 512])
    fc1_biases = _shared_variable('fc1_biases', cil.bias_variable, [512])
    print(fc1_weights.get_shape())
    print(fc1_biases.get_shape())
    fc1 = tf.matmul(reshape, fc1_weights)
    fc1 = tf.add(fc1, fc1_biases)
    fc1 = tf.nn.relu(fc1)

    fc2_weights = _shared_variable('fc2_weights', cil.weight_variable, [512, NUM_LABELS])
    fc2_biases = _shared_variable('fc2_biases', cil.bias_variable, [NUM_LABELS])

    # `y` is the output without dropout; `logits` sees dropout when training.
    y = tf.matmul(fc1, fc2_weights) + fc2_biases
    if train:
        fc1 = tf.nn.dropout(fc1, DROPOUT)
    logits = tf.matmul(fc1, fc2_weights) + fc2_biases

    if train:
        # Image summaries are registered in the graph; their handles are not
        # needed afterwards.
        summary_id = '_0'
        tf.image_summary('summary_data' + summary_id, cil.get_image_summary(data))
        tf.image_summary('summary_conv' + summary_id, cil.get_image_summary(conv))
        #s_pool = cil.get_image_summary(pool)
        #filter_summary3 = tf.image_summary('summary_pool' + summary_id, s_pool)
        tf.image_summary('summary_conv2' + summary_id, cil.get_image_summary(conv2))
        tf.image_summary('summary_pool2' + summary_id, cil.get_image_summary(pool2))

    print('y ' + str(y.get_shape()))
    print('out ' + str(logits.get_shape()))

    return y, logits, [conv1_weights, conv2_weights, conv3_weights, conv1_biases, conv2_biases, conv3_biases, fc1_weights, fc1_biases, fc2_weights, fc2_biases]

In [14]:
# Training computation: logits + cross-entropy loss.
# NOTE: this rebinds `y` (previously an alias of the labels placeholder) to the
# network output computed without dropout.
y, logits, [conv1_weights, conv2_weights, conv3_weights, conv1_biases, conv2_biases, conv3_biases, fc1_weights, fc1_biases, fc2_weights, fc2_biases] = model(train_data_node, True) # BATCH_SIZE*NUM_LABELS

# Minibatch accuracy: predicted class (no dropout) against the one-hot label.
# Comparing `y` with `logits` would only measure how often dropout flips the
# prediction, not whether the prediction is right.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(train_labels_node,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print('logits = ' + str(logits.get_shape()) + ' train_labels_node = ' + str(train_labels_node.get_shape()))
# Mean softmax cross-entropy over the batch; the summary tracks this
# data term only.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits, train_labels_node))
loss_summary = tf.scalar_summary('loss', loss)

pool3 (20, 2, 2, 200)
pool_shape [20, 2, 2, 200]
(20, 800)
(800, 512)
(512,)
y (20, 2)
out (20, 2)
logits = (20, 2) train_labels_node = (20, 2)


In [15]:
# Gradient-norm monitoring for every trainable parameter of the model,
# including the third convolutional layer. The two lists must stay aligned.
all_params_node = [conv1_weights, conv1_biases, conv2_weights, conv2_biases, conv3_weights, conv3_biases, fc1_weights, fc1_biases, fc2_weights, fc2_biases]
all_params_names = ['conv1_weights', 'conv1_biases', 'conv2_weights', 'conv2_biases', 'conv3_weights', 'conv3_biases', 'fc1_weights', 'fc1_biases', 'fc2_weights', 'fc2_biases']
# Gradients of the loss as defined at this point in the notebook.
all_grads_node = tf.gradients(loss, all_params_node)
all_grad_norms_node = []
for param_name, grad in zip(all_params_names, all_grads_node):
    norm_grad_i = tf.global_norm([grad])
    all_grad_norms_node.append(norm_grad_i)
    # One scalar summary per parameter; the name ends up bound to the last one.
    all_params_summary = tf.scalar_summary(param_name, norm_grad_i)

In [16]:
# L2 regularization for the fully connected parameters (weights and biases
# of both fully connected layers; the convolutional layers are not included).
regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
# Add the regularization term to the loss.
# NOTE(review): this rebinds `loss` in place, so re-running this cell without
# re-running the cell that defines `loss` adds the penalty again. Ops built
# from `loss` before this line do not include the penalty.
loss += 5e-4 * regularizers

In [17]:
# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
batch = tf.Variable(0)
# Decay once per epoch, using an exponential schedule starting at 0.01.
# With staircase=True the rate is multiplied by 0.95 each time
# batch * BATCH_SIZE passes another multiple of train_size.
learning_rate = tf.train.exponential_decay(
    0.01,                # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    train_size,          # Decay step.
    0.95,                # Decay rate.
    staircase=True)
# Scalar summary so the schedule can be inspected alongside the loss.
learning_rate_summary = tf.scalar_summary('learning_rate', learning_rate)

In [18]:
# Adam optimizer driven by the decayed learning rate. Each run of this op
# performs one update of `loss` and increments `batch` (the global step).
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=batch)

In [19]:
# Class probabilities for the current minibatch (built on the training
# subgraph's logits, i.e. with dropout applied).
train_prediction = tf.nn.softmax(logits)
# Second model() call on the constant holding the whole training set, built
# with train=False. Its probabilities are fetched only every few steps.
# NOTE(review): this rebinds `y` and all parameter names to whatever this call
# returns. If cil.weight_variable / cil.bias_variable create fresh variables on
# every call, this subgraph has its own, never-trained parameters — confirm.
y, logits_all, [conv1_weights, conv2_weights, conv3_weights, conv1_biases, conv2_biases, conv3_biases, fc1_weights, fc1_biases, fc2_weights, fc2_biases] = model(train_all_data_node) # train_size*NUM_LABELS
train_all_prediction = tf.nn.softmax(logits_all)

# Add ops to save and restore all the variables.
saver = tf.train.Saver()

pool3 (12500, 2, 2, 200)
pool_shape [12500, 2, 2, 200]
(12500, 800)
(800, 512)
(512,)
y (12500, 2)
out (12500, 2)


In [None]:
# Session used by all following cells (training, evaluation and prediction).
s = tf.InteractiveSession()

In [None]:
# Run all the initializers to prepare the trainable parameters.
tf.initialize_all_variables().run()

# Build the summary operation from the loss and learning-rate summaries.
#summary_op = tf.merge_summary([loss_summary, learning_rate_summary, all_params_summary])
summary_op = tf.merge_summary([loss_summary, learning_rate_summary])
summary_writer = tf.train.SummaryWriter(FLAGS.log_dir,
                                                graph=s.graph)
print ('Initialized!')

# Loop through training steps.
steps_per_epoch = int(train_size / BATCH_SIZE)
print ('Total number of iterations = ' + str(int(NUM_EPOCHS * train_size / BATCH_SIZE)))

training_indices = range(train_size)

for iepoch in range(NUM_EPOCHS):

    # Permute training indices
    perm_indices = numpy.random.permutation(training_indices)

    for step in range(steps_per_epoch):

        # Walk the permutation in consecutive, non-overlapping slices. No
        # wrap-around is needed because step < steps_per_epoch; taking the
        # offset modulo (train_size - BATCH_SIZE) made the last batch of an
        # epoch repeat the first one whenever train_size is a multiple of
        # BATCH_SIZE, and divided by zero when the two are equal.
        offset = step * BATCH_SIZE
        batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

        batch_data = train_data[batch_indices, :, :, :]
        batch_labels = train_labels[batch_indices]
        #print('batch_data.shape = ' + str(batch_data.shape))
        # This dictionary maps the batch data (as a numpy array) to the
        # node in the graph it should be fed to.
        feed_dict = {train_data_node: batch_data, train_labels_node: batch_labels}

        if step % 10 == 0: #RECORDING_STEP == 0:

            summary_str, _, l, lr, predictions = s.run(
                        [summary_op, optimizer, loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)
            # Use a step counter that keeps growing across epochs so that
            # later epochs do not write onto the same x positions as earlier
            # ones in the event log.
            summary_writer.add_summary(summary_str, iepoch * steps_per_epoch + step)
            summary_writer.flush()

            # print_predictions(predictions, batch_labels)
            #correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(train_labels, 1))
            #print(s.run(accuracy, feed_dict={train_data_node: train_data, train_labels_node: train_labels}))

            print('Epoch: ' + str(iepoch))
            print('Step: ' + str(step))
            print ('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            print ('Minibatch error: %.1f%%' % cil.error_rate(predictions,
                                                                 batch_labels))

            # The whole-training-set subgraph reads from a graph constant, so
            # it needs no feed; only its predictions are fetched.
            all_predictions = s.run(train_all_prediction)

            print('Overall error: %.1f%%' % cil.error_rate(all_predictions, train_labels))

            sys.stdout.flush()
        else:
            # Run the graph and fetch some of the nodes.
            _, l, lr, predictions = s.run(
                        [optimizer, loss, learning_rate, train_prediction],
                        feed_dict=feed_dict)

    # Save the variables to disk. `log_dir` is the only flag this notebook
    # defines (documented as holding event logs and the checkpoint);
    # `train_dir` was never defined.
    save_path = saver.save(s, FLAGS.log_dir + "/model.ckpt")
    print("Model saved in file: %s" % save_path)
    print("-------------------------------------")

Initialized!
Total number of iterations = 2500


In [None]:
# Get prediction overlaid on the original image for given input file
def get_prediction_with_overlay(filename, image_idx):
    """Load image number `image_idx` and overlay its prediction on it.

    The file read is ``filename + "satImage_NNN.png"`` with NNN the
    zero-padded index. The image and the result of ``get_prediction`` are
    handed to ``cil.make_img_overlay``, whose result is returned.
    """
    image_filename = "".join([filename, "satImage_%.3d" % image_idx, ".png"])
    img = mpimg.imread(image_filename)
    return cil.make_img_overlay(img, get_prediction(img))

# Get a concatenation of the prediction and groundtruth for given input file
def get_prediction_with_groundtruth(filename, image_idx):
    """Load image number `image_idx` and concatenate it with its prediction.

    The file read is ``filename + "satImage_NNN.png"`` with NNN the
    zero-padded index. The loaded image and the result of ``get_prediction``
    are passed to ``cil.concatenate_images``, whose result is returned.
    """
    image_filename = "".join([filename, "satImage_%.3d" % image_idx, ".png"])
    img = mpimg.imread(image_filename)
    return cil.concatenate_images(img, get_prediction(img))

# Get prediction for given input image 
def get_prediction(img):
    """Predict a per-patch label image for `img`.

    The image is cropped into IMG_PATCH_SIZE patches, the patches are pushed
    through a ``model`` subgraph built with train=False, and the softmax
    output is turned back into an image by ``cil.label_to_img``.

    NOTE(review): every call adds a new constant and a new model subgraph to
    the default graph. The predictions are only meaningful if ``model``
    builds this subgraph on the already trained, initialised variables —
    confirm.

    Args:
        img: image array indexed as ``img[row, col, ...]``.

    Returns:
        The image produced by ``cil.label_to_img`` for the predictions.
    """
    data = numpy.asarray(cil.img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
    data_node = tf.constant(data)
    # model() returns (y, logits, params); only the logits go into the
    # softmax. Passing the whole tuple to tf.nn.softmax is an error.
    _, logits, _ = model(data_node)
    output = tf.nn.softmax(logits)
    output_prediction = s.run(output)
    img_prediction = cil.label_to_img(img.shape[0], img.shape[1], IMG_PATCH_SIZE, IMG_PATCH_SIZE, output_prediction)

    return img_prediction

In [None]:
# Write, for every training image, the prediction next to the input and the
# prediction overlaid on the input.
print ("Running prediction on training set")
prediction_training_dir = "predictions_training/"
if not os.path.isdir(prediction_training_dir):
    os.mkdir(prediction_training_dir)
for i in range(1, TRAINING_SIZE+1):
    prediction_path = prediction_training_dir + "prediction_" + str(i) + ".png"
    overlay_path = prediction_training_dir + "overlay_" + str(i) + ".png"

    pimg = get_prediction_with_groundtruth(train_data_filename, i)
    Image.fromarray(pimg).save(prediction_path)

    oimg = get_prediction_with_overlay(train_data_filename, i)
    oimg.save(overlay_path)

In [None]:
# Predict every test image (stored as test/test_<i>/test_<i>.png) and save
# the prediction as an 8-bit image.
print ("Running prediction on test set")
prediction_test_dir = "predictions_test/"
test_dir = "test/"
if not os.path.isdir(prediction_test_dir):
    os.mkdir(prediction_test_dir)
for i in range(1, TEST_SIZE+1):
    imagename = "test_" + str(i)
    image_filename = test_dir + imagename + "/" + imagename + ".png"
    output_filename = prediction_test_dir + "prediction_" + str(i) + ".png"

    img = mpimg.imread(image_filename)
    pimg = cil.img_float_to_uint8(get_prediction(img))
    Image.fromarray(pimg).save(output_filename)

In [None]:
# Project-local helper module; its save() is called with the name of the
# submission CSV to write.
# NOTE(review): presumably it reads the images written to predictions_test/
# by the previous cell — confirm in mask_to_submission.
import mask_to_submission as mts
mts.save('submission_lukas_09.csv')