In [1]:
import numpy as np
import tensorflow as tf
from collections import OrderedDict

# dtype passed to tf.get_variable for every variable created through `variable()`.
DATA_TYPE = tf.float32
# Running total of scalar parameters created through `variable()`. The model
# builders below reset it to 0 before constructing a network, and some of them
# print it once the network is built.
VARIABLE_COUNTER = 0

def variable(name, shape, initializer,regularizer=None):
    """Get a trainable variable of dtype DATA_TYPE, placed on '/cpu:0'.

    As a side effect, the number of elements implied by `shape` is added to
    the module-level VARIABLE_COUNTER so model builders can report a
    parameter count.

    Args:
        name: variable name handed to tf.get_variable.
        shape: list of dimensions of the variable.
        initializer: initializer handed to tf.get_variable.
        regularizer: optional regularizer handed to tf.get_variable.

    Returns:
        Whatever tf.get_variable returns for these arguments.
    """
    global VARIABLE_COUNTER
    with tf.device('/cpu:0'):
        element_count = np.prod(np.array(shape))
        VARIABLE_COUNTER = VARIABLE_COUNTER + element_count
        created = tf.get_variable(
            name,
            shape,
            initializer=initializer,
            regularizer=regularizer,
            dtype=DATA_TYPE,
            trainable=True)
    return created

def conv_layer(input_tensor,name,kernel_size,output_channels,initializer,stride=1,bn=False,training=False,relu=True):
    """2-D convolution ('SAME' padding) plus bias, optionally followed by batch norm and ReLU.

    Args:
        input_tensor: input whose last static dimension is the channel count.
        name: variable scope holding the layer's 'weights' and 'biases'.
        kernel_size: side length of the square kernel.
        output_channels: number of filters.
        initializer: initializer for the kernel (biases start at 0.0).
        stride: spatial stride applied to dimensions 1 and 2.
        bn: when true, apply batch_norm_layer after the bias add.
        training: forwarded to batch_norm_layer.
        relu: when true, finish with tf.nn.relu.

    Returns:
        The output tensor of the last enabled stage. The input and output
        static shapes are printed as a side effect.
    """
    in_shape = input_tensor.get_shape().as_list()
    filter_shape = [kernel_size, kernel_size, in_shape[-1], output_channels]
    strides = [1, stride, stride, 1]
    with tf.variable_scope(name) as scope:
        # The kernel carries an L2 weight penalty; the bias does not.
        filters = variable('weights', filter_shape, initializer,
                           regularizer=tf.contrib.layers.l2_regularizer(0.0005))
        activations = tf.nn.conv2d(input_tensor, filters, strides, padding='SAME')
        bias_terms = variable('biases', [output_channels], tf.constant_initializer(0.0))
        activations = tf.nn.bias_add(activations, bias_terms)
        if bn:
            activations = batch_norm_layer(activations, scope, training)
        if relu:
            activations = tf.nn.relu(activations, name=scope.name)
    print('Conv layer {0} -> {1}'.format(in_shape, activations.get_shape().as_list()))
    return activations

def residual_block(input_tensor,name,kernel_size,output_channels,initializer,stride=1,bn=True,training=False):
    """Two stacked conv layers whose output is added to a shortcut and passed through ReLU.

    The first conv ('conv1') uses `stride` and a ReLU; the second ('conv2')
    uses stride 1 and no ReLU. When `stride` is not 1 or the channel count
    changes, the shortcut is a 'projection' conv of the input (kernel size
    equal to `stride`, no batch norm, no ReLU); otherwise it is the input
    itself.

    Returns:
        relu(shortcut + conv branch). Progress lines are printed as a side
        effect.
    """
    print('')
    print('Residual Block')
    in_shape = input_tensor.get_shape().as_list()
    needs_projection = not (stride == 1 and in_shape[-1] == output_channels)
    with tf.variable_scope(name) as scope:
        branch = conv_layer(input_tensor, 'conv1', kernel_size, output_channels, initializer,
                            stride=stride, bn=bn, training=training, relu=True)
        branch = conv_layer(branch, 'conv2', kernel_size, output_channels, initializer,
                            stride=1, bn=bn, training=training, relu=False)
        shortcut = input_tensor
        if needs_projection:
            # Match the shortcut's spatial size / channels to the conv branch.
            shortcut = conv_layer(input_tensor, 'projection', stride, output_channels, initializer,
                                  stride=stride, bn=False, training=training, relu=False)
            print('Projecting input {0} -> {1}'.format(in_shape, shortcut.get_shape().as_list()))
        res_output = tf.nn.relu(shortcut + branch, name=scope.name)
    print('')
    return res_output

def deconv_layer(input_tensor,name,kernel_size,output_channels,initializer,stride=1,bn=False,training=False,relu=True):
    """Transposed 2-D convolution ('SAME' padding) plus bias, optional batch norm and ReLU.

    The requested output shape keeps dimension 0 of the input, multiplies
    dimensions 1 and 2 by `stride`, and sets dimension 3 to `output_channels`.
    All of these come from the input's static shape.

    Args:
        input_tensor: 4-D input; its static shape is read via get_shape().
        name: variable scope holding the layer's 'weights' and 'biases'.
        kernel_size: side length of the square kernel.
        output_channels: channel count of the result.
        initializer: initializer for the kernel (biases start at 0.0).
        stride: upsampling factor for dimensions 1 and 2.
        bn: when true, apply batch_norm_layer after the bias add.
        training: forwarded to batch_norm_layer.
        relu: when true, finish with tf.nn.relu.

    Returns:
        The output tensor of the last enabled stage. The input and output
        static shapes are printed as a side effect.
    """
    in_shape = input_tensor.get_shape().as_list()
    target_shape = [in_shape[0], in_shape[1] * stride, in_shape[2] * stride, output_channels]
    # conv2d_transpose expects the kernel as [h, w, out_channels, in_channels].
    filter_shape = [kernel_size, kernel_size, output_channels, in_shape[-1]]
    with tf.variable_scope(name) as scope:
        filters = variable('weights', filter_shape, initializer,
                           regularizer=tf.contrib.layers.l2_regularizer(0.0005))
        upsampled = tf.nn.conv2d_transpose(input_tensor, filters, target_shape,
                                           [1, stride, stride, 1], padding='SAME')
        bias_terms = variable('biases', [output_channels], tf.constant_initializer(0.0))
        upsampled = tf.nn.bias_add(upsampled, bias_terms)
        if bn:
            upsampled = batch_norm_layer(upsampled, scope, training)
        if relu:
            upsampled = tf.nn.relu(upsampled, name=scope.name)
    print('Deconv layer {0} -> {1}'.format(in_shape, upsampled.get_shape().as_list()))
    return upsampled

def max_pooling(input_tensor,name,factor=2):
    """Max-pool with a `factor` x `factor` window and matching stride ('SAME' padding).

    Prints the input and output static shapes and returns the pooled tensor.
    """
    window = [1, factor, factor, 1]
    pooled = tf.nn.max_pool(input_tensor, ksize=window, strides=list(window),
                            padding='SAME', name=name)
    shape_before = input_tensor.get_shape().as_list()
    shape_after = pooled.get_shape().as_list()
    print('Pooling layer {0} -> {1}'.format(shape_before, shape_after))
    return pooled

def fully_connected_layer(input_tensor,name,output_channels,initializer,bn=False,training=False,relu=True):
    """Dense layer: matmul(input, weights) + biases, optional batch norm and ReLU.

    Args:
        input_tensor: input whose last static dimension is the feature count.
        name: variable scope holding the layer's 'weights' and 'biases'.
        output_channels: number of output units.
        initializer: initializer for the weight matrix (biases start at 0.0).
        bn: when true, apply batch_norm_layer after the bias add.
        training: forwarded to batch_norm_layer.
        relu: when true, finish with tf.nn.relu.

    Returns:
        The output tensor of the last enabled stage. The input and output
        static shapes are printed as a side effect.
    """
    input_channels = input_tensor.get_shape().as_list()[-1]
    with tf.variable_scope(name) as scope:
        # The weight matrix carries an L2 penalty; the bias does not.
        weights = variable('weights', [input_channels, output_channels], initializer, regularizer=tf.contrib.layers.l2_regularizer(0.0005))
        biases = variable('biases', [output_channels], tf.constant_initializer(0.0))
        fc = tf.add(tf.matmul(input_tensor,weights), biases, name=scope.name)
        if bn:
            fc = batch_norm_layer(fc,scope,training)
        if relu:
            # Fix: the activation must be applied to `fc`. The previous code
            # passed the undefined name `bias`, which raised NameError on the
            # default relu=True path.
            fc = tf.nn.relu(fc, name=scope.name)
    print('Fully connected layer {0} -> {1}'.format(input_tensor.get_shape().as_list(),fc.get_shape().as_list()))
    return fc

def batch_norm_layer(input_tensor,scope,training):
    """Apply tf.contrib.layers.batch_norm with decay 0.99 under `scope`.

    `training` is forwarded as `is_training`.
    """
    normalized = tf.contrib.layers.batch_norm(
        input_tensor,
        decay=0.99,
        is_training=training,
        scope=scope)
    return normalized

def dropout_layer(input_tensor,keep_prob,training):
    """Apply dropout only when `training` is truthy; otherwise return the input unchanged.

    `training` is evaluated as a plain Python truth value when the graph is
    built.
    """
    if not training:
        return input_tensor
    return tf.nn.dropout(input_tensor, keep_prob)

def concat_layer(input_tensor1,input_tensor2,axis=3):
    """Concatenate two tensors along `axis` and print the shapes involved.

    Args:
        input_tensor1: first tensor.
        input_tensor2: second tensor.
        axis: dimension to concatenate along (default 3).

    Returns:
        The concatenated tensor.
    """
    # Fix: pass the values first and honour `axis`. The previous call,
    # tf.concat(3, [...]), ignored the `axis` parameter and used the opposite
    # argument order from the tf.concat([...], 3) calls in unet().
    output = tf.concat([input_tensor1,input_tensor2], axis)
    input1_shape = input_tensor1.get_shape().as_list()
    input2_shape = input_tensor2.get_shape().as_list()
    output_shape = output.get_shape().as_list()
    print('Concat layer {0} and {1} -> {2}'.format(input1_shape,input2_shape,output_shape))
    return output

def flatten(input_tensor,name):
    """Reshape the input to [static batch size, -1] inside variable scope `name`.

    The batch size is read from the input's static shape. The input and
    output static shapes are printed as a side effect.
    """
    in_shape = input_tensor.get_shape().as_list()
    with tf.variable_scope(name):
        flat = tf.reshape(input_tensor, [in_shape[0], -1])
    print('Flatten layer {0} -> {1}'.format(in_shape, flat.get_shape().as_list()))
    return flat

def classification_inference(images,training=True):
    """Build a small classifier: 2 conv layers, pool, flatten, fc(4096), dropout, fc(NUM_CLASS).

    Resets VARIABLE_COUNTER before building and prints the architecture and
    the resulting parameter count.

    Args:
        images: input tensor fed to the first conv layer.
        training: forwarded to the batch-norm and dropout layers.

    Returns:
        Output of the final fully connected layer (NUM_CLASS units).
    """
    global VARIABLE_COUNTER
    rule = '-'*30
    print(rule)
    print('Network Architecture')
    print(rule)
    VARIABLE_COUNTER = 0

    name_counts = {}
    def layer_name(base_name):
        # Produces base_name1, base_name2, ... in call order.
        name_counts[base_name] = name_counts.get(base_name, 0) + 1
        return base_name + str(name_counts[base_name])

    NUM_CLASS = 3
    dropout_keep_prob = 0.5
    bn = True
    he_initializer = tf.contrib.layers.variance_scaling_initializer()

    net = images
    for _ in range(2):
        net = conv_layer(net, layer_name('conv'), 3, 64, he_initializer, bn=bn, training=training)
    net = max_pooling(net, layer_name('pool'))
    net = flatten(net, layer_name('flatten'))
    net = fully_connected_layer(net, layer_name('fc'), 4096, he_initializer, bn=bn, training=training)
    net = dropout_layer(net, dropout_keep_prob, training)
    net = fully_connected_layer(net, layer_name('fc'), NUM_CLASS, he_initializer, bn=False, training=training)

    print(rule)
    print('Number of variables:{0}'.format(VARIABLE_COUNTER))
    print(rule)
    print('')
    return net


def unet(images, training=True):
    """Build an encoder/decoder network with skip connections.

    Layout (all convs are 3x3 with batch norm unless noted):
      * contracting path: four stages of two convs (64, 128, 256, 512
        channels), each followed by 2x max pooling ('pool1'..'pool4'); the
        pre-pooling output of every stage is kept as a skip connection;
      * bottleneck: convs with 1024 then 512 channels;
      * expanding path: four stages that resize the features to twice their
        height and width with tf.image.resize_images, concatenate the
        matching skip connection on axis 3, then apply two convs
        (512/256, 256/128, 128/64, 64/64 channels);
      * head: a 1x1 conv to NUM_CLASS channels without batch norm or ReLU.

    Resets VARIABLE_COUNTER before building.

    Args:
        images: input tensor; static height and width are required for the
            resize steps.
        training: forwarded to the batch-norm layers.

    Returns:
        (out, out_bhwd): the head output reshaped to [-1, NUM_CLASS], and the
        same output in its original 4-D shape.
    """
    global VARIABLE_COUNTER
    rule = '-'*30
    print(rule)
    print('Network Architecture')
    print(rule)
    VARIABLE_COUNTER = 0

    name_counts = {}
    def layer_name(base_name):
        # Produces base_name1, base_name2, ... in call order.
        name_counts[base_name] = name_counts.get(base_name, 0) + 1
        return base_name + str(name_counts[base_name])

    NUM_CLASS = 3
    bn = True
    he_initializer = tf.contrib.layers.variance_scaling_initializer()

    def double_conv(tensor, first_channels, second_channels):
        # Two consecutive 3x3 conv layers with consecutively numbered scopes.
        tensor = conv_layer(tensor, layer_name('conv'), 3, first_channels, he_initializer,
                            bn=bn, training=training)
        return conv_layer(tensor, layer_name('conv'), 3, second_channels, he_initializer,
                          bn=bn, training=training)

    # Contracting path: remember each stage's output before it is pooled.
    skip_connections = []
    net = images
    for level, channels in enumerate((64, 128, 256, 512), 1):
        net = double_conv(net, channels, channels)
        skip_connections.append(net)
        net = max_pooling(net, 'pool' + str(level))

    # Bottleneck at the coarsest resolution.
    net = double_conv(net, 1024, 512)

    # Expanding path: deepest skip connection first.
    decoder_channels = ((512, 256), (256, 128), (128, 64), (64, 64))
    for (first_channels, second_channels), skip in zip(decoder_channels, reversed(skip_connections)):
        height, width = net.get_shape().as_list()[1:3]
        net = tf.image.resize_images(net, [height*2, width*2])
        net = tf.concat([net, skip], 3)
        net = double_conv(net, first_channels, second_channels)

    # 1x1 head producing the raw per-pixel outputs.
    out_bhwd = conv_layer(net, layer_name('conv'), 1, NUM_CLASS, he_initializer,
                          bn=False, training=training, relu=False)
    out = tf.reshape(out_bhwd, [-1, NUM_CLASS])

    print('size of out= ', out.get_shape().as_list())

    return out, out_bhwd

def residual_inference(images,training=True):
    """Build a residual network followed by a deconvolution head.

    Layout: 4 conv layers (64 channels); a stride-2 residual block plus 8
    plain residual blocks at 64 channels; a stride-2 residual block plus 16
    plain residual blocks at 128 channels; two stride-2 deconv layers (128,
    64 channels); and a final stride-1 deconv to NUM_CLASS channels without
    batch norm or ReLU.

    Resets VARIABLE_COUNTER before building and prints the architecture and
    the resulting parameter count.

    Args:
        images: input tensor fed to the first conv layer.
        training: forwarded to the batch-norm layers.

    Returns:
        Output tensor of the final deconv layer.
    """
    global VARIABLE_COUNTER
    rule = '-'*30
    print(rule)
    print('Network Architecture')
    print(rule)
    VARIABLE_COUNTER = 0

    name_counts = {}
    def layer_name(base_name):
        # Produces base_name1, base_name2, ... in call order.
        name_counts[base_name] = name_counts.get(base_name, 0) + 1
        return base_name + str(name_counts[base_name])

    NUM_CLASS = 3
    bn = True
    he_initializer = tf.contrib.layers.variance_scaling_initializer()

    def resblock(tensor, channels, stride=1):
        return residual_block(tensor, layer_name('resblock'), 3, channels, he_initializer,
                              stride=stride, bn=bn, training=training)

    # Stem
    net = images
    for _ in range(4):
        net = conv_layer(net, layer_name('conv'), 3, 64, he_initializer, bn=bn, training=training)

    # Each group opens with a stride-2 block, followed by stride-1 blocks.
    for channels, plain_blocks in ((64, 8), (128, 16)):
        net = resblock(net, channels, stride=2)
        for _ in range(plain_blocks):
            net = resblock(net, channels)

    # Upsampling head
    net = deconv_layer(net, layer_name('deconv'), 3, 128, he_initializer, stride=2, bn=bn, training=training)
    net = deconv_layer(net, layer_name('deconv'), 3, 64, he_initializer, stride=2, bn=bn, training=training)
    net = deconv_layer(net, layer_name('deconv'), 3, NUM_CLASS, he_initializer, bn=False, training=training, relu=False)

    print(rule)
    print('Number of variables:{0}'.format(VARIABLE_COUNTER))
    print(rule)
    print('')
    return net

def inference(images,depths,training=True):
    """Build a two-branch (colour + depth) network with a shared deconvolution head.

    Each input goes through its own branch: 3 conv layers (64 channels), max
    pooling, 3 conv layers (128 channels), max pooling. The colour branch is
    built first, then the depth branch. The two branch outputs are joined
    with concat_layer and passed through two stride-2 deconv layers (128, 64
    channels) and a final stride-1 deconv to NUM_CLASS channels without batch
    norm or ReLU.

    Resets VARIABLE_COUNTER before building and prints the architecture and
    the resulting parameter count.

    Args:
        images: input tensor for the first branch.
        depths: input tensor for the second branch.
        training: forwarded to the batch-norm layers.

    Returns:
        Output tensor of the final deconv layer.
    """
    global VARIABLE_COUNTER
    rule = '-'*30
    print(rule)
    print('Network Architecture')
    print(rule)
    VARIABLE_COUNTER = 0

    name_counts = {}
    def layer_name(base_name):
        # Produces base_name1, base_name2, ... in call order.
        name_counts[base_name] = name_counts.get(base_name, 0) + 1
        return base_name + str(name_counts[base_name])

    NUM_CLASS = 3
    bn = True
    he_initializer = tf.contrib.layers.variance_scaling_initializer()

    def encoder_branch(tensor):
        # (3 convs + pool) at 64 channels, then the same at 128 channels.
        for channels in (64, 128):
            for _ in range(3):
                tensor = conv_layer(tensor, layer_name('conv'), 3, channels, he_initializer,
                                    bn=bn, training=training)
            tensor = max_pooling(tensor, layer_name('max_pooling'))
        return tensor

    rgb_features = encoder_branch(images)
    depth_features = encoder_branch(depths)

    # Join the two branches.
    net = concat_layer(rgb_features, depth_features)

    net = deconv_layer(net, layer_name('deconv'), 3, 128, he_initializer, stride=2, bn=bn, training=training)
    net = deconv_layer(net, layer_name('deconv'), 3, 64, he_initializer, stride=2, bn=bn, training=training)
    net = deconv_layer(net, layer_name('deconv'), 3, NUM_CLASS, he_initializer, bn=False, training=training, relu=False)

    print(rule)
    print('Number of variables:{0}'.format(VARIABLE_COUNTER))
    print(rule)
    print('')
    return net

# def loss(predictions, labels):
#     num_classes = predictions.get_shape().as_list()[-1]
#     flat_predictions = tf.reshape(predictions, [-1,num_classes])
#     flat_labels = tf.cast(tf.reshape(labels, [-1]), tf.int32)
#     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(flat_predictions, flat_labels, name='cross_entropy_per_example')
#     cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
#     weight_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
#     return tf.add(cross_entropy_mean,weight_loss)

def loss(pred_normals,ground_truth,mask):
    """Mean angular error, in radians, between predicted and ground-truth vectors.

    Both inputs are rescaled with (v / 255 - 0.5) * 2, dot products are formed
    by summing over axis 0, and only positions where `mask` is non-zero
    contribute. A NaN cosine (for example from a zero-length vector) is
    treated as -1, i.e. the maximum error.

    Open question carried over from the original author: is the data already
    preprocessed at this stage, or should it be preprocessed again here?

    NOTE(review): because the sum runs over axis 0, the vector components are
    presumably stored along the first axis and `mask` must match the remaining
    axes - confirm against the callers.

    Args:
        pred_normals: NumPy array of predicted values in the 0..255 encoding.
        ground_truth: NumPy array of target values in the same encoding.
        mask: NumPy array; non-zero entries mark positions to evaluate.

    Returns:
        Sum of the per-position angles divided by the number of non-zero mask
        entries.
    """
    valid_count = np.count_nonzero(mask)
    valid = mask != 0

    def decode(encoded):
        # 0..255 -> -1..1
        return ((encoded / 255.0) - 0.5) * 2

    def masked_dot(left, right):
        return np.sum(left * right, axis=0)[valid]

    pred = decode(pred_normals)
    truth = decode(ground_truth)

    pred_sq = masked_dot(pred, pred)
    truth_sq = masked_dot(truth, truth)
    cross = masked_dot(pred, truth)

    cosine = cross / np.sqrt(pred_sq * truth_sq)
    cosine[np.isnan(cosine)] = -1
    cosine = np.clip(cosine, -1, 1)

    return np.sum(np.arccos(cosine)) / valid_count

def accuracy(predictions, labels):
    """Fraction of positions whose arg-max class (over axis 3) equals the label.

    Both the arg-max predictions and `labels` are reshaped to
    [static batch size, -1] before being compared. The predictions are cast
    to int32 for the comparison.

    Returns:
        Scalar tensor holding the mean of the element-wise matches.
    """
    batch_size = predictions.get_shape().as_list()[0]
    flat_shape = [batch_size, -1]
    predicted_classes = tf.reshape(tf.argmax(predictions, 3), flat_shape)
    expected_classes = tf.reshape(labels, list(flat_shape))
    hits = tf.equal(tf.cast(predicted_classes, tf.int32), expected_classes)
    return tf.reduce_mean(tf.cast(hits, "float"))

# Marker printed once every definition in this cell has been executed.
print("Architecture Load : Complete")

Architecture Load : Complete


In [2]:
from __future__ import print_function

import tensorflow as tf
from PIL import Image
import argparse

import numpy as np
import matplotlib.pyplot as pl
import matplotlib as mpl

import scipy
import matplotlib.image as mpimg
import PIL

# Number of samples to read; files are expected as 0.png .. (num_images - 1).png
# in each of the three directories below.
num_images = 20000


def _load_png_stack(path_prefix, count):
    """Read `<path_prefix><i>.png` for i in range(count) and stack them into one array."""
    frames = []
    for index in range(count):
        frames.append(np.array(PIL.Image.open(path_prefix + str(index) + ".png")))
    return np.array(frames)


features = _load_png_stack("train/color/", num_images)
normals = _load_png_stack("train/normal/", num_images)
mask = _load_png_stack("train/mask/", num_images)

# float32 copies of the raw arrays.
xflo = features.astype(np.float32)
yflo = normals.astype(np.float32)
mflo = mask.astype(np.float32)

# The complete colour / normal stacks become graph tensors; the mask stays a
# NumPy array (its float32 copy) and is not converted to a tensor.
x = tf.convert_to_tensor(xflo)
y = tf.convert_to_tensor(yflo)
mask = mflo

# Training configuration.
training_iters = 100
display_step = 10

img_width = 128
img_height = 128

batch_size = 256

base_learning_rate = 0.01

# Scalar placeholder so the learning rate can be fed per step.
learning_rate = tf.placeholder(tf.float32, shape=[])


In [None]:
# tf Graph input
# batch_size = 128
# img_height = 128
# img_width = 128
# x = tf.placeholder(tf.float32, [batch_size, img_height, img_width, 3])
# y = tf.placeholder(tf.int32, batch_size*img_width*img_height)
# y_bool = tf.placeholder(tf.int32, batch_size*img_width*img_height)
# x.shape

In [None]:
with tf.device("/cpu:0"):
    # Build the model. unet() returns (output reshaped to [-1, NUM_CLASS],
    # the same output in its 4-D shape); the 4-D one is compared with the
    # target.
    predX = unet(x, training=True)
    gt = y
    pred = predX[1]

    # Regression loss on the raw outputs. The tensor is bound to `l2_loss`,
    # not `loss`, so the `loss()` evaluation function defined with the
    # architecture is no longer shadowed (the old code rebound `loss` first to
    # this tensor and then to a Python list inside the loop).
    l2_loss = tf.nn.l2_loss(pred - gt)
    cost = tf.reduce_mean(l2_loss)

    # The batch-norm layers register their moving-average updates in the
    # UPDATE_OPS collection. Make the train op depend on them so they are run
    # on every optimisation step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Must be created after the optimizer so its variables are initialised too.
    init = tf.global_variables_initializer()

    # Launch the graph
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    epoch = 1
    with tf.Session(config=config) as sess:
        sess.run(init)

        print('This has been initialised')
        step = 1

        # NOTE(review): this loop has no exit condition and `training_iters` /
        # `display_step` are never used, so the cell runs until the kernel is
        # interrupted. Left as is on purpose; decide on a stopping rule before
        # relying on Restart & Run All.
        while 1:
            # NOTE(review): the complete arrays are fed on every step, so
            # `batch_size` only affects the epoch / batch numbers printed
            # below - confirm this is intended.
            img = xflo
            label = yflo

            # Multiply the learning rate by 0.95 once every 30 epochs.
            decay = np.floor((epoch - 1) / 30)
            learningRate = base_learning_rate * np.power(0.95, decay)

            _, lr = sess.run([optimizer, learning_rate],
                             feed_dict={x: img, y: label, learning_rate: learningRate})
            loss_value = sess.run([cost], feed_dict={x: img, y: label})
            # The dataset size comes from `num_images`, not a hard-coded 20000.
            print('epoch = ', epoch, 'batch = ', step-(np.floor(num_images/batch_size))*(epoch-1), 'loss = ', loss_value, 'learning rate =', lr)
            step = step + 1
            epoch = np.floor(step*batch_size/num_images)+1
            tpred, tpred_bhwd = sess.run([predX[0], predX[1]], feed_dict={x: img, y: label})
            print(tpred_bhwd.shape)

            pl.pause(0.00001)
  

------------------------------
Network Architecture
------------------------------
Conv layer [240, 128, 128, 3] -> [240, 128, 128, 64]
Conv layer [240, 128, 128, 64] -> [240, 128, 128, 64]
Pooling layer [240, 128, 128, 64] -> [240, 64, 64, 64]
Conv layer [240, 64, 64, 64] -> [240, 64, 64, 128]
Conv layer [240, 64, 64, 128] -> [240, 64, 64, 128]
Pooling layer [240, 64, 64, 128] -> [240, 32, 32, 128]
Conv layer [240, 32, 32, 128] -> [240, 32, 32, 256]
Conv layer [240, 32, 32, 256] -> [240, 32, 32, 256]
Pooling layer [240, 32, 32, 256] -> [240, 16, 16, 256]
Conv layer [240, 16, 16, 256] -> [240, 16, 16, 512]
Conv layer [240, 16, 16, 512] -> [240, 16, 16, 512]
Pooling layer [240, 16, 16, 512] -> [240, 8, 8, 512]
Conv layer [240, 8, 8, 512] -> [240, 8, 8, 1024]
Conv layer [240, 8, 8, 1024] -> [240, 8, 8, 512]
Conv layer [240, 16, 16, 1024] -> [240, 16, 16, 512]
Conv layer [240, 16, 16, 512] -> [240, 16, 16, 256]
Conv layer [240, 32, 32, 512] -> [240, 32, 32, 256]
Conv layer [240, 32, 32, 2

In [None]:
# Scratch cell: repeats a single optimisation step outside the training loop.
# NOTE(review): depends on `sess`, `img`, `label` and `learningRate` left over
# from the training cell. `sess` was opened there with `with tf.Session(...)`,
# so it has presumably been closed by the time this runs - confirm, or remove
# this cell.
_,lr = sess.run([optimizer,learning_rate], feed_dict={x: img, y: label, learning_rate:learningRate})

In [None]:
 # Scratch cell: opens a new session, runs the initialiser, then performs one
 # optimisation step.
 # NOTE(review): depends on `config`, `init`, `optimizer`, `img`, `label` and
 # `learningRate` created by the training cell; it fails on a fresh kernel.
 with tf.Session(config=config) as sess:
        sess.run(init)
        _,lr = sess.run([optimizer,learning_rate], feed_dict={x: img, y: label, learning_rate:learningRate})

In [None]:
# Scratch cell: rebinds `img` / `label` to the graph tensors `x` / `y`.
# Inside the training loop the same names refer to the NumPy arrays `xflo` /
# `yflo`, so running this cell changes what later feed_dict values mean.
img = x
label = y

In [None]:
# Scratch cell: evaluates `img` (bound to the tensor `x` in the previous cell).
# NOTE(review): no session is opened in this cell, so this presumably needs a
# default session to be active - confirm, or pass `session=` explicitly.
img.eval()