## Try to do ConvNet with Tensorflow

Apr.19.2016

### 0. Libraries and Data Loading

In [2]:
import numpy as np, scipy.io, tensorflow as tf, matplotlib.pyplot as plt
%matplotlib inline

In [3]:
data = scipy.io.loadmat('./gazebo_data.mat')
X, y = data['images'], data['labels']
X = (X - 128) / 255
X1 = np.zeros([X.shape[0], X.shape[1] * X.shape[2]])
for i in range(X.shape[0]):
    X1[i,:] = X[i,:,:].flatten()
image_height = X.shape[1]
image_width = X.shape[2]

X = X1
n = y.shape[0]
image_size = image_height * image_width
labels_count = y.shape[1]

In [4]:
num_train = n * 4 // 5; num_cv = n - num_train
perm = np.random.permutation(n)
Xtrain = X[perm[:num_train], :]
ytrain = y[perm[:num_train], :]
Xcv = X[perm[num_train:], :]
ycv = y[perm[num_train:], :]


### 1. Construction of ConvNet

In [5]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape = shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Probably need to edit this method to speed things up
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [6]:
X = tf.placeholder('float32', shape = [None, image_size])
y_ = tf.placeholder('float32', shape = [None, labels_count])

First layer (convolution Plus Pooling):

In [7]:
image_height

60

In [8]:
#5 x 5 patches, 1 input channel, 32 output units
w_conv1 = weight_variable([10, 10, 1, 64])
# bias variable (output of the 1st layer as well)
b_conv1 = bias_variable([64])

#Reshape to 2d image
image = tf.reshape(X, [-1, image_height , image_width, 1])
h_conv1 = tf.nn.relu(conv2d(image, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# First Layer ^ convolution + pooling

# Reshape the 32 lin. comb. of inputs of 2nd layer to 4x8 grid for visualization(?)
layer1 = tf.reshape(h_conv1, (-1, image_height, image_width, 8 ,8))  
layer1 = tf.transpose(layer1, (0, 3, 1, 4, 2))
layer1 = tf.reshape(layer1, (-1, image_height*8, image_width*8)) 

Now, layer 2:

In [9]:
W_conv2 = weight_variable([5, 5, 64, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
layer2 = tf.reshape(h_conv2, (-1, image_height//2, image_width//2, 4 ,16))  
layer2 = tf.transpose(layer2, (0, 3, 1, 4, 2))
layer2 = tf.reshape(layer2, (-1, image_height//2*4, image_width//2*16)) 

Now, layer 3 (A fully connected layer with 1024 neurons):

In [10]:
W_fc1 = weight_variable([20*15 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 20*15*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

Dropout to prevent overfitting:

In [11]:
keep_prob = tf.placeholder('float32')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

Last layer (a softmax layer w/ dropout in the front)

In [12]:
W_fc2 = weight_variable([1024, 1024])
b_fc2 = bias_variable([1024])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

In [13]:
W_fc3 = weight_variable([1024, labels_count])
b_fc3 = bias_variable([labels_count])
y = tf.matmul(h_fc2_drop, W_fc3) + b_fc3

Lost Functions:

In [14]:
mean_squared = tf.reduce_sum(tf.square(tf.sub(y, y_)))
cross_entropy = -tf.reduce_sum(y_*tf.log(y + 0.01))
loss = mean_squared
LEARNING_RATE = 1e-4
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

### 1.Train, validation and predict

In [15]:
epoches_completed = 0
index_in_epoch = 0
n = Xtrain.shape[0]
def nextBatch(batch_size):
    global Xtrain
    global ytrain
    global index_in_epoch
    global epoches_completed
    
    if (index_in_epoch + batch_size >= n):
        epoches_completed += 1
        index_in_epoch = 0
        perm = np.arange(n)
        np.random.shuffle(perm)
        Xtrain = Xtrain[perm, :]
        ytrain = ytrain[perm, :]
    index_in_epoch += batch_size
    return Xtrain[index_in_epoch:index_in_epoch + batch_size, :], ytrain[index_in_epoch:index_in_epoch + batch_size, :]


In [16]:
sess = tf.InteractiveSession()

In [None]:
init = tf.initialize_all_variables()
sess.run(init)
num_iter = 10000
sample_rate = 10 # Per Sample
batch_size = 50
cost_history = []
for i in range(num_iter):
    currX, curry = nextBatch(batch_size)
    cost_history.append(loss.eval(feed_dict={X:currX, \
                                          y_: curry, \
                                          keep_prob: 1.0}))
    #print(cost_history[-1])
    #print(W_fc2.eval())
    print("Train cost: " + str(cost_history[-1]))

    if (i % sample_rate == 0):
        print(str(i) + " out of " + str(num_iter) + " iterations have completed.")
        cv_cost = loss.eval(feed_dict={X:Xcv, \
                                               y_: ycv, \
                                               keep_prob: 1.0})
        print("CV cost: " + str(cv_cost))
        
    sess.run(train_step, feed_dict={X:currX, y_: curry, keep_prob:1.0})

Train cost: 1.08145e+06
0 out of 10000 iterations have completed.
CV cost: 1.3092e+07
Train cost: 158766.0
Train cost: 254316.0
Train cost: 446731.0
Train cost: 324485.0
Train cost: 124808.0
Train cost: 49292.1
Train cost: 43770.4
Train cost: 92946.9
Train cost: 112528.0
Train cost: 157629.0
10 out of 10000 iterations have completed.


In [None]:
init = tf.initialize_all_variables()
sess.run(init)
loss.eval(feed_dict={X:Xtrain, \
                                          y_: ytrain, \
                                          keep_prob: 1.0})

In [None]:
Xs, ys = Xtrain[[1],:], ytrain[[1], :]
ypred = y.eval(feed_dict={X:Xs, y_:ys, keep_prob:1.0})

In [None]:
print("Predicted value is: " + str(ypred))
print("Actual value is: " + str(ys))