## Try to do ConvNet with Tensorflow

Apr.19.2016

### 0. Libraries and Data Loading

In [1]:
import numpy as np, scipy.io, tensorflow as tf, matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = scipy.io.loadmat('./gazebo_data.mat')
X, y = data['images'], data['labels']
X = (X - 128) / 255
X1 = np.zeros([X.shape[0], X.shape[1] * X.shape[2]])
for i in range(X.shape[0]):
    X1[i,:] = X[i,:,:].flatten()
image_height = X.shape[1]
image_width = X.shape[2]

X = X1
n = y.shape[0]
image_size = image_height * image_width
labels_count = y.shape[1]

In [3]:
num_train = n * 4 // 5; num_cv = n - num_train
#perm = np.random.permutation(n)
#print(perm)
perm = np.arange(n-1)
#print(perm[:num_train])
Xtrain = X[perm[:num_train], :]
ytrain = y[perm[:num_train], :]
Xcv = X[perm[num_train:], :]
ycv = y[perm[num_train:], :]


### 1. Construction of ConvNet

In [4]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1, shape = shape)
    return tf.Variable(initial)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Probably need to edit this method to speed things up
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [5]:
X = tf.placeholder('float32', shape = [None, image_size])
y_ = tf.placeholder('float32', shape = [None, labels_count])

First layer (convolution Plus Pooling):

In [6]:
image_height

60

In [7]:
#5 x 5 patches, 1 input channel, 32 output units
#We train 32 filters for the first layer
w_conv1 = weight_variable([10, 10, 1, 32])
# bias variable (output of the 1st layer as well)
b_conv1 = bias_variable([32])

#Reshape to 2d image
image = tf.reshape(X, [-1, image_height , image_width, 1])
h_conv1 = tf.nn.relu(conv2d(image, w_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# First Layer ^ convolution + pooling

# Reshape the 32 lin. comb. of inputs of 2nd layer to 4x8 grid for visualization(?)
layer1 = tf.reshape(h_conv1, (-1, image_height, image_width, 8 ,8))  
layer1 = tf.transpose(layer1, (0, 3, 1, 4, 2)) # Permute the dimension
layer1 = tf.reshape(layer1, (-1, image_height*8, image_width*8)) 

Now, layer 2:

In [8]:
W_conv2 = weight_variable([5, 5, 32, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
layer2 = tf.reshape(h_conv2, (-1, image_height//2, image_width//2, 4 ,16))  
layer2 = tf.transpose(layer2, (0, 3, 1, 4, 2))
layer2 = tf.reshape(layer2, (-1, image_height//2*4, image_width//2*16)) 

Now, layer 3 (A fully connected layer with 1024 neurons):

In [9]:
W_fc1 = weight_variable([20*15 * 32, 200])
b_fc1 = bias_variable([200])
h_pool2_flat = tf.reshape(h_pool2, [-1, 20*15*32])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

Dropout to prevent overfitting:

In [10]:
keep_prob = tf.placeholder('float32')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

Last layer (a softmax layer w/ dropout in the front)

In [14]:
W_fc2 = weight_variable([200, 200])
b_fc2 = bias_variable([200])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

In [17]:
W_fc3 = weight_variable([200, labels_count])
b_fc3 = bias_variable([labels_count])
y = tf.matmul(h_fc2_drop, W_fc3) + b_fc3

Lost Functions:

In [None]:
beta = 10 #Scaling factor in order to balance the loss btw position and angle
pos_hat, pos = tf.slice(y, [0, 0], [-1, 3]), tf.slice(y_, [0, 0], [-1, 3])
ang_hat, ang = tf.slice(y, [0, 3], [-1, 3]), tf.slice(y_, [0, 3], [-1, 3])
mean_squared = tf.reduce_sum((pos_hat - pos) ** 2) + beta * tf.reduce_sum((ang_hat - ang) ** 2)
loss = mean_squared
LEARNING_RATE = 1e-5
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

### 1.Train, validation and predict

In [19]:
epoches_completed = 0
index_in_epoch = 0
n = Xtrain.shape[0]
def nextBatch(batch_size):
    global Xtrain
    global ytrain
    global index_in_epoch
    global epoches_completed
    
    if (index_in_epoch + batch_size >= n):
        epoches_completed += 1
        index_in_epoch = 0
        perm = np.arange(n)
        np.random.shuffle(perm)
        Xtrain = Xtrain[perm, :]
        ytrain = ytrain[perm, :]
    index_in_epoch += batch_size
    return Xtrain[index_in_epoch:index_in_epoch + batch_size, :], ytrain[index_in_epoch:index_in_epoch + batch_size, :]


In [20]:
sess = tf.InteractiveSession()

In [23]:
init = tf.initialize_all_variables()
sess.run(init)
num_iter = 10000
sample_rate = 10 # Per Sample
batch_size = 50
cost_history = []
for i in range(num_iter):
    currX, curry = nextBatch(batch_size)
    cost_history.append(loss.eval(feed_dict={X:currX, \
                                          y_: curry, \
                                          keep_prob: 1}))
    #print(cost_history[-1])
    #print(W_fc2.eval())
    print("Train cost: " + str(cost_history[-1]))

    if (i % sample_rate == 0):
        print(str(i) + " out of " + str(num_iter) + " iterations have completed.")
        cv_cost = loss.eval(feed_dict={X:Xcv, \
                                               y_: ycv, \
                                               keep_prob: 1})
        print("CV cost: " + str(cv_cost))
        
    sess.run(train_step, feed_dict={X:currX, y_: curry, keep_prob:1})

Train cost: 72290.9
0 out of 10000 iterations have completed.
CV cost: 667877.0
Train cost: 37188.9
Train cost: 15348.3
Train cost: 10063.8
Train cost: 15098.7
Train cost: 17519.0
Train cost: 19259.3
Train cost: 19684.9
Train cost: 13201.7
Train cost: 9137.67
Train cost: 7569.96
10 out of 10000 iterations have completed.
CV cost: 145263.0
Train cost: 4503.54
Train cost: 4292.31
Train cost: 3672.78
Train cost: 4114.77
Train cost: 3436.51
Train cost: 3616.86
Train cost: 3859.44
Train cost: 4648.47
Train cost: 3703.4
Train cost: 3423.91
20 out of 10000 iterations have completed.
CV cost: 108272.0
Train cost: 3303.68
Train cost: 2895.04
Train cost: 2758.28
Train cost: 1935.67
Train cost: 1924.86
Train cost: 1590.93
Train cost: 1499.4
Train cost: 1714.63
Train cost: 1793.4
Train cost: 1500.8
30 out of 10000 iterations have completed.
CV cost: 70197.4
Train cost: 1659.17
Train cost: 1539.88
Train cost: 1725.08
Train cost: 1426.65
Train cost: 1216.18
Train cost: 1372.61
Train cost: 1085.84
Tr

KeyboardInterrupt: 

init = tf.initialize_all_variables()
sess.run(init)
loss.eval(feed_dict={X:Xtrain, \
                                          y_: ytrain, \
                                          keep_prob: 1.0})

In [150]:
Xs, ys = Xtrain[[-1],:], ytrain[[-1], :]
ypred = y.eval(feed_dict={X:Xs, y_:ys, keep_prob:1.0})

In [151]:
print("Predicted value is: " + str(ypred))
print("Actual value is: " + str(ys))

Predicted value is: [[ 1.20348179 -1.99820268  1.74368382  0.08602133  0.19297805  1.75704432]]
Actual value is: [[ 1.42 -2.49  1.93  0.2   0.34  2.07]]


In [140]:
n

2148