In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import h5py as h5py

In [2]:
def load_dataset():
    """Load the train and test splits from the HDF5 files under ``signs_dataset/``.

    Returns:
        A tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)`` of NumPy arrays. The two label arrays are
        reshaped into row vectors of shape ``(1, m)``; the feature arrays and
        the class list are returned as stored in the files.
    """
    # Context managers guarantee the HDF5 handles are closed once the data has
    # been copied into in-memory NumPy arrays (previously both files stayed open).
    with h5py.File('signs_dataset/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('signs_dataset/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Labels are stored as flat vectors; expose them as (1, m) row vectors.
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [3]:
def put_filters_on_grid(w_filter, pad=1):
    '''
    Visualize conv. filters as an image (mostly for the 1st layer).
    Pads every filter spatially and reshapes the result into a single
    image-like 4-D tensor.

    Args:
        w_filter: tensor of shape [Y, X, NumChannels, NumKernels]. The reshape
            below uses one padded filter size for both spatial axes, so it is
            only valid for square filters (Y == X).
        pad: number of zero-valued pixels added on each side of the two
            spatial axes of every filter (Python int, default 1).

    Return:
        Tensor of shape
        [1, NumKernels * (Y + 2*pad), NumChannels * (Y + 2*pad), 1].

    NOTE(review): this is a plain reshape of the transposed tensor, not a
    tile-by-tile layout; with more than one input channel the pixels of a
    single filter are presumably not contiguous in the image - verify the
    rendered result before relying on it.
    '''
    # Pad only the two spatial axes; channel and kernel axes are left untouched.
    padding = tf.constant([[pad, pad], [pad, pad], [0, 0], [0, 0]])
    padded_filt = tf.pad(w_filter, padding, "CONSTANT")

    # Default transpose reverses the axes: [Y, X, C, N] -> [N, C, X, Y].
    padded_filt = tf.transpose(padded_filt)
    padded_shape = padded_filt.get_shape()

    # Cast the static dimensions to int32 tensors and stack them, because
    # tf.reshape had issues with the raw dimension objects.
    filt_size = tf.cast(padded_shape[3], tf.int32)
    num_kernels = tf.cast(padded_shape[0], tf.int32)
    num_chan = tf.cast(padded_shape[1], tf.int32)
    roll_shape = tf.stack([1, num_kernels * filt_size, num_chan * filt_size, 1])

    return tf.reshape(padded_filt, roll_shape)

In [4]:
# Directory handed to the TensorBoard summary writer.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
LOGDIR = "/home/vishnu/Dropbox/intel_works/ipython_notebooks/tf_tests/log_alexnet"

# Read the train/test arrays and the class list from disk.
train_x, train_y, test_x, test_y, classes = load_dataset()

# Report the dataset dimensions; axes 1..3 of the image array are printed as
# length, width and channel count.
print("No. of Training samples (batch): %d" % len(train_x))
print("No. of Test samples: %d" % len(test_x))
print("No. of gestures: %d" % classes.size)
print("Image format (Len, Wid, Chan): %d * %d * %d" % train_x.shape[1:4])

No. of Training samples (batch): 1080
No. of Test samples: 120
No. of gestures: 6
Image format (Len, Wid, Chan): 64 * 64 * 3


In [5]:
# Graph inputs: a batch of images with the same per-sample shape as train_x,
# and a batch of label rows with one column per class.
x = tf.placeholder(tf.float32, shape=[None] + list(train_x.shape[1:4]))
y = tf.placeholder(tf.float32, shape=[None, classes.size])

# Cast to float32 (the raw image data was uint8); note the placeholder above is
# already declared as float32.
x = tf.cast(x, dtype=tf.float32)

In [6]:
# Extra (non-AlexNet) convolution that brings the input to the AlexNet spec:
# 10x10 kernels, 96 output channels, stride 1, VALID padding, then ReLU.
with tf.name_scope('nonalex1_conv1'):
    l1_filter_size = 10  # each kernel is n * n
    l1_filter_Num = 96  # number of kernels (output channels)
    stride_1 = [1, 1, 1, 1]
    ch_X = tf.cast(x.shape[3], tf.int32)  # input channel count

    w = tf.Variable(
        tf.truncated_normal(
            [l1_filter_size, l1_filter_size, ch_X, l1_filter_Num],
            stddev=0.1,
        ),
        name="W",
    )
    b = tf.Variable(tf.constant(0.1, shape=[l1_filter_Num]), name="B")

    z_conv = tf.nn.conv2d(x, w, strides=stride_1, padding="VALID") + b
    nonalex1_act1 = tf.nn.relu(z_conv, name="act1")

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", w), ("biases", b), ("activations", nonalex1_act1)):
        tf.summary.histogram(_tag, _tensor)
    print(nonalex1_act1.shape)

    # Kernel-grid image summary is currently disabled:
    #grid = put_filters_on_grid(w)
    #tf.summary.image('nonalex1_conv1_kernal', grid, max_outputs=1)

(?, 55, 55, 96)


In [7]:
# Max pooling layer 1: 3x3 window, stride 2, VALID padding.
with tf.name_scope('max_pool1'):
    ch_2 = tf.cast(nonalex1_act1.shape[3], tf.int32)  # channel count; not used by the pooling op itself
    k_size = [1, 3, 3, 1]  # pooling window (batch, height, width, channel)
    stride_2 = [1, 2, 2, 1]
    # The scope, the comment and the tensor name all say "max pool", but the
    # original called tf.nn.avg_pool; use the max-pooling op the layer is named for.
    act2_maxpool1 = tf.nn.max_pool(nonalex1_act1, k_size, stride_2, padding='VALID', name="act2_maxpool1")
    print(act2_maxpool1.get_shape())

(?, 27, 27, 96)


In [8]:
# AlexNet convolution 1: 5x5 kernels, 256 output channels, stride 1,
# SAME padding, then ReLU.
with tf.name_scope('alex1_conv2'):
    l3_filter_size = 5  # each kernel is n * n
    l3_filter_Num = 256  # number of kernels (output channels)
    stride_3 = [1, 1, 1, 1]
    ch_3 = tf.cast(act2_maxpool1.shape[3], tf.int32)  # input channel count

    w = tf.Variable(
        tf.truncated_normal(
            [l3_filter_size, l3_filter_size, ch_3, l3_filter_Num],
            stddev=0.1,
        ),
        name="W",
    )
    b = tf.Variable(tf.constant(0.1, shape=[l3_filter_Num]), name="B")

    z_conv = tf.nn.conv2d(act2_maxpool1, w, strides=stride_3, padding="SAME") + b
    alex1_act3 = tf.nn.relu(z_conv, name="alex1_act3")

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", w), ("biases", b), ("activations", alex1_act3)):
        tf.summary.histogram(_tag, _tensor)
    print(alex1_act3.shape)

    # Kernel-grid image summary is currently disabled:
    #grid = put_filters_on_grid(w)
    #tf.summary.image('alex1_act3_kernal', grid, max_outputs=1)

(?, 27, 27, 256)


In [9]:
# Max pooling layer 2: 3x3 window, stride 2, VALID padding.
with tf.name_scope('max_pool2'):
    ch_4 = tf.cast(alex1_act3.shape[3], tf.int32)  # channel count; not used by the pooling op itself
    k_size = [1, 3, 3, 1]  # pooling window (batch, height, width, channel)
    stride_4 = [1, 2, 2, 1]
    # The scope, the comment and the tensor name all say "max pool", but the
    # original called tf.nn.avg_pool; use the max-pooling op the layer is named for.
    act4_maxpool2 = tf.nn.max_pool(alex1_act3, k_size, stride_4, padding='VALID', name="act4_maxpool2")
    print(act4_maxpool2.get_shape())

(?, 13, 13, 256)


In [10]:
# AlexNet convolution 2: 3x3 kernels, 384 output channels, stride 1,
# SAME padding, then ReLU.
with tf.name_scope('alex2_conv3'):
    l5_filter_size = 3  # each kernel is n * n
    l5_filter_Num = 384  # number of kernels (output channels)
    stride_5 = [1, 1, 1, 1]
    ch_5 = tf.cast(act4_maxpool2.shape[3], tf.int32)  # input channel count

    w = tf.Variable(
        tf.truncated_normal(
            [l5_filter_size, l5_filter_size, ch_5, l5_filter_Num],
            stddev=0.1,
        ),
        name="W",
    )
    b = tf.Variable(tf.constant(0.1, shape=[l5_filter_Num]), name="B")

    z_conv = tf.nn.conv2d(act4_maxpool2, w, strides=stride_5, padding="SAME") + b
    alex2_act5 = tf.nn.relu(z_conv, name="alex2_act5")

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", w), ("biases", b), ("activations", alex2_act5)):
        tf.summary.histogram(_tag, _tensor)
    print(alex2_act5.shape)

    # Kernel-grid image summary is currently disabled:
    #grid = put_filters_on_grid(w)
    #tf.summary.image('alex2_act5', grid, max_outputs=1)

(?, 13, 13, 384)


In [11]:
# AlexNet convolution 3: 3x3 kernels, 384 output channels, stride 1,
# SAME padding, then ReLU.
with tf.name_scope('alex3_conv4'):
    l6_filter_size = 3  # each kernel is n * n
    l6_filter_Num = 384  # number of kernels (output channels)
    stride_6 = [1, 1, 1, 1]
    ch_6 = tf.cast(alex2_act5.shape[3], tf.int32)  # input channel count

    w = tf.Variable(
        tf.truncated_normal(
            [l6_filter_size, l6_filter_size, ch_6, l6_filter_Num],
            stddev=0.1,
        ),
        name="W",
    )
    b = tf.Variable(tf.constant(0.1, shape=[l6_filter_Num]), name="B")

    z_conv = tf.nn.conv2d(alex2_act5, w, strides=stride_6, padding="SAME") + b
    alex3_act6 = tf.nn.relu(z_conv, name="alex3_act6")

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", w), ("biases", b), ("activations", alex3_act6)):
        tf.summary.histogram(_tag, _tensor)
    print(alex3_act6.shape)

    # Kernel-grid image summary is currently disabled:
    #grid = put_filters_on_grid(w)
    #tf.summary.image('alex3_act6', grid, max_outputs=1)

(?, 13, 13, 384)


In [12]:
# AlexNet convolution 4: 3x3 kernels, 256 output channels, stride 1,
# SAME padding, then ReLU.
with tf.name_scope('alex4_conv5'):
    l7_filter_size = 3  # each kernel is n * n
    l7_filter_Num = 256  # number of kernels (output channels)
    stride_7 = [1, 1, 1, 1]
    ch_7 = tf.cast(alex3_act6.shape[3], tf.int32)  # input channel count

    w = tf.Variable(
        tf.truncated_normal(
            [l7_filter_size, l7_filter_size, ch_7, l7_filter_Num],
            stddev=0.1,
        ),
        name="W",
    )
    b = tf.Variable(tf.constant(0.1, shape=[l7_filter_Num]), name="B")

    z_conv = tf.nn.conv2d(alex3_act6, w, strides=stride_7, padding="SAME") + b
    alex4_act7 = tf.nn.relu(z_conv, name="alex4_act7")

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", w), ("biases", b), ("activations", alex4_act7)):
        tf.summary.histogram(_tag, _tensor)
    print(alex4_act7.shape)

    # Kernel-grid image summary is currently disabled:
    #grid = put_filters_on_grid(w)
    #tf.summary.image('alex4_act7', grid, max_outputs=1)

(?, 13, 13, 256)


In [13]:
# Max pooling layer 3: 3x3 window, stride 2, VALID padding.
with tf.name_scope('max_pool3'):
    ch_8 = tf.cast(alex4_act7.shape[3], tf.int32)  # channel count; not used by the pooling op itself
    k_size = [1, 3, 3, 1]  # pooling window (batch, height, width, channel)
    stride_8 = [1, 2, 2, 1]
    # The scope, the comment and the tensor name all say "max pool", but the
    # original called tf.nn.avg_pool; use the max-pooling op the layer is named for.
    act8_maxpool3 = tf.nn.max_pool(alex4_act7, k_size, stride_8, padding='VALID', name="act8_maxpool3")
    print(act8_maxpool3.get_shape())
    print(act8_maxpool3.get_shape()[1])

(?, 6, 6, 256)
6


In [14]:
# Flattening: collapse every non-batch axis of the last pooling output into one
# feature axis. The width is derived from the tensor's static shape instead of
# the hard-coded 6 * 6 * 256, so it stays correct if the input size or the
# layers above change.
flat_dim = int(np.prod(act8_maxpool3.get_shape().as_list()[1:]))
act8_flat = tf.reshape(act8_maxpool3, [-1, flat_dim])
print(act8_flat.get_shape())

(?, 9216)


In [15]:
# %% Create a fully-connected layer 1: flattened features -> 4096 units, ReLU.
with tf.name_scope('fc1') as scope:
    n_fc1 = 4096
    # Input width is read from the flattened tensor's static shape rather than
    # repeating the magic number 6 * 6 * 256.
    n_flat = act8_flat.get_shape().as_list()[1]
    W_fc1 = tf.Variable(tf.truncated_normal([n_flat, n_fc1], stddev=0.1),name="w_fc1")
    b_fc1 = tf.Variable(tf.truncated_normal([n_fc1], stddev=0.1),name="b_fc1")
    h_fc1 = tf.nn.relu(tf.matmul(act8_flat, W_fc1) + b_fc1)

    # Histogram summaries for TensorBoard.
    tf.summary.histogram("weights", W_fc1)
    tf.summary.histogram("biases", b_fc1)
    tf.summary.histogram("activations", h_fc1)
    print(h_fc1.get_shape())

(?, 4096)


In [16]:
# %% Fully-connected layer 2: n_fc1 inputs -> 4096 units, ReLU.
with tf.name_scope('fc2') as scope:
    n_fc2 = 4096
    W_fc2 = tf.Variable(
        tf.truncated_normal([n_fc1, n_fc2], stddev=0.1),
        name="w_fc2",
    )
    b_fc2 = tf.Variable(
        tf.truncated_normal([n_fc2], stddev=0.1),
        name="b_fc2",
    )
    h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

    # Histogram summaries for TensorBoard.
    for _tag, _tensor in (("weights", W_fc2), ("biases", b_fc2), ("activations", h_fc2)):
        tf.summary.histogram(_tag, _tensor)
    print(h_fc2.shape)

(?, 4096)


In [17]:
# %% And finally our softmax layer: n_fc2 inputs -> one probability per class.
with tf.name_scope('softmax_layer') as scope:
    # Use the same class count as the label placeholder `y` (classes.size)
    # instead of a hard-coded 6, so the two can never disagree.
    n_classes = int(classes.size)
    W_fc3 = tf.Variable(tf.truncated_normal([n_fc2, n_classes], stddev=0.1),name="w_fc3")
    b_fc3 = tf.Variable(tf.truncated_normal([n_classes], stddev=0.1),name="b_fc3")
    # Keep the pre-softmax scores under their own name so they can be inspected
    # or fed to a logits-based loss.
    logits = tf.matmul(h_fc2, W_fc3) + b_fc3
    y_pred = tf.nn.softmax(logits)
    print(y_pred.get_shape())

(?, 6)


In [23]:
# %% Define loss/eval/training functions
with tf.name_scope('cross_entropy'):
    # Summed cross-entropy between the one-hot labels and the predicted
    # probabilities. The probabilities are clipped away from 0 before the log:
    # otherwise a saturated softmax yields log(0) = -inf and the loss (and all
    # gradients) become NaN.
    cross_entropy = -tf.reduce_sum(y * tf.log(tf.clip_by_value(y_pred, 1e-10, 1.0)))

    tf.summary.scalar("cross_entropy", cross_entropy)

with tf.name_scope('train'):
    # One Adam update step that minimises the cross-entropy.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cross_entropy)

In [None]:
# %% Accuracy: fraction of samples whose highest-probability class matches the
# position of the largest entry in the label row.
with tf.name_scope('accuracy'):
    predicted_class = tf.argmax(y_pred, axis=1)
    true_class = tf.argmax(y, axis=1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(correct_prediction.shape)
    tf.summary.scalar("accuracy", accuracy)

(?,)


In [None]:
# Merge every summary op registered so far into one fetchable op.
summ = tf.summary.merge_all()

# %% Create a new session and run the initializer for all graph variables.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Event-file writer for TensorBoard; handing it the graph records the graph
# definition in LOGDIR.
writer = tf.summary.FileWriter(LOGDIR, graph=sess.graph)

In [None]:
# One-hot encode the integer labels. The labels arrive as (1, m) row vectors,
# so flatten them first to obtain (m, n_classes) matrices directly. The width
# follows classes.size so it always matches the label placeholder `y`.
n_classes = int(classes.size)
y_org = np.eye(n_classes)[train_y.reshape(-1)]
y_val = np.eye(n_classes)[test_y.reshape(-1)]

ep = 10            # number of passes over the training set
batch_size = 100   # samples per optimisation step
n_train = train_x.shape[0]

for i in range(ep):
    # Walk over the training set in consecutive, non-overlapping mini-batches.
    # The previous slicing, [(batch_i + batch_i*i):batch_size], never reached
    # beyond the first `batch_size` samples and shrank to empty batches, and the
    # optimizer step was only executed in epoch 0 on a fixed slice [1:200].
    for start in range(0, n_train, batch_size):
        end = start + batch_size  # slicing clamps the final, shorter batch
        batch_xs, batch_ys = train_x[start:end], y_org[start:end]
        sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})

    # Report accuracy on the last mini-batch of the epoch as a cheap progress signal.
    train_accuracy = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys})
    print("Epoch %d train accuracy (last batch): %f" % (i, train_accuracy))

# Held-out evaluation; y_val was previously computed but never used.
test_accuracy = sess.run(accuracy, feed_dict={x: test_x, y: y_val})
print("Test accuracy: %f" % test_accuracy)