In [2]:
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
from scipy import ndimage


In [4]:
# Load the pre-processed digit images and labels.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at a pickle file you produced yourself.
pickle_file = 'images_tensorflow.pickle'
try:
    with open(pickle_file, 'rb') as f:
        digits = pickle.load(f)  # unpickle

    # Every split's features are viewed as (N, 32, 32, 1): 32x32 images with
    # a single trailing channel axis.
    train_X, test_X, valid_X, extra_X = (
        digits[split]['feature'].reshape(-1, 32, 32, 1)
        for split in ('train', 'test', 'valid', 'extra'))
    train_Y, test_Y, valid_Y, extra_Y = (
        digits[split]['label']
        for split in ('train', 'test', 'valid', 'extra'))
except Exception as e:
    # Report which file failed, then let the error propagate so later cells
    # do not run against missing data.
    print('Unable to process data from', pickle_file, ':', e)
    raise
    

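Use the final LeNet5 model with regularization from [Deep Learning Assignment 4](https://github.com/yinghsienwu/DeepLearning/blob/master/4_convolutions.ipynb)
----
LeNet5 reference hyperparameters (the code cell below overrides some of them, e.g. batch_size = 64 and num_hidden = 64):

- batch_size = 128
- image_size = 32
- num_hidden = 84
- num_labels = 11 (digits 0-9, plus blank)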

The model includes 7 layers:

1. C1: convolutional, C1_depth = 6
    - output = batch_size x 28 x 28 x C1_depth (5 x 5 patch, VALID padding, stride 1)
    - patch = 5 x 5 x 1 x C1_depth (patch_size = 5, num_channels = 1)
2. S2: sub-sampling, stride = 2 [1, 2, 2, 1]
    - output = batch_size x 14 x 14 x C1_depth
3. C3: convolutional, C3_depth = 16, stride = 1 [1, 1, 1, 1]
    - output = batch_size x 10 x 10 x C3_depth
    - patch = 5 x 5 x C1_depth x C3_depth
4. S4: sub-sampling, stride = 2 [1, 2, 2, 1]
    - output = batch_size x 5 x 5 x C3_depth
5. C5: convolutional, C5_depth = 120
    - output = batch_size x 1 x 1 x C5_depth
    - patch = 5 x 5 x C3_depth x C5_depth
6. F6: fully-connected
    - C5_conv_dim = ((((image_size+1)//2+1)//2+1)//2) = 4 (this formula assumes SAME padding with three stride-2 reductions; with the VALID convolutions listed above the C5 output is 1 x 1 instead)
    - weights = (C5_conv_dim x C5_conv_dim x C5_depth) x num_hidden
7. O7: output
    - weights = num_hidden x num_labels

In [34]:
def accuracy(predictions, labels):
    """Return the percentage of individual digit predictions that are correct.

    predictions: array indexed as (position, example, class); the class with
        the highest score along the last axis is taken as the predicted digit.
    labels: integer digit labels laid out as (example, position), so that
        they line up with the transposed argmax of `predictions`.

    The score is per digit, not per whole sequence: every
    (example, position) pair counts once.
    """
    num_positions, num_examples = predictions.shape[0], predictions.shape[1]
    # argmax gives (position, example); transpose to match the label layout.
    predicted_digits = np.argmax(predictions, 2).T
    num_correct = np.sum(predicted_digits == labels)
    return (100.0 * num_correct / num_examples / num_positions)

In [92]:
# Hyper-parameters for the multi-digit convolutional network.
batch_size = 64
patch_size = 5      # side length of every (square) convolution kernel
image_size = 32     # input images are image_size x image_size

num_labels = 11     # classes per digit position
num_channels = 1
c1_depth = 16
c3_depth = 32
# NOTE(review): c5_depth is not read anywhere in this cell -- the C5 kernel
# below takes its output depth from num_hidden. The two values are equal
# today, so changing only c5_depth has no effect.
c5_depth = 64
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    # Six integer columns per example; only columns 1..5 are used below (one
    # per digit position). Column 0 is presumably the sequence length --
    # TODO confirm against the code that built the pickle.
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size, 6))
    tf_valid_dataset = tf.constant(valid_X)
    tf_test_dataset = tf.constant(test_X)

    # Variable factories, so each layer is declared in a single line.
    def _conv_kernel(name, in_depth, out_depth):
        """Xavier-initialised patch_size x patch_size convolution kernel."""
        return tf.get_variable(
            name, shape=[patch_size, patch_size, in_depth, out_depth],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())

    def _ones_bias(name, size):
        """Bias vector of length `size`, initialised to 1.0."""
        return tf.Variable(tf.constant(1.0, shape=[size]), name=name)

    def _digit_head(position):
        """Weights and biases of the classifier for one digit position."""
        weights = tf.get_variable(
            'd%dw' % position, shape=[num_hidden, num_labels],
            initializer=tf.contrib.layers.xavier_initializer())
        return weights, _ones_bias('d%db' % position, num_labels)

    # Variables: three convolutional layers.
    c1_weights = _conv_kernel('c1w', num_channels, c1_depth)
    c1_biases = _ones_bias('c1b', c1_depth)

    c3_weights = _conv_kernel('c3w', c1_depth, c3_depth)
    c3_biases = _ones_bias('c3b', c3_depth)

    c5_weights = _conv_kernel('c5w', c3_depth, num_hidden)
    c5_biases = _ones_bias('c5b', num_hidden)

    # There is no separate fully-connected F6 layer: the flattened C5 output
    # feeds the output heads directly.

    # Output layer: sequence of 5 digits, one independent classifier each.
    d1_weights, d1_biases = _digit_head(1)
    d2_weights, d2_biases = _digit_head(2)
    d3_weights, d3_biases = _digit_head(3)
    d4_weights, d4_biases = _digit_head(4)
    d5_weights, d5_biases = _digit_head(5)
    digit_heads = [(d1_weights, d1_biases), (d2_weights, d2_biases),
                   (d3_weights, d3_biases), (d4_weights, d4_biases),
                   (d5_weights, d5_biases)]

    def _conv_relu(inputs, weights, biases):
        """Stride-1 VALID convolution followed by bias add and ReLU."""
        conv = tf.nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='VALID')
        return tf.nn.relu(conv + biases)

    def _lrn_maxpool(inputs):
        """Local response normalisation followed by 2x2, stride-2 max pooling."""
        lrn = tf.nn.local_response_normalization(inputs)
        return tf.nn.max_pool(lrn, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME')

    # Model
    def model(data, keep_prob=1.0):
        """Build the forward pass for `data`.

        data: 4-D image tensor with a fully defined static shape (the static
            shape is needed to flatten the last convolution's output).
        keep_prob: dropout keep probability applied after the last
            convolution; the default of 1.0 disables dropout.

        Returns a list of five logits tensors, one per digit position, each
        of shape (batch, num_labels).
        """
        hidden = _lrn_maxpool(_conv_relu(data, c1_weights, c1_biases))
        hidden = _lrn_maxpool(_conv_relu(hidden, c3_weights, c3_biases))
        hidden = _conv_relu(hidden, c5_weights, c5_biases)

        drop = tf.nn.dropout(hidden, keep_prob)

        # Flatten (batch, h, w, depth) to (batch, h*w*depth). The head weights
        # expect num_hidden inputs, so h*w*depth must equal num_hidden; the
        # shape printouts make a mismatch easy to spot.
        shape = drop.get_shape().as_list()
        print('shape: ',shape)
        hidden = tf.reshape(drop, [shape[0], shape[1]*shape[2]*shape[3]])
        print('hidden shape: ',hidden.get_shape().as_list())
        return [tf.matmul(hidden, weights) + biases
                for weights, biases in digit_heads]

    # tf.pack was renamed tf.stack; resolve whichever this TensorFlow provides.
    _stack = getattr(tf, 'stack', None) or tf.pack

    def _stacked_softmax(logits_per_digit):
        """Softmax each head's logits and stack to (position, batch, class)."""
        return _stack([tf.nn.softmax(logits) for logits in logits_per_digit])

    # Training computation.
    # NOTE(review): keep_prob is left at its default of 1.0, so dropout is
    # inactive during training.
    train_logits = model(tf_train_dataset)
    # Individual names are kept in case other cells refer to them.
    [l1, l2, l3, l4, l5] = train_logits

    # The sparse cross-entropy takes integer class ids directly, so no
    # one-hot encoding is needed. Head i is scored against label column i.
    # Keyword arguments are used because newer TensorFlow releases reject
    # positional logits/labels for this op.
    per_example_loss = None
    for position, logits in enumerate(train_logits, 1):
        digit_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf_train_labels[:, position])
        if per_example_loss is None:
            per_example_loss = digit_loss
        else:
            per_example_loss = per_example_loss + digit_loss
    loss = tf.reduce_mean(per_example_loss)

    # Optimizer with exponential decay learning rate: starts at 0.05 and
    # decays by a factor of 0.96 per 100 steps.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.05, global_step, 100, 0.96)
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = _stacked_softmax(train_logits)

    valid_logits = model(tf_valid_dataset)
    [vl1, vl2, vl3, vl4, vl5] = valid_logits
    valid_prediction = _stacked_softmax(valid_logits)

    test_logits = model(tf_test_dataset)
    [tl1, tl2, tl3, tl4, tl5] = test_logits
    test_prediction = _stacked_softmax(test_logits)
    

('shape: ', [64, 1, 1, 64])
('hidden shape: ', [64, 64])
('shape: ', [2000, 1, 1, 64])
('hidden shape: ', [2000, 64])
('shape: ', [13068, 1, 1, 64])
('hidden shape: ', [13068, 64])


In [93]:
num_steps = 20000

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')

    # Batches are consecutive windows over the training set; the start
    # offset wraps around so that a full batch always fits.
    offset_modulus = train_Y.shape[0] - batch_size
    for step in range(num_steps):
        start = (step * batch_size) % offset_modulus
        window = slice(start, start + batch_size)
        batch_data = train_X[window]
        batch_labels = train_Y[window]

        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction],
            feed_dict={tf_train_dataset: batch_data,
                       tf_train_labels: batch_labels})

        # Only report every 2000th step.
        if step % 2000 != 0:
            continue

        # Label columns 1..5 hold the five digit classes that the five output
        # heads are trained against.
        print('Minibatch loss at step %d: %f' % (step, batch_loss))
        print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels[:, 1:6]))
        print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_Y[:, 1:6]))

    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_Y[:, 1:6]))
    # Checkpointing is currently disabled; no `saver` object is created in
    # the cells shown here.
    #save_path = saver.save(session, "SVHN_MODEL.ckpt")
    #print("Model saved in file: %s" % save_path)


Initialized
Minibatch loss at step 0: 20.761406
Minibatch accuracy: 22.5%
Validation accuracy: 70.6%
Minibatch loss at step 2000: 3.722440
Minibatch accuracy: 76.9%
Validation accuracy: 71.6%
Minibatch loss at step 4000: 3.419424
Minibatch accuracy: 77.8%
Validation accuracy: 71.6%
Minibatch loss at step 6000: 3.312357
Minibatch accuracy: 77.5%
Validation accuracy: 71.5%
Minibatch loss at step 8000: 3.126363
Minibatch accuracy: 79.7%
Validation accuracy: 71.4%
Minibatch loss at step 10000: 3.133566
Minibatch accuracy: 81.9%
Validation accuracy: 71.3%
Minibatch loss at step 12000: 3.787938
Minibatch accuracy: 75.6%
Validation accuracy: 71.4%
Minibatch loss at step 14000: 3.424648
Minibatch accuracy: 78.8%
Validation accuracy: 71.3%
Minibatch loss at step 16000: 2.987230
Minibatch accuracy: 80.0%
Validation accuracy: 71.4%
Minibatch loss at step 18000: 3.039293
Minibatch accuracy: 80.6%
Validation accuracy: 71.4%
Test accuracy: 82.9%
