In [10]:
%reset -f
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np
import pdb
from time import time
%matplotlib inline

In [11]:
from tensorflow.contrib.layers.python.layers import fully_connected, convolution2d, max_pool2d, flatten

Why doesn't the network perform as well as the examples online? One reason could be the we perform no image preprocessing, like stretching, whitening, rotating and flipping. Another could be that we are implementing the layer operations manually and are not using existing libraries. Let's see if that makes a difference.

We define the Chunk and CIPHAR classes just as before.

In [12]:
class Chunk():
    def __init__(self,chunk_filename):
        self.chunk = self.unpickle(chunk_filename)
        self.images = np.array([self.format_image(image) for image in self.chunk['data']])
        self.labels = np.array([self.one_hot(label) for label in self.chunk['labels']])
        self.idx = 0
        
    def one_hot(self,key):
        vector = np.zeros(10, dtype=int)
        vector[key] = int(1)
        return vector
                       
        
    def unpickle(self,chunk):
        import cPickle
        fo = open(chunk, 'rb')
        dictionary = cPickle.load(fo)
        fo.close()
        return dictionary
    def format_image(self,vector):
        return vector.reshape(-1,32,32).transpose(1,2,0)
    
    def next_batch(self,number):
        if self.idx < len(self.labels):
            batch_images = self.images[self.idx : self.idx + number]
            batch_labels = self.labels[self.idx : self.idx + number]
            self.idx += number
            return batch_images, batch_labels
        else:
            raise Exception('Reached end of chunk')
            
            

class CIPHAR10():
    def __init__(self,test_filepath,train_filepaths):
        self.test_data = self.load_data(test_filepath)
        self.train_data = [self.load_data(chunk) for chunk in train_filepaths]
        
    def load_data(self,filepath):
        return Chunk(filepath)

train_filepaths = ['./data/cifar-10-batches-py/data_batch_{}'.format(i) for i in range(1,6)]
test_filepaths = './data/cifar-10-batches-py/test_batch'

In [13]:
def lrn(x):
    return tf.nn.lrn(x, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

Now however we are using layer from the library `tensorflow.contrib.layers.python.layers`. These are reliable implementations that have been communtity tested.

In [34]:
tf.reset_default_graph() 
X = tf.placeholder(tf.float32, shape = [None, 32, 32, 3])
Y = tf.placeholder(tf.float32, shape = [None, 10])
dropout_prob = tf.placeholder(tf.float32)

conv1 = convolution2d(X, 128, [3,3])
pool1 = max_pool2d(conv1, (3,3), (2,2), padding='SAME')
lrn1 = lrn(pool1)
lrn1 = tf.nn.dropout(lrn1, dropout_prob)
conv2 = convolution2d(pool1, 256, [3,3])
pool2 = max_pool2d(conv2, (3,3), (2,2), padding='SAME')
lrn2 = lrn(pool2)
lrn2 = tf.nn.dropout(lrn2, dropout_prob)
conv3 = convolution2d(lrn2, 512, [3,3])
pool3 = max_pool2d(conv3, (3,3), (2,2), padding='SAME')
lrn3 = lrn(pool3)
lrn3 = tf.nn.dropout(lrn3, dropout_prob)


flat1 = flatten(lrn3)
fc1 = fully_connected(flat1, 2000)
fc1 = tf.nn.dropout(fc1, dropout_prob)
fc2 = fully_connected(fc1, 1000)
fc2 = tf.nn.dropout(fc2, dropout_prob)


y = fully_connected(fc2, 10, None)

In [35]:
cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(y,Y))
optimiser = tf.train.AdamOptimizer(0.0005).minimize(cost)
results = tf.cast(tf.equal(tf.arg_max(y,1), tf.arg_max(Y,1)), tf.float32)
count = tf.reduce_sum(results)
accuracy = tf.reduce_mean(results)
init = tf.initialize_all_variables()

batch_size = 500
n_batches = 20
epochs = 50
n_random_select = 200
test_batch_size = 10

In [36]:
with tf.Session() as sess:
    sess.run(init)
    ciphar10 = CIPHAR10(test_filepaths,train_filepaths)

    for ep in range(epochs):
        t = time()
        for chunk in ciphar10.train_data:
            for b in range(n_batches):
                b_images, b_labels = chunk.next_batch(batch_size)
                _, c = sess.run([optimiser, cost], feed_dict={X:b_images, Y:b_labels, dropout_prob:0.5})
            chunk.idx = 0
        epoch_time = time() - t
        print "processed epoch {} in {}s, at {} images/s".format(ep, epoch_time, 50000 / epoch_time)
        
#         if ep % 10 == 0:
        total_count = 0
        for i in range(1000):
            t_batch = ciphar10.test_data.images[test_batch_size*i:test_batch_size*i + test_batch_size]
            t_labels = ciphar10.test_data.labels[test_batch_size*i:test_batch_size*i + test_batch_size]
            cst, cnt, acc = sess.run([cost,count,accuracy], feed_dict={X: t_batch, Y: t_labels, dropout_prob:1})
            total_count += cnt
        print "Loss: {}. Accuracy on total test set: {}".format(cst,total_count/100)
            

#         if ep % 10 == 0:


processed epoch 0 in 23.3828620911s, at 2138.31821807 images/s
Loss: 21.570230484. Accuracy on total test set: 33.27
processed epoch 1 in 22.395179987s, at 2232.62327113 images/s
Loss: 17.7248802185. Accuracy on total test set: 37.49
processed epoch 2 in 22.3864250183s, at 2233.49641397 images/s
Loss: 16.4685516357. Accuracy on total test set: 43.65
processed epoch 3 in 22.3926529884s, at 2232.87522143 images/s
Loss: 16.5044059753. Accuracy on total test set: 47.23
processed epoch 4 in 22.3886089325s, at 2233.27854583 images/s
Loss: 15.5775403976. Accuracy on total test set: 52.84
processed epoch 5 in 22.411823988s, at 2230.96522741 images/s
Loss: 13.1671161652. Accuracy on total test set: 53.5
processed epoch 6 in 22.4050939083s, at 2231.63536848 images/s
Loss: 13.0299911499. Accuracy on total test set: 58.05
processed epoch 7 in 22.4167859554s, at 2230.47140207 images/s
Loss: 12.226805687. Accuracy on total test set: 62.5
processed epoch 8 in 22.4067959785s, at 2231.46584848 images/s

In [None]:
sess.close()

We see that we reach a maximum of 80% and then starts overfitting, even with dropout layers inbetween each compute layers. Not quite as high as the 86% that is known to be possible with these sorts of networks. These sorts of results can be achieved with clever image preprocessing.