In [1]:
import tensorflow as tf
import numpy as np
import cPickle as pickle
import math
import os
import gzip

## Loading Dataset

Adapted from [Classifying MNIST digits using Logistic Regression](http://deeplearning.net/tutorial/logreg.html)

In [2]:
def load_data():
    dataset = "mnist.pkl.gz"
    if ( not os.path.isfile(dataset) ):
        print "Download from {}".format("http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz")
        return None
    else:
        print "Loading data..."

        # Load the dataset
        with gzip.open(dataset, 'rb') as f:
            try:
                train, valid, test = pickle.load(f, encoding='latin1')
            except:
                train, valid, test = pickle.load(f)
                
        ''' Reformat '''
        train = ( train[0], reformat( train[1] ) )
        test = ( test[0], reformat( test[1] ) )
        valid = ( valid[0], reformat( valid[1] ) )
        
        print 'Training set', train[0].shape, train[1].shape
        print 'Validation set', valid[0].shape, valid[1].shape
        print 'Test set', test[0].shape, test[1].shape
        return train, test, valid
    
def reformat( vec ):
    ''' Convert vector into a one-hot vector '''
    return ( np.arange( max( vec ) + 1 ) == vec[ :,None ] ).astype( np.float32 )

## Deep Neural Network implementation

In [3]:
class DeepNeuralNetwork:
    def __init__( self, n_in, n_out, test, valid, hidden_layers, activation=tf.nn.sigmoid, batch_size=128, learning_rate=0.01 ):
        self.graph = tf.Graph()
        with self.graph.as_default():

            ''' Training dataset, given in mini-batches '''
            self.tf_train = ( tf.placeholder( tf.float32, shape=(batch_size, n_in ) ), tf.placeholder( tf.float32, shape=(batch_size, n_out ) ) )

            ''' Validation dataset '''
            tf_valid = ( tf.cast( tf.constant( valid[0] ), tf.float32 ), tf.cast( tf.constant( valid[1] ), tf.float32 ) )

            ''' Testing dataset '''
            tf_test = ( tf.cast( tf.constant( test[0] ), tf.float32 ), tf.cast( tf.constant( test[1] ), tf.float32 ) )

            ''' Model '''
            self.weights = [] #Weights list
            self.bias = [] #Bias list

            ''' L2 Regularization to avoid overfitting '''
            self.l2_reg = 0.

            '''Inputs'''
            train_input = self.tf_train[0]
            valid_input = tf_valid[0]
            test_input = tf_test[0]

            layer_in = n_in #number of incoming connections to the layer
            ''' Add hidden layers '''
            for layer_out, hdf in hidden_layers:
                train_input = self._add_layer( train_input, layer_in, layer_out, activation=activation, dropout=hdf, l2_reg=True )
                valid_input = self._add_layer( valid_input, layer_in, layer_out, activation=activation, weights=self.weights[-1], bias=self.bias[-1] )
                test_input = self._add_layer( test_input, layer_in, layer_out, activation=activation, weights=self.weights[-1], bias=self.bias[-1] )
                ''' Number of input connections to next layer is the number of output connections of the current layer '''
                layer_in = layer_out
                
            ''' Output layers '''
            train_logits = self._add_layer( train_input, layer_in, n_out )
            valid_logits = self._add_layer( valid_input, layer_in, n_out, weights=self.weights[-1], bias=self.bias[-1] )
            test_logits = self._add_layer( test_input, layer_in, n_out, weights=self.weights[-1], bias=self.bias[-1] )

            ''' Cross-Entropy Cost function '''
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(train_logits, self.tf_train[1])) + 0.0001 * self.l2_reg

            ''' Adagrad '''
            self.optimizer = tf.train.AdagradOptimizer( learning_rate ).minimize( self.cost )

            ''' Prediction functions '''
            self.train_pred = tf.nn.softmax( train_logits )
            self.valid_pred = tf.nn.softmax( valid_logits )
            self.test_pred = tf.nn.softmax( test_logits )

    def _add_layer( self, input, n_in, n_out, activation=None, weights=None, bias=None, dropout=None, l2_reg=False ):
        if( weights is None ):
            ''' Xavier init '''
            init_range = math.sqrt(6.0 / (n_in + n_out))
            init_w = tf.random_uniform( [n_in,n_out], -init_range, init_range)
            weights = tf.cast( tf.Variable( init_w ), tf.float32 )
            self.weights.append( weights )

        if( bias is None ):
            bias = tf.cast( tf.Variable( tf.zeros( [ n_out ] ) ), tf.float32 )
            self.bias.append( bias )

        if( l2_reg ):
            ''' L2 regularization '''
            l2_reg = tf.nn.l2_loss( weights )
            self.l2_reg += l2_reg

        layer = tf.matmul( input, weights ) + bias
        if( activation is not None ):
            layer = activation( layer )

        if( dropout is not None ):
            ''' Dropout + scaling '''
            layer = tf.nn.dropout( layer, 1-dropout ) * 1/( 1- dropout )

        return layer

Function to compute accuracy given the prediction and the actual target values. `pred` is assumed to be in one-hot vector

In [4]:
def accuracy( pred, labels ):
    return ( 100.0 * np.sum( np.argmax( pred, 1 ) == np.argmax( labels, 1 ) ) / pred.shape[0] )

Main function. Load dataset, create DNN model and perform training over minibatches. Validate the model every 100 epochs and finally test the model

In [5]:
if __name__ == '__main__':
    ''' Dataset '''
    train,valid,test = load_data()
    train_X = train[0]
    train_Y = train[1]

    ''' Params '''
    n_epochs = 5000 #Number of epochs
    batch_size = 128 #Batch size
    learning_rate = 0.01 #Learning rate
    hidden_layers = [ ( 1024, 0.5 ), ( 1024, 0.5 ) ] #Number of hidden neurons and corresponding dropout factor
    n_in = train[0].shape[1] #Number of input neurons
    n_out = train[1].shape[1] #Number of ouptut neurons - number of classes

    ''' Model '''
    dnn = DeepNeuralNetwork( n_in, n_out, test, valid, hidden_layers, tf.nn.relu, batch_size, learning_rate )

    with tf.Session( graph = dnn.graph ) as session:
        ''' Initialize TensorFlow variables '''
        tf.initialize_all_variables().run()
        for ep in range( n_epochs ):
            ''' Mini-batching '''
            offset = (ep * batch_size) % (train_Y.shape[0] - batch_size)
            batch_X = train_X[ offset:(offset + batch_size) ]
            batch_Y = train_Y[ offset:(offset + batch_size) ]

            ''' Input to placeholders '''
            feed_dict = { dnn.tf_train[0]:batch_X, dnn.tf_train[1]:batch_Y }

            ''' Train step '''
            _, cost, train_pred = session.run( [ dnn.optimizer, dnn.cost, dnn.train_pred ], feed_dict=feed_dict )

            if( ep % 100 == 0 ):
                print "Cost at {} - {}".format( ep, cost )
                print "Training accuracy : {}".format( accuracy( train_pred, batch_Y ) )
                print "Validation accuracy : {}".format( accuracy( dnn.valid_pred.eval(), valid[1] ) )

        ''' Testing '''
        print "Test accuracy : {}".format( accuracy( dnn.test_pred.eval(), test[1] ) )

Loading data...
Training set (50000, 784) (50000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)
Cost at 0 - 3.41609025002
Training accuracy : 9.375
Validation accuracy : 24.96
Cost at 100 - 0.543551087379
Training accuracy : 84.375
Validation accuracy : 89.68
Cost at 200 - 0.432214826345
Training accuracy : 88.28125
Validation accuracy : 91.99
Cost at 300 - 0.360873788595
Training accuracy : 93.75
Validation accuracy : 92.73
Cost at 400 - 0.447462260723
Training accuracy : 88.28125
Validation accuracy : 93.4
Cost at 500 - 0.427373856306
Training accuracy : 86.71875
Validation accuracy : 94.07
Cost at 600 - 0.277434289455
Training accuracy : 93.75
Validation accuracy : 94.51
Cost at 700 - 0.335170388222
Training accuracy : 93.75
Validation accuracy : 94.65
Cost at 800 - 0.401612192392
Training accuracy : 92.1875
Validation accuracy : 94.75
Cost at 900 - 0.25052523613
Training accuracy : 96.09375
Validation accuracy : 95.25
Cost at 1000 - 0.380655646324
Tr