In [1]:
#http://deeplearning.net/tutorial/logreg.html
import pickle,timeit
import numpy,theano,theano.tensor as T
from dlutil import LogisticRegression, load_data

In [2]:
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,dataset='mnist.pkl.gz',batch_size=600):
    """Train a LogisticRegression model with minibatch SGD and early stopping.

    The model is validated every `validation_frequency` minibatches. Whenever
    the mean validation error improves, the test error is recomputed and
    printed, and the classifier is pickled to 'best_model.pkl' (overwriting
    any previous file). A summary is printed when training stops.

    Parameters
    ----------
    learning_rate : float
        Factor applied to the gradients of W and b in each SGD update.
    n_epochs : int
        Maximum number of passes over the training minibatches.
    dataset : str
        Passed unchanged to `load_data`; the result is indexed as
        [train, valid, test], each an (x, y) pair.
    batch_size : int
        Number of rows per minibatch. Batch counts use floor division, so a
        trailing partial batch in any split is never visited.

    Returns
    -------
    None
    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of full minibatches per split (any remainder rows are dropped).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches  = test_set_x.get_value(borrow=True).shape[0]  // batch_size

    print('... building the model')

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # data
    y = T.ivector('y') # labels
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10) # Each MNIST image has size 28*28
    cost = classifier.negative_log_likelihood(y)

    # Plain gradient-descent update rule for both parameters.
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # Symbolic row range of minibatch `index`; shared by all three functions.
    i_from = index  * batch_size
    i_to   = i_from + batch_size
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[i_from: i_to],
            y: test_set_y[i_from: i_to]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[i_from: i_to],
            y: valid_set_y[i_from: i_to]
        }
    )

    # Returns the minibatch cost and applies `updates` as a side effect.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[i_from: i_to],
            y: train_set_y[i_from: i_to]
        }
    )

    print('... training the model')
    patience = 5000  # run at least this many minibatch iterations before stopping early
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # Validate once per epoch, or every patience // 2 minibatches if that is
    # fewer. Floor division keeps the frequency an integer under Python 3.
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    quit_flag = False
    while (epoch < n_epochs) and not quit_flag:
        epoch += 1
        for minibatch_index in range(n_train_batches):
            # Called for its side effect (the parameter update); the returned
            # cost is not used.
            train_model(minibatch_index)
            # Zero-based count of minibatches processed across all epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_loss = numpy.mean([validate_model(i) for i in range(n_valid_batches)])
                if validation_loss < best_validation_loss:
                    # extend patience only if the improvement is significant
                    if validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = validation_loss
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('    epoch %i, minibatch %i/%i, test error of best model %f %%'
                        ) %(epoch,minibatch_index + 1,n_train_batches,test_score * 100.))

                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f) # save the best model

            # Early stop: patience exhausted without a recent significant gain.
            if patience <= iteration:
                quit_flag = True
                break
    processing_time = timeit.default_timer() - start_time
    print('Optimisation completed')
    print('Best validation score of %f %%, test performance %f %%' % (best_validation_loss * 100., test_score * 100.))
    print('%d epochs ran for %fs with %f epochs/sec' % (epoch, processing_time, 1. * epoch / processing_time))

In [3]:
def predict(n_examples=10):
    """Print the saved model's predictions for the first test-set examples.

    Unpickles the classifier from 'best_model.pkl', compiles a Theano function
    mapping `classifier.input` to `classifier.y_pred`, and prints the
    predictions for the first `n_examples` rows of the 'mnist.pkl.gz' test
    split returned by `load_data`.

    Parameters
    ----------
    n_examples : int
        Number of leading test rows to predict. Defaults to 10, matching the
        printed message.

    Returns
    -------
    None
    """
    # NOTE: pickle.load can execute arbitrary code while unpickling; only load
    # a 'best_model.pkl' that came from a trusted source.
    with open('best_model.pkl', 'rb') as f:
        classifier = pickle.load(f)
    predict_model = theano.function(inputs=[classifier.input], outputs=classifier.y_pred)

    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, _ = datasets[2]  # labels are not needed for prediction
    test_set_x = test_set_x.get_value()

    # One slice, sized by the same count the message reports.
    predicted_values = predict_model(test_set_x[:n_examples])
    print("Predicted values for the first %d examples in test set:" % n_examples)
    print(predicted_values)

In [4]:
if __name__ == '__main__':
    # Run the full training loop; it pickles the best model to 'best_model.pkl'.
    sgd_optimization_mnist()
    # Left disabled: uncomment to print predictions from the saved model.
    #predict()

... loading data
... building the model
... training the model
    epoch 1, minibatch 83/83, test error of best model 12.375000 %
    epoch 2, minibatch 83/83, test error of best model 10.958333 %
    epoch 3, minibatch 83/83, test error of best model 10.312500 %
    epoch 4, minibatch 83/83, test error of best model 9.833333 %
    epoch 5, minibatch 83/83, test error of best model 9.479167 %
    epoch 6, minibatch 83/83, test error of best model 9.291667 %
    epoch 7, minibatch 83/83, test error of best model 9.000000 %
    epoch 8, minibatch 83/83, test error of best model 8.958333 %
    epoch 9, minibatch 83/83, test error of best model 8.812500 %
    epoch 10, minibatch 83/83, test error of best model 8.666667 %
    epoch 11, minibatch 83/83, test error of best model 8.520833 %
    epoch 12, minibatch 83/83, test error of best model 8.416667 %
    epoch 13, minibatch 83/83, test error of best model 8.291667 %
    epoch 14, minibatch 83/83, test error of best model 8.281250 %
    e