## A simple logistic regression example

Start the program

In [None]:
# Announce that the notebook has started running. The call form of print is
# used so this cell matches the print(...) style of the other cells and
# behaves the same under Python 2 and Python 3.
print("Starting program")

Define what kind of data we are loading and predicting.
Data can be:
1) random, in which case we define a random array of values
and output classes
2) mnist, in which case we use the mnist dataset
3) other, in which case we define the dataset we want to use

If data is mnist we will draw the numbers the program misclassified, provided we limit the dataset to 1000 examples or fewer.

In [None]:
# Kind of data to use: "random", "mnist", or another value for a
# user-chosen dataset
data = "random"

# Whether misclassified examples are drawn as images. Only mnist examples are
# drawn; the flag is given a value for every kind of data so that the cells
# reading it later never meet an undefined name.
draw_images = (data == "mnist")

### Start of random data setup

If data is random:

Import needed modules,

Define the number of input neurons (the features) and the number of examples,

Define the number of output classes

Create the data--
    for the data, define a tensor with two entries: 
    the first entry is a matrix of size N (number of examples) by feats (number of features) 
    of random numbers on a normal distribution around 0. 
    The second entry is a vector of size N (number of examples) of integer class labels between 0 and num_classes - 1.

Import needed modules

In [None]:
# numpy provides the arrays and the random numbers, theano the symbolic graph
import numpy
import theano
import theano.tensor as T
# Short alias for numpy's random module; used below to draw the random data
# and the initial weights
rng = numpy.random

In [None]:
if data == "random":

    # Size of the synthetic problem:
    #   N           -> how many examples are generated
    #   feats       -> how many input neurons (features) each example has
    #   num_classes -> how many output classes the labels are drawn from
    N, feats, num_classes = 200, 784, 10

    # D pairs an (N x feats) matrix of normally distributed inputs with a
    # vector of N integer labels drawn from 0 .. num_classes - 1
    D = (
        rng.randn(N, feats),
        rng.randint(low=0, high=num_classes, size=N),
    )
    train_set_x, train_set_y = D

For the data, define a tensor with two entries: the first entry is a matrix of size N (number of examples) by feats (number of features) of random numbers on a normal distribution around 0. The second entry is a vector of size N (number of examples) of integer class labels between 0 and num_classes - 1.

### End of random data setup

### Start of mnist or other data setup

Else, import the needed modules and load the data.

Split the data in input and output (the target results).

In [None]:
if data != "random":

    # Import needed modules (repeated so that this cell can be run on its own)
    import numpy
    import theano
    import theano.tensor as T
    rng = numpy.random
    import os
    import gzip
    import cPickle

    print("Looking for mnist data")

    #############
    # LOAD DATA #
    #############
    # NOTE(review): for any value of data other than "mnist", `dataset` is not
    # assigned here and presumably has to be defined by the user beforehand
    # -- confirm.
    if data == "mnist":
        dataset = 'mnist.pkl.gz'

    # Split the path to find out whether a directory was given
    data_dir, data_file = os.path.split(dataset)

    # If the file does not exist and there was no data_dir specified
    if not os.path.isfile(dataset) and data_dir == "":
        print("File %s not found" % dataset)
        print("Looking in data directory")
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.getcwd(),
            "Python",
            "data",
            "mnist",
            dataset
        )
        dataset = new_path

    # If the dataset had a data_dir or we added the standard
    # data directory, look for the file there
    if not os.path.isfile(dataset):
        print("File %s not found" % dataset)
        print("Downloading mnist file from web")
        # If the file is still not found,
        # we download a copy
        import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        # The download is written straight to `dataset`, so the folder that
        # will contain it has to exist before the download starts
        target_dir = os.path.dirname(dataset)
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        urllib.urlretrieve(origin, dataset)
    else:
        # If we find the file, we notify the user
        print("File %s found" % dataset)

    print('... loading data')
    # Load the dataset
    # NOTE(review): unpickling can execute arbitrary code contained in the
    # file, and the copy above is fetched over plain http; only load dataset
    # files that come from a source you trust.
    f = gzip.open(dataset, 'rb')
    try:
        train_set, valid_set, test_set = cPickle.load(f)
    finally:
        # Close the file even when unpickling fails
        f.close()
    print('... data loaded')

    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix)
    # in which each row corresponds to an example. target is a
    # numpy.ndarray of 1 dimension (a vector) that has the same length as
    # the number of rows in the input. It gives the target
    # of the example with the same index in the input.

    # If we don't want to load all the data,
    # we can just load a subset with a small
    # number of examples.
    # Set it to zero or a negative number
    # if you want to load the whole data set
    training_examples = 100
    validation_examples = 20
    testing_examples = 20

    # This is whether you want to always
    # load the same examples or randomise
    # the subset
    random_examples = True

    def _random_start(total, wanted):
        """Return a random first index for a run of `wanted` out of `total`.

        Every start from 0 to total - wanted is possible, so the last
        examples of the set can be selected too. When the whole set is
        requested (wanted <= 0) or the request covers the whole set
        (wanted >= total) there is nothing to randomise and 0 is returned.
        """
        if wanted <= 0 or wanted >= total:
            return 0
        return numpy.random.randint(total - wanted + 1)

    # The set sizes are read from the loaded data instead of being assumed
    if random_examples:
        start_training = _random_start(len(train_set[0]), training_examples)
        start_validation = _random_start(len(valid_set[0]), validation_examples)
        start_testing = _random_start(len(test_set[0]), testing_examples)
    else:
        start_training = 0
        start_validation = 0
        start_testing = 0

    if training_examples <= 0:
        train_set_x, train_set_y = train_set
    else:
        train_set_x = train_set[0][start_training:training_examples + start_training]
        train_set_y = train_set[1][start_training:training_examples + start_training]

    if validation_examples <= 0:
        valid_set_x, valid_set_y = valid_set
    else:
        valid_set_x = valid_set[0][start_validation:validation_examples + start_validation]
        valid_set_y = valid_set[1][start_validation:validation_examples + start_validation]

    if testing_examples <= 0:
        test_set_x, test_set_y = test_set
    else:
        test_set_x = test_set[0][start_testing:testing_examples + start_testing]
        test_set_y = test_set[1][start_testing:testing_examples + start_testing]

### End of mnist or other data setup

Assign names to the different sets of data

In [None]:
# The training pair (inputs, targets) exists for every kind of data
Train = train_set_x, train_set_y

# The validation and testing pairs are only named for the mnist dataset
if data == "mnist":
    Valid, Test = (valid_set_x, valid_set_y), (test_set_x, test_set_y)

Define training rate and number of training steps.
The training rate defines how fast we move towards
minimising the error (too large a training rate can make
the algorithm overshoot the minimum and never achieve stability).
The number of training steps defines after how many steps we 
stop training our network.

In [None]:
# How many gradient-descent updates are performed before training stops
training_steps = 10000

# Factor by which the gradient is scaled in every update
tr_rate = 0.1

Declare the symbolic variables.
x will represent the input, i.e. a matrix with one row of feats values for each example (for random data this is the D[0] entry defined above).
y will represent the output, i.e. the class each example belongs to (class 0 or class 1 when there are only two classes).

In [None]:
# Number of input neurons: one per column of the training inputs
feats = Train[0].shape[1]

# The multi-class cost uses every label directly as a column index into the
# softmax output, so there has to be one output class for each value from 0
# up to the largest label. Counting from the smallest label that happens to
# be present would give too few classes whenever label 0 is missing from the
# (possibly small, randomly chosen) training subset.
num_classes = int(numpy.amax(train_set_y)) + 1

# Declare Theano symbolic variables
x = T.matrix("x")
if num_classes < 3:
    # Two classes: the targets enter the cross-entropy as numbers
    y = T.vector("y")
else:
    # More classes: the targets are integer class indices
    y = T.ivector("y")

Define the vector of weights and the bias.
There are num_classes weights associated to each feature, where the output is num_classes neurons.
There are num_classes biases since there are num_classes output neurons.
The weights are randomly initialised, the biases can be initialised to 0.0 or a small value.

In [None]:
# Multi-class case: one column of weights and one bias per output class.
# Two-class case: a single weight per feature and a single scalar bias.
# Weights start from random normal values, biases from 0.01.
if num_classes >= 3:
    w = theano.shared(rng.randn(feats, num_classes), name="w")
    b = theano.shared(numpy.full(num_classes, 0.01), name="b")
else:
    w = theano.shared(rng.randn(feats), name="w")
    b = theano.shared(0.01, name="b")

Optional printing of the initial model weights and bias

In [None]:
# Show the starting values of the weights, then of the bias
print("Initial model:")
for parameter in (w, b):
    print(parameter.get_value())

Constructing the actual solution. 
Sigma represents the sigmoid 
<br>
$$ \sigma({\bf x}) = \frac{1}{1+\exp(-{\bf x}\cdot{\bf w}-b)} $$ 
<br>

that is expressed in theano as T.nnet.sigmoid(). 
For a multi-class classification, sigma will be represented by a vector 

$$ \sigma_{1}, \dots, \sigma_{j}, \dots, \sigma_{num\_classes} $$ where

<br>
$$ \sigma_{j} = \frac{\exp({\bf x}\cdot{\bf w}_{j}+b_{j})}{\sum_{i=1}^{num\_classes}\exp({\bf x}\cdot{\bf w}_{i}+b_{i})} $$
<br>

and the theano function representing it is called T.nnet.softmax().

For a 2-class classification, it is enough to check whether sigma is greater than 0.5.
The prediction can either be 0 or 1 (the two classes) depending on whether the sigmoid is greater or less than 0.5.
The cost function is defined by 

<br>
$$ 
error({\bf w}) = -\frac{1}{N} \sum_{i=1}^{N} \left[ y^i \ln (\sigma({\bf x}^i)) + (1-y^i) \ln (1 - \sigma({\bf x}^i)) \right] 
$$
<br>

where the superscript represents the $i^{th}$ example.
For a multi-class classification, the cost function is modified to be:

<br>
$$ 
error({\bf w}) = -\frac{1}{N} \sum_{i=1}^{N} \sum_{j=1}^{num\_classes} 1\{ y^{i}=j\} \ln (\sigma_{j}({\bf x}^i)) 
$$
<br>

which in theano is computed by taking, for each example, the logarithm of the T.nnet.softmax() output for the correct class and averaging over the examples. The prediction will be the output class with the highest value, i.e. the argmax of the output sigma.
The cost adds a value to reduce the possibility of overfitting by keeping larger weights in check.
Finally, theano will calculate the gradient of the cost function that is used for approximating the solution using gradient descent.



In [None]:
# Build the Theano expression graph. The two-class and the multi-class case
# use a different activation, prediction rule and cross-entropy expression.
if num_classes < 3:
    # Sigmoid output, i.e. 1 / (1 + exp(-x.w - b)), thresholded at 0.5
    sigma = T.nnet.sigmoid(T.dot(x, w) + b)
    prediction = sigma > 0.5
    print("Using two classes")
    # Cross-entropy of every example
    xent = -y * T.log(sigma) - (1 - y) * T.log(1 - sigma)
else:
    # Softmax output; the predicted class is the one with the highest value
    sigma = T.nnet.softmax(T.dot(x, w) + b)
    prediction = T.argmax(sigma, axis=1)
    print("Using %i classes" % num_classes)
    # Mean over the examples of minus the log of the output picked by the label
    xent = -T.mean(T.log(sigma)[T.arange(y.shape[0]), y])

# The cost is the mean cross-entropy plus a penalty on the squared weights
weight_penalty = 0.01 * (w ** 2).sum()
cost = xent.mean() + weight_penalty

# Gradient of the cost with respect to the weights and the bias
gw, gb = T.grad(cost, [w, b])

We create the theano function.
The input is given by the set of features per each example.
The output is given by the class per each example.
The training is performed by updating weights and biases using the gradient calculated times a training rate (in order to avoid overshooting the minimum value).

In [None]:
# Compile the two Theano functions.
# train: takes the inputs and the targets, returns the cross-entropy and
# moves w and b one step of size tr_rate against their gradients.
train = theano.function(
          inputs=[x,y],
          outputs=[xent],
          updates=((w, w - tr_rate * gw), (b, b - tr_rate * gb)),
          allow_input_downcast=True)
# predict: takes the inputs and returns the predicted classes. It accepts
# down-casting of its input exactly like train does, so data that can be used
# for training can also be used for prediction when Theano is configured
# with a lower precision than the data.
predict = theano.function(
          inputs=[x],
          outputs=prediction,
          allow_input_downcast=True)

Perform the actual training on the data. 
This updates the weights and bias at each step, making the neural net perform better and get closer to the target solution.

In [None]:
# Call the compiled train function training_steps times on the training set
train_inputs, train_targets = Train
for _step in range(training_steps):
    train(train_inputs, train_targets)

Optional printing of the final model weights and bias

In [None]:
# Show the values of the weights, then of the bias, after training
print("Final model:")
for parameter in (w, b):
    print(parameter.get_value())

Printing of the target values (the classes) and the prediction by our model.

In [None]:
# Every entry pairs the heading printed above the targets with the
# (inputs, targets) pair it refers to. The validation and testing pairs
# only exist for the mnist dataset.
report_sets = [("target values:", Train)]
if data == "mnist":
    report_sets.append(("validation values:", Valid))
    report_sets.append(("test values:", Test))

# Print the targets of each set followed by the model's predictions
for heading, (set_inputs, set_targets) in report_sets:
    print(heading)
    print(set_targets)
    print("prediction:")
    print(predict(set_inputs))

Calculate the errors, i.e. the numbers of examples in the training set that have not been classified correctly and output the accuracy result.

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.image as img
import matplotlib.pyplot as plt
import matplotlib.cm as cm

matplotlib.rcParams['figure.max_open_warning'] = 0

print "Results for Training set"

N = Train[1].shape[0]
error = 0

if N > 1000 : draw_images = False
result = predict(Train[0])
for index in range(N):
    
    act =  numpy.asscalar(Train[1][index])
    prd =  numpy.asscalar(result[index])
    if act != prd:
        error += 1
        print "Predicted %i, actual value %i" %(prd, act) 
        
        if draw_images :
            tmp = numpy.reshape(Train[0][index], [28, 28])
            plt.figure()
            plt.imshow(tmp, cmap = cm.Greys_r)
        
        
correct_guesses = N - error
accuracyTr = (N - error)*100./N

print
print "correct predictions on training data = %i over %i examples" % (correct_guesses, N)
print "accuracy on training data = %f%%" % accuracyTr
print

Calculate the errors, i.e. the numbers of examples in the validation set that have not been classified correctly and output the accuracy result

In [None]:
if data == "mnist":
    print("Results for Validation set")

    valid_inputs, valid_targets = Valid

    # Number of validation examples and the model's prediction for each
    N = valid_targets.shape[0]
    error = 0
    result = predict(valid_inputs)

    for index, (target, guess) in enumerate(zip(valid_targets, result)):
        # Plain Python numbers for the comparison and the message
        act = target.item()
        prd = guess.item()
        if act == prd:
            continue

        # A misclassified example: count it, report it, optionally draw it
        error += 1
        print("Predicted %i, actual value %i" % (prd, act))
        if draw_images:
            tmp = numpy.reshape(valid_inputs[index], [28, 28])
            plt.figure()
            plt.imshow(tmp, cmap=cm.Greys_r)

    correct_guesses = N - error
    accuracyV = correct_guesses * 100. / N

    print("")
    print("correct predictions on validation data = %i over %i examples" % (correct_guesses, N))
    print("accuracy on validation data = %f%%" % accuracyV)
    print("")

Calculate the errors, i.e. the numbers of examples in the testing set that have not been classified correctly and output the accuracy result.

In [None]:
if data == "mnist":
    print("Results for Testing set")

    test_inputs, test_targets = Test

    # Number of testing examples and the model's prediction for each
    N = test_targets.shape[0]
    error = 0
    result = predict(test_inputs)

    for index, (target, guess) in enumerate(zip(test_targets, result)):
        # Plain Python numbers for the comparison and the message
        act = target.item()
        prd = guess.item()
        if act == prd:
            continue

        # A misclassified example: count it, report it, optionally draw it
        error += 1
        print("Predicted %i, actual value %i" % (prd, act))
        if draw_images:
            tmp = numpy.reshape(test_inputs[index], [28, 28])
            plt.figure()
            plt.imshow(tmp, cmap=cm.Greys_r)

    correct_guesses = N - error
    accuracyTs = correct_guesses * 100. / N

    print("")
    print("correct predictions on testing data = %i over %i examples" % (correct_guesses, N))
    print("accuracy on testing data = %f%%" % accuracyTs)
    print("")


In [None]:
# Collect the accuracy lines to print: always the training accuracy, and for
# the mnist dataset also the validation and the testing accuracy
summary = [("accuracy on training data = %f%%", accuracyTr)]
if data == "mnist":
    summary.append(("accuracy on validation data = %f%%", accuracyV))
    summary.append(("accuracy on testing data = %f%%", accuracyTs))

# Every accuracy line is surrounded by one empty line above and one below
for line_format, accuracy in summary:
    print("")
    print(line_format % accuracy)
    print("")