Neural Network
=============
Ref: https://rolisz.ro/2013/04/18/neural-networks-in-python/

Define activation functions and their derivatives:
----------------------------

In [1]:
import numpy as np
def tanh(x):
    """Hyperbolic-tangent activation; applied element-wise by NumPy."""
    activated = np.tanh(x)
    return activated
def tanh_deriv(x):
    """Return the derivative of tanh evaluated at ``x``.

    d/dx tanh(x) = 1 - tanh(x)**2.  The square is essential: without it the
    value is not the slope of tanh (e.g. at x = 1 the slope is about 0.42,
    whereas 1 - tanh(1) is about 0.24).

    :param x: A scalar or NumPy array; the computation is element-wise.
    :return: ``1 - tanh(x)**2`` with the same shape as ``x``.
    """
    return 1.0 - np.tanh(x) ** 2
def logistic(x):
    """Logistic (sigmoid) activation: 1 / (1 + e^(-x)), element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def logistic_derivative(x):
    """Derivative of the logistic function at ``x``: s * (1 - s), s = logistic(x)."""
    sigmoid = logistic(x)
    return sigmoid * (1 - sigmoid)

We need to set the number of neurons in each layer, initialize their weights randomly between -0.25 and 0.25, and choose the activation function to be used. Each layer, except the last one, will also have a bias unit, which corresponds to the threshold value for the activation.

In [13]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by stochastic gradient descent.

    ``self.weights`` holds one matrix per pair of consecutive layers.  Every
    matrix has one extra input row (for the bias unit), and every matrix except
    the last has one extra output column, so the next layer also receives an
    additional unit.
    """

    def __init__(self, layers, activation='tanh'):
        """
        :param layers: A list containing the number of units in each layer.
        Should be at least two values
        :param activation: The activation function to be used. Can be
        "logistic" or "tanh"
        :raises ValueError: If fewer than two layers are given or the
        activation name is not recognised.
        """
        if len(layers) < 2:
            raise ValueError(
                "layers must contain at least two values, got %d" % len(layers))

        if activation == 'logistic':
            self.activation = logistic
            self.activation_deriv = logistic_derivative
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_deriv
        else:
            # Fail here rather than with an AttributeError deep inside fit().
            raise ValueError(
                "activation must be 'logistic' or 'tanh', got %r" % (activation,))

        # Weights are drawn uniformly from [-0.25, 0.25).
        self.weights = []
        # Input/hidden -> hidden: +1 row for the incoming bias unit and
        # +1 column for the extra unit of the receiving hidden layer.
        for n_in, n_out in zip(layers[:-2], layers[1:-1]):
            self.weights.append(
                (2 * np.random.random((n_in + 1, n_out + 1)) - 1) * 0.25)
        # Last matrix: no extra column, the output layer has no bias unit.
        # Indexing from the end keeps this valid for a two-layer network too.
        self.weights.append(
            (2 * np.random.random((layers[-2] + 1, layers[-1])) - 1) * 0.25)

    def fit(self, X, y, learning_rate=0.2, epochs=10000):
        """Adjust the weights from input vectors ``X`` and targets ``y``.

        The algorithm is stochastic gradient descent: each iteration picks one
        training sample at random, runs a forward pass, and back-propagates the
        error for that single sample.

        :param X: Training inputs, one sample per row (a single sample is
        promoted to one row).
        :param y: Target values, indexed by the same sample index as ``X``.
        :param learning_rate: Step size; scales how much the weights change
        in proportion to the error.
        :param epochs: Number of single-sample update iterations to perform.
        """
        X = np.atleast_2d(X)
        with_bias = np.ones([X.shape[0], X.shape[1] + 1])
        with_bias[:, 0:-1] = X  # adding the bias unit to the input layer
        X = with_bias
        y = np.array(y)

        for _ in range(epochs):
            sample = np.random.randint(X.shape[0])

            # Forward pass.  The pre-activations are kept as well because the
            # activation derivative must be evaluated at them, not at the
            # activations.
            activations = [X[sample]]
            pre_activations = []
            for weight in self.weights:
                z = np.dot(activations[-1], weight)
                pre_activations.append(z)
                activations.append(self.activation(z))

            # Backward pass: output delta first, then towards the input.
            error = y[sample] - activations[-1]
            deltas = [error * self.activation_deriv(pre_activations[-1])]
            for l in range(len(self.weights) - 1, 0, -1):
                # pre_activations[l - 1] produced activations[l], the layer
                # that feeds self.weights[l].
                deltas.append(
                    deltas[-1].dot(self.weights[l].T)
                    * self.activation_deriv(pre_activations[l - 1]))
            deltas.reverse()

            # error = target - output, so adding the outer product of layer
            # input and delta moves the output towards the target.
            for idx, (layer_in, delta) in enumerate(zip(activations, deltas)):
                layer_in = np.atleast_2d(layer_in)
                delta = np.atleast_2d(delta)
                self.weights[idx] += learning_rate * layer_in.T.dot(delta)

    def predict(self, x):
        """Run a forward pass for one input vector.

        :param x: A single 1-D sample (without the bias entry).
        :return: The activations of the output layer.
        """
        x = np.array(x)
        augmented = np.ones(x.shape[0] + 1)
        augmented[0:-1] = x  # last entry stays 1: the bias unit
        output = augmented
        for weight in self.weights:
            output = self.activation(np.dot(output, weight))
        return output


Testing
------
**XOR**

In [17]:
# XOR truth table: the four input pairs and their expected outputs.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
y = np.array([0, 1, 1, 0])

# 2 inputs -> 2 hidden units -> 1 output, tanh activation.
nn = NeuralNetwork([2, 2, 1], 'tanh')
nn.fit(X, y)

# Show the network's output next to each input pair.
for sample in X:
    print (sample, nn.predict(sample))

(array([0, 0]), array([ 0.06458202]))
(array([0, 1]), array([ 0.82248188]))
(array([1, 0]), array([ 0.81177321]))
(array([1, 1]), array([-0.31999041]))


**Digits dataset**

This has 1797 8x8 pixel images of digits with their labels. Let's see what accuracy we can get on them. We will have to transform the labels from values (such as 1 or 5) to vectors of 10 elements, which are all 0 except for the position corresponding to the label, which will be 1.

In [25]:
import numpy as np
try:
    # train_test_split lives in model_selection since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only provide the legacy module.
    from sklearn.cross_validation import train_test_split
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
#from NeuralNetwork import NeuralNetwork

digits = load_digits()
X = digits.data
y = digits.target
X -= X.min()  # normalize the values to bring them into the range 0-1
X /= X.max()

# 64 input pixels -> 100 hidden units -> 10 outputs (one per digit).
nn = NeuralNetwork([64, 100, 10], 'logistic')
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit one binarizer on all labels so the train and test encodings share the
# same column layout even if a split happens to miss a class.
binarizer = LabelBinarizer().fit(y)
labels_train = binarizer.transform(y_train)
labels_test = binarizer.transform(y_test)

nn.fit(X_train, labels_train, epochs=30000)

# The predicted digit is the index of the strongest output unit.
predictions = [np.argmax(nn.predict(sample)) for sample in X_test]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))


[[51  0  0  0  0  1  0  0  0  0]
 [ 0 44  0  0  0  2  0  0  0  1]
 [ 0  1 51  1  0  0  0  0  0  0]
 [ 0  1  0 42  0  1  0  1  0  0]
 [ 0  0  0  0 34  0  0  0  0  1]
 [ 0  0  0  0  0 41  0  0  0  3]
 [ 0  0  0  0  0  0 43  0  0  0]
 [ 0  0  0  0  0  0  0 41  0  0]
 [ 0  1  0  0  0  0  0  0 34  4]
 [ 0  0  0  0  0  1  0  0  0 50]]
             precision    recall  f1-score   support

          0       1.00      0.98      0.99        52
          1       0.94      0.94      0.94        47
          2       1.00      0.96      0.98        53
          3       0.98      0.93      0.95        45
          4       1.00      0.97      0.99        35
          5       0.89      0.93      0.91        44
          6       1.00      1.00      1.00        43
          7       0.98      1.00      0.99        41
          8       1.00      0.87      0.93        39
          9       0.85      0.98      0.91        51

avg / total       0.96      0.96      0.96       450

