In [1]:
import numpy as np
import scipy

In [2]:
class Neural_Network(object):
    def __init__(self, Lambda=0):        
        #Define Hyperparameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        
        #Weights (parameters)
        self.W1 = np.random.randn(self.inputLayerSize,self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
        
        #Regularization Parameter:
        self.Lambda = Lambda
        
    def forward(self, X):
        #Propogate inputs though network
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3) 
        return yHat
        
    def sigmoid(self, z):
        #Apply sigmoid activation function to scalar, vector, or matrix
        return 1/(1+np.exp(-z))
    
    def sigmoidPrime(self,z):
        #Gradient of sigmoid
        return np.exp(-z)/((1+np.exp(-z))**2)
    
    def costFunction(self, X, y):
        #Compute cost for given X,y, use weights already stored in class.
        self.yHat = self.forward(X)
        J = 0.5*sum((y-self.yHat)**2)/X.shape[0] + (self.Lambda/2)*(sum(self.W1**2)+sum(self.W2**2))
        return J
        
    def costFunctionPrime(self, X, y):
        #Compute derivative with respect to W and W2 for a given X and y:
        self.yHat = self.forward(X)
        
        delta3 = np.multiply(-(y-self.yHat), self.sigmoidPrime(self.z3))
        #Add gradient of regularization term:
        dJdW2 = np.dot(self.a2.T, delta3)/X.shape[0] + self.Lambda*self.W2
        
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        #Add gradient of regularization term:
        dJdW1 = np.dot(X.T, delta2)/X.shape[0] + self.Lambda*self.W1
        
        return dJdW1, dJdW2
    
    #Helper functions for interacting with other methods/classes
    def getParams(self):
        #Get W1 and W2 Rolled into vector:
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params
    
    def setParams(self, params):
        #Set W1 and W2 using single parameter vector:
        W1_start = 0
        W1_end = self.hiddenLayerSize*self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], \
                             (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], \
                             (self.hiddenLayerSize, self.outputLayerSize))
        
    def computeGradients(self, X, y):
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

In [4]:
x = np.array(([1,1],[3,4],[5,7]), dtype = float)
y = np.array(([2],[6],[8]), dtype = float)

In [7]:
x = np.multiply(x, 0.1)
y = np.multiply(y,0.01)

In [48]:
print x

[[ 0.01  0.01]
 [ 0.03  0.04]
 [ 0.05  0.07]]


In [70]:
print y*100

[[ 2.]
 [ 6.]
 [ 8.]]


In [5]:
NN = Neural_Network()

In [78]:
NN.forward(x)*100

array([[ 50.90821177],
       [ 50.00001469],
       [ 50.        ]])

In [19]:
NN.costFunction(x,y)

array([ 0.3310407,  0.3310407,  0.3310407])

In [35]:
dx, dy = NN.costFunctionPrime(x,y)

In [23]:
print dx, dy

[[ 0.00076926  0.00129997  0.00052349]
 [ 0.00102348  0.00172955  0.00069648]] [[ 0.04660858]
 [ 0.04744152]
 [ 0.04658436]]


In [36]:
NN.W1 = NN.W1 - 10* dx
NN.w2 = NN.W2 - 10* dy

In [77]:
for i in range(0,10000):
    dx, dy = NN.costFunctionPrime(x,y)
    NN.W1 = NN.W1 - 100* dx
    NN.w2 = NN.W2 - 100* dy