In [1]:
import numpy as np
import math

Build the NN

In [2]:
class ANN(object):
    """Small fully connected feed-forward network trained by mini-batch gradient descent.

    Data layout:
      * ``feedforward`` and ``backpropagation`` work on column batches, i.e.
        arrays of shape ``(n_units, batch)`` with one sample per column.
      * ``train`` takes one sample per row (``x[k]`` / ``y[k]`` is sample k)
        and transposes each mini-batch into the column layout itself.
      * ``weights[i]`` is applied as ``weights[i].dot(a) + biases[i]``, so it is
        expected to have shape ``(layers[i+1], layers[i])`` and ``biases[i]``
        shape ``(layers[i+1], 1)``.
    """

    _UNKNOWN_ACTIVATION_MESSAGE = 'Unknown activation function. linear is used'

    def __init__(self, layers=None, activations=None, weights=None, biases=None):
        """Store the network description.

        Args:
            layers: number of units per layer, input layer first.
                Defaults to ``[2, 3, 2]``.
            activations: activation name per non-input layer ('sigmoid',
                'linear' or 'relu'). Defaults to ``['sigmoid', 'sigmoid']``.
            weights: list of weight matrices, one per non-input layer.
                Defaults to an empty list (no parameters are generated).
            biases: list of bias column vectors, one per non-input layer.
                Defaults to an empty list.

        Raises:
            AssertionError: if ``len(layers) != len(activations) + 1``.
        """
        # ``None`` sentinels instead of mutable defaults, so that instances
        # never share (and accidentally mutate) the same default list.
        layers = [2, 3, 2] if layers is None else layers
        activations = ['sigmoid', 'sigmoid'] if activations is None else activations
        assert(len(layers) == len(activations)+1)
        self.layers = layers
        self.activations = activations
        self.weights = [] if weights is None else weights
        self.biases = [] if biases is None else biases

    def feedforward(self, x):
        """Propagate ``x`` through the network.

        Args:
            x: input batch with one sample per column.

        Returns:
            ``(node_value, output_value)`` where ``node_value[i]`` is the
            pre-activation value of layer ``i+1`` and ``output_value[i]`` is the
            activated output of layer ``i`` (``output_value[0]`` is a copy of
            ``x``), so ``output_value`` is one element longer than ``node_value``.
        """
        a = np.copy(x)
        node_value = []
        output_value = [a]
        for i in range(len(self.weights)):
            activation_function = self.getActivationFunction(self.activations[i])
            node_value.append(self.weights[i].dot(a) + self.biases[i])
            a = activation_function(node_value[-1])
            output_value.append(a)
        return (node_value, output_value)

    def backpropagation(self, y, node_value, output_value):
        """Compute batch-averaged update directions for weights and biases.

        The output error is taken as ``y - output``, so the returned values
        already point in the descent direction of the squared error and are
        meant to be *added* to the parameters (as ``train`` does).

        Args:
            y: targets with one sample per column, shape ``(n_outputs, batch)``.
            node_value: pre-activation values returned by ``feedforward``.
            output_value: layer outputs returned by ``feedforward``.

        Returns:
            ``(dw, db)``: lists with one array per layer, shaped like the
            corresponding entries of ``self.weights`` and ``self.biases``.
        """
        n_layers = len(self.weights)
        # deltas[i] is the error signal at the pre-activation of layer i+1.
        deltas = [None] * n_layers
        last_derivative = self.getDerivitiveActivationFunction(self.activations[-1])
        deltas[-1] = (y - output_value[-1]) * last_derivative(node_value[-1])
        # Walk backwards, pushing the error through each weight matrix.
        for i in reversed(range(n_layers - 1)):
            derivative = self.getDerivitiveActivationFunction(self.activations[i])
            deltas[i] = self.weights[i+1].T.dot(deltas[i+1]) * derivative(node_value[i])

        batch_size = y.shape[1]
        # Multiplying by a column of ones sums the deltas over the batch axis.
        ones = np.ones((batch_size, 1))
        db = [d.dot(ones) / float(batch_size) for d in deltas]
        dw = [d.dot(output_value[i].T) / float(batch_size) for i, d in enumerate(deltas)]
        return dw, db

    @staticmethod
    def _as_columns(rows):
        """Turn a slice of row-samples into a (n_features, batch) column batch."""
        # Flattening every sample first also accepts inputs such as (N, n, 1).
        return rows.reshape(len(rows), -1).T

    def train(self, x, y, batch_size=10, epochs=100, lr=0.01, verbose=True):
        """Fit the network with mini-batch gradient descent.

        Args:
            x: inputs, one sample per row (first axis indexes samples).
            y: targets, one sample per row, same number of rows as ``x``.
            batch_size: number of samples per update; the last batch of an
                epoch may be smaller.
            epochs: number of passes over the data.
            lr: learning rate.
            verbose: when True (default) print the update directions, the new
                parameters and the batch loss after every update.

        Raises:
            ValueError: if ``batch_size`` is not positive or ``x`` and ``y``
                hold a different number of samples.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if len(x) != len(y):
            raise ValueError("x and y must contain the same number of samples")

        n_samples = len(y)
        for _ in range(epochs):
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                x_batch = self._as_columns(x[start:stop])
                y_batch = self._as_columns(y[start:stop])

                node_value, output_value = self.feedforward(x_batch)
                dw, db = self.backpropagation(y_batch, node_value, output_value)
                # dw/db are descent directions (see backpropagation), hence '+'.
                self.weights = [w+lr*dweight for w, dweight in zip(self.weights, dw)]
                self.biases = [w+lr*dbias for w, dbias in zip(self.biases, db)]
                if verbose:
                    print("dw are {}:".format(dw))
                    print("db are {}:".format(db))
                    print("weights are {}:".format(self.weights))
                    print("biases are {}:".format(self.biases))
                    # Loss of the forward pass made *before* this update.
                    print("loss = {}".format(np.linalg.norm(output_value[-1]-y_batch)))

    @staticmethod
    def _sigmoid(x):
        """Numerically stable logistic function 1 / (1 + exp(-x))."""
        x = np.asarray(x, dtype=float)
        # exp(-|x|) is always in (0, 1], so neither branch can overflow.
        e = np.exp(-np.abs(x))
        return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    @staticmethod
    def getActivationFunction(name):
        """Return the activation callable for ``name``.

        Supported names are 'sigmoid', 'linear' and 'relu'; any other name
        prints a warning and falls back to the linear (identity) activation.
        """
        if(name == 'sigmoid'):
            return ANN._sigmoid
        elif(name == 'linear'):
            return lambda x : x
        elif(name == 'relu'):
            def relu(x):
                y = np.copy(x)
                y[y<0] = 0
                return y
            return relu
        else:
            print(ANN._UNKNOWN_ACTIVATION_MESSAGE)
            return lambda x: x

    @staticmethod
    def getDerivitiveActivationFunction(name):
        """Return the derivative callable of the activation called ``name``.

        Mirrors ``getActivationFunction``: an unknown name prints a warning and
        falls back to the derivative of the linear activation.
        """
        if(name == 'sigmoid'):
            def sigmoid_diff(x):
                s = ANN._sigmoid(x)
                return s*(1-s)
            return sigmoid_diff
        elif(name == 'linear'):
            return lambda x: np.ones_like(x, dtype=float)
        elif(name == 'relu'):
            def relu_diff(x):
                y = np.copy(x)
                # Order matters: negatives are untouched by the first line.
                y[y>=0] = 1
                y[y<0] = 0
                return y
            return relu_diff
        else:
            print(ANN._UNKNOWN_ACTIVATION_MESSAGE)
            return lambda x: np.ones_like(x, dtype=float)

Test the ANN by feeding T1 into the network

In [3]:
  # Manually chosen weights and biases for the 2-3-2 network.
  # Linear activations are used so the homework calculation can be verified
  # by hand; sigmoid is the more common option.
  w = [
      np.array([[0.1, -0.2], [0, 0.2], [0.3, -0.4]]),    # rows: w13,w23 / w14,w24 / w15,w25
      np.array([[-0.4, 0.1, 0.6], [0.2, -0.1, -0.2]]),   # rows: w36,w46,w56 / ...
  ]
  b = [
      np.array([[0.1], [0.2], [0.5]]),                   # b3, b4, b5
      np.array([[-0.1], [0.6]]),
  ]
  nn = ANN([2, 3, 2], activations=['linear', 'linear'], weights=w, biases=b)

  # Two training samples as column vectors, each with its target.
  T1 = np.array([[0.6], [0.1]])
  y1 = np.array([[1], [0]])
  T2 = np.array([[0.2], [0.3]])
  y2 = np.array([[0], [1]])

  # Stack the samples row-wise: data[k] / label[k] is sample k.
  data = np.array([T1[:, 0], T2[:, 0]])
  label = np.array([y1[:, 0], y2[:, 0]])
  

In [4]:
# Forward pass of the first sample; both lists are printed for comparison
# with the hand calculation.
node_value, output_value = nn.feedforward(T1)
for template, values in (
    ("node_value are {}:", node_value),
    ("output_value are {}:", output_value),
):
    print(template.format(values))

node_value are [array([[0.14],
       [0.22],
       [0.64]]), array([[0.25 ],
       [0.478]])]:
output_value are [array([[0.6],
       [0.1]]), array([[0.14],
       [0.22],
       [0.64]]), array([[0.25 ],
       [0.478]])]:


In [5]:
# One epoch with one sample per update, so each printed step can be checked by hand.
nn.train(data, label, batch_size=1, epochs=1, lr=0.1)

dw are [array([[-0.23736, -0.03956],
       [ 0.07368,  0.01228],
       [ 0.32736,  0.05456]]), array([[ 0.105  ,  0.165  ,  0.48   ],
       [-0.06692, -0.10516, -0.30592]])]:
db are [array([[-0.3956],
       [ 0.1228],
       [ 0.5456]]), array([[ 0.75 ],
       [-0.478]])]:
weights are [array([[ 0.076264, -0.203956],
       [ 0.007368,  0.201228],
       [ 0.332736, -0.394544]]), array([[-0.3895  ,  0.1165  ,  0.648   ],
       [ 0.193308, -0.110516, -0.230592]])]:
biases are [array([[0.06044],
       [0.21228],
       [0.55456]]), array([[-0.025 ],
       [ 0.5522]])]:
loss = 0.8893728127169168
dw are [array([[ 0.04833572,  0.07250358],
       [-0.02068842, -0.03103262],
       [-0.06965349, -0.10448024]]), array([[-0.00474438, -0.08965523, -0.16442908],
       [ 0.00857623,  0.16206627,  0.29723205]])]:
db are [array([[ 0.24167859],
       [-0.10344208],
       [-0.34826746]]), array([[-0.32706324],
       [ 0.59121949]])]:
weights are [array([[ 0.08109757, -0.19670564],
       [