# **Chain Rule**

# **Import the relevant packages and create feed_forward and mean squared_error**

In [1]:
from copy import deepcopy
import numpy as np
def line():
    print('='*80)
def feed_forward(inputs,outputs,weights):
    pre_hidden = np.dot(inputs,weights[0])+weights[1]
    hidden = 1/1+np.exp(-pre_hidden)
    out = np.dot(hidden,weights[2])+weights[3]
    mean_squared_error = np.mean(np.square(out-outputs))
    return mean_squared_error

# **Updating weights**

In [2]:
def update_weights(inputs, outputs, weights, lr):
    original_weights = deepcopy(weights)
    temp_weights = deepcopy(weights)
    updated_weights = deepcopy(weights)
    original_loss = feed_forward(inputs, outputs, original_weights)
    for i, layer in enumerate(original_weights):
        for index, weight in np.ndenumerate(layer):
            temp_weights = deepcopy(weights)
            temp_weights[i][index] += 0.0001
            _loss_plus = feed_forward(inputs, outputs, temp_weights)
            grad = (_loss_plus - original_loss)/(0.0001)
            updated_weights[i][index] -= grad*lr
    return updated_weights

# **DataSet**

In [3]:
x = np.array([[1,1]]); y = np.array([[0]]) 

# **Initialize the weight and bias values randomly.**

The hidden layer has three units in it and each input node is connected to each of the hidden layer units. Hence, there are a total of six weight values and three bias values – one bias and two weights (two weights coming from two input nodes) corresponding to each of the hidden units. Additionally, the final layer has one unit that is connected to the three units of the hidden layer. Hence, a total of three weights and one bias dictate the value of the output layer. The randomly initialized weights are as follows:

In [4]:
W = [
    np.array([[-0.0053, 0.3793], 
              [-0.5820, -0.5204],
              [-0.2723, 0.1896]], dtype=np.float32).T, 
    np.array([-0.0140, 0.5607, -0.0628], dtype=np.float32), 
    np.array([[ 0.1528,-0.1745,-0.1135]],dtype=np.float32).T, 
    np.array([-0.5516], dtype=np.float32)
]

In [6]:
line()
print('loss'.upper())
print(feed_forward(x,y,W))
line()

print('Weights:'.upper())
[print(w) for w in W]
line()

print("updating weights".upper())
for epx in range(1):
    updated_weights=update_weights(x,y,W,1)
[print(w) for w in updated_weights]


LOSS
1.0229782093232396
WEIGHTS:
[[-0.0053 -0.582  -0.2723]
 [ 0.3793 -0.5204  0.1896]]
[-0.014   0.5607 -0.0628]
[[ 0.1528]
 [-0.1745]
 [-0.1135]]
[-0.5516]
UPDATING WEIGHTS
[[-0.22093554  0.02481993 -0.0067985 ]
 [ 0.16369256  0.08641996  0.45514107]]
[-0.22963554  1.1675199   0.20274107]
[[3.5867105]
 [5.324828 ]
 [4.248618 ]]
[1.4714833]


[None, None, None, None]

# **Chain rule**

Calculating the updated weight value using chain rule...

In [7]:
pre_hidden = np.dot(x,W[0])+W[1]
hidden = 1/(1+np.exp(-pre_hidden))
predicted_value = np.dot(hidden, W[2]) + W[3]

# **Apply this formula...**

-2*(y-y^) * w21 *a11 *(1-a11) * x1

In [10]:
tmp = W[0][0][0] -(-2*(0-(predicted_value[0][0]))*(W[2][0][0])*hidden[0,0]*(1-hidden[0,0])*x[0][0])
print(tmp, updated_weights[0][0][0])

0.03748860333147175 -0.22093554


In [12]:

tmp = W[0][0][1] - (-2*(0-(predicted_value[0][0]))*(W[2][1][0])*hidden[0,1]*(1-hidden[0,1])*x[0][0])
print(tmp, updated_weights[0][0][1])

-0.6289373468565382 0.024819935


In [13]:
tmp = W[0][0][2] - (-2*(0-(predicted_value[0][0]))*(W[2][2][0])*hidden[0,2]*(1-hidden[0,2])*x[0][0])
print(tmp, updated_weights[0][0][0])

-0.304951263947996 -0.22093554


In [14]:
tmp = W[0][1][0] - (-2*(0-(predicted_value[0][0]))*(W[2][0][0])*hidden[0,0]*(1-hidden[0,0])*x[0][1])
print(tmp, updated_weights[0][1][0])

0.42208860145914084 0.16369256


In [15]:
tmp = W[0][1][1] - (-2*(0-(predicted_value[0][0]))*(W[2][1][0])*hidden[0,1]*(1-hidden[0,1])*x[0][1])
print(tmp, updated_weights[0][1][1])

-0.5673373173880019 0.08641996


In [16]:
tmp = W[0][1][2] - (-2*(0-(predicted_value[0][0]))*(W[2][2][0])*hidden[0,2]*(1-hidden[0,2])*x[0][1])
print(tmp, updated_weights[0][1][2])

0.15694874675699821 0.45514107


In [17]:
tmp = W[1][0] - (-2*(0-(predicted_value[0][0]))*hidden[0,0]*(1-hidden[0,0])*W[2][0][0])
print(tmp, updated_weights[1][0])

0.028788602743620932 -0.22963554


In [18]:
tmp = W[1][1] - (-2*(0-(predicted_value[0][0]))*hidden[0,1]*(1-hidden[0,1])*W[2][1][0])
print(tmp, updated_weights[1][1])

0.5137626696420274 1.1675199


In [19]:
tmp = W[1][2] - (-2*(0-(predicted_value[0][0]))*hidden[0,2]*(1-hidden[0,2])*W[2][2][0])
print(tmp, updated_weights[1][2])

-0.0954512566166247 0.20274107


In [20]:
tmp = W[2][0][0]-(-2*(0-(predicted_value[0][0]))*hidden[0][0])
print(tmp, updated_weights[2][0][0])

0.8342055621416937 3.5867105


In [21]:
tmp = W[2][1][0]-(-2*(0-(predicted_value[0][0]))*hidden[0][1])
print(tmp, updated_weights[2][1][0])

0.2509642654210383 5.324828


In [22]:
tmp = W[2][2][0]-(-2*(0-(predicted_value[0][0]))*hidden[0][2])
print(tmp, updated_weights[2][2][0])

0.422898309408289 4.248618


In [23]:
tmp = W[3][0]-(-2*(0-(predicted_value[0][0])))
print(tmp, updated_weights[3][0])

0.6052061234525776 1.4714833
