# **Backpropogation**

* Was published in **"Learning representations by back-propating errors"** by **D. Rumelhart, Geoffrey E. Hinton, Ronald J. Williams**
    * **Reference: https://www.iro.umontreal.ca/~vincentp/ift3395/lectures/backprop_old.pdf**

* The paper enabled the world to come out of AI winter!!! 

* Understanding it mathematically with expressions is available and taught by most of the courses. What they miss out on is the implementation of this algorithm in code.

* The notebook will implement back propogation for two simple datasets to solve regression and classification problems

* **Reference: https://www.youtube.com/watch?v=ma6hWrU-LaI**


### **Solving Regression using Backpropagation**

In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.DataFrame([[8,8,4],[7,9,5],[6,10,6],[5,12,7]],columns=['cgpa','resume_score','lpa'])
df

Unnamed: 0,cgpa,resume_score,lpa
0,8,8,4
1,7,9,5
2,6,10,6
3,5,12,7


In [3]:
def initialize_parameters(layer_dims):
    '''
        Layer_dims: neural network architecture, eg: [2,2,1]
        It will create weights and biases (trainable params) and will provide random value
        {temporarily w=0.1,b=0}
    '''
    np.random.seed(3)
    parameters={}
    L=len(layer_dims)
    for l in range(1,L):
        parameters[f'W{l}']=np.ones((layer_dims[l-1],layer_dims[l]))*0.1
        parameters[f'b{l}']=np.zeros((layer_dims[l],1))
    return parameters

In [4]:
initialize_parameters([2,2,1])

{'W1': array([[0.1, 0.1],
        [0.1, 0.1]]),
 'b1': array([[0.],
        [0.]]),
 'W2': array([[0.1],
        [0.1]]),
 'b2': array([[0.]])}

In [5]:
def linear_forward(A_prev,W,b):
    '''
        Calculates output of any given neuron
    '''
    return W.T @ A_prev +b

In [6]:
# Forward propogation

def L_Layer_forward(X,parameters):
    A=X
    L=len(parameters)//2    # no of layers of a Neural Network

    for l in range(1,L+1):
        A_prev=A
        Wl=parameters[f"W{l}"]
        bl=parameters[f"b{l}"]

        # print(f"A{l-1}: {A_prev}")
        # print(f"W{l}: {Wl}")
        # print(f"b{l}: {bl}")
        # print("--"*20)

        A=linear_forward(A_prev,Wl,bl)
        # print(f"A{l}: {A}")
        # print("**"*20)
    return A,A_prev

In [7]:
X=df[['cgpa','resume_score']].values[0].reshape(2,1) # shape(no of features, no of training example)
y=df[['lpa']].values[0][0]

# parameter initialization
parameters=initialize_parameters([2,2,1])
print(X)
print(y)
print(parameters)

[[8]
 [8]]
4
{'W1': array([[0.1, 0.1],
       [0.1, 0.1]]), 'b1': array([[0.],
       [0.]]), 'W2': array([[0.1],
       [0.1]]), 'b2': array([[0.]])}


In [8]:
L_Layer_forward(X,parameters)

(array([[0.32]]),
 array([[1.6],
        [1.6]]))

In [9]:
def update_parameters(parameters,y,y_hat,A1,X):
    eta=0.001
    parameters['W2'][0][0]=parameters['W2'][0][0]+(eta*2*(y-y_hat)*A1[0][0])
    parameters['W2'][1][0]=parameters['W2'][1][0]+(eta*2*(y-y_hat)*A1[1][0])
    parameters['b2'][0][0]=parameters['W2'][1][0]+(eta*2*(y-y_hat))

    parameters['W1'][0][0]=parameters['W1'][0][0]+(eta*2*(y-y_hat)*parameters['W2'][0][0]*X[0][0])
    parameters['W1'][0][1]=parameters['W1'][0][1]+(eta*2*(y-y_hat)*parameters['W2'][0][0]*X[1][0])
    parameters['b1'][0][0]=parameters['b1'][0][0]+(eta*2*(y-y_hat)*parameters['W2'][0][0])

    parameters['W1'][1][0]=parameters['W1'][1][0]+(eta*2*(y-y_hat)*parameters['W2'][1][0]*X[0][0])
    parameters['W1'][1][1]=parameters['W1'][1][1]+(eta*2*(y-y_hat)*parameters['W2'][1][0]*X[1][0])
    parameters['b1'][1][0]=parameters['b1'][1][0]+(eta*2*(y-y_hat)*parameters['W2'][1][0])

In [10]:
X=df[['cgpa','resume_score']].values[0].reshape(2,1) # shape(no of features, no of training example)
y=df[['lpa']].values[0][0]

# parameter initialization
parameters=initialize_parameters([2,2,1])
y_hat,A1=L_Layer_forward(X,parameters)

In [11]:
y_hat=y_hat[0][0]
y_hat

0.32000000000000006

In [12]:
A1

array([[1.6],
       [1.6]])

In [13]:
update_parameters(parameters,y,y_hat,A1,X)

In [14]:
parameters

{'W1': array([[0.10658137, 0.10658137],
        [0.10658137, 0.10658137]]),
 'b1': array([[0.00082267],
        [0.00082267]]),
 'W2': array([[0.111776],
        [0.111776]]),
 'b2': array([[0.119136]])}

In [15]:
X=df[['cgpa','resume_score']].values[1].reshape(2,1) # shape(no of features, no of training example)
y=df[['lpa']].values[0][0]

y_hat,A1=L_Layer_forward(X,parameters)
y_hat=y_hat[0][0]

In [16]:
update_parameters(parameters,y,y_hat,A1,X)

In [17]:
parameters

{'W1': array([[0.11264256, 0.11437433],
        [0.11264256, 0.11437433]]),
 'b1': array([[0.00168856],
        [0.00168856]]),
 'W2': array([[0.12371702],
        [0.12371702]]),
 'b2': array([[0.13071593]])}

In [18]:
# epoch implementation
parameters=initialize_parameters([2,2,1])
epochs=5

for i in range(epochs):
    Loss=[]

    for j in range(df.shape[0]):
        X=df[['cgpa','resume_score']].values[j].reshape(2,1) 
        y=df[['lpa']].values[j][0]

        # forward propagation
        y_hat,A1=L_Layer_forward(X,parameters)
        y_hat=y_hat[0][0]

        # backward propogation
        update_parameters(parameters,y,y_hat,A1,X)

        Loss.append((y-y_hat)**2)
    print(f"Epoch: {i+1}. Loss: {np.array(Loss).mean()}")

parameters



Epoch: 1. Loss: 25.321744156025517
Epoch: 2. Loss: 18.320004165722047
Epoch: 3. Loss: 9.473661050729628
Epoch: 4. Loss: 3.2520938634031613
Epoch: 5. Loss: 1.3407132589299962


{'W1': array([[0.26507636, 0.38558861],
        [0.27800387, 0.40980287]]),
 'b1': array([[0.02749056],
        [0.02974394]]),
 'W2': array([[0.41165744],
        [0.48302736]]),
 'b2': array([[0.48646246]])}

**Implementation in Keras**

In [19]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense

model=Sequential()
model.add(Dense(2,activation='linear',input_dim=2))
model.add(Dense(1,activation='linear'))
model.summary()

2024-03-03 23:10:43.554870: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-03 23:10:43.554968: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-03 23:10:43.558036: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-03 23:10:43.569616: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 2)                 6         
                                                                 
 dense_1 (Dense)             (None, 1)                 3         
                                                                 
Total params: 9 (36.00 Byte)
Trainable params: 9 (36.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
model.get_weights()

[array([[ 0.83731353, -0.85734546],
        [-1.014962  ,  0.06906283]], dtype=float32),
 array([0., 0.], dtype=float32),
 array([[-0.5341526],
        [-1.3330967]], dtype=float32),
 array([0.], dtype=float32)]

In [21]:
new_weights=[np.array([[0.1,  0.1],
        [0.1,  0.1]], dtype=np.float32),
 np.array([0., 0.], dtype=np.float32),
 np.array([[0.1],
        [0.1]], dtype=np.float32),
 np.array([0.], dtype=np.float32)]

In [22]:
model.set_weights(new_weights)
model.get_weights()

[array([[0.1, 0.1],
        [0.1, 0.1]], dtype=float32),
 array([0., 0.], dtype=float32),
 array([[0.1],
        [0.1]], dtype=float32),
 array([0.], dtype=float32)]

In [23]:
optimizer=keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='mean_squared_error',optimizer=optimizer)

In [24]:
model.fit(df.iloc[:,0:-1].values,df['lpa'].values,epochs=75,verbose=1,batch_size=1)

Epoch 1/75


Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.src.callbacks.History at 0x7f7dc04f79a0>

In [25]:
model.get_weights()

[array([[0.03950404, 0.03950404],
        [0.5673128 , 0.5673128 ]], dtype=float32),
 array([0.23207285, 0.23207285], dtype=float32),
 array([[0.43774492],
        [0.43774492]], dtype=float32),
 array([0.24062337], dtype=float32)]

### **Solving Classification problem using Backpropogation**

In [26]:
import numpy as np
import pandas as pd

In [28]:
df=pd.DataFrame([[8,8,1],[7,9,1],[6,10,0],[5,5,0]],columns=['cgpa','resume_score','placed'])
df.head()

Unnamed: 0,cgpa,resume_score,placed
0,8,8,1
1,7,9,1
2,6,10,0
3,5,5,0


In [29]:
def initialize_parameters(layer_dims):
    '''
        Layer_dims: neural network architecture, eg: [2,2,1]
        It will create weights and biases (trainable params) and will provide random value
        {temporarily w=0.1,b=0}
    '''
    np.random.seed(3)
    parameters={}
    L=len(layer_dims)
    for l in range(1,L):
        parameters[f'W{l}']=np.ones((layer_dims[l-1],layer_dims[l]))*0.1
        parameters[f'b{l}']=np.zeros((layer_dims[l],1))
    return parameters

In [30]:
initialize_parameters([2,2,1])

{'W1': array([[0.1, 0.1],
        [0.1, 0.1]]),
 'b1': array([[0.],
        [0.]]),
 'W2': array([[0.1],
        [0.1]]),
 'b2': array([[0.]])}

In [32]:
# Utility functions
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [34]:
def linear_forward(A_prev,W,b):
    '''
        Calculates output of any given neuron
    '''
    return sigmoid(W.T @ A_prev +b)

In [None]:
# Forward propogation

def L_Layer_forward(X,parameters):
    A=X
    L=len(parameters)//2    # no of layers of a Neural Network

    for l in range(1,L+1):
        A_prev=A
        Wl=parameters[f"W{l}"]
        bl=parameters[f"b{l}"]

        # print(f"A{l-1}: {A_prev}")
        # print(f"W{l}: {Wl}")
        # print(f"b{l}: {bl}")
        # print("--"*20)

        A=linear_forward(A_prev,Wl,bl)
        # print(f"A{l}: {A}")
        # print("**"*20)
    return A,A_prev

In [35]:
def update_parameters(parameters,y,y_hat,A1,X):
    eta=0.001
    parameters['W2'][0][0]=parameters['W2'][0][0]+(eta*(y-y_hat)*A1[0][0])
    parameters['W2'][1][0]=parameters['W2'][1][0]+(eta*(y-y_hat)*A1[1][0])
    parameters['b2'][0][0]=parameters['W2'][1][0]+(eta*(y-y_hat))

    parameters['W1'][0][0]=parameters['W1'][0][0]+(eta*(y-y_hat)*parameters['W2'][0][0]*A1[0][0]*(1-A1[0][0])*X[0][0])
    parameters['W1'][0][1]=parameters['W1'][0][1]+(eta*(y-y_hat)*parameters['W2'][0][0]*A1[0][0]*(1-A1[0][0])*X[1][0])
    parameters['b1'][0][0]=parameters['b1'][0][0]+(eta*(y-y_hat)*parameters['W2'][0][0]*A1[0][0]*(1-A1[0][0]))

    parameters['W1'][1][0]=parameters['W1'][1][0]+(eta*(y-y_hat)*parameters['W2'][1][0]*A1[1][0]*(1-A1[1][0])*X[0][0])
    parameters['W1'][1][1]=parameters['W1'][1][1]+(eta*(y-y_hat)*parameters['W2'][1][0]*A1[1][0]*(1-A1[1][0])*X[1][0])
    parameters['b1'][1][0]=parameters['b1'][1][0]+(eta*(y-y_hat)*parameters['W2'][1][0]*A1[1][0]*(1-A1[1][0]))

In [38]:
# epoch implementation
parameters=initialize_parameters([2,2,1])
epochs=50

for i in range(epochs):
    Loss=[]

    for j in range(df.shape[0]):
        X=df[['cgpa','resume_score']].values[j].reshape(2,1) 
        y=df[['placed']].values[j][0]

        # forward propagation
        y_hat,A1=L_Layer_forward(X,parameters)
        y_hat=y_hat[0][0]

        # backward propogation
        update_parameters(parameters,y,y_hat,A1,X)

        Loss.append(-y*np.log(y_hat)-(1-y)*np.log(1-y_hat))
    print(f"Epoch: {i+1}. Loss: {np.array(Loss).mean()}")

parameters

Epoch: 1. Loss: 0.71050114771481
Epoch: 2. Loss: 0.6993819908638181
Epoch: 3. Loss: 0.6993584295593506
Epoch: 4. Loss: 0.6993349670157292
Epoch: 5. Loss: 0.6993116027845855
Epoch: 6. Loss: 0.6992883364197799
Epoch: 7. Loss: 0.6992651674773883
Epoch: 8. Loss: 0.6992420955156897
Epoch: 9. Loss: 0.6992191200951554
Epoch: 10. Loss: 0.6991962407784353
Epoch: 11. Loss: 0.699173457130348
Epoch: 12. Loss: 0.6991507687178667
Epoch: 13. Loss: 0.6991281751101095
Epoch: 14. Loss: 0.6991056758783265
Epoch: 15. Loss: 0.6990832705958882
Epoch: 16. Loss: 0.699060958838274
Epoch: 17. Loss: 0.6990387401830616
Epoch: 18. Loss: 0.699016614209914
Epoch: 19. Loss: 0.6989945805005696
Epoch: 20. Loss: 0.6989726386388295
Epoch: 21. Loss: 0.698950788210548
Epoch: 22. Loss: 0.6989290288036194
Epoch: 23. Loss: 0.6989073600079687
Epoch: 24. Loss: 0.6988857814155395
Epoch: 25. Loss: 0.6988642926202835
Epoch: 26. Loss: 0.6988428932181494
Epoch: 27. Loss: 0.6988215828070719
Epoch: 28. Loss: 0.6988003609869615
Epoch: 

{'W1': array([[0.09950872, 0.09857616],
        [0.09951369, 0.09857676]]),
 'b1': array([[-0.00031786],
        [-0.00031795]]),
 'W2': array([[0.09240871],
        [0.0924183 ]]),
 'b2': array([[0.09186164]])}

**Implementation in Keras**

In [39]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense

model=Sequential()
model.add(Dense(2,activation='sigmoid',input_dim=2))
model.add(Dense(1,activation='sigmoid'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 2)                 6         
                                                                 
 dense_3 (Dense)             (None, 1)                 3         
                                                                 
Total params: 9 (36.00 Byte)
Trainable params: 9 (36.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [40]:
model.get_weights()

[array([[-1.0494909 , -0.39669693],
        [ 0.16728997,  0.66143227]], dtype=float32),
 array([0., 0.], dtype=float32),
 array([[0.4054867],
        [0.3735199]], dtype=float32),
 array([0.], dtype=float32)]

In [41]:
new_weights=[np.array([[0.1,  0.1],
        [0.1,  0.1]], dtype=np.float32),
 np.array([0., 0.], dtype=np.float32),
 np.array([[0.1],
        [0.1]], dtype=np.float32),
 np.array([0.], dtype=np.float32)]

In [42]:
model.set_weights(new_weights)
model.get_weights()

[array([[0.1, 0.1],
        [0.1, 0.1]], dtype=float32),
 array([0., 0.], dtype=float32),
 array([[0.1],
        [0.1]], dtype=float32),
 array([0.], dtype=float32)]

In [45]:
optimizer=keras.optimizers.Adam(learning_rate=0.01) 
model.compile(loss='binary_crossentropy',optimizer=optimizer)

In [46]:
model.fit(df.iloc[:,0:-1].values,df['placed'].values,epochs=75,verbose=1,batch_size=1)

Epoch 1/75


Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75
Epoch 74/75
Epoch 75/75


<keras.src.callbacks.History at 0x7f7dc01c8040>

In [47]:
model.get_weights()

[array([[ 0.2344019 ,  0.2344019 ],
        [-0.03048659, -0.03048659]], dtype=float32),
 array([-0.36952424, -0.36952424], dtype=float32),
 array([[0.11297886],
        [0.11297886]], dtype=float32),
 array([-0.10295635], dtype=float32)]