<a href="https://colab.research.google.com/github/tunde99/TUTORIALS/blob/main/multivariable_linear_regression_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reference: click [here](https://www.youtube.com/watch?v=F2h1Ex7evVA&list=PL3Fc8qNch2_TA33HJRrKbtBrjMN-u5n7_&index=5)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#1: Initialise parameters w, b
def initialise_parameters(len_w):
    """Create the starting weights and bias for the linear model.

    Args:
        len_w: number of weights to create (the model passes the number of
            rows of the feature matrix).

    Returns:
        Tuple ``(w, b)``: ``w`` is a ``(1, len_w)`` array of standard-normal
        samples drawn from NumPy's global random generator, and ``b`` is the
        integer ``0``.
    """
    weights = np.random.standard_normal(size=(1, len_w))
    bias = 0
    return weights, bias

In [None]:
#2: compute forward propagation
def forward_prop(X, w, b):
    """Return the linear model's output ``np.dot(w, X) + b``.

    Shapes, as noted by the original author: ``w`` is (1, n), ``X`` is (n, m)
    and the returned ``z`` is (1, m). ``b`` is added to the product using
    NumPy broadcasting.
    """
    return np.dot(w, X) + b

In [None]:
#3: compute the cost
def cost_function(z, y):
    """Return the halved mean squared error between predictions and targets.

    Computes ``J = 1 / (2 * m) * sum((z - y) ** 2)`` where ``m`` is the number
    of examples. This is the cost whose gradient with respect to ``z`` is
    ``(z - y) / m``, i.e. the ``dz`` used by ``back_prop``.

    Args:
        z: predictions, shape (1, m).
        y: targets, 2-D with the examples along axis 1 (``y.shape[1]`` is
            used as ``m``).

    Returns:
        The scalar cost ``J``.
    """
    m = y.shape[1]
    # The parentheses matter: `1/2*m` parses as `(1/2)*m`, which multiplies
    # by m/2 instead of dividing by 2m.
    # np.square is used because `np.pow` only exists in NumPy >= 2.0.
    J = 1 / (2 * m) * np.sum(np.square(z - y))
    return J

In [None]:
#4: compute back propagation
def back_prop(X, y, z, b=None):
    """Return the gradients of the cost with respect to the weights and bias.

    Args:
        X: feature matrix, shape (n, m).
        y: targets, 2-D with the examples along axis 1 (``y.shape[1]`` is
            used as ``m``).
        z: predictions, same shape as ``y``.
        b: unused. Kept (now optional) so that both three-argument and
            four-argument calls work; the gradients do not depend on it.

    Returns:
        Tuple ``(dw, db)``: ``dw`` has shape (1, n) and ``db`` is a scalar.
    """
    m = y.shape[1]
    dz = 1/m * (z-y)        # error averaged over the m examples
    dw = np.dot(dz, X.T)    # (1, m) . (m, n) -> (1, n)
    db = np.sum(dz)
    return dw, db

In [None]:
#5: compute gradient descent update rule
def gradient_descent_update(w, b, dw, db, learning_rate):
    """Take one gradient-descent step and return the new ``(w, b)``.

    Each parameter moves against its gradient, scaled by ``learning_rate``.
    New values are returned; the arguments are not modified in place.
    """
    return w - learning_rate * dw, b - learning_rate * db

In [None]:
# create model
def linear_regression_model(X_train, y_train, X_val, y_val, epochs, learning_rate=None):
    """Train a linear regression model with batch gradient descent.

    Every epoch runs one full pass: forward propagation, cost, gradients and
    a parameter update. The training/validation cost and mean absolute error
    (MAE) are printed each epoch, and the training cost recorded every tenth
    epoch is plotted once training has finished.

    Args:
        X_train, X_val: feature matrices, shape (n, m) with one column per
            example (``X_train.shape[0]`` is used as the number of weights).
        y_train, y_val: targets, 2-D with the examples along axis 1
            (``y.shape[1]`` is used as the example count).
        epochs: number of gradient-descent iterations to run.
        learning_rate: step size for the update rule. When omitted, the
            module-level variable ``learning_rate`` is used, which is what
            the body read before this parameter existed.

    Returns:
        Tuple ``(w, b)`` with the learned weights, shape (1, n), and bias.

    Raises:
        NameError: if ``learning_rate`` is neither passed nor defined at
            module level.
    """
    if learning_rate is None:
        # Backward compatibility: fall back to the global the original body relied on.
        try:
            learning_rate = globals()['learning_rate']
        except KeyError:
            raise NameError(
                "learning_rate must be passed to linear_regression_model "
                "or defined at module level"
            ) from None

    len_w = X_train.shape[0]
    w, b = initialise_parameters(len_w)  #1

    train_costs = []
    m_train = y_train.shape[1]
    m_val = y_val.shape[1]
    # epochs + 1 so that exactly `epochs` iterations run (labelled 1..epochs).
    for i in range(1, epochs + 1):
        z_train = forward_prop(X_train, w, b)  #2
        train_cost = cost_function(z_train, y_train)  #3
        dw, db = back_prop(X_train, y_train, z_train, b)  #4
        w, b = gradient_descent_update(w, b, dw, db, learning_rate)  #5

        # store training costs in a list
        if i % 10 == 0:
            train_costs.append(train_cost)

        # compute MAE
        train_MAE = 1/m_train * np.sum(np.abs(z_train - y_train))

        # compute validation cost and MAE (np.sum makes the MAE a scalar)
        z_val = forward_prop(X_val, w, b)
        val_cost = cost_function(z_val, y_val)
        val_MAE = 1/m_val * np.sum(np.abs(z_val - y_val))

        # print train_cost, val_cost, train_MAE, val_MAE
        print('Epoch ' + str(i) + '/ ' + str(epochs) + ': ' )
        print('Training costs ' + str(train_cost) + '| ' + 'Validation costs ' +  str(val_cost))
        print('Training MAE ' + str(train_MAE) + '| ' + 'Validation MAE ' +  str(val_MAE))

    # visualisation: drawn once, after training, rather than once per epoch.
    plt.plot(train_costs)
    # xlabel/ylabel must be *called*; assigning to them replaces the pyplot
    # functions with strings and leaves the axes unlabelled.
    plt.xlabel('Iterations(per tens)')
    plt.ylabel('Training costs')
    plt.title('Learning rate')
    plt.show()

    return w, b




# Implementation

In [None]:
import sklearn
from sklearn.datasets import load_boston
import pandas as pd
# Seed NumPy's global random generator (np.random.*) with a fixed value.
np.random.seed(99)

In [None]:
# Load the Boston housing dataset through scikit-learn's loader.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell presumably needs scikit-learn < 1.2 — confirm the
# environment's version.
boston = load_boston()


In [None]:
# Container type of the feature matrix.
type(boston.data)

numpy.ndarray

In [None]:
# Peek at the raw feature matrix (NumPy abbreviates the repr of large arrays).
boston.data

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [None]:
# Tabular view of the features. The second positional argument of
# pd.DataFrame is `index`, so the target values become the row labels
# (the leftmost, unnamed column in the printed output).
boston_pd = pd.DataFrame(
    data=boston['data'],
    index=boston['target'],
    columns=boston.feature_names,
)
print(boston_pd.head(5))

         CRIM    ZN  INDUS  CHAS    NOX  ...  RAD    TAX  PTRATIO       B  LSTAT
24.0  0.00632  18.0   2.31   0.0  0.538  ...  1.0  296.0     15.3  396.90   4.98
21.6  0.02731   0.0   7.07   0.0  0.469  ...  2.0  242.0     17.8  396.90   9.14
34.7  0.02729   0.0   7.07   0.0  0.469  ...  2.0  242.0     17.8  392.83   4.03
33.4  0.03237   0.0   2.18   0.0  0.458  ...  3.0  222.0     18.7  394.63   2.94
36.2  0.06905   0.0   2.18   0.0  0.458  ...  3.0  222.0     18.7  396.90   5.33

[5 rows x 13 columns]


In [None]:
# Container type and shape of the target vector, one per line.
print(type(boston.target), boston.target.shape, sep='\n')


<class 'numpy.ndarray'>
(506,)


In [None]:
# split data
# Shuffle the row indices once, then carve the permutation into three
# consecutive, non-overlapping slices: test (10%), validation (10%) and
# training (the remainder).
X = boston['data']
y = boston['target']
m = len(X)
np.random.seed(99)
index = np.random.permutation(m)

val_split = int(0.1*m)
test_split = int(0.1*m)
train_split = m - (val_split + test_split)

# NOTE: despite their names, test_size / val_size / train_size hold arrays
# of row indices, not sizes.
test_size = index[:test_split]
X_test = X[test_size]
y_test = y[test_size]

# The validation slice starts where the test slice ends. Starting it at 0
# would put every test row into the validation set as well.
val_size = index[test_split:test_split+val_split]
X_val = X[val_size]
y_val = y[val_size]

train_size = index[test_split+val_split:]
X_train = X[train_size]
y_train = y[train_size]

# Sanity check: the three index sets are disjoint and together cover every row.
assert len(np.unique(np.concatenate([test_size, val_size, train_size]))) == m

In [None]:
# Shapes of the training features and targets, followed by a blank line.
print(X_train.shape, y_train.shape, '', sep='\n')

(406, 13)
(406,)

