# **Mini-Batch Gradient Descent**

Learnt it using:<br>
**Reference: https://www.youtube.com/watch?v=_scscQ4HVTY**<br><br>
**Note:** *Scroll down for the code part, previous codes are added for comparison*

In [1]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import time

In [2]:
X,y=load_diabetes(return_X_y=True)
print(X.shape)
print(y.shape)

(442, 10)
(442,)


In [3]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=2)

In [4]:
reg=LinearRegression()
reg.fit(X_train,y_train)
print(reg.coef_)
print(reg.intercept_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [5]:
y_pred=reg.predict(X_test)
r2_score(y_test,y_pred)

0.4399338661568968

### **Creating own class for multiple variable linear regression using gradient descent**

In [6]:
class GDRegressor:

    def __init__(self,learning_rate=0.01, epochs=100):
        self.coef_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs

    def fit(self, X_train,y_train):
        # initialize your coefs
        self.intercept_=0
        self.coef_=np.ones(shape=X_train.shape[1])

        for _ in range(self.epochs):
            # update all the coefs and intercept
            y_hat=self.intercept_+(X_train) @ (self.coef_)
            intercept_der=-2*np.mean(y_train-y_hat)
            # vectorization (we don't need to use a loop)
            coef_der=(-2/X_train.shape[0])*((y_train-y_hat).T@(X_train))
            # updating intercept
            self.intercept_=self.intercept_-self.lr*intercept_der
            # updating coefficients
            self.coef_=self.coef_-self.lr*coef_der

        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return self.intercept_+(X_test) @ (self.coef_)

In [7]:
gdr=GDRegressor(epochs=1000,learning_rate=0.5)
start=time.time()
gdr.fit(X_train,y_train)
print(f"Time taken is {time.time()-start}")
y_pred=gdr.predict(X_test)
r2_score(y_test,y_pred)

152.01351687661833 [  14.38990585 -173.7235727   491.54898524  323.91524824  -39.32648042
 -116.01061213 -194.04077415  103.38135565  451.63448787   97.57218278]
Time taken is 0.01743149757385254


0.4534503034722803

### **Creating own class for multiple variable linear regression using stochastic gradient descent**

In [8]:
# Learning Schedular
t0,t1=5,50
def learning_rate(t):
    return t0/(t+t1)


class SGDRegressor:

    def __init__(self,learning_rate=0.01, epochs=100):
        self.coef_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs

    def fit(self, X_train,y_train):
        # initialize your coefs
        self.intercept_=0
        self.coef_=np.ones(shape=X_train.shape[1])

        for i in range(self.epochs):
            for j in range(X_train.shape[0]):
                # making learning rate as a function of epochs
                # self.lr=learning_rate(i*X.shape[0]+j) 

                idx=np.random.randint(0,X_train.shape[0]) # high not included
                y_hat=(X_train[idx]@self.coef_)+self.intercept_ # a scalar
                intercept_der=-2*(y_train[idx]-y_hat)
                coef_der=-2*(y_train[idx]-y_hat)*X_train[idx]
                self.intercept_=self.intercept_-(self.lr*intercept_der)
                self.coef_=self.coef_-(self.lr*coef_der)

        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return self.intercept_+(X_test) @ (self.coef_)

In [9]:
sgd=SGDRegressor(epochs=40,learning_rate=0.1)
start=time.time()
sgd.fit(X_train,y_train)
print(f"Time taken is {time.time()-start}")
y_pred=sgd.predict(X_test)
r2_score(y_test,y_pred)

155.8458989041669 [   3.83888249 -199.3026714   531.96715424  304.32184666  -78.49962735
 -109.7301724  -205.33664563   83.2952533   518.55627547   49.37643967]
Time taken is 0.10808205604553223


0.44999022702474756

In [10]:
from sklearn.linear_model import SGDRegressor
reg=SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01)
start=time.time()
reg.fit(X_train,y_train)
print(f"Time taken is {time.time()-start}")
y_pred=reg.predict(X_test)
r2_score(y_test,y_pred)


Time taken is 0.002301454544067383


0.43035854274369656

### **Creating own class for multiple variable linear regression using Mini Batch gradient descent**

In [13]:
import random
class MBGDRegressor:

    def __init__(self,batch_size,learning_rate=0.01, epochs=100):
        self.coef_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs
        self.batch_size=batch_size

    def fit(self, X_train,y_train):
        # initialize your coefs
        self.intercept_=0
        self.coef_=np.ones(shape=X_train.shape[1])

        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                idx=random.sample(range(X_train.shape[0]),self.batch_size)

                # update all the coefs and intercept
                y_hat=self.intercept_+(X_train[idx]) @ (self.coef_)
                intercept_der=-2*np.mean(y_train[idx]-y_hat)
                
                # vectorization (we don't need to use a loop)
                coef_der=(-2/X_train.shape[0])*((y_train[idx]-y_hat).T@(X_train[idx]))
                # updating intercept
                self.intercept_=self.intercept_-self.lr*intercept_der
                # updating coefficients
                self.coef_=self.coef_-self.lr*coef_der

        print(self.intercept_,self.coef_)

    def predict(self,X_test):
        return self.intercept_+(X_test) @ (self.coef_)

In [22]:
mbgd=MBGDRegressor(batch_size=int(X_train.shape[0]/10),epochs=10000,learning_rate=0.4)
start=time.time()
mbgd.fit(X_train,y_train)
print(f"Time taken is {time.time()-start}")
y_pred=mbgd.predict(X_test)
r2_score(y_test,y_pred)

151.2583828237332 [  -9.0406545  -196.53780954  532.00185153  337.67970223 -149.53959471
  -44.56445989 -163.01405556   57.4291265   574.54702299   50.77920183]
Time taken is 4.348752975463867


0.44268825143876134

**implementating minibatch gradient descent using scikit-learn**

In [23]:
from sklearn.linear_model import SGDRegressor
sgd=SGDRegressor(learning_rate='constant',eta0=0.2)
batch_size=35
epochs=100
start=time.time()
for i in range(epochs):
    idx=random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],y_train[idx])
print(f"Time taken is {time.time()-start}")
y_pred=sgd.predict(X_test)
r2_score(y_test,y_pred)


Time taken is 0.025076866149902344


0.45315605473942033