In [911]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [912]:
x,y = load_diabetes(return_X_y = True)

In [913]:
print(x.shape)
print(y.shape)

(442, 10)
(442,)


In [914]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state = 0)

In [915]:
lr = LinearRegression()
lr.fit(x_train,y_train)

In [916]:
y_pred = lr.predict(x_test)

In [917]:
r2_score(y_test,y_pred)

0.3322332173106184

In [918]:
print(lr.coef_)
print(lr.intercept_)

[ -35.55025079 -243.16508959  562.76234744  305.46348218 -662.70290089
  324.20738537   24.74879489  170.3249615   731.63743545   43.0309307 ]
152.5380470138517


In [919]:
class MBGDRegression :
    def __init__(self,batch_size,learning_rate = 0.01,epochs = 100):
        self.lr = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.coef_ = None
        self.intercept_ = None
            
    def fit(self,x_train,y_train):
        import random
        self.intercept_ = 0
        self.coef_ = np.ones(x_train.shape[1])
        
        for i in range(self.epochs):
            
            for j in range( x_train.shape[0] // self.batch_size ):
                
                idx = random.sample(range(x_train.shape[0]),self.batch_size)
                              
                y_hat = np.dot(x_train[idx],self.coef_) + self.intercept_
                
                intercept_der = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - ( self.lr * intercept_der )
                
                coef_der = (-2) * np.dot((y_train[idx] - y_hat) , x_train[idx]) 
                self.coef_ = self.coef_ - (self.lr * coef_der)
            
    def predict(self,x_test):
        y_pred = x_test.dot(self.coef_) + self.intercept_
        return y_pred

In [920]:
import time
start = time.time()
gd = MBGDRegression(35,0.01,45)
gd.fit(x_train,y_train)
end = time.time()
print(end - start)

0.03139662742614746


In [921]:
y_pred = gd.predict(x_test)

In [922]:
r2_score(y_test,y_pred)

0.35846253640496406

In [923]:
print(gd.coef_)
print(gd.intercept_)

[  30.39853784  -66.36742429  350.86853956  211.51501968   10.31419257
  -33.32701569 -188.79727334  156.87753146  295.32173891  131.04815176]
150.2042091326635


### By using Sklearn SGDRegressor

In [924]:
import random
from sklearn.linear_model import SGDRegressor
reg = SGDRegressor(learning_rate = "constant" , eta0 = 0.2)
batch_size = 35
for i in range(100):
   
   idx = random.sample(range(x_train.shape[0]),batch_size) 
   reg.partial_fit(x_train[idx],y_train[idx])

In [925]:
y_pred = reg.predict(x_test)

In [926]:
print(r2_score(y_test,y_pred))

0.34543928228527665


In [927]:
print(reg.coef_)
print(reg.intercept_)

[ 2.82764737e-01 -1.92434667e+02  4.80947804e+02  2.47706180e+02
 -1.83752319e+01 -1.12542018e+02 -2.13522010e+02  1.06868313e+02
  4.23005986e+02  1.09486737e+02]
[158.90132547]
