In [296]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import random

In [297]:
df = pd.read_csv("/content/drive/MyDrive/ML_Self/boston.csv")

print(df.isnull().sum())

y = df["MEDV"]
X = df.drop("MEDV", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
MEDV       0
dtype: int64


In [298]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [299]:
lr = LinearRegression()
lr.fit(X_train, y_train)

print(lr.coef_)
print(lr.intercept_)

r2_score(y_test, lr.predict(X_test))

[-1.00213533  0.69626862  0.27806485  0.7187384  -2.0223194   3.14523956
 -0.17604788 -3.0819076   2.25140666 -1.76701378 -2.03775151  1.12956831
 -3.61165842]
22.796534653465343


0.668759493535632

In [300]:
sgd = SGDRegressor(max_iter=100, learning_rate="constant", eta0=0.01)
sgd.fit(X_train, y_train)

print(sgd.coef_)
print(sgd.intercept_)

r2_score(y_test, sgd.predict(X_test))

[-0.62807142  0.3581866   0.12845291  1.15577986 -2.13392725  3.77761504
 -0.03811376 -3.19276892  2.25584889 -1.88729306 -2.00788828  1.46500377
 -3.76463746]
[22.95124024]


0.6170517593777046

In [301]:
class BatchGD:

  def __init__(self, learning_rate, epocs):
    self.coef_ = None #  m
    self.intercept_ = 0 # c
    self.learning_rate = learning_rate
    self.epocs = epocs

  def fit(self, X_train, y_train):
    # Aim is to calculate intercept and coeffcient using GD (not OLS)

    # Initial Value
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1]) # number of coeff = number of columns in dataset

    for i in range(self.epocs):
      y_pred = self.intercept_ + np.dot(X_train, self.coef_)
      slope_intercept = -2 * np.mean(y_train - y_pred)
      self.intercept_ = self.intercept_ - slope_intercept*self.learning_rate

      slope_coef = -2 * np.dot((y_train - y_pred), X_train) / X_train.shape[0]
      self.coef_ = self.coef_ - slope_coef*self.learning_rate

  def predict(self, X_test):
    return np.dot(X_test, self.coef_) + self.intercept_

In [302]:
bgd = BatchGD(learning_rate=0.01, epocs=100)
bgd.fit(X_train, y_train)

print(bgd.coef_)
print(bgd.intercept_)

r2_score(y_test, bgd.predict(X_test))

[-0.54157163  0.43711312 -0.08000775  1.0707563  -0.17381133  3.92234989
  0.25325907 -0.48348423  0.1275103  -0.49978617 -1.33928158  1.358985
 -2.57253743]
19.773268351783422


0.451263878722594

In [303]:
class StochasticGD:

  def __init__(self, learning_rate, epocs):
    self.coef_ = None #  m
    self.intercept_ = 0 # c
    self.learning_rate = learning_rate
    self.epocs = epocs

  def fit(self, X_train, y_train):
    # Aim is to calculate intercept and coeffcient using GD (not OLS)

    # Initial Value
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1]) # number of coeff = number of columns in dataset

    for i in range(self.epocs):
      for j in range(X_train.shape[0]):
        idx = np.random.randint(0, X_train.shape[0])

        y_pred = self.intercept_ + np.dot(X_train[idx], self.coef_)

        slope_intercept = -2 * (y_train[idx] - y_pred)
        self.intercept_ = self.intercept_ - (slope_intercept*self.learning_rate)

        slope_coef = -2 * np.dot((y_train[idx] - y_pred), X_train[idx])
        self.coef_ = self.coef_ - slope_coef*self.learning_rate

  def predict(self, X_test):
    return np.dot(X_test, self.coef_) + self.intercept_

In [304]:
sgd = StochasticGD(epocs=100, learning_rate=0.01)

y_train_ser = y_train.to_numpy()
y_test_ser = y_test.to_numpy()

sgd.fit(X_train, y_train_ser)

print(sgd.coef_)
print(sgd.intercept_)

r2_score(y_test_ser, sgd.predict(X_test))

[-1.48040994  1.13365869  0.39208832  0.19096866 -1.48983927  3.66662807
 -0.31978507 -2.71608968  1.6348558  -1.26876493 -2.42544053  0.78852362
 -2.8877695 ]
22.43251118463466


0.6615402733477844

In [305]:
class MiniBatchGD:

  def __init__(self, batch_size, learning_rate, epocs):
    self.coef_ = None #  m
    self.intercept_ = 0 # c
    self.learning_rate = learning_rate
    self.epocs = epocs
    self.batch_size = batch_size

  def fit(self, X_train, y_train):
    # Aim is to calculate intercept and coeffcient using GD (not OLS)

    # Initial Value
    self.intercept_ = 0
    self.coef_ = np.ones(X_train.shape[1]) # number of coeff = number of columns in dataset

    for i in range(self.epocs):
      for j in range(int(X_train.shape[0] / self.batch_size)):
        idx = random.sample(range(X_train.shape[0]), self.batch_size)

        y_pred = self.intercept_ + np.dot(X_train[idx], self.coef_) # fancy indexing

        slope_intercept = -2 * np.mean(y_train[idx] - y_pred)
        self.intercept_ = self.intercept_ - (slope_intercept*self.learning_rate)

        slope_coef = -2 * np.dot((y_train[idx] - y_pred), X_train[idx])
        self.coef_ = self.coef_ - slope_coef*self.learning_rate

  def predict(self, X_test):
    return np.dot(X_test, self.coef_) + self.intercept_

In [308]:
mgd = MiniBatchGD(batch_size=int(X_train.shape[0]/10), epocs=100, learning_rate=0.001)

mgd.fit(X_train, y_train_ser)

print(mgd.coef_)
print(mgd.intercept_)

r2_score(y_test, mgd.predict(X_test))

[-1.22938608  0.94212033  0.24790636  0.13801834 -2.16918031  3.01822658
 -0.54349044 -2.6603499   1.79733642 -1.78116791 -2.05690575  1.23158638
 -3.21853438]
19.72151317863097


0.5049923975941264