In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [3]:
df=pd.read_csv('FuelConsumption.csv')
df.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [4]:
df.shape

(1067, 13)

In [5]:
x=df['CYLINDERS'].values.reshape(-1,1)
y=df['CO2EMISSIONS'].values
x=(x-x.mean())/x.std()
y=(y-y.mean())/y.std()


In [6]:
ones=np.ones((x.shape[0],1))
x_aug=np.concatenate((ones,x),axis=1)
x_aug


array([[ 1.        , -0.99896853],
       [ 1.        , -0.99896853],
       [ 1.        , -0.99896853],
       ...,
       [ 1.        ,  0.11424235],
       [ 1.        ,  0.11424235],
       [ 1.        ,  0.11424235]])

In [7]:
def lr_predict(X,W):
    return np.dot(X,W)

def mse_loss(y_hat,y):
    error = y_hat-y
    return np.sum(error*error) / error.shape[0]

def calc_gradient(X,error):
    gradient=np.dot(X.T,error)
    return gradient 

def update_weights(W,lr,gradient):
    new_weights=W-lr*gradient
    return new_weights

Batch Gradient Descent

In [8]:
def train_lr_bgd(X,Y,W,n_epochs,lr):
    losses=[]

    for i in range(n_epochs):
        preds=lr_predict(X,W)
        error=preds-Y
        loss=mse_loss(preds,Y)
        losses.append(loss)

        gradient=calc_gradient(X,error)
        W=update_weights(W,lr,gradient)

    return W,losses

In [9]:
lr=0.0004
n_epochs=1000

W0 = np.random.rand(2)


W,losses=train_lr_bgd(x_aug,y,W0,n_epochs,lr)

losses[-1]

np.float64(0.27803609409380803)

------------------------------------------------------------------------------------

Stochastic Gradient Descent - SGD

In [10]:
def learning_schedule(t, a=5, b=1000):
    return a/(t+b)
def train_Sgd(X,Y,W,n_epochs):
    losses=[]
    m=x.shape[0]
    for epoch in range(n_epochs):
        for iteration in range (m):
            Random_index=np.random.randint(m)
            xi=X[Random_index:Random_index+1]
            yi=Y[Random_index:Random_index+1]
            preds=lr_predict(xi,W)
            error=preds-yi
            loss=mse_loss(preds,yi)
            losses.append(loss)
            lr=learning_schedule(m*epoch+iteration)
            gradient=calc_gradient(xi,error)
            W=update_weights(W,lr,gradient)

    return W,losses

In [11]:
n_epochs=1000
W0=np.random.rand(2)
W,losses=train_Sgd(x_aug,y,W0,n_epochs)

losses[-1]

np.float64(0.04628166350233239)

------------------------------------------------------------------------------------

Mini-Batch Gradient Descent

In [12]:
def train_mgd(X,Y,W,n_epochs,batch_size):
    losses=[]
    m=X.shape[0]
    for epoch in range(n_epochs):
        shuffled_indices=np.random.permutation(m)
        x_shuffled=X[shuffled_indices]
        y_shuffled=Y[shuffled_indices]
        for Idx in range(0,m,batch_size):
            xi=x_shuffled[Idx:Idx+batch_size]
            yi=y_shuffled[Idx:Idx+batch_size]

            predictions=lr_predict(xi,W)
            error=predictions-yi
            loss=mse_loss(predictions,yi)
            losses.append(loss)

            lr=learning_schedule(epoch+Idx)
            gradient=calc_gradient(xi,error)
            W=update_weights(W,lr,gradient)
    return W,losses

In [13]:
batch_size=25
m=101
n_epochs=10000
W0=np.random.rand(2)
W,losses=train_mgd(x_aug,y,W0,n_epochs,batch_size)

losses[-1]

np.float64(0.3368600332830434)

------------------------------------------------------------------------------------------

Normal Equation

In [14]:
from numpy import linalg
m=linalg.inv(x_aug.T@x_aug)
W=m@x_aug.T@y

preds=lr_predict(x_aug,W)
mse_loss(preds,y)

np.float64(0.27803609409380803)

------------------------------------------------------------------------------------------

Linear Regression with scikit-learn

In [15]:
from sklearn.linear_model import LinearRegression
lreg=LinearRegression()
lreg.fit(x,y)

In [16]:
print('W0:', lreg.intercept_)
print('W1:',lreg.coef_)

W0: -2.40428587749616e-16
W1: [0.84968459]


In [17]:
from sklearn.metrics import mean_squared_error
pred=lreg.predict(x)
loss=mean_squared_error(pred,y)
loss

0.27803609409380803