In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Toy dataset: noisy samples of the line y = 2x + 1
m = 20 # sample size
x = -1 + 2*np.random.rand(m) # m uniform draws from [-1,1)
noise = 0.25*np.random.randn(m) # Gaussian noise scaled by 0.25
y = 2*x+1+noise

# scatter plot of the samples
fig, ax = plt.subplots(figsize=(10,5))
ax.plot(x,y,'o', label='data')
ax.set_xlabel('x',fontsize=20)
ax.set_ylabel('y',fontsize=20)
ax.legend(fontsize=15, loc='upper left')

In [None]:
# design matrix X: first column all ones (intercept term), second column the x values
X = np.column_stack((np.ones(m), x))

In [None]:
# learning rate: the step size that multiplies the gradient in the
# update theta = theta - lr*gradient performed by the gradient descent cell
lr = 0.5 # moderate step size for the first walkthrough

In [None]:
# Initialization: random starting point for gradient descent
theta = np.random.randn(2) # theta = (intercept, slope), matching the columns of X

# initial mean squared error: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model on an evenly spaced grid over [-1,1]
m_plot = 100 # number of grid points
x_plot = np.linspace(-1,1,m_plot)
X_plot = np.ones((m_plot,2)) # same layout as X: column of ones, then the x values
X_plot[:,1] = x_plot
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

In [None]:
# Gradient Descent Step: move theta against the gradient of the MSE.
# Each execution of this cell applies one more step to the current theta.
gradient = (2/m)*X.T.dot(X.dot(theta)-y) # gradient of ||X.theta - y||^2 / m
theta = theta - lr*gradient

# Mean squared error after the step: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model for the updated theta
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

**Warning 1:** If the learning rate is too small, convergence will take a long time.

In [None]:
# very small step size, chosen to illustrate Warning 1 (slow convergence)
lr = 0.0001

In [None]:
# Initialization: random starting point for gradient descent
theta = np.random.randn(2) # theta = (intercept, slope), matching the columns of X

# initial mean squared error: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model on an evenly spaced grid over [-1,1]
m_plot = 100 # number of grid points
x_plot = np.linspace(-1,1,m_plot)
X_plot = np.ones((m_plot,2)) # same layout as X: column of ones, then the x values
X_plot[:,1] = x_plot
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

In [None]:
# Gradient Descent Step: move theta against the gradient of the MSE.
# Each execution of this cell applies one more step to the current theta.
gradient = (2/m)*X.T.dot(X.dot(theta)-y) # gradient of ||X.theta - y||^2 / m
theta = theta - lr*gradient

# Mean squared error after the step: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model for the updated theta
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

**Warning 2:** If the learning rate is too large, the algorithm may diverge.

In [None]:
# very large step size, chosen to illustrate Warning 2 (possible divergence)
lr = 10

In [None]:
# Initialization: random starting point for gradient descent
theta = np.random.randn(2) # theta = (intercept, slope), matching the columns of X

# initial mean squared error: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model on an evenly spaced grid over [-1,1]
m_plot = 100 # number of grid points
x_plot = np.linspace(-1,1,m_plot)
X_plot = np.ones((m_plot,2)) # same layout as X: column of ones, then the x values
X_plot[:,1] = x_plot
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

In [None]:
# Gradient Descent Step: move theta against the gradient of the MSE.
# Each execution of this cell applies one more step to the current theta.
gradient = (2/m)*X.T.dot(X.dot(theta)-y) # gradient of ||X.theta - y||^2 / m
theta = theta - lr*gradient

# Mean squared error after the step: ||X.theta - y||^2 / m
# (note the square: the MSE averages the *squared* residuals)
MSE = np.linalg.norm(X.dot(theta)-y)**2/m

# plot the data
plt.figure(figsize=(15,7))
plt.plot(x,y,'bo')

# plot the linear regression model for the updated theta
y_plot = X_plot.dot(theta)
plt.plot(x_plot,y_plot,'r-')

plt.title('MSE = '+str(MSE),fontsize=15)

## Implementation

In [1]:
def linregression_GD(X,y,learning_rate, n_epochs = 100, return_MSE=False):
    '''
    Linear regression with Gradient Descent.

    Starting from a random theta, repeatedly steps against the gradient of
    the mean squared error ||X.theta - y||^2 / m.

    INPUT:
    - X: 2-D matrix of shape (m, n); m = size of data set, n = number of features
    - y: the target vector (assumed to have length m so that X.dot(theta)-y is defined)
    - learning_rate: step size multiplying the gradient in each update
    - n_epochs: number of Gradient Descent iterations (default 100)
    - return_MSE: if True, also return the MSE at each iteration (default False)

    OUTPUT:
    - theta if return_MSE is False
    - the pair (theta, MSE) if return_MSE is True, where MSE[k] is the mean
      squared error measured right after the k-th update
    '''
    m,n = X.shape # size of data set, number of features
    theta = np.random.randn(n) # random initialization

    # allocate the MSE history (only if return_MSE = True)
    if return_MSE:
        MSE = np.zeros(n_epochs)

    # gradient descent iterations
    for epoch in range(n_epochs):
        gradient = (2/m)*X.T.dot(X.dot(theta)-y) # gradient of the mse function
        theta = theta - learning_rate*gradient # update the vector theta
        # record the mean squared error of the updated theta (only if return_MSE = True)
        if return_MSE:
            MSE[epoch] = np.linalg.norm(y-X.dot(theta))**2/m

    # Explicit branches: the one-liner `return theta, MSE if return_MSE else theta`
    # binds as `(theta, (MSE if return_MSE else theta))` and would return the
    # tuple (theta, theta) when return_MSE is False.
    if return_MSE:
        return theta, MSE
    return theta

In [None]:
# Sanity check: fit the toy data and keep the per-epoch error
theta, MSE = linregression_GD(X,y,learning_rate=0.1,n_epochs=100, return_MSE=True)

fig, (ax_fit, ax_err) = plt.subplots(1,2,figsize=(15,5))

# left panel: data + fitted line
ax_fit.plot(x,y,'bo')
y_plot = X_plot.dot(theta)
ax_fit.plot(x_plot,y_plot,'r-')

# right panel: mean squared error as a function of the number of iterations
ax_err.plot(MSE,'o--')
ax_err.set_ylabel('error',fontsize=15)
ax_err.set_xlabel('epoch',fontsize=15)