In [None]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5), "Python 3.5 or newer is required"

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare (major, minor) numerically: a plain string comparison is
# lexicographic, so a version such as "0.9" would wrongly satisfy >= "0.20".
_sklearn_major_minor = tuple(
    int(number) for number in re.match(r"(\d+)\.(\d+)", sklearn.__version__).groups()
)
assert _sklearn_major_minor >= (0, 20), "Scikit-Learn 0.20 or newer is required"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Linear regression Do-It-Yourself

In [None]:
import numpy as np

# Synthetic data set: 100 inputs drawn uniformly from [0, 5) and targets that
# follow y = 1 + 4x plus Gaussian noise scaled by 3.
X = np.random.rand(100, 1) * 5
y = (4 * X + 1) + np.random.randn(100, 1) * 3

In [None]:
# Scatter plot of the raw data on fixed axes: x in [0, 5], y in [0, 20]
plt.scatter(X, y, color="black")
plt.xlim(0, 5)
plt.ylim(0, 20)
plt.xlabel("$x$", fontsize=18)
plt.ylabel("$y$", fontsize=18, rotation=0)
plt.show()

In [None]:
# Hand-picked hypothesis line y = theta_1 * x + theta_0, evaluated on a grid over [0, 5].
x_new = np.linspace(0, 5, 100)

# EXERCISE (try your best): tune the two values below by eye until the red line
# in the next plot follows the data. They are only a starting guess; an empty
# right-hand side is a SyntaxError and stops the notebook from running.
theta_1 = 3.0  # slope
theta_0 = 2.0  # intercept

y_hyp = theta_1 * x_new + theta_0

In [None]:
# Data (black points) with the hand-tuned hypothesis drawn on top (red line)
plt.scatter(X, y, color="black")
plt.plot(x_new, y_hyp, color="red", linewidth=4)
plt.xlim(0, 5)
plt.ylim(0, 20)
plt.xlabel("$x$", fontsize=18)
plt.ylabel("$y$", fontsize=18, rotation=0)
plt.show()

# Linear regression using batch gradient descent

In [None]:
# Hyper-parameters for batch gradient descent
eta = 0.1  # learning rate
n_iterations = 1000
# Number of training samples, read from the data instead of being hard-coded so
# the 1/N gradient scaling stays correct if the data set size changes.
N = len(X)


# Random initialization of both parameters (each is an array of shape (1,))
theta_1 = np.random.randn(1)
theta_0 = np.random.randn(1)

# Per-iteration history of the parameters, appended to by the descent loop
cum_theta1 = []
cum_theta0 = []

In [None]:
# Show the current parameter values (the random initialization at this point)
for label, value in zip(("theta_1: ", "theta_0: "), (theta_1, theta_0)):
    print(label, value)

In [None]:
# Batch gradient descent. The two gradients below are the partial derivatives of
#   J(theta_0, theta_1) = 1/(2N) * sum((theta_1 * X + theta_0 - y)**2)
# NOTE: this cell continues from the current theta values and appends to the
# existing history lists; re-run the initialization cell first for a fresh start.
for iteration in range(n_iterations):
    # Record the parameters as they are *before* this iteration's update
    cum_theta1.append(theta_1)
    cum_theta0.append(theta_0)

    # Residuals of the current hypothesis, computed once and shared by both gradients
    residuals = theta_1 * X + theta_0 - y

    gradient_theta_1 = 1/N * np.sum(residuals * X)
    gradient_theta_0 = 1/N * np.sum(residuals)

    # Simultaneous update: both gradients were evaluated at the old parameters
    theta_1 = theta_1 - eta * gradient_theta_1
    theta_0 = theta_0 - eta * gradient_theta_0
    
    

In [None]:
# Show the parameter values reached after the gradient-descent loop
for label, value in zip(("theta_1: ", "theta_0: "), (theta_1, theta_0)):
    print(label, value)

In [None]:
# Trajectory of theta_1: one green dot per gradient-descent iteration
plt.plot(cum_theta1, 'g.')
plt.xlabel("iteration", fontsize=18)
plt.ylabel(r"$\theta_1$", rotation=0, fontsize=18)
# Explicit show() keeps this cell consistent with the other plotting cells and
# suppresses the "[<Line2D ...>]" repr that a bare plt.plot() would display.
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn, to compare against the gradient-descent result.
# fit() returns the estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X, y)
for label, value in zip(("theta_1: ", "theta_0: "), (lin_reg.coef_, lin_reg.intercept_)):
    print(label, value)

In [None]:
# Predictions of the scikit-learn fit at the training inputs: intercept + slope * x
ypred_exact = lin_reg.intercept_ + X * lin_reg.coef_

# Data (black points) with the fitted regression line on top (red)
plt.scatter(X, y, color="black")
plt.plot(X, ypred_exact, color="red", linewidth=4)
plt.xlim(0, 5)
plt.ylim(0, 20)
plt.xlabel("$x$", fontsize=18)
plt.ylabel("$y$", fontsize=18, rotation=0)
plt.show()