In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.optimize import minimize
import plotly.express as px

# Regularized Linear Regression

In [3]:
raw_data = loadmat("ex5data1.mat")

FileNotFoundError: ignored

In [None]:
X_train = raw_data["X"]
y_train = raw_data["y"]
X_val = raw_data["Xval"]
y_val = raw_data["yval"]
X_test = raw_data["Xtest"]
y_test = raw_data["ytest"]

In [None]:
X_train = np.insert(X_train,0,1,axis=1)
X_val = np.insert(X_val,0,1,axis=1)
X_test = np.insert(X_test,0,1,axis=1)
theta_init = np.array([1,1]).reshape(2,1)

In [None]:
fig = px.scatter(x=X_train[:,1].flatten(),y=y_train.flatten())
fig.show()

In [None]:
def cost(theta:np.ndarray,X:np.ndarray,y:np.ndarray) -> np.ndarray :
    """计算损失函数"""
    n= len(y)
    theta = theta.reshape(-1,1)
    return (h(theta,X)-y).T@(h(theta,X)-y)/(2*n)

def h(theta:np.ndarray,X:np.ndarray) -> np.ndarray :
    theta = theta.reshape(-1,1)

    return X@theta

def regularized_cost(theta:np.ndarray,X:np.ndarray,y:np.ndarray,L=1)-> np.ndarray :
    n= len(y)
    theta = theta.reshape(-1,1)
    theta_no0 = theta[1:]
    regularized_term = (theta_no0.T@theta_no0)/(2*n)*L
    return cost(theta,X,y)+regularized_term
    

In [None]:
regularized_cost(theta_init,X_train,y_train)

array([[303.99319222]])

In [None]:
def gradients(theta:np.ndarray,X:np.ndarray,y:np.ndarray) -> np.ndarray :
    n=len(y)
    theta = theta.reshape(-1,1)
    return (X.T@(h(theta,X)-y))/n

def regularized_gradients(theta:np.ndarray,X:np.ndarray,y:np.ndarray,L=1)-> np.ndarray :
    theta = theta.reshape(-1,1)
    n=len(y)
    regularized_term = theta*L/n
    regularized_term[0] = 0
    return gradients(theta,X,y)+regularized_term

In [None]:
regularized_gradients(theta_init,X_train,y_train)

array([[-15.30301567],
       [598.25074417]])

In [None]:
res = minimize(fun=regularized_cost,x0=theta_init,args=(X_train,y,0),method="TNC",jac=regularized_gradients,options={'disp': True})

In [None]:
final_theta = res.x

In [None]:
final_theta

array([13.08790362,  0.36777923])

In [None]:
line_x = np.array([i for i in np.arange(min(X_train[:,1]),max(X_train[:,1]))]).reshape(-1,1)
line_y = np.insert(line_x,0,1,axis=1)@final_theta
trace = px.line(x=line_x.flatten(),y=line_y.flatten()).data[0]

In [None]:
fig.add_trace(trace)

# Bias-variance

In [None]:
training_cost = []
cv_cost = []
for i in range(len(y_train)):
    res = minimize(fun=regularized_cost,x0=theta_init,args=(X_train[:i+1,:],y_train[:i+1],0),method="TNC",jac=regularized_gradients,options={'disp': True})
    training_cost.append(regularized_cost(res.x,X_train[:i+1,:],y_train[:i+1],0))
    cv_cost.append(regularized_cost(res.x,X_val,y_val,0))


In [None]:
px.line(y=[np.array(training_cost).flatten(),np.array(cv_cost).flatten()])

# 3  Polynomial regression

In [None]:
def poly_features(X,power):
    X_poly = X
    for p in range(2,power+1):
        X_poly = np.insert(X_poly,p-1,np.power(X,p).flatten(),axis=1)
    return X_poly

def normalize_feature(X):
    """Applies function along input axis(default 0) of DataFrame."""
    return np.apply_along_axis(lambda column: (column - np.mean(column)) / np.std(column),axis=0,arr=X)


In [None]:
def prepare(X,power):
    X = poly_features(X,power)
    X = normalize_feature(X)
    X = np.insert(X,0,1,axis=1)
    return X

In [None]:
X_poly = prepare(X_train[:,1].reshape(-1,1),8)
X_val_poly = prepare(X_val[:,1].reshape(-1,1),8)

In [None]:
init_theta = np.ones(9)

In [None]:
res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly,y_train,0),method="TNC",jac=regularized_gradients,options={'disp': True})

In [None]:
final_theta = res.x

In [None]:
line_x = np.array([i for i in np.arange(min(X_train[:,1]),max(X_train[:,1]))]).reshape(-1,1)
line_x_poly = prepare(line_x,8)
line_y = line_x_poly@final_theta
trace = px.line(x=line_x.flatten(),y=line_y.flatten()).data[0]


In [None]:
fig.add_trace(trace)

In [None]:
training_cost = []
cv_cost = []
for i in range(len(y_train)):
    res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly[:i+1,:],y_train[:i+1],0),method="TNC",jac=regularized_gradients,options={'disp': True})
    training_cost.append(regularized_cost(res.x,X_poly[:i+1,:],y_train[:i+1],0))
    cv_cost.append(regularized_cost(res.x,X_val_poly,y_val,0))

In [None]:
px.line(y=[np.array(training_cost).flatten(),np.array(cv_cost).flatten()])

In [None]:
training_cost = []
cv_cost = []
for i in range(len(y_train)):
    res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly[:i+1,:],y_train[:i+1],1),method="TNC",jac=regularized_gradients,options={'disp': True})
    training_cost.append(regularized_cost(res.x,X_poly[:i+1,:],y_train[:i+1],0))
    cv_cost.append(regularized_cost(res.x,X_val_poly,y_val,0))

In [None]:
px.line(y=[np.array(training_cost).flatten(),np.array(cv_cost).flatten()])

In [None]:
training_cost = []
cv_cost = []
for i in range(len(y_train)):
    res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly[:i+1,:],y_train[:i+1],100),method="TNC",jac=regularized_gradients,options={'disp': True})
    training_cost.append(regularized_cost(res.x,X_poly[:i+1,:],y_train[:i+1],0))
    cv_cost.append(regularized_cost(res.x,X_val_poly,y_val,0))

In [None]:
px.line(y=[np.array(training_cost).flatten(),np.array(cv_cost).flatten()])

# Finding best lambda

In [None]:
l_candidate = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
training_cost = []
cv_cost = []
for l in l_candidate:
    res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly,y_train,l),method="TNC",jac=regularized_gradients,options={'disp': True})
    training_cost.append(regularized_cost(res.x,X_poly,y_train,0))
    cv_cost.append(regularized_cost(res.x,X_val_poly,y_val,0))

In [None]:
px.line(x=l_candidate,y=[np.array(training_cost).flatten(),np.array(cv_cost).flatten()])

In [None]:
# best cv I got from all those candidates
l_candidate[np.argmin(cv_cost)]

1

In [None]:
X_test_poly = prepare(X_test[:,1].reshape(-1,1),8)


In [None]:
# use test data to compute the cost
for l in l_candidate:
    res = minimize(fun=regularized_cost,x0=init_theta,args=(X_poly,y_train,l),method="TNC",jac=regularized_gradients,options={'disp': True})
    print('test cost(l={}) = {}'.format(l, cost(res.x, X_test_poly, y_test)))

test cost(l=0) = [[10.27651822]]
test cost(l=0.001) = [[11.08237423]]
test cost(l=0.003) = [[11.30370888]]
test cost(l=0.01) = [[10.98820301]]
test cost(l=0.03) = [[10.21904699]]
test cost(l=0.1) = [[8.95399122]]
test cost(l=0.3) = [[7.74519971]]
test cost(l=1) = [[7.85157739]]
test cost(l=3) = [[11.77033423]]
test cost(l=10) = [[26.89407638]]


Modify from colab