In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
def back_sub(R, b_tilde):
    """Solve the upper-triangular system ``R x = b_tilde`` by back substitution.

    Parameters
    ----------
    R : array-like, shape (n, n)
        Coefficient matrix. Only the diagonal and the entries above it are
        read; anything below the diagonal is ignored.
    b_tilde : array-like, length >= n
        Right-hand side; only its first ``n`` entries are read.

    Returns
    -------
    numpy.ndarray, shape (n,)
        The solution vector ``x`` as floats.

    Notes
    -----
    A zero on the diagonal of ``R`` is not checked for; it produces ``inf`` /
    ``nan`` entries (with a NumPy warning) rather than an exception.
    """
    R = np.asarray(R, dtype=float)
    b_tilde = np.asarray(b_tilde, dtype=float)
    n = R.shape[0]
    x = np.zeros(n)
    # Walk from the last row upwards: every x[j] with j > i is already known,
    # so their contribution is a single dot product with the row's tail.
    for i in reversed(range(n)):
        x[i] = (b_tilde[i] - R[i, i + 1:] @ x[i + 1:]) / R[i, i]
    return x

def solve_via_backsub(A, b):
    """Solve ``A x = b`` through a QR factorization followed by back substitution.

    ``A`` is factored as ``Q R`` with ``np.linalg.qr``; the right-hand side is
    projected with ``Q.T`` and the resulting triangular system ``R x = Q.T b``
    is handed to ``back_sub``.

    Parameters
    ----------
    A : array-like, 2-D
        Coefficient (design) matrix.
    b : array-like
        Right-hand side with one entry per row of ``A``.

    Returns
    -------
    numpy.ndarray
        The vector returned by ``back_sub``.
    """
    orthonormal, upper = np.linalg.qr(A)
    return back_sub(upper, orthonormal.T @ b)
    

In [None]:
def solve_ridge(A, b, lambdas):
    """Solve a ridge-regularized least-squares problem via an augmented system.

    The penalty is folded into an ordinary least-squares problem by stacking
    ``sqrt(lambdas) * I`` under ``A`` and appending matching zeros to ``b``;
    the stacked system is then solved with ``solve_via_backsub``.

    Parameters
    ----------
    A : array-like, shape (m, p)
        Design matrix.
    b : array-like, length m
        Target vector.
    lambdas : float
        Regularization strength (a single value despite the plural name).
        A negative value makes ``np.sqrt`` return ``nan``.

    Returns
    -------
    numpy.ndarray
        Coefficient vector returned by ``solve_via_backsub``.
    """
    n_features = A.shape[1]
    A_tilde = np.vstack((A, np.sqrt(lambdas) * np.eye(n_features)))
    b_tilde = np.hstack((b, np.zeros(n_features)))
    return solve_via_backsub(A_tilde, b_tilde)


In [None]:
def solve_ridge2(A, b, lambdas):
    """Solve ridge regression through the normal equations.

    Computes ``x`` satisfying ``(A.T A + lambdas * I) x = A.T b``.

    Parameters
    ----------
    A : array-like, shape (m, p)
        Design matrix.
    b : array-like, length m
        Target vector.
    lambdas : float
        Regularization strength (a single value despite the plural name).

    Returns
    -------
    numpy.ndarray
        Coefficient vector of length ``p``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``A.T A + lambdas * I`` is singular.
    """
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)
    gram = A.T @ A + lambdas * np.eye(A.shape[1])
    # Solve the linear system directly instead of forming the explicit
    # inverse: it is cheaper and numerically better behaved.
    return np.linalg.solve(gram, A.T @ b)

In [None]:
# Load the College dataset and add a numeric 0/1 indicator derived from the
# 'Private' Yes/No column.
college = pd.read_csv('./datasets/College.csv')
# Series.map builds the integer column directly. Series.replace relied on a
# silent object -> int downcast, which pandas 2.2 deprecates.
# Any value other than 'Yes'/'No' becomes NaN here instead of being kept as-is.
college['isPrivate'] = college['Private'].map({'Yes': 1, 'No': 0})
college.head()

In [None]:
# Shuffle the row positions of `college` and hold out the first 100 as the test
# set; the remaining positions form the training set.
# NOTE(review): the permutation is unseeded, so the split differs on every run.
Io = np.random.permutation(len(college))  # row count taken from the data, not hard-coded
test = np.array(Io[:100])
train = np.array(Io[100:])

In [None]:
# Cross-validation bookkeeping: shuffle the training positions into `folds`
# roughly equal index groups and allocate one RMSE slot per fold.
folds = 5
I = np.array_split(np.random.permutation(train), folds)
trainrmse = np.zeros(folds)
testrmse = np.zeros(folds)
# Predictor columns used for the regression, in the row order of `coeff`.
pa = ['Accept', 'Enroll', 'Top10perc','Top25perc', 'F.Undergrad', 'P.Undergrad', 'Outstate', 'Room.Board',
       'Books', 'Personal', 'PhD', 'Terminal', 'S.F.Ratio', 'perc.alumni',
       'Expend', 'Grad.Rate', 'isPrivate']
# Number of regularization strengths on the lambda grid. `n` must exist before
# `coeff` is allocated; it was previously first assigned in a later cell, so
# this cell raised NameError on a fresh kernel.
n = 100
# One row per predictor, one column per lambda.
coeff = np.zeros((len(pa), n))


In [None]:
# Design matrices (predictor columns `pa`) and targets ('Apps') for the
# training and held-out rows, selected by position.
# No intercept column is added (an earlier variant prepended a column of ones).
A, b = college[pa].iloc[train], college['Apps'].iloc[train]
Atest, btest = college[pa].iloc[test], college['Apps'].iloc[test]
# Grid of n regularization strengths, log-spaced from 1e-6 to 1e6.
n = 100
lambdas = np.logspace(-6, 6, num=n)


In [None]:
# Fit the ridge solution for every lambda on the grid; column k of `coeff`
# holds the coefficients obtained with lambdas[k].
for k, lam in enumerate(lambdas):
    coeff[:, k] = solve_ridge(A, b, lam)

# Coefficient paths: one curve per predictor against lambda on a log axis.
plt.figure(figsize=(20, 15))
plt.plot(lambdas, coeff.T)
plt.xscale('log')
plt.xlabel('lambdas')
plt.ylabel('coefficient')
plt.xlim(1e-6, 1e+6)
# Label the curves from `pa` itself so the legend cannot drift out of sync
# with the row order of `coeff`.
plt.legend(pa)
plt.show()


In [None]:
# Root-mean-square error on the held-out rows for every lambda at once:
# column k of `residuals` is Atest @ coeff[:, k] - btest.
residuals = np.asarray(Atest, dtype=float) @ coeff - np.asarray(btest, dtype=float)[:, None]
tester = np.sqrt(np.mean(residuals ** 2, axis=0))

In [None]:
# Held-out RMSE as a function of lambda (log-scaled x axis).
plt.figure(figsize=(10,10))
plt.plot(lambdas, tester)
plt.xscale('log')
# The axis label must be the string 'lambdas'; passing the array itself made
# matplotlib render the whole array as the label text.
plt.xlabel('lambdas')
plt.ylabel('test RMSE')

plt.xlim(1e-6,1e+6)

plt.show()