# Benchmarks and validation

This notebook gives benchmark values for OLS, Ridge and Lasso regression with 10 features, and performs gradient descent with a Lasso gradient and the RMSprop optimizer on the same 10 features for comparison.

In [54]:
import sys
import os
# Add the project root to sys.path
sys.path.append(os.path.abspath('..'))

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from Functions.gradients import Gradients
from Functions.gradientdescent_lib import *
from Functions.PLOT import plot

def runge_function(x):
    """Evaluate the Runge function 1 / (1 + 25 x^2) at ``x``.

    ``x`` may be a scalar or a NumPy array; the arithmetic is applied
    element-wise and the result has the same shape as ``x``.
    """
    denominator = 1 + 25 * x**2
    return 1 / denominator

# Seed the global NumPy RNG so every random draw below is reproducible.
np.random.seed(1)

# Noisy samples of the Runge function on a uniform grid over [-1, 1].
n_samples = 100
x = np.linspace(-1, 1, num=n_samples)[:, np.newaxis]  # column vector, shape (n_samples, 1)
# Gaussian noise with variance 0.1, i.e. standard deviation sqrt(0.1).
y = runge_function(x) + np.random.normal(loc=0, scale=np.sqrt(0.1), size=x.shape)

# Size of the data set and highest degree passed to the feature-matrix builder.
n_points = 100
n_degree = 10

# Train/test split produced by the project helper; the inputs are flattened
# to 1-D before being turned into feature matrices.
x_train, x_test, y_train, y_test = generateData(n_points, noise = 0.1)
x_train, x_test = x_train.flatten(), x_test.flatten()

# The same noIntercept flag is used for both matrices so their columns match.
noIntercept = False
X_train = featureMat(x_train, n_degree, noIntercept=noIntercept)
X_test = featureMat(x_test, n_degree, noIntercept=noIntercept)


# scikit-learn reference fits: each model is created, fitted on the training
# feature matrix and evaluated on the test feature matrix.
# NOTE(review): if featureMat adds a constant column when noIntercept is False,
# sklearn's default fit_intercept=True fits a second intercept — confirm this
# is intended.

# Ordinary least squares
ols_model = LinearRegression().fit(X_train, y_train)
y_pred_ols = ols_model.predict(X_test)

# Ridge with a small regularization strength
ridge_model = Ridge(alpha=0.001).fit(X_train, y_train)
y_pred_ridge = ridge_model.predict(X_test)

# Lasso with a small regularization strength; max_iter is set to 10000
lasso_model = Lasso(alpha=0.001, max_iter=10000).fit(X_train, y_train)
y_pred_lasso = lasso_model.predict(X_test)


#Evaluate models
def evaluate_model(name, y_true, y_pred):
    """Print the R^2 score of one model's predictions.

    Parameters
    ----------
    name : str
        Label printed in front of the score.
    y_true : array-like
        Reference target values.
    y_pred : array-like
        Model predictions compared against ``y_true``.

    Returns
    -------
    None
        The score is only printed, as ``"<name>: R^2: <score>"`` with four
        decimals.
    """
    # Only R^2 is reported, so no other metric is computed here.
    r2 = r2_score(y_true, y_pred)
    print(f"{name}: R^2: {r2:.4f}")

# Report the test-set R^2 of every scikit-learn benchmark, in a fixed order.
for label, prediction in (
    ("OLS", y_pred_ols),
    ("Ridge", y_pred_ridge),
    ("Lasso", y_pred_lasso),
):
    evaluate_model(label, y_test, prediction)

OLS: R^2: 0.7604
Ridge: R^2: 0.7929
Lasso: R^2: 0.6427


In [55]:
# Closed-form benchmarks: solve for the OLS and Ridge coefficients with the
# project's analytic helpers, then score each solution on the test set.
lambd = 0.001  # Ridge penalty; the Lasso gradient below is built with it too

theta_a_OLS = theta_analytic_OLS(X_train, y_train)
theta_a_Ridge = theta_analytic_Ridge(X_train, y_train, lambd)

# R2 analytical OLS
y_pred = X_test @ theta_a_OLS
R2_a_OLS = R2(y_test, y_pred)
print(f'Closed-form OLS R^2:   {R2_a_OLS:.4f}')

# R2 analytical Ridge
y_pred = X_test @ theta_a_Ridge
R2_a_Ridge = R2(y_test, y_pred)
print(f'Closed-form Ridge R^2: {R2_a_Ridge:.4f}')

# Lasso via gradient descent with the RMSprop optimizer
epoch = 1000
# The last argument is n_degree, plus one when an intercept is included
# (noIntercept is False) — presumably the number of parameters; the first two
# look like learning rate and decay rate — TODO confirm against Optimizers.RMSprop.
optimizer = Optimizers.RMSprop(0.01, 0.9, n_degree+int(not noIntercept))
gradient = Gradients.Lasso(lambd)

gd = GradientDescent(n_degree, noIntercept=noIntercept)
gd.setOptimizer(optimizer)
gd.setGradient(gradient)

# Keep the returned R^2 values under their own name. Binding them to `R2`
# would overwrite the R2() scoring function called earlier in this cell, so
# re-running the cell would fail with "object is not callable".
theta, MSEs, R2_history, numberEpoch = gd.train(X_train, y_train, X_test, y_test, epoch)
print("Gradient descent R^2: ", R2_history[numberEpoch])

Closed-form OLS:   0.7604
Closed-form Ridge: 0.7929
Gradient descent R^2:  0.6594100245555866
