# Train-Test Split

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
from scipy.optimize import minimize

# Load the Wine Quality dataset from a URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Split the data into features and target
X = df.drop("quality", axis=1).values
y = df["quality"].values

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the loss function for Ridge regression
def ridge_loss(log_alpha, X, y):
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to alpha
    model = Ridge(alpha=alpha)
    model.fit(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)
    penalty = alpha * np.sum(model.coef_ ** 2)
    return mse + penalty

# Define the loss function for Lasso regression
def lasso_loss(log_alpha, X, y):
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to alpha
    model = Lasso(alpha=alpha, max_iter=10000)
    model.fit(X, y)
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)
    penalty = alpha * np.sum(np.abs(model.coef_))
    return mse + penalty

# Optimize alpha for Ridge
ridge_result = minimize(ridge_loss, x0=np.log(1.0), args=(X_train, y_train), bounds=[(None, None)])
optimal_alpha_ridge = np.exp(ridge_result.x[0])

# Optimize alpha for Lasso
lasso_result = minimize(lasso_loss, x0=np.log(1.0), args=(X_train, y_train), bounds=[(None, None)])
optimal_alpha_lasso = np.exp(lasso_result.x[0])

# Fit Ridge model with optimal alpha
ridge_model = Ridge(alpha=optimal_alpha_ridge)
ridge_model.fit(X_train, y_train)
ridge_train_mse = mean_squared_error(y_train, ridge_model.predict(X_train))
ridge_test_mse = mean_squared_error(y_test, ridge_model.predict(X_test))

# Fit Lasso model with optimal alpha
lasso_model = Lasso(alpha=optimal_alpha_lasso, max_iter=10000)
lasso_model.fit(X_train, y_train)
lasso_train_mse = mean_squared_error(y_train, lasso_model.predict(X_train))
lasso_test_mse = mean_squared_error(y_test, lasso_model.predict(X_test))

# Print results
print(f"Optimal Ridge alpha: {optimal_alpha_ridge}")
print(f"Ridge Train MSE: {ridge_train_mse}")
print(f"Ridge Test MSE: {ridge_test_mse}")
print()
print(f"Optimal Lasso alpha: {optimal_alpha_lasso}")
print(f"Lasso Train MSE: {lasso_train_mse}")
print(f"Lasso Test MSE: {lasso_test_mse}")

Optimal Ridge alpha: 4.843104261147604e-05
Ridge Train MSE: 0.42419115731399054
Ridge Test MSE: 0.39002514457300314

Optimal Lasso alpha: 1.0
Lasso Train MSE: 0.6505925698157706
Lasso Test MSE: 0.6571600689645265


# Cross-Validated - Sklearn

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
from scipy.optimize import minimize

# Load the Wine Quality dataset from a URL
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Split the data into features and target
X = df.drop("quality", axis=1).values
y = df["quality"].values

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define the cross-validation function
def cross_val_score_mse(model, X, y, cv=5):
    """
    Perform K-Fold cross-validation and compute the mean squared error (MSE).
    Args:
        model: The regression model (Ridge or Lasso).
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Mean cross-validated MSE.
    """
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    mse_scores = []

    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit the model and predict
        model.fit(X_train, y_train)
        y_pred = model.predict(X_val)

        # Compute MSE
        mse_scores.append(mean_squared_error(y_val, y_pred))

    return np.mean(mse_scores)

# Define the loss function for Ridge regression with cross-validation
def ridge_loss(log_alpha, X, y, cv=5):
    """
    Loss function for Ridge regression using log(alpha) for optimization.
    Args:
        log_alpha (float): Log of regularization parameter alpha.
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Cross-validated MSE.
    """
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to alpha
    model = Ridge(alpha=alpha)
    return cross_val_score_mse(model, X, y, cv)

# Define the loss function for Lasso regression with cross-validation
def lasso_loss(log_alpha, X, y, cv=5):
    """
    Loss function for Lasso regression using log(alpha) for optimization.
    Args:
        log_alpha (float): Log of regularization parameter alpha.
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Cross-validated MSE.
    """
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to alpha
    model = Lasso(alpha=alpha, max_iter=10000)
    return cross_val_score_mse(model, X, y, cv)

# Optimize alpha for Ridge
ridge_result = minimize(ridge_loss, x0=np.log(1.0), args=(X, y), bounds=[(None, None)])
optimal_alpha_ridge = np.exp(ridge_result.x[0])

# Optimize alpha for Lasso
lasso_result = minimize(lasso_loss, x0=np.log(1.0), args=(X, y), bounds=[(None, None)])
optimal_alpha_lasso = np.exp(lasso_result.x[0])

# Final models with optimal alphas
ridge_model = Ridge(alpha=optimal_alpha_ridge)
lasso_model = Lasso(alpha=optimal_alpha_lasso, max_iter=10000)

# Fit the models on the full dataset
ridge_model.fit(X, y)
lasso_model.fit(X, y)

# Compute cross-validated MSE for Ridge and Lasso
cv_mse_ridge = cross_val_score_mse(ridge_model, X, y, cv=5)
cv_mse_lasso = cross_val_score_mse(lasso_model, X, y, cv=5)

# Print results
print(f"Optimal Ridge alpha: {optimal_alpha_ridge:.4f}")
print(f"Cross-validated MSE for Ridge: {cv_mse_ridge:.4f}")
print("\n")
print("Ridge Coefficients:")
print(ridge_model.coef_)
print("\n")

print(f"Optimal Lasso alpha: {optimal_alpha_lasso:.4f}")
print(f"Cross-validated MSE for Lasso: {cv_mse_lasso:.4f}")
print("\n")
print("Lasso Coefficients:")
print(lasso_model.coef_)

Optimal Ridge alpha: 83.8863
Cross-validated MSE for Ridge: 0.4278


Ridge Coefficients:
[ 0.03526939 -0.15991515 -0.00133552  0.02022562 -0.08424231  0.0392259
 -0.10756028 -0.05997531 -0.07499151  0.15967092  0.25593338]


Optimal Lasso alpha: 1.0000
Cross-validated MSE for Lasso: 0.6523


Lasso Coefficients:
[ 0. -0.  0.  0. -0. -0. -0. -0. -0.  0.  0.]


# Cross-Validated - Statsmodels

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy.optimize import minimize
import statsmodels.api as sm

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Split the data into features and target
X = df.drop("quality", axis=1).values
y = df["quality"].values

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Add a constant for statsmodels
X = sm.add_constant(X)

# Define the cross-validation function for Statsmodels
def cross_val_score_mse_statsmodels(X, y, alpha, penalty, cv=5):
    """
    Compute cross-validated MSE using Statsmodels with regularization.
    Args:
        X (ndarray): Features.
        y (ndarray): Target.
        alpha (float): Regularization parameter.
        penalty (str): 'l1' for Lasso, 'l2' for Ridge.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Mean cross-validated MSE.
    """
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    mse_scores = []

    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit the regularized model
        model = sm.OLS(y_train, X_train)
        result = model.fit_regularized(alpha=alpha, L1_wt=1.0 if penalty == 'l1' else 0.0)

        # Predict on validation set
        y_pred = result.predict(X_val)

        # Compute MSE
        mse_scores.append(mean_squared_error(y_val, y_pred))

    return np.mean(mse_scores)

# Define the loss function for Ridge (L2 regularization)
def ridge_loss(log_alpha, X, y, cv=5):
    """
    Loss function for Ridge regularization.
    Args:
        log_alpha (float): Log of regularization parameter alpha.
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Cross-validated MSE.
    """
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to scalar
    return cross_val_score_mse_statsmodels(X, y, alpha, penalty='l2', cv=cv)

# Define the loss function for Lasso (L1 regularization)
def lasso_loss(log_alpha, X, y, cv=5):
    """
    Loss function for Lasso regularization.
    Args:
        log_alpha (float): Log of regularization parameter alpha.
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Cross-validated MSE.
    """
    alpha = np.exp(log_alpha).item()  # Convert log(alpha) back to scalar
    return cross_val_score_mse_statsmodels(X, y, alpha, penalty='l1', cv=cv)

# Optimize alpha for Ridge
ridge_result = minimize(ridge_loss, x0=np.log(1.0), args=(X, y), bounds=[(None, None)])
optimal_alpha_ridge = np.exp(ridge_result.x[0])

# Optimize alpha for Lasso
lasso_result = minimize(lasso_loss, x0=np.log(1.0), args=(X, y), bounds=[(None, None)])
optimal_alpha_lasso = np.exp(lasso_result.x[0])

# Final Ridge and Lasso models
ridge_model = sm.OLS(y, X).fit_regularized(alpha=optimal_alpha_ridge, L1_wt=0.0)
lasso_model = sm.OLS(y, X).fit_regularized(alpha=optimal_alpha_lasso, L1_wt=1.0)

# Compute cross-validated MSE for Ridge and Lasso
cv_mse_ridge = cross_val_score_mse_statsmodels(X, y, alpha=optimal_alpha_ridge, penalty='l2', cv=5)
cv_mse_lasso = cross_val_score_mse_statsmodels(X, y, alpha=optimal_alpha_lasso, penalty='l1', cv=5)

# Print results
print(f"Optimal Ridge alpha: {optimal_alpha_ridge:.4f}")
print(f"Cross-validated MSE for Ridge: {cv_mse_ridge:.4f}")
print("\n")
print("Ridge Coefficients:")
print(ridge_model.params)
print("\n")

print(f"Optimal Lasso alpha: {optimal_alpha_lasso:.4f}")
print(f"Cross-validated MSE for Lasso: {cv_mse_lasso:.4f}")
print("\n")
print("Lasso Coefficients:")
print(lasso_model.params)

Optimal Ridge alpha: 0.0005
Cross-validated MSE for Ridge: 0.4288


Ridge Coefficients:
[ 5.63320946  0.04372845 -0.19382464 -0.03533395  0.02310372 -0.08814925
  0.04552411 -0.10728497 -0.03407612 -0.0635953   0.15524334  0.29391203]


Optimal Lasso alpha: 0.0093
Cross-validated MSE for Lasso: 0.4310


Lasso Coefficients:
[ 5.62676592  0.05021811 -0.18819774  0.          0.00817306 -0.06915114
  0.01678569 -0.07535471 -0.02397234  0.          0.14025734  0.28519736]


# Cross-Validated Elastic Net

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy.optimize import minimize
import statsmodels.api as sm

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
df = pd.read_csv(url, sep=";")

# Split the data into features and target
X = df.drop("quality", axis=1).values
y = df["quality"].values

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Add a constant for statsmodels
X = sm.add_constant(X)

# Define the cross-validation function for Statsmodels
def cross_val_score_mse_statsmodels(X, y, alpha, L1_wt, cv=5):
    """
    Compute cross-validated MSE using Statsmodels with regularization.
    Args:
        X (ndarray): Features.
        y (ndarray): Target.
        alpha (float): Regularization parameter.
        L1_wt (float): Weight for L1 regularization (0.0 = Ridge, 1.0 = Lasso).
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Mean cross-validated MSE.
    """
    kf = KFold(n_splits=cv, shuffle=True, random_state=42)
    mse_scores = []

    for train_idx, val_idx in kf.split(X):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit the regularized model
        model = sm.OLS(y_train, X_train)
        result = model.fit_regularized(alpha=alpha, L1_wt=L1_wt)

        # Predict on validation set
        y_pred = result.predict(X_val)

        # Compute MSE
        mse_scores.append(mean_squared_error(y_val, y_pred))

    return np.mean(mse_scores)

# Define the loss function for optimizing alpha and L1_wt
def elastic_net_loss(params, X, y, cv=5):
    """
    Loss function for Elastic Net regularization.
    Args:
        params (ndarray): [log_alpha, L1_wt] where:
            log_alpha: Logarithm of regularization parameter alpha.
            L1_wt: Weight for L1 regularization (0.0 to 1.0).
        X (ndarray): Features.
        y (ndarray): Target.
        cv (int): Number of folds for cross-validation.
    Returns:
        float: Cross-validated MSE.
    """
    log_alpha, L1_wt = params
    alpha = np.exp(log_alpha)  # Convert log_alpha back to alpha
    L1_wt = np.clip(L1_wt, 0, 1)  # Ensure L1_wt is between 0 and 1
    return cross_val_score_mse_statsmodels(X, y, alpha, L1_wt, cv)

# Initial guess for log_alpha and L1_wt
initial_params = [np.log(1.0), 0.5]  # Start with alpha=1.0 and L1_wt=0.5

# Optimize log_alpha and L1_wt
result = minimize(elastic_net_loss, x0=initial_params, args=(X, y), bounds=[(None, None), (0, 1)])
optimal_log_alpha, optimal_L1_wt = result.x
optimal_alpha = np.exp(optimal_log_alpha)

# Fit final model with optimal parameters
final_model = sm.OLS(y, X).fit_regularized(alpha=optimal_alpha, L1_wt=optimal_L1_wt)

# Compute cross-validated MSE for the final model
cv_mse_final = cross_val_score_mse_statsmodels(X, y, alpha=optimal_alpha, L1_wt=optimal_L1_wt, cv=5)

# Print results
print(f"Optimal alpha: {optimal_alpha:.4f}")
print(f"Optimal L1_wt: {optimal_L1_wt:.4f}")
print(f"Cross-validated MSE for Elastic Net: {cv_mse_final:.4f}")
print("\nElastic Net Coefficients:")
print(final_model.params)

Optimal alpha: 0.0221
Optimal L1_wt: 1.0000
Cross-validated MSE for Elastic Net: 0.4309

Elastic Net Coefficients:
[ 5.61390416  0.02424287 -0.18927093  0.          0.         -0.05025426
  0.         -0.05279259  0.          0.          0.12057652  0.29225094]
