In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score

# Load the pre-split feature matrices (kept as DataFrames) and flatten each
# target file into a 1-D NumPy array.
# NOTE(review): the variable names suggest the features were subsampled/scaled
# upstream; nothing in this notebook does that, so confirm against the data prep.
X_train_small = pd.read_csv('X_train.csv')
X_test_scaled = pd.read_csv('X_test.csv')
y_train_small = pd.read_csv('y_train.csv').to_numpy().ravel()
y_test = pd.read_csv('y_test.csv').to_numpy().ravel()

# Default grid of Ridge penalty strengths; later cells rebind this name
# before calling evaluate_ridge.
alpha_values = [1e-3, 1e-2, 1e-1, 1, 10, 100]

# Accumulator: evaluate_ridge appends one metrics dict per (transformation, alpha).
ridge_results = list()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score

# Baseline: ordinary least squares on the untransformed features.
model = LinearRegression().fit(X_train_small, y_train_small)

# Continuous regression outputs for each split.
y_train_pred_reg, y_test_pred_reg = (
    model.predict(split) for split in (X_train_small, X_test_scaled)
)

# Turn the regression outputs into integer labels: round to the nearest
# integer, then clamp into [0, 15].
# NOTE(review): presumably the targets are integer classes 0..15 — confirm.
y_train_pred = np.round(y_train_pred_reg).clip(0, 15).astype(int)
y_test_pred = np.round(y_test_pred_reg).clip(0, 15).astype(int)

# Regression-quality metrics use the raw (unrounded) predictions.
train_mse = mean_squared_error(y_train_small, y_train_pred_reg)
test_mse = mean_squared_error(y_test, y_test_pred_reg)
train_r2 = r2_score(y_train_small, y_train_pred_reg)
test_r2 = r2_score(y_test, y_test_pred_reg)

# Classification-style metrics use the rounded labels; precision and recall
# are support-weighted across classes and report 0 for classes never predicted.
train_accuracy = accuracy_score(y_train_small, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(
    y_test, y_test_pred, average='weighted', zero_division=0
)
recall = recall_score(
    y_test, y_test_pred, average='weighted', zero_division=0
)

# Report everything to four decimal places.
print("=== Baseline Linear Regression (No Regularization, No Transformations) ===")
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Train MSE: {train_mse:.4f}")
print(f"Test MSE: {test_mse:.4f}")
print(f"Train R²: {train_r2:.4f}")
print(f"Test R²: {test_r2:.4f}")

In [None]:
def _plot_train_test_curve(alphas, train_values, test_values, metric_label, transformation_name):
    """Plot one metric for the train and test splits against alpha (log x-axis)."""
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(alphas, train_values, marker='o', label=f'Training {metric_label}')
    ax.plot(alphas, test_values, marker='o', label=f'Testing {metric_label}')
    ax.set_xscale('log')
    ax.set_xlabel('Alpha (log scale)')
    ax.set_ylabel(metric_label)
    ax.set_title(f'Training vs Testing {metric_label}: {transformation_name}')
    ax.legend()
    ax.grid(True)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def evaluate_ridge(X_train, X_test, transformation_name, alphas=None):
    """Fit Ridge models over a grid of alphas and report train/test metrics.

    For every alpha a ``Ridge`` model is fitted on ``X_train`` against the
    module-level ``y_train_small`` and scored on both splits (test targets come
    from the module-level ``y_test``). Regression outputs are rounded and
    clamped to [0, 15] to obtain integer labels for accuracy, precision and
    recall; MSE is computed on the raw outputs.

    Side effects:
        * prints a header and shows two figures (accuracy and MSE vs. alpha);
        * displays a precision/recall table;
        * stores one dict per alpha in the module-level ``ridge_results`` list.
          Rows previously stored under the same ``transformation_name`` are
          replaced, so re-running a cell does not duplicate entries.

    Args:
        X_train: Training features passed to ``Ridge.fit`` / ``predict``.
        X_test: Test features passed to ``Ridge.predict``.
        transformation_name: Label used in printed output, plot titles and the
            'Transformation' field of the stored rows.
        alphas: Optional iterable of penalty strengths. When omitted, the
            current module-level ``alpha_values`` is used.

    Returns:
        None.
    """
    # Explicit import: the notebook otherwise only imports `display` in its
    # final cell, after this function has already been called.
    from IPython.display import display

    # Snapshot the grid once so the fits, the plots and the table all use the
    # same values even if the global is rebound afterwards.
    alphas = list(alpha_values if alphas is None else alphas)

    print(f"\n=== {transformation_name} ===")

    records = []
    for alpha in alphas:
        model = Ridge(alpha=alpha, random_state=42)
        model.fit(X_train, y_train_small)

        y_train_pred_reg = model.predict(X_train)
        y_test_pred_reg = model.predict(X_test)

        # Round to the nearest integer and clamp into the label range [0, 15].
        # NOTE(review): presumably the targets are integer classes 0..15 — confirm.
        y_train_pred = np.clip(np.round(y_train_pred_reg), 0, 15).astype(int)
        y_test_pred = np.clip(np.round(y_test_pred_reg), 0, 15).astype(int)

        records.append({
            'Transformation': transformation_name,
            'Alpha': alpha,
            'Train Accuracy': accuracy_score(y_train_small, y_train_pred),
            'Test Accuracy': accuracy_score(y_test, y_test_pred),
            'Precision': precision_score(y_test, y_test_pred, average='weighted', zero_division=0),
            'Recall': recall_score(y_test, y_test_pred, average='weighted', zero_division=0),
            'Train MSE': mean_squared_error(y_train_small, y_train_pred_reg),
            'Test MSE': mean_squared_error(y_test, y_test_pred_reg),
        })

    # Publish the rows only after every fit succeeded, replacing any rows left
    # by an earlier run of the same transformation (keeps re-runs idempotent).
    ridge_results[:] = [
        row for row in ridge_results if row['Transformation'] != transformation_name
    ] + records

    # Explicit columns keep the frame well-formed even for an empty grid.
    metrics = pd.DataFrame(records, columns=[
        'Transformation', 'Alpha', 'Train Accuracy', 'Test Accuracy',
        'Precision', 'Recall', 'Train MSE', 'Test MSE',
    ])

    _plot_train_test_curve(
        alphas, metrics['Train Accuracy'], metrics['Test Accuracy'],
        'Accuracy', transformation_name,
    )
    _plot_train_test_curve(
        alphas, metrics['Train MSE'], metrics['Test MSE'],
        'MSE', transformation_name,
    )

    # Table
    print(f"\nPrecision and Recall Table ({transformation_name}):")
    display(metrics[['Alpha', 'Precision', 'Recall']])

In [None]:
# Penalty grid for the untransformed-feature run. This rebinds the module-level
# grid, so it stays in effect for later cells until it is reassigned.
alpha_values = [1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
evaluate_ridge(
    X_train=X_train_small,
    X_test=X_test_scaled,
    transformation_name='No Transform',
)

In [None]:
# Degree-2 polynomial expansion: fit the expander on the training features
# only, then apply the same expansion to both splits.
poly2 = PolynomialFeatures(degree=2).fit(X_train_small)
X_train_poly2, X_test_poly2 = (
    poly2.transform(split) for split in (X_train_small, X_test_scaled)
)

evaluate_ridge(
    X_train_poly2,
    X_test_poly2,
    transformation_name='Polynomial Degree 2',
)

In [None]:
# Degree-3 polynomial expansion: fit the expander on the training features
# only, then apply the same expansion to both splits.
poly3 = PolynomialFeatures(degree=3).fit(X_train_small)
X_train_poly3, X_test_poly3 = (
    poly3.transform(split) for split in (X_train_small, X_test_scaled)
)

evaluate_ridge(
    X_train_poly3,
    X_test_poly3,
    transformation_name='Polynomial Degree 3',
)

In [None]:
# Degree-4 polynomial expansion: fit the expander on the training features
# only, then apply the same expansion to both splits.
poly4 = PolynomialFeatures(degree=4).fit(X_train_small)
X_train_poly4, X_test_poly4 = (
    poly4.transform(split) for split in (X_train_small, X_test_scaled)
)

evaluate_ridge(
    X_train_poly4,
    X_test_poly4,
    transformation_name='Polynomial Degree 4',
)

In [None]:
# Log transform: shift every feature by its training-set minimum so the log
# argument is strictly positive, then take the natural log.
log_eps = 1e-5

# Per-column minima, computed once from the training split only. The axis is
# explicit because np.min(DataFrame) yields per-column minima or a single
# scalar depending on the installed pandas version.
train_min = X_train_small.min(axis=0)

# Subtract first, then add the epsilon: (x - min) is exactly 0 at the minimum,
# so the epsilon cannot be lost to floating-point rounding on large values.
X_train_log = np.log((X_train_small - train_min) + log_eps)

# Test rows may fall below the training minimum; clamp the shifted values at
# log_eps so the log never receives a non-positive argument (NaN / -inf).
X_test_log = np.log(((X_test_scaled - train_min) + log_eps).clip(lower=log_eps))

evaluate_ridge(X_train_log, X_test_log, 'Log Transform')

In [None]:
# Wider penalty grid for the PCA run (rebinds the module-level grid).
alpha_values = [0.01, 0.1, 1, 10, 100, 1000]

# Project onto the first 12 principal components, fitted on the training
# features only. random_state pins the projection when PCA's default 'auto'
# solver selects the randomized SVD; it has no effect on the exact solvers.
pca = PCA(n_components=12, random_state=42)
X_train_pca = pca.fit_transform(X_train_small)
X_test_pca = pca.transform(X_test_scaled)

evaluate_ridge(X_train_pca, X_test_pca, 'PCA Transform 12')

In [None]:
from IPython.display import display

# Gather every stored per-alpha metrics record into a single table and show it.
ridge_results_df = pd.DataFrame(data=ridge_results)
display(ridge_results_df)

# Write the table to the working directory, omitting the row index.
ridge_results_df.to_csv(
    path_or_buf='ridge_final_metrics_results.csv',
    index=False,
)
print("Saved Ridge Regression metrics to 'ridge_final_metrics_results.csv'")