In [2]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate

### 1. Load the preprocessed datasets

In [3]:
# Single place to change where the preprocessed (feature-engineered, scaled) CSVs live.
DATA_DIR = Path(r"C:\Users\YILMAZ\Desktop")

train = pd.read_csv(DATA_DIR / "train_fe_scaled.csv")
# NOTE(review): this cell previously read train_fe_scaled.csv for BOTH frames, so
# `test` was just a copy of the training data. The test file name below is assumed
# to follow the same naming pattern -- confirm it matches the file on disk.
test = pd.read_csv(DATA_DIR / "test_fe_scaled.csv")

### 2. Define features and target variable

In [4]:
# Split the training frame into the target column and the remaining predictor columns.
target = "price_z"
y_train = train[target]
X_train = train.drop(target, axis=1)

### 3. Define the objective function for Optuna

In [5]:
def objective(trial):
    """Optuna objective: mean 5-fold cross-validated RMSE of Ridge for a sampled alpha.

    Parameters
    ----------
    trial
        Optuna trial; only ``trial.suggest_float`` is used, to draw ``alpha``
        from [1.0, 1000.0] on a log scale.

    Returns
    -------
    The mean of the per-fold RMSE values computed on the module-level
    ``X_train`` / ``y_train``. Lower is better.
    """
    ridge_alpha = trial.suggest_float('alpha', 1.0, 1000.0, log=True)

    # Fixed shuffle seed: every trial is scored on the same five splits.
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    neg_rmse_per_fold = cross_val_score(
        Ridge(alpha=ridge_alpha, random_state=42),
        X_train,
        y_train,
        cv=folds,
        scoring='neg_root_mean_squared_error',
    )

    # scikit-learn reports the negated RMSE; flip the sign of the mean back.
    return -neg_rmse_per_fold.mean()

### 4. Run Optuna study to find the best alpha (regularization strength)

In [6]:
# Seed the sampler explicitly: without it the suggested alphas (and therefore the
# reported best alpha) change on every Restart & Run All.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='minimize', sampler=sampler)

# Stops after 50 trials or 900 seconds, whichever comes first. If the timeout is
# hit, fewer trials run and the result can still differ between machines.
study.optimize(objective, n_trials=50, timeout=900)

print("\n Best hyperparameter found by Optuna:")
print(study.best_params)

[I 2025-05-23 06:43:41,856] A new study created in memory with name: no-name-b57c8e3e-00ac-4192-8c42-1b9e8dc9550f
[I 2025-05-23 06:43:42,308] Trial 0 finished with value: 170105.78785147294 and parameters: {'alpha': 1.363743688614485}. Best is trial 0 with value: 170105.78785147294.
[I 2025-05-23 06:43:42,671] Trial 1 finished with value: 170114.88691847533 and parameters: {'alpha': 228.23963381335375}. Best is trial 0 with value: 170105.78785147294.
[I 2025-05-23 06:43:43,055] Trial 2 finished with value: 170105.95310118358 and parameters: {'alpha': 32.08857629024616}. Best is trial 0 with value: 170105.78785147294.
[I 2025-05-23 06:43:43,433] Trial 3 finished with value: 170105.78722004552 and parameters: {'alpha': 2.64461710601004}. Best is trial 3 with value: 170105.78722004552.
[I 2025-05-23 06:43:43,787] Trial 4 finished with value: 170105.9249339821 and parameters: {'alpha': 29.506185831777383}. Best is trial 3 with value: 170105.78722004552.
[I 2025-05-23 06:43:44,141] Trial 5 


 Best hyperparameter found by Optuna:
{'alpha': 3.161125012584329}


### 6. Train the Ridge model using the best alpha

In [7]:
# Refit Ridge on the full training data using the alpha selected by the study.
tuned_alpha = study.best_params['alpha']
best_model = Ridge(random_state=42, alpha=tuned_alpha)
best_model.fit(X_train, y_train)

### 7. Evaluate model performance using 5-fold cross-validation

In [8]:
# NOTE: these are the same five splits (same KFold settings) the Optuna objective
# used to pick alpha, so the scores below are the tuning scores of the selected
# alpha, not an independent estimate of generalization error.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# One multi-metric pass: each fold is fitted once and scored with all three
# metrics, instead of refitting the model separately for every metric.
cv_scores = cross_validate(
    best_model,
    X_train,
    y_train,
    cv=cv,
    scoring={
        'rmse': 'neg_root_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
        'r2': 'r2',
    },
)

# Error metrics come back negated (scikit-learn maximizes scores); flip the sign.
cv_rmse = -cv_scores['test_rmse']
cv_mae = -cv_scores['test_mae']
cv_r2 = cv_scores['test_r2']

print("\n Tuned Ridge Regression 5-Fold Cross-Validation Results:")
print(f"🔹 RMSE: {cv_rmse.mean():,.2f} ± {cv_rmse.std():,.2f}")
print(f"🔹 MAE: {cv_mae.mean():,.2f} ± {cv_mae.std():,.2f}")
print(f"🔹 R²: {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")


 Tuned Ridge Regression 5-Fold Cross-Validation Results:
🔹 RMSE: 170,105.79 ± 2,707.44
🔹 MAE: 118,579.12 ± 626.37
🔹 R²: 0.8436 ± 0.0018
