In [1]:
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate

### 1. Load the preprocessed datasets

In [2]:
# Load the feature-engineered, scaled train and test splits.
# The test frame must come from its own file: reading the training CSV twice
# would make `test` a copy of the training data and any test-set evaluation a leak.
# NOTE(review): the test file name assumes the scaled test split is saved next to
# the training file as test_fe_scaled.csv — confirm the actual name.
# NOTE(review): these are absolute, machine-specific paths; consider a configurable
# data directory so the notebook runs on other machines.
train = pd.read_csv(r"C:\Users\YILMAZ\Desktop\train_fe_scaled.csv")
test = pd.read_csv(r"C:\Users\YILMAZ\Desktop\test_fe_scaled.csv")

### 2. Define features and target variable

In [3]:
# Column the model predicts; every other column of `train` is used as a feature.
target = "price_z"

# Pull the label out first, then build the feature matrix without it.
y_train = train.loc[:, target]
X_train = train.drop(target, axis="columns")

### 3. Define the objective function for Optuna

In [4]:
def objective(trial):
    """Optuna objective: cross-validated RMSE of a Ridge model for one sampled alpha.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``alpha`` log-uniformly from [1.0, 1000.0].

    Returns
    -------
    float
        Mean RMSE over a shuffled 5-fold split of ``X_train`` / ``y_train``
        (lower is better).
    """
    # Fixed seed so every trial is scored on the same five folds.
    folds = KFold(n_splits=5, shuffle=True, random_state=42)

    ridge = Ridge(
        alpha=trial.suggest_float('alpha', 1.0, 1000.0, log=True),
        random_state=42,
    )

    # scikit-learn reports RMSE negated (higher-is-better convention),
    # so flip the sign back after averaging across folds.
    neg_rmse_per_fold = cross_val_score(
        ridge, X_train, y_train,
        scoring='neg_root_mean_squared_error',
        cv=folds,
    )
    return -neg_rmse_per_fold.mean()

### 4. Run Optuna study to find the best alpha (regularization strength)

In [5]:
# Search for the alpha that minimizes the cross-validated RMSE.
# TPE is Optuna's default single-objective sampler; seeding it makes the sequence
# of suggested alphas (and therefore the selected best alpha) reproducible on a
# fresh kernel, provided all trials finish before the timeout.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
# Stop after 50 trials or 900 seconds, whichever comes first.
study.optimize(objective, n_trials=50, timeout=900)

print("\n Best hyperparameter found by Optuna:")
print(study.best_params)

[I 2025-11-04 15:02:58,082] A new study created in memory with name: no-name-d9923da7-1d66-42f9-a1ea-352f06dd14f0
[I 2025-11-04 15:02:58,627] Trial 0 finished with value: 170107.64636189083 and parameters: {'alpha': 101.6752851378999}. Best is trial 0 with value: 170107.64636189083.
[I 2025-11-04 15:02:59,102] Trial 1 finished with value: 170105.7876609606 and parameters: {'alpha': 1.636953657506596}. Best is trial 1 with value: 170105.7876609606.
[I 2025-11-04 15:02:59,507] Trial 2 finished with value: 170105.7879489374 and parameters: {'alpha': 1.2384967338390285}. Best is trial 1 with value: 170105.7876609606.
[I 2025-11-04 15:02:59,928] Trial 3 finished with value: 170105.79774066826 and parameters: {'alpha': 10.469363354372394}. Best is trial 1 with value: 170105.7876609606.
[I 2025-11-04 15:03:00,356] Trial 4 finished with value: 170105.81482999126 and parameters: {'alpha': 14.954724468401034}. Best is trial 1 with value: 170105.7876609606.
[I 2025-11-04 15:03:00,749] Trial 5 fin


 Best hyperparameter found by Optuna:
{'alpha': 3.2715013713811003}


### 5. Train the Ridge model using the best alpha

In [6]:
# Refit Ridge on the full training set with the regularization strength
# selected by the Optuna study.
best_model = Ridge(
    random_state=42,
    alpha=study.best_params['alpha'],
)
best_model.fit(X_train, y_train)

### 6. Evaluate model performance using 5-fold cross-validation

In [8]:
# Same shuffled 5-fold split that was used during tuning.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Score all three metrics in a single pass: each fold's model is fitted once
# and evaluated with every scorer, instead of refitting per metric.
cv_scores = cross_validate(
    best_model, X_train, y_train,
    scoring=['neg_root_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
    cv=cv,
)

# Error metrics come back negated (higher-is-better convention); flip the sign.
cv_rmse = -cv_scores['test_neg_root_mean_squared_error']
cv_mae = -cv_scores['test_neg_mean_absolute_error']
cv_r2 = cv_scores['test_r2']

print("\n Tuned Ridge Regression 5-Fold Cross-Validation Results:")
print(f" RMSE: {cv_rmse.mean():,.2f} ± {cv_rmse.std():,.2f}")
print(f" MAE: {cv_mae.mean():,.2f} ± {cv_mae.std():,.2f}")
print(f" R²: {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")


 Tuned Ridge Regression 5-Fold Cross-Validation Results:
 RMSE: 170,105.79 ± 2,707.45
 MAE: 118,579.04 ± 626.37
 R²: 0.8436 ± 0.0018
