In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.preprocessing import StandardScaler

### 1. Load the preprocessed datasets and define the features and target variable

In [2]:
# Single place to repoint when the data lives somewhere else; the CSV names
# below are resolved relative to it.
DATA_DIR = Path(r"C:\Users\YILMAZ\Desktop")

train = pd.read_csv(DATA_DIR / "train_fe_scaled.csv")
# The original cell read the *train* CSV here as well, so `test` was just a
# second copy of the training rows. Load the held-out file instead.
# NOTE(review): file name inferred from the train file's naming — confirm it exists.
test = pd.read_csv(DATA_DIR / "test_fe_scaled.csv")

# Column to predict; every other column of `train` is used as a feature.
# NOTE(review): the name suggests a z-scored price — confirm the target's scale.
target = "price_z"
X_train = train.drop(columns=[target])
y_train = train[target]

### 2. Define the objective function for Optuna

In [3]:
def objective(trial):
    """Optuna objective: mean 5-fold CV RMSE of a Lasso for a sampled alpha.

    Draws ``alpha`` log-uniformly from [1e-4, 100], scores a Lasso with that
    alpha on the notebook-level ``X_train`` / ``y_train`` using shuffled
    5-fold cross-validation, and returns the mean RMSE across folds.

    Parameters
    ----------
    trial : optuna trial object used to sample ``alpha``.

    Returns
    -------
    Mean RMSE over the folds (lower is better).
    """
    sampled_alpha = trial.suggest_float('alpha', 1e-4, 100.0, log=True)

    # Fixed seed: every trial is scored on the same fold assignment.
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    estimator = Lasso(alpha=sampled_alpha, random_state=42, max_iter=10000)

    # The scorer returns negated RMSE (higher is better), so flip the sign
    # of the mean to hand Optuna a quantity to minimise.
    neg_rmse_per_fold = cross_val_score(
        estimator, X_train, y_train,
        scoring='neg_root_mean_squared_error',
        cv=folds,
    )
    return -neg_rmse_per_fold.mean()

### 3. Run the Optuna study to find the best alpha

In [4]:
# Seed the sampler so the sequence of suggested alphas (and therefore the
# selected best alpha) is the same on every Restart & Run All. Without a seed
# each run explores different values.
# Note: `timeout` can still end the search before `n_trials` on a slow
# machine, which would change how many trials contribute to the result.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='minimize', sampler=sampler)
study.optimize(objective, n_trials=50, timeout=900)

print("\n Best hyperparameters found by Optuna:")
print(study.best_params)

[I 2025-11-04 15:37:41,149] A new study created in memory with name: no-name-0252588f-3a42-48c5-b63e-a56b9b2f5f89
[I 2025-11-04 15:37:51,120] Trial 0 finished with value: 170105.77948214108 and parameters: {'alpha': 0.5137268470066186}. Best is trial 0 with value: 170105.77948214108.
[I 2025-11-04 15:37:56,977] Trial 1 finished with value: 170106.93236153852 and parameters: {'alpha': 72.23945199350247}. Best is trial 0 with value: 170105.77948214108.
[I 2025-11-04 15:38:03,738] Trial 2 finished with value: 170105.57318986676 and parameters: {'alpha': 18.494665968964526}. Best is trial 2 with value: 170105.57318986676.
[I 2025-11-04 15:38:13,822] Trial 3 finished with value: 170105.7760074635 and parameters: {'alpha': 0.7006531851676419}. Best is trial 2 with value: 170105.57318986676.
[I 2025-11-04 15:38:29,460] Trial 4 finished with value: 170105.7892506219 and parameters: {'alpha': 0.0002678461969279075}. Best is trial 2 with value: 170105.57318986676.
[I 2025-11-04 15:38:38,820] Tri


 Best hyperparameters found by Optuna:
{'alpha': 18.85850161686597}


### 4. Train the Lasso model using the best alpha

In [5]:
# Fit one Lasso on the full training set with the alpha selected by the study.
best_alpha = study.best_params['alpha']
lasso = Lasso(max_iter=10000, random_state=42, alpha=best_alpha).fit(X_train, y_train)

# Show the fitted estimator as the cell output.
lasso

### 5. Evaluate model performance using 5-fold cross-validation

In [6]:
# Score the tuned Lasso with shuffled 5-fold CV. All three metrics are
# computed in ONE cross-validation pass: each fold's model is fitted once and
# scored three times, instead of refitting the same folds once per metric.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_results = cross_validate(
    lasso, X_train, y_train,
    cv=cv,
    scoring={
        'rmse': 'neg_root_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
        'r2': 'r2',
    },
)

# The error scorers are negated ("higher is better"), so flip their sign back;
# R² is already in its natural orientation.
cv_rmse = -cv_results['test_rmse']
cv_mae = -cv_results['test_mae']
cv_r2 = cv_results['test_r2']

print("\n Tuned Lasso 5-Fold Cross-Validation Results:")
print(f" RMSE: {cv_rmse.mean():,.2f} ± {cv_rmse.std():,.2f}")
print(f" MAE: {cv_mae.mean():,.2f} ± {cv_mae.std():,.2f}")
print(f" R²: {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")


 Tuned Lasso 5-Fold Cross-Validation Results:
 RMSE: 170,105.57 ± 2,707.10
 MAE: 118,567.99 ± 625.10
 R²: 0.8436 ± 0.0018
