In [1]:
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the modelling tables and split the training frame into features/target.
# NOTE(review): these are absolute, machine-specific paths; consider moving them
# to a configurable data directory so the notebook runs on other machines.
train = pd.read_csv(r"C:\Users\YILMAZ\Desktop\train_fe_scaled.csv")
# Fix: `test` used to be read from the *training* CSV (copy-paste slip), which
# would make any later hold-out evaluation score on rows the model was fit on.
# NOTE(review): assumes the hold-out file sits next to the training file and is
# named `test_fe_scaled.csv` -- confirm the actual filename.
test = pd.read_csv(r"C:\Users\YILMAZ\Desktop\test_fe_scaled.csv")

# Every column except the target is used as a predictor.
target = "price_z"
X_train = train.drop(columns=[target])
y_train = train[target]

In [3]:
def objective(trial):
    """Optuna objective: mean 5-fold CV RMSE of a Lasso for one sampled ``alpha``.

    Parameters
    ----------
    trial : optuna trial object
        Used to sample ``alpha`` log-uniformly from [1e-4, 100].

    Returns
    -------
    float
        Mean RMSE across the five folds (lower is better).

    Reads ``X_train`` and ``y_train`` from the notebook's global namespace.
    """
    sampled_alpha = trial.suggest_float('alpha', 1e-4, 100.0, log=True)

    # Fixed seed -> every trial is scored on the same shuffled folds.
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    estimator = Lasso(alpha=sampled_alpha, random_state=42, max_iter=10000)

    neg_rmse_per_fold = cross_val_score(
        estimator,
        X_train,
        y_train,
        scoring='neg_root_mean_squared_error',
        cv=folds,
    )
    # The scorer reports negated RMSE; flip the sign so the study minimises RMSE.
    return -neg_rmse_per_fold.mean()

In [4]:
# Run the hyperparameter search for the Lasso `alpha`.
# Fix: seed the sampler explicitly. Without a seed the default sampler draws
# different alphas on every run, so the "best" alpha (and everything fit from
# it below) changed on each Restart & Run All.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='minimize', sampler=sampler)

# Stops after 50 trials or 900 seconds, whichever comes first. If the timeout
# is hit, the number of completed trials can still vary between machines.
study.optimize(objective, n_trials=50, timeout=900)

print("\n Best hyperparameters found by Optuna:")
print(study.best_params)

[I 2025-05-21 20:56:53,744] A new study created in memory with name: no-name-fb1b02b1-c97a-4520-b746-8b90b1ce1dcb
[I 2025-05-21 20:56:57,501] Trial 0 finished with value: 170107.08567757934 and parameters: {'alpha': 74.89033720052088}. Best is trial 0 with value: 170107.08567757934.
[I 2025-05-21 20:57:05,072] Trial 1 finished with value: 170105.7853029877 and parameters: {'alpha': 0.20570085289969695}. Best is trial 1 with value: 170105.7853029877.
[I 2025-05-21 20:57:12,189] Trial 2 finished with value: 170105.78466547487 and parameters: {'alpha': 0.2391572424562645}. Best is trial 2 with value: 170105.78466547487.
[I 2025-05-21 20:57:17,555] Trial 3 finished with value: 170105.70709634395 and parameters: {'alpha': 4.432628283793676}. Best is trial 3 with value: 170105.70709634395.
[I 2025-05-21 20:57:22,776] Trial 4 finished with value: 170105.6957713697 and parameters: {'alpha': 5.150105385210556}. Best is trial 4 with value: 170105.6957713697.
[I 2025-05-21 20:57:30,882] Trial 5 f


 Best hyperparameters found by Optuna:
{'alpha': 19.984077908599467}


In [5]:
# Refit one Lasso on the full training data using the alpha the study selected.
best_alpha = study.best_params["alpha"]
lasso = Lasso(
    max_iter=10000,
    random_state=42,
    alpha=best_alpha,
)
# Left as a bare expression so the fitted estimator is displayed as the cell output.
lasso.fit(X=X_train, y=y_train)

In [6]:
# Evaluate the tuned Lasso with 5-fold cross-validation on the training data.
# Fix: the three metrics were computed with three separate cross_val_score
# calls, refitting the same model on the same folds 15 times. A single
# multi-metric cross_validate call fits each fold once (5 fits) and scores all
# three metrics on it.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_results = cross_validate(
    lasso,
    X_train,
    y_train,
    cv=cv,
    scoring={
        'rmse': 'neg_root_mean_squared_error',
        'mae': 'neg_mean_absolute_error',
        'r2': 'r2',
    },
)

# The error scorers are negated ("higher is better" convention); flip the sign
# back so RMSE and MAE are reported as positive errors.
cv_rmse = -cv_results['test_rmse']
cv_mae = -cv_results['test_mae']
cv_r2 = cv_results['test_r2']

print("\n Tuned Lasso 5-Fold Cross-Validation Results:")
print(f"🔹 RMSE: {cv_rmse.mean():,.2f} ± {cv_rmse.std():,.2f}")
print(f"🔹 MAE: {cv_mae.mean():,.2f} ± {cv_mae.std():,.2f}")
print(f"🔹 R²: {cv_r2.mean():.4f} ± {cv_r2.std():.4f}")


 Tuned Lasso 5-Fold Cross-Validation Results:
🔹 RMSE: 170,105.57 ± 2,707.10
🔹 MAE: 118,567.19 ± 625.02
🔹 R²: 0.8436 ± 0.0018
