In [1]:
import optuna
import json
import numpy as np
import pandas as pd
from feature_settings_ import *
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import root_mean_squared_error


In [2]:
# ✅ 1. Load your data
# Assume your dataset is a pandas DataFrame
# and the target column is named 'option_return'
# Example: df = pd.read_csv('your_data.csv')
# NOTE(review): absolute, machine-specific path — consider moving it to a
# configurable data directory so the notebook runs on other machines.
df = pd.read_csv(r"C:\Users\55479\PycharmProjects\MS_thesis\datasets\whole_df_all_features_simple_option_return.csv") # open the time series data

# Select the target by name and build the features from *everything else*.
# Slicing features positionally (all but the last column) would leak the
# target into X whenever 'option_return' is not the final column.
y = df['option_return']          # your target
X = df.drop(columns=[y.name])    # your features

print(f"--- Your prediction target is [{y.name}] ---") # print the target to check

# ✅ 2. Split into train and test sets (e.g., 80/20)
# shuffle=False keeps the rows in file order, so the test set is the last 20% of rows.
X_full_train, X_test, y_full_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False  # shuffle=False to preserve time order
)


--- Your prediction target is [option_return] ---


In [3]:
# ✅ 3. Set is_tuning_done switch
# While this is False, the tuning cell (guarded by `if not is_tuning_done:`)
# runs the Optuna study and writes the best parameters to
# "Lasso_whole_tuning_best_parameters.json". Set it to True to skip that search.
is_tuning_done = False

In [None]:
# ✅ 4. Tune hyperparameters
def objective(trial):
    """Optuna objective: mean time-series cross-validated RMSE of a Lasso model.

    For one trial this samples a feature scaler, a target scaler and the Lasso
    hyperparameters, then evaluates the configuration with a 5-fold
    ``TimeSeriesSplit`` over the module-level ``X_full_train`` / ``y_full_train``.
    RMSE is always computed on the original (un-scaled) target.

    After every fold the running mean RMSE is reported to the trial so that the
    pruner attached to the study can stop unpromising trials early.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters and report progress.

    Returns
    -------
    float
        Mean RMSE across the validation folds (lower is better).

    Raises
    ------
    optuna.TrialPruned
        When the study's pruner decides the trial should be stopped.

    Notes
    -----
    Reads ``X_full_train``, ``y_full_train`` and ``FEATURES_WHOLE_SCALED`` from
    module scope (the latter presumably comes from ``feature_settings_`` —
    TODO confirm).
    """
    # One shared table of scaler factories; "none" means no scaling at all.
    scaler_factories = {
        "standard": StandardScaler,
        "minmax": MinMaxScaler,
        "maxabs": MaxAbsScaler,
        "robust": RobustScaler,
        "none": None,
    }
    scaler_choices = ["standard", "minmax", "maxabs", "robust", "none"]

    # --------- Choose scaler for data ------------
    # The order of the suggest_* calls is kept fixed so seeded sampling stays reproducible.
    scaler_name = trial.suggest_categorical("scaler", scaler_choices)
    target_scaler_name = trial.suggest_categorical("target_scaler", scaler_choices)

    scaler_factory = scaler_factories[scaler_name]
    target_scaler_factory = scaler_factories[target_scaler_name]
    scaler = scaler_factory() if scaler_factory is not None else None
    target_scaler = target_scaler_factory() if target_scaler_factory is not None else None

    # --------- Tune Lasso Hyperparameters ----------
    parameters = {
        "alpha": trial.suggest_float("alpha", 1e-8, 100.0, log=True),
        "fit_intercept": trial.suggest_categorical("fit_intercept", [True, False]),
        "precompute": trial.suggest_categorical("precompute", [True, False]),
        "copy_X": trial.suggest_categorical("copy_X", [True, False]),
        "max_iter": trial.suggest_int("max_iter", 500, 20000),
        "tol": trial.suggest_float("tol", 1e-8, 1e-1, log=True),
        # NOTE(review): a fresh Lasso is built for every fold, so warm_start
        # likely has no effect on the score — confirm before keeping it in the search.
        "warm_start": trial.suggest_categorical("warm_start", [True, False]),
        "positive": trial.suggest_categorical("positive", [True, False]),
        "selection": trial.suggest_categorical("selection", ["cyclic", "random"]),
        "random_state": 42  # fixed for reproducibility
    }

    # --------- Time Series Cross Validation ----------
    tscv = TimeSeriesSplit(n_splits=5)
    rmses = []

    for fold, (train_idx, val_idx) in enumerate(tscv.split(X_full_train)):
        X_train, X_val = X_full_train.iloc[train_idx], X_full_train.iloc[val_idx]
        y_train, y_val = y_full_train.iloc[train_idx], y_full_train.iloc[val_idx]

        # --------- Apply scaling if needed ----------
        if scaler is not None:
            ct = ColumnTransformer(
                transformers=[('scale', scaler, FEATURES_WHOLE_SCALED)],
                remainder='passthrough'  # leave all other columns untouched
            )
            # Fit on the training fold only, then apply the same transform to
            # the validation fold. The result is re-wrapped in a DataFrame that
            # keeps the original row index (column labels are not preserved).
            X_train_scaled = pd.DataFrame(
                ct.fit_transform(X_train),
                index=X_train.index
            )
            X_val_scaled = pd.DataFrame(
                ct.transform(X_val),
                index=X_val.index
            )
        else:
            X_train_scaled = X_train
            X_val_scaled = X_val

        if target_scaler is not None:
            # Scalers expect 2-D input, hence the reshape to a single column.
            y_train_scaled = target_scaler.fit_transform(y_train.to_numpy().reshape(-1, 1)).ravel()
        else:
            y_train_scaled = y_train

        # --------- Train model ----------
        model = Lasso(**parameters)
        model.fit(X_train_scaled, y_train_scaled)

        # --------- Predict & evaluate ----------
        y_pred_scaled = model.predict(X_val_scaled)

        if target_scaler is not None:
            # Map predictions back to the original target scale before scoring.
            y_pred = target_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
        else:
            y_pred = y_pred_scaled

        rmse = root_mean_squared_error(y_val, y_pred)
        rmses.append(rmse)

        # --------- Report progress so the pruner can act ----------
        # Without these calls a pruner configured on the study is never
        # consulted and every trial runs all folds regardless of its score.
        trial.report(float(np.mean(rmses)), step=fold)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return np.mean(rmses)  # return average cross-validated rmse

# --------- Set the switch ----------
if not is_tuning_done:

    # Minimise the cross-validated RMSE. The TPE sampler is seeded and spends
    # its first 20 trials on random search; a median pruner is attached.
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=2, n_startup_trials=10),
        sampler=optuna.samplers.TPESampler(n_startup_trials=20, seed=42),
        direction="minimize",
    )
    study.optimize(objective, show_progress_bar=True, n_trials=150)

    # Show the winning configuration and its score.
    print("Best params:", study.best_trial.params)
    print("Best rmse:", study.best_value)

    # Persist the winning configuration as indented JSON.
    with open("Lasso_whole_tuning_best_parameters.json", "w") as f:
        f.write(json.dumps(study.best_trial.params, indent=4))


[I 2025-07-14 17:42:59,324] A new study created in memory with name: no-name-2f8844fe-0f14-4a4f-9e64-1bcb3b63e832


  0%|          | 0/150 [00:00<?, ?it/s]

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


[I 2025-07-14 17:43:02,685] Trial 0 finished with value: 1.139640567625203 and parameters: {'scaler': 'minmax', 'target_scaler': 'maxabs', 'alpha': 1.6063676259174453e-08, 'fit_intercept': True, 'precompute': True, 'copy_X': False, 'max_iter': 10733, 'tol': 1.0558813779064815e-05, 'warm_start': False, 'positive': False, 'selection': 'random'}. Best is trial 0 with value: 1.139640567625203.
[I 2025-07-14 17:43:05,908] Trial 1 finished with value: 1.2307383861907069 and parameters: {'scaler': 'standard', 'target_scaler': 'none', 'alpha': 1.2133147609286499, 'fit_intercept': True, 'precompute': True, 'copy_X': False, 'max_iter': 1170, 'tol': 0.023186906702901938, 'warm_start': False, 'positive': False, 'selection': 'cyclic'}. Best is trial 0 with value: 1.139640567625203.


  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


[I 2025-07-14 17:43:09,246] Trial 2 finished with value: 1.193759775826528 and parameters: {'scaler': 'standard', 'target_scaler': 'standard', 'alpha': 7.705004503489671e-05, 'fit_intercept': False, 'precompute': True, 'copy_X': True, 'max_iter': 16143, 'tol': 3.3254812672742136e-08, 'warm_start': True, 'positive': True, 'selection': 'cyclic'}. Best is trial 0 with value: 1.139640567625203.
[I 2025-07-14 17:43:11,732] Trial 3 finished with value: 1.1839782614899064 and parameters: {'scaler': 'minmax', 'target_scaler': 'standard', 'alpha': 1.785801651522147e-05, 'fit_intercept': True, 'precompute': True, 'copy_X': False, 'max_iter': 15336, 'tol': 8.490639132761147e-05, 'warm_start': True, 'positive': True, 'selection': 'random'}. Best is trial 0 with value: 1.139640567625203.
[I 2025-07-14 17:43:12,883] Trial 4 finished with value: 1.319966965819377 and parameters: {'scaler': 'none', 'target_scaler': 'maxabs', 'alpha': 7.897952712237135e-06, 'fit_intercept': False, 'precompute': True, '

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


[I 2025-07-14 17:43:21,037] Trial 6 finished with value: 1.1324078641862227 and parameters: {'scaler': 'none', 'target_scaler': 'maxabs', 'alpha': 4.762459848032179e-05, 'fit_intercept': False, 'precompute': True, 'copy_X': True, 'max_iter': 4137, 'tol': 1.929416511888843e-08, 'warm_start': False, 'positive': False, 'selection': 'random'}. Best is trial 6 with value: 1.1324078641862227.
[I 2025-07-14 17:43:24,237] Trial 7 finished with value: 1.2307383861907069 and parameters: {'scaler': 'robust', 'target_scaler': 'maxabs', 'alpha': 0.039796095129452105, 'fit_intercept': True, 'precompute': True, 'copy_X': False, 'max_iter': 18059, 'tol': 0.0002702154649551783, 'warm_start': False, 'positive': False, 'selection': 'cyclic'}. Best is trial 6 with value: 1.1324078641862227.
[I 2025-07-14 17:43:28,628] Trial 8 finished with value: 1.217521135365877 and parameters: {'scaler': 'robust', 'target_scaler': 'maxabs', 'alpha': 0.003071410705373143, 'fit_intercept': True, 'precompute': False, 'cop

  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(
  model = cd_fast.enet_coordinate_descent_gram(


[I 2025-07-14 17:45:55,951] Trial 12 finished with value: 1.1842670414634857 and parameters: {'scaler': 'none', 'target_scaler': 'standard', 'alpha': 0.00024530940950079816, 'fit_intercept': True, 'precompute': True, 'copy_X': True, 'max_iter': 3879, 'tol': 1.2446845497676886e-07, 'warm_start': False, 'positive': True, 'selection': 'random'}. Best is trial 6 with value: 1.1324078641862227.
[I 2025-07-14 17:45:58,723] Trial 13 finished with value: 1.1841126226017251 and parameters: {'scaler': 'standard', 'target_scaler': 'minmax', 'alpha': 2.554394810315551e-08, 'fit_intercept': True, 'precompute': True, 'copy_X': True, 'max_iter': 3877, 'tol': 1.0888336764584933e-05, 'warm_start': False, 'positive': True, 'selection': 'random'}. Best is trial 6 with value: 1.1324078641862227.
[I 2025-07-14 17:48:24,091] Trial 14 finished with value: 1.1353700287259791 and parameters: {'scaler': 'minmax', 'target_scaler': 'robust', 'alpha': 7.615514722769491e-05, 'fit_intercept': True, 'precompute': Fal

  model = cd_fast.enet_coordinate_descent(


[I 2025-07-14 17:51:25,660] Trial 18 finished with value: 1.2202057742077166 and parameters: {'scaler': 'robust', 'target_scaler': 'standard', 'alpha': 1.4971401859629912e-07, 'fit_intercept': False, 'precompute': False, 'copy_X': True, 'max_iter': 17438, 'tol': 3.6742683316655024e-07, 'warm_start': True, 'positive': True, 'selection': 'cyclic'}. Best is trial 6 with value: 1.1324078641862227.
[I 2025-07-14 17:51:28,685] Trial 19 finished with value: 1.3364845757003052 and parameters: {'scaler': 'standard', 'target_scaler': 'minmax', 'alpha': 10.270228468903733, 'fit_intercept': False, 'precompute': True, 'copy_X': False, 'max_iter': 5909, 'tol': 2.0741880033374006e-07, 'warm_start': True, 'positive': True, 'selection': 'random'}. Best is trial 6 with value: 1.1324078641862227.


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[I 2025-07-14 18:10:19,355] Trial 20 finished with value: 1.1383691450478288 and parameters: {'scaler': 'none', 'target_scaler': 'robust', 'alpha': 1.6956448807460853e-06, 'fit_intercept': False, 'precompute': False, 'copy_X': False, 'max_iter': 12026, 'tol': 1.110839461806612e-08, 'warm_start': False, 'positive': False, 'selection': 'random'}. Best is trial 6 with value: 1.1324078641862227.


  model = cd_fast.enet_coordinate_descent(
