In [None]:
# Install dependencies
#%pip install vectorbt
#%pip install "dask[distributed]" --upgrade
# %pip install dask
# Create local directories
#!mkdir -p $HOME/kohv04/lib
#!mkdir -p $HOME/kohv04/lib/python3.10/site-packages

# Download and extract libta-lib to local directory
#!curl -L https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2 | tar xj -C $HOME/kohv04/lib --strip-components=1

# Download ta-lib Python package
#!wget https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2

# Extract ta-lib to local site-packages
#!tar -xjf ta-lib-0.4.19-py310hde88566_4.tar.bz2 -C $HOME/kohv04/lib/python3.10/site-packages --strip-components=2 lib/python3.10/site-packages/talib

# Clean up
#!rm ta-lib-0.4.19-py310hde88566_4.tar.bz2

In [None]:
# Make the locally extracted TA-Lib (native library + Python package) importable.
import os
import sys

# Single root for the local install. It mirrors the `$HOME/kohv04/lib` layout used
# by the (commented-out) install commands in the first cell, so the native library
# directory and the site-packages directory can no longer drift apart.
# `expanduser` is used instead of os.environ.get('HOME') so an unset HOME does not
# silently produce a "None/kohv04/lib" path.
_local_lib_dir = os.path.join(os.path.expanduser("~"), "kohv04", "lib")
_local_site_packages = os.path.join(_local_lib_dir, "python3.10", "site-packages")

# Prepend the native library directory to LD_LIBRARY_PATH (required for talib).
# Empty entries are dropped (an empty LD_LIBRARY_PATH entry would otherwise leave a
# dangling ':'), and the directory is only added once so re-running the cell is
# idempotent.
# NOTE(review): the dynamic loader normally reads LD_LIBRARY_PATH at process start,
# so this presumably matters mostly for child processes - confirm talib needs it.
_ld_entries = [entry for entry in os.environ.get('LD_LIBRARY_PATH', '').split(os.pathsep) if entry]
if _local_lib_dir not in _ld_entries:
    _ld_entries.insert(0, _local_lib_dir)
os.environ['LD_LIBRARY_PATH'] = os.pathsep.join(_ld_entries)

# Append the talib package location to sys.path (instead of modifying PYTHONPATH).
if _local_site_packages not in sys.path:
    sys.path.append(_local_site_packages)

from dask.distributed import Client, LocalCluster
from dask.diagnostics import ProgressBar
import os
import dask.dataframe as dd
import pandas as pd
try:
    import talib
    print(f"TA-Lib version: {talib.__version__}")
except ImportError as e:
    print(f"Failed to import TA-Lib: {e}")
import os
import warnings
import glob
import json
from typing import Optional, Tuple, Dict, List
import dask
import numpy as np
import pandas as pd
import vectorbt as vbt
vbt.settings.caching['enabled'] = False
from numba import njit
import warnings
import traceback
from datetime import datetime, time
import io
import contextlib
import traceback
import dataclasses
from itertools import product
from vectorbt.portfolio.enums import SizeType 

TA-Lib version: 0.4.19


In [None]:
# --- Dask Cluster ---
print("Setting up Dask cluster...")
temp_dir = "/tmp/dask-spill"
os.makedirs(temp_dir, exist_ok=True)

# Configure Dask BEFORE the cluster is created. The worker processes read the
# spill/target/pause/terminate thresholds when they start, so setting them after
# `LocalCluster(...)` (as this cell originally did) is too late for the workers
# that are already running.
dask.config.set({
    "temporary-directory": temp_dir,
    "distributed.worker.memory.spill": 0.8,
    "distributed.worker.memory.target": 0.7,
    "distributed.worker.memory.pause": 0.9,
    "distributed.worker.memory.terminate": 0.95,
})

# NOTE(review): memory_limit is applied per worker, so 10 workers may claim up to
# ~330GB in total - confirm the host actually has that much memory.
cluster = LocalCluster(
    n_workers=10,
    threads_per_worker=1,
    memory_limit="33GB",
    processes=True,
    local_directory=temp_dir
)
client = Client(cluster)
# NOTE(review): dask.diagnostics.ProgressBar targets the local schedulers; with a
# distributed Client, `dask.distributed.progress` is the usual tool - confirm this
# registration is still wanted.
ProgressBar().register()
print("Dask Dashboard:", client.dashboard_link)

# Silence pandas' SettingWithCopyWarning when it is raised from pandas.core.frame.
warnings.filterwarnings(
    "ignore",
    category=pd.errors.SettingWithCopyWarning,
    module="pandas.core.frame"
)

# --- Configuration and Constants ---
# Root directory of the backtesting data set (note the trailing slash).
BASE_DIR = "/home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/"
# Built with os.path.join so the trailing slash of BASE_DIR does not produce a
# doubled "//" in the resulting path.
METADATA_FILE = os.path.join(BASE_DIR, "metadata", "nasdaq100_ticker_dataset.json")
INITIAL_CASH = 1_000_000  # $1,000,000

# --- Parameter Grids ---

# Parameters that are derived from other parameters instead of being searched
# directly. Each entry maps the derived parameter name to:
#   'func': callable computing the derived value,
#   'args': names of the source parameters, in the order 'func' expects them.
DEPENDENT_PARAMS_CONFIG = {
    'alpha_tp': {
        'func': lambda sl, rr: round(sl * rr, 2),  # TP = SL * RR_Ratio
        'args': ['alpha_atr_sl', 'rr_ratio']
    }
}

# --- Strategy-Specific Parameter Grids ---

def _float_grid(start: float, stop: float, step: float, decimals: int) -> np.ndarray:
    """
    Build a half-open float grid like ``np.arange(start, stop, step)`` without
    floating-point accumulation noise.

    ``np.arange`` with a non-integer step yields values such as
    3.4000000000000017 instead of 3.4. Those values leak into parameter labels
    and reports. Rounding to the step's own precision keeps the number of grid
    points unchanged while making every value the closest float to the intended
    decimal.

    Args:
        start: First grid value (inclusive).
        stop: Upper bound (exclusive, same semantics as ``np.arange``).
        step: Spacing between consecutive values.
        decimals: Number of decimals the grid values are meant to have.

    Returns:
        1-D float array of grid values.
    """
    # `+ 0.0` normalises a rounded "-0.0" to "0.0".
    return np.round(np.arange(start, stop, step), decimals) + 0.0


# Baseline strategies -------------------------------------------------------

BASELINE_BREAKOUT_PARAMS = {
    'sl_stop': _float_grid(0.001, 0.0151, 0.001, 3),
    'tp_stop': _float_grid(0.001, 0.0151, 0.001, 3),
}

BASELINE_BBANDS_PARAMS = {
    'timeperiod': np.arange(10, 61, 2),
    'nbdev': _float_grid(1.5, 3.51, 0.1, 1),
}

BASELINE_MOMENTUM_PARAMS = {
    'window': np.arange(3, 15, 1),
    'sl_stop': _float_grid(0.005, 0.041, 0.005, 3),
}

# Volume strategies ---------------------------------------------------------

VOLUME_MOMENTUM_PARAMS = {
    'timeperiod': np.arange(5, 31, 1),
    'kappa_vol_mom': _float_grid(1.0, 5.1, 0.25, 2),
    'adx_threshold': np.arange(10, 51, 1),
    'alpha_atr': _float_grid(1.0, 6.1, 0.25, 2),
}

VOLUME_BREAKOUT_PARAMS = {
    'phi_va': _float_grid(0.6, 0.96, 0.05, 2),
    'kappa_surge': _float_grid(1.5, 6.1, 0.25, 2),
    'timeperiod': np.arange(5, 31, 1),
    'adx_threshold': np.arange(15, 31, 1),
    'alpha_atr_sl': _float_grid(1.0, 6.1, 0.25, 2),
    'rr_ratio': _float_grid(1.5, 4.1, 0.25, 2),
}

VOLUME_VWAP_REVERSION_PARAMS = {
    'window': np.arange(20, 61, 4),
    'quantile': _float_grid(0.8, 1.0, 0.02, 2),
    'slope': _float_grid(-0.0006, 0.00061, 0.00005, 5),
    'tau_vwap_trend': np.arange(10, 31, 1),
    'alpha_atr_sl': _float_grid(1.0, 6.1, 0.25, 2),
    'rr_ratio': _float_grid(1.5, 4.1, 0.25, 2),
}

# Predicted-volume (DL) strategies -------------------------------------------

DL_BREAKOUT_PARAMS = {
    'phi_va': _float_grid(0.45, 0.9, 0.02, 2),
    'kappa_dl': _float_grid(0.5, 4.1, 0.1, 1),
    'timeperiod': np.arange(5, 31, 1),
    'adx_threshold': np.arange(10, 31, 1),
    'alpha_atr_sl': _float_grid(1.0, 6.1, 0.25, 2),
    'rr_ratio': _float_grid(1.5, 4.1, 0.25, 2),
}

DL_VOLUME_MOMENTUM_PARAMS = {
    'timeperiod': np.arange(5, 31, 1),
    'kappa_dl': _float_grid(0.5, 5.1, 0.1, 1),
    'adx_threshold': np.arange(10, 51, 1),
    'alpha_atr': _float_grid(1.0, 6.1, 0.25, 2),
    'tau_vol_trend': np.arange(6, 9, 1),
}

DL_VWAP_REVERSION_PARAMS = {
    'delta_vwap': _float_grid(0.0005, 0.0051, 0.0005, 4),
    'tau_vwap_trend': np.arange(5, 31, 1),
    'volume_multiplier': _float_grid(0.5, 2.01, 0.1, 1),
    'alpha_atr_sl': _float_grid(1.0, 6.1, 0.25, 2),
    'rr_ratio': _float_grid(1.5, 4.1, 0.25, 2),
}


Setting up Dask cluster...
Dask Dashboard: http://127.0.0.1:8787/status




In [None]:
# --- Data Loading and Indicator Functions ---

def load_optimization_data(ticker: str, strategy_name: str) -> Optional[pd.DataFrame]:
    """
    Read every ``part.*.parquet`` file of one ticker/strategy and merge them.

    The files are looked up under
    ``BASE_DIR/ticker=<ticker>_standardized/optimization_indicators_<strategy_name>``.

    Args:
        ticker: Ticker symbol used in the directory name.
        strategy_name: Strategy suffix used in the directory name.

    Returns:
        A DataFrame sorted by and indexed on ``timestamp``, or ``None`` when no
        files are found or loading fails (a message is printed in both cases).
    """
    data_dir = os.path.join(BASE_DIR, f"ticker={ticker}_standardized", f"optimization_indicators_{strategy_name}")
    part_files = glob.glob(os.path.join(data_dir, "part.*.parquet"))

    if len(part_files) == 0:
        print(f"Warning: No optimization files found for {ticker} in {data_dir}")
        return None

    try:
        frames = [pd.read_parquet(part_file) for part_file in part_files]
        combined = pd.concat(frames, ignore_index=True)
        combined['timestamp'] = pd.to_datetime(combined['timestamp'])
        return combined.sort_values('timestamp').set_index('timestamp')
    except Exception as exc:
        # Best effort: report the problem and let the caller skip this ticker.
        print(f"Error loading data for {ticker} - {strategy_name}: {exc}")
        return None

######################################################################################### BASELINE - STRATEGIES

def optimize_baseline_breakout(df: pd.DataFrame, sl_stop: list, tp_stop: list):
    """
    Backtest the precomputed breakout signals over paired stop-loss / take-profit values.

    Entries come from the ``breakout_buy`` (long) and ``breakout_sell`` (short)
    columns and are only accepted on bars whose hour is before 15. No exit
    signals are passed; only the ``sl_stop``/``tp_stop`` values are handed to
    the simulation.

    Args:
        df: Bar data indexed by timestamp with ``close``, ``breakout_buy`` and
            ``breakout_sell`` columns. Its index is converted to datetime in place.
        sl_stop: Stop-loss values, one per parameter combination.
        tp_stop: Take-profit values, paired element-wise with ``sl_stop``.

    Returns:
        ``(pf, signals)``: the vectorbt portfolio (one column per parameter
        pair) and a dict holding the entry masks; both exit entries are ``None``.
    """
    print("\n--- Running Breakout with 15:00 Entry Cutoff ---")
    df.index = pd.to_datetime(df.index)

    # One column per (sl_stop, tp_stop) pair; the two lists are zipped, not crossed.
    combos = pd.MultiIndex.from_arrays([sl_stop, tp_stop], names=['sl_stop', 'tp_stop'])
    template = pd.DataFrame(index=df.index, columns=combos)

    close_wide = df['close'].vbt.broadcast_to(template)

    # True on bars where new entries are still allowed (hour < 15).
    before_cutoff = pd.Series(df.index.hour < 15, index=df.index).vbt.broadcast_to(close_wide)

    def _entries_from(column: str) -> pd.DataFrame:
        """Broadcast a signal column to all combinations and apply the cutoff."""
        return df[column].vbt.broadcast_to(template).fillna(False) & before_cutoff

    long_entries = _entries_from('breakout_buy')
    short_entries = _entries_from('breakout_sell')

    level_values = template.columns.get_level_values

    pf = vbt.Portfolio.from_signals(
        close_wide,
        entries=long_entries,
        short_entries=short_entries,
        sl_stop=level_values('sl_stop').to_numpy(),
        tp_stop=level_values('tp_stop').to_numpy(),
        freq='1min',
        init_cash=INITIAL_CASH,
        fees=0.0001,
        slippage=0.0002,
        size=0.01,
        size_type=SizeType.Percent,
        upon_opposite_entry='ignore'
    )

    return pf, {
        "long_entries": long_entries,
        "short_entries": short_entries,
        "long_exits": None,
        "short_exits": None
    }
    

def optimize_baseline_bbands(df: pd.DataFrame, timeperiod: list, nbdev: list):
    """
    Backtest a two-sided Bollinger Bands strategy with an entry time filter.

    Longs are opened when the close is below the lower band and closed when it
    rises above the middle band; shorts mirror this on the upper band. New
    entries are only accepted on bars whose hour is before 15.

    Previously recorded best parameters: timeperiod=58, nbdev=3.4000000000000017.

    Args:
        df: Bar data indexed by timestamp with a ``close`` column. Its index is
            converted to datetime in place.
        timeperiod: Band look-back values, one per parameter combination.
        nbdev: Band widths (used for both the upper and lower band), paired
            element-wise with ``timeperiod``.

    Returns:
        ``(pf, signals)``: the vectorbt portfolio and a dict with the four
        entry/exit masks.
    """
    print("\n--- Running Two-Sided Bollinger Bands with Time Filter ---")
    df.index = pd.to_datetime(df.index)

    # TA-Lib expects plain Python ints/floats rather than numpy scalars.
    periods = list(map(int, timeperiod))
    widths = list(map(float, nbdev))

    bands = vbt.IndicatorFactory.from_talib('BBANDS').run(
        df['close'],
        timeperiod=periods,
        nbdevup=widths,
        nbdevdn=widths,
        param_product=False
    )
    lower_band = bands.lowerband
    middle_band = bands.middleband
    upper_band = bands.upperband

    close_wide = df['close'].vbt.broadcast_to(lower_band)
    before_cutoff = pd.Series(df.index.hour < 15, index=df.index).vbt.broadcast_to(close_wide)

    signals = {
        "long_entries": (close_wide < lower_band) & before_cutoff,
        "short_entries": (close_wide > upper_band) & before_cutoff,
        "long_exits": close_wide > middle_band,
        "short_exits": close_wide < middle_band
    }

    pf = vbt.Portfolio.from_signals(
        close_wide,
        entries=signals["long_entries"],
        exits=signals["long_exits"],
        short_entries=signals["short_entries"],
        short_exits=signals["short_exits"],
        freq='1min',
        init_cash=INITIAL_CASH,
        fees=0.0001,
        slippage=0.0002,
        size=0.01,
        size_type=SizeType.Percent,
        upon_opposite_entry='ignore'
    )

    return pf, signals

def optimize_baseline_momentum(df: pd.DataFrame, window: list, sl_stop: list):
    """
    Backtest a two-sided momentum strategy with a trailing stop-loss and time filter.

    A long entry fires after ``window`` consecutive bars with close > open, a
    short entry after ``window`` consecutive bars with close < open. Entries
    are only accepted on bars whose hour is before 15.

    Previously recorded best parameters: window=14, sl_stop=0.01.

    Args:
        df: Bar data indexed by timestamp with ``open`` and ``close`` columns.
            Its index is converted to datetime in place.
        window: Streak lengths, one per parameter combination.
        sl_stop: Trailing stop values, paired element-wise with ``window``.

    Returns:
        ``(pf, signals)``: the vectorbt portfolio and a dict with the entry
        masks (exit entries are ``None``). When no window yields any columns
        the signals dict is empty.
    """
    print("\n--- Running Baseline Momentum ---")
    df.index = pd.to_datetime(df.index)

    combos = pd.MultiIndex.from_arrays([window, sl_stop], names=['window', 'sl_stop'])
    template = pd.DataFrame(index=df.index, columns=combos)

    up_bar = (df['close'] > df['open']).astype(int)
    down_bar = (df['close'] < df['open']).astype(int)

    def _streak(flags: pd.Series, length, like: pd.DataFrame) -> pd.DataFrame:
        """True where the last `length` bars were all flagged, broadcast to `like`."""
        run_count = flags.rolling(window=length, min_periods=length).sum()
        return (run_count >= length).vbt.broadcast_to(like)

    # The rolling streak is computed once per distinct window and then copied to
    # every combination that uses it.
    long_parts = []
    short_parts = []
    window_level = combos.get_level_values('window')
    for length in sorted(set(window)):
        matching = combos[window_level == length]
        if matching.empty:
            continue
        like = pd.DataFrame(index=df.index, columns=matching)
        long_parts.append(_streak(up_bar, length, like))
        short_parts.append(_streak(down_bar, length, like))

    if len(long_parts) == 0:
        return vbt.Portfolio.from_signals(df['close'].vbt.broadcast_to(template)), {}

    # Restore the caller's original combination order.
    raw_long = pd.concat(long_parts, axis=1).reindex(columns=combos)
    raw_short = pd.concat(short_parts, axis=1).reindex(columns=combos)

    before_cutoff = pd.Series(df.index.hour < 15, index=df.index).vbt.broadcast_to(raw_long)
    final_long_entries = raw_long & before_cutoff
    final_short_entries = raw_short & before_cutoff

    close_wide = df['close'].vbt.broadcast_to(template)
    trailing_stops = template.columns.get_level_values('sl_stop').to_numpy()

    pf = vbt.Portfolio.from_signals(
        close_wide,
        entries=final_long_entries,
        short_entries=final_short_entries,
        sl_stop=trailing_stops,
        sl_trail=True,
        tp_stop=None,
        freq='1min',
        init_cash=INITIAL_CASH,
        fees=0.0001,
        slippage=0.0002,
        size=0.01,
        size_type=SizeType.Percent,
        upon_opposite_entry='ignore'
    )

    return pf, {
        "long_entries": final_long_entries,
        "short_entries": final_short_entries,
        "long_exits": None,
        "short_exits": None
    }


######################################################################################### VOLUME - STRATEGIES

@njit
def _expanding_profile_nb(a: np.ndarray, phi_va: float, min_periods: int) -> np.ndarray:
    """
    Numba kernel that calculates a developing volume profile for a SINGLE day's data.

    For every bar ``i`` (once at least ``min_periods`` bars are available) the
    profile is rebuilt from bars ``0..i``: volume is accumulated into 0.01-wide
    price bins, the point of control (POC) is the bin with the most volume
    (ties are resolved towards the midpoint of the price range seen so far),
    and the value area grows outwards from the POC, one neighbouring bin at a
    time, until it holds ``phi_va`` of the total volume.

    Args:
        a: 2D array with column 0 = close and column 1 = volume.
        phi_va: Fraction of total volume the value area must contain.
        min_periods: Number of bars required before any output is produced.

    Returns:
        2D float array of shape ``(a.shape[0], 3)`` with columns
        (poc, vah, val). Rows without a result are NaN.
    """
    n_rows = a.shape[0]
    output = np.full((n_rows, 3), np.nan, dtype=np.float64)
    close_col = a[:, 0]
    volume_col = a[:, 1]

    for i in range(n_rows):
        if i + 1 < min_periods:
            continue

        # Expanding window: everything seen so far today, NaN rows removed.
        window_close = close_col[:i + 1]
        window_volume = volume_col[:i + 1]
        valid_mask = ~np.isnan(window_close) & ~np.isnan(window_volume)
        close = window_close[valid_mask]
        volume = window_volume[valid_mask]
        if close.shape[0] == 0:
            continue

        # Accumulate volume per 0.01 price bin; `unique_prices` is sorted ascending.
        price_bins = np.floor(close * 100) / 100
        unique_prices = np.unique(price_bins)
        n_bins = len(unique_prices)

        daily_profile = np.zeros(n_bins, dtype=np.float64)
        for k in range(len(price_bins)):
            idx = np.searchsorted(unique_prices, price_bins[k])
            if unique_prices[idx] == price_bins[k]:
                daily_profile[idx] += volume[k]

        total_volume = np.sum(daily_profile)
        if not (total_volume > 0):
            continue

        # Point of control: highest-volume bin, ties broken by distance to the
        # midpoint of the observed price range.
        max_volume = np.max(daily_profile)
        poc_candidate_indices = np.where(daily_profile == max_volume)[0]
        if len(poc_candidate_indices) == 1:
            poc_idx = poc_candidate_indices[0]
        else:
            session_midpoint = (np.max(unique_prices) + np.min(unique_prices)) / 2
            distances = np.abs(unique_prices[poc_candidate_indices] - session_midpoint)
            poc_idx = poc_candidate_indices[np.argmin(distances)]

        # Grow the value area outwards from the POC. Availability of each side is
        # tracked explicitly instead of with a -1.0 volume sentinel: a sentinel
        # compares wrongly against negative volumes and could step onto index -1,
        # which silently wraps around to the last bin.
        target_volume = total_volume * phi_va
        va_volume = daily_profile[poc_idx]
        up_idx = poc_idx + 1
        down_idx = poc_idx - 1
        while va_volume < target_volume and (up_idx < n_bins or down_idx >= 0):
            can_up = up_idx < n_bins
            can_down = down_idx >= 0
            # Prefer the upper bin only when it holds strictly more volume;
            # ties go to the lower bin.
            if can_up and (not can_down or daily_profile[up_idx] > daily_profile[down_idx]):
                va_volume += daily_profile[up_idx]
                up_idx += 1
            else:
                va_volume += daily_profile[down_idx]
                down_idx -= 1

        # The value area is the contiguous bin range (down_idx, up_idx) and the
        # bins are sorted, so its extremes are simply the outermost bins taken.
        output[i, 0] = unique_prices[poc_idx]
        output[i, 1] = unique_prices[up_idx - 1]
        output[i, 2] = unique_prices[down_idx + 1]

    return output

def calculate_developing_profiles(df: pd.DataFrame, phi_va_params: list, min_periods: int = 60) -> pd.DataFrame:
    """
    Calculates a DEVELOPING POC, VAH, and VAL for each intraday bar.

    The frame is split by calendar day (``df.index.normalize()``) so the profile
    resets daily; the per-day computation is delegated to the Numba kernel
    ``_expanding_profile_nb``. This is repeated for every value-area fraction.

    Args:
        df: Bar data with a DatetimeIndex and ``close`` / ``volume`` columns.
        phi_va_params: Value-area fractions to evaluate.
        min_periods: Bars required within a day before a profile is emitted.

    Returns:
        DataFrame indexed like the concatenated daily groups, with two column
        levels ``('phi_va', 'metric')`` where metric is one of
        ``poc`` / ``vah`` / ``val``.

    Raises:
        ValueError: If ``phi_va_params`` is empty.
    """
    print("  --> Entering calculate_developing_profiles")

    if len(phi_va_params) == 0:
        # pd.concat would raise the same exception type with a far less helpful
        # "No objects to concatenate" message.
        raise ValueError("phi_va_params must contain at least one value.")

    all_phi_va_results = []

    def _apply_kernel_to_day(day_df: pd.DataFrame, phi_va: float, min_p: int) -> pd.DataFrame:
        """Helper function to apply the Numba kernel to a single day's dataframe."""
        metrics = _expanding_profile_nb(
            # Force float64 so the kernel always sees the same array dtype,
            # even when the volume column is stored as integers.
            day_df[['close', 'volume']].to_numpy(dtype=np.float64),
            phi_va,
            min_p
        )
        return pd.DataFrame(metrics, index=day_df.index, columns=['poc', 'vah', 'val'])

    for phi_va in phi_va_params:
        print(f"    - Processing developing profile for phi_va: {phi_va}")

        # group_keys=False keeps the original timestamp index on the result.
        # Relying on the default and dropping level 0 afterwards only works on
        # pandas versions that prepend the group key for like-indexed results.
        daily_results = df.groupby(df.index.normalize(), group_keys=False).apply(
            _apply_kernel_to_day,
            phi_va=phi_va,
            min_p=min_periods
        )
        all_phi_va_results.append(daily_results)

    print("    - Concatenating results for all phi_va parameters...")
    result_df = pd.concat(all_phi_va_results, axis=1, keys=phi_va_params, names=['phi_va', 'metric'])

    print("  <-- Exiting calculate_developing_profiles successfully.")
    return result_df

def optimize_volume_breakout(df: pd.DataFrame, phi_va: list, kappa_surge: list, timeperiod: list, adx_threshold: list, alpha_atr: list, alpha_tp: list):
    """
    Implements and backtests a VAH/VAL breakout strategy with ADX and volume filters,
    using a developing intraday volume profile.

    A long entry requires close > developing VAH, a short entry close < developing
    VAL; both additionally require ``volume > kappa_surge * volume_avg`` and
    ``ADX > adx_threshold``, and are only accepted on bars whose hour is before 15.
    Stops are ATR multiples expressed as a fraction of price.

    Previously recorded best parameters:
        phi_va: 0.9000000000000002
        kappa_surge: 1.5
        timeperiod: 14
        adx_threshold: 21
        alpha_atr_sl: 4.75
        rr_ratio: 19.0 -> this is alpha_tp (rr_ratio * alpha_atr_sl); rr_ratio would be 4

    Args:
        df: Bar data indexed by timestamp with ``high``, ``low``, ``close``,
            ``volume``, ``volume_avg`` and ``atr`` columns. Its index is
            converted to datetime in place.
        phi_va: Value-area fractions, one per parameter combination.
        kappa_surge: Volume-surge multipliers.
        timeperiod: ADX look-back periods.
        adx_threshold: Minimum ADX values.
        alpha_atr: ATR multipliers for the stop-loss.
        alpha_tp: ATR multipliers for the take-profit.
        All parameter lists are paired element-wise (not crossed).

    Returns:
        ``(pf, signals)``: the vectorbt portfolio and a dict with the long/short
        entry masks. If anything fails, the error is printed and a placeholder
        portfolio built from ``df['close']`` is returned with an empty dict.
        NOTE(review): unlike the baseline strategies, ``signals`` has no
        ``long_exits`` / ``short_exits`` keys - confirm callers do not index them.
    """
    print("\n--- Running Volume-Enhanced Breakout Strategy (ADX filter) ---")
    df.index = pd.to_datetime(df.index)

    try:
        print("Step 1: Creating parameter space...")
        param_names = ['phi_va', 'kappa_surge', 'timeperiod', 'adx_threshold', 'alpha_atr', 'alpha_tp']
        param_lists = [phi_va, kappa_surge, timeperiod, adx_threshold, alpha_atr, alpha_tp]
        param_index = pd.MultiIndex.from_arrays(param_lists, names=param_names)
        broadcast_target = pd.DataFrame(index=df.index, columns=param_index)
        print(f" > Success. Parameter space shape: {broadcast_target.shape}")

        print("Step 2: Calculating raw indicators...")

        # Expensive indicators are computed once per distinct parameter value and
        # expanded to the full parameter space in step 3.
        unique_phi_vas = sorted(set(phi_va))
        vp_raw = calculate_developing_profiles(df, unique_phi_vas, min_periods=60)
        vah_raw = vp_raw.xs('vah', level='metric', axis=1)
        val_raw = vp_raw.xs('val', level='metric', axis=1)

        unique_timeperiods = sorted(set(timeperiod))
        ADX = vbt.IndicatorFactory.from_talib('ADX')
        adx_raw = ADX.run(df['high'], df['low'], df['close'], timeperiod=unique_timeperiods, run_unique=True).real
        print(" > Success. Raw indicators calculated.")

        print("Step 3: Aligning data to parameter space...")
        # Pick, for every combination, the indicator column of its phi_va /
        # timeperiod value, then relabel the columns with the full parameter index.
        vah_aligned = vah_raw[broadcast_target.columns.get_level_values('phi_va')]
        val_aligned = val_raw[broadcast_target.columns.get_level_values('phi_va')]
        adx_aligned = adx_raw[broadcast_target.columns.get_level_values('timeperiod')]

        vah_aligned.columns = broadcast_target.columns
        val_aligned.columns = broadcast_target.columns
        adx_aligned.columns = broadcast_target.columns

        price_df = df['close'].vbt.broadcast_to(broadcast_target)
        volume_df = df['volume'].vbt.broadcast_to(broadcast_target)
        volume_avg_df = df['volume_avg'].vbt.broadcast_to(broadcast_target)
        atr_df = df['atr'].vbt.broadcast_to(broadcast_target)
        print(" > Success. Data alignment complete.")

        print("Step 4: Extracting parameters for vectorization...")
        kappa_surge_param = broadcast_target.columns.get_level_values('kappa_surge')
        adx_threshold_param = broadcast_target.columns.get_level_values('adx_threshold')
        alpha_atr_param = broadcast_target.columns.get_level_values('alpha_atr')
        alpha_tp_param = broadcast_target.columns.get_level_values('alpha_tp')
        print(" > Success. Parameters extracted.")

        print("Step 5: Generating entry and exit signals...")
        volume_confirm = (volume_df > kappa_surge_param * volume_avg_df)
        adx_confirm = (adx_aligned > adx_threshold_param)

        long_entries = (price_df > vah_aligned) & volume_confirm & adx_confirm
        short_entries = (price_df < val_aligned) & volume_confirm & adx_confirm

        # No new entries from hour 15 onwards.
        time_mask = (df.index.hour < 15)
        time_filter = pd.Series(time_mask, index=df.index).vbt.broadcast_to(price_df)

        final_long_entries = long_entries & time_filter
        final_short_entries = short_entries & time_filter
        print(" > Success. Signals generated.")

        print("Step 6: Calculating stops and simulating portfolio...")
        # ATR-based stop distances converted to a fraction of the current price.
        sl_pct = (alpha_atr_param * atr_df) / price_df
        tp_pct = (alpha_tp_param * atr_df) / price_df

        pf = vbt.Portfolio.from_signals(
            price_df,
            entries=final_long_entries,
            short_entries=final_short_entries,
            sl_stop=sl_pct,
            tp_stop=tp_pct,
            freq='1min',
            init_cash=INITIAL_CASH,
            fees=0.0001,
            slippage=0.0002,
            size=0.01,
            size_type=SizeType.Percent,
            upon_opposite_entry='ignore'
        )
        print(" > Success. Portfolio simulation complete.")

        signals = {
            "long_entries": final_long_entries,
            "short_entries": final_short_entries,
        }
        return pf, signals

    except Exception as e:
        print(f"!!! AN ERROR OCCURRED IN optimize_volume_breakout: {e}")
        traceback.print_exc()
        # '1min' instead of the deprecated '1T' alias, matching every other
        # simulation in this notebook.
        empty_pf = vbt.Portfolio.from_signals(df['close'], freq='1min')
        return empty_pf, {}




def optimize_volume_momentum(df: pd.DataFrame, timeperiod: list, kappa_vol_mom: list, adx_threshold: list, alpha_atr: list):
    """
    Backtest the volume-driven momentum strategy with an ATR-based trailing stop.

    A long entry requires close > previous session high, a short entry
    close < previous session low; both additionally require
    ``relative_volume > kappa_vol_mom`` and ``ADX > adx_threshold``.
    The total number of generated entries is printed as ``[DEBUG]`` output.

    Previously recorded best parameters:
        tau_adx: 12
        kappa_vol_mom: 3.75
        theta_adx: 36
        alpha_atr_sl: 0.75

    Args:
        df: Bar data indexed by timestamp with ``high``, ``low``, ``close``,
            ``relative_volume``, ``prev_session_high``, ``prev_session_low``
            and ``atr`` columns. Its index is converted to datetime in place.
        timeperiod: ADX look-back periods, one per parameter combination.
        kappa_vol_mom: Relative-volume thresholds.
        adx_threshold: Minimum ADX values.
        alpha_atr: ATR multipliers for the trailing stop.
        All parameter lists are paired element-wise.

    Returns:
        ``(pf, signals)``: the vectorbt portfolio and a dict with the entry
        masks; both exit entries are ``None``.

    Raises:
        ValueError: Re-raised from the portfolio simulation after a marker
            line has been printed.
    """
    df.index = pd.to_datetime(df.index)

    # ADX is computed once per distinct period and aligned to the full
    # parameter space further below via the shared 'timeperiod' level name.
    adx_by_period = vbt.IndicatorFactory.from_talib('ADX').run(
        df['high'], df['low'], df['close'],
        timeperiod=sorted(set(timeperiod)),
        run_unique=True
    ).real
    adx_by_period.vbt.rename_levels({'adx_timeperiod': 'timeperiod'}, inplace=True)

    combos = pd.MultiIndex.from_arrays(
        [timeperiod, kappa_vol_mom, adx_threshold, alpha_atr],
        names=['timeperiod', 'kappa_vol_mom', 'adx_threshold', 'alpha_atr']
    )
    template = pd.DataFrame(index=df.index, columns=combos)

    def _widen(column: str) -> pd.DataFrame:
        """Repeat one input column across every parameter combination."""
        return df[column].vbt.broadcast_to(template)

    close_wide = _widen('close')
    rel_volume_wide = _widen('relative_volume')
    prev_high_wide = _widen('prev_session_high')
    prev_low_wide = _widen('prev_session_low')
    atr_wide = _widen('atr')
    adx_wide = adx_by_period.vbt.align_to(template)

    level_values = template.columns.get_level_values
    volume_threshold = level_values('kappa_vol_mom')
    adx_floor = level_values('adx_threshold')
    stop_multiplier = level_values('alpha_atr')

    adx_ok = adx_wide > adx_floor
    volume_ok = rel_volume_wide > volume_threshold
    long_entries = (close_wide > prev_high_wide) & volume_ok & adx_ok
    short_entries = (close_wide < prev_low_wide) & volume_ok & adx_ok
    print(f"[DEBUG] Long entries generated. Total: {long_entries.sum().sum()}")
    print(f"[DEBUG] Short entries generated. Total: {short_entries.sum().sum()}")

    # ATR-based stop distance expressed as a fraction of the current price.
    trailing_stop_pct = (stop_multiplier * atr_wide) / close_wide

    try:
        pf = vbt.Portfolio.from_signals(
            close_wide,
            entries=long_entries,
            short_entries=short_entries,
            sl_stop=trailing_stop_pct,
            sl_trail=True,
            tp_stop=None,
            freq='1min',
            init_cash=INITIAL_CASH,
            fees=0.0001,
            slippage=0.0002,
            size=0.01,
            size_type=SizeType.Percent,
            upon_long_conflict='ignore',
            upon_opposite_entry='ignore'
        )
    except ValueError as e:
        print("\n--- [DEBUG] !!! ERROR OCCURRED DURING PORTFOLIO SIMULATION !!! ---")
        raise e

    return pf, {
        "long_entries": long_entries,
        "short_entries": short_entries,
        "long_exits": None,
        "short_exits": None
    }



@njit
def quantile_nb(a, q):
    """Return the ``q``-quantile of the non-NaN values of ``a``, or NaN if there are none."""
    non_nan = a[~np.isnan(a)]
    if len(non_nan) == 0:
        return np.nan
    return np.quantile(non_nan, q)


def optimize_volume_vwap_reversion(
    df: pd.DataFrame, 
    window: list, 
    quantile: list, 
    slope: list, 
    tau_vwap_trend: list,
    alpha_atr: list,
    alpha_tp: list
):
    """
    Backtest the volume-enhanced VWAP reversion strategy with ATR-based SL/TP.

    Entry signals are produced by a custom vectorbt indicator wrapping a Numba
    kernel (defined below); one column is generated per parameter combination
    (``param_product=False``, i.e. the lists are paired element-wise).

    Previously recorded best parameters:
        window: 36
        quantile: 0.8
        slope: 0.0001500000000000004
        tau_vwap_trend: 30
        alpha_atr_sl: 4.5
        rr_ratio: 6.38

    Args:
        df: Bar data with ``close``, ``obv``, ``vwap_actual_slope``,
            ``vwap_actual`` and ``atr`` columns. Its index is converted to
            datetime in place and must be unique.
        window: Look-back lengths for the rolling OBV quantiles.
        quantile: Quantile levels for the OBV bands.
        slope: VWAP slope thresholds.
        tau_vwap_trend: Shift (in bars) used for the VWAP trend comparison.
        alpha_atr: ATR multipliers for the stop-loss.
        alpha_tp: ATR multipliers for the take-profit.

    Returns:
        ``(final_pf, None, None)``.
        NOTE(review): the other optimize_* functions visible in this notebook
        return a 2-tuple ``(pf, signals)`` - confirm callers expect three items here.

    Raises:
        ValueError: If the index contains duplicate timestamps.
    """
    print("\n--- Running Volume-Enhanced VWAP Reversion with SL/TP ---")

    df.index = pd.to_datetime(df.index)
    if not df.index.is_unique:
        raise ValueError("DataFrame index is not unique after initial processing.")

    # Kernel evaluated once per parameter combination by the indicator factory.
    # alpha_atr_scalar / alpha_tp_scalar are not used inside the kernel; they are
    # declared as indicator parameters so they show up in the output columns.
    @njit
    def generate_vwap_entries_nb(
        close_arr, obv_arr, vwap_slope_arr, vwap_actual_arr,
        window_scalar, quantile_scalar, slope_scalar, tau_vwap_trend_scalar,
        alpha_atr_scalar, alpha_tp_scalar 
    ):
        
        win = int(window_scalar)
        tau = int(tau_vwap_trend_scalar)
        # Presumably the VWAP value `tau` bars earlier (forward shift) - verify
        # against vectorbt's fshift_1d_nb.
        vwap_trend_confirm_arr = vbt.nb.fshift_1d_nb(vwap_actual_arr, tau)
        lower_q_vals = np.full_like(obv_arr, np.nan, dtype=np.float64)
        upper_q_vals = np.full_like(obv_arr, np.nan, dtype=np.float64)
        # Rolling quantiles over the `win` bars BEFORE bar i (bar i itself excluded).
        for i in range(win, len(obv_arr)):
            window_slice = obv_arr[i - win:i]
            clean_slice = window_slice[~np.isnan(window_slice)]
            if len(clean_slice) > 0:
                # NOTE(review): with the quantile grid visible in this notebook
                # (>= 0.8), "lower" receives the HIGHER quantile and "upper" the
                # lower one - confirm the naming/assignment is intended.
                lower_q_vals[i] = np.quantile(clean_slice, quantile_scalar)
                upper_q_vals[i] = np.quantile(clean_slice, 1.0 - quantile_scalar)
        trend_down = vwap_actual_arr < vwap_trend_confirm_arr
        trend_up = vwap_actual_arr > vwap_trend_confirm_arr
        # Comparisons against NaN (warm-up bars) evaluate to False, so no entries
        # fire before the quantiles / shifted VWAP are available.
        long_entries = (close_arr < vwap_actual_arr) & (obv_arr < lower_q_vals) & (vwap_slope_arr >= slope_scalar) & trend_down
        short_entries = (close_arr > vwap_actual_arr) & (obv_arr > upper_q_vals) & (vwap_slope_arr <= -slope_scalar) & trend_up

        return long_entries, short_entries


    # Wrap the kernel as a vectorbt indicator with four inputs, six parameters
    # and two boolean outputs.
    VwapReversionInd = vbt.IndicatorFactory(
        input_names=['close', 'obv', 'vwap_slope', 'vwap_actual'],
        param_names=[
            'window', 'quantile', 'slope', 'tau_vwap_trend', 
            'alpha_atr', 'alpha_tp' 
        ],
        output_names=['long_entries', 'short_entries']
    ).from_apply_func(
        generate_vwap_entries_nb,
        keep_pd=False,
        to_2d=False
    )
    
    print("DEBUG: Running entry signal indicator...")
    indicator = VwapReversionInd.run(
        df['close'], df['obv'], df['vwap_actual_slope'], df['vwap_actual'],
        window=window, quantile=quantile, slope=slope, tau_vwap_trend=tau_vwap_trend,
        alpha_atr=alpha_atr, alpha_tp=alpha_tp, 
        param_product=False
    )
    print(f"DEBUG: Indicator run complete.")

    price_df_for_pf = df['close'].vbt.broadcast_to(indicator.long_entries)
    
    print("DEBUG: Simulating final portfolio with SL/TP...")
    # Stops are ATR multiples expressed as a fraction of price.
    # Assumes alpha_atr / alpha_tp have one value per indicator column, in the
    # same order, so the multiplication applies column-wise - TODO confirm.
    final_pf = vbt.Portfolio.from_signals(
        price_df_for_pf,
        entries=indicator.long_entries,
        short_entries=indicator.short_entries,
        sl_stop=df['atr'].vbt.broadcast_to(price_df_for_pf) * alpha_atr / price_df_for_pf,
        tp_stop=df['atr'].vbt.broadcast_to(price_df_for_pf) * alpha_tp / price_df_for_pf,
        freq='1min', 
        init_cash=INITIAL_CASH,
        fees=0.0001, 
        slippage=0.0002, 
        size=0.01,
        size_type=SizeType.Percent,
        upon_opposite_entry='ignore'
    )
    print("DEBUG: Final portfolio simulation complete.")
    
    return (final_pf, None, None)


###################################################################################################### PREDICTED VOLUME STRATEGIES

def optimize_dl_breakout(df: pd.DataFrame, phi_va: list, kappa_dl: list, timeperiod: list, adx_threshold: list, alpha_atr: list, alpha_tp: list):
    """
    Backtest the breakout strategy driven by PREDICTED volume and a developing
    volume profile.

    The developing profile is built from ``close`` and the predicted volume
    column ``pred_volume_15_tft``. A long entry requires close > developing VAH,
    a short entry close < developing VAL; both additionally require
    ``pred_volume_15_tft > kappa_dl * volume_avg`` and ``ADX > adx_threshold``,
    and are only accepted on bars whose hour is before 15. Stops are ATR
    multiples expressed as a fraction of price.

    Previously recorded best parameters:
        phi_va: 0.8900000000000003
        kappa_dl: 0.5
        timeperiod: 15
        adx_threshold: 27
        alpha_atr: 4.5
        alpha_tp: 15.75

    Args:
        df: Bar data indexed by timestamp with ``high``, ``low``, ``close``,
            ``pred_volume_15_tft``, ``volume_avg`` and ``atr`` columns. Its
            index is converted to datetime in place.
        phi_va: Value-area fractions, one per parameter combination.
        kappa_dl: Predicted-volume multipliers.
        timeperiod: ADX look-back periods.
        adx_threshold: Minimum ADX values.
        alpha_atr: ATR multipliers for the stop-loss.
        alpha_tp: ATR multipliers for the take-profit.
        All parameter lists are paired element-wise.

    Returns:
        ``(pf, signals)``: the vectorbt portfolio and a dict with the long/short
        entry masks. If anything fails, the error is printed and a placeholder
        portfolio built from ``df['close']`` is returned with an empty dict.
    """
    print("\n--- Running DL Breakout Strategy ---")
    df.index = pd.to_datetime(df.index)

    try:
        combos = pd.MultiIndex.from_arrays(
            [phi_va, kappa_dl, timeperiod, adx_threshold, alpha_atr, alpha_tp],
            names=['phi_va', 'kappa_dl', 'timeperiod', 'adx_threshold', 'alpha_atr', 'alpha_tp']
        )
        template = pd.DataFrame(index=df.index, columns=combos)
        level_values = template.columns.get_level_values

        def _widen(column: str) -> pd.DataFrame:
            """Repeat one input column across every parameter combination."""
            return df[column].vbt.broadcast_to(template)

        def _per_combo(raw: pd.DataFrame, level_name: str) -> pd.DataFrame:
            """Select, per combination, the column of `raw` keyed by its `level_name` value."""
            picked = raw[level_values(level_name)]
            picked.columns = template.columns
            return picked

        # The profile kernel expects a 'volume' column; feed it the predicted volume.
        profile_input = df[['close']].assign(volume=df['pred_volume_15_tft'])
        profiles = calculate_developing_profiles(profile_input, sorted(set(phi_va)), min_periods=60)
        vah_by_phi = profiles.xs('vah', level='metric', axis=1)
        val_by_phi = profiles.xs('val', level='metric', axis=1)

        # ADX is computed once per distinct period.
        adx_by_period = vbt.IndicatorFactory.from_talib('ADX').run(
            df['high'], df['low'], df['close'],
            timeperiod=sorted(set(timeperiod)),
            run_unique=True
        ).real

        vah_wide = _per_combo(vah_by_phi, 'phi_va')
        val_wide = _per_combo(val_by_phi, 'phi_va')
        adx_wide = _per_combo(adx_by_period, 'timeperiod')

        close_wide = _widen('close')
        pred_volume_wide = _widen('pred_volume_15_tft')
        avg_volume_wide = _widen('volume_avg')
        atr_wide = _widen('atr')

        volume_multiplier = level_values('kappa_dl')
        adx_floor = level_values('adx_threshold')
        sl_multiplier = level_values('alpha_atr')
        tp_multiplier = level_values('alpha_tp')

        volume_ok = pred_volume_wide > volume_multiplier * avg_volume_wide
        adx_ok = adx_wide > adx_floor

        raw_long = (close_wide > vah_wide) & volume_ok & adx_ok
        raw_short = (close_wide < val_wide) & volume_ok & adx_ok

        # No new entries from hour 15 onwards.
        before_cutoff = pd.Series(df.index.hour < 15, index=df.index).vbt.broadcast_to(close_wide)
        final_long_entries = raw_long & before_cutoff
        final_short_entries = raw_short & before_cutoff

        pf = vbt.Portfolio.from_signals(
            close_wide,
            entries=final_long_entries,
            short_entries=final_short_entries,
            sl_stop=(sl_multiplier * atr_wide) / close_wide,
            tp_stop=(tp_multiplier * atr_wide) / close_wide,
            freq='1min',
            init_cash=INITIAL_CASH,
            fees=0.0001,
            slippage=0.0002,
            size=0.01,
            size_type=SizeType.Percent,
            upon_opposite_entry='ignore'
        )

        return pf, {
            "long_entries": final_long_entries,
            "short_entries": final_short_entries,
        }

    except Exception as e:
        print(f"!!! AN ERROR OCCURRED IN optimize_dl_breakout: {e}")
        traceback.print_exc()
        return vbt.Portfolio.from_signals(df['close'], freq='1min'), {}

        
def optimize_dl_volume_momentum(
    df: pd.DataFrame, 
    timeperiod: list, 
    kappa_dl: list, 
    adx_threshold: list, 
    alpha_atr: list,
    tau_vol_trend: list
):
    """
    Breakout of the previous session's high/low, confirmed by ADX and by a
    moving average of the predicted volume, protected by a trailing ATR stop.

    The five parameter sequences are combined element-wise: position ``i`` of
    each sequence forms the i-th parameter set (one portfolio column each).

    Previously reported best parameters:
        timeperiod: 25
        kappa_dl: 0.7
        adx_threshold: 23
        alpha_atr: 3.5
        tau_vol_trend: 7

    Returns:
        (pf, signals): the vectorbt portfolio and a dict holding the long and
        short entry masks (exit entries are None; exits come from the stop).
    """
    df.index = pd.to_datetime(df.index)

    # ADX is evaluated once per distinct period; its column level is renamed so
    # that it can be aligned with the 'timeperiod' level of the parameter grid.
    adx_factory = vbt.IndicatorFactory.from_talib('ADX')
    distinct_periods = sorted(list(set(timeperiod)))
    adx_by_period = adx_factory.run(
        df['high'], df['low'], df['close'],
        timeperiod=distinct_periods,
        run_unique=True
    ).real
    adx_by_period.vbt.rename_levels({'adx_timeperiod': 'timeperiod'}, inplace=True)

    # Empty frame whose columns enumerate the parameter sets; used purely as a
    # broadcasting / alignment target.
    grid_columns = pd.MultiIndex.from_arrays(
        [timeperiod, kappa_dl, adx_threshold, alpha_atr, tau_vol_trend],
        names=['timeperiod', 'kappa_dl', 'adx_threshold', 'alpha_atr', 'tau_vol_trend']
    )
    grid = pd.DataFrame(index=df.index, columns=grid_columns)

    def _tile(column_name):
        # Repeat one input column across every parameter set.
        return df[column_name].vbt.broadcast_to(grid)

    close_grid = _tile('close')
    pred_volume_grid = _tile('pred_volume_tft_15_scaled')
    avg_volume_grid = _tile('volume_avg')
    session_high_grid = _tile('prev_session_high')
    session_low_grid = _tile('prev_session_low')
    atr_grid = _tile('atr')
    adx_grid = adx_by_period.vbt.align_to(grid)

    # Smoothed predicted volume, one moving average per distinct window.
    distinct_windows = sorted(list(set(tau_vol_trend)))
    volume_trend = vbt.MA.run(
        pred_volume_grid,
        window=distinct_windows,
        run_unique=True
    ).ma
    volume_trend.vbt.rename_levels({'ma_window': 'tau_vol_trend'}, inplace=True)
    volume_trend_grid = volume_trend.vbt.align_to(grid)

    # Per-column parameter values taken from the grid's column levels.
    kappa_values = grid.columns.get_level_values('kappa_dl')
    adx_floor_values = grid.columns.get_level_values('adx_threshold')
    stop_multipliers = grid.columns.get_level_values('alpha_atr')

    trend_is_strong = adx_grid > adx_floor_values
    volume_is_high = volume_trend_grid > kappa_values * avg_volume_grid

    long_entries = (close_grid > session_high_grid) & volume_is_high & trend_is_strong
    short_entries = (close_grid < session_low_grid) & volume_is_high & trend_is_strong

    # Stop distance expressed as a fraction of price.
    sl_pct = (stop_multipliers * atr_grid) / close_grid

    pf = vbt.Portfolio.from_signals(
        close_grid,
        entries=long_entries,
        short_entries=short_entries,
        sl_stop=sl_pct,
        sl_trail=True,
        tp_stop=None,
        freq='1min',
        init_cash=INITIAL_CASH,
        fees=0.0001,
        slippage=0.0002,
        size=0.01,
        size_type=SizeType.Percent,
        upon_long_conflict='ignore',
        upon_opposite_entry='ignore'
    )

    signals = {
        "long_entries": long_entries,
        "short_entries": short_entries,
        "long_exits": None,
        "short_exits": None
    }
    return pf, signals


def optimize_dl_vwap_reversion(
    df: pd.DataFrame, 
    delta_vwap: list, 
    tau_vwap_trend: list, 
    volume_multiplier: list,
    alpha_atr: list,
    alpha_tp: list
):
    """
    VWAP mean-reversion strategy using PREDICTED future volume for confirmation.

    Entries require price to sit more than ``delta_vwap`` away from the VWAP,
    the VWAP to have moved in the same direction over ``tau_vwap_trend`` bars,
    and predicted volume to exceed ``volume_multiplier`` times current volume.
    Exits are triggered by price crossing back through the VWAP OR by an
    ATR-based stop-loss (``alpha_atr``) / take-profit (``alpha_tp``).

    Previously reported best parameters:
        delta_vwap: 0.0045000000000000005
        tau_vwap_trend: 7
        volume_multiplier: 0.9999999999999999
        alpha_atr: 5.0   (reported as alpha_atr_sl)
        alpha_tp: 10.0   (reported as rr_ratio; the reward/risk ratio is 2)

    Returns:
        (pf, signals, metrics) on success. If anything raises, the traceback is
        printed and (empty portfolio on df['close'], {}, {}) is returned.
    """
    print("\n--- Running DL VWAP Reversion Strategy (Predicted Volume + ATR Stops) ---")

    try:
        df.index = pd.to_datetime(df.index)

        @njit
        def generate_dl_vwap_entries_nb(
            close_arr, vwap_arr, pred_vol_arr, current_vol_arr,
            delta_vwap_scalar, tau_vwap_trend_scalar, vol_mult_scalar,
            alpha_atr_scalar, alpha_tp_scalar
        ):
            # alpha_atr_scalar / alpha_tp_scalar are not used for the signals;
            # they are declared so they show up as column levels, which are
            # read back below to size the stops.
            tau = int(tau_vwap_trend_scalar)
            vwap_trend_confirm_arr = vbt.nb.fshift_1d_nb(vwap_arr, tau)
            trend_down = vwap_arr < vwap_trend_confirm_arr
            trend_up = vwap_arr > vwap_trend_confirm_arr
            pred_vol_high = pred_vol_arr > (vol_mult_scalar * current_vol_arr)
            price_below_vwap = close_arr < (1 - delta_vwap_scalar) * vwap_arr
            price_above_vwap = close_arr > (1 + delta_vwap_scalar) * vwap_arr
            long_entries = price_below_vwap & trend_down & pred_vol_high
            short_entries = price_above_vwap & trend_up & pred_vol_high
            return long_entries, short_entries

        DLVwapReversionInd = vbt.IndicatorFactory(
            input_names=['close', 'vwap', 'pred_vol', 'current_vol'],
            param_names=[
                'delta_vwap', 'tau_vwap_trend', 'volume_multiplier', 
                'alpha_atr', 'alpha_tp'
            ],
            output_names=['long_entries', 'short_entries'],
            prepend_name=False  
        ).from_apply_func(
            generate_dl_vwap_entries_nb,
            keep_pd=False,
            to_2d=False
        )
        
        # param_product=False: the parameter lists are paired element-wise
        # rather than expanded into a full grid.
        indicator = DLVwapReversionInd.run(
            df['close'], df['vwap_dl'], df['pred_volume_tft_15_scaled'], df['volume'],
            delta_vwap=delta_vwap, 
            tau_vwap_trend=tau_vwap_trend, 
            volume_multiplier=volume_multiplier,
            alpha_atr=alpha_atr, 
            alpha_tp=alpha_tp,
            param_product=False 
        )

        price_df = df['close'].vbt.broadcast_to(indicator.long_entries)
        atr_df = df['atr'].vbt.broadcast_to(indicator.long_entries)
        vwap_dl_df_for_exit = df['vwap_dl'].vbt.broadcast_to(indicator.long_entries)

        # Revert-to-VWAP exits: longs close when price crosses above the VWAP,
        # shorts when it crosses below.
        long_exits = price_df.vbt.crossed_above(vwap_dl_df_for_exit)
        short_exits = price_df.vbt.crossed_below(vwap_dl_df_for_exit)
        
        alpha_atr_params = indicator.long_entries.columns.get_level_values('alpha_atr')
        alpha_tp_params = indicator.long_entries.columns.get_level_values('alpha_tp')

        # Stop / target distances expressed as a fraction of price.
        sl_pct = (alpha_atr_params.to_numpy() * atr_df) / price_df
        tp_pct = (alpha_tp_params.to_numpy() * atr_df) / price_df
        
        pf = vbt.Portfolio.from_signals(
            price_df,
            entries=indicator.long_entries,
            exits=long_exits,
            short_entries=indicator.short_entries,
            short_exits=short_exits,
            sl_stop=sl_pct,
            tp_stop=tp_pct,
            freq='1min',
            init_cash=INITIAL_CASH,
            fees=0.0001,
            slippage=0.0002,
            size=0.01,
            size_type=SizeType.Percent,
            upon_opposite_entry='ignore'
        )

        signals = {
            "long_entries": indicator.long_entries,
            "long_exits": long_exits,
            "short_entries": indicator.short_entries,
            "short_exits": short_exits,
        }
        
        metrics = {'calmar_ratio': pf.calmar_ratio()}

        return pf, signals, metrics

    except Exception as e:
        print(f"!!! AN ERROR OCCURRED in optimize_dl_vwap_reversion: {e}")
        traceback.print_exc()
        # Same frequency string as the main path: the legacy 'T' alias is
        # deprecated in recent pandas and must not break the error handler.
        empty_pf = vbt.Portfolio.from_signals(df['close'], freq='1min')
        return empty_pf, {}, {}


In [None]:
# --- Randomized Search and Optimization (ranked by Calmar Ratio; Kelly Criterion reported) ---

def calculate_kelly(pf):
    """
    Calculates the Kelly Criterion fraction for each column in a portfolio.

    Kelly = W - (1 - W) / R, where W is the probability of a winning trade and
    R is the ratio of the average win to the absolute average loss.

    Args:
        pf: portfolio-like object exposing ``trades.win_rate``,
            ``trades.winning.pnl.mean`` and ``trades.losing.pnl.mean``.

    Returns:
        A pandas Series (multi-column portfolio) or a scalar (single column).
        Infinite results are reported as NaN in both cases.
    """
    trades = pf.trades
    # win_rate() is already a fraction in [0, 1]; dividing it by 100 again
    # would shrink W a hundredfold and distort every Kelly value.
    win_prob = trades.win_rate(group_by=False)
    avg_win = trades.winning.pnl.mean(group_by=False)
    avg_loss = abs(trades.losing.pnl.mean(group_by=False))

    if isinstance(win_prob, pd.Series):
        payoff_ratio = avg_win / avg_loss
        kelly = win_prob - ((1 - win_prob) / payoff_ratio)
        # Return a cleaned copy instead of mutating in place.
        return kelly.replace([np.inf, -np.inf], np.nan)

    # Scalar path: go through numpy floats so a zero denominator yields
    # inf/NaN instead of raising ZeroDivisionError.
    with np.errstate(divide='ignore', invalid='ignore'):
        payoff_ratio = np.float64(avg_win) / np.float64(avg_loss)
        kelly = np.float64(win_prob) - ((1 - np.float64(win_prob)) / payoff_ratio)
    if np.isinf(kelly):
        return np.nan
    return kelly

# --- Simplified Parameter Sampling Function ---

def get_structured_random_params(
    param_ranges: Dict[str, np.ndarray], 
    dependent_config: Dict[str, dict],
    n_trials: int,
    seed: Optional[int] = None
) -> Dict[str, np.ndarray]:
    """
    Draw up to ``n_trials`` distinct parameter combinations from the grid
    spanned by ``param_ranges`` and derive any dependent parameters.

    Args:
        param_ranges: maps each source parameter name to its candidate values.
        dependent_config: maps a derived parameter name to a dict with 'args'
            (source parameter names) and 'func' (called once per sampled row).
            Entries whose args are not all present in ``param_ranges`` are skipped.
        n_trials: number of combinations wanted; capped at the grid size, in
            which case the whole grid is returned in order.
        seed: if given, seeds NumPy's global random generator first.

    Returns:
        Dict of equally long arrays, one per parameter. 'rr_ratio' is removed
        and 'alpha_atr_sl' is renamed to 'alpha_atr' before returning.
    """
    if seed is not None:
        np.random.seed(seed)

    axis_names = list(param_ranges.keys())
    axis_sizes = [len(candidates) for candidates in param_ranges.values()]
    grid_size = int(np.prod(axis_sizes))
    n_trials = min(n_trials, grid_size)

    # Pick flat positions in the grid: all of them when the grid is small
    # enough, otherwise a random subset without repetition.
    if n_trials != grid_size:
        chosen = np.random.choice(grid_size, size=n_trials, replace=False)
    else:
        chosen = np.arange(grid_size)

    # Translate flat positions into one index array per parameter axis.
    per_axis_positions = np.unravel_index(chosen, shape=axis_sizes)
    final_params = {}
    for axis, name in enumerate(axis_names):
        final_params[name] = np.asarray(param_ranges[name])[per_axis_positions[axis]]

    for dep_name, config in dependent_config.items():
        if not all(arg_name in param_ranges for arg_name in config['args']):
            continue
        arg_columns = [final_params[arg_name] for arg_name in config['args']]
        final_params[dep_name] = np.array(
            [config['func'](*row) for row in zip(*arg_columns)]
        )

    # Helper-only names are dropped / mapped to the names the strategies accept.
    final_params.pop('rr_ratio', None)
    if 'alpha_atr_sl' in final_params:
        final_params['alpha_atr'] = final_params.pop('alpha_atr_sl')

    return final_params




def run_optimization(strategy_func, df: pd.DataFrame, param_ranges: Dict[str, np.ndarray], n_trials: int, seed: Optional[int] = None):
    """
    A wrapper to run an optimization search on a Dask worker.

    Samples parameter sets, runs ``strategy_func`` on them, ranks the resulting
    portfolio columns by Calmar Ratio and reports the best one.

    Args:
        strategy_func: callable ``(df, **params)`` returning a tuple whose first
            element is the portfolio.
        df: input data for the strategy; None or empty yields status "NO_DATA".
        param_ranges: candidate values per parameter.
        n_trials: number of parameter sets to sample.
        seed: optional seed for the sampling.

    Returns:
        A SINGLE, serializable dictionary. It always contains "status" and
        "log" (everything written to stdout while running, including output
        from strategies that swallow their own exceptions). Non-success
        payloads carry "message"; the "SUCCESS" payload carries "best_params",
        "metrics", "total_trades" and "trades_df_str". Exceptions are never
        propagated; they are reported with status "ERROR".
    """
    log_stream = io.StringIO()

    def _finalize(payload):
        # Attach whatever was captured so far, so that diagnostics printed by
        # the strategy are not lost on early exits.
        payload["log"] = log_stream.getvalue()
        return payload

    with contextlib.redirect_stdout(log_stream):
        try:
            if df is None or df.empty:
                return _finalize({"status": "NO_DATA", "message": f"DataFrame for {strategy_func.__name__} is None or empty."})

            sampled_params = get_structured_random_params(
                param_ranges=param_ranges,
                dependent_config=DEPENDENT_PARAMS_CONFIG,
                n_trials=n_trials,
                seed=seed
            )
            
            result_tuple_from_strat = strategy_func(df, **sampled_params)
            pf = result_tuple_from_strat[0]

            if pf is None:
                return _finalize({"status": "NO_TRADES", "message": "The strategy returned a None portfolio."})

            if pf.trades.count().sum() == 0:
                return _finalize({"status": "NO_TRADES", "message": "The strategy did not produce any trades across all parameter sets."})
            
            metric_values = pf.calmar_ratio()
            metric_name = "Calmar Ratio"
            
            # Infinite and missing ratios cannot be ranked; drop them first.
            valid_metric_values = metric_values.replace([np.inf, -np.inf], np.nan).dropna()

            if valid_metric_values.empty or (valid_metric_values <= 0).all():
                return _finalize({"status": f"INVALID_{metric_name.upper()}", "message": f"All {metric_name}s were invalid or non-positive."})

            best_idx = valid_metric_values.idxmax()
            best_metric_value = valid_metric_values.max()
            best_pf_slice = pf[best_idx]
            trade_count = best_pf_slice.trades.count()
            
            # serializable payload 
            payload = {
                "status": "SUCCESS",
                "log": "", 
                "best_params": dict(zip(pf.wrapper.columns.names, best_idx)),
                "metrics": {
                    "Calmar Ratio": best_metric_value,
                    "Sharpe Ratio": best_pf_slice.sharpe_ratio(),
                    "Sortino Ratio": best_pf_slice.sortino_ratio(),
                    "Kelly Criterion": calculate_kelly(best_pf_slice),
                },
                "total_trades": trade_count,
                "trades_df_str": best_pf_slice.trades.records_readable.to_string() if trade_count > 0 else "No trades for best run."
            }
            
            # stdout is redirected, so these lines end up in the worker log.
            print("\n--- Inspecting Trades for Best Performing Parameters ---")
            print(f"Primary Optimization Metric: {metric_name}")
            print(f"Best {metric_name}: {payload['metrics']['Calmar Ratio']:.4f}")
            print(f"  - Sharpe Ratio:    {payload['metrics']['Sharpe Ratio']:.4f}")
            print(f"  - Sortino Ratio:   {payload['metrics']['Sortino Ratio']:.4f}")
            print(f"  - Kelly Criterion: {payload['metrics']['Kelly Criterion']:.4f}")
            print(f"\nBest Parameters:")
            for k, v in payload['best_params'].items():
                print(f"    {k}: {v}")
            print(f"\nTrades for the best parameter set:")
            print(payload['trades_df_str'])
            print("--- End of Trade Inspection ---\n")

            return _finalize(payload)

        except Exception:
            error_string = f"--- ERROR IN STRATEGY: {strategy_func.__name__} ---\n"
            error_string += traceback.format_exc()
            return _finalize({"status": "ERROR", "message": error_string})

def print_single_result(name, payload):
    """
    Print a human-readable report for one strategy from its result payload.

    Shows a banner with the strategy name, the worker log when the payload has
    a non-empty one, and then either the metrics and best parameters (status
    "SUCCESS") or the failure message.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Strategy: {name}")
    print(f"{banner}")

    worker_log = payload.get('log', '')
    if worker_log:
        print("\n--- Worker Log ---")
        print(worker_log)
        print("--- End Worker Log ---")

    print("--- Overall Best Batch Summary ---")
    if payload.get('status') != "SUCCESS":
        print(f"  Optimization failed or skipped: {payload.get('message', 'No details available.')}")
        return

    headline_metric = "Calmar Ratio"
    headline_value = payload['metrics'][headline_metric]
    print(f"  Best {headline_metric}: {headline_value:.4f}")

    # Secondary metrics, each with its pre-aligned output template.
    secondary_rows = (
        ("Sharpe Ratio", "  Sharpe Ratio:    {:.4f}"),
        ("Sortino Ratio", "  Sortino Ratio:   {:.4f}"),
        ("Kelly Criterion", "  Kelly Criterion: {:.4f}"),
    )
    for metric_key, template in secondary_rows:
        print(template.format(payload['metrics'][metric_key]))

    print("\n  Best Parameters:")
    for param_name, param_value in payload['best_params'].items():
        print(f"    {param_name}: {param_value}")


In [None]:
def main(n_trials: int = 1000, batch_size: int = 50):
    """
    Runs a one-stage optimization process for each strategy to find
    the best core parameters, using BATCHING to manage memory.

    For every ticker listed in METADATA_FILE the Dask workers are restarted,
    the three datasets are loaded, and each strategy is searched in batches
    submitted to the Dask client. The best batch per strategy (highest Calmar
    Ratio) is printed, and all winners are written to
    ``optimization_summary.csv`` under BASE_DIR.

    Args:
        n_trials: total number of parameter sets to request per strategy.
        batch_size: maximum number of parameter sets per submitted task. The
            last batch is smaller when ``n_trials`` is not a multiple of it.

    Raises:
        ValueError: if ``n_trials`` or ``batch_size`` is smaller than 1.
    """
    if n_trials < 1 or batch_size < 1:
        raise ValueError("n_trials and batch_size must both be >= 1")

    print(f"Loading tickers from {METADATA_FILE}")
    try:
        with open(METADATA_FILE, 'r') as f:
            tickers = [item['Ticker'] for item in json.load(f)]
        tickers_to_process = tickers
        print(f"Loaded {len(tickers)} tickers. Processing: {tickers_to_process}")
    except FileNotFoundError:
        print(f"Error: Metadata file not found at {METADATA_FILE}")
        return

    # batching config: one entry per batch; the final batch shrinks so the
    # requested total is never exceeded.
    batch_sizes = [min(batch_size, n_trials - start) for start in range(0, n_trials, batch_size)]

    all_best_results = []
    for ticker in tickers_to_process:
        print(f"\n{'='*80}")
        print(f"===== Starting Optimization for Ticker: {ticker} on {datetime.now().isoformat()} =====")
        print(f"{'='*80}")

        print("Restarting Dask workers for a clean memory state...")
        client.restart()

        try:
            print("Loading data...")
            df_baseline = load_optimization_data(ticker, 'Baseline')
            df_volume = load_optimization_data(ticker, 'Volume_Enhanced')
            df_dl = load_optimization_data(ticker, 'Deep_Learning_Enhanced')
            print("Data loading complete.")
        except Exception as e:
            print(f"An error occurred during data loading for {ticker}: {e}\n")
            continue

        # --- STAGE 1: Finding the best core parameters for each strategy ---
        print("\n--- Optimizing Core Strategy Parameters ---")
        # display name -> (strategy function, input frame, parameter grid)
        stage1_tasks = {
            "Baseline Breakout": (
                optimize_baseline_breakout,
                df_baseline,
                BASELINE_BREAKOUT_PARAMS
            ),
            "Baseline Momentum": (
                optimize_baseline_momentum,
                df_baseline,
                BASELINE_MOMENTUM_PARAMS
            ),
            "Baseline Bollinger Bands": (
                optimize_baseline_bbands,
                df_baseline,
                BASELINE_BBANDS_PARAMS
            ),
            "Volume-Enhanced Breakout": (
                optimize_volume_breakout,
                df_volume,
                VOLUME_BREAKOUT_PARAMS
            ),
            "Volume-Enhanced VWAP Reversion": (
                optimize_volume_vwap_reversion,
                df_volume,
                VOLUME_VWAP_REVERSION_PARAMS
            ),
            "Volume-Enhanced Momentum": (
                optimize_volume_momentum,
                df_volume,
                VOLUME_MOMENTUM_PARAMS
            ),
            "Deep Learning Breakout": (
                optimize_dl_breakout,
                df_dl,
                DL_BREAKOUT_PARAMS
            ),
            "Deep Learning VWAP Reversion": (
                optimize_dl_vwap_reversion,
                df_dl,
                DL_VWAP_REVERSION_PARAMS
            ),
            "Deep Learning Momentum": (
                optimize_dl_volume_momentum,
                df_dl,
                DL_VOLUME_MOMENTUM_PARAMS
            ),
        }

        # --- BATCHED OPTIMIZATION LOOP ---
        for name, (func, df, param_grid) in stage1_tasks.items():
            if df is None or df.empty:
                print(f"\nSkipping task '{name}' for ticker {ticker} due to missing data.")
                continue

            all_batch_futures = []
            
            print(f"\n--- Submitting Batched Optimization for '{name}' on {ticker} ---")

            # The batch number doubles as the sampling seed, so each batch
            # draws its own reproducible sample.
            # NOTE(review): df is sent along with every task; client.scatter
            # could avoid that, but strategies assign df.index in place, so
            # confirm that sharing one object between tasks is safe first.
            for batch_number, trials_in_batch in enumerate(batch_sizes):
                future = client.submit(run_optimization, func, df, param_grid, trials_in_batch, seed=batch_number)
                all_batch_futures.append((name, future)) 

            print("\n--- Waiting for all batch results... ---")
            
            overall_best_metric = -np.inf
            overall_best_payload = None
            
            # --- RESULT GATHERING LOOP ---
            for task_name, future in all_batch_futures:
                try:
                    payload = future.result()
                    
                    if payload.get('status') == "SUCCESS":
                        current_metric = payload['metrics']['Calmar Ratio']
                        if current_metric > overall_best_metric:
                            overall_best_metric = current_metric
                            overall_best_payload = payload
                    else:
                        print(f"  - A batch for '{task_name}' failed with status '{payload.get('status', 'UNKNOWN')}': {payload.get('message', '')}")

                except Exception as e:
                    print(f"  - A batch for '{task_name}' raised an exception during gathering: {e}")

            if overall_best_payload:
                print_single_result(name, overall_best_payload)
                
                # One flat row per (ticker, strategy) for the summary CSV.
                flat_result = {
                    'ticker': ticker,
                    'strategy': name,
                    'trade_count': overall_best_payload['total_trades'],
                }
                flat_result.update(overall_best_payload['metrics'])
                flat_result.update(overall_best_payload['best_params'])
                all_best_results.append(flat_result)
            else:
                print(f"\nNo successful batches found for strategy '{name}' on ticker {ticker}.")

    if all_best_results:
        print("\n--- All optimizations complete. Saving summary... ---")
        results_df = pd.DataFrame(all_best_results)
        
        # Fixed columns first, then the strategy parameters in sorted order.
        cols_order = ['ticker', 'strategy', 'Calmar Ratio', 'Sharpe Ratio', 'Sortino Ratio', 'Kelly Criterion', 'trade_count']
        param_cols = [col for col in results_df.columns if col not in cols_order]
        final_cols = cols_order + sorted(param_cols)
        
        results_df = results_df[final_cols]
        
        output_filename = os.path.join(BASE_DIR, "optimization_summary.csv")
        results_df.to_csv(output_filename, index=False)
        print(f"Successfully saved optimization summary to {output_filename}")
    else:
        print("\nNo successful optimizations to save.")

    print("\nMain optimization process complete.")

In [None]:
# Entry point: start the full optimization run only when executed directly.
if __name__ == "__main__":
    main()

Loading tickers from /home/jupyter-kohv04@vse.cz/kohv04/backtesting_final//metadata/nasdaq100_ticker_dataset.json
Loaded 101 tickers. Processing: ['AAPL', 'ABNB', 'ADBE', 'ADI', 'ADP', 'ADSK', 'AEP', 'AMAT', 'AMD', 'AMGN', 'AMZN', 'ANSS', 'APP', 'ARM', 'ASML', 'AVGO', 'AXON', 'AZN', 'BIIB', 'BKNG', 'BKR', 'CCEP', 'CDNS', 'CDW', 'CEG', 'CHTR', 'CMCSA', 'COST', 'CPRT', 'CRWD', 'CSCO', 'CSGP', 'CSX', 'CTAS', 'CTSH', 'DASH', 'DDOG', 'DXCM', 'EA', 'EXC', 'FANG', 'FAST', 'FTNT', 'GEHC', 'GFS', 'GILD', 'GOOG', 'GOOGL', 'HON', 'IDXX', 'INTC', 'INTU', 'ISRG', 'KDP', 'KHC', 'KLAC', 'LIN', 'LRCX', 'LULU', 'MAR', 'MCHP', 'MDB', 'MDLZ', 'MELI', 'META', 'MNST', 'MRVL', 'MSFT', 'MSTR', 'MU', 'NFLX', 'NVDA', 'NXPI', 'ODFL', 'ON', 'ORLY', 'PANW', 'PAYX', 'PCAR', 'PDD', 'PEP', 'PLTR', 'PYPL', 'QCOM', 'REGN', 'ROP', 'ROST', 'SBUX', 'SNPS', 'TEAM', 'TMUS', 'TSLA', 'TTD', 'TTWO', 'TXN', 'VRSK', 'VRTX', 'WBD', 'WDAY', 'XEL', 'ZS']

===== Starting Optimization for Ticker: AAPL on 2025-06-24T16:45:28.981034 =

In [None]:
def analyze_optimization_results(input_file_path, output_dir):
    """
    Analyzes trading strategy optimization results from a CSV file.

    This function partitions the dataset by strategy and then calculates:
    1. The average performance metrics for each strategy across all tickers, rounded to 2 decimal points.
    2. The mean of the best parameter values for each strategy. This provides a 
       generalized parameter set derived from the optimal parameters found for 
       each individual ticker.
    3. It then formats the parameter output to show only the relevant parameters for each strategy.

    Every column to the right of 'trade_count' is treated as a parameter column.
    Three files are written to ``output_dir``: 'average_strategy_metrics.csv',
    'average_strategy_parameters_full.csv' and
    'average_strategy_parameters_summary.txt'.

    If the input file does not exist, or a required column is missing, a
    message is printed and the function returns without writing anything.

    Args:
        input_file_path (str): The absolute path to the optimization summary CSV file.
        output_dir (str): The directory where the output CSV files will be saved.
    """
    try:
        df = pd.read_csv(input_file_path)
        print(f"Successfully loaded '{input_file_path}'.")
    except FileNotFoundError:
        print(f"Error: The file '{input_file_path}' was not found.")
        print("Please ensure the path is correct and the file exists.")
        return

    metric_columns = ['Calmar Ratio', 'Sharpe Ratio', 'Sortino Ratio', 'Kelly Criterion', 'trade_count']

    # Validate the schema before any computation: otherwise a missing column
    # surfaces as an uncaught KeyError in the first groupby below.
    missing_columns = [col for col in ['strategy'] + metric_columns if col not in df.columns]
    if missing_columns:
        print(f"\nError: required column(s) {missing_columns} not found. Cannot analyze results.")
        return

    average_metrics = df.groupby('strategy')[metric_columns].mean().round(2)
    
    print("\n" + "="*60)
    print("Average Performance Metrics per Strategy")
    print("="*60)
    print(average_metrics)
    
    metrics_output_path = os.path.join(output_dir, 'average_strategy_metrics.csv')
    average_metrics.to_csv(metrics_output_path)
    print(f"\nSaved average metrics to '{metrics_output_path}'")

    # Parameter columns are everything after 'trade_count' in the CSV layout.
    first_param_index = df.columns.get_loc('trade_count') + 1
    parameter_columns = df.columns[first_param_index:]

    average_parameters = df.groupby('strategy')[parameter_columns].mean()

    full_params_output_path = os.path.join(output_dir, 'average_strategy_parameters_full.csv')
    average_parameters.to_csv(full_params_output_path)
    print(f"\nSaved full average parameters table to '{full_params_output_path}'")
    
    print("\n" + "="*60)
    print("Cleaned Average Parameter Values per Strategy Group")
    print("="*60)
    print("(Showing only parameters used by each strategy)\n")

    cleaned_summary_path = os.path.join(output_dir, 'average_strategy_parameters_summary.txt')
    with open(cleaned_summary_path, 'w') as f:
        for strategy_name, params in average_parameters.iterrows():
            # A parameter a strategy never uses is NaN for all of its rows,
            # so its mean is NaN and it is dropped here.
            cleaned_params = params.dropna()
            
            header = f"--- {strategy_name} ---\n"
            content = cleaned_params.to_string() + "\n\n"
            
            # Print to console
            print(header)
            print(content)
            
            # Write to file
            f.write(header)
            f.write(content)

    print(f"Saved cleaned summary to '{cleaned_summary_path}'")


# Entry point for the post-run analysis of the saved optimization summary.
if __name__ == '__main__':
    # config
    # NOTE(review): absolute, user-specific path. main() saves
    # optimization_summary.csv under BASE_DIR, so this presumably has to point
    # at the same directory; confirm, or reuse BASE_DIR.
    DATA_DIR = "/home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/"
    
    # The reports are written into the same directory the summary is read from.
    OUTPUT_DIR = DATA_DIR

    input_csv_path = os.path.join(DATA_DIR, 'optimization_summary.csv')

    # Ensure the output directory exists before the analysis writes into it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    
    analyze_optimization_results(input_csv_path, OUTPUT_DIR)


Successfully loaded '/home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/optimization_summary.csv'.

Average Performance Metrics per Strategy
                                Calmar Ratio  Sharpe Ratio  Sortino Ratio  \
strategy                                                                    
Baseline Bollinger Bands              129.65          9.17          18.04   
Baseline Breakout                      46.64          4.59           7.50   
Baseline Momentum                      67.38          6.09           9.63   
Deep Learning Breakout                135.06          7.59          17.01   
Deep Learning Momentum                105.64          6.09          16.12   
Deep Learning VWAP Reversion           66.45          6.37          11.18   
Volume-Enhanced Breakout              271.07          7.71          33.77   
Volume-Enhanced Momentum               94.44          6.71          13.55   
Volume-Enhanced VWAP Reversion        157.94          9.95          20.73   

         

In [11]:
# Release the Dask resources: close the client first, then the cluster.
client.close()
cluster.close()