In [None]:
import os
import sys
import glob
import json
import numpy as np
import pandas as pd
import warnings
import traceback
from typing import Optional, Dict
import quantstats as qs 

# --- Environment Setup ---
# Expose the user-local shared-library directory and the user-local
# site-packages directory to this session (presumably needed for the TA-Lib
# import below — confirm).
#
# The block is idempotent: re-running the notebook cell does not keep growing
# LD_LIBRARY_PATH or sys.path. Empty entries are dropped because the dynamic
# loader treats an empty LD_LIBRARY_PATH entry (e.g. a trailing ':') as the
# current working directory.
# NOTE(review): the loader normally reads LD_LIBRARY_PATH at process start, so
# this assignment probably only affects child processes — confirm.
_LOCAL_LIB_DIR = os.path.join(os.path.expanduser('~'), 'kohv04', 'lib')
_ld_entries = [
    entry
    for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':')
    if entry and entry != _LOCAL_LIB_DIR
]
# The local directory always goes first so it keeps precedence.
os.environ['LD_LIBRARY_PATH'] = ':'.join([_LOCAL_LIB_DIR] + _ld_entries)

_LOCAL_SITE_PACKAGES = '/home/jupyter-kohv04@vse.cz/kohv04/lib/python3.10/site-packages'
if _LOCAL_SITE_PACKAGES not in sys.path:
    sys.path.append(_LOCAL_SITE_PACKAGES)

# TA-Lib is treated as optional at import time: a failed import is only
# reported on stdout, so `talib` stays undefined and any later use of it
# would raise NameError.
try:
    import talib
except ImportError as e:
    print(f"Failed to import TA-Lib: {e}")
import vectorbt as vbt
from numba import njit
from vectorbt.portfolio.enums import SizeType

# Switch off caching in vectorbt's global settings.
vbt.settings.caching['enabled'] = False
# Silence pandas' SettingWithCopyWarning when it is attributed to the
# pandas.core.frame module.
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning, module="pandas.core.frame")

# Absolute root of the backtesting project. The trailing slash is kept so the
# value of BASE_DIR itself is unchanged for any code that concatenates onto it.
BASE_DIR = "/home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/"
# Built with os.path.join (like RESULTS_DIR) so the trailing slash of BASE_DIR
# does not produce a doubled separator ("//metadata") in the resulting path.
METADATA_FILE = os.path.join(BASE_DIR, "metadata", "nasdaq100_ticker_dataset.json")
RESULTS_DIR = os.path.join(BASE_DIR, "simulation_results")


# Selected parameter set for each strategy. Every value is wrapped in a
# single-element list (presumably so the dicts can be fed to the same code
# path as a parameter grid — confirm against the simulation code).

# -- Baseline strategies --
BEST_BASELINE_BREAKOUT_PARAMS = {
    'sl_stop': [0.011],
    'tp_stop': [0.01],
}
BEST_BASELINE_BBANDS_PARAMS = {
    'timeperiod': [25],
    'nbdev': [2.5],
}
BEST_BASELINE_MOMENTUM_PARAMS = {
    'window': [6],
    'sl_stop': [0.024],
}

# -- Volume-based strategies --
BEST_VOLUME_MOMENTUM_PARAMS = {
    'timeperiod': [17],
    'kappa_vol_mom': [2.8],
    'adx_threshold': [33],
    'alpha_atr': [3.3],
}
BEST_VOLUME_BREAKOUT_PARAMS = {
    'phi_va': [0.80],
    'kappa_surge': [3.5],
    'timeperiod': [20],
    'adx_threshold': [20],
    'alpha_atr': [3.0],
    'alpha_tp': [6.0],
}
BEST_VOLUME_VWAP_REVERSION_PARAMS = {
    'window': [40],
    'quantile': [0.9],
    'slope': [0.0001],
    'tau_vwap_trend': [19],
    'alpha_atr': [3.2],
    'alpha_tp': [6.0],
}

# -- DL strategies --
BEST_DL_BREAKOUT_PARAMS = {
    'phi_va': [0.75],
    'kappa_dl': [1.6],
    'timeperiod': [16],
    'adx_threshold': [20],
    'alpha_atr': [3.0],
    'alpha_tp': [4.0],
}
BEST_DL_VOLUME_MOMENTUM_PARAMS = {
    'timeperiod': [20],
    'kappa_dl': [1.5],
    'adx_threshold': [30],
    'alpha_atr': [4.0],
    'tau_vol_trend': [7],
}
BEST_DL_VWAP_REVERSION_PARAMS = {
    'delta_vwap': [0.003],
    'tau_vwap_trend': [18],
    'volume_multiplier': [1.5],
    'alpha_atr': [3.0],
    'alpha_tp': [7.0],
}

# Union of every parameter name used by any strategy above.
ALL_PARAM_KEYS = set()
for param_dict in (
    BEST_BASELINE_BREAKOUT_PARAMS,
    BEST_BASELINE_BBANDS_PARAMS,
    BEST_BASELINE_MOMENTUM_PARAMS,
    BEST_VOLUME_BREAKOUT_PARAMS,
    BEST_VOLUME_MOMENTUM_PARAMS,
    BEST_VOLUME_VWAP_REVERSION_PARAMS,
    BEST_DL_BREAKOUT_PARAMS,
    BEST_DL_VOLUME_MOMENTUM_PARAMS,
    BEST_DL_VWAP_REVERSION_PARAMS,
):
    # Iterating a dict yields its keys.
    ALL_PARAM_KEYS.update(param_dict)

In [None]:
def calculate_average_strategy_metrics(summary_file_path: str, output_filename: str):
    """
    Average every numeric metric of a simulation summary per strategy and save it.

    The summary CSV is loaded, all columns whose name contains 'Duration' are
    converted to seconds, and the numeric metric columns (everything except the
    identifier columns and the strategy parameter columns in ALL_PARAM_KEYS) are
    averaged per 'Strategy'. The result is restricted to a fixed list of report
    columns, ordered by strategy family (Baseline, Volume-Enhanced, Deep
    Learning, then anything else) and by strategy name, rounded to two
    decimals, written next to the input file and printed.

    Infinite metric values are treated as missing *before* averaging, so one
    infinite sample no longer turns a whole strategy average into 0. Averages
    that are still undefined afterwards are reported as 0.

    Errors are reported on stdout instead of being raised.

    Args:
        summary_file_path (str): The path to the simulation_summary.csv file.
        output_filename (str): The name of the output CSV file for average
            metrics; it is created in the directory of summary_file_path.
    """
    print("--- Calculating Average Metrics Per Strategy ---")
    try:
        df = pd.read_csv(summary_file_path)
        print(f"Successfully loaded {summary_file_path} with {len(df)} records.")

        non_metric_cols = {'Ticker', 'Strategy', 'Start', 'End', 'Period'}.union(ALL_PARAM_KEYS)

        # Durations arrive as timedelta strings; convert them to seconds so
        # they can be averaged like any other numeric metric.
        for col in df.columns:
            if 'Duration' in col:
                df[col] = pd.to_timedelta(df[col], errors='coerce').dt.total_seconds()

        metric_cols = [col for col in df.columns if col not in non_metric_cols]
        numeric_metric_cols = df[metric_cols].select_dtypes(include=np.number).columns.tolist()

        # A single +/-inf sample would make the group mean infinite (and later
        # be reported as 0), so mask infinities before averaging; mean() skips NaN.
        if numeric_metric_cols:
            df[numeric_metric_cols] = df[numeric_metric_cols].replace([np.inf, -np.inf], np.nan)

        print(f"Averaging the following metrics: {numeric_metric_cols}")

        print("Grouping by strategy and calculating average metrics...")
        average_metrics_df = df.groupby('Strategy')[numeric_metric_cols].mean().reset_index()

        desired_columns = [
            'Strategy', 'Start Value', 'End Value', 'Total Return [%]', 'Benchmark Return [%]',
            'Max Gross Exposure [%]', 'Total Fees Paid', 'Max Drawdown [%]', 'Max Drawdown Duration',
            'Total Trades', 'Total Closed Trades', 'Total Open Trades', 'Open Trade PnL',
            'Win Rate [%]', 'Best Trade [%]', 'Worst Trade [%]', 'Avg Winning Trade [%]',
            'Avg Losing Trade [%]', 'Avg Winning Trade Duration', 'Avg Losing Trade Duration',
            'Profit Factor', 'Expectancy', 'Sharpe Ratio', 'Calmar Ratio', 'Omega Ratio',
            'Sortino Ratio', 'Kelly Criterion'
        ]

        strategy_order = ['Baseline', 'Volume-Enhanced', 'Deep Learning']

        def family_of(strategy_name):
            # Match on the full family prefix: taking only the first word would
            # turn "Deep Learning ..." into "Deep", which is not a category.
            text = str(strategy_name)
            for family in strategy_order:
                if text.startswith(family):
                    return family
            return None  # unknown families become NaN and sort last

        # Filtering, sorting and formatting the final DataFrame

        cols_to_keep = [col for col in desired_columns if col in average_metrics_df.columns]
        # .copy() so the column assignment below works on an independent frame
        # rather than on a slice of the grouped result.
        average_metrics_df = average_metrics_df[cols_to_keep].copy()

        average_metrics_df['Strategy_Type'] = pd.Categorical(
            average_metrics_df['Strategy'].apply(family_of),
            categories=strategy_order,
            ordered=True
        )
        average_metrics_df = average_metrics_df.sort_values(
            by=['Strategy_Type', 'Strategy']
        ).drop(columns='Strategy_Type')

        numeric_cols_to_format = average_metrics_df.select_dtypes(include=np.number).columns.tolist()
        if numeric_cols_to_format:
            cleaned = average_metrics_df[numeric_cols_to_format].replace([np.inf, -np.inf], np.nan)
            average_metrics_df[numeric_cols_to_format] = cleaned.fillna(0).round(2)

        output_path = os.path.join(os.path.dirname(summary_file_path), output_filename)
        average_metrics_df.to_csv(output_path, index=False)

        print(f"Saved to: {output_path}")
        print("\n--- Average Metrics Preview ---")
        print(average_metrics_df.to_string())

    except FileNotFoundError:
        print(f"Error: The file '{summary_file_path}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
        traceback.print_exc()

if __name__ == "__main__":
    # Cell entry point: build the per-strategy averages from the simulation
    # summary stored in RESULTS_DIR, provided that summary exists.
    average_metrics_file = "strategy_average_metrics.csv"
    summary_file = os.path.join(RESULTS_DIR, "simulation_summary.csv")

    if os.path.exists(summary_file):
        calculate_average_strategy_metrics(summary_file, average_metrics_file)
    else:
        print(f"Please ensure '{summary_file}' exists before running.")

--- Calculating Average Metrics Per Strategy ---
Successfully loaded /home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/simulation_results/simulation_summary.csv with 909 records.
Averaging the following metrics: ['Start Value', 'End Value', 'Total Return [%]', 'Benchmark Return [%]', 'Max Gross Exposure [%]', 'Total Fees Paid', 'Max Drawdown [%]', 'Max Drawdown Duration', 'Total Trades', 'Total Closed Trades', 'Total Open Trades', 'Open Trade PnL', 'Win Rate [%]', 'Best Trade [%]', 'Worst Trade [%]', 'Avg Winning Trade [%]', 'Avg Losing Trade [%]', 'Avg Winning Trade Duration', 'Avg Losing Trade Duration', 'Profit Factor', 'Expectancy', 'Sharpe Ratio', 'Calmar Ratio', 'Omega Ratio', 'Sortino Ratio', 'Kelly Criterion']
Grouping by strategy and calculating average metrics...

Successfully created average strategy metrics summary.
Saved to: /home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/simulation_results/strategy_average_metrics.csv

--- Average Metrics Preview ---
             

In [None]:
def find_best_fit_per_strategy(summary_file_path: str, output_filename: str):
    """
    Pick, for every strategy, the ticker with the highest total return among
    the profitable simulations and save the result as a CSV file.

    A simulation qualifies when 'Kelly Criterion' > 0 and 'Win Rate [%]' > 50.
    Rows without a 'Total Return [%]' value cannot be ranked and are ignored.
    The winning rows are reduced to a fixed set of report columns, ordered by
    strategy family (Baseline, Volume-Enhanced, Deep Learning, then anything
    else) and by strategy name, rounded to two decimals, written next to the
    input file and printed.

    Errors are reported on stdout instead of being raised; when no simulation
    qualifies, no file is written.

    Args:
        summary_file_path (str): The path to the simulation_summary.csv file.
        output_filename (str): The name of the output CSV file for the best
            fits; it is created in the directory of summary_file_path.
    """
    print("\n--- Identifying Best-Fit Ticker Per Strategy (New Criteria) ---")
    try:
        df = pd.read_csv(summary_file_path)
        print(f"Successfully loaded {summary_file_path} with {len(df)} records.")

        # Initial filtering
        print("Filtering for Kelly Criterion > 0 and Win Rate > 50%...")
        required_filter_cols = ['Kelly Criterion', 'Win Rate [%]']
        if not all(col in df.columns for col in required_filter_cols):
            print(f"Error: One of the required filter columns {required_filter_cols} is not in the summary file.")
            return

        filtered_df = df[(df['Kelly Criterion'] > 0) & (df['Win Rate [%]'] > 50)].copy()

        if filtered_df.empty:
            print("No simulations met the criteria (Kelly > 0 and Win Rate > 50%). No output file will be generated.")
            return

        print(f"{len(filtered_df)} records passed the initial filter.")

        # Rows with a missing return cannot win and would make idxmax fail (or
        # yield NaN labels) for a strategy whose returns are all missing.
        rankable_df = filtered_df.dropna(subset=['Total Return [%]'])
        if rankable_df.empty:
            print("No filtered simulation has a 'Total Return [%]' value. No output file will be generated.")
            return

        # Grouping by strategy and finding the best total return
        print("Finding the best 'Total Return [%]' for each strategy from the filtered set...")
        best_indices = rankable_df.groupby('Strategy')['Total Return [%]'].idxmax()
        final_best_fits_df = rankable_df.loc[best_indices]

        # Defining and selecting the desired columns
        desired_columns = [
            'Strategy',
            'Ticker',
            'Win Rate [%]',
            'Kelly Criterion',
            'Total Return [%]',
            'Benchmark Return [%]',
            'Start',
            'End',
            'Max Gross Exposure [%]',
            'Total Fees Paid',
            'Max Drawdown [%]',
            'Max Drawdown Duration',
            'Total Trades',
            'Profit Factor',
            'Expectancy',
            'Sharpe Ratio',
            'Calmar Ratio',
            'Omega Ratio',
            'Sortino Ratio',
        ]

        cols_to_keep = [col for col in desired_columns if col in final_best_fits_df.columns]
        # .copy() so the column assignment below works on an independent frame
        # rather than on a slice of the filtered data.
        final_best_fits_df = final_best_fits_df[cols_to_keep].copy()

        # Custom sorting logic
        strategy_order = ['Baseline', 'Volume-Enhanced', 'Deep Learning']

        def family_of(strategy_name):
            # Match on the full family prefix: taking only the first word would
            # turn "Deep Learning ..." into "Deep", which is not a category.
            text = str(strategy_name)
            for family in strategy_order:
                if text.startswith(family):
                    return family
            return None  # unknown families become NaN and sort last

        # Creating a temporary categorical column for sorting
        final_best_fits_df['Strategy_Type'] = pd.Categorical(
            final_best_fits_df['Strategy'].apply(family_of),
            categories=strategy_order,
            ordered=True
        )

        # Sorting by the family first, then alphabetically within each family,
        # and dropping the temporary sorting column again
        final_best_fits_df = final_best_fits_df.sort_values(
            by=['Strategy_Type', 'Strategy']
        ).reset_index(drop=True).drop(columns='Strategy_Type')

        # Cleaning and formatting
        numeric_cols = final_best_fits_df.select_dtypes(include=np.number).columns.tolist()
        if numeric_cols:
            cleaned = final_best_fits_df[numeric_cols].replace([np.inf, -np.inf], np.nan)
            final_best_fits_df[numeric_cols] = cleaned.round(2)

        # Saving the best-fit results to a new CSV file
        output_path = os.path.join(os.path.dirname(summary_file_path), output_filename)
        final_best_fits_df.to_csv(output_path, index=False)

        print(f"\nSuccessfully created the best-fits summary with custom sorting.")
        print(f"Saved to: {output_path}")
        print("\n--- Best Fits Preview ---")
        print(final_best_fits_df.to_string())

    except FileNotFoundError:
        print(f"Error: The file '{summary_file_path}' was not found.")
    except KeyError as e:
        print(f"Error: A required column is missing from the summary file: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
        traceback.print_exc()

# Cell entry point: extract the best-fit ticker per strategy from the
# simulation summary stored in RESULTS_DIR, provided that summary exists.
if __name__ == "__main__":
    best_fits_file = "best_fits.csv"
    summary_file = os.path.join(RESULTS_DIR, "simulation_summary.csv")

    if os.path.exists(summary_file):
        find_best_fit_per_strategy(summary_file, best_fits_file)
    else:
        print(f"FATAL ERROR: The required input file was not found at the specified path.")
        print(f"Please ensure '{summary_file}' exists before running.")


--- Identifying Best-Fit Ticker Per Strategy (New Criteria) ---
Successfully loaded /home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/simulation_results/simulation_summary.csv with 909 records.
Filtering for Kelly Criterion > 0 and Win Rate > 50%...
168 records passed the initial filter.
Finding the best 'Total Return [%]' for each strategy from the filtered set...

Successfully created the best-fits summary with custom sorting.
Saved to: /home/jupyter-kohv04@vse.cz/kohv04/backtesting_final/simulation_results/best_fits.csv

--- Best Fits Preview ---
                         Strategy Ticker  Win Rate [%]  Kelly Criterion  Total Return [%]  Benchmark Return [%]                Start                  End  Max Gross Exposure [%]  Total Fees Paid  Max Drawdown [%] Max Drawdown Duration  Total Trades  Profit Factor  Expectancy  Sharpe Ratio  Calmar Ratio  Omega Ratio  Sortino Ratio
0        Baseline Bollinger Bands     ON         66.76             0.09              5.38                -25