# Time-LLM-Cryptex Model Performance Analysis

A comprehensive analysis of Bitcoin trading strategies driven by different LLMs, evaluated on an enhanced 26-feature dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Global plot appearance: the seaborn-flavoured matplotlib style sheet plus
# seaborn's "husl" colour palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Read the combined backtest results; `df` is the table every later cell
# in this notebook works from.
df = pd.read_csv('./backtest_results/combined_analysis.csv')

# Quick orientation: dimensions, column names, then the distinct values of
# the two columns the analysis pivots on.
print("Dataset shape:", df.shape)
print("\nColumns:", df.columns.tolist())
for heading, column in (("Models", 'Model'), ("Strategies", 'Strategy')):
    print(f"\n{heading}:", df[column].unique())

# Last expression of the cell, so the notebook renders the first rows.
df.head()

## 1. Total Return Comparison

In [None]:
# Model x Strategy table of total returns. aggfunc='first' takes a single
# entry per (Model, Strategy) pair instead of averaging.
returns_pivot = df.pivot_table(
    values='Total Return',
    index='Model',
    columns='Strategy',
    aggfunc='first',
)

# Leave the Buy & Hold column out so the charts compare the remaining
# strategies only. `strategy_cols` is reused by the later cells.
strategy_cols = returns_pivot.columns[returns_pivot.columns != 'Buy & Hold'].tolist()
returns_clean = returns_pivot.loc[:, strategy_cols]

# Heatmap of returns; the diverging colormap is centred on 0 so gains and
# losses fall on opposite sides of the colour scale.
plt.figure(figsize=(12, 8))
sns.heatmap(
    returns_clean,
    cmap='RdYlGn',
    center=0,
    annot=True,
    fmt='.1%',
    cbar_kws={'label': 'Total Return'},
)
plt.title('Total Return by Model and Strategy', fontsize=16, fontweight='bold')
plt.ylabel('Model', fontsize=12)
plt.xlabel('Trading Strategy', fontsize=12)
plt.yticks(rotation=0)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Bar plot comparison: one panel per strategy, models ordered from highest
# to lowest total return.
strategies = strategy_cols

# Size the grid from the number of strategies rather than assuming exactly
# four panels. squeeze=False keeps `axes` two-dimensional for any grid shape.
n_cols = 2
n_rows = max(1, -(-len(strategies) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 6 * n_rows), squeeze=False)
fig.suptitle('Trading Strategy Performance by Model', fontsize=16, fontweight='bold')

panels = axes.ravel()
for ax, strategy in zip(panels, strategies):
    # Deliberately not called `strategy_data`: that name holds the filtered
    # results table used by the ranking/insight cells, and re-running this
    # cell must not overwrite it.
    ranked_returns = returns_clean[strategy].sort_values(ascending=False)
    positions = range(len(ranked_returns))

    bars = ax.bar(positions, ranked_returns.values,
                  color=sns.color_palette("husl", len(ranked_returns)))
    ax.set_title(f'{strategy} Strategy', fontsize=14, fontweight='bold')
    ax.set_ylabel('Total Return', fontsize=12)
    ax.set_xticks(positions)
    ax.set_xticklabels(ranked_returns.index, rotation=45, ha='right')

    # Value labels: bar_label pads in points (independent of the y-scale)
    # and puts the text on the outer end of the bar, so negative returns
    # are labelled below the bar instead of on top of it.
    ax.bar_label(bars,
                 labels=[f'{value:.1%}' for value in ranked_returns.values],
                 padding=3, fontsize=10)

    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0%}'))
    ax.grid(True, alpha=0.3)

# Hide any grid slots that did not receive a strategy (e.g. the fourth slot
# when there are three strategies).
for spare in panels[len(strategies):]:
    spare.set_visible(False)

plt.tight_layout()
plt.show()

## 2. Risk-Adjusted Performance (Sharpe Ratio)

In [None]:
# Model x Strategy table of Sharpe ratios, restricted to the same strategy
# columns selected for the return comparison.
sharpe_pivot = df.pivot_table(
    values='Sharpe Ratio',
    index='Model',
    columns='Strategy',
    aggfunc='first',
)
sharpe_clean = sharpe_pivot.loc[:, strategy_cols]

# Annotated heatmap, three decimals per cell.
plt.figure(figsize=(12, 8))
sns.heatmap(
    sharpe_clean,
    cmap='viridis',
    annot=True,
    fmt='.3f',
    cbar_kws={'label': 'Sharpe Ratio'},
)
plt.title('Sharpe Ratio by Model and Strategy', fontsize=16, fontweight='bold')
plt.ylabel('Model', fontsize=12)
plt.xlabel('Trading Strategy', fontsize=12)
plt.yticks(rotation=0)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Best performers by Sharpe ratio: each model's highest Sharpe across the
# strategies, sorted ascending so the strongest model is drawn at the top
# of the horizontal bar chart.
plt.figure(figsize=(12, 6))
best_sharpe = sharpe_clean.max(axis=1).sort_values(ascending=True)

# Colour bands: below 0.5 red, below 0.7 orange, otherwise green.
colors = ['red' if x < 0.5 else 'orange' if x < 0.7 else 'green' for x in best_sharpe.values]

bars = plt.barh(range(len(best_sharpe)), best_sharpe.values, color=colors, alpha=0.7)
plt.yticks(range(len(best_sharpe)), best_sharpe.index)
plt.xlabel('Best Sharpe Ratio', fontsize=12)
plt.title('Best Sharpe Ratio by Model (Across All Strategies)', fontsize=14, fontweight='bold')
plt.grid(True, alpha=0.3)

# Value labels: bar_label pads in points and places the text at the outer
# end of each bar, so a negative Sharpe ratio is labelled to the left of
# its bar rather than drawn over it.
plt.gca().bar_label(bars, fmt='%.3f', padding=3, fontsize=10)

plt.tight_layout()
plt.show()

## 3. Risk Analysis (Max Drawdown)

In [None]:
# Max Drawdown analysis: Model x Strategy table of drawdowns for the same
# strategy columns used in the return and Sharpe comparisons.
drawdown_pivot = df.pivot_table(index='Model', columns='Strategy', values='Max Drawdown', aggfunc='first')
drawdown_clean = drawdown_pivot[strategy_cols]

# Colour the cells by drawdown *magnitude* so that the largest drawdown is
# always red with the reversed red-yellow-green map, whichever sign
# convention the data uses. The rest of this notebook treats drawdowns as
# negative fractions ("less negative is better"); colouring the signed
# values directly would then paint the worst drawdown green.
# The annotations still show the original (signed) values.
plt.figure(figsize=(12, 8))
sns.heatmap(drawdown_clean.abs(), annot=drawdown_clean, fmt='.1%', cmap='RdYlGn_r',
            cbar_kws={'label': 'Max Drawdown (abs)'})
plt.title('Max Drawdown by Model and Strategy', fontsize=16, fontweight='bold')
plt.xlabel('Trading Strategy', fontsize=12)
plt.ylabel('Model', fontsize=12)
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Risk vs Return scatter plot: one panel per strategy, with the absolute
# max drawdown on the x-axis and the total return on the y-axis.

# Derive the panel count from the strategies instead of assuming exactly
# three; squeeze=False + ravel gives a 1-D array of axes for any count.
n_panels = max(1, len(strategy_cols))
fig, axes = plt.subplots(1, n_panels, figsize=(6 * n_panels, 6), squeeze=False)
axes = axes.ravel()
fig.suptitle('Risk vs Return Analysis by Strategy', fontsize=16, fontweight='bold')

for ax, strategy in zip(axes, strategy_cols):
    # Deliberately not called `strategy_data`: that name holds the filtered
    # results table used by the ranking/insight cells, and re-running this
    # cell must not overwrite it.
    subset = df[df['Strategy'] == strategy]
    drawdown_abs = subset['Max Drawdown'].abs()
    total_return = subset['Total Return']

    ax.scatter(drawdown_abs, total_return,
               s=100, alpha=0.7, c=range(len(subset)), cmap='viridis')

    # Label each point with its model name; underscores become line breaks
    # to keep the labels narrow.
    for model, risk, reward in zip(subset['Model'], drawdown_abs, total_return):
        ax.annotate(model.replace('_', '\n'),
                    (risk, reward),
                    xytext=(5, 5), textcoords='offset points',
                    fontsize=8, ha='left')

    ax.set_xlabel('Max Drawdown (abs)', fontsize=12)
    ax.set_ylabel('Total Return', fontsize=12)
    ax.set_title(f'{strategy} Strategy', fontsize=14)
    ax.grid(True, alpha=0.3)
    # Both axes hold fractions; show them as whole percentages.
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0%}'))
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0%}'))

plt.tight_layout()
plt.show()

## 4. Overall Model Rankings

In [None]:
# Per-model mean of each headline metric over every strategy except
# Buy & Hold, rounded to four decimals. `strategy_data` and `model_avg`
# are reused by the cells that follow.
strategy_data = df.loc[df['Strategy'] != 'Buy & Hold']
model_avg = (
    strategy_data
    .groupby('Model')[['Total Return', 'Sharpe Ratio', 'Max Drawdown', 'Win Rate']]
    .mean()
    .round(4)
)

# Create ranking visualization: a 2x2 grid with one bar chart per averaged
# metric, models ordered from highest to lowest value.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Model Rankings - Average Performance Across All Strategies', fontsize=16, fontweight='bold')

metrics = ['Total Return', 'Sharpe Ratio', 'Max Drawdown', 'Win Rate']
colors = ['skyblue', 'lightgreen', 'salmon', 'gold']
# Metrics displayed as percentages; anything else is shown with three decimals.
percent_metrics = {'Total Return', 'Max Drawdown', 'Win Rate'}

# axes.flat walks the grid row by row, matching the order of `metrics`.
for ax, metric, color in zip(axes.flat, metrics, colors):
    is_percent = metric in percent_metrics
    tick_spec = '{:.0%}' if is_percent else '{:.3f}'
    label_spec = '{:.1%}' if is_percent else '{:.3f}'

    # Descending order for every metric. This also covers Max Drawdown,
    # where less negative is better, so the previous special-case branch
    # (which sorted the same way) was redundant.
    sorted_data = model_avg[metric].sort_values(ascending=False)

    bars = ax.bar(range(len(sorted_data)), sorted_data.values,
                  color=color, alpha=0.7, edgecolor='black')

    ax.set_title(f'Average {metric}', fontsize=14, fontweight='bold')
    ax.set_xticks(range(len(sorted_data)))
    ax.set_xticklabels(sorted_data.index, rotation=45, ha='right')

    # Bind the format spec as a default argument so each panel keeps its own.
    ax.yaxis.set_major_formatter(
        plt.FuncFormatter(lambda y, _, spec=tick_spec: spec.format(y)))

    # Value labels: above the bar for non-negative values, below it otherwise.
    for bar, value in zip(bars, sorted_data.values):
        height = bar.get_height()
        is_up = height >= 0
        y_pos = height + (0.01 if is_up else -0.03)
        ax.text(bar.get_x() + bar.get_width()/2., y_pos, label_spec.format(value),
                ha='center', va='bottom' if is_up else 'top', fontsize=10)

    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Print summary table, best average return first.
print("\nMODEL PERFORMANCE SUMMARY (Average across all strategies):")
print("=" * 80)
print(model_avg.sort_values('Total Return', ascending=False))

## 5. Strategy Effectiveness Analysis

In [None]:
# Per-strategy summary over all models: mean, spread and extremes of the
# total return and the Sharpe ratio, rounded to four decimals. The result
# has two-level columns of the form (metric, statistic).
summary_stats = ['mean', 'std', 'max', 'min']
strategy_avg = (
    strategy_data
    .groupby('Strategy')
    .agg({'Total Return': summary_stats, 'Sharpe Ratio': summary_stats})
    .round(4)
)

print("STRATEGY EFFECTIVENESS SUMMARY:")
print("=" * 60)
print(strategy_avg)

# Side-by-side box plots showing how each metric is distributed across
# models for every strategy.
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

for ax, metric in zip(axes, ('Total Return', 'Sharpe Ratio')):
    sns.boxplot(x='Strategy', y=metric, data=strategy_data, ax=ax)
    ax.set_title(f'{metric} Distribution by Strategy', fontsize=14, fontweight='bold')
    ax.tick_params(axis='x', rotation=45)
    if metric == 'Total Return':
        # Returns are fractions; show the axis as whole percentages.
        ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0%}'))
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Key Insights and Conclusions

In [None]:
# Text summary of the findings, built from the per-model averages
# (`model_avg`), the per-strategy summary (`strategy_avg`) and the filtered
# results table (`strategy_data`).
print("KEY INSIGHTS FROM THE ANALYSIS:")
print("=" * 50)

# 1. Model with the highest average total return, with its average Sharpe.
best_model = model_avg['Total Return'].idxmax()
best_return, best_sharpe = model_avg.loc[best_model, ['Total Return', 'Sharpe Ratio']]

print(f"\n1. BEST OVERALL MODEL: {best_model}")
print(f"   - Average Return: {best_return:.2%}")
print(f"   - Average Sharpe: {best_sharpe:.3f}")

# 2. Strategies with the highest mean return and the highest mean Sharpe.
best_strategy_return = strategy_avg[('Total Return', 'mean')].idxmax()
best_strategy_sharpe = strategy_avg[('Sharpe Ratio', 'mean')].idxmax()

print("\n2. BEST STRATEGIES:")
print(f"   - Highest Returns: {best_strategy_return}")
print(f"   - Best Risk-Adjusted: {best_strategy_sharpe}")

# 3. Every model/strategy pair whose total return exceeds 5.0 (i.e. 500%).
standouts = strategy_data[strategy_data['Total Return'] > 5.0]
outstanding_performances = list(
    zip(standouts['Model'], standouts['Strategy'], standouts['Total Return'])
)

if outstanding_performances:
    print("\n3. OUTSTANDING PERFORMANCES (>500% return):")
    for model, strategy, return_val in outstanding_performances:
        print(f"   - {model} with {strategy}: {return_val:.1%}")

# 4. Risk: the model whose average drawdown is the largest value, i.e. the
# least negative one.
safest_model = model_avg['Max Drawdown'].idxmax()
safest_drawdown = model_avg.loc[safest_model, 'Max Drawdown']

print("\n4. RISK ANALYSIS:")
print(f"   - Safest Model: {safest_model} (Max Drawdown: {safest_drawdown:.1%})")

# Consistency: the strategy whose total return varies least between models.
strategy_std = strategy_data.groupby('Strategy')['Total Return'].std()
most_consistent = strategy_std.idxmin()
print(f"   - Most Consistent Strategy: {most_consistent} (lowest volatility across models)")

# 5. Benchmark comparison: how many model/strategy combinations returned
# more than Buy & Hold.
print(f"\n5. COMPARISON TO BUY & HOLD:")
buy_hold_returns = df.loc[df['Strategy'] == 'Buy & Hold', 'Total Return']
total_model_strategy_combos = len(strategy_data)

if buy_hold_returns.empty:
    # No benchmark row in the results: report that instead of failing on
    # an out-of-range positional lookup.
    buy_hold_return = float('nan')
    models_beating_bh = 0
    print("   - Buy & Hold benchmark not found in the results; comparison skipped")
else:
    # The first Buy & Hold row is used as the benchmark.
    # NOTE(review): presumably the benchmark return is identical for every
    # model; confirm against how the CSV is produced.
    buy_hold_return = buy_hold_returns.iloc[0]
    models_beating_bh = int((strategy_data['Total Return'] > buy_hold_return).sum())
    print(f"   - Buy & Hold Return: {buy_hold_return:.1%}")
    if total_model_strategy_combos:
        beating_share = models_beating_bh / total_model_strategy_combos
        print(f"   - Model/Strategy combinations beating B&H: {models_beating_bh}/{total_model_strategy_combos} ({beating_share:.1%})")
    else:
        # Nothing to compare; avoids dividing by zero for the share.
        print("   - No model/strategy combinations available to compare against B&H")