In [None]:
import sqlite3
from pathlib import Path

import pandas as pd

# === Adjust this if your DB lives elsewhere ===
DB_PATH = Path("../data/trading_risk.db")

# Fail fast and report the absolute path that was tried; sqlite3.connect()
# on a missing file would otherwise create a new, empty database there.
if not DB_PATH.exists():
    raise FileNotFoundError(f"Could not find database at {DB_PATH.resolve()}")

conn = sqlite3.connect(DB_PATH)

# sqlite_master is SQLite's schema catalogue; list the table names in it.
table_listing = pd.read_sql_query(
    "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;", conn
)
tables = table_listing["name"].tolist()
print(f"Found {len(tables)} tables: {tables}")

In [None]:
# Load every row of the `fills` table into memory for the analysis below.
fills = pd.read_sql_query("SELECT * FROM fills", conn)
print(fills.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that emitted a stray "None" line).
fills.info()
display(fills.head())

In [None]:
# Load every row of the `account_daily_summary` table for inspection.
summaries = pd.read_sql_query("SELECT * FROM account_daily_summary", conn)
print(summaries.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that emitted a stray "None" line).
summaries.info()
display(summaries.head())

In [None]:
# Show the column labels of `fills` (rendered as the cell's output).
fills.columns

In [None]:
# Columns removed from `fills` before the order-level aggregation:
# identifiers, routing/status metadata and the per-fee breakdown.
cols_to_drop = [
    "id", "route", "liquidity", "currency", "status", "propreports_id",
    "commission", "ecn_fee", "sec_fee", "orf_fee", "cat_fee", "taf_fee",
    "ftt_fee", "nscc_fee", "acc_fee", "clr_fee", "misc_fee", "total_fees",
]

# Rebind instead of mutating in place, and only drop columns that are still
# present, so re-running this cell is a no-op instead of raising KeyError.
fills = fills.drop(columns=[col for col in cols_to_drop if col in fills.columns])




In [None]:
# Calculate weighted average price for each order
# Weighted price = sum(price * quantity) / sum(quantity)
#
# Implemented with vectorised group-wise reductions instead of a Python-level
# groupby().apply() that built one Series per order: same columns, far fewer
# Python calls, and no "apply operated on the grouping columns" deprecation
# warning on recent pandas.
order_groups = (
    fills.assign(notional=fills['price'] * fills['quantity'])
    .groupby('order_id')
)

# NOTE: 'first' takes the first non-null value in each order; this matches the
# first row's value whenever side / symbol / account_id are populated.
order_weighted_prices = order_groups.agg(
    total_quantity=('quantity', 'sum'),
    side=('side', 'first'),
    symbol=('symbol', 'first'),
    account_id=('account_id', 'first'),
    first_fill_time=('datetime', 'min'),
    last_fill_time=('datetime', 'max'),
)

# Insert the derived columns at the positions the rest of the notebook expects:
# weighted_avg_price, total_quantity, num_fills, side, symbol, ...
# Both right-hand sides are indexed by order_id, so they align with the frame.
order_weighted_prices.insert(
    0,
    'weighted_avg_price',
    order_groups['notional'].sum() / order_weighted_prices['total_quantity'],
)
order_weighted_prices.insert(2, 'num_fills', order_groups.size())
order_weighted_prices = order_weighted_prices.reset_index()

print(f"Total unique orders: {len(order_weighted_prices)}")
print("\nSample of weighted average prices by order:")
display(order_weighted_prices.head(10))

In [None]:
# Give the order-level frame its working column names: the weighted average
# becomes `price` and the first fill timestamp becomes `datetime`.
column_aliases = {'weighted_avg_price': 'price', 'first_fill_time': 'datetime'}
order_weighted_prices = order_weighted_prices.rename(columns=column_aliases)

# Columns to discard. `price_diff_pct` / `simple_avg_price` are only created by
# the commented-out diagnostics cell, so keep just the names actually present.
unwanted = ('order_id', 'last_fill_time', 'price_diff_pct', 'simple_avg_price')
columns_to_drop = [name for name in unwanted if name in order_weighted_prices.columns]
order_weighted_prices = order_weighted_prices.drop(columns=columns_to_drop)

print("Updated columns:")
print(order_weighted_prices.columns.tolist())
print(f"\nShape: {order_weighted_prices.shape}")
display(order_weighted_prices.head())

In [ ]:
# Essential imports for visualization
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime, time

# Notebook-wide plotting defaults: seaborn dark-grid look, "husl" colour cycle,
# 12x6 inch figures and 10pt text.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 10,
})

In [ ]:
# 1. ESSENTIAL BEHAVIORAL FEATURES
# Convert datetime and extract time-based features
order_weighted_prices['datetime'] = pd.to_datetime(order_weighted_prices['datetime'])
order_weighted_prices['date'] = order_weighted_prices['datetime'].dt.date
order_weighted_prices['hour'] = order_weighted_prices['datetime'].dt.hour
order_weighted_prices['minute'] = order_weighted_prices['datetime'].dt.minute
# Fractional hour of the day, e.g. 09:30 -> 9.5
order_weighted_prices['time_of_day'] = order_weighted_prices['hour'] + order_weighted_prices['minute']/60

# Signed cash flow per order: side 'B' is cash out (negative); every other side
# value is treated as a sell (cash in, positive).
# NOTE(review): summing this equals realised PnL only when positions are flat
# over the summed window; open inventory is not marked to market.
# Vectorised with np.where instead of a row-wise DataFrame.apply(axis=1): same
# values, no per-row Python call, and it also works on an empty frame.
order_notional = order_weighted_prices['price'] * order_weighted_prices['total_quantity']
order_weighted_prices['signed_value'] = np.where(
    order_weighted_prices['side'] == 'B', -order_notional, order_notional
)

# Group by account and date for daily analysis
daily_features = order_weighted_prices.groupby(['account_id', 'date']).agg({
    'signed_value': ['sum', 'std', 'min', 'max'],
    'total_quantity': ['sum', 'mean'],
    'num_fills': ['sum', 'mean'],
    'time_of_day': ['min', 'max', 'std'],
    'side': lambda x: (x == 'B').sum() / len(x)  # Buy ratio
}).round(4)

# Flatten the (column, statistic) MultiIndex into "column_statistic" labels; the
# buy-ratio column takes its suffix from the lambda (e.g. `side_<lambda>`).
daily_features.columns = ['_'.join(col).strip() for col in daily_features.columns]
daily_features = daily_features.reset_index()

print("Daily behavioral features shape:", daily_features.shape)
display(daily_features.head())

In [ ]:
# 2. INTRADAY EMOTIONAL PATTERNS
# Per-order flags (session timing, rapid-fire orders, quick side flips) that are
# then rolled up per account and day.

# Session timing: "early" is before hour 10, "late" is hour 14 or later.
trade_hour = order_weighted_prices['hour']
order_weighted_prices['is_early'] = trade_hour.lt(10)
order_weighted_prices['is_late'] = trade_hour.ge(14)

# Order each account's activity chronologically, then look one order back.
order_weighted_prices = order_weighted_prices.sort_values(['account_id', 'datetime'])
per_account = order_weighted_prices.groupby('account_id')
gap_minutes = per_account['datetime'].diff().dt.total_seconds() / 60
previous_side = per_account['side'].shift()

# Rapid trading: less than 5 minutes since the same account's previous order.
# NOTE(review): the gap is taken per account across the whole history, so the
# first order of a day carries the overnight gap into the daily mean/std below.
order_weighted_prices['time_since_last'] = gap_minutes
order_weighted_prices['is_rapid'] = gap_minutes < 5

# Reversal: a rapid order whose side differs from the account's previous order.
# An account's first order compares against a missing value (so side_changed is
# True), but it can never be rapid, hence never a reversal.
order_weighted_prices['side_changed'] = previous_side != order_weighted_prices['side']
order_weighted_prices['is_reversal'] = (
    order_weighted_prices['is_rapid'] & order_weighted_prices['side_changed']
)

# Roll the flags up to one row per (account, date).
emotional_aggregations = {
    'is_early': 'mean',
    'is_late': 'mean',
    'is_rapid': 'mean',
    'is_reversal': 'sum',
    'time_since_last': ['mean', 'std'],
}
emotional_features = (
    order_weighted_prices
    .groupby(['account_id', 'date'])
    .agg(emotional_aggregations)
    .round(4)
)

# Flatten the (column, statistic) MultiIndex into "column_statistic" labels.
emotional_features.columns = ['_'.join(col).strip() for col in emotional_features.columns]
print("Emotional pattern features:")
display(emotional_features.head())

In [ ]:
# 3. VISUALIZATION: Trading Behavior Patterns
# Four panels: hourly volume, rapid-trading rate per trader, rolling buy ratio,
# and reversal counts by hour for the highest-volume traders.

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('Essential Trading Behavior Patterns', fontsize=16, y=1.02)

# 3.1 Intraday trading distribution
ax1 = axes[0, 0]
hourly_volume = order_weighted_prices.groupby('hour')['total_quantity'].sum()
ax1.bar(hourly_volume.index, hourly_volume.values, color='skyblue', alpha=0.8)
ax1.axvspan(9, 10, alpha=0.2, color='red', label='Opening hour')
ax1.axvspan(14, 16, alpha=0.2, color='orange', label='Closing hours')
ax1.set_xlabel('Hour of Day')
ax1.set_ylabel('Total Volume')
ax1.set_title('Intraday Trading Volume Distribution')
ax1.legend()

# 3.2 Rapid trading frequency by trader
ax2 = axes[0, 1]
rapid_by_trader = order_weighted_prices.groupby('account_id')['is_rapid'].mean().sort_values(ascending=False)
top_traders = rapid_by_trader.head(10)
ax2.barh(range(len(top_traders)), top_traders.values, color='coral')
ax2.set_yticks(range(len(top_traders)))
# `account` rather than `id`, to avoid shadowing the builtin.
ax2.set_yticklabels([f'Trader {account}' for account in top_traders.index])
ax2.set_xlabel('Rapid Trading Rate')
ax2.set_title('Top 10 Traders by Rapid Trading Frequency')
ax2.axvline(0.5, color='red', linestyle='--', alpha=0.5, label='50% threshold')
# The threshold line is labelled, so draw the legend that displays that label.
ax2.legend()

# 3.3 Buy/Sell imbalance over time
ax3 = axes[1, 0]
# Share of orders per day with side 'B', smoothed with a 5-day rolling mean.
# The mean of the boolean mask equals (side == 'B').sum() / len(group) without a
# Python-level groupby().apply().
daily_buy_ratio = (
    order_weighted_prices['side'].eq('B')
    .groupby(order_weighted_prices['date'])
    .mean()
    .rolling(window=5)
    .mean()
)
ax3.plot(daily_buy_ratio.index, daily_buy_ratio.values, linewidth=2, color='green')
ax3.axhline(0.5, color='black', linestyle='--', alpha=0.5)
ax3.fill_between(daily_buy_ratio.index, 0.5, daily_buy_ratio.values,
                 where=(daily_buy_ratio > 0.5), alpha=0.3, color='green', label='Buy bias')
ax3.fill_between(daily_buy_ratio.index, daily_buy_ratio.values, 0.5,
                 where=(daily_buy_ratio < 0.5), alpha=0.3, color='red', label='Sell bias')
ax3.set_xlabel('Date')
ax3.set_ylabel('Buy Ratio (5-day MA)')
ax3.set_title('Market Sentiment: Buy/Sell Ratio Over Time')
ax3.legend()
ax3.tick_params(axis='x', rotation=45)

# 3.4 Reversal frequency heatmap
ax4 = axes[1, 1]
reversal_pivot = order_weighted_prices.pivot_table(
    values='is_reversal',
    index='hour',
    columns='account_id',
    aggfunc='sum',
    fill_value=0
)
# Select top 8 most active traders (by total traded quantity)
top_traders_vol = order_weighted_prices.groupby('account_id')['total_quantity'].sum().nlargest(8).index
reversal_pivot_subset = reversal_pivot[top_traders_vol]
sns.heatmap(reversal_pivot_subset, cmap='YlOrRd', ax=ax4, cbar_kws={'label': 'Reversal Count'})
ax4.set_xlabel('Trader ID')
ax4.set_ylabel('Hour of Day')
ax4.set_title('Reversal Trading Patterns by Hour')

plt.tight_layout()
plt.show()

In [ ]:
# 4. CRITICAL RISK METRICS
# Per-trader running cash-flow PnL, drawdown and summary statistics.
#
# The per-trader series are kept as local variables instead of being written
# back as columns on a filtered slice of `order_weighted_prices`, which raised
# SettingWithCopyWarning. Iterating the groupby also replaces one full boolean
# scan of the frame per trader with a single pass.
trader_risk_metrics = []

# sort=False keeps accounts in order of first appearance.
for account_id, trader_data in order_weighted_prices.groupby('account_id', sort=False):
    trader_data = trader_data.sort_values('datetime')

    # Running PnL, its running peak, and the distance below that peak.
    cumulative_pnl = trader_data['signed_value'].cumsum()
    running_max = cumulative_pnl.cummax()
    drawdown = cumulative_pnl - running_max

    # Drawdown as a percentage of the absolute running peak. A peak of exactly 0
    # makes the ratio undefined: dividing by NaN there avoids the -inf values the
    # plain division produced, and rows with no drawdown are pinned to 0%.
    # Undefined rows stay NaN and are skipped by .min() below.
    peak_size = running_max.abs()
    drawdown_pct = (drawdown / peak_size.where(peak_size > 0)) * 100
    drawdown_pct = drawdown_pct.mask(drawdown == 0, 0.0)

    # Win rate: share of trading days with a positive summed signed value.
    daily_pnl = trader_data.groupby('date')['signed_value'].sum()
    daily_std = daily_pnl.std()
    win_rate = (daily_pnl > 0).mean()

    metrics = {
        'account_id': account_id,
        'total_pnl': cumulative_pnl.iloc[-1],
        'max_drawdown': drawdown.min(),
        'max_drawdown_pct': drawdown_pct.min(),
        'win_rate': win_rate,
        # Mean/std of daily PnL scaled by sqrt(252); 0 when the std is 0 or
        # undefined (e.g. a single trading day).
        'daily_sharpe': daily_pnl.mean() / daily_std * np.sqrt(252) if daily_std > 0 else 0,
        'avg_trade_size': trader_data['total_quantity'].mean(),
        'trade_count': len(trader_data),
        'avg_trades_per_day': len(trader_data) / trader_data['date'].nunique()
    }
    trader_risk_metrics.append(metrics)

risk_df = pd.DataFrame(trader_risk_metrics).round(4)
risk_df = risk_df.sort_values('daily_sharpe', ascending=False)

print("Risk Metrics by Trader:")
display(risk_df)

In [ ]:
# 5. RISK VISUALIZATION: Drawdown and Performance Analysis
# Four panels built from `risk_df` (sorted by daily_sharpe, best first) and the
# order-level frame.

fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Risk Management Dashboard', fontsize=16, y=1.02)

# 5.1 Cumulative PnL curves for top traders
ax1 = axes[0, 0]
top_5_traders = risk_df.head(5)['account_id'].values
for trader_id in top_5_traders:
    trader_data = order_weighted_prices[order_weighted_prices['account_id'] == trader_id].sort_values('datetime')
    cumulative_pnl = trader_data['signed_value'].cumsum()
    ax1.plot(trader_data['datetime'], cumulative_pnl, label=f'Trader {trader_id}', linewidth=2)
ax1.set_xlabel('Date')
ax1.set_ylabel('Cumulative PnL ($)')
ax1.set_title('Cumulative PnL - Top 5 Traders by Sharpe')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 5.2 Drawdown visualization
ax2 = axes[0, 1]
# Trader in the middle of the Sharpe ranking, used as the drawdown example.
# Index the column first: risk_df.iloc[i]['account_id'] goes through a row
# Series, which upcasts a numeric id to float (e.g. "Trader 1234.0" in the title).
median_trader = risk_df['account_id'].iloc[len(risk_df)//2]
trader_data = order_weighted_prices[order_weighted_prices['account_id'] == median_trader].sort_values('datetime')
# Keep the derived series as locals instead of assigning columns on the filtered
# slice, which raised SettingWithCopyWarning.
median_cumulative_pnl = trader_data['signed_value'].cumsum()
median_running_max = median_cumulative_pnl.cummax()

ax2.plot(trader_data['datetime'], median_cumulative_pnl, label='Cumulative PnL', linewidth=2)
ax2.plot(trader_data['datetime'], median_running_max, label='Running Maximum', linewidth=1, linestyle='--', color='green')
ax2.fill_between(trader_data['datetime'], median_running_max, median_cumulative_pnl,
                 where=(median_running_max > median_cumulative_pnl),
                 alpha=0.3, color='red', label='Drawdown')
ax2.set_xlabel('Date')
ax2.set_ylabel('PnL ($)')
ax2.set_title(f'Drawdown Analysis - Trader {median_trader}')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 5.3 Risk-Return scatter (marker size ~ trade count, colour = win rate)
ax3 = axes[1, 0]
ax3.scatter(risk_df['max_drawdown_pct'], risk_df['daily_sharpe'],
           s=risk_df['trade_count']/10, alpha=0.6, c=risk_df['win_rate'], cmap='RdYlGn')
for account, x_pos, y_pos in zip(risk_df['account_id'], risk_df['max_drawdown_pct'], risk_df['daily_sharpe']):
    ax3.annotate(account, (x_pos, y_pos), fontsize=8, alpha=0.7)
ax3.set_xlabel('Max Drawdown (%)')
ax3.set_ylabel('Daily Sharpe Ratio')
ax3.set_title('Risk-Return Profile by Trader')
ax3.axhline(0, color='black', linestyle='--', alpha=0.3)
ax3.axvline(0, color='black', linestyle='--', alpha=0.3)
cbar = plt.colorbar(ax3.collections[0], ax=ax3)
cbar.set_label('Win Rate')

# 5.4 Trading frequency vs performance (colour = Sharpe)
ax4 = axes[1, 1]
ax4.scatter(risk_df['avg_trades_per_day'], risk_df['total_pnl'],
           s=100, alpha=0.6, c=risk_df['daily_sharpe'], cmap='coolwarm')
for account, x_pos, y_pos in zip(risk_df['account_id'], risk_df['avg_trades_per_day'], risk_df['total_pnl']):
    ax4.annotate(account, (x_pos, y_pos), fontsize=8, alpha=0.7)
ax4.set_xlabel('Average Trades per Day')
ax4.set_ylabel('Total PnL ($)')
ax4.set_title('Trading Frequency vs Performance')
ax4.axhline(0, color='black', linestyle='--', alpha=0.3)
cbar2 = plt.colorbar(ax4.collections[0], ax=ax4)
cbar2.set_label('Sharpe Ratio')

plt.tight_layout()
plt.show()

In [ ]:
# 6. KEY BEHAVIORAL INSIGHTS SUMMARY

# A trader is flagged when ANY of these holds: max drawdown below -20%,
# negative Sharpe, or a daily win rate under 45%.
risk_flags = (
    (risk_df['max_drawdown_pct'] < -20)
    | (risk_df['daily_sharpe'] < 0)
    | (risk_df['win_rate'] < 0.45)
)
high_risk_traders = risk_df.loc[risk_flags, 'account_id'].values

# Per-account roll-up of the order-level behaviour flags.
score_aggregations = {
    'is_rapid': 'mean',
    'is_reversal': 'sum',
    'is_early': 'mean',
    'is_late': 'mean',
    'time_since_last': 'std',
}
behavioral_scores = (
    order_weighted_prices
    .groupby('account_id')
    .agg(score_aggregations)
    .round(4)
)

# Composite score, weights 0.3 / 0.3 / 0.2 / 0.2:
#   rapid-order rate, reversals per trade, gap volatility relative to the
#   largest value across traders, and |early share - late share|.
trade_counts = behavioral_scores.index.map(risk_df.set_index('account_id')['trade_count'])
reversal_rate = behavioral_scores['is_reversal'] / trade_counts
gap_std = behavioral_scores['time_since_last']
gap_volatility = gap_std.fillna(0) / gap_std.max()
session_bias = abs(behavioral_scores['is_early'] - behavioral_scores['is_late'])

behavioral_scores['risk_score'] = (
    behavioral_scores['is_rapid'] * 0.3
    + reversal_rate * 0.3
    + gap_volatility * 0.2
    + session_bias * 0.2
)

print("🚨 HIGH RISK TRADERS:")
print(f"Traders with concerning metrics: {high_risk_traders}")
print("\n📊 BEHAVIORAL RISK SCORES (0-1 scale, higher = riskier):")
display(behavioral_scores.sort_values('risk_score', ascending=False).head(10))

# Recap of the feature families built in this notebook.
feature_recap = (
    "\n✅ ESSENTIAL FEATURES CREATED:",
    "1. Intraday patterns: early/late trading bias, hourly distribution",
    "2. Emotional signals: rapid trading, reversals, time between trades",
    "3. Risk metrics: max drawdown, Sharpe ratio, win rate",
    "4. Volume patterns: average trade size, daily volume variability",
    "5. Behavioral score: composite risk indicator",
)
for recap_line in feature_recap:
    print(recap_line)

In [None]:
# # Let's also check the distribution of fills per order
# fills_per_order = order_weighted_prices['num_fills'].value_counts().sort_index()
# print("\nDistribution of fills per order:")
# print(f"Orders with 1 fill: {fills_per_order[1] if 1 in fills_per_order else 0}")
# print(f"Orders with 2-5 fills: {fills_per_order[2:6].sum()}")
# print(f"Orders with 6-10 fills: {fills_per_order[6:11].sum()}")
# print(f"Orders with >10 fills: {fills_per_order[11:].sum()}")

# # Check weighted price difference from simple average
# order_weighted_prices['simple_avg_price'] = fills.groupby('order_id')['price'].mean().values
# order_weighted_prices['price_diff_pct'] = ((order_weighted_prices['weighted_avg_price'] - order_weighted_prices['simple_avg_price']) / order_weighted_prices['simple_avg_price'] * 100).abs()

# print(f"\nWeighted vs Simple Average Price Difference:")
# print(f"Mean absolute difference: {order_weighted_prices['price_diff_pct'].mean():.2f}%")
# print(f"Max absolute difference: {order_weighted_prices['price_diff_pct'].max():.2f}%")
# print(f"Orders with >1% difference: {(order_weighted_prices['price_diff_pct'] > 1).sum()}")

In [None]:
# Show the five fills with the smallest `datetime` values.
display(fills.sort_values(by="datetime").head())

In [None]:
# Show the five earliest orders. The order timestamp column was renamed from
# `first_fill_time` to `datetime` earlier in the notebook, so sorting by the old
# name raised KeyError on a top-to-bottom run.
display(order_weighted_prices.sort_values(by="datetime").head())

In [ ]:
# TRADER PNL VISUALIZATION

# 1. Calculate comprehensive PnL metrics for each trader
# Each entry holds scalar summary statistics plus the daily, cumulative and
# monthly series used by the plots.
trader_pnl_analysis = []

# Iterating the groupby (sort=False keeps order of first appearance) replaces
# one full boolean scan of the frame per trader with a single pass.
for account_id, trader_orders in order_weighted_prices.groupby('account_id', sort=False):
    trader_orders = trader_orders.sort_values('datetime')

    # Daily PnL (sum of signed values per date) and its running total
    daily_pnl = trader_orders.groupby('date')['signed_value'].sum()
    cumulative_pnl = daily_pnl.cumsum()

    # Monthly aggregation. The month key is a standalone Series rather than a
    # new column on the per-trader slice, which raised SettingWithCopyWarning.
    order_month = pd.to_datetime(trader_orders['date']).dt.to_period('M').rename('month')
    monthly_pnl = trader_orders['signed_value'].groupby(order_month).sum()

    winning_days = daily_pnl > 0
    losing_days = daily_pnl < 0

    trader_pnl_analysis.append({
        'account_id': account_id,
        'total_pnl': daily_pnl.sum(),
        'avg_daily_pnl': daily_pnl.mean(),
        'std_daily_pnl': daily_pnl.std(),
        'best_day': daily_pnl.max(),
        'worst_day': daily_pnl.min(),
        'positive_days': winning_days.sum(),
        'negative_days': losing_days.sum(),
        'total_days': len(daily_pnl),
        'win_rate': winning_days.mean(),
        # Gross gains / gross losses; infinite when there is no losing day.
        'profit_factor': daily_pnl[winning_days].sum() / abs(daily_pnl[losing_days].sum()) if losing_days.any() else np.inf,
        'daily_data': daily_pnl,
        'cumulative_data': cumulative_pnl,
        'monthly_data': monthly_pnl
    })

# Convert to DataFrame for easy access (scalar fields only; the per-trader
# series stay in `trader_pnl_analysis`)
series_fields = ['daily_data', 'cumulative_data', 'monthly_data']
pnl_summary = pd.DataFrame([{k: v for k, v in trader.items() if k not in series_fields}
                            for trader in trader_pnl_analysis])
pnl_summary = pnl_summary.sort_values('total_pnl', ascending=False)

print("📊 TRADER PNL SUMMARY STATISTICS")
print("=" * 80)
display(pnl_summary.round(2))

# 2. Create comprehensive PnL visualizations
# `pnl_summary` is sorted by total PnL (best first), so head(n) is "top n".
fig = plt.figure(figsize=(20, 16))
gs = fig.add_gridspec(4, 3, hspace=0.3, wspace=0.3)

# One-time lookup so each panel can fetch a trader's series by id instead of
# rescanning the list with next(...).
analysis_by_account = {entry['account_id']: entry for entry in trader_pnl_analysis}

# 2.1 Total PnL by Trader (Bar Chart)
ax1 = fig.add_subplot(gs[0, :])
colors = ['green' if x > 0 else 'red' for x in pnl_summary['total_pnl']]
bars = ax1.bar(range(len(pnl_summary)), pnl_summary['total_pnl'], color=colors, alpha=0.7)
ax1.set_xticks(range(len(pnl_summary)))
ax1.set_xticklabels([f'Trader {account}' for account in pnl_summary['account_id']], rotation=45)
ax1.set_ylabel('Total PnL ($)')
ax1.set_title('Total PnL by Trader', fontsize=14, fontweight='bold')
ax1.axhline(0, color='black', linestyle='-', linewidth=0.5)
ax1.grid(True, alpha=0.3, axis='y')

# Add value labels on bars. The gap is 1% of the largest absolute total, so it
# keeps pointing away from the bar even when every total is negative (the plain
# max() flipped the direction in that case).
label_offset = pnl_summary['total_pnl'].abs().max() * 0.01
for bar, val in zip(bars, pnl_summary['total_pnl']):
    ax1.text(bar.get_x() + bar.get_width()/2, val + (label_offset if val > 0 else -label_offset),
             f'${val:,.0f}', ha='center', va='bottom' if val > 0 else 'top', fontsize=9)

# 2.2 Cumulative PnL Curves (Top 6 Traders)
ax2 = fig.add_subplot(gs[1, :])
top_6_traders = pnl_summary.head(6)['account_id'].values
for trader_id in top_6_traders:
    trader_data = analysis_by_account[trader_id]
    ax2.plot(trader_data['cumulative_data'].index, trader_data['cumulative_data'].values,
             label=f'Trader {trader_id}', linewidth=2, alpha=0.8)
ax2.set_xlabel('Date')
ax2.set_ylabel('Cumulative PnL ($)')
ax2.set_title('Cumulative PnL Evolution - Top 6 Traders', fontsize=14, fontweight='bold')
ax2.legend(loc='best')
ax2.grid(True, alpha=0.3)
ax2.axhline(0, color='black', linestyle='--', alpha=0.5)

# 2.3 Win Rate vs Average Daily PnL (marker size ~ days traded)
ax3 = fig.add_subplot(gs[2, 0])
scatter = ax3.scatter(pnl_summary['win_rate'], pnl_summary['avg_daily_pnl'],
                     s=pnl_summary['total_days']*5, alpha=0.6,
                     c=pnl_summary['total_pnl'], cmap='RdYlGn')
for account, x_pos, y_pos in zip(pnl_summary['account_id'], pnl_summary['win_rate'], pnl_summary['avg_daily_pnl']):
    ax3.annotate(account, (x_pos, y_pos), fontsize=8, alpha=0.7)
ax3.set_xlabel('Win Rate')
ax3.set_ylabel('Average Daily PnL ($)')
ax3.set_title('Win Rate vs Average Daily PnL', fontsize=12, fontweight='bold')
ax3.axhline(0, color='black', linestyle='--', alpha=0.5)
ax3.axvline(0.5, color='black', linestyle='--', alpha=0.5)
ax3.grid(True, alpha=0.3)
cbar = plt.colorbar(scatter, ax=ax3)
cbar.set_label('Total PnL ($)')

# 2.4 Best Day vs Worst Day
ax4 = fig.add_subplot(gs[2, 1])
ax4.scatter(pnl_summary['worst_day'], pnl_summary['best_day'],
           s=100, alpha=0.6, c=pnl_summary['total_pnl'], cmap='RdYlGn')
for account, x_pos, y_pos in zip(pnl_summary['account_id'], pnl_summary['worst_day'], pnl_summary['best_day']):
    ax4.annotate(account, (x_pos, y_pos), fontsize=8, alpha=0.7)
# Add diagonal line where best day == -(worst day)
max_val = max(abs(pnl_summary['worst_day'].min()), pnl_summary['best_day'].max())
ax4.plot([-max_val, max_val], [max_val, -max_val], 'k--', alpha=0.3, label='Symmetric line')
ax4.set_xlabel('Worst Day PnL ($)')
ax4.set_ylabel('Best Day PnL ($)')
ax4.set_title('Best vs Worst Day Performance', fontsize=12, fontweight='bold')
ax4.grid(True, alpha=0.3)
ax4.axhline(0, color='black', linestyle='-', linewidth=0.5)
ax4.axvline(0, color='black', linestyle='-', linewidth=0.5)

# 2.5 PnL Distribution (Violin Plot)
ax5 = fig.add_subplot(gs[2, 2])
# Up to 8 traders: positions/ticks follow the number actually available, since
# violinplot raises when the number of positions and datasets differ.
violin_ids = pnl_summary.head(8)['account_id'].tolist()
violin_samples = [analysis_by_account[trader_id]['daily_data'].values for trader_id in violin_ids]
violin_positions = range(len(violin_ids))
if violin_samples:
    ax5.violinplot(violin_samples, positions=violin_positions, showmeans=True, showextrema=True)
ax5.set_xticks(violin_positions)
ax5.set_xticklabels([f'{account}' for account in violin_ids], rotation=45)
ax5.set_xlabel('Trader ID')
ax5.set_ylabel('Daily PnL Distribution ($)')
ax5.set_title('Daily PnL Distribution - Top 8 Traders', fontsize=12, fontweight='bold')
ax5.axhline(0, color='red', linestyle='--', alpha=0.5)
ax5.grid(True, alpha=0.3, axis='y')

# 2.6 Monthly PnL Heatmap
ax6 = fig.add_subplot(gs[3, :])
# Take the top 10 traders by total PnL (the first 10 list entries are merely
# the first 10 accounts encountered). Building the frame from a dict of Series
# unions their month indexes; adding columns one by one to an empty frame
# silently dropped every month absent from the first trader.
heatmap_ids = pnl_summary.head(10)['account_id'].tolist()
monthly_pivot = pd.DataFrame(
    {trader_id: analysis_by_account[trader_id]['monthly_data'] for trader_id in heatmap_ids}
)
monthly_pivot = monthly_pivot.sort_index().fillna(0).T
sns.heatmap(monthly_pivot, cmap='RdYlGn', center=0,
            fmt='.0f', annot=True, cbar_kws={'label': 'Monthly PnL ($)'},
            ax=ax6, annot_kws={'size': 8})
ax6.set_xlabel('Month')
ax6.set_ylabel('Trader ID')
ax6.set_title('Monthly PnL Heatmap - Top 10 Traders', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

# 3. Risk-Adjusted Performance Metrics
# Sharpe / Sortino / Calmar-style ratios from each trader's daily PnL series.
# Traders with fewer than two trading days or zero daily variance are skipped,
# because the ratios are undefined for them.
print("\n📈 RISK-ADJUSTED PERFORMANCE METRICS")
print("=" * 80)

risk_adjusted_columns = [
    'account_id', 'sharpe_ratio', 'sortino_ratio', 'max_drawdown',
    'calmar_ratio', 'avg_win', 'avg_loss', 'win_loss_ratio',
]

risk_adjusted_metrics = []
for trader in trader_pnl_analysis:
    daily_returns = trader['daily_data']
    daily_std = daily_returns.std()
    if len(daily_returns) > 1 and daily_std > 0:
        winning_days = daily_returns[daily_returns > 0]
        losing_days = daily_returns[daily_returns < 0]

        sharpe = (daily_returns.mean() / daily_std) * np.sqrt(252)

        # Sortino here uses the std of losing days as the denominator. When it is
        # not positive (fewer than two losing days, or identical losses) the
        # ratio is undefined and stored as NaN. A 'N/A' string would turn the
        # column into object dtype, which .round() and numeric sorts skip.
        downside_std = losing_days.std()
        sortino = (daily_returns.mean() / downside_std) * np.sqrt(252) if downside_std > 0 else np.nan

        # Max drawdown of the cumulative daily PnL (most negative distance below
        # the running peak; 0 when the curve never falls below its peak)
        cumulative = daily_returns.cumsum()
        max_drawdown = (cumulative - cumulative.cummax()).min()

        has_wins = not winning_days.empty
        has_losses = not losing_days.empty

        risk_adjusted_metrics.append({
            'account_id': trader['account_id'],
            'sharpe_ratio': sharpe,
            'sortino_ratio': sortino,
            'max_drawdown': max_drawdown,
            'calmar_ratio': trader['total_pnl'] / abs(max_drawdown) if max_drawdown != 0 else np.inf,
            'avg_win': winning_days.mean() if has_wins else 0,
            'avg_loss': losing_days.mean() if has_losses else 0,
            # Undefined without at least one winning and one losing day -> NaN.
            'win_loss_ratio': abs(winning_days.mean() / losing_days.mean())
                             if has_wins and has_losses else np.nan
        })

# NOTE(review): this rebinds `risk_df`, replacing the table built in section 4
# (which has daily_sharpe / trade_count / win_rate columns). Re-running the
# section 5 or 6 cells after this one will fail; consider a distinct name.
# Passing `columns` keeps the schema even when no trader qualified, so the sort
# below cannot raise KeyError on an empty result.
risk_df = pd.DataFrame(risk_adjusted_metrics, columns=risk_adjusted_columns)
risk_df = risk_df.sort_values('sharpe_ratio', ascending=False)
display(risk_df.round(3))

# Index the column first and the row second: df.iloc[0]['account_id'] goes
# through a row Series, which upcasts a numeric id to float ("1234.0").
print("\n🎯 KEY INSIGHTS:")
print(f"- Best performing trader: {pnl_summary['account_id'].iloc[0]} with total PnL of ${pnl_summary['total_pnl'].iloc[0]:,.2f}")
print(f"- Highest win rate: {pnl_summary.loc[pnl_summary['win_rate'].idxmax(), 'account_id']} ({pnl_summary['win_rate'].max():.1%})")
print(f"- Most consistent trader (lowest std): {pnl_summary.loc[pnl_summary['std_daily_pnl'].idxmin(), 'account_id']} (σ = ${pnl_summary['std_daily_pnl'].min():,.2f})")
if risk_df.empty:
    print("- Highest Sharpe ratio: n/a (no trader has enough daily history)")
else:
    print(f"- Highest Sharpe ratio: {risk_df['account_id'].iloc[0]} ({risk_df['sharpe_ratio'].iloc[0]:.3f})")