In [None]:
# Risk Management MVP - Updated Analysis Notebook


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
import logging

# Silence every warning category for the rest of the session. This is installed
# before the project imports below, so warnings raised while importing them are
# hidden as well.
# NOTE(review): this also hides deprecation/convergence warnings that may matter.
warnings.filterwarnings("ignore")

# Project-local building blocks used by the cells that follow:
# data access, feature construction, and per-trader model training.
from src.database import Database
from src.feature_engineer import FeatureEngineer
from src.model_trainer import ModelTrainer

# Route log records at INFO level and above to the root handler and create a
# logger for this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Plot appearance shared by every figure in the notebook.
# NOTE(review): the "seaborn-v0_8-*" style names presumably require a recent
# matplotlib release — confirm against the pinned version.
plt.style.use("seaborn-v0_8-darkgrid")
sns.set_palette("husl")

# Banner with a wall-clock start time, so the run can be matched to its output.
print("=== Risk Management MVP Analysis ===")
print(f"Analysis started at: {datetime.now()}")

In [None]:
## 1. Load Data and Initialize Components

print("\n1. Initializing components...")

# The three project services every later cell relies on.
db = Database()
feature_engineer = FeatureEngineer()
model_trainer = ModelTrainer()

# Dump whatever key/value statistics the database layer reports.
stats = db.get_database_stats()
print("\nDatabase Statistics:")
for key, value in stats.items():
    print("  {}: {}".format(key, value))

# Trader roster plus two headline aggregates over it.
traders_df = db.get_all_traders()
print("\nFound {} traders".format(len(traders_df)))

trading_days_total = traders_df["trading_days"].sum()
print(f"Total trading days across all traders: {trading_days_total}")

pnl_total = traders_df["total_pnl"].sum()
print(f"Total P&L across all traders: ${pnl_total:,.2f}")

In [None]:
## 2. Time Series Validation Setup


def create_time_splits(features_df, test_size=0.2, val_size=0.15):
    """
    Split a feature table into train / validation / test partitions by position.

    Rows are cut in their existing order (no shuffling and no sorting), so the
    caller must pass the rows already ordered from oldest to newest for the
    split to be a leakage-free time series split.

    With the default fractions the layout is:
    - Training: first 65% of rows
    - Validation: next 15% of rows (for hyperparameter tuning)
    - Test: last 20% of rows (final evaluation)

    Args:
        features_df: DataFrame with one row per observation, in time order.
        test_size: Fraction of rows reserved for the final test partition.
        val_size: Fraction of rows reserved for the validation partition.

    Returns:
        Tuple ``(train_df, val_df, test_df)`` of independent copies.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to more
            than 1 (which would make the training boundary negative and
            produce overlapping partitions).
    """
    if test_size < 0 or val_size < 0:
        raise ValueError(
            f"test_size and val_size must be non-negative, got "
            f"test_size={test_size}, val_size={val_size}"
        )
    if test_size + val_size > 1:
        raise ValueError(
            f"test_size + val_size must not exceed 1, got {test_size + val_size}"
        )

    n = len(features_df)

    # Boundaries are truncated towards zero, so any rounding remainder ends up
    # in the test partition.
    train_end = int(n * (1 - test_size - val_size))
    val_end = int(n * (1 - test_size))

    train_df = features_df.iloc[:train_end].copy()
    val_df = features_df.iloc[train_end:val_end].copy()
    test_df = features_df.iloc[val_end:].copy()

    return train_df, val_df, test_df


def evaluate_predictions(y_true, y_pred, trader_name):
    """
    Score predictions with regression metrics and a simple trading simulation.

    The simulation "trades" only on observations whose prediction is strictly
    above the median prediction and sums the realised values on those
    observations.

    Args:
        y_true: Realised target values (array-like, one entry per observation).
        y_pred: Predicted values aligned with ``y_true``.
        trader_name: Label copied into the result for identification.

    Returns:
        dict with keys ``trader_name``, ``rmse``, ``mae``, ``r2``,
        ``directional_accuracy`` (share of observations where prediction and
        outcome agree on being > 0), ``actual_total_pnl`` (sum of ``y_true``),
        ``strategy_total_pnl`` (sum of ``y_true`` on traded observations),
        ``trading_days``, ``total_days`` and ``trade_reduction_pct``.
    """
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

    # Accept lists and pandas Series too: the comparisons and the boolean-mask
    # indexing below need array semantics.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Trading-specific metrics
    directional_accuracy = np.mean((y_true > 0) == (y_pred > 0))

    # Simulate trading based on predictions (simple threshold)
    threshold = np.median(y_pred)  # Trade when prediction > median
    trade_signals = y_pred > threshold

    # The realised P&L and the day counts do not depend on whether any signal
    # fired. When nothing is traded (e.g. all predictions are equal) the
    # strategy P&L is an empty sum (0), trading_days is 0 and the reduction is
    # 100%, instead of reporting a zero actual P&L and a 0% reduction.
    total_days = len(y_true)
    trading_days = np.sum(trade_signals)
    actual_pnl = np.sum(y_true)
    strategy_pnl = np.sum(y_true[trade_signals])

    return {
        "trader_name": trader_name,
        "rmse": rmse,
        "mae": mae,
        "r2": r2,
        "directional_accuracy": directional_accuracy,
        "actual_total_pnl": actual_pnl,
        "strategy_total_pnl": strategy_pnl,
        "trading_days": trading_days,
        "total_days": total_days,
        "trade_reduction_pct": (1 - trading_days / total_days) * 100,
    }

In [None]:
## 3. Process Each Trader with Proper Time Series Validation

# Accumulators filled by the loop below:
#   results         - one metrics dict per successfully evaluated trader
#   all_predictions - one DataFrame of test-period predictions per trader
results = []
all_predictions = []

# Keep only traders whose reported trading-day count reaches the minimum.
min_days = 60  # Minimum days for analysis
valid_traders = traders_df[traders_df["trading_days"] >= min_days]
print(f"\nProcessing {len(valid_traders)} traders with >= {min_days} days of data")

# Per-trader pipeline: load -> features -> split -> train -> reload -> evaluate.
# Every failure path prints a reason and moves on to the next trader.
for idx, trader in valid_traders.iterrows():
    # The account id is normalised to str and used as the key for the database
    # lookup, the model training call and the model reload.
    account_id = str(trader["account_id"])
    trader_name = trader["trader_name"]

    print(f"\n{'='*60}")
    print(f"Processing {trader_name} ({account_id})")
    print(
        f"Trading days: {trader['trading_days']}, Total P&L: ${trader['total_pnl']:,.2f}"
    )

    # Fetch the trader's two raw tables from the database layer.
    totals_df, fills_df = db.get_trader_data(account_id)

    if totals_df.empty:
        print(f"No data found for {trader_name}")
        continue

    # Build the model input table from the raw tables.
    features_df = feature_engineer.create_features(totals_df, fills_df)

    # Feature construction may yield fewer rows than raw days, so the minimum
    # is re-checked on the feature table.
    if len(features_df) < min_days:
        print(f"Insufficient features: {len(features_df)} days")
        continue

    print(f"Created features for {len(features_df)} days")

    # Positional train/validation/test split with the default fractions.
    # NOTE(review): assumes features_df is already ordered oldest to newest — confirm.
    train_df, val_df, test_df = create_time_splits(features_df)

    print(f"Split: Train={len(train_df)}, Val={len(val_df)}, Test={len(test_df)}")

    if len(test_df) < 10:  # Need reasonable test set
        print(f"Test set too small: {len(test_df)} days")
        continue

    # Train the per-trader model on the training partition only.
    # NOTE(review): val_df is only used for the size printout above; it is not
    # passed to the trainer in this loop.
    result = model_trainer.train_personal_model(
        train_df, account_id, feature_engineer.get_feature_columns()
    )

    if not result:
        print(f"Model training failed for {trader_name}")
        continue

    # Reload the model through the trainer rather than using the training
    # result directly.
    # NOTE(review): presumably train_personal_model persists the model that
    # load_model reads back — verify in ModelTrainer.
    model_data = model_trainer.load_model(account_id)
    if not model_data:
        print(f"Failed to load model for {trader_name}")
        continue

    model = model_data["model"]
    feature_columns = model_data["feature_columns"]

    # Evaluate on the held-out test partition, using the feature list stored
    # with the model.
    X_test = test_df[feature_columns].values
    y_test = test_df["target"].values

    test_pred = model.predict(X_test, num_iteration=model.best_iteration)

    # Test-set metrics, extended with identifiers, the training-time metrics
    # reported by the trainer, and the test date range.
    eval_result = evaluate_predictions(y_test, test_pred, trader_name)
    eval_result.update(
        {
            "account_id": account_id,
            "model_rmse": result["rmse"],
            "model_mae": result["mae"],
            "n_features": len(feature_columns),
            "test_start_date": test_df["date"].min(),
            "test_end_date": test_df["date"].max(),
        }
    )

    results.append(eval_result)

    # Keep the test-period rows with the prediction attached for the plots.
    # NOTE(review): the plotted actual is "net_pnl" while the model is scored
    # against "target" — confirm these refer to the same quantity.
    test_predictions = test_df[["date", "net_pnl"]].copy()
    test_predictions["predicted_pnl"] = test_pred
    test_predictions["trader_name"] = trader_name
    test_predictions["account_id"] = account_id
    all_predictions.append(test_predictions)

    print(
        f"Results: RMSE={eval_result['rmse']:.2f}, "
        f"Dir Acc={eval_result['directional_accuracy']:.3f}, "
        f"Strategy P&L=${eval_result['strategy_total_pnl']:,.2f}"
    )

print(f"\n✅ Analysis complete! Processed {len(results)} traders successfully")

In [None]:
## 4. Results Analysis and Visualization

if results:
    # One row per evaluated trader; columns are the keys of the metrics dicts.
    results_df = pd.DataFrame(results)

    print("\n📊 SUMMARY STATISTICS")
    print(f"Average RMSE: {results_df['rmse'].mean():.2f}")
    print(
        f"Average Directional Accuracy: {results_df['directional_accuracy'].mean():.3f}"
    )
    print(f"Average Trade Reduction: {results_df['trade_reduction_pct'].mean():.1f}%")

    # Total P&L improvement
    total_actual = results_df["actual_total_pnl"].sum()
    total_strategy = results_df["strategy_total_pnl"].sum()
    total_improvement = total_strategy - total_actual

    # A relative improvement is undefined when the actual total is zero;
    # report "n/a" instead of dividing by zero (inf/nan or ZeroDivisionError).
    if total_actual != 0:
        improvement_pct = f"{(total_improvement / abs(total_actual) * 100):.1f}%"
    else:
        improvement_pct = "n/a"

    print("\n💰 P&L ANALYSIS")
    print(f"Total Actual P&L: ${total_actual:,.2f}")
    print(f"Total Strategy P&L: ${total_strategy:,.2f}")
    print(f"Total Improvement: ${total_improvement:,.2f} ({improvement_pct})")

    # Top performers by simulated strategy P&L; reused by the visualization cell.
    top_performers = results_df.nlargest(3, "strategy_total_pnl")
    print("\n🏆 TOP PERFORMERS")
    for idx, row in top_performers.iterrows():
        print(
            f"{row['trader_name']}: ${row['strategy_total_pnl']:,.2f} "
            f"(vs ${row['actual_total_pnl']:,.2f})"
        )
else:
    print("❌ No results to analyze")

In [None]:
## 5. Visualization

# Relies on `results_df` and `top_performers` created by the results-analysis
# cell, which only defines them when `results` is non-empty.
if results and all_predictions:
    # Create comprehensive plots
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    # 1. Model Performance Distribution
    ax1 = axes[0, 0]
    ax1.hist(results_df["directional_accuracy"], bins=15, alpha=0.7, edgecolor="black")
    ax1.axvline(
        results_df["directional_accuracy"].mean(),
        color="red",
        linestyle="--",
        label=f'Mean: {results_df["directional_accuracy"].mean():.3f}',
    )
    ax1.set_xlabel("Directional Accuracy")
    ax1.set_ylabel("Number of Traders")
    ax1.set_title("Distribution of Model Directional Accuracy")
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. P&L Improvement Scatter
    ax2 = axes[0, 1]
    scatter = ax2.scatter(
        results_df["actual_total_pnl"],
        results_df["strategy_total_pnl"],
        c=results_df["directional_accuracy"],
        cmap="viridis",
        alpha=0.7,
        s=60,
    )
    # Diagonal y = x: points above it are traders where the strategy beat the
    # actual P&L.
    ax2.plot(
        [results_df["actual_total_pnl"].min(), results_df["actual_total_pnl"].max()],
        [results_df["actual_total_pnl"].min(), results_df["actual_total_pnl"].max()],
        "r--",
        alpha=0.5,
        label="No Improvement Line",
    )
    ax2.set_xlabel("Actual P&L ($)")
    ax2.set_ylabel("Strategy P&L ($)")
    ax2.set_title("Strategy vs Actual P&L")
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    plt.colorbar(scatter, ax=ax2, label="Directional Accuracy")

    # 3. Trade Reduction vs Performance
    ax3 = axes[1, 0]
    ax3.scatter(
        results_df["trade_reduction_pct"],
        results_df["strategy_total_pnl"] - results_df["actual_total_pnl"],
        alpha=0.7,
        s=60,
    )
    ax3.set_xlabel("Trade Reduction (%)")
    ax3.set_ylabel("P&L Improvement ($)")
    ax3.set_title("Trade Reduction vs P&L Improvement")
    ax3.axhline(y=0, color="red", linestyle="--", alpha=0.5)
    ax3.grid(True, alpha=0.3)

    # 4. Model Quality vs Sample Size
    ax4 = axes[1, 1]
    ax4.scatter(results_df["total_days"], results_df["rmse"], alpha=0.7, s=60)
    ax4.set_xlabel("Total Trading Days")
    ax4.set_ylabel("RMSE")
    ax4.set_title("Model Quality vs Data Size")
    ax4.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Individual trader time series (top 3 performers)
    if len(all_predictions) >= 3:
        fig, axes = plt.subplots(3, 1, figsize=(15, 12))

        top_3_accounts = top_performers["account_id"].head(3).values

        for i, account_id in enumerate(top_3_accounts):
            # Each prediction frame carries one constant account id in every
            # row, so compare a single scalar. Comparing the whole column
            # yields a boolean Series, and using that as an `if` condition
            # raises "The truth value of a Series is ambiguous".
            trader_pred = next(
                pred
                for pred in all_predictions
                if pred["account_id"].iloc[0] == account_id
            )

            ax = axes[i]
            ax.plot(
                trader_pred["date"],
                trader_pred["net_pnl"],
                label="Actual P&L",
                linewidth=2,
                alpha=0.8,
            )
            ax.plot(
                trader_pred["date"],
                trader_pred["predicted_pnl"],
                label="Predicted P&L",
                linewidth=2,
                alpha=0.8,
            )

            ax.set_title(
                f"{trader_pred['trader_name'].iloc[0]} - Actual vs Predicted P&L"
            )
            ax.set_ylabel("Daily P&L ($)")
            ax.legend()
            ax.grid(True, alpha=0.3)

            if i == len(top_3_accounts) - 1:  # Last subplot
                ax.set_xlabel("Date")

        plt.tight_layout()
        plt.show()

print(f"\n🎯 Analysis completed at: {datetime.now()}")
# Nothing is written to disk in this notebook section, so the message reports
# results as computed rather than saved.
print(f"Results computed for {len(results)} traders")

In [None]:
# Trader Risk Management MVP - Analysis Notebook

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the libraries below.
warnings.filterwarnings("ignore")
import sys

# Make modules inside ./src importable by their bare name as well. This has to
# run before any import that relies on the bare-name form.
sys.path.append("src")
# Project-local modules: data loading, feature construction, per-trader model.
from src.minimal_database_solution import MinimalDataLoader as DataLoader

from src.simple_feature_engineer import SimpleFeatureEngineer

# Superseded import, kept for reference only:
# from src.personal_lgbm import PersonalizedlgbmModel
from src.lgbm_trader_model import PersonalizedLGBMModel

# Plot appearance shared by every figure in this part of the notebook.
plt.style.use("seaborn-v0_8-darkgrid")
sns.set_palette("husl")

# Instantiate the loader and the feature engineer used by the cells below.
# NOTE(review): `feature_engineer` rebinds the name assigned in the earlier
# "Initializing components" cell to a different class; cells above will see
# this object if re-run afterwards.
print("Initializing data loader and feature engineer...")
data_loader = DataLoader()
feature_engineer = SimpleFeatureEngineer()

# %% [markdown]
# ## 1. Load and Prepare Data

# Load the per-trader data bundle from the loader; later cells read the
# trader's display name from it via all_trader_data[trader_id]["name"].
print("Loading trader data...")
all_trader_data = data_loader.load_all_traders_data()
# Import through the same `src.` package path used for MinimalDataLoader.
# Importing the bare module name (possible because "src" is on sys.path) would
# load a second, independent copy of the module under a different name.
from src.minimal_database_solution import MinimalDatabaseManager

# NOTE(review): `db` rebinds the name assigned in the earlier
# "Initializing components" cell.
db = MinimalDatabaseManager()

# Pull both raw tables over one connection, each joined to the trader name and
# ordered by account and time.
with db.get_connection() as conn:
    master_totals = pd.read_sql_query(
        """
        SELECT t.*, tr.trader_name 
        FROM totals t
        JOIN traders tr ON t.account_id = tr.account_id
        ORDER BY t.account_id, t.date
    """,
        conn,
    )

    master_fills = pd.read_sql_query(
        """
        SELECT f.*, tr.trader_name 
        FROM fills f
        JOIN traders tr ON f.account_id = tr.account_id
        ORDER BY f.account_id, f.datetime
    """,
        conn,
    )

print(f"Loaded {len(master_totals)} total records and {len(master_fills)} fill records")
print(f"Loaded data for {len(all_trader_data)} traders")
print(f"Total trading days: {len(master_totals)}")
print(f"Total fills: {len(master_fills)}")

# Per-account summary: date range, row count and net P&L sum / mean / std.
trader_summary = (
    master_totals.groupby("account_id")
    .agg({"date": ["min", "max", "count"], "net_pnl": ["sum", "mean", "std"]})
    .round(2)
)
# Flatten the two-level column index produced by the multi-aggregation; the
# order matches the aggregation spec above.
trader_summary.columns = [
    "start_date",
    "end_date",
    "trading_days",
    "total_pnl",
    "avg_daily_pnl",
    "pnl_std",
]
print("\nTrader Summary:")
print(trader_summary)

In [None]:
# %% [markdown]
# ## 2. Feature Engineering

# Build the model input table from the two raw tables and fetch the list of
# feature column names the engineer reports.
print("\nEngineering features...")

features_df = feature_engineer.engineer_features(master_totals, master_fills)
feature_columns = feature_engineer.get_feature_columns()

print("Total features created: {}".format(len(feature_columns)))
print("Feature categories:")

# Names expected in each category; only those actually present in
# feature_columns are printed, in feature_columns order.
behavioral_signal_names = (
    "trading_frequency",
    "hour_concentration",
    "symbol_diversity",
    "size_inconsistency",
    "morning_bias",
)
essential_metric_names = (
    "net_pnl",
    "gross_pnl",
    "total_fees",
    "fee_ratio",
    "qty",
    "orders_count",
    "avg_fill_size",
    "net_pnl_5d_avg",
    "cum_win_rate",
    "momentum_3d",
)
temporal_name_fragments = ("day", "monday", "friday")

present_behavioral = [f for f in feature_columns if f in behavioral_signal_names]
print("- Behavioral signals (5):", present_behavioral)

present_essential = [f for f in feature_columns if f in essential_metric_names]
print("- Essential metrics (10):", present_essential)

# Temporal features are recognised by substring rather than by exact name.
present_temporal = [
    f
    for f in feature_columns
    if any(fragment in f for fragment in temporal_name_fragments)
]
print("- Temporal features (3):", present_temporal)

In [None]:
# %% [markdown]
# ## 3. Train lgbm Models for Each Trader

# Accumulators filled by the loop below:
#   results_summary - one evaluation dict per trained trader
#   all_predictions - one predictions DataFrame per trained trader
results_summary = []
all_predictions = []

for trader_id in features_df["account_id"].unique():

    print(f"\n{'='*50}")
    print(f"Training model for trader: {trader_id}")

    # Filter data for this trader
    trader_data = features_df[features_df["account_id"] == trader_id].copy()

    if trader_data["date"].unique().size < 50:
        print(f"Skipping trader {trader_id} due to insufficient data.")
        continue

    # Hold out the trader's last 60 calendar days as the test window. The
    # latest date is read from the already-filtered frame instead of
    # re-grouping the full feature table on every iteration.
    train_end_date = trader_data["date"].max() - timedelta(days=60)
    train_data = trader_data[trader_data["date"] <= train_end_date]
    test_data = trader_data[trader_data["date"] > train_end_date]

    print(f"Train samples: {len(train_data)}, Test samples: {len(test_data)}")

    # A trader can pass the 50-distinct-dates check with a history that spans
    # 60 calendar days or less, which leaves nothing to train on.
    if train_data.empty:
        print(f"Skipping trader {trader_id}: no training data before the test window.")
        continue

    # Initialize and train model
    model = PersonalizedLGBMModel(trader_id)  # instead of PersonalizedlgbmModel
    model.fit(train_data, feature_columns)

    print(f"Best LGBM params: {model.best_params}")

    print(f"Optimal threshold: {model.threshold:.3f}")

    # Make predictions
    predictions = model.predict(test_data)

    # Evaluate
    evaluation = model.evaluate(predictions)
    results_summary.append(evaluation)

    # Store predictions for visualization
    trader_display_name = all_trader_data[trader_id]["name"]
    predictions["trader_name"] = trader_display_name
    all_predictions.append(predictions)

    print(f"\nResults for {trader_display_name}:")
    print(f"  Actual P&L: ${evaluation['actual_total_pnl']:,.2f}")
    print(f"  Signal P&L: ${evaluation['signal_total_pnl']:,.2f}")
    print(
        f"  Improvement: ${evaluation['pnl_improvement']:,.2f} ({evaluation['pnl_improvement_pct']:.1f}%)"
    )
    print(
        f"  Days traded: {evaluation['traded_days']}/{evaluation['total_days']} ({evaluation['trade_reduction_pct']:.1f}% reduction)"
    )
    print(f"  Win rate when trading: {evaluation['win_rate_when_trading']:.1f}%")
    print(f"  Avoided losses: ${evaluation['avoided_losses']:,.2f}")

# Combine all predictions. pd.concat raises a generic ValueError on an empty
# list; raise the same exception type with a message that names the cause.
if not all_predictions:
    raise ValueError(
        "No trader had enough data to train a model; there are no predictions to combine."
    )
all_predictions_df = pd.concat(all_predictions, ignore_index=True)

In [None]:
# %% [markdown]
# ## 4. Visualization and Analysis

# Create summary dataframe: one row per trained trader.
results_df = pd.DataFrame(results_summary)

# Overall performance improvement
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Shared bar positions: bars are placed at 0..n-1 and labelled with the trader
# ids, so labels stay aligned whatever the id type is.
traders = results_df["trader_id"]
x = np.arange(len(traders))
width = 0.35

# 1. P&L Comparison
ax1 = axes[0, 0]

bars1 = ax1.bar(
    x - width / 2, results_df["actual_total_pnl"], width, label="Actual P&L", alpha=0.8
)
bars2 = ax1.bar(
    x + width / 2, results_df["signal_total_pnl"], width, label="Signal P&L", alpha=0.8
)

ax1.set_xlabel("Trader ID")
ax1.set_ylabel("Total P&L ($)")
ax1.set_title("P&L Comparison: Actual vs Signal-Based Trading")
ax1.set_xticks(x)
ax1.set_xticklabels(traders, rotation=45)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Add value labels
for bars in [bars1, bars2]:
    for bar in bars:
        height = bar.get_height()
        ax1.annotate(
            f"${height:,.0f}",
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords="offset points",
            ha="center",
            va="bottom",
            fontsize=8,
        )

# 2. Risk Reduction
ax2 = axes[0, 1]
ax2.bar(x, results_df["trade_reduction_pct"], alpha=0.8, color="coral")
ax2.set_xlabel("Trader ID")
ax2.set_ylabel("Trade Reduction (%)")
ax2.set_title("Trading Day Reduction by Risk Signals")
# Tick positions must be fixed before assigning labels; set_xticklabels alone
# attaches the labels to whatever ticks the automatic locator chose.
ax2.set_xticks(x)
ax2.set_xticklabels(traders, rotation=45)
ax2.grid(True, alpha=0.3)

# 3. Win Rate Analysis
ax3 = axes[1, 0]
ax3.scatter(
    results_df["trade_reduction_pct"],
    results_df["win_rate_when_trading"],
    s=100,
    alpha=0.6,
)
for i, trader in enumerate(traders):
    ax3.annotate(
        trader,
        (
            results_df.iloc[i]["trade_reduction_pct"],
            results_df.iloc[i]["win_rate_when_trading"],
        ),
        xytext=(5, 5),
        textcoords="offset points",
        fontsize=8,
    )
ax3.set_xlabel("Trade Reduction (%)")
ax3.set_ylabel("Win Rate When Trading (%)")
ax3.set_title("Trade Reduction vs Win Rate")
ax3.grid(True, alpha=0.3)

# 4. Avoided Losses vs Missed Gains (point colour encodes P&L improvement)
ax4 = axes[1, 1]
ax4.scatter(
    results_df["avoided_losses"],
    results_df["missed_gains"],
    s=100,
    alpha=0.6,
    c=results_df["pnl_improvement"],
    cmap="RdYlGn",
)
for i, trader in enumerate(traders):
    ax4.annotate(
        trader,
        (results_df.iloc[i]["avoided_losses"], results_df.iloc[i]["missed_gains"]),
        xytext=(5, 5),
        textcoords="offset points",
        fontsize=8,
    )
ax4.set_xlabel("Avoided Losses ($)")
ax4.set_ylabel("Missed Gains ($)")
ax4.set_title("Risk-Reward Trade-off")
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 5. Individual Trader Analysis

# Pick the traders with the largest and the smallest P&L improvement and draw
# one row of plots for each: cumulative P&L on the left, daily P&L coloured by
# trade signal on the right.
from matplotlib.patches import Patch

improvement = results_df["pnl_improvement"]
best_trader = results_df.loc[improvement.idxmax(), "trader_id"]
worst_trader = results_df.loc[improvement.idxmin(), "trader_id"]

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

selected = [(best_trader, "Best"), (worst_trader, "Worst")]
for idx, (trader_id, label) in enumerate(selected):
    # This trader's predictions in date order, with both running totals added:
    # every day counted vs. only days where the signal is non-zero.
    is_this_trader = all_predictions_df["account_id"] == trader_id
    trader_predictions = all_predictions_df[is_this_trader].copy().sort_values("date")
    trader_predictions["actual_cumsum"] = trader_predictions["net_pnl"].cumsum()
    gated_pnl = trader_predictions["net_pnl"] * trader_predictions["trade_signal"]
    trader_predictions["signal_cumsum"] = gated_pnl.cumsum()

    dates = trader_predictions["date"]
    cum_actual = trader_predictions["actual_cumsum"]
    cum_signal = trader_predictions["signal_cumsum"]

    # Left panel: cumulative P&L comparison, gap between the curves shaded.
    ax1 = axes[idx, 0]
    ax1.plot(dates, cum_actual, label="Actual Trading", linewidth=2)
    ax1.plot(dates, cum_signal, label="Signal-Based Trading", linewidth=2)
    ax1.fill_between(dates, cum_actual, cum_signal, alpha=0.3)
    ax1.set_title(f"{label} Performer: {trader_id} - Cumulative P&L")
    ax1.set_xlabel("Date")
    ax1.set_ylabel("Cumulative P&L ($)")
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: daily P&L, red where the signal is 0 and green otherwise.
    ax2 = axes[idx, 1]
    colors = [
        "green" if signal != 0 else "red"
        for signal in trader_predictions["trade_signal"]
    ]
    ax2.scatter(dates, trader_predictions["net_pnl"], c=colors, alpha=0.6, s=50)
    ax2.axhline(y=0, color="black", linestyle="--", alpha=0.5)
    ax2.set_title(f"{label} Performer: {trader_id} - Daily P&L with Signals")
    ax2.set_xlabel("Date")
    ax2.set_ylabel("Daily P&L ($)")
    ax2.grid(True, alpha=0.3)

    # Colour key for the scatter points (proxy patches, not plotted data).
    legend_elements = [
        Patch(facecolor="green", label="Trade (Signal=1)"),
        Patch(facecolor="red", label="No Trade (Signal=0)"),
    ]
    ax2.legend(handles=legend_elements)

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 6. Risk Score Distribution Analysis

# Two panels: per-trader risk score histograms (left) and the threshold each
# trader's model settled on (right).
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Risk score distribution: one semi-transparent histogram per trader.
ax1 = axes[0]
for trader_id in all_predictions_df["account_id"].unique():
    trader_data = all_predictions_df[all_predictions_df["account_id"] == trader_id]
    ax1.hist(trader_data["risk_score"], alpha=0.5, bins=20, label=trader_id)
ax1.set_xlabel("Risk Score")
ax1.set_ylabel("Frequency")
ax1.set_title("Risk Score Distribution by Trader")
ax1.legend()
ax1.grid(True, alpha=0.3)

# Threshold analysis
ax2 = axes[1]
thresholds = results_df["threshold"].values
# Bars are placed at 0..n-1 and the tick positions are fixed before labelling;
# set_xticklabels alone attaches the labels to whatever ticks the automatic
# locator chose, which mislabels bars when the ids are numeric.
bar_positions = np.arange(len(results_df))
ax2.bar(bar_positions, thresholds, alpha=0.8)
ax2.axhline(y=0.5, color="red", linestyle="--", label="Default (0.5)")
ax2.set_xlabel("Trader ID")
ax2.set_ylabel("Optimal Threshold")
ax2.set_title("Optimal Risk Thresholds by Trader")
ax2.set_xticks(bar_positions)
ax2.set_xticklabels(results_df["trader_id"], rotation=45)
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# %% [markdown]
# ## 7. Summary Report

# Text report over results_df: overall totals, risk-reduction aggregates and a
# per-trader recommendation.
print("\n" + "=" * 80)
print("TRADER RISK MANAGEMENT MVP - SUMMARY REPORT")
print("=" * 80)

print("\n1. OVERALL PERFORMANCE IMPROVEMENT")
print("-" * 40)
total_actual_pnl = results_df["actual_total_pnl"].sum()
total_signal_pnl = results_df["signal_total_pnl"].sum()
total_improvement = total_signal_pnl - total_actual_pnl

# A relative improvement is undefined when the actual total is zero; report
# "n/a" instead of dividing by zero (inf/nan or ZeroDivisionError).
if total_actual_pnl != 0:
    total_improvement_pct = f"{(total_improvement/abs(total_actual_pnl))*100:.1f}%"
else:
    total_improvement_pct = "n/a"

print(f"Total Actual P&L (all traders): ${total_actual_pnl:,.2f}")
print(f"Total Signal P&L (all traders): ${total_signal_pnl:,.2f}")
print(f"Total Improvement: ${total_improvement:,.2f} ({total_improvement_pct})")

print("\n2. RISK REDUCTION METRICS")
print("-" * 40)
avg_trade_reduction = results_df["trade_reduction_pct"].mean()
total_avoided_losses = results_df["avoided_losses"].sum()
total_missed_gains = results_df["missed_gains"].sum()
print(f"Average Trade Reduction: {avg_trade_reduction:.1f}%")
print(f"Total Avoided Losses: ${total_avoided_losses:,.2f}")
print(f"Total Missed Gains: ${total_missed_gains:,.2f}")
print(f"Net Benefit: ${total_avoided_losses - total_missed_gains:,.2f}")

print("\n3. INDIVIDUAL TRADER RECOMMENDATIONS")
print("-" * 40)
for _, row in results_df.iterrows():
    trader_name = all_trader_data[row["trader_id"]]["name"]
    # The recommendation is driven solely by the sign of the P&L improvement.
    if row["pnl_improvement"] > 0:
        recommendation = "IMPLEMENT RISK SIGNALS"
        color = "✅"
    else:
        recommendation = "REVIEW STRATEGY"
        color = "⚠️"

    print(f"\n{color} {trader_name} ({row['trader_id']})")
    print(
        f"   - P&L Impact: ${row['pnl_improvement']:,.2f} ({row['pnl_improvement_pct']:.1f}%)"
    )
    print(f"   - Optimal Threshold: {row['threshold']:.3f}")
    print(f"   - Trade Reduction: {row['trade_reduction_pct']:.1f}%")
    print(f"   - Win Rate: {row['win_rate_when_trading']:.1f}%")
    print(f"   - Recommendation: {recommendation}")

In [None]:
print("\n4. KEY BEHAVIORAL INSIGHTS")
print("-" * 40)

# Rank features by the absolute correlation of each feature column with the
# target, computed over the full feature table (all traders pooled). This is a
# univariate proxy, not an importance taken from the trained models.
feature_importance = []
for col in feature_columns:
    corr = features_df[[col, "target"]].corr().iloc[0, 1]
    feature_importance.append({"feature": col, "correlation": abs(corr)})

feature_importance_df = pd.DataFrame(feature_importance).sort_values(
    "correlation", ascending=False
)
print("\nTop 5 Most Predictive Features:")
# Number the rows by their position after sorting. The frame's index still
# holds the pre-sort position of each feature, so it cannot serve as the rank.
for rank, (_, row) in enumerate(feature_importance_df.head(5).iterrows(), start=1):
    print(f"   {rank}. {row['feature']}: {row['correlation']:.3f}")

print("\n5. TOTAL P&L IMPROVEMENT")

total_pnl_improvement = results_df["pnl_improvement"].sum()
print(f"Total P&L Improvement Across All Traders: ${total_pnl_improvement:,.2f}")

print("\n" + "=" * 80)
print("END OF REPORT")
print("=" * 80)

In [None]:
# %% [markdown]
# ## 8. Export Results for Production

import json


def _json_default(value):
    """Convert values the json module cannot encode natively.

    NumPy scalars become the matching Python scalar, NumPy arrays become
    lists, and anything else falls back to its string form.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# Save model parameters and thresholds
model_params = results_df[["trader_id", "threshold", "best_params"]].to_dict("records")

# Save to CSV for easy implementation
results_df.to_csv("trader_risk_results.csv", index=False)
all_predictions_df.to_csv("trader_predictions.csv", index=False)

print("\nResults saved to:")
print("- trader_risk_results.csv")
print("- trader_predictions.csv")

# Create implementation guide
implementation_guide = {
    "model_parameters": model_params,
    "feature_list": feature_columns,
    "behavioral_signals": [
        "trading_frequency",
        "hour_concentration",
        "symbol_diversity",
        "size_inconsistency",
        "morning_bias",
    ],
    # JSON object keys are always strings; converting up front produces the
    # same file for str/int ids and avoids a TypeError for other key types.
    "risk_thresholds": {
        str(row["trader_id"]): row["threshold"] for _, row in results_df.iterrows()
    },
}

with open("implementation_guide.json", "w") as f:
    # `default` is only consulted for objects json cannot encode itself, so
    # natively serialisable content is written exactly as before.
    json.dump(implementation_guide, f, indent=2, default=_json_default)

print("- implementation_guide.json")
print("\n✅ MVP Complete! Ready for production implementation.")