# Trading Risk Management System - Complete Usage Example

This notebook demonstrates the complete workflow of the Trading Risk Management System, from data ingestion to risk prediction and monitoring.

## 1. Setup and Imports

In [None]:
# Standard imports: data handling, numerics, and plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices; consider a narrower filter when debugging.
warnings.filterwarnings('ignore')

# Configure display: show all columns, cap printed rows at 100, and render
# floats with four decimal places
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.4f}'.format)

# System imports
import sys
from pathlib import Path
# Put the parent of the current working directory on sys.path so the `src`
# package imported below can be found (assumes the notebook is run from a
# directory one level below the project root - TODO confirm)
sys.path.append(str(Path('..').resolve()))

# Project imports: data access, pipelines, feature builders, models,
# backtesting, monitoring, and shared constants
from src.data.database_manager import DatabaseManager
from src.data.data_downloader import DataDownloader
from src.pipeline.data_validator import DataValidator
from src.pipeline.feature_pipeline import FeaturePipeline
from src.pipeline.model_pipeline import ModelPipeline
from src.features.technical_features import TechnicalFeatures
from src.features.behavioral_features import BehavioralFeatures
from src.features.market_regime_features import MarketRegimeFeatures
from src.models.risk_model import RiskModel
from src.utils.time_series_cv import TimeSeriesSplit, WalkForwardAnalysis
from src.backtesting.backtest_engine import BacktestEngine, OrderType, Order
from src.backtesting.performance_metrics import PerformanceMetrics
from src.monitoring.model_monitor import ModelMonitor
from src.monitoring.drift_detector import DriftDetector
from src.monitoring.alert_system import AlertSystem
from src.monitoring.dashboard_generator import DashboardGenerator
from src.core.constants import TradingConstants as TC

# Reaching this line means every import above resolved
print("All imports successful!")

## 2. Initialize System Components

In [None]:
# Initialize core components
# The database class imported by this notebook is DatabaseManager (from
# src.data.database_manager); no name `Database` is imported or defined, so
# calling it would raise NameError.
db = DatabaseManager()
validator = DataValidator()
feature_pipeline = FeaturePipeline()
model_pipeline = ModelPipeline()
perf_metrics = PerformanceMetrics()

# Check database status: print every statistic the database layer reports
stats = db.get_database_stats()
print("Database Statistics:")
for key, value in stats.items():
    print(f"  {key}: {value}")

## 3. Data Exploration

In [None]:
# Load the trader table from the database
traders = db.get_all_traders()
trader_count = len(traders)
print(f"Total traders: {trader_count}")
print("\nTop 10 traders by total P&L:")
# The last line is a bare expression so the notebook renders the table
ranked_by_pnl = traders.sort_values('total_pnl', ascending=False)
ranked_by_pnl.head(10)

In [None]:
# Use the first row of the trader table as the worked example
example_trader = traders.iloc[0]
account_id, trader_name = example_trader['account_id'], example_trader['trader_name']

print(f"Analyzing trader: {trader_name} (ID: {account_id})")

# Pull the daily totals and the individual fills for that account
totals_df, fills_df = db.get_trader_data(account_id)

# Report how much history is available and the period it spans
print("\nData summary:")
print(f"  Daily totals: {len(totals_df)} days")
print(f"  Fills: {len(fills_df)} trades")
date_column = totals_df['date']
print(f"  Date range: {date_column.min()} to {date_column.max()}")

In [None]:
# Visualize P&L history on a 2x2 grid: cumulative P&L, daily distribution,
# rolling volatility, and drawdown
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
dates = totals_df['date']
daily_pnl = totals_df['net_pnl']

# Cumulative P&L
cumulative_pnl = daily_pnl.cumsum()
axes[0, 0].plot(dates, cumulative_pnl)
axes[0, 0].set_title('Cumulative P&L')
axes[0, 0].set_xlabel('Date')
axes[0, 0].set_ylabel('Cumulative P&L')

# Daily P&L distribution
axes[0, 1].hist(daily_pnl, bins=50, edgecolor='black')
axes[0, 1].set_title('Daily P&L Distribution')
axes[0, 1].set_xlabel('Daily P&L')
axes[0, 1].set_ylabel('Frequency')

# Rolling volatility (standard deviation of daily P&L over 20 rows)
rolling_vol = daily_pnl.rolling(20).std()
axes[1, 0].plot(dates, rolling_vol)
axes[1, 0].set_title('20-Day Rolling Volatility')
axes[1, 0].set_xlabel('Date')
axes[1, 0].set_ylabel('Volatility')

# Drawdown
# A percentage drawdown is only meaningful relative to a positive peak.
# Dividing by a zero peak yields inf/NaN and dividing by a negative peak flips
# the sign, so those rows are masked to NaN and left blank in the chart.
running_max = cumulative_pnl.expanding().max()
drawdown = (cumulative_pnl - running_max) / running_max.where(running_max > 0)
axes[1, 1].fill_between(dates, drawdown * 100, 0, alpha=0.3, color='red')
axes[1, 1].set_title('Drawdown %')
axes[1, 1].set_xlabel('Date')
axes[1, 1].set_ylabel('Drawdown %')

plt.tight_layout()
plt.show()

# Calculate key metrics from the raw daily P&L array and show the first ten
returns = daily_pnl.values
metrics = perf_metrics.calculate_metrics(returns)
print("\nKey Performance Metrics:")
for metric, value in list(metrics.items())[:10]:
    print(f"  {metric}: {value:.4f}")

## 4. Data Validation

In [None]:
# Run the combined quality checks over daily totals and fills
validation_result = validator.validate_combined(totals_df, fills_df)

status_label = 'PASSED' if validation_result.is_valid else 'FAILED'
print(f"Validation Status: {status_label}")

# Every error is listed
if validation_result.errors:
    print("\nErrors:")
    for problem in validation_result.errors:
        print(f"  - {problem}")

# Only the first five warnings are listed to keep the output short
if validation_result.warnings:
    print("\nWarnings:")
    for notice in validation_result.warnings[:5]:
        print(f"  - {notice}")

# Full report produced by the validator itself
report_text = validator.generate_report(validation_result)
print("\n" + report_text)

## 5. Feature Engineering

In [None]:
# Build the feature table from daily totals and fills
print("Generating features...")
features_df = feature_pipeline.generate_features(totals_df, fills_df)

n_samples, n_columns = features_df.shape
print(f"\nFeatures generated: {n_columns} features for {n_samples} samples")
print("\nFeature categories:")

# Group feature columns by the text before their first underscore,
# leaving out the two identifier columns
identifier_columns = ('date', 'account_id')
feature_categories = {}
for column in features_df.columns:
    if column in identifier_columns:
        continue
    prefix = column.partition('_')[0]
    feature_categories[prefix] = feature_categories.get(prefix, 0) + 1

# Report the categories in alphabetical order
for prefix, n_features in sorted(feature_categories.items()):
    print(f"  {prefix}: {n_features} features")

In [None]:
# Correlation heatmap over at most the first 20 numeric feature columns.
# Slicing already stops at the available length, so no size check is needed.
# NOTE(review): the columns are taken in table order, not ranked by importance.
numeric_features = features_df.select_dtypes(include=[np.number]).columns
feature_subset = numeric_features[:20]

plt.figure(figsize=(12, 10))
correlation_matrix = features_df[feature_subset].corr()
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', center=0)
plt.title('Feature Correlation Matrix (Top 20 Features)')
plt.tight_layout()
plt.show()

## 6. Model Training Example

In [None]:
# Build the modeling table: features plus next-day P&L as the target.
# The last row has no next day, so its target is NaN and the row is dropped.
# NOTE(review): the target is aligned to the features by index label; this
# presumes features_df and totals_df share the same index - verify.
next_day_pnl = totals_df['net_pnl'].shift(-1)
modeling_df = features_df.assign(target=next_day_pnl).dropna(subset=['target'])

# Everything except identifiers and the target is treated as a feature
excluded_columns = ('date', 'account_id', 'target')
feature_cols = [name for name in modeling_df.columns if name not in excluded_columns]
X = modeling_df[feature_cols]
y = modeling_df['target']

# Index both by date when a date column exists so time series CV can use it
if 'date' in modeling_df:
    time_index = pd.to_datetime(modeling_df['date'])
else:
    time_index = modeling_df.index
X.index = time_index
y.index = X.index

print(f"Modeling data shape: {X.shape}")
print(f"Target statistics: mean={y.mean():.2f}, std={y.std():.2f}")

In [None]:
# Fit a demonstration risk model on a chronological 80/20 split
# NOTE(review): train_test_split is imported here but not used below.
from sklearn.model_selection import train_test_split

# The first 80% of rows train the model; the remainder is held out
split_idx = int(len(X) * 0.8)
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

# NOTE(review): the held-out rows are also passed as validation data; if
# RiskModel tunes on them, the test score below is optimistic - confirm.
risk_model = RiskModel(model_name="example_risk_model")
risk_model.fit(X_train, y_train, validation_data=(X_test, y_test))

# Score both partitions
train_score = risk_model.score(X_train, y_train)
test_score = risk_model.score(X_test, y_test)

print("\nModel Performance:")
for partition, score in (("Train", train_score), ("Test", test_score)):
    print(f"  {partition} R²: {score:.4f}")

# Fifteen most important features as reported by the model
importance_df = risk_model.get_feature_importance(top_k=15)
print("\nTop 15 Features:")
print(importance_df)

In [None]:
# Score the held-out rows and inspect the predictions
risk_predictions = risk_model.predict_risk(X_test)

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
scatter_ax, hist_ax = axes

# Left panel: predicted vs actual P&L with a dashed y = x reference line
actual_low, actual_high = y_test.min(), y_test.max()
scatter_ax.scatter(y_test, risk_predictions['predicted_pnl'], alpha=0.5)
scatter_ax.plot([actual_low, actual_high], [actual_low, actual_high], 'r--')
scatter_ax.set_xlabel('Actual P&L')
scatter_ax.set_ylabel('Predicted P&L')
scatter_ax.set_title('Predictions vs Actual')

# Right panel: risk score histogram with the high-risk threshold marked
hist_ax.hist(risk_predictions['risk_score'], bins=30, edgecolor='black')
hist_ax.axvline(TC.HIGH_RISK_SCORE, color='red', linestyle='--', label='High Risk Threshold')
hist_ax.set_xlabel('Risk Score')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Risk Score Distribution')
hist_ax.legend()

plt.tight_layout()
plt.show()

# Rows whose risk score exceeds the high-risk threshold
is_high_risk = risk_predictions['risk_score'] > TC.HIGH_RISK_SCORE
high_risk_days = risk_predictions[is_high_risk]
n_high_risk = len(high_risk_days)
high_risk_share = n_high_risk / len(risk_predictions) * 100
print(f"\nHigh risk days: {n_high_risk} ({high_risk_share:.1f}%)")

# Of the flagged days, what fraction actually lost money?
# NOTE(review): this lookup presumes risk_predictions carries the same index
# labels as y_test - verify against RiskModel.predict_risk.
if n_high_risk > 0:
    actual_losses = y_test[high_risk_days.index] < 0
    print(f"Accuracy on high risk days: {actual_losses.mean():.2%} were actual losses")

## 7. Time Series Cross-Validation

In [None]:
# Three-fold time series cross-validation in 'expanding' mode
ts_cv = TimeSeriesSplit(n_splits=3, mode='expanding')

cv_scores = []
for fold_idx, fold in enumerate(ts_cv.split(X)):
    # Each fold supplies positional row indices for training and validation
    train_rows, val_rows = fold.train_idx, fold.val_idx
    X_train_cv, y_train_cv = X.iloc[train_rows], y.iloc[train_rows]
    X_val_cv, y_val_cv = X.iloc[val_rows], y.iloc[val_rows]

    # A fresh model is fitted per fold
    fold_model = RiskModel(model_name=f"cv_fold_{fold_idx}")
    fold_model.fit(X_train_cv, y_train_cv)

    # Keep the validation score for the summary below
    val_score = fold_model.score(X_val_cv, y_val_cv)
    cv_scores.append(val_score)

    print(f"Fold {fold_idx}: Train samples={len(X_train_cv)}, Val samples={len(X_val_cv)}, Val R²={val_score:.4f}")

mean_score, score_spread = np.mean(cv_scores), np.std(cv_scores)
print(f"\nAverage CV Score: {mean_score:.4f} (+/- {score_spread:.4f})")

## 8. Backtesting Example

In [None]:
# Placeholder backtesting strategy (it does not use the model predictions yet)
def risk_based_strategy(data_slice):
    """Turn daily P&L rows into SPY market orders using a mean-reversion rule.

    For every row of ``data_slice`` that has a ``net_pnl`` entry:
    a value below -100 produces a buy of 100 shares, a value above 100
    produces a sell of 100 shares, and anything else produces no order.
    Each order is stamped with the row's index label.

    Returns the list of orders in row order.
    """
    orders = []

    for timestamp, row in data_slice.iterrows():
        if 'net_pnl' not in row:
            continue

        pnl = row['net_pnl']
        if pnl < -100:
            quantity = 100  # buy after a loss
        elif pnl > 100:
            quantity = -100  # sell after a gain
        else:
            continue

        orders.append(Order(
            timestamp=timestamp,
            symbol='SPY',
            quantity=quantity,
            order_type=OrderType.MARKET
        ))

    return orders

# Set up the engine with 100,000 of starting capital
backtest_engine = BacktestEngine(initial_capital=100000)

# Backtest input: a copy of the daily totals tagged as SPY, with a synthetic
# price series derived from cumulative P&L
synthetic_price = 100 + totals_df['net_pnl'].cumsum() / 100
backtest_data = totals_df.assign(symbol='SPY', price=synthetic_price)

# Run the strategy over the prepared data
results = backtest_engine.run_backtest(
    data=backtest_data,
    signal_func=risk_based_strategy
)

# Each entry: printed label, attribute on the results object, format spec
report_layout = (
    ("Total Return", "total_return", ".2%"),
    ("Annual Return", "annual_return", ".2%"),
    ("Sharpe Ratio", "sharpe_ratio", ".2f"),
    ("Max Drawdown", "max_drawdown", ".2%"),
    ("Win Rate", "win_rate", ".2%"),
    ("Number of Trades", "n_trades", ""),
)
print("Backtest Results:")
for label, attribute, spec in report_layout:
    print(f"  {label}: {getattr(results, attribute):{spec}}")

## 9. Model Monitoring

In [None]:
# Create the monitoring components
model_monitor = ModelMonitor(model_name="example_risk_model")
drift_detector = DriftDetector()
alert_system = AlertSystem()

# The training features are the reference distribution for drift detection
drift_detector.set_reference(X_train)

print("Simulating model monitoring...")

# Record the held-out predictions together with their actuals and inputs
model_monitor.log_predictions(
    predictions=risk_predictions,
    actuals=y_test,
    features=X_test,
    prediction_time_ms=50
)

# Compare the held-out features with the reference, limited to the first ten
# feature columns (column order, not importance rank)
features_to_check = feature_cols[:10]
drift_results = drift_detector.detect_drift(
    current_data=X_test,
    features=features_to_check
)

# Summarize which of the checked features were flagged
drifted_features = [
    feature_name
    for feature_name, outcome in drift_results.items()
    if outcome.is_drifted
]
n_checked, n_drifted = len(drift_results), len(drifted_features)
print("\nDrift Detection Results:")
print(f"  Features checked: {n_checked}")
print(f"  Features with drift: {n_drifted}")

if drifted_features:
    print("\nDrifted features:")
    for feat in drifted_features[:5]:
        result = drift_results[feat]
        print(f"  - {feat}: score={result.drift_score:.3f}, type={result.drift_type}")

# Metrics handed to the alert system; rmse and accuracy are fixed example values
drift_rate = n_drifted / n_checked if drift_results else 0
alert_metrics = {
    'rmse': 0.03,
    'accuracy': 0.65,
    'drift_rate': drift_rate,
    'n_drifted': n_drifted,
    'drifted_features': drifted_features
}

triggered_alerts = alert_system.check_alerts(alert_metrics, source='monitoring')
print(f"\nAlerts triggered: {len(triggered_alerts)}")
for alert in triggered_alerts:
    print(f"  [{alert.severity.value}] {alert.title}: {alert.message}")

## 10. Generate Dashboard

In [None]:
# Build the monitoring dashboard from the artifacts produced above
dashboard_gen = DashboardGenerator()

# Feature importance as a {feature name: importance} mapping
feature_names = importance_df['feature'].values
importance_values = importance_df['importance'].values
feature_importance = dict(zip(feature_names, importance_values))

# Monitoring metrics: a fixed example accuracy history plus the monitor's summary
example_accuracy_history = [0.6, 0.62, 0.65, 0.63, 0.64]
monitoring_metrics = {
    'accuracy_history': example_accuracy_history,
    'summary': model_monitor.get_monitoring_summary()
}

# Render the dashboard and report where it was written
dashboard_inputs = {
    'predictions': risk_predictions,
    'historical_performance': totals_df,
    'feature_importance': feature_importance,
    'monitoring_metrics': monitoring_metrics,
    'drift_results': drift_results,
}
dashboard_path = dashboard_gen.create_risk_dashboard(**dashboard_inputs)

print(f"\nDashboard generated: {dashboard_path}")
print("Open this file in your browser to view the interactive dashboard.")
print("Open this file in your browser to view the interactive dashboard.")

## 11. Complete Pipeline Example

In [None]:
# Example of running the complete pipeline for multiple traders
def analyze_all_traders(db, limit=5, *, min_history=50, window=20,
                        volatility_threshold=100):
    """Analyze multiple traders and generate a heuristic risk report.

    Relies on the notebook-level ``validator`` and ``feature_pipeline``
    objects created in the initialization cell.

    Args:
        db: Object providing ``get_all_traders()`` and
            ``get_trader_data(account_id)``.
        limit: Number of rows taken from the top of the trader table.
        min_history: Traders with fewer daily-total rows than this are skipped.
        window: Number of most recent rows used for the volatility and mean.
        volatility_threshold: Recent volatility above this value yields the
            'Monitor' recommendation; otherwise 'Normal'.

    Returns:
        A DataFrame with one row per analyzed trader (empty when none
        qualified). ``risk_score`` is recent volatility divided by
        ``abs(recent mean) + 1``.
    """
    traders = db.get_all_traders().head(limit)
    risk_assessments = []

    for _, trader in traders.iterrows():
        try:
            # Get data
            totals, fills = db.get_trader_data(trader['account_id'])

            if len(totals) < min_history:
                continue

            # Validate
            validation = validator.validate_totals(totals)
            if not validation.is_valid:
                continue

            # Run feature generation so a pipeline failure for this trader is
            # reported by the handler below; the heuristic score does not use
            # the resulting features.
            feature_pipeline.generate_features(totals, fills)

            # Assess risk (using simple heuristics for demo)
            recent_pnl = totals['net_pnl'].tail(window)
            recent_volatility = recent_pnl.std()
            recent_return = recent_pnl.mean()

            risk_assessments.append({
                'trader_id': trader['account_id'],
                'trader_name': trader['trader_name'],
                'total_pnl': trader['total_pnl'],
                'recent_volatility': recent_volatility,
                'recent_return': recent_return,
                'risk_score': recent_volatility / (abs(recent_return) + 1),
                'recommendation': (
                    'Monitor' if recent_volatility > volatility_threshold else 'Normal'
                )
            })

        except Exception as e:
            # Deliberate best-effort: one failing trader must not abort the
            # whole report, so the error is reported and the loop continues.
            print(f"Error analyzing {trader['trader_name']}: {e}")

    return pd.DataFrame(risk_assessments)

# Produce the multi-trader report for the first five traders
print("Analyzing multiple traders...")
risk_report = analyze_all_traders(db, limit=5)

if risk_report.empty:
    print("No traders analyzed successfully.")
else:
    # Highest risk score first
    print("\nRisk Assessment Report:")
    ranked_report = risk_report.sort_values('risk_score', ascending=False)
    print(ranked_report)

## 12. Summary and Next Steps

In [None]:
# Closing summary: the workflow stages covered above, then suggested next steps
banner = "=" * 80

# (section heading, bullet points) in display order
workflow_sections = [
    ("1. Data Ingestion and Storage", [
        "Database setup and data retrieval",
        "Support for multiple traders",
    ]),
    ("2. Data Validation", [
        "Comprehensive quality checks",
        "Trading-specific validations",
    ]),
    ("3. Feature Engineering", [
        "Technical indicators",
        "Behavioral features",
        "Market regime features",
    ]),
    ("4. Model Training", [
        "LightGBM risk models",
        "Time series cross-validation",
        "Feature importance analysis",
    ]),
    ("5. Risk Prediction", [
        "Daily P&L prediction",
        "Risk score calculation",
        "Confidence estimation",
    ]),
    ("6. Backtesting", [
        "Strategy simulation",
        "Performance metrics",
    ]),
    ("7. Monitoring", [
        "Model performance tracking",
        "Data drift detection",
        "Automated alerting",
    ]),
    ("8. Reporting", [
        "Interactive dashboards",
        "Risk reports",
    ]),
]

next_steps = [
    "1. Run scripts/setup_database.py to download all historical data",
    "2. Run scripts/train_models.py to train production models",
    "3. Run scripts/daily_predict.py for daily risk predictions",
    "4. Monitor model performance and retrain as needed",
]

print(banner)
print("TRADING RISK MANAGEMENT SYSTEM - SUMMARY")
print(banner)
print("\nThis notebook demonstrated the complete workflow:")
# Each section heading is preceded by a blank line; bullets are indented
for heading, bullets in workflow_sections:
    print(f"\n{heading}")
    for bullet in bullets:
        print(f"   - {bullet}")
print("\nNext Steps:")
for step in next_steps:
    print(step)
print("\n" + banner)