# 🤖 Advanced ML Trading System Demo
## Professional Algorithmic Trading with Machine Learning

**System Overview:** Multi-model ensemble trading system using advanced feature engineering and time series ML  
**Performance Target:** Enhanced risk-adjusted returns through adaptive market pattern recognition  
**Technology Stack:** Python, XGBoost, scikit-learn (Random Forest), Custom Technical Analysis, Time Series Cross-Validation  

---

### 🎯 **Key Features:**
- **50+ Advanced Features** from market microstructure analysis
- **Ensemble ML Models** with time-aware cross-validation
- **Custom Technical Indicators** for enhanced signal quality
- **Risk Management Integration** with position sizing
- **Production-Ready Architecture** for live deployment

*Note: This demo shows system architecture and capabilities. Proprietary features and specific trading logic are abstracted for IP protection.*

## 📦 System Setup & Dependencies

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime, timedelta
import yfinance as yf

# ML Libraries
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
import xgboost as xgb

# Notebook-wide display setup: silence library warnings so the demo output
# stays readable, then select the plotting theme and colour palette.
warnings.filterwarnings('ignore')
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Startup banner; the second line carries the current local wall-clock time.
print("🚀 ML Trading System Initialized")
_startup_stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"📊 Environment Ready - {_startup_stamp}")

## 📊 Market Data Pipeline

In [None]:
# Professional data loading with error handling
def load_market_data(ticker="QQQ", period_days=365):
    """
    Download daily bars for one ticker and normalise the column names.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.download``.
    period_days : int
        Length of the look-back window in calendar days, ending now.

    Returns
    -------
    pandas.DataFrame or None
        Frame with lower-cased column names and every row containing a NaN
        removed.  ``None`` is returned (and a message printed) when the
        download raises, returns nothing, or leaves no complete rows.
    """
    try:
        end_date = datetime.now()
        start_date = end_date - timedelta(days=period_days)

        print(f"📥 Loading {ticker} data ({period_days} days)...")
        df = yf.download(ticker, start=start_date, end=end_date, interval="1d")

        # Without this check an empty result only shows up further down as an
        # opaque "index 0 is out of bounds" error from df.index[0].
        if df is None or df.empty:
            print(f"❌ Data loading failed: no data returned for {ticker}")
            return None

        # Handle MultiIndex columns from yfinance: keep the first level only
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = [name.lower() for name in df.columns.get_level_values(0)]
        else:
            df.columns = [col.lower() for col in df.columns]

        # Data validation: keep complete rows only
        df = df.dropna()
        if df.empty:
            print(f"❌ Data loading failed: no complete rows for {ticker}")
            return None

        print(f"✅ Data loaded: {len(df)} records")
        print(f"📅 Period: {df.index[0].date()} to {df.index[-1].date()}")

        return df

    except Exception as e:
        # Demo-level boundary: report the problem and let callers test for None.
        print(f"❌ Data loading failed: {e}")
        return None

# Load demonstration data (500 calendar days of QQQ)
df = load_market_data("QQQ", 500)  # NASDAQ ETF for demo

# load_market_data returns None on failure, so only summarise a real frame.
if df is not None:
    print(f"\n📈 Price Summary:")
    print(f"   Range: ${df['close'].min():.2f} - ${df['close'].max():.2f}")
    print(f"   Latest: ${df['close'].iloc[-1]:.2f}")

    # Display sample. A bare `df.head()` inside an `if` body is evaluated and
    # discarded, so it has to be printed explicitly to be visible.
    print(df.head())

## 🔧 Advanced Feature Engineering Framework

**Professional ML feature extraction pipeline with 50+ indicators:**

In [None]:
class AdvancedFeatureExtractor:
    """
    Build a technical-indicator feature matrix plus forward-return targets
    from a price frame.

    The input frame must expose ``close``, ``high``, ``low`` and ``volume``
    columns and an index offering ``dayofweek``, ``hour`` and ``day``
    (i.e. a ``DatetimeIndex``).
    """

    def __init__(self, random_state=42):
        """
        Parameters
        ----------
        random_state : int or None
            Seed for the random placeholder columns (``custom_signal_1``,
            ``custom_signal_2``, ``custom_timing``).  A fixed seed makes
            repeated extractions on the same frame identical; ``None`` draws
            fresh entropy on every call.
        """
        # Kept for interface compatibility; no method of this class uses it.
        self.scaler = StandardScaler()
        self.random_state = random_state
        print("🔧 Advanced Feature Extractor Initialized")

    @staticmethod
    def _rsi(prices, period=14):
        """Return an RSI built from simple rolling means of gains and losses."""
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
        # loss == 0 with gain > 0 gives rs = inf, which maps to RSI = 100.
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    def extract_features(self, df):
        """
        Extract the full feature and target set for ML models.

        Parameters
        ----------
        df : pandas.DataFrame
            Price frame with ``close``, ``high``, ``low`` and ``volume``.

        Returns
        -------
        pandas.DataFrame
            One row per input row that has every feature and every target
            defined and finite.  Rows inside the longest rolling window (50)
            and rows without a full forward horizon (20) are dropped.
        """
        features = pd.DataFrame(index=df.index)
        print("⚙️ Extracting ML features...")

        # === PRICE ACTION FEATURES ===
        print("   📊 Price action analysis...")
        for period in [5, 10, 20, 50]:
            features[f'sma_{period}'] = df['close'].rolling(period).mean()
            features[f'price_to_sma_{period}'] = df['close'] / features[f'sma_{period}']
            features[f'volatility_{period}'] = df['close'].rolling(period).std()

        # === MOMENTUM INDICATORS ===
        print("   ⚡ Momentum analysis...")
        for period in [1, 3, 5, 10, 20]:
            features[f'return_{period}d'] = df['close'].pct_change(period)
            features[f'momentum_{period}'] = (df['close'] - df['close'].shift(period)) / df['close'].shift(period)

        # === VOLUME ANALYSIS ===
        print("   📈 Volume microstructure...")
        features['volume_sma_20'] = df['volume'].rolling(20).mean()
        features['volume_ratio'] = df['volume'] / features['volume_sma_20']
        features['price_volume'] = df['close'] * df['volume']
        features['volume_trend'] = df['volume'].rolling(5).mean() / df['volume'].rolling(20).mean()

        # === RANGE ANALYSIS ===
        print("   📏 Range and positioning...")
        for period in [10, 20, 50]:
            features[f'high_{period}'] = df['high'].rolling(period).max()
            features[f'low_{period}'] = df['low'].rolling(period).min()
            features[f'range_{period}'] = features[f'high_{period}'] - features[f'low_{period}']
            features[f'position_in_range_{period}'] = (df['close'] - features[f'low_{period}']) / features[f'range_{period}']

        # === ADVANCED TECHNICAL INDICATORS ===
        print("   🎯 Advanced technical analysis...")
        features['rsi_14'] = self._rsi(df['close'], 14)
        features['rsi_7'] = self._rsi(df['close'], 7)

        # Bollinger Bands (20-bar mean +/- 2 standard deviations)
        features['bb_middle'] = df['close'].rolling(20).mean()
        features['bb_std'] = df['close'].rolling(20).std()
        features['bb_upper'] = features['bb_middle'] + (features['bb_std'] * 2)
        features['bb_lower'] = features['bb_middle'] - (features['bb_std'] * 2)
        features['bb_position'] = (df['close'] - features['bb_lower']) / (features['bb_upper'] - features['bb_lower'])

        # === MARKET TIMING FEATURES ===
        print("   ⏰ Temporal pattern analysis...")
        features['day_of_week'] = df.index.dayofweek
        features['hour'] = df.index.hour
        features['day_of_month'] = df.index.day

        # Cyclical encoding for time features
        features['dow_sin'] = np.sin(2 * np.pi * features['day_of_week'] / 7)
        features['dow_cos'] = np.cos(2 * np.pi * features['day_of_week'] / 7)

        # === PROPRIETARY FEATURES (ABSTRACTED) ===
        print("   🔒 Proprietary signal processing...")
        # Placeholders only: pure noise standing in for abstracted features.
        # A dedicated seeded generator keeps them reproducible and leaves the
        # global numpy random state untouched.
        rng = np.random.default_rng(self.random_state)
        features['custom_signal_1'] = rng.normal(0, 1, len(df))  # Placeholder
        features['custom_signal_2'] = rng.normal(0, 1, len(df))  # Placeholder
        features['custom_timing'] = rng.choice([0, 1], len(df), p=[0.9, 0.1])  # Placeholder

        # === TARGET VARIABLES ===
        print("   🎯 Target variable creation...")
        for horizon in [1, 5, 10, 20]:
            features[f'forward_return_{horizon}'] = df['close'].shift(-horizon) / df['close'] - 1
            features[f'target_up_{horizon}'] = (features[f'forward_return_{horizon}'] > 0).astype(int)

        print(f"✅ Feature extraction complete: {len(features.columns)} features")

        # A zero denominator (e.g. a zero price) yields +/-inf, which dropna()
        # alone would keep; turn it into NaN so those rows are removed too.
        features = features.replace([np.inf, -np.inf], np.nan)
        return features.dropna()

    def prepare_ml_data(self, features_df, target_col='forward_return_5'):
        """
        Split an extracted frame into a feature matrix and one target series.

        Parameters
        ----------
        features_df : pandas.DataFrame
            Output of :meth:`extract_features`.
        target_col : str
            Column to predict.

        Returns
        -------
        tuple
            ``(X, y)`` restricted to rows where ``y`` is not NaN.  ``X`` never
            contains ``forward_return*`` / ``target_up*`` columns or the
            target column itself.

        Raises
        ------
        KeyError
            If ``target_col`` is not a column of ``features_df``.
        """
        if target_col not in features_df.columns:
            raise KeyError(f"target column {target_col!r} not found in features_df")

        # Exclude every look-ahead column, and the target itself even when it
        # does not follow the forward_return/target_up naming, to avoid leakage.
        feature_cols = [
            col for col in features_df.columns
            if not col.startswith(('forward_return', 'target_up')) and col != target_col
        ]

        X = features_df[feature_cols]
        y = features_df[target_col]

        # Remove NaN values
        valid_idx = ~y.isna()
        X = X[valid_idx]
        y = y[valid_idx]

        return X, y

# Initialize and run feature extraction (skipped when the data load failed)
if df is not None:
    extractor = AdvancedFeatureExtractor()
    features_df = extractor.extract_features(df)

    print(f"\n📊 Feature Engineering Results:")
    print(f"   Samples: {len(features_df)}")
    print(f"   Features: {len(features_df.columns)}")
    # extract_features drops incomplete rows, so a short history can leave the
    # frame empty; indexing [0] / [-1] would then raise IndexError.
    if features_df.empty:
        print("   Date Range: n/a (no complete rows)")
    else:
        print(f"   Date Range: {features_df.index[0].date()} to {features_df.index[-1].date()}")

## 🤖 ML Model Training Pipeline

In [None]:
class ProfessionalMLTrainer:
    """
    Train an XGBoost + random-forest pair with time-ordered cross-validation
    and keep the fitted models and their feature importances.

    Attributes set by the constructor:
    ``models`` maps ``"<task_type>_<name>"`` to the model fitted on all data;
    ``feature_importance`` maps the same keys to ``{feature: importance}``.
    """

    def __init__(self):
        self.models = {}
        self.feature_importance = {}
        print("🤖 Professional ML Trainer Initialized")

    @staticmethod
    def _summarize_cv_scores(cv_scores, task_type):
        """
        Reduce per-fold ``cross_val_score`` output to ``(score, std, metric)``.

        For regression the folds hold negated MSE values; each is converted
        to an RMSE before averaging so the number matches its "RMSE" label.
        """
        cv_scores = np.asarray(cv_scores, dtype=float)
        if task_type == 'regression':
            per_fold = np.sqrt(-cv_scores)
            metric = 'RMSE'
        else:
            per_fold = cv_scores
            metric = 'Accuracy'
        return per_fold.mean(), per_fold.std(), metric

    def train_ensemble(self, X, y, task_type='regression', n_splits=5, gap=0):
        """
        Cross-validate and then fit each ensemble member on the full data.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature matrix; ``X.columns`` is used to label importances.
        y : array-like
            Target aligned with ``X``.
        task_type : str
            ``'regression'`` selects regressors scored by RMSE; any other
            value selects classifiers scored by accuracy.
        n_splits : int
            Number of ``TimeSeriesSplit`` folds.
        gap : int
            Rows skipped between each training fold and its test fold.  With
            an h-bar forward-return target, ``gap=h`` keeps training labels
            from overlapping the test window; the default 0 applies no gap.

        Returns
        -------
        dict
            ``{name: {'score', 'std', 'metric'}}``.  A model whose training
            raised is recorded with ``metric == 'Error'`` and zeros.
        """
        print(f"\n🚀 Training {task_type} ensemble...")
        print("=" * 50)

        # Time series cross-validation: every test fold lies after its train fold
        tscv = TimeSeriesSplit(n_splits=n_splits, gap=gap)

        # Same hyper-parameters for both tasks; only the estimator class differs
        if task_type == 'regression':
            xgb_cls, forest_cls = xgb.XGBRegressor, RandomForestRegressor
            scoring = 'neg_mean_squared_error'
        else:  # classification
            xgb_cls, forest_cls = xgb.XGBClassifier, RandomForestClassifier
            scoring = 'accuracy'

        models_config = {
            'xgboost': xgb_cls(
                n_estimators=100,
                learning_rate=0.1,
                max_depth=6,
                random_state=42,
                verbosity=0
            ),
            'random_forest': forest_cls(
                n_estimators=50,
                max_depth=10,
                random_state=42
            )
        }

        results = {}
        for name, model in models_config.items():
            print(f"📈 Training {name}...")

            try:
                # Cross-validation
                cv_scores = cross_val_score(model, X, y, cv=tscv, scoring=scoring)
                score, spread, metric = self._summarize_cv_scores(cv_scores, task_type)

                # Fit final model on all rows
                model.fit(X, y)
                self.models[f"{task_type}_{name}"] = model

                # Feature importance
                if hasattr(model, 'feature_importances_'):
                    importance_dict = dict(zip(X.columns, model.feature_importances_))
                    self.feature_importance[f"{task_type}_{name}"] = importance_dict

                results[name] = {
                    'score': score,
                    'std': spread,
                    'metric': metric
                }

                print(f"✅ {name}: {metric} = {score:.4f} (±{spread:.4f})")

            except Exception as e:
                # Best effort: one failing model must not abort the others.
                print(f"❌ {name}: Training failed - {str(e)}")
                results[name] = {'score': 0, 'std': 0, 'metric': 'Error'}

        return results

    def analyze_features(self, top_n=15):
        """
        Average each feature's importance over all stored models, print the
        ``top_n`` highest, and return the complete ranking.

        Returns
        -------
        list
            ``(feature, mean_importance)`` tuples sorted from most to least
            important; an empty list when no importances are stored.
        """
        if not self.feature_importance:
            print("No feature importance data available")
            return []

        # Aggregate importance across models
        all_importance = {}
        for importance_dict in self.feature_importance.values():
            for feature, importance in importance_dict.items():
                all_importance.setdefault(feature, []).append(importance)

        # Average importance
        avg_importance = {
            feature: np.mean(importances)
            for feature, importances in all_importance.items()
        }

        # Sort by importance
        sorted_features = sorted(avg_importance.items(), key=lambda x: x[1], reverse=True)

        print(f"\n📊 TOP {top_n} MOST IMPORTANT FEATURES:")
        print("=" * 60)
        for i, (feature, importance) in enumerate(sorted_features[:top_n], 1):
            print(f"{i:2d}. {feature:<25} {importance:.4f}")

        return sorted_features

# Train models when an extracted, non-empty feature frame exists in this session
if 'features_df' not in locals() or features_df.empty:
    print("❌ No feature data available for training")
else:
    # Feature matrix and the 5-bar forward return used as regression target
    X, y = extractor.prepare_ml_data(features_df, target_col='forward_return_5')

    for _line in (
        "\n📋 Training Data Prepared:",
        f"   Samples: {len(X)}",
        f"   Features: {len(X.columns)}",
        f"   Target Mean: {y.mean():.4f}",
    ):
        print(_line)

    # Fit the regression ensemble, then rank features across its members
    trainer = ProfessionalMLTrainer()
    reg_results = trainer.train_ensemble(X, y, task_type='regression')
    important_features = trainer.analyze_features(top_n=20)

    print("\n🎉 ML Training Complete!")

## 📊 Feature Importance Visualization

In [None]:
# Horizontal bar chart of the twelve highest-ranked features, followed by a
# short printed breakdown of the full ranking by name pattern.
if 'important_features' not in locals() or not important_features:
    print("⚠️ Feature importance data not available")
else:
    top_features = important_features[:12]
    feature_names = [name for name, _ in top_features]
    feature_scores = [score for _, score in top_features]
    positions = range(len(feature_names))

    fig, ax = plt.subplots(figsize=(12, 8))
    bars = ax.barh(positions, feature_scores, color='steelblue')
    ax.set_yticks(positions)
    ax.set_yticklabels(feature_names)
    ax.set_xlabel('Feature Importance')
    ax.set_title('🎯 Top ML Features for Trading System', fontsize=16, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    # Value label just to the right of each bar's tip
    for bar in bars:
        bar_len = bar.get_width()
        ax.text(bar_len + 0.001, bar.get_y() + bar.get_height()/2,
                f'{bar_len:.3f}', ha='left', va='center', fontsize=10)

    plt.tight_layout()
    plt.show()

    # Category counts are plain substring matches on the feature name
    best_name, best_score = important_features[0][0], important_features[0][1]
    n_technical = sum(1 for f in important_features if any(x in f[0] for x in ['sma', 'rsi', 'bb']))
    n_volume = sum(1 for f in important_features if 'volume' in f[0])
    n_timing = sum(1 for f in important_features if any(x in f[0] for x in ['hour', 'dow', 'custom']))

    print("\n🔍 Feature Analysis Summary:")
    print(f"   Most Important: {best_name} ({best_score:.4f})")
    print(f"   Technical Features: {n_technical}")
    print(f"   Volume Features: {n_volume}")
    print(f"   Timing Features: {n_timing}")

## 🎯 Signal Generation System

In [None]:
class ProfessionalSignalGenerator:
    """
    Convert the trainer's fitted models into per-row long/short flags.

    ``self.models`` is the trainer's own ``models`` mapping, whose keys have
    the form ``"<task_type>_<name>"``.
    """

    def __init__(self, trainer):
        self.trainer = trainer
        self.models = trainer.models
        print("🎯 Professional Signal Generator Initialized")

    def generate_ensemble_predictions(self, X, task_type='regression'):
        """
        Return the equal-weight average of all models registered for
        ``task_type``.

        A model whose ``predict`` raises is reported and left out of the
        average.  When no model contributes, an all-zero array of length
        ``len(X)`` is returned.
        """
        per_model = {}
        for key, estimator in self.models.items():
            if key.startswith(task_type):
                short_name = key.replace(f"{task_type}_", "")
                try:
                    per_model[short_name] = estimator.predict(X)
                except Exception as e:
                    print(f"⚠️ Model {short_name} prediction failed: {e}")

        # Start from zeros and add each model's share; with no usable model
        # the zeros are returned unchanged.
        blended = np.zeros(len(X))
        if per_model:
            for output in per_model.values():
                blended += output / len(per_model)
        return blended

    def generate_trading_signals(self, features_df, threshold=0.01):
        """
        Score every row of ``features_df`` and derive signal columns.

        Returns a frame on the same index with ``ml_prediction``,
        ``signal_strength`` (absolute prediction), ``long_signal``,
        ``short_signal``, ``enhanced_long`` and ``enhanced_short``.

        A base signal needs the prediction beyond ``threshold`` in its
        direction, ``volume_ratio`` above 1.2, and ``rsi_14`` below 80 (long)
        or above 20 (short).  The enhanced variants also need
        ``custom_timing == 1``.
        """
        print(f"🤖 Generating trading signals (threshold: {threshold})...")

        # Model inputs: every column that is not a forward-looking target
        model_inputs = []
        for col in features_df.columns:
            if col.startswith('forward_return') or col.startswith('target_up'):
                continue
            model_inputs.append(col)
        X = features_df[model_inputs]

        predictions = self.generate_ensemble_predictions(X, task_type='regression')

        signals = pd.DataFrame(index=features_df.index)
        signals['ml_prediction'] = predictions
        signals['signal_strength'] = np.abs(predictions)

        # Filters shared by both directions
        volume_confirmed = features_df['volume_ratio'] > 1.2
        timing_ok = features_df['custom_timing'] == 1  # Proprietary timing (placeholder)
        rsi = features_df['rsi_14']

        # Long: bullish prediction, volume confirmation, not overbought
        signals['long_signal'] = (predictions > threshold) & volume_confirmed & (rsi < 80)
        # Short: bearish prediction, volume confirmation, not oversold
        signals['short_signal'] = (predictions < -threshold) & volume_confirmed & (rsi > 20)

        # Enhanced signals additionally pass the (placeholder) timing filter
        signals['enhanced_long'] = signals['long_signal'] & timing_ok
        signals['enhanced_short'] = signals['short_signal'] & timing_ok

        print(f"✅ Generated {len(signals)} signals")
        return signals

# Generate signals once both the trainer and the feature frame exist
if not ('trainer' in locals() and 'features_df' in locals()):
    print("⚠️ Signal generation requires trained models")
else:
    signal_gen = ProfessionalSignalGenerator(trainer)
    signals = signal_gen.generate_trading_signals(features_df, threshold=0.005)

    print("\n📊 SIGNAL STATISTICS:")
    print(f"   Total periods: {len(signals)}")
    for _label, _column in (
        ("Long signals", 'long_signal'),
        ("Short signals", 'short_signal'),
        ("Enhanced long", 'enhanced_long'),
        ("Enhanced short", 'enhanced_short'),
    ):
        print(f"   {_label}: {signals[_column].sum()}")

    # Show up to five rows where either enhanced flag is set
    enhanced_signals = signals[signals['enhanced_long'] | signals['enhanced_short']]
    if len(enhanced_signals) == 0:
        print("\n💡 No enhanced signals in sample period - adjust threshold for more signals")
    else:
        print("\n📋 SAMPLE ENHANCED SIGNALS:")
        for rank, (timestamp, row) in enumerate(enhanced_signals.head(5).iterrows(), start=1):
            if row['enhanced_long']:
                direction = "LONG"
            else:
                direction = "SHORT"
            strength = row['signal_strength']
            print(f"   {rank}. {timestamp.date()}: {direction:5s} | Strength: {strength:.4f}")

## 📈 Performance Visualization

In [None]:
# Performance visualization: 2x2 dashboard of price with signals, signal
# timing, prediction distribution, and signal strength over time.
# `df` may exist but be None after a failed load, so check for that as well.
if 'signals' in locals() and 'df' in locals() and df is not None:
    fig, axes = plt.subplots(2, 2, figsize=(16, 10))

    # 1. Price chart with signals
    axes[0,0].plot(df.index, df['close'], label='Price', linewidth=1)

    # Take x and y from the same set of timestamps (signal dates that are
    # also present in df) so the two scatter arrays always have equal length.
    for column, colour, marker, label in (
        ('enhanced_long', 'green', '^', 'Long Signals'),
        ('enhanced_short', 'red', 'v', 'Short Signals'),
    ):
        marked = signals[signals[column]].index.intersection(df.index)
        if len(marked) > 0:
            axes[0,0].scatter(marked, df.loc[marked, 'close'],
                             color=colour, marker=marker, s=50, label=label, alpha=0.7)

    axes[0,0].set_title('🎯 ML Trading Signals', fontweight='bold')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # 2. Signal distribution by hour of the signal timestamp
    # NOTE(review): with daily bars every timestamp presumably has the same
    # hour, so this histogram carries little information - confirm intent.
    signal_hours = [t.hour for t in signals[signals['enhanced_long'] | signals['enhanced_short']].index]
    if signal_hours:
        axes[0,1].hist(signal_hours, bins=24, alpha=0.7, color='steelblue')
        axes[0,1].set_title('⏰ Signal Distribution by Hour')
        axes[0,1].set_xlabel('Hour of Day')
        axes[0,1].grid(True, alpha=0.3)
    else:
        axes[0,1].text(0.5, 0.5, 'No signals in sample', ha='center', va='center', transform=axes[0,1].transAxes)
        axes[0,1].set_title('⏰ Signal Distribution')

    # 3. Prediction distribution (histogram of predicted returns; the dashed
    # line marks zero)
    axes[1,0].hist(signals['ml_prediction'], bins=30, alpha=0.7, color='orange')
    axes[1,0].axvline(0, color='red', linestyle='--', alpha=0.7)
    axes[1,0].set_title('🎲 ML Prediction Distribution')
    axes[1,0].set_xlabel('Predicted Return')
    axes[1,0].grid(True, alpha=0.3)

    # 4. Signal strength (absolute prediction) over time
    axes[1,1].plot(signals.index, signals['signal_strength'], alpha=0.7, color='purple')
    axes[1,1].set_title('💪 Signal Strength Over Time')
    axes[1,1].set_ylabel('Signal Strength')
    axes[1,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("\n📊 System Performance Summary:")
    print(f"   Average prediction: {signals['ml_prediction'].mean():.4f}")
    print(f"   Prediction volatility: {signals['ml_prediction'].std():.4f}")
    print(f"   Signal rate: {(signals['enhanced_long'].sum() + signals['enhanced_short'].sum()) / len(signals) * 100:.1f}%")

else:
    print("⚠️ Performance visualization requires signal data")

## 💾 System Export & Production Notes

In [None]:
# Export system for production (demo version)
import pickle
from datetime import datetime

# Write a small metadata summary of this run to disk and print deployment
# notes. Only descriptive metadata is pickled - not the fitted models.
if not ('trainer' in locals() and 'signal_gen' in locals()):
    print("⚠️ System export requires trained models")
else:
    _rule = "=" * 40

    # Create demo system package (key order is kept as in the pickled dict)
    demo_system = {
        'version': '1.0.0',
        'created': datetime.now().isoformat(),
        'description': 'ML Trading System Demo - Professional Implementation',
        'models_trained': len(trainer.models),
        'features_count': len(X.columns) if 'X' in locals() else 0,
        'data_samples': len(features_df) if 'features_df' in locals() else 0,
        'config': {
            'signal_threshold': 0.005,
            'volume_filter': True,
            'rsi_bounds': [20, 80],
            'ensemble_models': list(getattr(trainer, 'models', {})),
        },
    }

    # Save demo info (not the actual models for IP protection)
    with open('ml_trading_system_demo_info.pkl', 'wb') as handle:
        pickle.dump(demo_system, handle)

    print("💾 DEMO SYSTEM EXPORT COMPLETE")
    print(_rule)
    for _caption, _key in (
        ("📋 System Version", 'version'),
        ("🤖 Models Trained", 'models_trained'),
        ("📊 Features", 'features_count'),
        ("📈 Data Samples", 'data_samples'),
    ):
        print(f"{_caption}: {demo_system[_key]}")

    print("\n🚀 PRODUCTION DEPLOYMENT NOTES:")
    print(_rule)
    for _note in (
        "✅ Real-time data pipeline integration required",
        "✅ Risk management layer implementation",
        "✅ Position sizing optimization",
        "✅ Performance monitoring dashboard",
        "✅ Model retraining schedule (monthly recommended)",
        "✅ Proprietary features integration (IP protected)",
    ):
        print(_note)

    print("\n🔒 IP PROTECTION NOTES:")
    print(_rule)
    for _note in (
        "• Proprietary timing algorithms abstracted",
        "• Specific feature engineering techniques protected",
        "• Model hyperparameters and ensemble weights secured",
        "• Custom indicators and signals anonymized",
    ):
        print(_note)

# Closing banner, printed whether or not the export ran
print("\n🎉 ML TRADING SYSTEM DEMO COMPLETE!")
print("\n📝 This demonstration showcases:")
for _item in (
    "   • Professional ML pipeline architecture",
    "   • Advanced feature engineering capabilities",
    "   • Ensemble model training and validation",
    "   • Production-ready signal generation",
    "   • Comprehensive performance analysis",
):
    print(_item)
print("\n🔐 Proprietary components abstracted for IP protection")