# 06 - ML Models

This notebook trains XGBoost and LSTM models to predict trade profitability.

In [None]:
import sys
sys.path.append('../src')

from ml_models import TradePredictor
import pandas as pd
import numpy as np
import os

In [None]:
# Load Data
# Both inputs are CSV files addressed relative to the notebook's directory.
data_dir = '../data'
results_dir = '../results'

features_path = f'{data_dir}/nifty_features_5min.csv'
trades_path = f'{results_dir}/backtest_trades.csv'

features_df = pd.read_csv(features_path)
trades_df = pd.read_csv(trades_path)

# Print (rows, columns) of each table as a quick check that the load worked.
print(f"Features: {features_df.shape}")
print(f"Trades: {trades_df.shape}")

In [None]:
# Problem Definition
# Static description of the learning task; nothing is computed in this cell.
PROBLEM_DEFINITION = """
=== ML Problem Definition ===

Target: Binary classification
- 1 if trade is profitable (PnL > 0)
- 0 if trade is unprofitable

Features:
- All engineered features at signal time
- Regime, time-based features
- Signal strength (EMA gap)
"""
print(PROBLEM_DEFINITION)

In [None]:
# Initialize Predictor
predictor = TradePredictor()

# Prepare Features
# Builds the model inputs X and the target y from the feature table and the
# backtest trades. The None check below treats `X is None` as "nothing was
# produced".
X, y = predictor.prepare_features(features_df, trades_df)

if X is not None:
    print(f"Samples: {len(X)}")
    print(f"Features: {len(predictor.feature_cols)}")
    print(f"Target Distribution: {y.value_counts().to_dict()}")
else:
    # Say so explicitly: otherwise this cell is silent and the problem only
    # shows up later as an unrelated-looking training failure.
    print("Feature preparation returned no data (X is None); "
          "the training cells below have nothing to train on.")

In [None]:
# Train XGBoost
# Reset the name first so that a failed run never leaves a model from an
# earlier execution of this cell bound to `xgb_model`.
xgb_model = None
try:
    xgb_model = predictor.train_xgboost(X, y)

    # Compare with None explicitly (as done for X above) instead of relying on
    # the truthiness of a model object.
    if xgb_model is not None:
        # Feature Importance: pair each feature name with the model's
        # importance score, highest first.
        importance = pd.DataFrame({
            'feature': predictor.feature_cols,
            'importance': xgb_model.feature_importances_
        }).sort_values('importance', ascending=False)

        print("\nTop 10 Features:")
        print(importance.head(10))
except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print(f"XGBoost training failed: {e}")

In [None]:
# Train LSTM
# Reset the name first so that a failed run leaves `lstm_model` as None rather
# than undefined or bound to a model from an earlier execution of this cell.
lstm_model = None
try:
    # NOTE(review): sequence_length=10 is presumably the number of consecutive
    # samples per LSTM input window - confirm against TradePredictor.train_lstm.
    lstm_model = predictor.train_lstm(X, y, sequence_length=10)
except Exception as e:
    # Best-effort cell: report the failure and let the rest of the notebook run.
    print(f"LSTM training failed: {e}")

In [None]:
# Save Models
models_dir = '../models'
# Create the output directory up front so a fresh checkout does not depend on
# save_models creating it; exist_ok=True makes re-running this cell harmless.
os.makedirs(models_dir, exist_ok=True)
predictor.save_models(models_dir)

In [None]:
# ML-Enhanced Backtest Comparison
# Placeholder: prints the planned comparison; no backtest is run in this cell.
STRATEGY_COMPARISON_PLAN = """
=== Strategy Comparison ===

To be implemented:
1. Baseline Strategy (without ML)
2. XGBoost Filter (only take trades with confidence > 0.5)
3. LSTM Filter (only take trades with confidence > 0.5)

Expected improvement: Higher win rate with fewer trades
"""
print(STRATEGY_COMPARISON_PLAN)