# Quantitative Trading System: Full Workflow Example

This notebook demonstrates the complete workflow of the Quantitative Trading System, in the order the steps are run below:
1. Configuration
2. Data loading and preprocessing
3. Exploratory data analysis
4. Model training
5. Backtesting and strategy comparison
6. Strategy optimization
7. Performance analysis and reporting

Let's start by importing the necessary modules and setting up our environment.

In [None]:
import os
import sys
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import yaml
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Add src directory to path
sys.path.append('../')

# Import project modules
from src.data_loader import fetch_data, prepare_dataset
from src.model import build_model, TimeSeriesDataset, ModelTrainer, ModelWrapper
from src.strategy import (MomentumStrategy, MeanReversionStrategy, 
                         MovingAverageCrossoverStrategy, MACDStrategy, 
                         EnsembleStrategy, MLStrategy)
from src.backtest import BacktestEngine
from src.optimization import StrategyOptimizer
from src.visualization import (
    plot_portfolio_performance, plot_return_distribution,
    plot_rolling_performance, plot_performance_table,
    plot_trade_analysis, plot_correlation_matrix
)

# Set up plotting style
plt.style.use('ggplot')
sns.set_style('whitegrid')
%matplotlib inline

# Ensure output directories exist
os.makedirs('../results', exist_ok=True)
os.makedirs('../models', exist_ok=True)
os.makedirs('../reports', exist_ok=True)

## 1. Configuration

Let's set up our configuration parameters for the trading system.

In [None]:
# Load configuration from YAML file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open('../config/config.yaml', 'r', encoding='utf-8') as config_file:
    config = yaml.safe_load(config_file)

# Fail early, with a readable message, if a section this notebook indexes
# unconditionally ('data' and 'model') is absent or the file is empty.
missing_sections = [section for section in ('data', 'model') if section not in (config or {})]
if missing_sections:
    raise KeyError(f"config.yaml is missing required section(s): {missing_sections}")

# Display configuration
config

## 2. Data Loading and Preprocessing

Let's fetch data for the S&P 500 index and process it for our analysis.

In [None]:
# Get ticker, start date, and end date from config.
# NOTE(review): index 1 of config['data']['tickers'] is assumed to be the
# S&P 500 entry — confirm against config.yaml.
ticker = config['data']['tickers'][1]  # S&P 500
start_date = config['data']['start_date']
end_date = config['data']['end_date']

print(f"Fetching data for {ticker} from {start_date} to {end_date}")

# Fetch data
data = fetch_data(ticker, start_date, end_date)

# Everything below indexes the first/last row, so stop here with a clear
# message if nothing came back.
if data is None or len(data) == 0:
    raise ValueError(f"No data returned for {ticker} between {start_date} and {end_date}")

# Fill in whichever of High / Low / Close is missing, using Price as a
# stand-in. Columns that already exist are left untouched so real values are
# never overwritten just because a sibling column is absent.
for ohlc_column in ('High', 'Low', 'Close'):
    if ohlc_column not in data.columns:
        data[ohlc_column] = data['Price']

# Split data by position into training and testing sets (80/20).
# .copy() makes each split an independent frame, so columns added to one of
# them later do not trigger SettingWithCopyWarning or touch `data`.
split_index = int(0.8 * len(data))
train_data = data.iloc[:split_index].copy()
test_data = data.iloc[split_index:].copy()

print(f"Training data: {train_data.shape[0]} rows, {train_data.shape[1]} columns "
      f"(from {train_data.index[0]} to {train_data.index[-1]})")
print(f"Testing data: {test_data.shape[0]} rows, {test_data.shape[1]} columns "
      f"(from {test_data.index[0]} to {test_data.index[-1]})")

# Display the first few rows of the data
data.head()

Let's visualize the price data to get a feel for what we're working with.

In [None]:
# Price history for the selected ticker.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(data.index, data['Price'])
ax.set_title(f'{ticker} Price')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.grid(True)

# Dashed red line at the first row of the test split.
ax.axvline(x=test_data.index[0], color='r', linestyle='--', label='Train/Test Split')
ax.legend()
plt.show()

## 3. Exploratory Data Analysis

Let's perform some basic exploratory analysis on the data.

In [None]:
# Derived series used by the plots below. Columns are written to `data` in
# the same order as before: Return, rolling mean, rolling std, rolling vol.
price_window_30d = data['Price'].rolling(window=30)
data['Return'] = data['Price'].pct_change()
data['Rolling_Mean_30d'] = price_window_30d.mean()
data['Rolling_Std_30d'] = price_window_30d.std()
data['Rolling_Vol_30d'] = data['Return'].rolling(window=30).std() * np.sqrt(252)  # Annualized

# --- Price with 30-day moving average and a ±2 standard deviation band ---
band_lower = data['Rolling_Mean_30d'] - 2*data['Rolling_Std_30d']
band_upper = data['Rolling_Mean_30d'] + 2*data['Rolling_Std_30d']

fig_price, ax_price = plt.subplots(figsize=(14, 7))
ax_price.plot(data.index, data['Price'], label='Price')
ax_price.plot(data.index, data['Rolling_Mean_30d'], label='30-day MA', color='red')
ax_price.fill_between(data.index, band_lower, band_upper,
                      color='red', alpha=0.2, label='±2σ')
ax_price.set_title(f'{ticker} Price with 30-day Moving Average')
ax_price.set_xlabel('Date')
ax_price.set_ylabel('Price')
ax_price.legend()
ax_price.grid(True)
plt.show()

# --- Daily returns: histogram (left) and kernel density estimate (right) ---
fig_returns, (ax_hist, ax_kde) = plt.subplots(1, 2, figsize=(14, 7))

data['Return'].hist(bins=50, ax=ax_hist)
ax_hist.set_title('Returns Distribution')
ax_hist.set_xlabel('Daily Return')
ax_hist.set_ylabel('Frequency')
ax_hist.grid(True)

data['Return'].plot(kind='kde', ax=ax_kde)
ax_kde.set_title('Returns Density')
ax_kde.set_xlabel('Daily Return')
ax_kde.grid(True)

fig_returns.tight_layout()
plt.show()

# --- 30-day rolling volatility of daily returns, scaled by sqrt(252) ---
fig_vol, ax_vol = plt.subplots(figsize=(14, 7))
ax_vol.plot(data.index, data['Rolling_Vol_30d'])
ax_vol.set_title(f'{ticker} 30-day Rolling Volatility (Annualized)')
ax_vol.set_xlabel('Date')
ax_vol.set_ylabel('Volatility')
ax_vol.grid(True)
plt.show()

## 4. Model Training (LSTM)

Let's train an LSTM model to forecast prices.

In [None]:
# Build the sliding-window datasets for both splits with the same window.
window_size = 60  # Use 60 days of price history for prediction
(X_train, y_train), (X_test, y_test) = (
    prepare_dataset(split_frame, window_size)
    for split_frame in (train_data, test_data)
)

# Report the resulting array shapes.
print(
    f"Training set: {X_train.shape}, {y_train.shape}",
    f"Testing set: {X_test.shape}, {y_test.shape}",
    sep="\n",
)

In [None]:
# Seed NumPy's and PyTorch's random generators before the model is created so
# a Restart & Run All starts training from the same random state each time.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

model_config = config['model']

# Build LSTM model; the input shape is taken from axes 1 and 2 of X_train.
model = build_model(
    input_shape=(X_train.shape[1], X_train.shape[2]),
    lstm_units=model_config['lstm_units'],
    dropout_rate=model_config['dropout'],
    model_type='LSTM'
)

# Create model trainer. The learning rate is read from the config when it is
# provided there and falls back to the previous hard-coded 0.001 otherwise.
trainer = ModelTrainer(model, learning_rate=model_config.get('learning_rate', 0.001))

# Train the model.
# NOTE(review): the test split is passed in the validation slots. If the
# trainer uses validation loss for early stopping or checkpoint selection,
# the test metrics reported later are optimistically biased — consider
# validating on a held-out tail of the training data instead.
print("Training LSTM model...")
history = trainer.train(
    X_train, y_train, X_test, y_test, 
    batch_size=model_config['batch_size'],
    epochs=model_config['epochs'],
    verbose=1
)

In [None]:
# Training vs. validation loss per epoch, as recorded in `history`.
fig_hist, ax_loss = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (('train_loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    ax_loss.plot(history[history_key], label=curve_label)
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Model Training History')
ax_loss.legend()
ax_loss.grid(True)
plt.show()

In [None]:
# Generate predictions.
# They are flattened to 1-D because pandas rejects a 2-D array as a DataFrame
# column, which is what an (n, 1)-shaped model output would be.
# NOTE(review): assumes trainer.predict returns something np.asarray can
# convert (array, list, or CPU tensor) — confirm.
predictions = np.asarray(trainer.predict(X_test)).ravel()

# Align predictions with the test rows that follow the first full window.
# NOTE(review): assumes the i-th prediction targets row `window_size + i` of
# test_data — confirm against prepare_dataset.
target_rows = slice(window_size, window_size + len(predictions))
prediction_dates = test_data.index[target_rows]
if len(prediction_dates) != len(predictions):
    raise ValueError(
        f"Got {len(predictions)} predictions but only {len(prediction_dates)} "
        f"test rows after the first {window_size}-row window"
    )

# Create a DataFrame with actual and predicted prices
prediction_df = pd.DataFrame({
    'Actual': test_data['Price'].values[target_rows],
    'Predicted': predictions
}, index=prediction_dates)

# Plot actual vs predicted prices
plt.figure(figsize=(14, 7))
plt.plot(prediction_df.index, prediction_df['Actual'], label='Actual Price')
plt.plot(prediction_df.index, prediction_df['Predicted'], label='Predicted Price', alpha=0.7)
plt.title('Actual vs Predicted Price')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Calculate error metrics between actual and predicted prices.
# (mean_squared_error, mean_absolute_error, r2_score, pearsonr and torch are
# imported in the notebook's import cell.)
actual_prices = prediction_df['Actual']
predicted_prices = prediction_df['Predicted']

mse = mean_squared_error(actual_prices, predicted_prices)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_prices, predicted_prices)
r2 = r2_score(actual_prices, predicted_prices)
corr, _ = pearsonr(actual_prices, predicted_prices)  # p-value is not used

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R²): {r2:.4f}")
print(f"Correlation Coefficient: {corr:.4f}")

# Save model: the weights plus the model class name and a save timestamp.
model_path = '../models/lstm_model.pt'
torch.save({
    'model_state_dict': model.state_dict(),
    'model_type': model.__class__.__name__,
    'timestamp': datetime.now().strftime("%Y%m%d%H%M%S")
}, model_path)
print(f"Model saved to {model_path}")

# Create model wrapper for use with trading strategy
model_wrapper = ModelWrapper(model, trainer)

In [None]:
# Wrap the trained model in a strategy and produce its signals on the test split.
ml_strategy = MLStrategy(model_wrapper, config)
ml_signals = ml_strategy.generate_signals(test_data)

# Two stacked panels: price on top, the strategy's signals underneath.
fig_signals, (ax_top, ax_bottom) = plt.subplots(2, 1, figsize=(14, 7))

ax_top.plot(test_data.index, test_data['Price'])
ax_top.set_title('Price')
ax_top.grid(True)

ax_bottom.plot(ml_signals.index, ml_signals)
ax_bottom.set_title('ML Strategy Signals')
ax_bottom.grid(True)

fig_signals.tight_layout()
plt.show()

In [None]:
# Now let's run backtests for all our strategies

def run_and_report(label, strategy):
    """Backtest one strategy on the test split and print its headline metrics.

    Parameters
    ----------
    label : str
        Name used in the printed header ("<label> Strategy Performance:").
    strategy : object
        Strategy instance handed to BacktestEngine together with the
        notebook-level `test_data` and `config`.

    Returns
    -------
    tuple
        (engine, metrics): the BacktestEngine after `run()` and its
        `performance_metrics` mapping.
    """
    engine = BacktestEngine(test_data, strategy, config)
    engine.run()
    metrics = engine.performance_metrics

    print(f"\n{label} Strategy Performance:")
    print(f"Annual Return: {metrics['annual_return_strategy']:.2%}")
    print(f"Sharpe Ratio: {metrics['sharpe_ratio_strategy']:.2f}")
    print(f"Max Drawdown: {metrics['max_drawdown_strategy']:.2%}")
    return engine, metrics


# 1. Momentum Strategy
momentum_strategy = MomentumStrategy(config)
momentum_backtest, momentum_metrics = run_and_report("Momentum", momentum_strategy)

# 2. Mean Reversion Strategy
mean_reversion_strategy = MeanReversionStrategy(config)
mean_reversion_backtest, mean_reversion_metrics = run_and_report(
    "Mean Reversion", mean_reversion_strategy)

# 3. Moving Average Crossover Strategy
ma_strategy = MovingAverageCrossoverStrategy(config)
ma_backtest, ma_metrics = run_and_report("Moving Average Crossover", ma_strategy)

# 4. MACD Strategy
macd_strategy = MACDStrategy(config)
macd_backtest, macd_metrics = run_and_report("MACD", macd_strategy)

# 5. ML Strategy (the strategy object was created in the previous cell)
ml_backtest, ml_metrics = run_and_report("ML", ml_strategy)

In [None]:
# Combine every strategy built so far into one ensemble.
strategies = [momentum_strategy, mean_reversion_strategy, ma_strategy,
              macd_strategy, ml_strategy]

# Each member gets the same share, keyed by the strategy's `name`.
weights = dict.fromkeys((member.name for member in strategies), 1/len(strategies))
ensemble_strategy = EnsembleStrategy(strategies, weights, config)

# Backtest the ensemble on the test split.
ensemble_backtest = BacktestEngine(test_data, ensemble_strategy, config)
ensemble_backtest.run()
ensemble_metrics = ensemble_backtest.performance_metrics

print(
    "\nEnsemble Strategy Performance:",
    f"Annual Return: {ensemble_metrics['annual_return_strategy']:.2%}",
    f"Sharpe Ratio: {ensemble_metrics['sharpe_ratio_strategy']:.2f}",
    f"Max Drawdown: {ensemble_metrics['max_drawdown_strategy']:.2%}",
    sep="\n",
)

In [None]:
# Compare all strategies with market performance.
# Market returns are computed here from the test split's own prices. The
# 'Return' column was added to `data` only after `test_data` had been sliced
# off, so reading test_data['Return'] is not guaranteed to work.
market_returns = test_data['Price'].pct_change().iloc[1:]  # Skip first NaN

strategy_returns = {
    'Momentum': momentum_backtest.portfolio['Strategy_Net'],
    'Mean Reversion': mean_reversion_backtest.portfolio['Strategy_Net'],
    'Moving Average': ma_backtest.portfolio['Strategy_Net'],
    'MACD': macd_backtest.portfolio['Strategy_Net'],
    'ML': ml_backtest.portfolio['Strategy_Net'],
    'Ensemble': ensemble_backtest.portfolio['Strategy_Net'],
    'Market': market_returns
}

# Calculate cumulative returns: growth of 1 unit compounded over the period
cumulative_returns = {
    label: (1 + returns).cumprod()
    for label, returns in strategy_returns.items()
}

# Plot cumulative returns
plt.figure(figsize=(14, 7))
for label, cum_return in cumulative_returns.items():
    plt.plot(cum_return.index, cum_return, label=label)
plt.title('Cumulative Returns Comparison')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Performance comparison table: one row per strategy plus a market row.
table_columns = ['Strategy', 'Annual Return', 'Sharpe Ratio', 'Sortino Ratio',
                 'Max Drawdown', 'Win Rate', 'Profit Factor']

labelled_metrics = [
    ('Momentum', momentum_metrics),
    ('Mean Reversion', mean_reversion_metrics),
    ('Moving Average', ma_metrics),
    ('MACD', macd_metrics),
    ('ML', ml_metrics),
    ('Ensemble', ensemble_metrics),
]

# Strategy rows; win rate and profit factor fall back to NaN when absent.
table_rows = [
    (
        label,
        result['annual_return_strategy'],
        result['sharpe_ratio_strategy'],
        result['sortino_ratio_strategy'],
        result['max_drawdown_strategy'],
        result.get('win_rate', np.nan),
        result.get('profit_factor', np.nan),
    )
    for label, result in labelled_metrics
]

# Market row: the *_market figures are read from the momentum backtest's
# metrics; win rate and profit factor are left as NaN for the market.
table_rows.append((
    'Market',
    momentum_metrics['annual_return_market'],
    momentum_metrics['sharpe_ratio_market'],
    momentum_metrics['sortino_ratio_market'],
    momentum_metrics['max_drawdown_market'],
    np.nan,
    np.nan,
))

# Transpose the rows into the column -> list-of-values mapping.
performance_metrics = {
    column: list(values)
    for column, values in zip(table_columns, zip(*table_rows))
}

# Create DataFrame indexed by strategy name
performance_df = pd.DataFrame(performance_metrics).set_index('Strategy')

# Display table
performance_df

In [None]:
# Let's optimize the momentum strategy parameters.
# The search runs on the training split only. Picking parameters on the same
# data that is then used to report performance would make the backtest below
# an in-sample result and overstate what the strategy can be expected to do.
print("Optimizing Momentum Strategy Parameters...")

# Define parameter grid
param_grid = {
    'momentum_window': [5, 10, 12, 15, 20, 30],
    'momentum_threshold': [0.0, 0.005, 0.01, 0.02]
}

# Create optimizer (fitted on train_data, evaluated on test_data further down)
optimizer = StrategyOptimizer(
    data=train_data,
    strategy_class=MomentumStrategy,
    param_grid=param_grid,
    config=config
)

# Run optimization
best_result = optimizer.optimize()

# Display best parameters; these metrics are measured on the training split.
print("\nBest Momentum Strategy Parameters (selected on training data):")
print(f"Parameters: {best_result['params']}")
print(f"Sharpe Ratio: {best_result['metrics']['sharpe_ratio_strategy']:.2f}")
print(f"Annual Return: {best_result['metrics']['annual_return_strategy']:.2%}")
print(f"Max Drawdown: {best_result['metrics']['max_drawdown_strategy']:.2%}")

# Create the optimized strategy and backtest it out of sample on test_data.
# NOTE(review): the chosen parameters are merged at the top level of the
# config — confirm MomentumStrategy reads momentum_window / momentum_threshold
# from there.
optimized_config = config.copy()
optimized_config.update(best_result['params'])
optimized_momentum = MomentumStrategy(optimized_config)
optimized_backtest = BacktestEngine(test_data, optimized_momentum, optimized_config)
optimized_backtest.run()

# Compare original vs optimized strategies (both on the test split)
plt.figure(figsize=(14, 7))
plt.plot(momentum_backtest.portfolio.index, momentum_backtest.portfolio['Cumulative_Strategy'], 
         label='Original Momentum')
plt.plot(optimized_backtest.portfolio.index, optimized_backtest.portfolio['Cumulative_Strategy'], 
         label='Optimized Momentum')
plt.plot(momentum_backtest.portfolio.index, momentum_backtest.portfolio['Cumulative_Market'], 
         label='Market', linestyle='--')
plt.title('Original vs Optimized Momentum Strategy')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Generate a comprehensive performance report for the optimized momentum strategy.
# The plotting helpers used here are already imported from src.visualization
# in the notebook's import cell, so they are not imported again.

# Portfolio performance over time
fig1 = plot_portfolio_performance(optimized_backtest.portfolio, strategy_name='Optimized Momentum')
plt.show()

# Strategy return distribution against the portfolio's 'Return' column
fig2 = plot_return_distribution(
    optimized_backtest.portfolio['Strategy_Net'], 
    benchmark_returns=optimized_backtest.portfolio['Return']
)
plt.show()

# Metrics rendered as a table
fig3 = plot_performance_table(optimized_backtest.performance_metrics)
plt.show()

# Trade analysis only when there are trades to show. The explicit None/length
# check works whether `trades` is a list or a DataFrame; plain truthiness
# raises on a DataFrame.
trades = optimized_backtest.trades
if trades is not None and len(trades) > 0:
    fig4 = plot_trade_analysis(trades)
    plt.show()

In [None]:
# Write the optimized strategy's portfolio frame to the results folder.
results_csv = '../results/optimized_momentum_strategy.csv'
print("Saving optimized strategy performance to CSV...")
optimized_backtest.portfolio.to_csv(results_csv)

# Engine-provided summary, followed by the headline numbers.
print(optimized_backtest.get_performance_summary())

final_metrics = optimized_backtest.performance_metrics
print("\nBacktest complete! The optimized momentum strategy achieved:")
print(f"Total Return: {final_metrics['total_return_strategy']:.2%}")
print(f"Annual Return: {final_metrics['annual_return_strategy']:.2%}")
print(f"Sharpe Ratio: {final_metrics['sharpe_ratio_strategy']:.2f}")
print(f"Sortino Ratio: {final_metrics['sortino_ratio_strategy']:.2f}")
print(f"Max Drawdown: {final_metrics['max_drawdown_strategy']:.2%}")
# Win rate and profit factor default to 0 when the metrics do not include them.
print(f"Win Rate: {final_metrics.get('win_rate', 0):.2%}")
print(f"Profit Factor: {final_metrics.get('profit_factor', 0):.2f}")