# Neutral Pairs Strategy Analysis

This notebook demonstrates the core strategy logic and analysis for the market-neutral pairs trading system.

In [2]:
import copy
import os
import sys
from datetime import datetime, timedelta

sys.path.append('..')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Import our modules
from src.strategies.neutral_pairs import compute_signals, compute_spread_zscore
from src.backtest.engine import BacktestEngine
from src.utils.config import create_default_config

# Global plot appearance for every figure in this notebook: the matplotlib
# style sheet is applied first, then the seaborn colour palette. Keep this
# order when editing.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

ImportError: attempted relative import beyond top-level package

## Generate Synthetic Data

Create synthetic hourly price data for a correlated BTC/ETH pair, so the analysis below runs without any exchange data.

In [None]:
def _make_ohlcv_frame(symbol, close, volume, index):
    """Wrap a close-price path in an OHLCV frame using fixed offsets around the close."""
    return pd.DataFrame({
        'symbol': symbol,
        'open': close * 0.999,
        'high': close * 1.002,
        'low': close * 0.998,
        'close': close,
        'volume': volume
    }, index=index)


def generate_crypto_pair_data(n_periods=1000, correlation=0.7, seed=42):
    """Generate synthetic hourly OHLCV data for a correlated BTC/ETH pair.

    Both legs are driven by standard-normal shocks. The hedge leg mixes the
    base shock with an independent shock *before* volatility scaling, so the
    correlation between the two return series equals ``correlation`` (up to
    sampling noise) regardless of the two volatilities.

    Args:
        n_periods: Number of hourly bars to generate, starting 2023-01-01.
        correlation: Target correlation between base and hedge returns,
            within [-1, 1].
        seed: Seed for the private random generator. The default keeps the
            output reproducible; pass ``None`` for a fresh draw.

    Returns:
        dict with keys ``'BTCUSDT'`` and ``'ETHUSDT'``, each a DataFrame
        indexed by timestamp with columns ``symbol``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``.

    Raises:
        ValueError: If ``correlation`` is outside [-1, 1]; the square root in
            the mixing weight would otherwise silently yield NaN prices.
    """
    if not -1.0 <= correlation <= 1.0:
        raise ValueError(f"correlation must be within [-1, 1], got {correlation}")

    # A Timedelta frequency sidesteps the 'H' string alias, which recent
    # pandas releases deprecate.
    dates = pd.date_range(start='2023-01-01', periods=n_periods, freq=pd.Timedelta(hours=1))

    # Private generator: reproducible without reseeding NumPy's global state.
    rng = np.random.default_rng(seed)

    base_drift = 0.0001
    base_vol = 0.02    # BTC-like volatility
    hedge_vol = 0.015

    base_shocks = rng.standard_normal(n_periods)
    hedge_shocks = rng.standard_normal(n_periods)

    base_returns = base_drift + base_vol * base_shocks

    # Mixing unit-variance shocks keeps the mixture at unit variance, so the
    # hedge leg has volatility `hedge_vol` and correlation `correlation`.
    mixed_shocks = correlation * base_shocks + np.sqrt(1 - correlation**2) * hedge_shocks
    hedge_returns = hedge_vol * mixed_shocks

    # Convert to prices by compounding the simple returns from a start level.
    base_prices = 20000 * np.cumprod(1 + base_returns)
    hedge_prices = 1500 * np.cumprod(1 + hedge_returns)

    return {
        'BTCUSDT': _make_ohlcv_frame('BTCUSDT', base_prices, rng.lognormal(5, 1, n_periods), dates),
        'ETHUSDT': _make_ohlcv_frame('ETHUSDT', hedge_prices, rng.lognormal(4, 1, n_periods), dates)
    }

# Build the synthetic dataset that the later cells read from `data`,
# then report its size and time span using the BTC leg's index.
data = generate_crypto_pair_data(n_periods=2000)

btc_index = data['BTCUSDT'].index
print(f"Generated {len(btc_index)} periods of data")
print(f"Date range: {btc_index[0]} to {btc_index[-1]}")

## Price and Spread Analysis

In [None]:
# Overview figure (2x2): close prices on the top row; compounded cumulative
# returns and the per-period return spread on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

btc_close = data['BTCUSDT']['close']
eth_close = data['ETHUSDT']['close']

# Price series
axes[0, 0].plot(btc_close.index, btc_close, label='BTC/USDT', color='orange')
axes[0, 0].set_title('BTC Price')
axes[0, 0].set_ylabel('Price (USDT)')
axes[0, 0].legend()

axes[0, 1].plot(eth_close.index, eth_close, label='ETH/USDT', color='blue')
axes[0, 1].set_title('ETH Price')
axes[0, 1].set_ylabel('Price (USDT)')
axes[0, 1].legend()

# Simple per-period returns; pct_change leaves a NaN in the first row, which
# dropna removes.
btc_returns = btc_close.pct_change().dropna()
eth_returns = eth_close.pct_change().dropna()

# Simple returns compound multiplicatively, so the cumulative return is
# prod(1 + r) - 1. A plain cumsum() only approximates this and drifts away
# from the true value as the horizon and volatility grow.
btc_cumulative = (1 + btc_returns).cumprod() - 1
eth_cumulative = (1 + eth_returns).cumprod() - 1

axes[1, 0].plot(btc_cumulative.index, btc_cumulative, label='BTC Cumulative Returns', color='orange')
axes[1, 0].plot(eth_cumulative.index, eth_cumulative, label='ETH Cumulative Returns', color='blue')
axes[1, 0].set_title('Cumulative Returns')
axes[1, 0].set_ylabel('Cumulative Return')
axes[1, 0].legend()

# Spread analysis: difference of per-period returns (BTC minus ETH).
spread = btc_returns - eth_returns
axes[1, 1].plot(spread.index, spread, alpha=0.7, color='green')
axes[1, 1].set_title('Return Spread (BTC - ETH)')
axes[1, 1].set_ylabel('Spread')
axes[1, 1].axhline(0, color='black', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

# Correlation analysis
correlation = btc_returns.corr(eth_returns)
print(f"\nBTC-ETH Return Correlation: {correlation:.3f}")
print(f"Spread Volatility: {spread.std():.4f}")
print(f"Spread Mean: {spread.mean():.6f}")

## Z-Score Analysis

In [None]:
# Rolling z-score study: compute the spread z-score bar by bar, then plot its
# time series, its distribution, and the bars that cross the entry/exit
# thresholds.
#
# The lookback and both thresholds are defined once here and drive every line,
# label, mask and printout below, so the annotations cannot drift out of sync
# with the computation.
lookback = 100
ENTRY_Z = 2.0  # |z| at or above this counts as an entry opportunity
EXIT_Z = 0.5   # |z| at or below this counts as an exit opportunity

zscores = []
spreads = []
timestamps = []

btc_prices = data['BTCUSDT']['close']
eth_prices = data['ETHUSDT']['close']

# Each call receives all prices up to and including bar i, so the value at
# bar i never uses later data. The third return value is not needed here.
for i in range(lookback, len(btc_prices)):
    zscore, spread_val, _ = compute_spread_zscore(
        btc_prices.iloc[:i + 1],
        eth_prices.iloc[:i + 1],
        lookback
    )
    if np.isnan(zscore):
        continue  # skip bars where no z-score could be computed
    zscores.append(zscore)
    spreads.append(spread_val)
    timestamps.append(btc_prices.index[i])

# Explicit float dtype keeps the series numeric even if no bar qualified.
zscore_series = pd.Series(zscores, index=timestamps, dtype=float)
spread_series = pd.Series(spreads, index=timestamps)

z_mean = zscore_series.mean()
z_std = zscore_series.std()

# Plot z-score analysis
fig, axes = plt.subplots(3, 1, figsize=(15, 12))

# Z-score time series
axes[0].plot(zscore_series.index, zscore_series, color='purple', alpha=0.8)
axes[0].axhline(ENTRY_Z, color='red', linestyle='--', label=f'Entry Threshold (+{ENTRY_Z:g})')
axes[0].axhline(-ENTRY_Z, color='red', linestyle='--', label=f'Entry Threshold (-{ENTRY_Z:g})')
axes[0].axhline(EXIT_Z, color='green', linestyle='--', label=f'Exit Threshold (+{EXIT_Z:g})')
axes[0].axhline(-EXIT_Z, color='green', linestyle='--', label=f'Exit Threshold (-{EXIT_Z:g})')
axes[0].axhline(0, color='black', linestyle='-', alpha=0.3)
axes[0].set_title(f'Rolling Z-Score ({lookback}-period lookback)')
axes[0].set_ylabel('Z-Score')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Z-score distribution with the mean and +/- one standard deviation marked
axes[1].hist(zscore_series, bins=50, alpha=0.7, color='purple', edgecolor='black')
axes[1].axvline(z_mean, color='red', linestyle='--', label=f'Mean: {z_mean:.3f}')
axes[1].axvline(z_mean + z_std, color='orange', linestyle='--', alpha=0.7)
axes[1].axvline(z_mean - z_std, color='orange', linestyle='--', alpha=0.7)
axes[1].set_title('Z-Score Distribution')
axes[1].set_xlabel('Z-Score')
axes[1].set_ylabel('Frequency')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

# Signal opportunities: boolean masks over the z-score series
entry_signals = np.abs(zscore_series) >= ENTRY_Z
exit_signals = np.abs(zscore_series) <= EXIT_Z

axes[2].plot(zscore_series.index, zscore_series, color='purple', alpha=0.6, label='Z-Score')
axes[2].scatter(zscore_series[entry_signals].index, zscore_series[entry_signals],
               color='red', s=30, label=f'Entry Signals ({entry_signals.sum()})', zorder=5)
axes[2].scatter(zscore_series[exit_signals].index, zscore_series[exit_signals],
               color='green', s=15, label=f'Exit Signals ({exit_signals.sum()})', zorder=5)
axes[2].axhline(ENTRY_Z, color='red', linestyle='--', alpha=0.5)
axes[2].axhline(-ENTRY_Z, color='red', linestyle='--', alpha=0.5)
axes[2].axhline(0, color='black', linestyle='-', alpha=0.3)
axes[2].set_title('Trading Signals')
axes[2].set_xlabel('Date')
axes[2].set_ylabel('Z-Score')
axes[2].legend()
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# With no valid z-scores the ratio would be 0/0; report 0% instead of NaN.
signal_frequency = entry_signals.sum() / len(zscore_series) if len(zscore_series) else 0.0

print(f"\nZ-Score Statistics:")
print(f"Mean: {z_mean:.4f}")
print(f"Std: {z_std:.4f}")
print(f"Min: {zscore_series.min():.4f}")
print(f"Max: {zscore_series.max():.4f}")
print(f"\nEntry signals (|z| >= {ENTRY_Z:.1f}): {entry_signals.sum()}")
print(f"Exit signals (|z| <= {EXIT_Z:.1f}): {exit_signals.sum()}")
print(f"Signal frequency: {signal_frequency:.2%}")

## Strategy Signal Generation

In [None]:
# Smoke-test `compute_signals`: start from the default configuration, override
# the first pair's thresholds and lookback, and run it on a short data window.
config = create_default_config()
pair_overrides = {'entry_z': 2.0, 'exit_z': 0.5, 'lookback': 100}
for option, value in pair_overrides.items():
    config['pairs'][0][option] = value

# Rows 500-599 of each leg (100 periods).
# NOTE(review): the window length equals the lookback, whereas the rolling
# z-score cell never calls the helper with fewer than lookback + 1 rows --
# confirm that `compute_signals` has enough history with exactly 100 rows.
test_data = {
    symbol: data[symbol].iloc[500:600]
    for symbol in ('BTCUSDT', 'ETHUSDT')
}

signals = compute_signals(test_data, config)

print(f"Generated {len(signals)} signals from test data")

# One summary per signal, numbered from 1.
for number, signal in enumerate(signals, start=1):
    print(f"\nSignal {number}:")
    print(f"  Pair: {signal.pair_name}")
    print(f"  Z-Score: {signal.spread_zscore:.3f}")
    print(f"  Reason: {signal.entry_reason}")
    base_leg = signal.base_signal
    print(f"  Base: {base_leg.side} {base_leg.size:.4f} {base_leg.symbol}")
    hedge_leg = signal.hedge_signal
    print(f"  Hedge: {hedge_leg.side} {hedge_leg.size:.4f} {hedge_leg.symbol}")
    print(f"  Confidence: {signal.base_signal.confidence:.2f}")

## Parameter Sensitivity Analysis

In [None]:
# Parameter sweep: count the signals `compute_signals` produces for every
# (entry threshold, lookback) combination, then show the counts and
# frequencies as heatmaps.
entry_thresholds = [1.0, 1.5, 2.0, 2.5, 3.0]
lookback_periods = [50, 100, 150, 200]

# Use more data for parameter testing. The window does not depend on the
# parameters, so it is sliced once rather than on every iteration.
param_test_data = {
    'BTCUSDT': data['BTCUSDT'].iloc[200:1200],
    'ETHUSDT': data['ETHUSDT'].iloc[200:1200]
}
n_test_periods = len(param_test_data['BTCUSDT'])

results = []

for entry_z in entry_thresholds:
    # Distinct loop-variable and result names keep this sweep from overwriting
    # the `lookback` and `signals` globals set by earlier cells.
    for lookback_period in lookback_periods:
        # Deep copy: `config.copy()` is shallow, so the nested pair dict would
        # be shared and every assignment below would also rewrite `config`.
        config_test = copy.deepcopy(config)
        config_test['pairs'][0]['entry_z'] = entry_z
        config_test['pairs'][0]['lookback'] = lookback_period

        sweep_signals = compute_signals(param_test_data, config_test)

        results.append({
            'entry_z': entry_z,
            'lookback': lookback_period,
            'signal_count': len(sweep_signals),
            'signal_frequency': len(sweep_signals) / n_test_periods,
            'avg_zscore': np.mean([abs(s.spread_zscore) for s in sweep_signals]) if sweep_signals else 0.0
        })

results_df = pd.DataFrame(results)

# Create parameter sensitivity heatmaps (rows: entry threshold, columns: lookback)
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Signal count heatmap
signal_count_pivot = results_df.pivot(index='entry_z', columns='lookback', values='signal_count')
sns.heatmap(signal_count_pivot, annot=True, fmt='.0f', cmap='YlOrRd', ax=axes[0])
axes[0].set_title('Signal Count by Parameters')
axes[0].set_ylabel('Entry Z-Score Threshold')
axes[0].set_xlabel('Lookback Period')

# Signal frequency heatmap
signal_freq_pivot = results_df.pivot(index='entry_z', columns='lookback', values='signal_frequency')
sns.heatmap(signal_freq_pivot, annot=True, fmt='.3f', cmap='YlGnBu', ax=axes[1])
axes[1].set_title('Signal Frequency by Parameters')
axes[1].set_ylabel('Entry Z-Score Threshold')
axes[1].set_xlabel('Lookback Period')

plt.tight_layout()
plt.show()

# Display parameter analysis
print("Parameter Sensitivity Analysis:")
print(results_df.groupby('entry_z')['signal_frequency'].mean().to_string())
print("\nBest parameters (highest signal quality):")
if results_df['signal_count'].eq(0).all():
    # Every avg_zscore is 0 here, so idxmax would just return the first row.
    print("No signals were generated for any parameter combination.")
else:
    best_params = results_df.loc[results_df['avg_zscore'].idxmax()]
    # Selecting a row upcasts it to float; cast lookback back to int for display.
    print(f"Entry Z: {best_params['entry_z']}, Lookback: {int(best_params['lookback'])}, Avg Z-Score: {best_params['avg_zscore']:.3f}")

## Next Steps

1. **Backtest the strategy** using `BacktestEngine`
2. **Optimize parameters** using walk-forward analysis
3. **Add more sophisticated features** (volatility filtering, regime detection)
4. **Test on real historical data** from Binance API
5. **Implement risk management** rules and position sizing

This analysis shows the core mechanics of the neutral pairs strategy. The next notebook will focus on backtesting and performance evaluation.