In [40]:
import sys
sys.path.append('../src')

from pairs_trading import PairsTradingStrategy
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings("ignore")

print("Imports successful!")

Imports successful!


In [52]:
ticker1 = 'KO'
ticker2 = 'PEP'

start_date = '2022-01-01'
end_date = '2025-01-01'

ENTRY_THRESHOLD = 2.0
EXIT_THRESHOLD = 0.5
INITIAL_CAPITAL = 100000
TRANSACTION_COST = 0.001

print(f"Pair: {ticker1} vs {ticker2}")
print(f"Period: {start_date} to {end_date}")
print(f"Strategy: Mean reversion on cointegrated spread")

Pair: KO vs PEP
Period: 2022-01-01 to 2025-01-01
Strategy: Mean reversion on cointegrated spread


In [53]:
print("=" * 60)
print("BUSINESS RATIONALE FOR KO/PEP PAIR")
print("=" * 60)
print("""
Why this pair should be cointegrated:

1. Market Structure: Duopoly in carbonated soft drinks
   - Combined ~50% market share in US beverage industry
   - Similar product portfolios (soda, juice, sports drinks)
   
2. Common Drivers:
   - Consumer spending patterns
   - Commodity input costs (sugar, aluminum)
   - Retail distribution channels
   - Marketing/advertising cycles
   
3. Business Model Similarities:
   - Brand-focused consumer packaged goods
   - Franchise bottling systems
   - International revenue exposure
   
4. Expected Relationship:
   - Both stocks react to same macro factors
   - Temporary mispricings create arbitrage opportunities
   - Spread should revert to equilibrium over time

This is NOT just statistical pattern matching - there's a 
fundamental economic reason these stocks move together.
""")

BUSINESS RATIONALE FOR KO/PEP PAIR

Why this pair should be cointegrated:

1. Market Structure: Duopoly in carbonated soft drinks
   - Combined ~50% market share in US beverage industry
   - Similar product portfolios (soda, juice, sports drinks)

2. Common Drivers:
   - Consumer spending patterns
   - Commodity input costs (sugar, aluminum)
   - Retail distribution channels
   - Marketing/advertising cycles

3. Business Model Similarities:
   - Brand-focused consumer packaged goods
   - Franchise bottling systems
   - International revenue exposure

4. Expected Relationship:
   - Both stocks react to same macro factors
   - Temporary mispricings create arbitrage opportunities
   - Spread should revert to equilibrium over time

This is NOT just statistical pattern matching - there's a 
fundamental economic reason these stocks move together.



In [54]:
strategy = PairsTradingStrategy(ticker1, ticker2, start_date, end_date)

prices = strategy.fetch_data()

print("\nPrice Summary")
print(prices.describe())

coint_result = strategy.test_cointegration(significance_level=0.05)

if not coint_result['is_cointegrated']:
    print("\n WARNING: Pair is NOT cointegrated at 5% significance level")
    print("Strategy may not be viable - consider different pair or time period")
else:
    print("\n Pair is cointegrated - proceeding with strategy")

fetching data for KO and PEP...
downloaded 753 days of data

Price Summary
               KO         PEP
count  753.000000  753.000000
mean    57.348757  158.572869
std      4.031220    7.753455
min     48.981018  136.502457
25%     54.931126  153.406097
50%     56.543877  157.720978
75%     58.745338  164.477249
max     69.904633  178.833298

Cointegration Test Results:
Test Statistics: -1.3725
P-value: 0.8068
Cointegrated at 0.05 level: False

Strategy may not be viable - consider different pair or time period


In [55]:
#visualize price series
fig = make_subplots(
    rows = 2, cols = 1,
    subplot_titles = ('Normalized Prices', 'Price Ratio'),
    vertical_spacing = 0.12,
    row_heights= [0.6, 0.4]
)

norm_ko = (prices['KO'] / prices['KO'].iloc[0]) * 100
norm_pep = (prices['PEP'] / prices['PEP'].iloc[0]) * 100

fig.add_trace(
    go.Scatter(x = prices.index, y = norm_ko, name = 'KO(Normalized)',
            line = dict(color = 'red', width = 2)),
    row = 1, col = 1
)

fig.add_trace(
    go.Scatter(x = prices.index, y = norm_pep, name = 'PEP(Normalized)',
            line = dict(color = 'blue', width = 2)),
    row = 1, col = 1
)

ratio = prices['KO'] / prices['PEP']
fig.add_trace(
    go.Scatter(x = prices.index, y = ratio, name = 'KO/PEP Ratio',
            line = dict(color = 'purple', width = 2)),
    row = 2, col = 1
)

fig.add_hline(y = ratio.mean(), line_dash = 'dash', line_color = 'gray',
              annotation_text = 'Mean', annotation_position = 'top left',
              row = 2, col = 1)

fig.update_layout(
    height = 700,
    title_text = "Coca-Cola (KO) vs PepsiCo (PEP) Price Relationship Analysis",
    template = 'plotly_white',
    showlegend = True
)

fig.update_xaxes(title_text = "Date", row = 2, col = 1)
fig.update_yaxes(title_text = "Price (Normalized)", row = 1, col = 1)
fig.update_yaxes(title_text = "KO/PEP Ratio", row = 2, col = 1)

fig.write_html('../visualizations/price_relationship.html')
print("Saved: visualizations/price_relationship.html")

Saved: visualizations/price_relationship.html


In [56]:
#calulate spread and generate signals

spread, zscore = strategy.calculate_spread()

signals = strategy.generate_signals(
    entry_threshold = ENTRY_THRESHOLD,
    exit_threshold = EXIT_THRESHOLD
)

print(f"\nSignal Distribution:")
print(signals['position'].value_counts())


Spread Calculation:
Hedge Ratio (beta): 0.2108
R-squared: 0.1644
Mean Spread: 23.9233
Std Dev Spread: 3.6851

Signal Generation:
Entry Threshold: ±2.0 std devs
Exit Threshold: ±0.5 std devs
Total Trades: 39

Signal Distribution:
position
 1.0    688
-1.0     46
 0.0     19
Name: count, dtype: int64


In [57]:
#visualize spread and trading signals

fig = make_subplots(
    rows = 3, cols = 1,
    subplot_titles = ('Spread (KO - beta*PEP)', 'Z-Score with Entry/Exit Thresholds', 'Trading Positions'),
    vertical_spacing = 0.08,
    row_heights = [0.35, 0.35, 0.3]
)

#spread
fig.add_trace(
    go.Scatter(x = spread.index, y = spread, name = 'Spread',
            line = dict(color = 'green', width = 1)),
    row = 1, col = 1
)
fig.add_hline(y = spread.mean(), line_dash = 'dash', line_color = 'gray',
              annotation_text = 'Mean', annotation_position = 'top left',
              row = 1, col = 1)

#z-score with thresholds
fig.add_trace(
    go.Scatter(x = zscore.index, y = zscore, name = 'Z-Score',
               line = dict(color = 'orange', width = 1)),
    row = 2, col = 1
)

fig.add_hline(y = ENTRY_THRESHOLD, line_dash = 'dash', line_color = 'red',
              annotation_text = f"Entry (+{ENTRY_THRESHOLD}σ)", row = 2, col = 1)
fig.add_hline(y = -ENTRY_THRESHOLD, line_dash = 'dash', line_color = 'red',
              annotation_text = f"Entry (-{ENTRY_THRESHOLD}σ)", row = 2, col = 1)
fig.add_hline(y = EXIT_THRESHOLD, line_dash = 'dot', line_color = 'blue',
                annotation_text = f"Exit (+{EXIT_THRESHOLD}σ)", row = 2, col = 1)
fig.add_hline(y = -EXIT_THRESHOLD, line_dash = 'dot', line_color = 'blue',
                annotation_text = f"Exit (-{EXIT_THRESHOLD}σ)", row = 2, col = 1)
fig.add_hline(y = 0, line_color = "gray", row = 2, col = 1)

#positions
fig.add_trace(
    go.Scatter(x = signals.index, y = signals['position'], name = 'Position',
               mode = 'lines', line = dict(color = 'purple', width = 2),
               fill = 'tozeroy'),
    row = 3, col = 1
)

fig.update_layout(
    height = 900,
    title_text = "Pairs Trading: Spread Analysis and Signal Generation for KO/PEP",
    template = 'plotly_white',
    showlegend = True
)

fig.update_xaxes(title_text = "Date", row = 3, col = 1)
fig.update_yaxes(title_text = "Spread Value", row = 1, col = 1)
fig.update_yaxes(title_text = "Standard Deviations", row = 2, col = 1)
fig.update_yaxes(title_text = "Position (1=Long KO/Short PEP, -1=Short KO/Long PEP)", row = 3, col = 1)

fig.write_html('../visualizations/spread_and_signals.html')
print("Saved: visualizations/spread_and_signals.html")

Saved: visualizations/spread_and_signals.html


In [58]:
#run backtest

portfolio = strategy.backtest(
    initial_capital = INITIAL_CAPITAL,
    transaction_cost = TRANSACTION_COST
)

metrics = strategy.calculate_performance_metrics()


STRATEGY PERFORMANCE METRICS
Total Return -0.19%
Annualized Return -0.06%
Volatility 12.92%
Sharpe Ratio -0.005
Max Drawdown -15.57%
Win Rate 52.04%
Total Trades 39
Benchmark Return 5.48%
Outperformace -5.67%


In [59]:
#performance visualization

fig = make_subplots(
    rows = 2, cols = 1,
    subplot_titles = ('Portfolio Value Over Time', 'Drawdown'),
    vertical_spacing = 0.12,
    row_heights = [0.7, 0.3]
)

#portfolio value
fig.add_trace(
    go.Scatter(x = portfolio.index, y = portfolio['portfolio_value'], name = 'Pairs Strategy',
               line = dict(color = 'green', width = 2)),
    row = 1, col = 1
)

fig.add_trace(
    go.Scatter(x = portfolio.index, y = portfolio['benchmark_value'], name = 'Buy & Hold Benchmark',
               line = dict(color = 'gray', width = 2, dash = 'dash')),
    row = 1, col = 1
)

fig.add_hline(y = INITIAL_CAPITAL, line_dash = 'dot', line_color = 'black',
               annotation_text = 'Initial Capital',
               row = 1, col = 1)

#drawdown
cumulative = portfolio['cumulative_return']
running_max = cumulative.cummax()
drawdown = (cumulative - running_max) / running_max * 100

fig.add_trace(
    go.Scatter(x = portfolio.index, y = drawdown, name = 'Drawdown (%)',
               line = dict(color = 'red', width = 1), fill = 'tozeroy'),
    row = 2, col = 1
)

fig.update_layout(
    height = 700,
    title_text = f"Pairs Trading Strategy Performance: {ticker1} vs {ticker2}",
    template = 'plotly_white',
    showlegend = True
)

fig.update_xaxes(title_text = "Date", row = 2, col = 1)
fig.update_yaxes(title_text = "Portfolio Value ($)", row = 1, col = 1)
fig.update_yaxes(title_text = "Drawdown (%)", row = 2, col = 1)

fig.write_html('../visualizations/strategy_performance.html')
print("Saved: visualizations/strategy_performance.html")

Saved: visualizations/strategy_performance.html


In [60]:
#analyze individual trades
trade_dates = signals[signals['position'] > 0].index

print(f"\nTrade History (First 10 Trades):")
print("=" * 60)

for i, date in enumerate(trade_dates[:10]):
    position = signals.loc[date, 'position']
    zscore_val = signals.loc[date, 'zscore']
    
    if position == 1:
        action = "LONG SPREAD (Long KO, Short PEP)"
    elif position == -1:
        action = "SHORT SPREAD (Short KO, Long PEP)"
    else:
        action = "EXIT POSITION"
    print(f"{date.date()} : {action} | Z-Score: {zscore_val:.2f}")


Trade History (First 10 Trades):
2022-01-31 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.41
2022-02-01 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 0.90
2022-02-02 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 0.70
2022-02-03 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.96
2022-02-04 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.58
2022-02-09 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 0.90
2022-02-14 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.51
2022-02-15 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.77
2022-02-16 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.47
2022-02-22 : LONG SPREAD (Long KO, Short PEP) | Z-Score: 1.56


In [61]:
#monthly returns heatmap

monthly_returns = portfolio['strategy_return'].resample('ME').apply(lambda x: (1 + x).prod() - 1)

monthly_returns_df = pd.DataFrame({
    'Year': monthly_returns.index.year,
    'Month': monthly_returns.index.month,
    'Return': monthly_returns.values * 100
})

pivot = monthly_returns_df.pivot(index = 'Month', columns = 'Year', values = 'Return')

fig = go.Figure(data = go.Heatmap(
    z = pivot.values,
    x = pivot.columns,
    y = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
    ],
    colorscale = 'RdYlGn',
    zmid = 0,
    text = np.round(pivot.values, 2),
    texttemplate = "%{text.1f}%",
    textfont = {"size": 10},
    colorbar = dict(title = "Return (%)")
))

fig.update_layout(
    title = f"Monthly Returns Heatmap",
    xaxis_title = "Year",
    yaxis_title = "Month",
    template = 'plotly_white',
    height = 500
)

fig.write_html('../visualizations/monthly_returns_heatmap.html')
print("Saved: visualizations/monthly_returns_heatmap.html")


Saved: visualizations/monthly_returns_heatmap.html


In [63]:
# Create comprehensive summary
summary = pd.DataFrame({
    'Metric': [
        'Trading Pair',
        'Backtest Period',
        'Initial Capital',
        'Final Portfolio Value',
        'Total Return',
        'Annualized Return',
        'Volatility (Annual)',
        'Sharpe Ratio',
        'Maximum Drawdown',
        'Win Rate',
        'Total Trades',
        'Hedge Ratio (β)',
        'Cointegration P-Value',
        'Benchmark Return',
        'Outperformance'
    ],
    'Value': [
        f"{ticker1} / {ticker2}",
        f"{start_date} to {end_date}",
        f"${INITIAL_CAPITAL:,.0f}",
        f"${portfolio['portfolio_value'].iloc[-1]:,.2f}",
        metrics['Total Return'],
        metrics['Annualized Return'],
        metrics['Volatility'],
        metrics['Sharpe Ratio'],
        metrics['Max Drawdown'],
        metrics['Win Rate'],
        metrics['Total Trades'],
        f"{strategy.hedge_ratio:.4f}",
        f"{coint_result['p_value']:.4f}",
        metrics['Benchmark Return'],
        metrics['Outperformace']
    ]
})

print("\n" + "="*60)
print("STRATEGY SUMMARY")
print("="*60)
display(summary)

summary.to_csv('../data/strategy_summary.csv', index=False)
print("\nSaved: data/strategy_summary.csv")


STRATEGY SUMMARY


Unnamed: 0,Metric,Value
0,Trading Pair,KO / PEP
1,Backtest Period,2022-01-01 to 2025-01-01
2,Initial Capital,"$100,000"
3,Final Portfolio Value,"$99,809.52"
4,Total Return,-0.19%
5,Annualized Return,-0.06%
6,Volatility (Annual),12.92%
7,Sharpe Ratio,-0.005
8,Maximum Drawdown,-15.57%
9,Win Rate,52.04%



Saved: data/strategy_summary.csv
