In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
import matplotlib.pyplot as plt

# 1. Download adjusted close prices (auto_adjust=True, so "Close" is the adjusted series)
tickers = [
    "AXISBANK.NS", "KOTAKBANK.NS", "ICICIBANK.NS",
    "INDUSINDBK.NS", "FEDERALBNK.NS", "HDFCBANK.NS"
]

# dropna() keeps only the dates on which every ticker has a price.
data = yf.download(tickers, start="2024-01-01", auto_adjust=True)["Close"].dropna()

# Fail fast when nothing usable came back. Presumably a failed download (or one
# ticker returning only NaN) surfaces here as zero complete rows rather than as an
# exception -- TODO confirm against the installed yfinance version. Without this
# guard the later cells fail with errors unrelated to the real cause.
if data.empty:
    raise ValueError(
        "No complete price rows were downloaded; check the ticker list and network access."
    )

# 2. Transform to log prices for statistical stability
log_data = np.log(data)

In [None]:
def screen_pairs(df, alpha=0.05):
    """
    Screen every unordered pair of columns for cointegration.

    Each pair is passed once to ``coint`` (Engle-Granger two-step test), with
    the earlier column first.  The null hypothesis of that test is "no
    cointegration", so a pair is kept only when its p-value is strictly
    below ``alpha``.

    Parameters
    ----------
    df : DataFrame holding one series per column.
    alpha : significance threshold applied to the p-value.

    Returns
    -------
    DataFrame with columns ``Stock_A``, ``Stock_B`` and ``P_Value``, sorted by
    ascending ``P_Value`` (empty, with the same columns, when no pair passes).
    """
    names = df.columns
    hits = []

    for pos, first in enumerate(names):
        for second in names[pos + 1:]:
            # coint returns (t-statistic, p-value, critical values); only the p-value is needed
            p = coint(df[first], df[second])[1]
            if p < alpha:
                hits.append((first, second, p))

    table = pd.DataFrame(hits, columns=["Stock_A", "Stock_B", "P_Value"])
    return table.sort_values("P_Value")

# Run the cointegration screen over the log-price frame and show the surviving pairs
valid_pairs = screen_pairs(log_data)
print("Verified Cointegrated Pairs:", valid_pairs, sep="\n")

In [None]:
def select_best_direction(df, stock_a, stock_b, window=180):
    """
    Determine which stock should be the dependent variable (Y).

    Both regression directions are tested over the last ``window`` rows with
    the Engle-Granger test (``coint``), and the direction with the lower
    p-value is chosen.

    ``coint`` is used rather than ``adfuller`` on hand-built OLS residuals:
    the residuals come from an *estimated* regression, so the standard
    Dickey-Fuller p-values reported by ``adfuller`` are not valid for them
    and overstate the evidence for stationarity.  ``coint`` runs the same
    residual unit-root test but reports p-values from the cointegration
    distribution, which also keeps this function consistent with the
    pair screening done via ``coint``.

    Parameters
    ----------
    df : DataFrame holding one series per column.
    stock_a, stock_b : column labels of the two candidate series.
    window : number of most recent rows used for the test.

    Returns
    -------
    tuple ``(dependent, independent, p_value)`` where ``p_value`` is the
    Engle-Granger p-value of the chosen direction.  On a tie the
    ``stock_b``-on-``stock_a`` direction is returned.
    """
    s1 = df[stock_a].tail(window)
    s2 = df[stock_b].tail(window)

    # Direction 1: A = alpha + beta * B + epsilon  (coint regresses its first argument on the second)
    p1 = coint(s1, s2)[1]

    # Direction 2: B = alpha + beta * A + epsilon
    p2 = coint(s2, s1)[1]

    if p1 < p2:
        return stock_a, stock_b, p1
    return stock_b, stock_a, p2

# Primary pair to analyse. The pair is hard-coded, so cross-check it against the
# screening output instead of silently assuming that it passed the screen.
pair_a, pair_b = "HDFCBANK.NS", "ICICIBANK.NS"
screened_pairs = {
    frozenset(pair) for pair in zip(valid_pairs["Stock_A"], valid_pairs["Stock_B"])
}
if frozenset((pair_a, pair_b)) not in screened_pairs:
    print(f"Warning: {pair_a} / {pair_b} is not among the screened cointegrated pairs.")

# Decide which leg is the dependent variable (Y) and which the regressor (X)
Y_name, X_name, adf_p = select_best_direction(log_data, pair_a, pair_b)
print(f"Optimal Direction: {Y_name} follows {X_name}")
print(f"Residual ADF p-value: {adf_p:.4f}")

In [None]:
# Two windows are used from here on:
#   lookback      - number of most recent rows used to fit the hedge-ratio regression
#   signal_window - shorter window used for the rolling z-score of the spread
# Keep `lookback` equal to the window used by select_best_direction (default 180);
# otherwise the direction and the hedge ratio are estimated on different samples.
lookback = 180
signal_window = 60

Y = log_data[Y_name].tail(lookback)
X = log_data[X_name].tail(lookback)


# Regression: Y = alpha + beta * X
model = sm.OLS(Y, sm.add_constant(X)).fit()

# Read the coefficients by label rather than by position, so that a change in the
# design-matrix layout raises a KeyError instead of silently swapping alpha and beta.
alpha = model.params["const"]
beta  = model.params[X.name]

print(f"Regression Parameters for {lookback}D Window:")
print(f"Alpha (Intercept): {alpha:.4f}")
print(f"Beta (Hedge Ratio): {beta:.4f}")

In [None]:
# 1. Construct the Spread (the residual of the fitted regression Y = alpha + beta * X)
spread = Y - (beta * X + alpha)

# 2. Calculate Rolling Z-Score (the first signal_window - 1 values are NaN by construction)
rolling_mean = spread.rolling(signal_window).mean()
rolling_std  = spread.rolling(signal_window).std()

z_score = (spread - rolling_mean) / rolling_std

# 3. Final Stationarity Check on the Spread itself
# The spread is built from an estimated alpha/beta, so the standard Dickey-Fuller
# p-value from adfuller(spread) is not valid for it and overstates stationarity.
# coint(Y, X) regresses Y on X with a constant (the same regression as above) and
# tests those residuals against Engle-Granger critical values instead.
final_adf = coint(Y, X)[1]
print(f"Live Spread Stationarity (p-value): {final_adf:.4f}")

In [None]:
# Draw the z-score with its entry (+/-2) and exit (0) levels on an explicit Axes
fig, ax = plt.subplots(figsize=(12, 6))
z_score.plot(ax=ax, label="Z-Score", lw=1.5, color='royalblue')

ax.axhline(2, color='red', linestyle="--", label="+2 Entry (Short Spread)")
ax.axhline(-2, color='green', linestyle="--", label="-2 Entry (Long Spread)")
ax.axhline(0, color='black', alpha=0.5, label="Mean Exit")

ax.set_title(f"Z-Score Signal: {Y_name} vs {X_name}")
ax.set_ylabel("Standard Deviations")
ax.legend(loc='upper left')
ax.grid(alpha=0.3)

# Most recent z-score value, i.e. the current signal reading
latest_z = z_score.iloc[-1]
print(f"Latest Z-Score: {latest_z:.2f}")