In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import blpapi
import scipy
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from scipy import stats
#from xbbg import blp


# NOTE(review): absolute, user-specific location of the pairs CSV — this only
# resolves on a machine with this exact directory layout; consider a path
# relative to the project instead.
path = '/Users/tobiastschuemperlin/Developer/Algorithmic_Trading/src/data/data pairs.csv'

# CSV loader for the pairs data set
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame indexed by its first column.

    The first column is used as the index and parsed as dates; the index is
    labelled ``'date'`` before the frame is returned.
    """
    frame = pd.read_csv(path, parse_dates=True, index_col=0)
    return frame.rename_axis('date')



In [10]:
def find_cointegrated_pairs(data, significance=0.05):
    """Screen every pair of columns in ``data`` for cointegration.

    Each unordered pair of columns (earlier column first) is tested once with
    ``ts.coint``; a pair is kept when the test's p-value is strictly below
    ``significance``.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per asset.
    significance : float, default 0.05
        p-value cut-off below which a pair is reported.

    Returns
    -------
    pandas.DataFrame
        One row per retained pair, sorted by ascending p-value, with the
        positional columns ``0`` (first asset), ``1`` (second asset) and
        ``2`` (p-value). The three columns are present even when no pair
        qualifies.
    """
    columns = list(data.columns)
    pairs = []

    # Visit each unordered pair once, keeping the original column order.
    for i, first in enumerate(columns):
        for second in columns[i + 1:]:
            # Element 1 of the coint result is the p-value.
            pvalue = ts.coint(data[first], data[second])[1]
            if pvalue < significance:
                pairs.append((first, second, pvalue))

    # Strongest evidence (smallest p-value) first.
    pairs.sort(key=lambda pair: pair[2])

    # Explicit labels keep the 0/1/2 layout stable when `pairs` is empty;
    # without them an empty result would have no columns at all.
    return pd.DataFrame(pairs, columns=[0, 1, 2])

In [11]:
# `data` is not assigned in any earlier cell, so load it here to keep the
# notebook runnable top-to-bottom on a fresh kernel.
data = load_data(path)
find_cointegrated_pairs(data)

Unnamed: 0,0,1,2
0,/isin/CH0432492467,/isin/CH0531751755,0.003754
1,/isin/CH0012142631,/isin/CH0012138530,0.006012
2,/isin/CH0315966322,/isin/CH0010645932,0.011291
3,/isin/CH0012142631,/isin/CH0010645932,0.024629
4,/isin/CH1176493729,/isin/CH0360826991,0.027511
5,/isin/CH0012142631,/isin/CH0030170408,0.040174
6,/isin/CH0315966322,/isin/CH0030170408,0.041267
7,/isin/CH0432492467,/isin/CH0009002962,0.04174
8,/isin/CH0432492467,/isin/CH0360826991,0.046067
9,/isin/CH0012142631,/isin/CH0466642201,0.049355


In [12]:
# parameters
window = 20  # number of preceding rows per regression; passed to calc_dynamic_hedge_ratio_ols
threshold = 2  # NOTE(review): not used in the visible cells — presumably a spread/z-score trigger level; confirm

In [13]:
def calc_dynamic_hedge_ratio_ols(data, window):
    """
    Calculates rolling hedge ratio using OLS

    For every row position ``i`` from ``window`` onward, column 0 is regressed
    on column 1 (plus an intercept) over the ``window`` rows immediately
    before ``i``. The fitted slope is the hedge ratio applied to row ``i``.

    Parameters
    ----------
    data : pandas.DataFrame
        Column 0 is the dependent series, column 1 the regressor.
    window : int
        Number of preceding rows used for each regression.

    Returns
    -------
    hedge_ratio : list of float
        One slope per row from position ``window`` onward. NaN where column 1
        takes a single value throughout the window, because the slope is not
        identified there.
    spread_ols : pandas.Series
        ``column 0 - hedge_ratio * column 1`` over the same rows.
    """
    hedge_ratio = []
    for i in range(window, len(data)):
        # Only rows strictly before i are used, so row i itself is excluded.
        y = data.iloc[i - window:i, 0]
        x = data.iloc[i - window:i, 1]

        # A flat regressor makes add_constant skip the intercept column, which
        # leaves a single fitted parameter and no slope to read.
        if x.nunique(dropna=False) < 2:
            hedge_ratio.append(np.nan)
            continue

        model = sm.OLS(y, sm.add_constant(x)).fit()
        # params is labelled ('const', <column name>); take the slope by position.
        hedge_ratio.append(model.params.iloc[1])

    spread_ols = (
        data.iloc[window:, 0]
        - data.iloc[window:, 1] * np.asarray(hedge_ratio, dtype=float)
    )

    return hedge_ratio, spread_ols

In [14]:
# Rolling hedge ratios and spread for the first two columns of `data`; the bare
# call displays the returned (hedge_ratio, spread_ols) tuple in full.
calc_dynamic_hedge_ratio_ols(data, window)

([0.019174836288158748,
  -0.00538365945544772,
  -0.005983624416300293,
  0.010357786531892454,
  0.01084244086141739,
  -0.0061173850988927025,
  -0.029614341822483015,
  -0.008796983490687249,
  0.004381251007822606,
  0.003447779089327474,
  -0.0066946681722809265,
  -0.02543774853606906,
  -0.10251664892947326,
  -0.09370912424782019,
  -0.09264981349644513,
  -0.07471567195909423,
  -0.07588718524886451,
  -0.1632601020301334,
  -0.19164645211424958,
  -0.15207684880274375,
  -0.17538501123936112,
  -0.03618688408539961,
  0.014034964238073933,
  0.010749415028123432,
  0.045914279237530664,
  0.16365407919474495,
  0.2980645984152317,
  0.2735842775032795,
  0.22730670814998932,
  0.25792514039925063,
  0.26408425240193634,
  0.2726102876031622,
  0.28566846675492896,
  0.2914600989564905,
  0.29240981781477704,
  0.3020344492939536,
  0.30552166462903996,
  0.32533069791989977,
  0.35758234858537236,
  0.3883881569605027,
  0.3923362709272635,
  0.38952129450828415,
  0.3890242