# Pairs trading and mean reversion

## Checking whether a pair of stocks is cointegrated

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
from collections import OrderedDict
#from tests import project_test, assert_output

In [3]:
# Seed NumPy's global RNG so the random draws below are reproducible run to run.
SEED = 2021
np.random.seed(SEED)

In [4]:
# Build the first price series: cumulative sum of N(0, 1) draws ("returns"),
# shifted up by a constant level.
drift = 100
r1 = np.random.normal(loc=0, scale=1, size=1000)
s1 = pd.Series(r1.cumsum(), name='s1') + drift

# Build the second series as s1 plus a constant offset and fresh N(0, 1) noise.
offset = 10
noise = np.random.normal(loc=0, scale=1, size=1000)
s2 = (s1 + offset + noise).rename('s2')

# Hedge ratio: slope of the least-squares fit of s2 on s1.
# Both inputs are reshaped to single-column 2-D arrays; because the target is
# 2-D, coef_ is 2-D as well, hence the [0, 0] index.
lr = LinearRegression().fit(
    s1.to_numpy().reshape(-1, 1),
    s2.to_numpy().reshape(-1, 1),
)
hedge_ratio = lr.coef_[0, 0]

# Spread between the two legs. The fitted intercept is not subtracted here,
# only the hedge-ratio-scaled s1.
spread = s2 - s1 * hedge_ratio

In [29]:
def is_spread_stationary(spread, p_level=0.05):
    """
    Decide whether a spread can be treated as stationary, based on the
    p-value reported by adfuller.

    spread: obtained from a linear combination of two series with a hedge ratio

    p_level: significance level; a p-value at or below it rejects the
        null hypothesis of non-stationarity

    returns:
        True if the p-value is <= p_level (spread considered stationary)
        False otherwise

    Side effect: prints the p-value and the resulting verdict.
    """
    # The p-value is the second element of the adfuller result.
    p_value = adfuller(spread)[1]
    print(f"pvalue {p_value:.4f}")

    # Keep the `<=` comparison (rather than testing `>`), so that a NaN
    # p-value falls into the "not stationary" branch.
    rejects_null = bool(p_value <= p_level)
    if rejects_null:
        verdict = f"pvalue is <= {p_level}, assume spread is stationary"
    else:
        verdict = f"pvalue is > {p_level}, assume spread is not stationary"
    print(verdict)
    return rejects_null
    


In [30]:
# Run the adfuller-based check on the spread with the default p_level (0.05).
# The boolean result is the cell's last expression, so it is also shown as the cell output.
is_spread_stationary(spread)

pvalue 0.0000
pvalue is <= 0.05, assume spread is stationary


True